import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import SGD
from nnfwtbn import Variable, Process, Cut, \
HepNet, ClassicalCV, EstimatorNormalizer, \
HistogramFactory, confusion_matrix, atlasify, \
McStack
from nnfwtbn import toydata
# Load the toy dataset that ships with nnfwtbn.
df = toydata.get()

# One Process per physics sample.
# NOTE(review): `range=(lo, hi)` presumably selects on a process-id column of
# the toy data -- confirm against the nnfwtbn Process documentation.
p_ztt = Process(r"$Z\rightarrow\tau\tau$", range=(0, 0))
p_sig = Process(r"Signal", range=(1, 1))

# Stack both processes and histogram them using the per-event "weight" column.
s_all = McStack(p_ztt, p_sig)
hist_factory = HistogramFactory(df, stacks=[s_all], weight="weight")

# The LaTeX label must be a raw string: "\D" and "\e" are not valid Python
# escape sequences and trigger a warning (a future error) in a plain literal.
# The resulting string value is unchanged.
hist_factory(
    Variable(
        r"$\Delta \eta^{jj}$",
        lambda d: (d.jet_1_eta - d.jet_2_eta).abs(),
    ),
    bins=20,
    range=(0, 8),
)
hist_factory(
    Variable("$m^{jj}$", "m_jj"),
    bins=20,
    range=(0, 1500),
)
None  # presumably a notebook idiom to suppress the cell's echoed output
# Signal region: the conjunction of four requirements on the dijet system.
c_sr = (
    Cut(lambda d: d.m_jj > 400)
    & Cut(lambda d: d.jet_2_pt >= 30)
    & Cut(lambda d: d.jet_1_eta * d.jet_2_eta < 0)
    & Cut(lambda d: (d.jet_2_eta - d.jet_1_eta).abs() > 3)
)
c_sr.label = "Signal"

# Everything failing the signal-region selection.
c_rest = ~c_sr
c_rest.label = "Rest"

# Weighted event counts per process and region.
confusion_matrix(
    df,
    [p_sig, p_ztt],
    [c_sr, c_rest],
    info=False,
    x_label="Signal",
    y_label="Region",
    annot=True,
    weight="weight",
)

# Same matrix again, this time with normalize_rows=True.
confusion_matrix(
    df,
    [p_sig, p_ztt],
    [c_sr, c_rest],
    normalize_rows=True,
    info=False,
    x_label="Signal",
    y_label="Region",
    annot=True,
    weight="weight",
)
None  # presumably a notebook idiom to suppress the cell's echoed output
# Derived dijet features: absolute eta separation and eta product.
df["dijet_deta"] = (df["jet_1_eta"] - df["jet_2_eta"]).abs()
df["dijet_prod_eta"] = df["jet_1_eta"] * df["jet_2_eta"]

# Columns fed into the network and the target columns it is trained on.
input_var = [
    "dijet_prod_eta",
    "m_jj",
    "dijet_deta",
    "higgs_pt",
    "jet_2_pt",
    "jet_1_eta",
    "jet_2_eta",
    "tau_eta",
]
output_var = [
    "is_sig",
    "is_ztt",
]

# Per-event target flags taken from the process selections.
df["is_sig"] = p_sig.selection.idx_array(df)
df["is_ztt"] = p_ztt.selection.idx_array(df)

# Draw a random subsample (the fraction is chosen to give roughly 1000 rows)
# and materialise it before handing it to seaborn.
sample_df = df.sample(frac=1000 / len(df)).compute()
sns.pairplot(
    sample_df,
    vars=input_var,
    hue="is_sig",
)
None  # presumably a notebook idiom to suppress the cell's echoed output
def model():
    """Build and compile the classifier network.

    The network is a dense stack of 15 -> 5 -> 2 units (ReLU, ReLU, softmax)
    whose input width equals ``len(input_var)``. It is compiled with
    categorical cross-entropy, SGD and the categorical-accuracy metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    network = Sequential(
        [
            Dense(units=15, activation='relu', input_dim=len(input_var)),
            Dense(units=5, activation='relu'),
            Dense(units=2, activation='softmax'),
        ]
    )
    # NOTE(review): `lr` looks like the legacy spelling of the learning-rate
    # keyword; newer Keras releases may expect `learning_rate` -- confirm
    # against the installed Keras version before changing it.
    optimizer = SGD(lr=0.1)
    network.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['categorical_accuracy'],
    )
    return network
def _events_per_unit_weight(process):
    """Return the number of events selected by *process* over their summed weight."""
    weights = process.selection(df).weight
    return len(weights) / weights.sum()


# Cross-validation scheme and the network wrapper built around `model`.
cv = ClassicalCV(5, frac_var="random")
net = HepNet(model, cv, EstimatorNormalizer, input_var, output_var)

# Per-class scale factors: multiplying a class's weights by this factor makes
# them sum to that class's event count.
sig_wf = _events_per_unit_weight(p_sig)
ztt_wf = _events_per_unit_weight(p_ztt)

# Training weight: the event weight times the factor of the event's class
# (assumes is_sig / is_ztt are mutually exclusive 0/1 flags -- TODO confirm).
training_weight = Variable(
    "weight",
    lambda d: d.weight * (d.is_sig * sig_wf + d.is_ztt * ztt_wf),
)
net.fit(
    df.compute(),
    epochs=150,
    verbose=0,
    batch_size=2048,
    weight=training_weight,
)
# Loss versus epoch: training curve first, then validation.
for column, legend in (("loss", "Training"), ("val_loss", "Validation")):
    sns.lineplot(x="epoch", y=column, data=net.history, label=legend)
plt.ylabel("loss")
atlasify("Internal")
None  # presumably a notebook idiom to suppress the cell's echoed output

# Categorical accuracy versus epoch: training curve first, then validation.
for column, legend in (
    ("categorical_accuracy", "Training"),
    ("val_categorical_accuracy", "Validation"),
):
    sns.lineplot(x="epoch", y=column, data=net.history, label=legend)
plt.ylabel("Accuracy")
atlasify("Internal")
None

# Validation accuracy again, split by the "fold" column of the history.
sns.lineplot(
    x="epoch",
    y="val_categorical_accuracy",
    data=net.history,
    hue="fold",
)
atlasify("Internal", enlarge=1.6)
None
# Predict with cv='test' and flag events whose signal score reaches 0.5.
out = net.predict(df.compute(), cv="test")
out["pred_sig"] = out["pred_is_sig"] >= 0.5

# Predicted classes, split at a signal score of 0.5.
c_pred_sig = Process(
    "Signal",
    lambda d: d.pred_is_sig >= 0.5,
)
c_pred_ztt = Process(
    r"$Z\rightarrow\tau\tau$",
    lambda d: d.pred_is_sig < 0.5,
)

# Weighted truth-versus-classification counts.
confusion_matrix(
    out,
    [p_sig, p_ztt],
    [c_pred_sig, c_pred_ztt],
    info=False,
    x_label="Truth",
    y_label="Classification",
    annot=True,
    weight="weight",
)

# Same matrix again, this time with normalize_rows=True.
confusion_matrix(
    out,
    [p_sig, p_ztt],
    [c_pred_sig, c_pred_ztt],
    normalize_rows=True,
    info=False,
    x_label="Truth",
    y_label="Classification",
    annot=True,
    weight="weight",
)
None  # presumably a notebook idiom to suppress the cell's echoed output
In order to use the network in lwtnn, we need to export the neural network with the export()
method. This exports one network per fold. It is the responsibility of the user to implement the cross validation in the analysis framework.
# Write the trained networks to files whose names start with "lwtnn"
# (the directory listing below shows one arch/vars/wght set per fold).
net.export("lwtnn")
!ls lwtnn*
lwtnn.sh lwtnn_arch_3.json lwtnn_vars_2.json lwtnn_wght_1.h5 lwtnn_arch_0.json lwtnn_arch_4.json lwtnn_vars_3.json lwtnn_wght_2.h5 lwtnn_arch_1.json lwtnn_vars_0.json lwtnn_vars_4.json lwtnn_wght_3.h5 lwtnn_arch_2.json lwtnn_vars_1.json lwtnn_wght_0.h5 lwtnn_wght_4.h5
The final, manual step is to run lwtnn's converter using the shortcut script lwtnn.sh.