import pandas as pd
import numpy as np
class Cut:
    """
    Representation of an analysis cut. The class can be used to apply event
    selections based on conditions on columns in a pandas dataframe or derived
    quantities.

    Cuts store the condition to be applied to a dataframe. New cut objects
    accept all events by default. The selection can be limited by passing a
    lambda to the constructor.

    >>> sel_all = Cut()
    >>> sel_pos = Cut(lambda df: df.value > 0)

    The cut object lives independently of the dataframe. Calling the cut with
    a dataframe returns a new dataframe containing only rows which pass the
    selection criteria.

    >>> df = pd.DataFrame([0, 1, -2, -3, 4], columns=["value"])
    >>> sel_all(df)
       value
    0      0
    1      1
    2     -2
    3     -3
    4      4
    >>> sel_pos(df)
       value
    1      1
    4      4

    The index array for a given data set is calculated by calling the
    idx_array() method with a dataframe.

    >>> sel_pos.idx_array(df)
    0    False
    1     True
    2    False
    3    False
    4     True
    Name: value, dtype: bool

    Cuts can be used to build logical expressions using the bitwise and (&),
    or (|), xor (^) and not (~).

    >>> sel_even = Cut(lambda df: df.value % 2 == 0)
    >>> sel_pos_even = sel_pos & sel_even
    >>> sel_pos_even(df)
       value
    4      4

    Equivalently, cuts support logical operations directly using lambdas.

    >>> sel_pos_even_lambda = sel_pos & (lambda df: df.value % 2 == 0)
    >>> sel_pos_even_lambda(df)
       value
    4      4

    Cuts might be named by passing the 'label' argument to the constructor.
    Cut names can be used during plotting as labels to specify the plotted
    region.

    >>> sel_sr = Cut(lambda df: df.is_sr == 1, label="Signal Region")
    >>> sel_sr.label
    'Signal Region'
    """

    def __init__(self, func=None, label=None):
        """
        Creates a new cut. The optional func argument is called with the
        dataframe upon evaluation. The function must return an index array. If
        the optional function is omitted, every row in the dataframe is
        accepted by this cut.

        If func is itself a Cut, its stored function is reused and its label
        is inherited unless a (truthy) label is passed explicitly.
        """
        if isinstance(func, Cut):
            # Copy-construct: unwrap the other cut instead of nesting it.
            self.func = func.func
            self.label = label or func.label
        else:
            self.func = func
            self.label = label

    def __call__(self, dataframe):
        """
        Applies the internally stored cut to the given dataframe and returns a
        new dataframe containing only entries passing the event selection.
        """
        return dataframe[self.idx_array(dataframe)]

    def idx_array(self, dataframe):
        """
        Applies the internally stored cut to the given dataframe and returns
        an index array, specifying which events passed the event selection.

        For a cut without a function, the returned all-True series shares the
        index of the dataframe.
        """
        if self.func is None:
            # The mask must be built on the dataframe's own index: a fresh
            # 0..n-1 index would not line up with dataframes whose index is
            # not the default one (e.g. the result of a previous selection),
            # which breaks boolean indexing and combination with other masks.
            return pd.Series(np.ones(len(dataframe), dtype='bool'),
                             index=dataframe.index)
        return self.func(dataframe)

    @staticmethod
    def _evaluate(other, dataframe):
        """
        Returns the index array represented by the operand of a logical
        operation: the idx_array() of a Cut, the return value of any other
        callable, or the operand itself (e.g. a ready-made mask or a bool).
        """
        if isinstance(other, Cut):
            return other.idx_array(dataframe)
        if callable(other):
            return other(dataframe)
        return other

    def __and__(self, other):
        """
        Returns a new cut implementing the logical AND of this cut and the
        other cut. The other can be a Cut, any callable taking the dataframe,
        or a value which is combined directly with the index array.
        """
        return Cut(
            lambda df: self.idx_array(df) & self._evaluate(other, df))

    def __or__(self, other):
        """
        Returns a new cut implementing the logical OR of this cut and the
        other cut. The other can be a Cut, any callable taking the dataframe,
        or a value which is combined directly with the index array.
        """
        return Cut(
            lambda df: self.idx_array(df) | self._evaluate(other, df))

    def __xor__(self, other):
        """
        Returns a new cut implementing the logical XOR of this cut and the
        other cut. The other can be a Cut, any callable taking the dataframe,
        or a value which is combined directly with the index array.
        """
        return Cut(
            lambda df: self.idx_array(df) ^ self._evaluate(other, df))

    def __invert__(self):
        """
        Returns a new cut implementing the logical NOT of this cut.
        """
        return Cut(lambda df: ~self.idx_array(df))

    def __rand__(self, other):
        """
        Reflected AND, used when the left operand is not a Cut (e.g. a
        lambda). Delegates to the regular AND with swapped operands.
        """
        return self & other

    def __ror__(self, other):
        """
        Reflected OR, used when the left operand is not a Cut (e.g. a
        lambda). Delegates to the regular OR with swapped operands.
        """
        return self | other

    def __rxor__(self, other):
        """
        Reflected XOR, used when the left operand is not a Cut (e.g. a
        lambda). Delegates to the regular XOR with swapped operands.
        """
        return self ^ other