import logging
import numpy as np
import pandas as pd
[docs]
def filter_events(
events: pd.DataFrame,
filters: dict[str, list[float]] | None = None,
finite_params=None,
):
"""
Apply data filtering to a pandas dataframe.
Each filtering range is applied if the column name exists in the DataFrame so that
`(events >= range[0]) & (events <= range[1])`
If the column name does not exist, the filtering is simply not applied
Parameters
----------
events: `pandas.DataFrame`
filters: dict containing events features names and their filtering range
finite_params: optional, None or list of strings
extra filter to ensure finite parameters
Returns
-------
`pandas.DataFrame`
"""
if filters is None:
filters = dict(
intensity=[0, np.inf],
width=[0, np.inf],
length=[0, np.inf],
wl=[0, np.inf],
r=[0, np.inf],
leakage_intensity_width_2=[0, 1],
)
filter = np.ones(len(events), dtype=bool)
for k in filters.keys():
if k in events.columns:
filter &= (events[k] >= filters[k][0]) & (events[k] <= filters[k][1])
if finite_params is not None:
_finite_params = list(set(finite_params).intersection(list(events.columns)))
with pd.option_context("mode.use_inf_as_null", True):
not_finite_mask = events[_finite_params].isnull()
filter &= ~(not_finite_mask.any(axis=1))
not_finite_counts = (not_finite_mask).sum(axis=0)[_finite_params]
if (not_finite_counts > 0).any():
logging.warning("Data contains not-predictable events.")
logging.warning("Column | Number of non finite values")
for k, v in not_finite_counts.items():
if v > 0:
logging.warning(f"{k} : {v}")
return events[filter]