Source code for rta_selection.filters

import logging

import numpy as np
import pandas as pd


[docs] def filter_events( events: pd.DataFrame, filters: dict[str, list[float]] | None = None, finite_params=None, ): """ Apply data filtering to a pandas dataframe. Each filtering range is applied if the column name exists in the DataFrame so that `(events >= range[0]) & (events <= range[1])` If the column name does not exist, the filtering is simply not applied Parameters ---------- events: `pandas.DataFrame` filters: dict containing events features names and their filtering range finite_params: optional, None or list of strings extra filter to ensure finite parameters Returns ------- `pandas.DataFrame` """ if filters is None: filters = dict( intensity=[0, np.inf], width=[0, np.inf], length=[0, np.inf], wl=[0, np.inf], r=[0, np.inf], leakage_intensity_width_2=[0, 1], ) filter = np.ones(len(events), dtype=bool) for k in filters.keys(): if k in events.columns: filter &= (events[k] >= filters[k][0]) & (events[k] <= filters[k][1]) if finite_params is not None: _finite_params = list(set(finite_params).intersection(list(events.columns))) with pd.option_context("mode.use_inf_as_null", True): not_finite_mask = events[_finite_params].isnull() filter &= ~(not_finite_mask.any(axis=1)) not_finite_counts = (not_finite_mask).sum(axis=0)[_finite_params] if (not_finite_counts > 0).any(): logging.warning("Data contains not-predictable events.") logging.warning("Column | Number of non finite values") for k, v in not_finite_counts.items(): if v > 0: logging.warning(f"{k} : {v}") return events[filter]