evidently.metrics.data_integrity
Submodules
column_missing_values_metric module
class ColumnMissingValues(number_of_rows: int, different_missing_values: Dict[Any, int], number_of_different_missing_values: int, number_of_missing_values: int, share_of_missing_values: float)
Bases: object
Statistics about missing values in a column
Attributes:
different_missing_values : Dict[Any, int]
number_of_different_missing_values : int
number_of_missing_values : int
number_of_rows : int
share_of_missing_values : float
class ColumnMissingValuesMetric(column_name: str, missing_values: Optional[list] = None, replace: bool = True)
Bases: Metric[ColumnMissingValuesMetricResult]
Count missing values in a column.
Missing value is a null or NaN value.
Calculates the number of different kinds of missing values and the count of each kind. NA-types like numpy.NaN and pandas.NaT are counted as one type.
You can set your own list of missing values with the missing_values parameter. A value of None in the list means that Pandas null values will be included in the calculation.
If the replace parameter is False, the defaults are added to the user’s list. If the replace parameter is True, only the values from the missing_values list are used.
Attributes:
DEFAULT_MISSING_VALUES = ['', inf, -inf, None]
column_name : str
missing_values : frozenset
Methods:
calculate(data: InputData)
class ColumnMissingValuesMetricRenderer(color_options: Optional[ColorOptions] = None)
Bases: MetricRenderer
Attributes:
color_options : ColorOptions
Methods:
render_html(obj: ColumnMissingValuesMetric)
render_json(obj: ColumnMissingValuesMetric)
class ColumnMissingValuesMetricResult(column_name: str, current: ColumnMissingValues, reference: Optional[ColumnMissingValues] = None)
Bases: object
Attributes:
column_name : str
current : ColumnMissingValues
reference : Optional[ColumnMissingValues] = None
column_regexp_metric module
class ColumnRegExpMetric(column_name: str, reg_exp: str, top: int = 10)
Bases: Metric[DataIntegrityValueByRegexpMetricResult]
Count the number of values in a column that match, and that do not match, a regular expression (regexp).
Attributes:
column_name : str
reg_exp : str
top : int
Methods:
calculate(data: InputData)
class ColumnRegExpMetricRenderer(color_options: Optional[ColorOptions] = None)
Bases: MetricRenderer
Attributes:
color_options : ColorOptions
Methods:
render_html(obj: ColumnRegExpMetric)
render_json(obj: ColumnRegExpMetric)
class DataIntegrityValueByRegexpMetricResult(column_name: str, reg_exp: str, top: int, current: DataIntegrityValueByRegexpStat, reference: Optional[DataIntegrityValueByRegexpStat] = None)
Bases: object
Attributes:
column_name : str
current : DataIntegrityValueByRegexpStat
reference : Optional[DataIntegrityValueByRegexpStat] = None
reg_exp : str
top : int
class DataIntegrityValueByRegexpStat(number_of_matched: int, number_of_not_matched: int, number_of_rows: int, table_of_matched: Dict[str, int], table_of_not_matched: Dict[str, int])
Bases: object
Statistics about the values in a column matched by a regular expression, for one dataset
Attributes:
number_of_matched : int
number_of_not_matched : int
number_of_rows : int
table_of_matched : Dict[str, int]
table_of_not_matched : Dict[str, int]
column_summary_metric module
class CategoricalCharacteristics(number_of_rows: int, count: int, unique: Optional[int], unique_percentage: Optional[float], most_common: Optional[object], most_common_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], new_in_current_values_count: Optional[int] = None, unused_in_current_values_count: Optional[int] = None)
Bases: object
Attributes:
count : int
missing : Optional[int]
missing_percentage : Optional[float]
most_common : Optional[object]
most_common_percentage : Optional[float]
new_in_current_values_count : Optional[int] = None
number_of_rows : int
unique : Optional[int]
unique_percentage : Optional[float]
unused_in_current_values_count : Optional[int] = None
class ColumnSummary(column_name: str, column_type: str, reference_characteristics: Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics, NoneType], current_characteristics: Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics], plot_data: DataQualityPlot)
Bases: object
Attributes:
column_name : str
column_type : str
current_characteristics : Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics]
plot_data : DataQualityPlot
reference_characteristics : Optional[Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics]]
class ColumnSummaryMetric(column_name: str)
Bases: Metric[ColumnSummary]
Methods:
calculate(data: InputData)
static map_data(stats: FeatureQualityStats)
class ColumnSummaryMetricRenderer(color_options: Optional[ColorOptions] = None)
Bases: MetricRenderer
Attributes:
color_options : ColorOptions
Methods:
render_html(obj: ColumnSummaryMetric)
render_json(obj: ColumnSummaryMetric)
class DataByTarget(data_for_plots: Dict[str, Dict[str, Union[list, pandas.core.frame.DataFrame]]], target_name: str, target_type: str)
Bases: object
Attributes:
data_for_plots : Dict[str, Dict[str, Union[list, DataFrame]]]
target_name : str
target_type : str
class DataInTime(data_for_plots: Dict[str, pandas.core.frame.DataFrame], freq: str, datetime_name: str)
Bases: object
Attributes:
data_for_plots : Dict[str, DataFrame]
datetime_name : str
freq : str
class DataQualityPlot(bins_for_hist: Dict[str, pandas.core.frame.DataFrame], data_in_time: Optional[DataInTime], data_by_target: Optional[DataByTarget], counts_of_values: Optional[Dict[str, pandas.core.frame.DataFrame]])
Bases: object
Attributes:
bins_for_hist : Dict[str, DataFrame]
counts_of_values : Optional[Dict[str, DataFrame]]
data_by_target : Optional[DataByTarget]
data_in_time : Optional[DataInTime]
class DatetimeCharacteristics(number_of_rows: int, count: int, unique: Optional[int], unique_percentage: Optional[float], most_common: Optional[object], most_common_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], first: Optional[str], last: Optional[str])
Bases: object
Attributes:
count : int
first : Optional[str]
last : Optional[str]
missing : Optional[int]
missing_percentage : Optional[float]
most_common : Optional[object]
most_common_percentage : Optional[float]
number_of_rows : int
unique : Optional[int]
unique_percentage : Optional[float]
class NumericCharacteristics(number_of_rows: int, count: int, mean: Union[float, int, NoneType], std: Union[float, int, NoneType], min: Union[float, int, NoneType], p25: Union[float, int, NoneType], p50: Union[float, int, NoneType], p75: Union[float, int, NoneType], max: Union[float, int, NoneType], unique: Optional[int], unique_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], infinite_count: Optional[int], infinite_percentage: Optional[float], most_common: Union[float, int, NoneType], most_common_percentage: Optional[float])
Bases: object
Attributes:
count : int
infinite_count : Optional[int]
infinite_percentage : Optional[float]
max : Optional[Union[float, int]]
mean : Optional[Union[float, int]]
min : Optional[Union[float, int]]
missing : Optional[int]
missing_percentage : Optional[float]
most_common : Optional[Union[float, int]]
most_common_percentage : Optional[float]
number_of_rows : int
p25 : Optional[Union[float, int]]
p50 : Optional[Union[float, int]]
p75 : Optional[Union[float, int]]
std : Optional[Union[float, int]]
unique : Optional[int]
unique_percentage : Optional[float]
dataset_missing_values_metric module
class DatasetMissingValues(different_missing_values: Dict[Any, int], number_of_different_missing_values: int, different_missing_values_by_column: Dict[str, Dict[Any, int]], number_of_different_missing_values_by_column: Dict[str, int], number_of_missing_values: int, share_of_missing_values: float, number_of_missing_values_by_column: Dict[str, int], share_of_missing_values_by_column: Dict[str, float], number_of_rows: int, number_of_rows_with_missing_values: int, share_of_rows_with_missing_values: float, number_of_columns: int, columns_with_missing_values: List[str], number_of_columns_with_missing_values: int, share_of_columns_with_missing_values: float)
Bases: object
Statistics about missing values in a dataset
Attributes:
columns_with_missing_values : List[str]
different_missing_values : Dict[Any, int]
different_missing_values_by_column : Dict[str, Dict[Any, int]]
number_of_columns : int
number_of_columns_with_missing_values : int
number_of_different_missing_values : int
number_of_different_missing_values_by_column : Dict[str, int]
number_of_missing_values : int
number_of_missing_values_by_column : Dict[str, int]
number_of_rows : int
number_of_rows_with_missing_values : int
share_of_columns_with_missing_values : float
share_of_missing_values : float
share_of_missing_values_by_column : Dict[str, float]
share_of_rows_with_missing_values : float
class DatasetMissingValuesMetric(missing_values: Optional[list] = None, replace: bool = True)
Bases: Metric[DatasetMissingValuesMetricResult]
Count missing values in a dataset.
Missing value is a null or NaN value.
Calculates the number of different kinds of missing values and the count of each kind. NA-types like numpy.NaN and pandas.NaT are counted as one type.
You can set your own list of missing values with the missing_values parameter. A value of None in the list means that Pandas null values will be included in the calculation.
If the replace parameter is False, the defaults are added to the user’s list. If the replace parameter is True, only the values from the missing_values list are used.
Attributes:
DEFAULT_MISSING_VALUES = ['', inf, -inf, None]
missing_values : frozenset
Methods:
calculate(data: InputData)
class DatasetMissingValuesMetricRenderer(color_options: Optional[ColorOptions] = None)
Bases: MetricRenderer
Attributes:
color_options : ColorOptions
Methods:
render_html(obj: DatasetMissingValuesMetric)
render_json(obj: DatasetMissingValuesMetric)
class DatasetMissingValuesMetricResult(current: DatasetMissingValues, reference: Optional[DatasetMissingValues] = None)
Bases: object
Attributes:
current : DatasetMissingValues
reference : Optional[DatasetMissingValues] = None
dataset_summary_metric module
class DatasetSummary(target: Optional[str], prediction: Optional[Union[str, Sequence[str]]], date_column: Optional[str], id_column: Optional[str], number_of_columns: int, number_of_rows: int, number_of_missing_values: int, number_of_categorical_columns: int, number_of_numeric_columns: int, number_of_datetime_columns: int, number_of_constant_columns: int, number_of_almost_constant_columns: int, number_of_duplicated_columns: int, number_of_almost_duplicated_columns: int, number_of_empty_rows: int, number_of_empty_columns: int, number_of_duplicated_rows: int, columns_type: dict, nans_by_columns: dict, number_uniques_by_columns: dict)
Bases: object
Columns information in a dataset
Attributes:
columns_type : dict
date_column : Optional[str]
id_column : Optional[str]
nans_by_columns : dict
number_of_almost_constant_columns : int
number_of_almost_duplicated_columns : int
number_of_categorical_columns : int
number_of_columns : int
number_of_constant_columns : int
number_of_datetime_columns : int
number_of_duplicated_columns : int
number_of_duplicated_rows : int
number_of_empty_columns : int
number_of_empty_rows : int
number_of_missing_values : int
number_of_numeric_columns : int
number_of_rows : int
number_uniques_by_columns : dict
prediction : Optional[Union[str, Sequence[str]]]
target : Optional[str]
class DatasetSummaryMetric(almost_duplicated_threshold: float = 0.95, almost_constant_threshold: float = 0.95)
Bases: Metric[DatasetSummaryMetricResult]
Common characteristics of the columns/features of the dataset(s)
Attributes:
almost_constant_threshold : float
almost_duplicated_threshold : float
Methods:
calculate(data: InputData)
class DatasetSummaryMetricRenderer(color_options: Optional[ColorOptions] = None)
Bases: MetricRenderer
Attributes:
color_options : ColorOptions
Methods:
render_html(obj: DatasetSummaryMetric)
render_json(obj: DatasetSummaryMetric)
class DatasetSummaryMetricResult(almost_duplicated_threshold: float, current: DatasetSummary, reference: Optional[DatasetSummary] = None)
Bases: object
Attributes:
almost_duplicated_threshold : float
current : DatasetSummary
reference : Optional[DatasetSummary] = None
Last updated