evidently.metrics.data_integrity
Bases:
object
Statistics about missing values in a column
different_missing_values : Dict[Any, int]
number_of_different_missing_values : int
number_of_missing_values : int
number_of_rows : int
share_of_missing_values : float
Count missing values in a column.
A missing value is a null or NaN value.
Calculates the number of distinct kinds of missing values and the count of each kind. NA-types like numpy.NaN and pandas.NaT are counted as one type.
You can set your own list of missing values with the missing_values parameter. A None value in the list means that Pandas null values will be included in the calculation.
If the replace parameter is False, the defaults are added to the user’s list. If the replace parameter is True, only the values from the missing_values list are used.
DEFAULT_MISSING_VALUES = ['', inf, -inf, None]
column_name : str
missing_values : frozenset
render_html(obj: ColumnMissingValuesMetric)
render_json(obj: ColumnMissingValuesMetric)
Bases:
object
column_name : str
current : ColumnMissingValues
reference : Optional[ColumnMissingValues] = None
Count the number of values in a column that match, or do not match, a regular expression (regexp).
column_name : str
reg_exp : str
top : int
render_html(obj: ColumnRegExpMetric)
render_json(obj: ColumnRegExpMetric)
Bases:
object
column_name : str
current : DataIntegrityValueByRegexpStat
reference : Optional[DataIntegrityValueByRegexpStat] = None
reg_exp : str
top : int
Bases:
object
Statistics about the values in a column that are matched by a regular expression, for one dataset
number_of_matched : int
number_of_not_matched : int
number_of_rows : int
table_of_matched : Dict[str, int]
table_of_not_matched : Dict[str, int]
class CategoricalCharacteristics(number_of_rows: int, count: int, unique: Optional[int], unique_percentage: Optional[float], most_common: Optional[object], most_common_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], new_in_current_values_count: Optional[int] = None, unused_in_current_values_count: Optional[int] = None)
Bases:
object
count : int
missing : Optional[int]
missing_percentage : Optional[float]
most_common : Optional[object]
most_common_percentage : Optional[float]
new_in_current_values_count : Optional[int] = None
number_of_rows : int
unique : Optional[int]
unique_percentage : Optional[float]
unused_in_current_values_count : Optional[int] = None
class ColumnSummary(column_name: str, column_type: str, reference_characteristics: Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics, NoneType], current_characteristics: Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics], plot_data: DataQualityPlot)
Bases:
object
column_name : str
column_type : str
current_characteristics : Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics]
plot_data : DataQualityPlot
reference_characteristics : Optional[Union[NumericCharacteristics, CategoricalCharacteristics, DatetimeCharacteristics]]
render_html(obj: ColumnSummaryMetric)
render_json(obj: ColumnSummaryMetric)
Bases:
object
data_for_plots : Dict[str, Dict[str, Union[list, DataFrame]]]
target_name : str
target_type : str
Bases:
object
data_for_plots : Dict[str, DataFrame]
datetime_name : str
freq : str
Bases:
object
bins_for_hist : Dict[str, DataFrame]
counts_of_values : Optional[Dict[str, DataFrame]]
data_by_target : Optional[DataByTarget]
data_in_time : Optional[DataInTime]
Bases:
object
count : int
first : Optional[str]
last : Optional[str]
missing : Optional[int]
missing_percentage : Optional[float]
most_common : Optional[object]
most_common_percentage : Optional[float]
number_of_rows : int
unique : Optional[int]
unique_percentage : Optional[float]
class NumericCharacteristics(number_of_rows: int, count: int, mean: Union[float, int, NoneType], std: Union[float, int, NoneType], min: Union[float, int, NoneType], p25: Union[float, int, NoneType], p50: Union[float, int, NoneType], p75: Union[float, int, NoneType], max: Union[float, int, NoneType], unique: Optional[int], unique_percentage: Optional[float], missing: Optional[int], missing_percentage: Optional[float], infinite_count: Optional[int], infinite_percentage: Optional[float], most_common: Union[float, int, NoneType], most_common_percentage: Optional[float])
Bases:
object
count : int
infinite_count : Optional[int]
infinite_percentage : Optional[float]
max : Optional[Union[float, int]]
mean : Optional[Union[float, int]]
min : Optional[Union[float, int]]
missing : Optional[int]
missing_percentage : Optional[float]
most_common : Optional[Union[float, int]]
most_common_percentage : Optional[float]
number_of_rows : int
p25 : Optional[Union[float, int]]
p50 : Optional[Union[float, int]]
p75 : Optional[Union[float, int]]
std : Optional[Union[float, int]]
unique : Optional[int]
unique_percentage : Optional[float]
class DatasetMissingValues(different_missing_values: Dict[Any, int], number_of_different_missing_values: int, different_missing_values_by_column: Dict[str, Dict[Any, int]], number_of_different_missing_values_by_column: Dict[str, int], number_of_missing_values: int, share_of_missing_values: float, number_of_missing_values_by_column: Dict[str, int], share_of_missing_values_by_column: Dict[str, float], number_of_rows: int, number_of_rows_with_missing_values: int, share_of_rows_with_missing_values: float, number_of_columns: int, columns_with_missing_values: List[str], number_of_columns_with_missing_values: int, share_of_columns_with_missing_values: float)
Bases:
object
Statistics about missing values in a dataset
columns_with_missing_values : List[str]
different_missing_values : Dict[Any, int]
different_missing_values_by_column : Dict[str, Dict[Any, int]]
number_of_columns : int
number_of_columns_with_missing_values : int
number_of_different_missing_values : int
number_of_different_missing_values_by_column : Dict[str, int]
number_of_missing_values : int
number_of_missing_values_by_column : Dict[str, int]
number_of_rows : int
number_of_rows_with_missing_values : int
share_of_columns_with_missing_values : float
share_of_missing_values : float
share_of_missing_values_by_column : Dict[str, float]
share_of_rows_with_missing_values : float
Count missing values in a dataset.
A missing value is a null or NaN value.
Calculates the number of distinct kinds of missing values and the count of each kind. NA-types like numpy.NaN and pandas.NaT are counted as one type.
You can set your own list of missing values with the missing_values parameter. A None value in the list means that Pandas null values will be included in the calculation.
If the replace parameter is False, the defaults are added to the user’s list. If the replace parameter is True, only the values from the missing_values list are used.
DEFAULT_MISSING_VALUES = ['', inf, -inf, None]
missing_values : frozenset
render_html(obj: DatasetMissingValuesMetric)
render_json(obj: DatasetMissingValuesMetric)
Bases:
object
current : DatasetMissingValues
reference : Optional[DatasetMissingValues] = None
class DatasetSummary(target: Optional[str], prediction: Optional[Union[str, Sequence[str]]], date_column: Optional[str], id_column: Optional[str], number_of_columns: int, number_of_rows: int, number_of_missing_values: int, number_of_categorical_columns: int, number_of_numeric_columns: int, number_of_datetime_columns: int, number_of_constant_columns: int, number_of_almost_constant_columns: int, number_of_duplicated_columns: int, number_of_almost_duplicated_columns: int, number_of_empty_rows: int, number_of_empty_columns: int, number_of_duplicated_rows: int, columns_type: dict, nans_by_columns: dict, number_uniques_by_columns: dict)
Bases:
object
Columns information in a dataset
columns_type : dict
date_column : Optional[str]
id_column : Optional[str]
nans_by_columns : dict
number_of_almost_constant_columns : int
number_of_almost_duplicated_columns : int
number_of_categorical_columns : int
number_of_columns : int
number_of_constant_columns : int
number_of_datetime_columns : int
number_of_duplicated_columns : int
number_of_duplicated_rows : int
number_of_empty_columns : int
number_of_empty_rows : int
number_of_missing_values : int
number_of_numeric_columns : int
number_of_rows : int
number_uniques_by_columns : dict
prediction : Optional[Union[str, Sequence[str]]]
target : Optional[str]
Common dataset(s) columns/features characteristics
almost_constant_threshold : float
almost_duplicated_threshold : float
render_html(obj: DatasetSummaryMetric)
render_json(obj: DatasetSummaryMetric)
Bases:
object
almost_duplicated_threshold : float
current : DatasetSummary
reference : Optional[DatasetSummary] = None
Last modified 6mo ago