diff --git a/examples/features/images_cats_and_dogs.py b/examples/features/images_cats_and_dogs.py index d2b1fd82a..518a8a13e 100644 --- a/examples/features/images_cats_and_dogs.py +++ b/examples/features/images_cats_and_dogs.py @@ -14,7 +14,10 @@ # However, popular larger files shouldn't be a problem (LFW, CelebA). data_path = get_data_path() / "cat-and-dog" kaggle.api.dataset_download_files( - "tongpython/cat-and-dog", path=str(data_path), quiet=False, unzip=True, + "tongpython/cat-and-dog", + path=str(data_path), + quiet=False, + unzip=True, ) # At the first run, we find that the dataset not only contains images, "_DS_Store" and "cat-and-dog.zip" are present too. diff --git a/src/pandas_profiling/config.py b/src/pandas_profiling/config.py index 9201361a5..1471fa9bf 100644 --- a/src/pandas_profiling/config.py +++ b/src/pandas_profiling/config.py @@ -40,16 +40,28 @@ def set_file(self, file_name: Union[str, Path]) -> None: "vars": {"cat": {"redact": True}}, }, "dark_mode": { - "html": {"style": {"theme": "flatly", "primary_color": "#2c3e50",}} + "html": { + "style": { + "theme": "flatly", + "primary_color": "#2c3e50", + } + } }, "orange_mode": { - "html": {"style": {"theme": "united", "primary_color": "#d34615",}} + "html": { + "style": { + "theme": "united", + "primary_color": "#d34615", + } + } }, "explorative": { "vars": { "cat": {"unicode": True}, "file": {"active": True}, - "image": {"active": True,}, + "image": { + "active": True, + }, }, "n_obs_unique": 10, "n_extreme_obs": 10, diff --git a/src/pandas_profiling/controller/console.py b/src/pandas_profiling/controller/console.py index 9ca8cc266..22809a606 100644 --- a/src/pandas_profiling/controller/console.py +++ b/src/pandas_profiling/controller/console.py @@ -84,7 +84,7 @@ def parse_args(args: Union[list, None] = None) -> argparse.Namespace: def main(args=None) -> None: - """ Run the `pandas_profiling` package. + """Run the `pandas_profiling` package. Args: args: Arguments for the programme (Default value=None). diff --git a/src/pandas_profiling/model/correlations.py b/src/pandas_profiling/model/correlations.py index c41d5c4e3..834351805 100644 --- a/src/pandas_profiling/model/correlations.py +++ b/src/pandas_profiling/model/correlations.py @@ -112,16 +112,16 @@ def calculate_correlation( df: pd.DataFrame, variables: dict, correlation_name: str ) -> Union[pd.DataFrame, None]: """Calculate the correlation coefficients between variables for the correlation types selected in the config - (pearson, spearman, kendall, phi_k, cramers). + (pearson, spearman, kendall, phi_k, cramers). - Args: - variables: A dict with column names and variable types. - df: The DataFrame with variables. - correlation_name: + Args: + variables: A dict with column names and variable types. + df: The DataFrame with variables. + correlation_name: - Returns: - The correlation matrices for the given correlation measures. Return None if correlation is empty. - """ + Returns: + The correlation matrices for the given correlation measures. Return None if correlation is empty. + """ categorical_correlations = {"cramers": cramers_matrix} correlation = None diff --git a/src/pandas_profiling/model/describe.py b/src/pandas_profiling/model/describe.py index 3a6a92937..918a514d6 100644 --- a/src/pandas_profiling/model/describe.py +++ b/src/pandas_profiling/model/describe.py @@ -54,7 +54,13 @@ def describe(title: str, df: pd.DataFrame, sample: Optional[dict] = None) -> dic correlation_names = [ correlation_name - for correlation_name in ["pearson", "spearman", "kendall", "phi_k", "cramers",] + for correlation_name in [ + "pearson", + "spearman", + "kendall", + "phi_k", + "cramers", + ] if config["correlations"][correlation_name]["calculate"].get(bool) ] diff --git a/src/pandas_profiling/model/summary.py b/src/pandas_profiling/model/summary.py index 811251989..35d1d423d 100644 --- a/src/pandas_profiling/model/summary.py +++ b/src/pandas_profiling/model/summary.py @@ -178,9 +178,9 @@ def describe_numeric_1d(series: pd.Series, series_description: dict) -> dict: """ def mad(arr): - """ Median Absolute Deviation: a "Robust" version of standard deviation. - Indices variability of the sample. - https://en.wikipedia.org/wiki/Median_absolute_deviation + """Median Absolute Deviation: a "Robust" version of standard deviation. + Indices variability of the sample. + https://en.wikipedia.org/wiki/Median_absolute_deviation """ return np.median(np.abs(arr - np.median(arr))) diff --git a/src/pandas_profiling/report/structure/report.py b/src/pandas_profiling/report/structure/report.py index cdf3fc1aa..cde911201 100644 --- a/src/pandas_profiling/report/structure/report.py +++ b/src/pandas_profiling/report/structure/report.py @@ -174,7 +174,9 @@ def get_duplicates_items(duplicates: pd.DataFrame): if duplicates is not None and len(duplicates) > 0: items.append( Duplicate( - duplicate=duplicates, name="Most frequent", anchor_id="duplicates", + duplicate=duplicates, + name="Most frequent", + anchor_id="duplicates", ) ) return items @@ -192,7 +194,11 @@ def get_definition_items(definitions: pd.DataFrame): items = [] if definitions is not None and len(definitions) > 0: items.append( - Duplicate(duplicate=definitions, name="Columns", anchor_id="definitions",) + Duplicate( + duplicate=definitions, + name="Columns", + anchor_id="definitions", + ) ) return items diff --git a/src/pandas_profiling/report/structure/variables/render_categorical.py b/src/pandas_profiling/report/structure/variables/render_categorical.py index 320e41b6c..f7a30b7f4 100644 --- a/src/pandas_profiling/report/structure/variables/render_categorical.py +++ b/src/pandas_profiling/report/structure/variables/render_categorical.py @@ -261,7 +261,10 @@ def render_categorical_unicode(summary, varid, redact): ] return Container( - citems, name="Unicode", sequence_type="tabs", anchor_id=f"{varid}unicode", + citems, + name="Unicode", + sequence_type="tabs", + anchor_id=f"{varid}unicode", ) diff --git a/src/pandas_profiling/report/structure/variables/render_file.py b/src/pandas_profiling/report/structure/variables/render_file.py index 068f55d7e..4c6d3702d 100644 --- a/src/pandas_profiling/report/structure/variables/render_file.py +++ b/src/pandas_profiling/report/structure/variables/render_file.py @@ -51,7 +51,10 @@ def render_file(summary): ) file_tab = Container( - file_tabs, name="File", sequence_type="tabs", anchor_id=f"{varid}file", + file_tabs, + name="File", + sequence_type="tabs", + anchor_id=f"{varid}file", ) template_variables["bottom"].content["items"].append(file_tab) diff --git a/src/pandas_profiling/report/structure/variables/render_image.py b/src/pandas_profiling/report/structure/variables/render_image.py index 2a2a0cc0d..06b7bc6c3 100644 --- a/src/pandas_profiling/report/structure/variables/render_image.py +++ b/src/pandas_profiling/report/structure/variables/render_image.py @@ -177,7 +177,10 @@ def render_image(summary): image_items.append(image_shape) image_tab = Container( - image_items, name="Image", sequence_type="tabs", anchor_id=f"{varid}image", + image_items, + name="Image", + sequence_type="tabs", + anchor_id=f"{varid}image", ) template_variables["bottom"].content["items"].append(image_tab) diff --git a/src/pandas_profiling/report/structure/variables/render_path.py b/src/pandas_profiling/report/structure/variables/render_path.py index 84b226695..8c53740ed 100644 --- a/src/pandas_profiling/report/structure/variables/render_path.py +++ b/src/pandas_profiling/report/structure/variables/render_path.py @@ -114,7 +114,10 @@ def render_path(summary): ] path_tab = Container( - path_items, name="Path", sequence_type="tabs", anchor_id=f"{varid}path", + path_items, + name="Path", + sequence_type="tabs", + anchor_id=f"{varid}path", ) template_variables["bottom"].content["items"].append(path_tab) diff --git a/src/pandas_profiling/report/structure/variables/render_real.py b/src/pandas_profiling/report/structure/variables/render_real.py index b1767e740..c24777dc5 100644 --- a/src/pandas_profiling/report/structure/variables/render_real.py +++ b/src/pandas_profiling/report/structure/variables/render_real.py @@ -227,7 +227,9 @@ def render_real(summary): ) template_variables["bottom"] = Container( - [statistics, hist, fq, evs], sequence_type="tabs", anchor_id=f"{varid}bottom", + [statistics, hist, fq, evs], + sequence_type="tabs", + anchor_id=f"{varid}bottom", ) return template_variables diff --git a/src/pandas_profiling/serialize_report.py b/src/pandas_profiling/serialize_report.py index e6ca6ad9b..8dcc04855 100644 --- a/src/pandas_profiling/serialize_report.py +++ b/src/pandas_profiling/serialize_report.py @@ -129,11 +129,11 @@ def dump(self, output_file: Union[Path, str]): def load(self, load_file: Union[Path, str], ignore_config: bool = False): """ - Load ProfileReport from file + Load ProfileReport from file - Raises: - ValueError: if the DataFrame or Config do not match with the current ProfileReport - """ + Raises: + ValueError: if the DataFrame or Config do not match with the current ProfileReport + """ if not isinstance(load_file, Path): load_file = Path(str(load_file)) diff --git a/src/pandas_profiling/utils/common.py b/src/pandas_profiling/utils/common.py index 42c379172..5ac592560 100644 --- a/src/pandas_profiling/utils/common.py +++ b/src/pandas_profiling/utils/common.py @@ -10,7 +10,7 @@ def update(d: dict, u: Mapping) -> dict: - """ Recursively update a dict. + """Recursively update a dict. Args: d: Dictionary to update. diff --git a/src/pandas_profiling/visualisation/context.py b/src/pandas_profiling/visualisation/context.py index 14b08cc13..1089210e9 100644 --- a/src/pandas_profiling/visualisation/context.py +++ b/src/pandas_profiling/visualisation/context.py @@ -12,8 +12,7 @@ @contextlib.contextmanager def manage_matplotlib_context(): - """Return a context manager for temporarily changing matplotlib unit registries and rcParams. - """ + """Return a context manager for temporarily changing matplotlib unit registries and rcParams.""" originalRcParams = matplotlib.rcParams.copy() ## Credits for this style go to the ggplot and seaborn packages. diff --git a/src/pandas_profiling/visualisation/plot.py b/src/pandas_profiling/visualisation/plot.py index 44d13e34f..8491dcf9b 100644 --- a/src/pandas_profiling/visualisation/plot.py +++ b/src/pandas_profiling/visualisation/plot.py @@ -171,7 +171,9 @@ def correlation_matrix(data: pd.DataFrame, vmin: int = -1) -> str: legend_elements = [Patch(facecolor=cmap(np.nan), label="invalid\ncoefficient")] plt.legend( - handles=legend_elements, loc="upper right", handleheight=2.5, + handles=legend_elements, + loc="upper right", + handleheight=2.5, ) axes_cor.set_xticks(np.arange(0, data.shape[0], float(data.shape[0]) / len(labels))) diff --git a/tests/unit/test_dataset_schema.py b/tests/unit/test_dataset_schema.py index 42d357063..5a36c6acd 100644 --- a/tests/unit/test_dataset_schema.py +++ b/tests/unit/test_dataset_schema.py @@ -17,7 +17,11 @@ def test_dataset_schema(): ) # Length left out due to correlation with weight. - report = df.profile_report(title="Dataset schema", dataset=metadata, minimal=True,) + report = df.profile_report( + title="Dataset schema", + dataset=metadata, + minimal=True, + ) html = report.to_html() @@ -36,7 +40,9 @@ def test_dataset_schema_empty(): # Length left out due to correlation with weight. report = df.profile_report( - title="Dataset schema empty", minimal=True, dataset=None, + title="Dataset schema empty", + minimal=True, + dataset=None, ) html = report.to_html() diff --git a/tests/unit/test_html_export.py b/tests/unit/test_html_export.py index 82bc5933f..2b7407110 100644 --- a/tests/unit/test_html_export.py +++ b/tests/unit/test_html_export.py @@ -76,7 +76,8 @@ def test_html_export_cdn(test_output_dir): ) profile = df.profile_report( - minimal=True, html={"inline": False, "use_local_assets": False}, + minimal=True, + html={"inline": False, "use_local_assets": False}, ) report = test_output_dir / "cdn.html" @@ -96,7 +97,8 @@ def test_html_export_theme(test_output_dir): ) profile = df.profile_report( - minimal=True, html={"inline": False, "style": {"theme": "united"}}, + minimal=True, + html={"inline": False, "style": {"theme": "united"}}, ) report = test_output_dir / "united.html"