diff --git a/docs/sdk/code-reference/pipelines/data_quality/data_manipulation/spark/prediction/arima.md b/docs/sdk/code-reference/pipelines/data_quality/data_manipulation/spark/prediction/arima.md new file mode 100644 index 000000000..5f8008516 --- /dev/null +++ b/docs/sdk/code-reference/pipelines/data_quality/data_manipulation/spark/prediction/arima.md @@ -0,0 +1 @@ +::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.prediction.arima diff --git a/docs/sdk/code-reference/pipelines/data_quality/data_manipulation/spark/prediction/auto_arima.md b/docs/sdk/code-reference/pipelines/data_quality/data_manipulation/spark/prediction/auto_arima.md new file mode 100644 index 000000000..33bb559b9 --- /dev/null +++ b/docs/sdk/code-reference/pipelines/data_quality/data_manipulation/spark/prediction/auto_arima.md @@ -0,0 +1 @@ +::: src.sdk.python.rtdip_sdk.pipelines.data_quality.data_manipulation.spark.prediction.auto_arima diff --git a/mkdocs.yml b/mkdocs.yml index 4ac23f5a6..98b8f099e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -236,18 +236,34 @@ nav: - Deploy: - Databricks: sdk/code-reference/pipelines/deploy/databricks.md - Data Quality: - - Monitoring: + - Monitoring: - Check Value Ranges: sdk/code-reference/pipelines/data_quality/monitoring/spark/check_value_ranges.md - - Great Expectations: - - Data Quality Monitoring: sdk/code-reference/pipelines/data_quality/monitoring/spark/great_expectations.md + - Great Expectations: + - Data Quality Monitoring: sdk/code-reference/pipelines/data_quality/monitoring/spark/great_expectations.md - Flatline Detection: sdk/code-reference/pipelines/data_quality/monitoring/spark/flatline_detection.md - Identify Missing Data: - Interval Based: sdk/code-reference/pipelines/data_quality/monitoring/spark/identify_missing_data_interval.md - Pattern Based: sdk/code-reference/pipelines/data_quality/monitoring/spark/identify_missing_data_pattern.md - - Data Manipulation: + - Data Manipulation: - Duplicate Detection: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/duplicate_detection.md - - Filter Out of Range Values: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/delete_out_of_range_values.md + - Filter Out of Range Values: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/out_of_range_value_filter.md - Flatline Filter: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/flatline_filter.md + - Dimensionality Reduction: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/dimensionality_reduction.md + - Interval Filtering: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/interval_filtering.md + - K-Sigma Anomaly Detection: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/k_sigma_anomaly_detection.md + - Missing Value Imputation: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/missing_value_imputation.md + - Normalization: + - Normalization: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization.md + - Normalization Mean: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization_mean.md + - Normalization MinMax: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization_minmax.md + - Normalization ZScore: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/normalization/normalization_zscore.md + - Prediction: + - Arima: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/prediction/arima.md + - Auto Arima: sdk/code-reference/pipelines/data_quality/data_manipulation/spark/prediction/auto_arima.md + - Machine Learning: + - Data Binning: sdk/code-reference/pipelines/machine_learning/spark/data_binning.md + - Linear Regression: sdk/code-reference/pipelines/machine_learning/spark/linear_regression.md + - Jobs: sdk/pipelines/jobs.md - Deploy: - Databricks Workflows: sdk/pipelines/deploy/databricks.md @@ -339,4 +355,4 @@ nav: - blog/index.md - University: - University: university/overview.md - \ No newline at end of file + diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/prediction/arima.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/prediction/arima.py index ea70120d1..ad4446f0d 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/prediction/arima.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/prediction/arima.py @@ -99,7 +99,6 @@ class ArimaPrediction(DataManipulationBaseInterface, InputValidator): timestamp_name (str): Name of column, where event timestamps are stored source_name (str): Name of column in source-based format, where source of events are stored status_name (str): Name of column in source-based format, where status of events are stored - # Options for ARIMA external_regressor_names (List[str]): Currently not working. Names of the columns with data to use for prediction, but not extend number_of_data_points_to_predict (int): Amount of points to forecast number_of_data_points_to_analyze (int): Amount of most recent points to train on @@ -319,7 +318,7 @@ def filter(self) -> PySparkDataFrame: value imputation to prevent learning on dirty data. Returns: - DataFrame: A PySpark DataFrame with forcasted value entries depending on constructor parameters. + DataFrame: A PySpark DataFrame with forecasted value entries depending on constructor parameters. """ # expected_scheme = StructType( # [ diff --git a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/prediction/auto_arima.py b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/prediction/auto_arima.py index 2f5ef22e6..808bd3ced 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/prediction/auto_arima.py +++ b/src/sdk/python/rtdip_sdk/pipelines/data_quality/data_manipulation/spark/prediction/auto_arima.py @@ -77,8 +77,6 @@ class ArimaAutoPrediction(ArimaPrediction): number_of_data_points_to_predict (int): Amount of points to forecast number_of_data_points_to_analyze (int): Amount of most recent points to train on seasonal (bool): Setting for AutoArima, is past_data seasonal? - # Options for ARIMA - trend (str): ARIMA-Specific setting enforce_stationarity (bool): ARIMA-Specific setting enforce_invertibility (bool): ARIMA-Specific setting concentrate_scale (bool): ARIMA-Specific setting