Skip to content

Commit

Permalink
added help function
Browse files Browse the repository at this point in the history
  • Loading branch information
enriquea committed Apr 2, 2024
1 parent f5934de commit f906e15
Showing 1 changed file with 12 additions and 8 deletions.
20 changes: 12 additions & 8 deletions fsspark/fs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,14 +179,6 @@ def get_sample_label(self) -> list:
"""
return self.__indexed_instances.tolist()

# def get_samples(self) -> pyspark.pandas.Series:
# """
# Get samples identifiers from DataFrame. Coerce data type to string.
#
# :return: Pandas Series
# """
# return self.__df[self.__sample_col].astype("str")

def get_sdf_vector(self, output_column_vector: str = 'features') -> pyspark.sql.DataFrame:
"""
Return a Spark dataframe with feature columns assembled into a column vector (a.k.a. Dense Vector column).
Expand All @@ -204,6 +196,18 @@ def get_sdf_vector(self, output_column_vector: str = 'features') -> pyspark.sql.

return sdf_vector

def get_sdf_and_label(self,
                      output_column_vector: str = 'features') -> Tuple[pyspark.sql.dataframe.DataFrame, str, str]:
    """
    Bundle the vectorized Spark DataFrame with the column names that go with it.

    :param output_column_vector: Name forwarded to ``get_sdf_vector`` for the output vector column.
    :return: A 3-tuple ``(sdf, label_col, output_column_vector)``: the DataFrame returned by
             ``get_sdf_vector``, the value returned by ``get_label_col_name``, and the
             ``output_column_vector`` argument passed back unchanged.
    """
    # Tuple elements are evaluated left to right, so the DataFrame is
    # requested before the label column name is looked up.
    return (
        self.get_sdf_vector(output_column_vector=output_column_vector),
        self.get_label_col_name(),
        output_column_vector,
    )

def _collect_features_as_array(self) -> np.array:
"""
Collect features from FSDataFrame as an array.
Expand Down

0 comments on commit f906e15

Please sign in to comment.