docstring cleanup; csv futzing

monarch-initiative · Sep 27, 2021 · 1cc9b48 · 1cc9b48
1 parent efefe50
commit 1cc9b48
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 14 deletions.
diff --git a/koza/io/reader/csv_reader.py b/koza/io/reader/csv_reader.py
@@ -151,6 +151,7 @@ def __next__(self) -> Dict[str, Any]:
 
         else:
             self.fieldnames = self.field_type_map.keys()
+
         try:
             row = next(self.reader)
         except StopIteration:
@@ -171,25 +172,32 @@ def __next__(self) -> Dict[str, Any]:
         # to determine what to do here
         fields_len = len(self.fieldnames)
         row_len = len(row)
+
+        # if we've made it here we can convert a row to a dict
+        field_map = dict(zip(self.fieldnames, row))
+
         if fields_len > row_len:
-            LOG.warning(
+            raise ValueError(
                 f"CSV file {self.name} has {fields_len - row_len} fewer columns at {self.reader.line_num}"
             )
-        elif row_len > fields_len:
+
+        elif fields_len < row_len:
             LOG.warning(
                 f"CSV file {self.name} has {row_len - fields_len} extra columns at {self.reader.line_num}"
             )
-
-        # if we've made it here we can convert a row to a dict
-        field_map = dict(zip(self.fieldnames, row))
+            # Not sure if this would serve a purpose
+            #
+            # if not 'extra_cols' in self.field_type_map:
+            #     # Create a type map for extra columns
+            #     self.field_type_map['extra_cols'] = FieldType.str
+            # field_map['extra_cols'] = row[fields_len:]
 
         typed_field_map = {}
 
         for field, field_value in field_map.items():
-            # This is really unreadable - malkovich malkovich
             # Take the value and coerce it using self.field_type_map (field: FieldType)
             # FIELD_TYPE is map of the field_type enum to the python
-            # built-in type or custom extras defined in koza
+            # built-in type or custom extras defined in the source config
             try:
                 typed_field_map[field] = FIELDTYPE_CLASS[self.field_type_map[field]](field_value)
             except KeyError as key_error:

diff --git a/koza/model/config/source_config.py b/koza/model/config/source_config.py
@@ -87,24 +87,24 @@ class FieldType(str, Enum):
     str = 'str'
     int = 'int'
     float = 'float'
-    # Proportion = 'Proportion'
 
 
 class OutputFormat(str, Enum):
     """
-    Have this set up but for prototyping removing this
-    as an option to only support the TSV output format
+    Output formats
     """
 
-    tsv = 'tsv'
-    json = 'json'
+    tsv = 'tsv'  # TODO
     jsonl = 'jsonl'
+    kgx = 'kgx'
 
 
 class TransformMode(str, Enum):
     """
-    Have this set up but for prototyping removing this
-    as an option to only support the TSV output format
+    Configures how an external transform file is processed.
+    flat uses importlib and watches for a StopIteration
+    exception; loop runs the code once and expects that
+    a for loop is being used to iterate over a file.
     """
 
     flat = 'flat'
@@ -128,6 +128,7 @@ class DatasetDescription:
     These currently do not serve a purpose in koza other
     than documentation
     """
+
     id: str = None  # TODO constrain to a curie?
     name: str = None  # If empty use source name
     ingest_title: str = None  # Map to biolink name
@@ -303,6 +304,10 @@ def field_type_map(self):
 
 @dataclass(config=PydanticConfig)
 class PrimaryFileConfig(SourceConfig):
+    """
+    node_properties and edge_properties are used for configuring
+    the KGX writer
+    """
     node_properties: List[str] = None
     edge_properties: List[str] = None
     depends_on: List[str] = field(default_factory=list)