From 8b17d59eb3bcd8ab246e44450d234e1152c82871 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Am=C3=A9lie=20Rondot?= <119412389+amelie-rondot@users.noreply.github.com> Date: Fri, 30 Aug 2024 13:28:47 +0200 Subject: [PATCH] Fix #1610 unexpected field-error related to a well-formatted value example of a schema's boolean field using frictionless validate (#1615) - fixes #1610 This PR fixes the `field-error` which occurred when running `frictionless validate data.csv --schema schema.json` with a `TableSchema` `schema` containing a `BooleanField` customised with 'trueValues' or 'falseValues' and an 'example' value, for example: ``` schema.json ---- { "$schema": "https://frictionlessdata.io/schemas/table-schema.json", "fields": [ { "name": "IsTrue", "type": "boolean", "trueValues": ["yes"], "falseValues": ["no"], "example": "yes" } ] } ``` ``` data.csv ---- isTrue yes no ``` This PR adds test cases to ensure that the example value is correctly evaluated according to customized 'trueValues' or 'falseValues', as expected. 
--------- Co-authored-by: Pierre Camilleri <22995923+pierrecamilleri@users.noreply.github.com> --- frictionless/fields/__spec__/test_boolean.py | 64 +++++++++++++++++++- frictionless/schema/field.py | 20 ++++-- 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/frictionless/fields/__spec__/test_boolean.py b/frictionless/fields/__spec__/test_boolean.py index 72d7954dda..574bf81c2c 100644 --- a/frictionless/fields/__spec__/test_boolean.py +++ b/frictionless/fields/__spec__/test_boolean.py @@ -1,6 +1,8 @@ import pytest -from frictionless import Field +from frictionless import Field, Schema +from frictionless.errors.metadata import SchemaError +from frictionless.exception import FrictionlessException # General @@ -32,12 +34,70 @@ ("default", "3.14", None, {}), ("default", "", None, {}), ("default", "Yes", None, {"trueValues": ["yes"]}), + ("default", "true", None, {"trueValues": ["yes"]}), + ("default", "True", None, {"trueValues": ["yes"]}), + ("default", "TRUE", None, {"trueValues": ["yes"]}), + ("default", "1", None, {"trueValues": ["yes"]}), ("default", "No", None, {"falseValues": ["no"]}), + ("default", "false", None, {"falseValues": ["no"]}), + ("default", "False", None, {"falseValues": ["no"]}), + ("default", "FALSE", None, {"falseValues": ["no"]}), + ("default", "0", None, {"falseValues": ["no"]}), ], ) def test_boolean_read_cell(format, source, target, options): descriptor = {"name": "name", "type": "boolean", "format": format} descriptor.update(options) field = Field.from_descriptor(descriptor) - cell, notes = field.read_cell(source) + cell, _ = field.read_cell(source) assert cell == target + + schema_descriptor = {"fields": [{"name": "IsTrue", "type": "boolean"}]} + schema_descriptor["fields"][0].update(options) + schema = Schema.from_descriptor(schema_descriptor) + fields = schema.fields + cell, _ = fields[0].read_cell(source) + assert cell == target + + +@pytest.mark.parametrize( + "source, target, options", + [ + (True, True, {"trueValues": 
["yes"], "example": "yes"}), + ("yes", True, {"trueValues": ["yes"], "example": "yes"}), + ("true", None, {"trueValues": ["yes"], "example": "yes"}), + (False, False, {"falseValues": ["no"], "example": "no"}), + ("no", False, {"falseValues": ["no"], "example": "no"}), + ("false", None, {"falseValues": ["no"], "example": "no"}), + ], +) +def test_boolean_from_schema_descriptor_with_valid_example_fix_issue_1610( + source, target, options +): + schema_descriptor = { + "$schema": "https://frictionlessdata.io/schemas/table-schema.json", + "fields": [{"name": "IsTrue", "type": "boolean"}], + } + schema_descriptor["fields"][0].update(options) + schema = Schema.from_descriptor(schema_descriptor) + fields = schema.fields + cell, _ = fields[0].read_cell(source) + assert cell == target + + +def test_boolean_from_schema_descriptor_with_invalid_example_fix_issue_1610(): + schema_descriptor = { + "$schema": "https://frictionlessdata.io/schemas/table-schema.json", + "fields": [ + { + "name": "IsTrue", + "type": "boolean", + "falseValues": ["no"], + "example": "invalid", + } + ], + } + with pytest.raises(FrictionlessException) as excinfo: + Schema.from_descriptor(schema_descriptor) + err = excinfo.value.error + assert isinstance(err, SchemaError) diff --git a/frictionless/schema/field.py b/frictionless/schema/field.py index 75995174e3..5b9f8c3eb5 100644 --- a/frictionless/schema/field.py +++ b/frictionless/schema/field.py @@ -50,7 +50,9 @@ class Field(Metadata): For example: "default","array" etc. """ - missing_values: List[str] = attrs.field(factory=settings.DEFAULT_MISSING_VALUES.copy) + missing_values: List[str] = attrs.field( + factory=settings.DEFAULT_MISSING_VALUES.copy + ) """ List of string values to be set as missing values in the field. If any of string in missing values is found in the field value then it is set as None. 
@@ -260,13 +262,23 @@ def metadata_validate(cls, descriptor: IDescriptor): # type: ignore if example: type = descriptor.get("type") Class = system.select_field_class(type) + field = Class( - name=descriptor.get("name", "example"), - format=descriptor.get("format", "default"), # type: ignore + name=descriptor.get("name"), # type: ignore + format=descriptor.get("format", "default"), ) + + if type == "boolean": + # 'example' value must be compared to customized 'trueValues' and 'falseValues' + if "trueValues" in descriptor.keys(): + field.true_values = descriptor["trueValues"] + if "falseValues" in descriptor.keys(): + field.false_values = descriptor["falseValues"] _, notes = field.read_cell(example) if notes is not None: - note = f'example value "{example}" for field "{field.name}" is not valid' + note = ( + f'example value "{example}" for field "{field.name}" is not valid' + ) yield errors.FieldError(note=note) # Misleading