Skip to content

Commit

Permalink
Fix #1610 unexpected field-error related to a well-formatted value ex…
Browse files Browse the repository at this point in the history
…ample of a schema's boolean field using frictionless validate (#1615)

- fixes #1610

This PR fixes the `field-error` which occurred when running `frictionless
validate data.csv --schema schema.json` with a `TableSchema` `schema`
containing a `BooleanField` customised with 'trueValues' or
'falseValues' and an 'example' value, for example:
```
schema.json
----
{
  "$schema": "https://frictionlessdata.io/schemas/table-schema.json",
  "fields": [
    {
      "name": "IsTrue",
      "type": "boolean",
      "trueValues": ["yes"],
      "falseValues": ["no"],
      "example": "yes"
    }
  ]
}
```
```
data.csv
----
isTrue
yes
no
```

This PR adds test cases to ensure that the example value is correctly
evaluated according to the customized 'trueValues' or 'falseValues', as
expected.

---------

Co-authored-by: Pierre Camilleri <[email protected]>
  • Loading branch information
amelie-rondot and pierrecamilleri authored Aug 30, 2024
1 parent 7861f0e commit 8b17d59
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 6 deletions.
64 changes: 62 additions & 2 deletions frictionless/fields/__spec__/test_boolean.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import pytest

from frictionless import Field
from frictionless import Field, Schema
from frictionless.errors.metadata import SchemaError
from frictionless.exception import FrictionlessException

# General

Expand Down Expand Up @@ -32,12 +34,70 @@
("default", "3.14", None, {}),
("default", "", None, {}),
("default", "Yes", None, {"trueValues": ["yes"]}),
("default", "true", None, {"trueValues": ["yes"]}),
("default", "True", None, {"trueValues": ["yes"]}),
("default", "TRUE", None, {"trueValues": ["yes"]}),
("default", "1", None, {"trueValues": ["yes"]}),
("default", "No", None, {"falseValues": ["no"]}),
("default", "false", None, {"falseValues": ["no"]}),
("default", "False", None, {"falseValues": ["no"]}),
("default", "FALSE", None, {"falseValues": ["no"]}),
("default", "0", None, {"falseValues": ["no"]}),
],
)
def test_boolean_read_cell(format, source, target, options):
descriptor = {"name": "name", "type": "boolean", "format": format}
descriptor.update(options)
field = Field.from_descriptor(descriptor)
cell, notes = field.read_cell(source)
cell, _ = field.read_cell(source)
assert cell == target

schema_descriptor = {"fields": [{"name": "IsTrue", "type": "boolean"}]}
schema_descriptor["fields"][0].update(options)
schema = Schema.from_descriptor(schema_descriptor)
fields = schema.fields
cell, _ = fields[0].read_cell(source)
assert cell == target


@pytest.mark.parametrize(
    "source, target, options",
    [
        # Customised true values: only "yes" (or a native bool) is accepted.
        (True, True, {"trueValues": ["yes"], "example": "yes"}),
        ("yes", True, {"trueValues": ["yes"], "example": "yes"}),
        ("true", None, {"trueValues": ["yes"], "example": "yes"}),
        # Customised false values: only "no" (or a native bool) is accepted.
        (False, False, {"falseValues": ["no"], "example": "no"}),
        ("no", False, {"falseValues": ["no"], "example": "no"}),
        ("false", None, {"falseValues": ["no"], "example": "no"}),
    ],
)
def test_boolean_from_schema_descriptor_with_valid_example_fix_issue_1610(
    source, target, options
):
    """Check a boolean field whose 'example' matches its customised values.

    The schema is built from a descriptor holding a single boolean field
    extended with ``options``; reading ``source`` through that field must
    yield ``target`` as the cell value.
    """
    boolean_field = {"name": "IsTrue", "type": "boolean", **options}
    descriptor = {
        "$schema": "https://frictionlessdata.io/schemas/table-schema.json",
        "fields": [boolean_field],
    }
    first_field = Schema.from_descriptor(descriptor).fields[0]
    cell, _ = first_field.read_cell(source)
    assert cell == target


def test_boolean_from_schema_descriptor_with_invalid_example_fix_issue_1610():
    """Check that an 'example' outside the customised values is rejected.

    The field customises 'falseValues' to ``["no"]`` and declares the
    example ``"invalid"``; building the schema must raise a
    ``FrictionlessException`` whose ``error`` is a ``SchemaError``.
    """
    invalid_field = {
        "name": "IsTrue",
        "type": "boolean",
        "falseValues": ["no"],
        "example": "invalid",
    }
    descriptor = {
        "$schema": "https://frictionlessdata.io/schemas/table-schema.json",
        "fields": [invalid_field],
    }
    with pytest.raises(FrictionlessException) as excinfo:
        Schema.from_descriptor(descriptor)
    # Inspect the captured exception only after the context manager exits.
    assert isinstance(excinfo.value.error, SchemaError)
20 changes: 16 additions & 4 deletions frictionless/schema/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ class Field(Metadata):
For example: "default","array" etc.
"""

missing_values: List[str] = attrs.field(factory=settings.DEFAULT_MISSING_VALUES.copy)
missing_values: List[str] = attrs.field(
factory=settings.DEFAULT_MISSING_VALUES.copy
)
"""
List of string values to be set as missing values in the field. If any of string in missing values
is found in the field value then it is set as None.
Expand Down Expand Up @@ -260,13 +262,23 @@ def metadata_validate(cls, descriptor: IDescriptor): # type: ignore
if example:
type = descriptor.get("type")
Class = system.select_field_class(type)

field = Class(
name=descriptor.get("name", "example"),
format=descriptor.get("format", "default"), # type: ignore
name=descriptor.get("name"), # type: ignore
format=descriptor.get("format", "default"),
)

if type == "boolean":
# 'example' value must be compared to customized 'trueValues' and 'falseValues'
if "trueValues" in descriptor.keys():
field.true_values = descriptor["trueValues"]
if "falseValues" in descriptor.keys():
field.false_values = descriptor["falseValues"]
_, notes = field.read_cell(example)
if notes is not None:
note = f'example value "{example}" for field "{field.name}" is not valid'
note = (
f'example value "{example}" for field "{field.name}" is not valid'
)
yield errors.FieldError(note=note)

# Misleading
Expand Down

0 comments on commit 8b17d59

Please sign in to comment.