Respect csv.field_size_limit
MKuranowski committed Feb 19, 2024
1 parent eaa2c4e commit 126a3a7
Showing 3 changed files with 41 additions and 4 deletions.
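
For context: csv.field_size_limit() is the standard library's process-wide cap on how large a single CSV field may grow, and with this commit the aiocsv parsers enforce the same limit instead of ignoring it. A minimal usage sketch (assumes the public aiocsv.AsyncReader API together with the aiofiles package; "data.csv" is a placeholder file name):

```python
import asyncio
import csv

import aiofiles
from aiocsv import AsyncReader

# Process-wide limit shared with the standard csv module; after this commit
# the aiocsv parsers raise csv.Error once a field would exceed it.
csv.field_size_limit(64)

async def main() -> None:
    async with aiofiles.open("data.csv", newline="") as f:  # placeholder file
        try:
            async for row in AsyncReader(f):
                print(row)
        except csv.Error as exc:
            print("oversized field:", exc)

asyncio.run(main())
```
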
25 changes: 22 additions & 3 deletions aiocsv/_parser.c
@@ -232,6 +232,9 @@ typedef struct {
/// C-friendly representation of the csv dialect.
Dialect dialect;

/// Limit for the field size
long field_size_limit;

/// Zero-based line number of the current position, which is equivalent to
/// a one-based line number of the last-encountered line.
unsigned int line_num;
@@ -313,6 +316,20 @@ static PyObject* Parser_new(PyObject* module, PyObject* args, PyObject* kwargs)
Py_INCREF(reader);
self->reader = reader;

PyObject* field_size_limit_obj =
PyObject_CallObject(module_get_state(module)->csv_field_size_limit, NULL);
if (!field_size_limit_obj) {
Py_DECREF(self);
return NULL;
}

self->field_size_limit = PyLong_AsLong(field_size_limit_obj);
Py_DECREF(field_size_limit_obj);
if (PyErr_Occurred()) {
Py_DECREF(self);
return NULL;
}

self->current_read = NULL;
self->record_so_far = NULL;
self->field_so_far = NULL;
@@ -331,9 +348,11 @@ static PyObject* Parser_new(PyObject* module, PyObject* args, PyObject* kwargs)
}

static int Parser_add_char(Parser* self, Py_UCS4 c) {
// TODO: Check against csv.field_size_limit

if (self->field_so_far_len >= self->field_so_far_capacity) {
if (self->field_so_far_len == self->field_size_limit) {
PyObject* err = module_get_state(self->module)->csv_error;
PyErr_Format(err, "field larger than field limit (%ld)", self->field_size_limit);
return 0;
} else if (self->field_so_far_len >= self->field_so_far_capacity) {
Py_ssize_t new_capacity =
self->field_so_far_capacity ? self->field_so_far_capacity * 2 : 4096;
Py_UCS4* new_buffer = self->field_so_far;
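
The new check mirrors the standard csv module's behaviour (and reuses its message format): the error fires on the character that would push a field past the limit, so a field of exactly field_size_limit characters still parses. A stdlib-only illustration of that boundary, not part of this commit:

```python
import csv
import io

csv.field_size_limit(4)

# Exactly at the limit: accepted.
assert next(csv.reader(io.StringIO("abcd\r\n", newline=""))) == ["abcd"]

# One character over the limit: csv.Error with the same message format.
try:
    next(csv.reader(io.StringIO("abcde\r\n", newline="")))
except csv.Error as exc:
    print(exc)  # field larger than field limit (4)
```
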
4 changes: 3 additions & 1 deletion aiocsv/parser.py
@@ -41,6 +41,7 @@ def __init__(self, reader: WithAsyncRead, dialect: DialectLike) -> None:
self.state = ParserState.START_RECORD
self.record_so_far: list[str] = []
self.field_so_far: list[str] = []
self.field_limit: int = csv.field_size_limit()
self.field_was_numeric: bool = False
self.last_char_was_cr: bool = False

@@ -231,7 +232,8 @@ def process_char_in_eat_newline(self, c: str) -> Decision:
return Decision.DONE if c == "\n" else Decision.DONE_WITHOUT_CONSUMING

def add_char(self, c: str) -> None:
# TODO: Check against field_limit
if len(self.field_so_far) == self.field_limit:
raise csv.Error(f"field larger than field limit ({self.field_limit})")
self.field_so_far.append(c)

def save_field(self) -> None:
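
Both the C parser (in Parser_new above) and this pure-Python parser read csv.field_size_limit() once, at construction time, rather than per field. A standalone toy (not aiocsv code) showing what that means for call ordering:

```python
import csv

class TinyParser:
    # Mirrors the construction-time capture in __init__ above.
    def __init__(self) -> None:
        self.field_limit: int = csv.field_size_limit()

csv.field_size_limit(64)
parser = TinyParser()          # a limit of 64 is captured here
csv.field_size_limit(128_000)  # later changes do not affect existing parsers
print(parser.field_limit)      # 64
```
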
16 changes: 16 additions & 0 deletions tests/test_parser.py
@@ -247,3 +247,19 @@ async def test_parsing_line_num(parser: Type[Parser]):

assert csv_result == expected_result
assert custom_result == expected_result


@pytest.mark.asyncio
@pytest.mark.parametrize("parser", PARSERS, ids=PARSER_NAMES)
async def test_parsing_field_size_limit(parser: Type[Parser]):
csv.field_size_limit(64)

data = "a" * 65 + "\r\n"

csv_parser = csv.reader(io.StringIO(data, newline=""), strict=True)

with pytest.raises(csv.Error, match=r"field larger than field limit \(64\)"):
list(csv_parser)

with pytest.raises(csv.Error, match=r"field larger than field limit \(64\)"):
[r async for r in parser(AsyncStringIO(data), csv_parser.dialect)]
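
Note that csv.field_size_limit() is process-wide state, so the limit set in this test stays in effect for anything parsed later in the same process. A hypothetical fixture (illustrative only, not part of this commit) that scopes such a change:

```python
import csv

import pytest

@pytest.fixture
def field_size_limit_64():
    # csv.field_size_limit(new) returns the previous limit, so it can be restored.
    old = csv.field_size_limit(64)
    yield
    csv.field_size_limit(old)
```
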
