From ce3fe742c3eee53cbf8f9db269f72fa50ab4c0a5 Mon Sep 17 00:00:00 2001 From: Tim Jenness Date: Mon, 30 Sep 2024 14:07:25 -0700 Subject: [PATCH] Create a masked record array if any of the columns are masked --- python/lsst/daf/butler/formatters/parquet.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/python/lsst/daf/butler/formatters/parquet.py b/python/lsst/daf/butler/formatters/parquet.py index 6e8937a019..34d401961a 100644 --- a/python/lsst/daf/butler/formatters/parquet.py +++ b/python/lsst/daf/butler/formatters/parquet.py @@ -231,12 +231,14 @@ def arrow_to_numpy(arrow_table: pa.Table) -> np.ndarray: ------- array : `numpy.ndarray` (N,) Numpy array table with N rows and the same column names - as the input arrow table. + as the input arrow table. Will be masked records if any values + in the table are null. """ import numpy as np numpy_dict = arrow_to_numpy_dict(arrow_table) + has_mask = False dtype = [] for name, col in numpy_dict.items(): if len(shape := numpy_dict[name].shape) <= 1: @@ -244,8 +246,13 @@ def arrow_to_numpy(arrow_table: pa.Table) -> np.ndarray: else: dtype.append((name, (col.dtype, shape[1:]))) - array = np.rec.fromarrays(numpy_dict.values(), dtype=dtype) + if not has_mask and isinstance(col, np.ma.MaskedArray): + has_mask = True + if has_mask: + array = np.ma.mrecords.fromarrays(numpy_dict.values(), dtype=dtype) + else: + array = np.rec.fromarrays(numpy_dict.values(), dtype=dtype) return array