Skip to content

Commit

Permalink
Fix bug in string column handling
Browse files (browse the repository at this point in the history)
  • Loading branch information
jeromekelleher committed Feb 7, 2024
1 parent e89e3fd commit 2638cc3
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 5 deletions.
9 changes: 5 additions & 4 deletions sgkit/io/vcf/vcf_converter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -138,10 +138,11 @@ def smallest_dtype(self):
ret = "bool"
else:
assert self.vcf_type == "String"
if s.max_number == 0:
ret = "str"
else:
ret = "O"
ret = "str"
# if s.max_number == 0:
# ret = "str"
# else:
# ret = "O"
# print("smallest dtype", self.name, self.vcf_type,":", ret)
return ret

Expand Down
4 changes: 3 additions & 1 deletion sgkit/tests/io/vcf/test_vcf_reader.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1856,10 +1856,12 @@ def test_compare_vcf_to_zarr_convert(shared_datadir, tmp_path, vcf_name):
# input for
convert_vcf([vcf_path], zarr2_path)
ds2 = load_dataset(zarr2_path)
vcf_to_zarr(vcf_path, zarr1_path, max_alt_alleles=ds2.variant_allele.shape[1] - 1)
vcf_to_zarr(vcf_path, zarr1_path, mixed_ploidy=True, max_alt_alleles=ds2.variant_allele.shape[1] - 1)
ds1 = load_dataset(zarr1_path)

# convert reads all variables by default.
base_vars = list(ds1)
ds2 = load_dataset(zarr2_path)
# print(ds1.call_genotype.values)
# print(ds2.call_genotype.values)
xr.testing.assert_equal(ds1, ds2[base_vars])

0 comments on commit 2638cc3

Please sign in to comment.