diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index 51d38f6..3ebba60 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -1219,17 +1219,22 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame { types := make([]series.Type, len(headers)) rawcols := make([][]string, len(headers)) for i, colname := range headers { + t, useCustomType := cfg.types[colname] rawcol := make([]string, len(records)) for j := 0; j < len(records); j++ { rawcol[j] = records[j][i] + if useCustomType && t == series.String { + // skip the NaN conversion when a custom String type is configured for this column, so the raw value is kept as-is + continue + } if findInStringSlice(rawcol[j], cfg.nanValues) != -1 { rawcol[j] = "NaN" } } rawcols[i] = rawcol - t, ok := cfg.types[colname] - if !ok { + // no custom type configured for this column: start from the default type and, if detection is enabled, try to auto-detect it + if !useCustomType { t = cfg.defaultType if cfg.detectTypes { if l, err := findType(rawcol); err == nil { diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 6cb0c2b..2ca8f70 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -1420,6 +1420,49 @@ Spain,2012-02-01,66,555.42,00241 } } +// TestReadCSV_Issue169 is a regression test for issue #169: NA values must not be converted to NaN in columns explicitly typed as string. +func TestReadCSV_Issue169(t *testing.T) { + // Load the data from a CSV string with Region and Age explicitly typed as + // string, and try to infer the type of the other columns. NA values in the + // string-typed columns must not be converted to NaN.
+ const ExampleData = ` +Country,Region,Date,Age,Amount,Id +"United States",NA,2012-02-01,50,112.1,01234 +"United States",US,2012-02-01,32,321.31,54320 +"United Kingdom",GB,2012-02-01,17,18.2,12345 +"United States",NA,2012-02-01,32,321.31,54320 +"United States","NA",2012-02-01,17,321.31,54320 +"United Kingdom",GB,2012-02-01,NA,18.2,12345 +"United States",NA,2012-02-01,32,321.31,54320 +Spain,EU,2012-02-01,66,555.42,00241 +` + + df := ReadCSV( + strings.NewReader(ExampleData), + WithTypes(map[string]series.Type{ + "Region": series.String, + "Age": series.String, + }), + ) + + if df.Err != nil { + t.Errorf("Expected success, got error: %v", df.Err) + } + + for _, v := range df.Col("Region").Records() { + if v == "NaN" { + t.Errorf("Expected not to convert NA to NaN, but it does") + } + } + + for _, v := range df.Col("Age").Records() { + if v == "NaN" { + t.Errorf("Expected not to convert NA to NaN, but it does") + } + } + +} + func TestReadJSON(t *testing.T) { table := []struct { jsonStr string