Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Issue #169 - NA is converted to NaN #175

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions dataframe/dataframe.go
Original file line number Diff line number Diff line change
Expand Up @@ -1219,17 +1219,22 @@ func LoadRecords(records [][]string, options ...LoadOption) DataFrame {
types := make([]series.Type, len(headers))
rawcols := make([][]string, len(headers))
for i, colname := range headers {
t, useCustomType := cfg.types[colname]
rawcol := make([]string, len(records))
for j := 0; j < len(records); j++ {
rawcol[j] = records[j][i]
if useCustomType && t == series.String {
// skip the conversion when using custom string type
continue
}
if findInStringSlice(rawcol[j], cfg.nanValues) != -1 {
rawcol[j] = "NaN"
}
}
rawcols[i] = rawcol

t, ok := cfg.types[colname]
if !ok {
// try to auto detect the data type
if !useCustomType {
t = cfg.defaultType
if cfg.detectTypes {
if l, err := findType(rawcol); err == nil {
Expand Down
43 changes: 43 additions & 0 deletions dataframe/dataframe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1420,6 +1420,49 @@ Spain,2012-02-01,66,555.42,00241
}
}

// test case for issue #169
func TestReadCSV_Issue169(t *testing.T) {
// Load the data from a CSV string and try to infer the type of the
// columns, but NA won't be converted to NaN when data type is specified
// as string.
const ExampleData = `
Country,Region,Date,Age,Amount,Id
"United States",NA,2012-02-01,50,112.1,01234
"United States",US,2012-02-01,32,321.31,54320
"United Kingdom",GB,2012-02-01,17,18.2,12345
"United States",NA,2012-02-01,32,321.31,54320
"United States","NA",2012-02-01,17,321.31,54320
"United Kingdom",GB,2012-02-01,NA,18.2,12345
"United States",NA,2012-02-01,32,321.31,54320
Spain,EU,2012-02-01,66,555.42,00241
`

df := ReadCSV(
strings.NewReader(ExampleData),
WithTypes(map[string]series.Type{
"Region": series.String,
"Age": series.String,
}),
)

if df.Err != nil {
t.Errorf("Expected success, got error: %v", df.Err)
}

for _, v := range df.Col("Region").Records() {
if v == "NaN" {
t.Errorf("Expected not to convert NA to NaN, but it does")
}
}

for _, v := range df.Col("Age").Records() {
if v == "NaN" {
t.Errorf("Expected not to convert NA to NaN, but it does")
}
}

}

func TestReadJSON(t *testing.T) {
table := []struct {
jsonStr string
Expand Down