diff --git a/dataframe/dataframe.go b/dataframe/dataframe.go index fbfa7af..cc40ca2 100644 --- a/dataframe/dataframe.go +++ b/dataframe/dataframe.go @@ -477,6 +477,34 @@ func (df DataFrame) Concat(dfb DataFrame) DataFrame { return New(expandedSeries...) } +// Insert will add new dataframe to an existing DataFrame at a given position. +func (df DataFrame) Insert(dfb DataFrame, pos int) DataFrame { + if df.Err != nil { + return df + } + if dfb.Err != nil { + return dfb + } + expandedSeries := make([]series.Series, df.ncols) + for k, v := range df.Names() { + idx := findInStringSlice(v, dfb.Names()) + if idx == -1 { + return DataFrame{Err: fmt.Errorf("insert: column names are not compatible")} + } + + originalSeries := df.columns[k] + addedSeries := dfb.columns[idx] + + originalSeries.Insert(addedSeries, pos) + if err := originalSeries.Err; err != nil { + return DataFrame{Err: fmt.Errorf("insert: %v", err)} + } + + expandedSeries[k] = originalSeries + } + return New(expandedSeries...) +} + // Mutate changes a column of the DataFrame with the given Series or adds it as // a new column if the column name does not exist. func (df DataFrame) Mutate(s series.Series) DataFrame { @@ -1198,7 +1226,7 @@ func ReadCSV(r io.Reader, options ...LoadOption) DataFrame { // resulting records. func ReadJSON(r io.Reader, options ...LoadOption) DataFrame { var m []map[string]interface{} - d:=json.NewDecoder(r) + d := json.NewDecoder(r) d.UseNumber() err := d.Decode(&m) if err != nil { diff --git a/dataframe/dataframe_test.go b/dataframe/dataframe_test.go index 0c1f748..a4738dd 100644 --- a/dataframe/dataframe_test.go +++ b/dataframe/dataframe_test.go @@ -849,7 +849,7 @@ func TestDataFrame_Filter_And(t *testing.T) { expDf DataFrame }{ { - []F{{"COL.2", series.GreaterEq, 4}}, + []F{{Colname: "COL.2", Comparator: series.GreaterEq, Comparando: 4}}, New( series.New([]string{"b", "c", "d"}, series.String, "COL.1"), series.New([]int{4, 5, 4}, series.Int, "COL.2"), @@ -859,8 +859,8 @@ func TestDataFrame_Filter_And(t *testing.T) { // should not have any rows { []F{ - {"COL.2", series.Greater, 4}, - {"COL.2", series.Eq, 1}, + {Colname: "COL.2", Comparator: series.Greater, Comparando: 4}, + {Colname: "COL.2", Comparator: series.Eq, Comparando: 1}, }, New( series.New([]string{}, series.String, "COL.1"), @@ -870,8 +870,8 @@ func TestDataFrame_Filter_And(t *testing.T) { }, { []F{ - {"COL.2", series.Less, 4}, - {"COL.1", series.Eq, "b"}, + {Colname: "COL.2", Comparator: series.Less, Comparando: 4}, + {Colname: "COL.1", Comparator: series.Eq, Comparando: "b"}, }, New( series.New([]string{"b"}, series.String, "COL.1"), @@ -2835,3 +2835,88 @@ func TestDescribe(t *testing.T) { } } } + +func TestDataFrame_Insert(t *testing.T) { + tests := []struct { + desc string + df DataFrame + value DataFrame + pos int + expected DataFrame + }{ + { + "TestDataFrame_Insert:0: DataframeString.Insert(DataframeString) & pos=end of Series", + LoadRecords( + [][]string{ + {"A", "C", "D"}, + {"1", "5.1", "true"}, + {"NaN", "6.0", "true"}, + {"2", "6.0", "false"}, + }, + ), + LoadRecords( + [][]string{ + {"A", "C", "D"}, + {"2", "7.1", "false"}, + }, + ), + -1, + LoadRecords( + [][]string{ + {"A", "C", "D"}, + {"1", "5.1", "true"}, + {"NaN", "6.0", "true"}, + {"2", "6.0", "false"}, + {"2", "7.1", "false"}, + }, + ), + }, + { + "TestDataFrame_Insert:1: DataFrameString.Insert(DataFrameString) & pos=0", + LoadRecords( + [][]string{ + {"A", "C", "D"}, + {"1", "5.1", "true"}, + {"NaN", "6.0", "true"}, + {"2", "6.0", "false"}, + }, + ), + LoadRecords( + [][]string{ + {"A", "C", "D"}, + {"2", "7.1", "false"}, + }, + ), + 0, + LoadRecords( + [][]string{ + {"A", "C", "D"}, + {"2", "7.1", "false"}, + {"1", "5.1", "true"}, + {"NaN", "6.0", "true"}, + {"2", "6.0", "false"}, + }, + ), + }, + } + + for i, test := range tests { + actual := test.df.Insert(test.value, test.pos) + + if test.df.Err != nil { + t.Errorf("Test: %d\nError:%v", i, test.df.Err) + } + // Check that the types are the same between both DataFrames + if !reflect.DeepEqual(test.expected.Types(), actual.Types()) { + t.Errorf("Test: %d\nDifferent types:\nexpected:%v\nactual:%v", i, test.expected.Types(), actual.Types()) + } + // Check that the colnames are the same between both DataFrames + if !reflect.DeepEqual(test.expected.Names(), actual.Names()) { + t.Errorf("Test: %d\nDifferent colnames:\nexpected:%v\nactual:%v", i, test.expected.Names(), actual.Names()) + } + // Check that the values are the same between both DataFrames + if !reflect.DeepEqual(test.expected.Records(), actual.Records()) { + t.Errorf("Test: %d: Different values:\nexpected:%v\nactual:%v", i, test.expected, actual) + } + } +} diff --git a/series/series.go b/series/series.go index f345a90..1e444b9 100644 --- a/series/series.go +++ b/series/series.go @@ -258,6 +258,41 @@ func (s *Series) Append(values interface{}) { } } +// Insert adds new elements to the nth position of the Series provided by 'pos' parameter +// e.g. pos = 2 implies: +// 1. Insert after 2 elements of Series +// 2. Or 0th and 1st elements of the Series stay as is +// When using Insert, the Series is modified in place. +func (s *Series) Insert(values interface{}, pos int) { + if pos > s.elements.Len() { + s.Err = fmt.Errorf("pos (=%v) cannot be greater than length of the series (=%v)", pos, s.elements.Len()) + return + } + if pos == -1 { + pos = s.elements.Len() + } + + if err := s.Err; err != nil { + return + } + news := New(values, s.t, s.Name) + + switch s.t { + case String: + // the following won't work in some cases: + // s.elements = append(append(s.elements.(stringElements)[:pos], news.elements.(stringElements)...), s.elements.(stringElements)[pos:]...) + // it may cause mutation of s.elements during inner append resulting in undesired output + s.elements = append(s.elements.(stringElements)[:pos], append(news.elements.(stringElements), s.elements.(stringElements)[pos:]...)...) + case Int: + s.elements = append(s.elements.(intElements)[:pos], append(news.elements.(intElements), s.elements.(intElements)[pos:]...)...) + case Float: + s.elements = append(s.elements.(floatElements)[:pos], append(news.elements.(floatElements), s.elements.(floatElements)[pos:]...)...) + case Bool: + s.elements = append(s.elements.(boolElements)[:pos], append(news.elements.(boolElements), s.elements.(boolElements)[pos:]...)...) + } + return +} + // Concat concatenates two series together. It will return a new Series with the // combined elements of both Series. func (s Series) Concat(x Series) Series { diff --git a/series/series_test.go b/series/series_test.go index 54f1bea..588144b 100644 --- a/series/series_test.go +++ b/series/series_test.go @@ -1731,3 +1731,65 @@ func TestSeries_Map(t *testing.T) { } } } + +func TestSeries_Insert(t *testing.T) { + + tests := []struct { + desc string + series Series + value interface{} + pos int + expected string + }{ + { + "TestSeries_Insert:0: SeriesString.Insert([]String) & pos=end of Series", + Strings([]string{"1", "2", "3", "a", "b", "c"}), + []string{"1", "2", "3", "a", "b", "c"}, + 6, + "[1 2 3 a b c 1 2 3 a b c]", + }, + { + "TestSeries_Insert:1: SeriesString.Insert([]String) & pos=2 of Series i.e. after 2 elements of Series", + Strings([]string{"1", "2", "3", "a", "b", "c"}), + []string{"1", "2", "3", "a", "b", "c"}, + 2, + "[1 2 1 2 3 a b c 3 a b c]", + }, + { + "TestSeries_Insert:2: SeriesInt.Insert([]Int) & pos=3 of Series i.e. after 3 elements of Series", + Ints([]int{1, 2, 3, 6, 7}), + []int{4, 5}, + 3, + "[1 2 3 4 5 6 7]", + }, + { + "TestSeries_Insert:3: SeriesFloat.Insert([]Float) & pos=3 of Series i.e. after 3 elements of Series", + Floats([]float64{1.0, 2.0, 3.0, 6.0, 7.0}), + []float64{4, 5}, + 3, + "[1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000]", + }, + { + "TestSeries_Insert:4: SeriesBool.Insert([]Bool) & pos=-1", + Bools([]bool{true, true}), + []bool{false, false}, + -1, + "[true true false false]", + }, + { + "TestSeries_Insert_ERROR:5: SeriesBool.Insert([]Bool) & pos > length of series", + Bools([]bool{true, true}), + []bool{false, false}, + 3, + "pos (=3) cannot be greater than length of the series (=2)", + }, + } + + for testnum, test := range tests { + test.series.Insert(test.value, test.pos) + + if fmt.Sprint(test.series) != test.expected && fmt.Sprint(test.series.Err) != test.expected { + t.Errorf("Test:%v failed. %v \n expected=%v \t actualValue=%v \t actualError=%v", testnum, test.desc, test.expected, test.series, test.series.Err) + } + } +}