From 4dd98bba83cf6af1c666267d57a14330756d5506 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sat, 4 Feb 2023 16:34:46 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=E8=A1=A5=E5=85=85=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0=E5=88=97=E8=A1=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 3513ddf..bca3f62 100644 --- a/README.md +++ b/README.md @@ -13,21 +13,27 @@ golang版本的pandas ## 2. 功能/模块划分 ### 2.1 特性列表 -| 模块 | 一级功能 | 二级功能 | 进展情况 | -|:----------|:--------------|:----------|:-----------------------------------| -| dataframe | dataframe | new | [√] | -| dataframe | 类型约束 | string | [√] | -| dataframe | 类型约束 | bool | [√] | -| dataframe | 类型约束 | int64 | [√] | -| dataframe | 类型约束 | float64 | [√] | -| dataframe | 泛型类型 | 支持全部的基础类型 | [√] | -| dataframe | 泛型类型 | 自动检测类型 | [√]优先级:string > bool > float > int | -| series | series | new | [√] series的列元素类型和reflect.Kind保持一致 | -| series | 伪泛型 | 构建 | [√] 再新建series完成之后类型就确定了 | -| series | SeriesBool | bool类型 | [√] | -| series | SeriesString | string类型 | [√] | -| series | SeriesInt64 | int64类型 | [√] | -| series | SeriesFloat64 | float64类型 | [√] | +| 模块 | 一级功能 | 二级功能 | 进展情况 | +|:----------|:--------------|:-------------|:------------------------------------| +| dataframe | dataframe | new | [√] | +| dataframe | 类型约束 | string | [√] | +| dataframe | 类型约束 | bool | [√] | +| dataframe | 类型约束 | int64 | [√] | +| dataframe | 类型约束 | float64 | [√] | +| dataframe | 泛型类型 | 支持全部的基础类型 | [√] | +| dataframe | 泛型类型 | 自动检测类型 | [√] 优先级:string > bool > float > int | +| dataframe | align | series长度自动对齐 | [√] | +| dataframe | col | 选择 | [√] | +| dataframe | col | 新增1列 | [√] | +| dataframe | row | 删除多行 | [√] | +| dataframe | name | 改名, 支持单一列改名 | [√] | +| series | series | new | [√] series的列元素类型和reflect.Kind保持一致 | +| series | 伪泛型 | 构建 | [√] 在新建series完成之后类型就确定了 | +| series 
| SeriesBool | bool类型 | [√] | +| series | SeriesString | string类型 | [√] | +| series | SeriesInt64 | int64类型 | [√] | +| series | SeriesFloat64 | float64类型 | [√] | +| series | rolling | 支持序列化参数 | [√] | -- Gitee From 43ca1f0d62a2864d0c64c9b26b1bfc9fb4ad4cea Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sat, 4 Feb 2023 17:27:09 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E8=B0=83=E6=95=B4append=E5=87=BD=E6=95=B0?= =?UTF-8?q?=E7=9A=84=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic_append.go | 17 +++++++++++++++++ series_generic.go | 16 ---------------- 2 files changed, 17 insertions(+), 16 deletions(-) create mode 100644 generic_append.go diff --git a/generic_append.go b/generic_append.go new file mode 100644 index 0000000..bace12f --- /dev/null +++ b/generic_append.go @@ -0,0 +1,17 @@ +package pandas + +func series_append(frame *NDFrame, idx, size int, v any) { + if frame.type_ == SERIES_TYPE_BOOL { + val := AnyToBool(v) + assign[bool](frame, idx, size, val) + } else if frame.type_ == SERIES_TYPE_INT { + val := AnyToInt64(v) + assign[int64](frame, idx, size, val) + } else if frame.type_ == SERIES_TYPE_FLOAT { + val := AnyToFloat64(v) + assign[float64](frame, idx, size, val) + } else { + val := AnyToString(v) + assign[string](frame, idx, size, val) + } +} diff --git a/series_generic.go b/series_generic.go index a95d79d..5dea5eb 100644 --- a/series_generic.go +++ b/series_generic.go @@ -98,19 +98,3 @@ func NewSeriesWithType(_type Type, name string, values ...interface{}) Series { return &frame } - -func series_append(frame *NDFrame, idx, size int, v any) { - if frame.type_ == SERIES_TYPE_BOOL { - val := AnyToBool(v) - assign[bool](frame, idx, size, val) - } else if frame.type_ == SERIES_TYPE_INT { - val := AnyToInt64(v) - assign[int64](frame, idx, size, val) - } else if frame.type_ == SERIES_TYPE_FLOAT { - val := AnyToFloat64(v) - assign[float64](frame, idx, size, val) - } else { - val := AnyToString(v) 
- assign[string](frame, idx, size, val) - } -} -- Gitee From d3314a04f058b7e0c79298fbe2ad1d3efeb3dec3 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sat, 4 Feb 2023 17:51:20 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E7=BB=9F=E4=B8=80=E6=96=B9=E6=B3=95?= =?UTF-8?q?=E5=AE=9A=E4=B9=89=E6=96=B9=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe.go | 36 +++++++++++++++---------------- dataframe_csv.go | 4 ++-- dataframe_select.go | 18 ++++++++-------- dataframe_subset.go | 30 ++++++++++++++++---------- dataframe_xstring.go | 28 ++++++++++++------------ series_generic.go | 51 ++++++++++++++++++++++---------------------- 6 files changed, 88 insertions(+), 79 deletions(-) diff --git a/dataframe.go b/dataframe.go index e2df4d5..784b507 100644 --- a/dataframe.go +++ b/dataframe.go @@ -55,23 +55,23 @@ func NewDataFrame(se ...Series) DataFrame { } // Dims retrieves the dimensions of a DataFrame. -func (df DataFrame) Dims() (int, int) { - return df.Nrow(), df.Ncol() +func (self DataFrame) Dims() (int, int) { + return self.Nrow(), self.Ncol() } // Nrow returns the number of rows on a DataFrame. -func (df DataFrame) Nrow() int { - return df.nrows +func (self DataFrame) Nrow() int { + return self.nrows } // Ncol returns the number of columns on a DataFrame. -func (df DataFrame) Ncol() int { - return df.ncols +func (self DataFrame) Ncol() int { + return self.ncols } // Returns error or nil if no error occured -func (df *DataFrame) Error() error { - return df.Err +func (self DataFrame) Error() error { + return self.Err } // 检查列的尺寸 @@ -100,23 +100,23 @@ func checkColumnsDimensions(se ...Series) (nrows, ncols int, err error) { } // Types returns the types of the columns on a DataFrame. 
-func (df DataFrame) Types() []string { - coltypes := make([]string, df.ncols) - for i, s := range df.columns { +func (self DataFrame) Types() []string { + coltypes := make([]string, self.ncols) + for i, s := range self.columns { coltypes[i] = s.Type().String() } return coltypes } // Records return the string record representation of a DataFrame. -func (df DataFrame) Records() [][]string { +func (self DataFrame) Records() [][]string { var records [][]string - records = append(records, df.Names()) - if df.ncols == 0 || df.nrows == 0 { + records = append(records, self.Names()) + if self.ncols == 0 || self.nrows == 0 { return records } var tRecords [][]string - for _, col := range df.columns { + for _, col := range self.columns { tRecords = append(tRecords, col.Records()) } records = append(records, transposeRecords(tRecords)...) @@ -127,9 +127,9 @@ func (df DataFrame) Records() [][]string { // ==================================== // Names returns the name of the columns on a DataFrame. -func (df DataFrame) Names() []string { - colnames := make([]string, df.ncols) - for i, s := range df.columns { +func (self DataFrame) Names() []string { + colnames := make([]string, self.ncols) + for i, s := range self.columns { colnames[i] = s.Name() } return colnames diff --git a/dataframe_csv.go b/dataframe_csv.go index ade134c..e8bc327 100644 --- a/dataframe_csv.go +++ b/dataframe_csv.go @@ -77,7 +77,7 @@ func WriteHeader(b bool) WriteOption { // WriteCSV writes the DataFrame to the given io.Writer as a CSV file. 
// 支持文件名和io两种方式写入数据 -func (df DataFrame) WriteCSV(out any, options ...WriteOption) error { +func (self DataFrame) WriteCSV(out any, options ...WriteOption) error { var ( writer io.Writer filename string @@ -111,7 +111,7 @@ func (df DataFrame) WriteCSV(out any, options ...WriteOption) error { option(&cfg) } - records := df.Records() + records := self.Records() if !cfg.writeHeader { records = records[1:] } diff --git a/dataframe_select.go b/dataframe_select.go index 05a7016..d88f8b0 100644 --- a/dataframe_select.go +++ b/dataframe_select.go @@ -4,34 +4,34 @@ import "fmt" // Col returns a copy of the Series with the given column name contained in the DataFrame. // 选取一列 -func (df DataFrame) Col(colname string) Series { - if df.Err != nil { +func (self DataFrame) Col(colname string) Series { + if self.Err != nil { return NewSeriesWithType(SERIES_TYPE_INVAILD, "") } // Check that colname exist on dataframe - idx := findInStringSlice(colname, df.Names()) + idx := findInStringSlice(colname, self.Names()) if idx < 0 { return NewSeriesWithType(SERIES_TYPE_INVAILD, "") } - return df.columns[idx].Copy() + return self.columns[idx].Copy() } // SetNames changes the column names of a DataFrame to the ones passed as an // argument. 
// 修改全部的列名 -func (df DataFrame) SetNames(colnames ...string) error { - if len(colnames) != df.ncols { +func (self DataFrame) SetNames(colnames ...string) error { + if len(colnames) != self.ncols { return fmt.Errorf("setting names: wrong dimensions") } for k, s := range colnames { - df.columns[k].Rename(s) + self.columns[k].Rename(s) } return nil } // SetName 修改一个series的名称 -func (df DataFrame) SetName(from string, to string) { - for _, s := range df.columns { +func (self DataFrame) SetName(from string, to string) { + for _, s := range self.columns { if s.Name() == from { s.Rename(to) } diff --git a/dataframe_subset.go b/dataframe_subset.go index db634bd..b2c7987 100644 --- a/dataframe_subset.go +++ b/dataframe_subset.go @@ -2,12 +2,12 @@ package pandas // Subset returns a subset of the rows of the original DataFrame based on the // Series subsetting indexes. -func (df DataFrame) Subset(start, end int) DataFrame { - if df.Err != nil { - return df +func (self DataFrame) Subset(start, end int) DataFrame { + if self.Err != nil { + return self } - columns := make([]Series, df.ncols) - for i, column := range df.columns { + columns := make([]Series, self.ncols) + for i, column := range self.columns { s := column.Subset(start, end) columns[i] = s } @@ -22,12 +22,20 @@ func (df DataFrame) Subset(start, end int) DataFrame { } } -// 选择一段记录 -func (df DataFrame) Select(p Range) DataFrame { - serieses := []Series{} - for i := range df.columns { - serieses = append(serieses, df.columns[i].Select(p)) +// Select 选择一段记录 +func (self DataFrame) Select(p Range) DataFrame { + columns := []Series{} + for i := range self.columns { + columns = append(columns, self.columns[i].Select(p)) + } + nrows, ncols, err := checkColumnsDimensions(columns...) 
+ if err != nil { + return DataFrame{Err: err} + } + newDF := DataFrame{ + columns: columns, + ncols: ncols, + nrows: nrows, } - newDF := DataFrame{columns: serieses} return newDF } diff --git a/dataframe_xstring.go b/dataframe_xstring.go index bbcc5e4..be5a8f6 100644 --- a/dataframe_xstring.go +++ b/dataframe_xstring.go @@ -8,11 +8,11 @@ import ( ) // String implements the Stringer interface for DataFrame -func (df DataFrame) String() (str string) { - return df.print(true, true, true, true, 10, 70, "DataFrame") +func (self DataFrame) String() (str string) { + return self.print(true, true, true, true, 10, 70, "DataFrame") } -func (df DataFrame) print( +func (self DataFrame) print( shortRows, shortCols, showDims, showTypes bool, maxRows int, maxCharsTotal int, @@ -32,11 +32,11 @@ func (df DataFrame) print( return s } - if df.Err != nil { - str = fmt.Sprintf("%s error: %v", class, df.Err) + if self.Err != nil { + str = fmt.Sprintf("%s error: %v", class, self.Err) return } - nrows, ncols := df.Dims() + nrows, ncols := self.Dims() if nrows == 0 || ncols == 0 { str = fmt.Sprintf("Empty %s", class) return @@ -49,10 +49,10 @@ func (df DataFrame) print( shortening := false if shortRows && nrows > maxRows { shortening = true - df = df.Subset(0, maxRows) - records = df.Records() + self = self.Subset(0, maxRows) + records = self.Records() } else { - records = df.Records() + records = self.Records() } if showDims { @@ -60,7 +60,7 @@ func (df DataFrame) print( } // Add the row numbers - for i := 0; i < df.nrows+1; i++ { + for i := 0; i < self.nrows+1; i++ { add := "" if i != 0 { add = strconv.Itoa(i-1) + ":" @@ -74,7 +74,7 @@ func (df DataFrame) print( } records = append(records, dots) } - types := df.Types() + types := self.Types() typesrow := make([]string, ncols) for i := 0; i < ncols; i++ { typesrow[i] = fmt.Sprintf("<%v>", types[i]) @@ -85,9 +85,9 @@ func (df DataFrame) print( records = append(records, typesrow) } - maxChars := make([]int, df.ncols+1) + maxChars := 
make([]int, self.ncols+1) for i := 0; i < len(records); i++ { - for j := 0; j < df.ncols+1; j++ { + for j := 0; j < self.ncols+1; j++ { // Escape special characters records[i][j] = strconv.Quote(records[i][j]) records[i][j] = records[i][j][1 : len(records[i][j])-1] @@ -119,7 +119,7 @@ func (df DataFrame) print( for i := 0; i < len(records); i++ { // Add right padding to all elements records[i][0] = addLeftPadding(records[i][0], maxChars[0]+1) - for j := 1; j < df.ncols; j++ { + for j := 1; j < self.ncols; j++ { records[i][j] = addRightPadding(records[i][j], maxChars[j]) } records[i] = records[i][0:maxCols] diff --git a/series_generic.go b/series_generic.go index 5dea5eb..6441cab 100644 --- a/series_generic.go +++ b/series_generic.go @@ -70,31 +70,32 @@ func NewSeriesWithType(_type Type, name string, values ...interface{}) Series { } //series.Data = make([]float64, 0) // Warning: filled with 0.0 (not NaN) //size := len(series.values) - size := 0 - for idx, v := range values { - switch val := v.(type) { - case nil, int8, uint8, int16, uint16, int32, uint32, int64, uint64, int, uint, float32, float64, bool, string: - // 基础类型 - series_append(&frame, idx, size, val) - default: - vv := reflect.ValueOf(val) - vk := vv.Kind() - switch vk { - //case reflect.Invalid: // {interface} nil - // series.assign(idx, size, Nil2Float64) - case reflect.Slice, reflect.Array: // 切片或数组 - for i := 0; i < vv.Len(); i++ { - tv := vv.Index(i).Interface() - //series.assign(idx, size, str) - series_append(&frame, idx, size, tv) - } - case reflect.Struct: // 忽略结构体 - continue - default: - series_append(&frame, idx, size, nil) - } - } - } + //size := 0 + //for idx, v := range values { + // switch val := v.(type) { + // case nil, int8, uint8, int16, uint16, int32, uint32, int64, uint64, int, uint, float32, float64, bool, string: + // // 基础类型 + // series_append(&frame, idx, size, val) + // default: + // vv := reflect.ValueOf(val) + // vk := vv.Kind() + // switch vk { + // //case reflect.Invalid: // 
{interface} nil + // // series.assign(idx, size, Nil2Float64) + // case reflect.Slice, reflect.Array: // 切片或数组 + // for i := 0; i < vv.Len(); i++ { + // tv := vv.Index(i).Interface() + // //series.assign(idx, size, str) + // series_append(&frame, idx, size, tv) + // } + // case reflect.Struct: // 忽略结构体 + // continue + // default: + // series_append(&frame, idx, size, nil) + // } + // } + //} + frame.Append(values...) return &frame } -- Gitee From f127e8afb8719f1691836e3198f15f9be08ee7c2 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sat, 4 Feb 2023 17:52:52 +0800 Subject: [PATCH 4/4] =?UTF-8?q?#I6CB66=20=E5=AE=9E=E7=8E=B0dataframe/serie?= =?UTF-8?q?s=E5=88=A0=E9=99=A4=E8=AE=B0=E5=BD=95=E7=9A=84=E5=8A=9F?= =?UTF-8?q?=E8=83=BD,=20=E8=A1=8D=E7=94=9F=E5=87=BAappend=E6=89=B9?= =?UTF-8?q?=E9=87=8F=E5=A2=9E=E5=8A=A0=E8=AE=B0=E5=BD=95=E7=9A=84=E5=8A=9F?= =?UTF-8?q?=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe_remove.go | 26 ++++++++++++++++++++++ dataframe_remove_test.go | 30 +++++++++++++++++++++++++ generic_append.go | 47 +++++++++++++++++++++++++++++++++------- series.go | 2 ++ 4 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 dataframe_remove.go create mode 100644 dataframe_remove_test.go diff --git a/dataframe_remove.go b/dataframe_remove.go new file mode 100644 index 0000000..34f8a0d --- /dev/null +++ b/dataframe_remove.go @@ -0,0 +1,26 @@ +package pandas + +// Remove 删除一段范围内的记录 +func (self DataFrame) Remove(p Range) DataFrame { + rowLen := self.Nrow() + start, end, err := p.Limits(rowLen) + if err != nil { + return self + } + columns := []Series{} + for i := range self.columns { + ht := self.columns[i].Subset(0, start, true) + tail := self.columns[i].Subset(end+1, rowLen).Values() + ht.Append(tail) + columns = append(columns, ht) + } + nrows, ncols, err := checkColumnsDimensions(columns...) 
+ if err != nil { + return DataFrame{Err: err} + } + return DataFrame{ + columns: columns, + ncols: ncols, + nrows: nrows, + } +} diff --git a/dataframe_remove_test.go b/dataframe_remove_test.go new file mode 100644 index 0000000..dac8997 --- /dev/null +++ b/dataframe_remove_test.go @@ -0,0 +1,30 @@ +package pandas + +import ( + "fmt" + "testing" +) + +func TestDataFrame_Remove(t *testing.T) { + type testStruct struct { + A string + B int + C bool + D float64 + } + data := []testStruct{ + {"a", 1, true, 0.0}, + {"b", 2, false, 0.5}, + } + df1 := LoadStructs(data) + fmt.Println(df1) + + // 增加1列 + s_e := GenericSeries[string]("x", "a0", "a1", "a2", "a3", "a4") + df2 := df1.Join(s_e) + fmt.Println(df2) + r := RangeFinite(3, 3) + df3 := df2.Remove(r) + fmt.Println(df3) + +} diff --git a/generic_append.go b/generic_append.go index bace12f..d12f32b 100644 --- a/generic_append.go +++ b/generic_append.go @@ -1,17 +1,48 @@ package pandas -func series_append(frame *NDFrame, idx, size int, v any) { - if frame.type_ == SERIES_TYPE_BOOL { +import "reflect" + +// 插入一条记录 +func (self *NDFrame) insert(idx, size int, v any) { + if self.type_ == SERIES_TYPE_BOOL { val := AnyToBool(v) - assign[bool](frame, idx, size, val) - } else if frame.type_ == SERIES_TYPE_INT { + assign[bool](self, idx, size, val) + } else if self.type_ == SERIES_TYPE_INT { val := AnyToInt64(v) - assign[int64](frame, idx, size, val) - } else if frame.type_ == SERIES_TYPE_FLOAT { + assign[int64](self, idx, size, val) + } else if self.type_ == SERIES_TYPE_FLOAT { val := AnyToFloat64(v) - assign[float64](frame, idx, size, val) + assign[float64](self, idx, size, val) } else { val := AnyToString(v) - assign[string](frame, idx, size, val) + assign[string](self, idx, size, val) + } +} + +// Append 批量增加记录 +func (self *NDFrame) Append(values ...interface{}) { + size := 0 + for idx, v := range values { + switch val := v.(type) { + case nil, int8, uint8, int16, uint16, int32, uint32, int64, uint64, int, uint, float32, 
float64, bool, string: + // 基础类型 + self.insert(idx, size, val) + default: + vv := reflect.ValueOf(val) + vk := vv.Kind() + switch vk { + //case reflect.Invalid: // {interface} nil + // series.assign(idx, size, Nil2Float64) + case reflect.Slice, reflect.Array: // 切片或数组 + for i := 0; i < vv.Len(); i++ { + tv := vv.Index(i).Interface() + self.insert(idx, size, tv) + } + case reflect.Struct: // 忽略结构体 + continue + default: + self.insert(idx, size, nil) + } + } } } diff --git a/series.go b/series.go index 397d938..fcc57db 100644 --- a/series.go +++ b/series.go @@ -73,6 +73,8 @@ type Series interface { Min() any // Select 选取一段记录 Select(r Range) Series + // Append 增加一批记录 + Append(values ...interface{}) } // NewSeries 指定类型创建序列 -- Gitee