From 4d730c01e4dbd77e379bffa0e9e66d1b76adf35c Mon Sep 17 00:00:00 2001 From: wangfeng Date: Wed, 1 Feb 2023 07:27:02 +0800 Subject: [PATCH 1/8] =?UTF-8?q?=E6=B5=8B=E8=AF=95=E4=BB=A3=E7=A0=81?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0golang=E7=9A=84struct=E7=BB=A7=E6=89=BF,=20?= =?UTF-8?q?=E6=8A=BD=E8=B1=A1=E7=9A=84=E5=AE=9E=E7=8E=B0=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/abstract.go | 77 ++++++++++++++++++++++++++++++++++++++++++ tests/abstract_test.go | 19 +++++++++++ 2 files changed, 96 insertions(+) create mode 100644 tests/abstract.go create mode 100644 tests/abstract_test.go diff --git a/tests/abstract.go b/tests/abstract.go new file mode 100644 index 0000000..9257efe --- /dev/null +++ b/tests/abstract.go @@ -0,0 +1,77 @@ +package tests + +import "fmt" + +type Person struct { +} + +func (this *Person) Eat() { + fmt.Println("Person Eat") +} + +func (this *Person) Run() { + fmt.Println("Person Run") +} + +func (this *Person) Sleep() { + fmt.Println("Person Sleep") +} + +type Man struct { + Person +} + +func (this *Man) Eat() { + // 类似于Java的 super.Eat() + this.Person.Eat() + fmt.Println("Man Eat") +} + +func (this *Man) Run() { + fmt.Println("Man Run") +} + +// 抽象的用法: 函数指针 + +type AbstractDog struct { + Sleep func() +} + +func (this *AbstractDog) Eat() { + fmt.Println("AbstractDog Eat") + this.Sleep() +} + +func (this *AbstractDog) Run() { + fmt.Println("AbstractDog Run") +} + +// Akita 秋田犬 +type Akita struct { + AbstractDog +} + +func NewAkita() *Akita { + ptr := &Akita{} + ptr.AbstractDog.Sleep = ptr.Sleep + return ptr +} + +func (this *Akita) Sleep() { + fmt.Println("Akita Sleep") +} + +// Labrador 拉布拉多犬 +type Labrador struct { + AbstractDog +} + +func NewLabrador() *Labrador { + ptr := &Labrador{} + ptr.AbstractDog.Sleep = ptr.Sleep + return ptr +} + +func (this *Labrador) Sleep() { + fmt.Println("Labrador Sleep") +} diff --git a/tests/abstract_test.go b/tests/abstract_test.go new file mode 100644 index 0000000..59c3cff --- /dev/null +++ b/tests/abstract_test.go @@ -0,0 +1,19 @@ +package tests + +import "testing" + +func TestAbstract(t *testing.T) { + m := &Man{} + m.Eat() + m.Run() + m.Sleep() + +} + +func TestAbstract2(t *testing.T) { + akita := NewAkita() + akita.Eat() + + labrador := NewLabrador() + labrador.Eat() +} -- Gitee From 861f6129dc36961f168244e35eaca613e377d543 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Wed, 1 Feb 2023 12:38:30 +0800 Subject: [PATCH 2/8] =?UTF-8?q?=E4=BF=AE=E8=AE=A2any=E5=BC=BA=E5=88=B6?= =?UTF-8?q?=E8=BD=AC=E6=8D=A2string=E7=9A=84=E6=B5=8B=E8=AF=95=E7=94=A8?= =?UTF-8?q?=E4=BE=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- type_string_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/type_string_test.go b/type_string_test.go index facb879..562a791 100644 --- a/type_string_test.go +++ b/type_string_test.go @@ -22,16 +22,16 @@ func TestAnyToString(t *testing.T) { want: StringNaN, }, { - name: "test: float true", + name: "test: true true", args: args{ v: true, }, want: True2String, }, { - name: "test: float false", + name: "test: false false", args: args{ - v: true, + v: false, }, want: False2String, }, -- Gitee From c165dc0ad0a65340ac1dfa101ec1b85a25d9b74f Mon Sep 17 00:00:00 2001 From: wangfeng Date: Wed, 1 Feb 2023 12:39:27 +0800 Subject: [PATCH 3/8] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=BA=9F=E5=BC=83?= =?UTF-8?q?=E7=9A=84=E6=BA=90=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series_frame.go | 131 ------------------------------------------------ 1 file changed, 131 deletions(-) delete mode 100644 series_frame.go diff --git a/series_frame.go b/series_frame.go deleted file mode 100644 index 013efda..0000000 --- a/series_frame.go +++ /dev/null @@ -1,131 +0,0 @@ -package pandas - -import ( - "github.com/huandu/go-clone" - "math" - "sync" -) - -type SeriesFrame struct { - valFormatter ValueToStringFormatter - lock sync.RWMutex - name string - nilCount int - //elements any -} - -func NewSeries(t Type, name string, vals ...interface{}) *Series { - var series Series - if t == SERIES_TYPE_BOOL { - series = NewSeriesBool(name, vals...) - } else if t == SERIES_TYPE_INT { - series = NewSeriesInt64(name, vals...) - } else if t == SERIES_TYPE_STRING { - series = NewSeriesString(name, vals...) - } else { - series = NewSeriesFloat64(name, vals...) - } - return &series -} - -func Shift[T ~int64 | ~float64 | ~bool | ~string](s *Series, periods int, cbNan func() T) *Series { - var d Series - d = clone.Clone(*s).(Series) - if periods == 0 { - return &d - } - - values := d.Values().([]T) - - var ( - naVals []T - dst []T - src []T - ) - - if shlen := int(math.Abs(float64(periods))); shlen < len(values) { - if periods > 0 { - naVals = values[:shlen] - dst = values[shlen:] - src = values - } else { - naVals = values[len(values)-shlen:] - dst = values[:len(values)-shlen] - src = values[shlen:] - } - - copy(dst, src) - } else { - naVals = values - } - for i := range naVals { - naVals[i] = cbNan() - } - _ = naVals - return &d -} - -func (self *SeriesFrame) Name() string { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Rename(n string) { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Type() Type { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Len() int { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Values() any { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Empty() Series { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Records() []string { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Subset(start, end int) *Series { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Repeat(x any, repeats int) *Series { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Shift(periods int) *Series { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Rolling(window int) RollingWindow { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) Mean() float64 { - //TODO implement me - panic("implement me") -} - -func (self *SeriesFrame) StdDev() float64 { - //TODO implement me - panic("implement me") -} -- Gitee From 0961c9367c5dce816bbec6110efdd2d1ed3719fa Mon Sep 17 00:00:00 2001 From: wangfeng Date: Wed, 1 Feb 2023 12:40:45 +0800 Subject: [PATCH 4/8] =?UTF-8?q?repeat=E5=87=BD=E6=95=B0=E8=BF=81=E7=A7=BB?= =?UTF-8?q?=E5=88=B0generic=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- builtin.go | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/builtin.go b/builtin.go index 51fd11a..a996824 100644 --- a/builtin.go +++ b/builtin.go @@ -1,6 +1,7 @@ package pandas import ( + gc "github.com/huandu/go-clone" "math" "strings" ) @@ -37,11 +38,7 @@ func IsEmpty(s string) bool { } } -// Repeat 重复生成a -func Repeat[T SeriesGenericType](a T, n int) []T { - dst := make([]T, n) - for i := 0; i < n; i++ { - dst[i] = a - } - return dst +// Clone 克隆一个any +func clone(v any) any { + return gc.Clone(v) } -- Gitee From f2d97f88c6cf9e1a27ef145af42bd8c4fe37e516 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Wed, 1 Feb 2023 12:41:33 +0800 Subject: [PATCH 5/8] =?UTF-8?q?=E8=B0=83=E6=95=B4series=E6=8E=A5=E5=8F=A3?= =?UTF-8?q?=E7=9A=84=E8=BF=94=E5=9B=9E=E5=80=BC,=20=E5=8E=BB=E6=8E=89?= =?UTF-8?q?=E6=8C=87=E9=92=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dataframe.go b/dataframe.go index 5f54868..ff61dbf 100644 --- a/dataframe.go +++ b/dataframe.go @@ -8,6 +8,7 @@ import ( "unicode/utf8" ) +// DataFrame 以gota的DataFrame的方法为主, 兼顾新流程, 避免单元格元素结构化 type DataFrame struct { columns []Series ncols int @@ -236,7 +237,7 @@ func (df DataFrame) Subset(start, end int) DataFrame { columns := make([]Series, df.ncols) for i, column := range df.columns { s := column.Subset(start, end) - columns[i] = *s + columns[i] = s } nrows, ncols, err := checkColumnsDimensions(columns...) if err != nil { -- Gitee From 2f83d8460e3143a6e6afd6c5bb6c42529a7d9f11 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Wed, 1 Feb 2023 12:41:52 +0800 Subject: [PATCH 6/8] =?UTF-8?q?=E8=B0=83=E6=95=B4series=E6=8E=A5=E5=8F=A3?= =?UTF-8?q?=E7=9A=84=E8=BF=94=E5=9B=9E=E5=80=BC,=20=E5=8E=BB=E6=8E=89?= =?UTF-8?q?=E6=8C=87=E9=92=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataframe_test.go b/dataframe_test.go index 1e4b1ce..3283c08 100644 --- a/dataframe_test.go +++ b/dataframe_test.go @@ -15,7 +15,7 @@ func TestDataFrameT0(t *testing.T) { t.Errorf("wrong val: expected: %v actual: %v", expected, s1.Len()) } s2 := s1.Shift(-2) - df := NewDataFrame(s1, *s2) + df := NewDataFrame(s1, s2) fmt.Println(df) _ = s2 -- Gitee From 0812e65b27828fc0b16d5c0a9b57b419ee007d9a Mon Sep 17 00:00:00 2001 From: wangfeng Date: Wed, 1 Feb 2023 12:43:12 +0800 Subject: [PATCH 7/8] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=B3=9B=E5=9E=8B?= =?UTF-8?q?=E5=BA=8F=E5=88=97=E7=9A=84=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic.go | 289 ++++++++++++++++++++++++ series_frame_test.go => generic_test.go | 28 ++- series.go | 154 +++++++++++-- series_bool.go | 23 +- series_bool_test.go | 4 +- series_float64.go | 30 +-- series_float64_test.go | 4 +- series_int64.go | 23 +- series_int64_test.go | 10 +- series_rolling.go | 2 +- series_xstring.go | 23 +- 11 files changed, 512 insertions(+), 78 deletions(-) create mode 100644 generic.go rename series_frame_test.go => generic_test.go (55%) diff --git a/generic.go b/generic.go new file mode 100644 index 0000000..240a05d --- /dev/null +++ b/generic.go @@ -0,0 +1,289 @@ +package pandas + +import ( + "reflect" + "sync" +) + +// GenericType Series支持的所有类型 +type GenericType interface { + ~bool | ~int64 | ~float64 | ~string +} + +// NDFrame 这里本意是想做一个父类 +type NDFrame struct { + lock sync.RWMutex // 读写锁 + formatter StringFormatter // 字符串格式化工具 + name string // 帧名称 + type_ Type // values元素类型 + nilCount int // nil和nan的元素有多少, 这种统计在bool和int64类型中不会大于0, 只对float64及string有效 + rows int // 行数 + values any // 只能是一个一维slice, 在所有的运算中, values强制转换成float64切片 + +} + +//""" +//N-dimensional analogue of DataFrame. Store multi-dimensional in a +//size-mutable, labeled data structure +// +//Parameters +//---------- +//data : BlockManager +//axes : list +//copy : bool, default False +//""" + +func NewNDFrame[E GenericType](name string, rows ...E) *NDFrame { + frame := NDFrame{ + formatter: DefaultFormatter, + name: name, + type_: SERIES_TYPE_INVAILD, + nilCount: 0, + rows: 0, + //values: []E, + } + // TODO: 不知道rows是否存在全部为空的情况, 只能先创建一个空的slice + frame.values = make([]E, 0) // Warning: filled with 0.0 (not NaN) + // 这个地方可以放心的强制转换, E已经做了类型约束 + size := len(frame.values.([]E)) + for idx, v := range rows { + assign(&frame, idx, size, v) + } + + return &frame +} + +// 赋值 +func assign[T GenericType](frame *NDFrame, idx, size int, v T) { + // 检测类型 + if frame.type_ == SERIES_TYPE_INVAILD { + _type, _ := detectTypes(v) + if _type != SERIES_TYPE_INVAILD { + frame.type_ = _type + } + } + _vv := reflect.ValueOf(v) + _vi := _vv.Interface() + // float和string类型有可能是NaN, 对nil和NaN进行计数 + if frame.Type() == SERIES_TYPE_FLOAT && IsNaN(_vi.(float64)) { + frame.nilCount++ + } else if frame.Type() == SERIES_TYPE_STRING && StringIsNaN(_vi.(string)) { + frame.nilCount++ + // 以下修正string的NaN值, 统一为"NaN" + //_rv := reflect.ValueOf(StringNaN) + //_vv.Set(_rv) // 这样赋值会崩溃 + // TODO:值可修改条件之一: 可被寻址 + // 通过反射修改变量值的前提条件之一: 这个值必须可以被寻址, 简单地说就是这个变量必须能被修改. + // 第一步: 通过变量v反射(v的地址) + _vp := reflect.ValueOf(&v) + // 第二步: 取出v地址的元素(v的值) + _vv := _vp.Elem() + // 判断_vv是否能被修改 + if _vv.CanSet() { + // 修改v的值为新值 + _vv.SetString(StringNaN) + // 执行之后, 通过debug可以看到assign入参的v已经变成了"NaN" + } + } + // 确保只添加了1个元素 + if idx < size { + frame.values.([]T)[idx] = v + } else { + frame.values = append(frame.values.([]T), v) + } + // 行数+1 + frame.rows += 1 +} + +// Repeat 重复生成a +func Repeat[T GenericType](a T, n int) []T { + dst := make([]T, n) + for i := 0; i < n; i++ { + dst[i] = a + } + return dst +} + +func (self *NDFrame) Name() string { + return self.name +} + +func (self *NDFrame) Rename(n string) { + self.name = n +} + +func (self *NDFrame) Type() Type { + return self.type_ +} + +func (self *NDFrame) Len() int { + return self.rows +} + +func (self *NDFrame) Values() any { + return self.values +} + +func (self *NDFrame) Empty() Series { + var frame NDFrame + if self.type_ == SERIES_TYPE_STRING { + frame = NDFrame{ + formatter: self.formatter, + name: self.name, + type_: self.type_, + nilCount: 0, + rows: 0, + values: []string{}, + } + } else if self.type_ == SERIES_TYPE_BOOL { + frame = NDFrame{ + formatter: self.formatter, + name: self.name, + type_: self.type_, + nilCount: 0, + rows: 0, + values: []bool{}, + } + } else if self.type_ == SERIES_TYPE_INT { + frame = NDFrame{ + formatter: self.formatter, + name: self.name, + type_: self.type_, + nilCount: 0, + rows: 0, + values: []int64{}, + } + } else if self.type_ == SERIES_TYPE_FLOAT { + frame = NDFrame{ + formatter: self.formatter, + name: self.name, + type_: self.type_, + nilCount: 0, + rows: 0, + values: []float64{}, + } + } else { + panic("无法识别的类型") + } + return &frame +} + +func (self *NDFrame) apply(f func(idx int, v any)) { + vv := reflect.ValueOf(self.values) + vk := vv.Kind() + switch vk { + case reflect.Invalid: // {interface} nil + //series.assign(idx, size, Nil2Float) + case reflect.Slice: // 切片, 不定长 + for i := 0; i < vv.Len(); i++ { + tv := vv.Index(i).Interface() + f(i, tv) + } + case reflect.Array: // 数组, 定长 + for i := 0; i < vv.Len(); i++ { + tv := vv.Index(i).Interface() + f(i, tv) + } + default: + // 其它类型忽略 + } +} + +func (self *NDFrame) Records() []string { + ret := make([]string, self.Len()) + self.apply(func(idx int, v any) { + ret[idx] = AnyToString(v) + }) + return ret +} + +func (self *NDFrame) Subset(start, end int, opt ...any) Series { + // 默认不copy + var __optCopy bool = false + if len(opt) > 0 { + // 第一个参数为是否copy + if _cp, ok := opt[0].(bool); ok { + __optCopy = _cp + } + } + var vs any + var rows int + switch values := self.values.(type) { + case []bool: + subset := values[start:end] + rows = len(subset) + if !__optCopy { + vs = subset + } else { + _vs := make([]bool, 0) + _vs = append(_vs, subset...) + vs = _vs + } + case []string: + subset := values[start:end] + rows = len(subset) + if !__optCopy { + vs = subset + } else { + _vs := make([]string, 0) + _vs = append(_vs, subset...) + vs = _vs + } + case []int64: + subset := values[start:end] + rows = len(subset) + if !__optCopy { + vs = subset + } else { + _vs := make([]int64, 0) + _vs = append(_vs, subset...) + vs = _vs + } + case []float64: + subset := values[start:end] + rows = len(subset) + if !__optCopy { + vs = subset + } else { + _vs := make([]float64, 0) + _vs = append(_vs, subset...) + vs = _vs + } + } + frame := NDFrame{ + formatter: self.formatter, + name: self.name, + type_: self.type_, + nilCount: 0, + rows: rows, + values: vs, + } + var s Series + s = &frame + return s +} + +func (self *NDFrame) Repeat(x any, repeats int) Series { + //TODO implement me + panic("implement me") +} + +func (self *NDFrame) Shift(periods int) Series { + //TODO implement me + panic("implement me") +} + +func (self *NDFrame) Rolling(window int) RollingWindow { + //TODO implement me + panic("implement me") +} + +func (self *NDFrame) Mean() float64 { + //TODO implement me + panic("implement me") +} + +func (self *NDFrame) StdDev() float64 { + //TODO implement me + panic("implement me") +} diff --git a/series_frame_test.go b/generic_test.go similarity index 55% rename from series_frame_test.go rename to generic_test.go index 11ae574..f17286c 100644 --- a/series_frame_test.go +++ b/generic_test.go @@ -19,7 +19,7 @@ func TestNewSeriesFrame(t *testing.T) { tests := []struct { name string args args - want *SeriesFrame + want *NDFrame }{ // TODO: Add test cases. } @@ -41,4 +41,30 @@ func TestSeriesFrame(t *testing.T) { d1 = data s2 := NewSeries(SERIES_TYPE_FLOAT, "x", d1) fmt.Printf("%+v\n", s2) + + var s3 Series + s3 = NewSeriesBool("x", data) + fmt.Printf("%+v\n", s3.Values()) + + var s4 Series + ts4 := GenericSeries[float64]("x", data...) + s4 = *ts4 + fmt.Printf("%+v\n", s4.Values()) +} + +func TestNDFrameNew(t *testing.T) { + // float64 + d1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, NaN(), 12} + nd1 := NewNDFrame[float64]("x", d1...) + fmt.Println(nd1) + fmt.Println(nd1.Records()) + nd11 := nd1.Subset(1, 2, true) + fmt.Println(nd11.Records()) + + // string + d2 := []string{"1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "nan", "12"} + nd2 := NewNDFrame[string]("x", d2...) + fmt.Println(nd2) + fmt.Println(nd2.Records()) + fmt.Println(nd2.Empty()) } diff --git a/series.go b/series.go index 5cf4931..6942886 100644 --- a/series.go +++ b/series.go @@ -3,28 +3,27 @@ package pandas import ( "fmt" "github.com/google/go-cmp/cmp" + "math" + "reflect" ) // Type is a convenience alias that can be used for a more type safe way of // reason and use Series types. type Type = string -type SeriesGenericType interface { - ~bool | ~int64 | ~float64 | ~string -} - // Supported Series Types const ( - SERIES_TYPE_BOOL = "bool" - SERIES_TYPE_INT = "int" - SERIES_TYPE_FLOAT = "float" - SERIES_TYPE_STRING = "string" + SERIES_TYPE_INVAILD = "unknown" // 未知类型 + SERIES_TYPE_BOOL = "bool" // 布尔类型 + SERIES_TYPE_INT = "int" // int64 + SERIES_TYPE_FLOAT = "float" // float64 + SERIES_TYPE_STRING = "string" // string ) -// ValueToStringFormatter is used to convert a value +// StringFormatter is used to convert a value // into a string. Val can be nil or the concrete // type stored by the series. -type ValueToStringFormatter func(val interface{}) string +type StringFormatter func(val interface{}) string type Series interface { // Name 取得series名称 @@ -42,13 +41,15 @@ type Series interface { Empty() Series // Records returns the elements of a Series as a []string Records() []string + // Copy 复制 + //Copy() Series // Subset 获取子集 - Subset(start, end int) *Series + Subset(start, end int, opt ...any) Series // Repeat elements of an array. - Repeat(x any, repeats int) *Series + Repeat(x any, repeats int) Series // Shift index by desired number of periods with an optional time freq. - // 使用可选的时间频率按所需的周期数移动索引。 - Shift(periods int) *Series + // 使用可选的时间频率按所需的周期数移动索引. + Shift(periods int) Series // Rolling creates new RollingWindow Rolling(window int) RollingWindow // Mean calculates the average value of a series @@ -57,17 +58,138 @@ type Series interface { StdDev() float64 } +// NewSeries 指定类型创建序列 +func NewSeries(t Type, name string, vals ...interface{}) *Series { + var series Series + if t == SERIES_TYPE_BOOL { + series = NewSeriesBool(name, vals...) + } else if t == SERIES_TYPE_INT { + series = NewSeriesInt64(name, vals...) + } else if t == SERIES_TYPE_STRING { + series = NewSeriesString(name, vals...) + } else { + // 默认全部强制转换成float64 + series = NewSeriesFloat64(name, vals...) + } + return &series +} + +// 泛型方法, 构造序列, 比其它方式对类型的统一性要求更严格 +func GenericSeries[T GenericType](name string, values ...T) *Series { + // 第一遍, 确定类型, 找到第一个非nil的值 + var _type Type = SERIES_TYPE_STRING + for _, v := range values { + // 泛型处理这里会出现一个错误, invalid operation: v == nil (mismatched types T and untyped nil) + //if v == nil { + // continue + //} + vv := reflect.ValueOf(v) + vk := vv.Kind() + switch vk { + //case reflect.Invalid: // {interface} nil + // series.assign(idx, size, Nil2Float) + //case reflect.Slice: // 切片, 不定长 + // for i := 0; i < vv.Len(); i++ { + // tv := vv.Index(i).Interface() + // str := AnyToFloat64(tv) + // series.assign(idx, size, str) + // } + //case reflect.Array: // 数组, 定长 + // for i := 0; i < vv.Len(); i++ { + // tv := vv.Index(i).Interface() + // av := AnyToFloat64(tv) + // series.assign(idx, size, av) + // } + //case reflect.Struct: // 忽略结构体 + // continue + //default: + // vv := AnyToFloat64(val) + // series.assign(idx, size, vv) + case reflect.Bool: + _type = SERIES_TYPE_BOOL + case reflect.Int64: + _type = SERIES_TYPE_INT + case reflect.Float64: + _type = SERIES_TYPE_FLOAT + case reflect.String: + _type = SERIES_TYPE_STRING + default: + panic(fmt.Errorf("unknown type, %+v", v)) + } + break + } + return NewSeries(_type, name, values) +} + // DefaultIsEqualFunc is the default comparitor to determine if // two values in the series are the same. func DefaultIsEqualFunc(a, b interface{}) bool { return cmp.Equal(a, b) } -// DefaultValueFormatter will return a string representation +// DefaultFormatter will return a string representation // of the data in a particular row. -func DefaultValueFormatter(v interface{}) string { +func DefaultFormatter(v interface{}) string { if v == nil { return StringNaN } return fmt.Sprintf("%v", v) } + +func detectTypes[T GenericType](v T) (Type, any) { + var _type = SERIES_TYPE_STRING + vv := reflect.ValueOf(v) + vk := vv.Kind() + switch vk { + case reflect.Invalid: + _type = SERIES_TYPE_INVAILD + case reflect.Bool: + _type = SERIES_TYPE_BOOL + case reflect.Int64: + _type = SERIES_TYPE_INT + case reflect.Float64: + _type = SERIES_TYPE_FLOAT + case reflect.String: + _type = SERIES_TYPE_STRING + default: + panic(fmt.Errorf("unknown type, %+v", v)) + } + return _type, vv.Interface() +} + +// Shift series切片, 使用可选的时间频率按所需的周期数移动索引 +func Shift[T GenericType](s *Series, periods int, cbNan func() T) Series { + var d Series + d = clone(*s).(Series) + if periods == 0 { + return d + } + + values := d.Values().([]T) + + var ( + naVals []T + dst []T + src []T + ) + + if shlen := int(math.Abs(float64(periods))); shlen < len(values) { + if periods > 0 { + naVals = values[:shlen] + dst = values[shlen:] + src = values + } else { + naVals = values[len(values)-shlen:] + dst = values[:len(values)-shlen] + src = values[shlen:] + } + copy(dst, src) + } else { + naVals = values + } + for i := range naVals { + naVals[i] = cbNan() + } + _ = naVals + return d +} diff --git a/series_bool.go b/series_bool.go index a894516..77124ec 100644 --- a/series_bool.go +++ b/series_bool.go @@ -1,22 +1,21 @@ package pandas import ( - "github.com/huandu/go-clone" "reflect" ) type SeriesBool struct { - SeriesFrame + NDFrame Data []bool } // NewSeriesBool creates a new series with the underlying type as bool. func NewSeriesBool(name string, vals ...interface{}) *SeriesBool { series := SeriesBool{ - SeriesFrame: SeriesFrame{ - name: name, - nilCount: 0, - valFormatter: DefaultValueFormatter, + NDFrame: NDFrame{ + name: name, + nilCount: 0, + formatter: DefaultFormatter, }, Data: []bool{}, } @@ -119,23 +118,23 @@ func (self *SeriesBool) Records() []string { return ret } -func (self *SeriesBool) Subset(start, end int) *Series { +func (self *SeriesBool) Subset(start, end int, opt ...any) Series { var d Series d = NewSeriesBool(self.name, self.Data[start:end]) - return &d + return d } -func (self *SeriesBool) Repeat(x any, repeats int) *Series { +func (self *SeriesBool) Repeat(x any, repeats int) Series { a := AnyToFloat64(x) data := Repeat(a, repeats) var d Series d = NewSeriesBool(self.name, data) - return &d + return d } -func (self *SeriesBool) Shift(periods int) *Series { +func (self *SeriesBool) Shift(periods int) Series { var d Series - d = clone.Clone(self).(Series) + d = clone(self).(Series) return Shift[bool](&d, periods, func() bool { return BoolNaN }) diff --git a/series_bool_test.go b/series_bool_test.go index cab5832..3f6f9be 100644 --- a/series_bool_test.go +++ b/series_bool_test.go @@ -15,10 +15,10 @@ func TestNewSeriesBool(t *testing.T) { t.Errorf("wrong val: expected: %v actual: %v", expected, s1.Len()) } s2 := s1.Shift(2) - fmt.Println((*s2).Values()) + fmt.Println(s2.Values()) s3 := s1.Repeat(1, 2) - fmt.Println((*s3).Values()) + fmt.Println(s3.Values()) //s4 := NewSeriesBool("x", []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) //d4 := s4.Rolling(5).Mean() diff --git a/series_float64.go b/series_float64.go index ba7efe3..af1f71f 100644 --- a/series_float64.go +++ b/series_float64.go @@ -3,22 +3,22 @@ package pandas import ( "gitee.com/quant1x/pandas/algorithms" "gitee.com/quant1x/pandas/algorithms/avx2" - "github.com/huandu/go-clone" + //"github.com/huandu/go-clone" "gonum.org/v1/gonum/stat" "reflect" ) type SeriesFloat64 struct { - SeriesFrame + NDFrame Data []float64 } func NewSeriesFloat64(name string, vals ...interface{}) *SeriesFloat64 { series := SeriesFloat64{ - SeriesFrame: SeriesFrame{ - name: name, - nilCount: 0, - valFormatter: DefaultValueFormatter, + NDFrame: NDFrame{ + name: name, + nilCount: 0, + formatter: DefaultFormatter, }, Data: []float64{}, } @@ -109,9 +109,9 @@ func (self *SeriesFloat64) Len() int { return len(self.Data) } -func (self *SeriesFloat64) Shift(periods int) *Series { +func (self *SeriesFloat64) Shift(periods int) Series { var d Series - d = clone.Clone(self).(Series) + d = clone(self).(Series) return Shift[float64](&d, periods, func() float64 { return Nil2Float }) @@ -120,7 +120,7 @@ func (self *SeriesFloat64) Shift(periods int) *Series { // deprecated: 不推荐使用 func (self *SeriesFloat64) oldShift(periods int) *Series { var d Series - d = clone.Clone(self).(Series) + d = clone(self).(Series) if periods == 0 { return &d } @@ -160,13 +160,13 @@ func (self *SeriesFloat64) Values() any { return self.Data } -func (self *SeriesFloat64) Repeat(x any, repeats int) *Series { +func (self *SeriesFloat64) Repeat(x any, repeats int) Series { a := AnyToFloat64(x) - //data := avx2.Repeat(a, repeats) - data := Repeat(a, repeats) + data := avx2.Repeat(a, repeats) + //data := Repeat(a, repeats) var d Series d = NewSeriesFloat64(self.name, data) - return &d + return d } // Empty returns an empty Series of the same type @@ -185,10 +185,10 @@ func (self *SeriesFloat64) Records() []string { return ret } -func (self *SeriesFloat64) Subset(start, end int) *Series { +func (self *SeriesFloat64) Subset(start, end int, opt ...any) Series { var d Series d = NewSeriesFloat64(self.name, self.Data[start:end]) - return &d + return d } // Rolling creates new RollingWindow diff --git a/series_float64_test.go b/series_float64_test.go index 3e31743..75ecdfd 100644 --- a/series_float64_test.go +++ b/series_float64_test.go @@ -15,10 +15,10 @@ func TestNewSeriesFloat64(t *testing.T) { t.Errorf("wrong val: expected: %v actual: %v", expected, s1.Len()) } s2 := s1.Shift(2) - fmt.Println((*s2).Values()) + fmt.Println(s2.Values()) s3 := s1.Repeat(1, 2) - fmt.Println((*s3).Values()) + fmt.Println(s3.Values()) s4 := NewSeriesFloat64("x", []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) d4 := s4.Rolling(5).Mean() diff --git a/series_int64.go b/series_int64.go index f1f80ea..e4a8390 100644 --- a/series_int64.go +++ b/series_int64.go @@ -1,23 +1,22 @@ package pandas import ( - "github.com/huandu/go-clone" "gonum.org/v1/gonum/stat" "reflect" ) type SeriesInt64 struct { - SeriesFrame + NDFrame Data []int64 } // NewSeriesInt64 creates a new series with the underlying type as int64. func NewSeriesInt64(name string, vals ...interface{}) *SeriesInt64 { series := SeriesInt64{ - SeriesFrame: SeriesFrame{ - name: name, - nilCount: 0, - valFormatter: DefaultValueFormatter, + NDFrame: NDFrame{ + name: name, + nilCount: 0, + formatter: DefaultFormatter, }, Data: []int64{}, } @@ -100,9 +99,9 @@ func (self *SeriesInt64) Type() Type { return SERIES_TYPE_INT } -func (self *SeriesInt64) Shift(periods int) *Series { +func (self *SeriesInt64) Shift(periods int) Series { var d Series - d = clone.Clone(self).(Series) + d = clone(self).(Series) return Shift[int64](&d, periods, func() int64 { return IntNaN }) @@ -118,12 +117,12 @@ func (self *SeriesInt64) Values() any { return self.Data } -func (self *SeriesInt64) Repeat(x any, repeats int) *Series { +func (self *SeriesInt64) Repeat(x any, repeats int) Series { a := AnyToFloat64(x) data := Repeat(a, repeats) var d Series d = NewSeriesInt64(self.name, data) - return &d + return d } func (self *SeriesInt64) Rolling(window int) RollingWindow { @@ -145,10 +144,10 @@ func (self *SeriesInt64) Records() []string { return ret } -func (self *SeriesInt64) Subset(start, end int) *Series { +func (self *SeriesInt64) Subset(start, end int, opt ...any) Series { var d Series d = NewSeriesInt64(self.name, self.Data[start:end]) - return &d + return d } func (self *SeriesInt64) Mean() float64 { diff --git a/series_int64_test.go b/series_int64_test.go index 8d75fa3..031090c 100644 --- a/series_int64_test.go +++ b/series_int64_test.go @@ -15,12 +15,12 @@ func TestNewSeriesInt64(t *testing.T) { t.Errorf("wrong val: expected: %v actual: %v", expected, s1.Len()) } s2 := s1.Shift(2) - fmt.Println((*s2).Values()) + fmt.Println(s2.Values()) s3 := s1.Repeat(1, 2) - fmt.Println((*s3).Values()) + fmt.Println(s3.Values()) - s4 := NewSeriesInt64("x", []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) - d4 := s4.Rolling(5).Mean() - fmt.Printf("d4 = %+v\n", d4.Values()) + //s4 := NewSeriesInt64("x", []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}) + //d4 := s4.Rolling(5).Mean() + //fmt.Printf("d4 = %+v\n", d4.Values()) } diff --git a/series_rolling.go b/series_rolling.go index 70c449f..7109c00 100644 --- a/series_rolling.go +++ b/series_rolling.go @@ -15,7 +15,7 @@ func (r RollingWindow) getBlocks() (blocks []Series) { start := i - r.window end := i - blocks = append(blocks, *r.series.Subset(start, end)) + blocks = append(blocks, r.series.Subset(start, end)) } return diff --git a/series_xstring.go b/series_xstring.go index e51c959..59f2bb8 100644 --- a/series_xstring.go +++ b/series_xstring.go @@ -1,23 +1,22 @@ package pandas import ( - "github.com/huandu/go-clone" "reflect" ) // SeriesString 字符串类型序列 type SeriesString struct { - SeriesFrame + NDFrame Data []string } // NewSeriesString creates a new series with the underlying type as string. func NewSeriesString(name string, vals ...interface{}) *SeriesString { series := SeriesString{ - SeriesFrame: SeriesFrame{ - name: name, - nilCount: 0, - valFormatter: DefaultValueFormatter, + NDFrame: NDFrame{ + name: name, + nilCount: 0, + formatter: DefaultFormatter, }, Data: []string{}, } @@ -124,23 +123,23 @@ func (self *SeriesString) Records() []string { return ret } -func (self *SeriesString) Subset(start, end int) *Series { +func (self *SeriesString) Subset(start, end int, opt ...any) Series { var d Series d = NewSeriesString(self.name, self.Data[start:end]) - return &d + return d } -func (self *SeriesString) Repeat(x any, repeats int) *Series { +func (self *SeriesString) Repeat(x any, repeats int) Series { a := AnyToFloat64(x) data := Repeat(a, repeats) var d Series d = NewSeriesString(self.name, data) - return &d + return d } -func (self *SeriesString) Shift(periods int) *Series { +func (self *SeriesString) Shift(periods int) Series { var d Series - d = clone.Clone(self).(Series) + d = clone(self).(Series) return Shift[string](&d, periods, func() string { return Nil2String }) -- Gitee From 410e90cf1c3ab5dbeee2863d1c2ac4572c0ef32a Mon Sep 17 00:00:00 2001 From: wangfeng Date: Wed, 1 Feb 2023 12:53:11 +0800 Subject: [PATCH 8/8] =?UTF-8?q?=E5=A2=9E=E5=8A=A0copy=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic.go | 5 +++++ series.go | 2 +- series_bool.go | 5 +++++ series_bool_test.go | 2 ++ series_float64.go | 5 +++++ series_int64.go | 15 ++++++++++----- series_xstring.go | 5 +++++ 7 files changed, 33 insertions(+), 6 deletions(-) diff --git a/generic.go b/generic.go index 240a05d..5edd9d5 100644 --- a/generic.go +++ b/generic.go @@ -197,6 +197,11 @@ func (self *NDFrame) Records() []string { return ret } +func (self *NDFrame) Copy() Series { + vlen := self.Len() + return self.Subset(0, vlen) +} + func (self *NDFrame) Subset(start, end int, opt ...any) Series { // 默认不copy var __optCopy bool = false diff --git a/series.go b/series.go index 6942886..279efa8 100644 --- a/series.go +++ b/series.go @@ -42,7 +42,7 @@ type Series interface { // Records returns the elements of a Series as a []string Records() []string // Copy 复制 - //Copy() Series + Copy() Series // Subset 获取子集 Subset(start, end int, opt ...any) Series // Repeat elements of an array. diff --git a/series_bool.go b/series_bool.go index 77124ec..89cd783 100644 --- a/series_bool.go +++ b/series_bool.go @@ -118,6 +118,11 @@ func (self *SeriesBool) Records() []string { return ret } +func (self *SeriesBool) Copy() Series { + vlen := self.Len() + return self.Subset(0, vlen) +} + func (self *SeriesBool) Subset(start, end int, opt ...any) Series { var d Series d = NewSeriesBool(self.name, self.Data[start:end]) diff --git a/series_bool_test.go b/series_bool_test.go index 3f6f9be..09b83f6 100644 --- a/series_bool_test.go +++ b/series_bool_test.go @@ -14,6 +14,8 @@ func TestNewSeriesBool(t *testing.T) { if s1.Len() != expected { t.Errorf("wrong val: expected: %v actual: %v", expected, s1.Len()) } + s11 := s1.Copy() + fmt.Println(s11.Values()) s2 := s1.Shift(2) fmt.Println(s2.Values()) diff --git a/series_float64.go b/series_float64.go index af1f71f..90c18ec 100644 --- a/series_float64.go +++ b/series_float64.go @@ -185,6 +185,11 @@ func (self *SeriesFloat64) Records() []string { return ret } +func (self *SeriesFloat64) Copy() Series { + vlen := self.Len() + return self.Subset(0, vlen) +} + func (self *SeriesFloat64) Subset(start, end int, opt ...any) Series { var d Series d = NewSeriesFloat64(self.name, self.Data[start:end]) diff --git a/series_int64.go b/series_int64.go index e4a8390..eb05719 100644 --- a/series_int64.go +++ b/series_int64.go @@ -125,11 +125,6 @@ func (self *SeriesInt64) Repeat(x any, repeats int) Series { return d } -func (self *SeriesInt64) Rolling(window int) RollingWindow { - //TODO implement me - panic("implement me") -} - func (self *SeriesInt64) Empty() Series { return NewSeriesInt64(self.name, []int64{}) } @@ -144,12 +139,22 @@ func (self *SeriesInt64) Records() []string { return ret } +func (self *SeriesInt64) Copy() Series { + vlen := self.Len() + return self.Subset(0, vlen) +} + func (self *SeriesInt64) Subset(start, end int, opt ...any) Series { var d Series d = NewSeriesInt64(self.name, self.Data[start:end]) return d } +func (self *SeriesInt64) Rolling(window int) RollingWindow { + //TODO implement me + panic("implement me") +} + func (self *SeriesInt64) Mean() float64 { if self.Len() < 1 { return NaN() diff --git a/series_xstring.go b/series_xstring.go index 59f2bb8..c149cef 100644 --- a/series_xstring.go +++ b/series_xstring.go @@ -123,6 +123,11 @@ func (self *SeriesString) Records() []string { return ret } +func (self *SeriesString) Copy() Series { + vlen := self.Len() + return self.Subset(0, vlen) +} + func (self *SeriesString) Subset(start, end int, opt ...any) Series { var d Series d = NewSeriesString(self.name, self.Data[start:end]) -- Gitee