From 1562ae808c1de90fad9546f5204ab6f875e01742 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 13 Feb 2023 13:29:28 +0800 Subject: [PATCH 01/14] =?UTF-8?q?=E8=B0=83=E6=95=B4scope=20limit=E7=9A=84p?= =?UTF-8?q?ackage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series.go | 2 +- scope_limit.go => stat/scope_limit.go | 2 +- scope_limit_test.go => stat/scope_limit_test.go | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename scope_limit.go => stat/scope_limit.go (99%) rename scope_limit_test.go => stat/scope_limit_test.go (98%) diff --git a/series.go b/series.go index 0d026c4..d37599f 100644 --- a/series.go +++ b/series.go @@ -84,7 +84,7 @@ type Series interface { // Min 找出最小值 Min() any // Select 选取一段记录 - Select(r ScopeLimit) Series + Select(r stat.ScopeLimit) Series // Append 增加一批记录 Append(values ...any) // Apply 接受一个回调函数 diff --git a/scope_limit.go b/stat/scope_limit.go similarity index 99% rename from scope_limit.go rename to stat/scope_limit.go index 917dad0..4e6cca6 100644 --- a/scope_limit.go +++ b/stat/scope_limit.go @@ -1,6 +1,6 @@ // Copyright 2018-20 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved. -package pandas +package stat import ( "errors" diff --git a/scope_limit_test.go b/stat/scope_limit_test.go similarity index 98% rename from scope_limit_test.go rename to stat/scope_limit_test.go index 1e2e5a1..04d2b9f 100644 --- a/scope_limit_test.go +++ b/stat/scope_limit_test.go @@ -1,6 +1,6 @@ // Copyright 2018-20 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved. -package pandas +package stat import ( "testing" -- Gitee From a363c1552fc35c1da40e9206ef5cdb3b3406384f Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 13 Feb 2023 13:29:49 +0800 Subject: [PATCH 02/14] =?UTF-8?q?=E8=B0=83=E6=95=B4scope=20limit=E7=9A=84p?= =?UTF-8?q?ackage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe_remove.go | 4 +++- dataframe_remove_test.go | 3 ++- dataframe_subset.go | 4 +++- generic_range.go | 3 ++- generic_test.go | 3 ++- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/dataframe_remove.go b/dataframe_remove.go index 095d6f4..107c5af 100644 --- a/dataframe_remove.go +++ b/dataframe_remove.go @@ -1,7 +1,9 @@ package pandas +import "gitee.com/quant1x/pandas/stat" + // Remove 删除一段范围内的记录 -func (self DataFrame) Remove(p ScopeLimit) DataFrame { +func (self DataFrame) Remove(p stat.ScopeLimit) DataFrame { rowLen := self.Nrow() start, end, err := p.Limits(rowLen) if err != nil { diff --git a/dataframe_remove_test.go b/dataframe_remove_test.go index dac8997..222f364 100644 --- a/dataframe_remove_test.go +++ b/dataframe_remove_test.go @@ -2,6 +2,7 @@ package pandas import ( "fmt" + "gitee.com/quant1x/pandas/stat" "testing" ) @@ -23,7 +24,7 @@ func TestDataFrame_Remove(t *testing.T) { s_e := GenericSeries[string]("x", "a0", "a1", "a2", "a3", "a4") df2 := df1.Join(s_e) fmt.Println(df2) - r := RangeFinite(3, 3) + r := stat.RangeFinite(3, 3) df3 := df2.Remove(r) fmt.Println(df3) diff --git a/dataframe_subset.go b/dataframe_subset.go index 276b90b..6008932 100644 --- a/dataframe_subset.go +++ b/dataframe_subset.go @@ -1,5 +1,7 @@ package pandas +import "gitee.com/quant1x/pandas/stat" + // Subset returns a subset of the rows of the original DataFrame based on the // Series subsetting indexes. func (self DataFrame) Subset(start, end int) DataFrame { @@ -23,7 +25,7 @@ func (self DataFrame) Subset(start, end int) DataFrame { } // Select 选择一段记录 -func (self DataFrame) SelectRows(p ScopeLimit) DataFrame { +func (self DataFrame) SelectRows(p stat.ScopeLimit) DataFrame { columns := []Series{} for i := range self.columns { columns = append(columns, self.columns[i].Select(p)) diff --git a/generic_range.go b/generic_range.go index bad5040..118b970 100644 --- a/generic_range.go +++ b/generic_range.go @@ -1,6 +1,7 @@ package pandas import ( + "gitee.com/quant1x/pandas/stat" gc "github.com/huandu/go-clone" "reflect" ) @@ -115,7 +116,7 @@ func (self *NDFrame) oldSubset(start, end int, opt ...any) Series { } // Select 选取一段记录 -func (self *NDFrame) Select(r ScopeLimit) Series { +func (self *NDFrame) Select(r stat.ScopeLimit) Series { start, end, err := r.Limits(self.Len()) if err != nil { return nil diff --git a/generic_test.go b/generic_test.go index 381e43a..a916f19 100644 --- a/generic_test.go +++ b/generic_test.go @@ -2,6 +2,7 @@ package pandas import ( "fmt" + "gitee.com/quant1x/pandas/stat" "testing" ) @@ -34,7 +35,7 @@ func TestNDFrameNew(t *testing.T) { nd1 := NewNDFrame[float64]("x", d1...) fmt.Println(nd1) - r := RangeFinite(-1) + r := stat.RangeFinite(-1) ndr1 := nd1.Select(r) fmt.Println(ndr1.Values()) -- Gitee From 5b46f17db9ccdfd00dfefa3a4972434619286fae Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 13 Feb 2023 19:21:33 +0800 Subject: [PATCH 03/14] =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=88=86=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stat/{frame_type.go => ndarray_type.go} | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) rename stat/{frame_type.go => ndarray_type.go} (93%) diff --git a/stat/frame_type.go b/stat/ndarray_type.go similarity index 93% rename from stat/frame_type.go rename to stat/ndarray_type.go index cbf7246..e0af290 100644 --- a/stat/frame_type.go +++ b/stat/ndarray_type.go @@ -106,11 +106,15 @@ func checkoutRawType(frame any) reflect.Kind { if pos < 0 { return reflect.Invalid } - strType = strings.TrimSpace(strType[:pos]) - if len(strType) < 1 { - return reflect.Invalid + rawType := strings.TrimSpace(strType[:pos]) + // 如果是0, 这个应该是个slice + if len(rawType) < 1 { + rawType = strings.TrimSpace(strType[pos+1:]) + if len(rawType) < 1 { + return reflect.Invalid + } } - if t, ok := mapKind[strType]; ok { + if t, ok := mapKind[rawType]; ok { return t } return reflect.Invalid -- Gitee From 4a03dd9b114eb5a9419ac1b914f5607dc63ee989 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 13 Feb 2023 21:44:00 +0800 Subject: [PATCH 04/14] =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=88=86=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- builtin_test.go | 3 +- dataframe_csv_test.go | 2 +- dataframe_indexes.go | 2 +- generic.go | 2 +- generic_number.go | 224 ------------------------------------------ generic_ref.go | 2 +- series_int64.go | 4 +- slice_float32.go | 67 ------------- slice_float64.go | 49 --------- 9 files changed, 8 insertions(+), 347 deletions(-) delete mode 100644 generic_number.go delete mode 100644 slice_float32.go delete mode 100644 slice_float64.go diff --git a/builtin_test.go b/builtin_test.go index 543cb06..6c20850 100644 --- a/builtin_test.go +++ b/builtin_test.go @@ -2,6 +2,7 @@ package pandas import ( "fmt" + "gitee.com/quant1x/pandas/stat" "testing" ) @@ -80,7 +81,7 @@ func TestPoint(t *testing.T) { p1 = &a fmt.Printf("*int = nil, result=%v\n", isPoint(p1)) - var p2 *BigFloat + var p2 *stat.BigFloat fmt.Printf("*BigFloat = nil, result=%v\n", isPoint(p2)) } diff --git a/dataframe_csv_test.go b/dataframe_csv_test.go index b6faa2f..745f39f 100644 --- a/dataframe_csv_test.go +++ b/dataframe_csv_test.go @@ -34,7 +34,7 @@ Spain,2012-02-01,66,555.42,00241,1.23 // //closes := df.Col("d") //ma5 := closes.RollingV1(5).Mean() - //dframe.NewSeries(closes, dframe.Float, "") + //dframe.NewSeries(closes, dframe.Floats, "") //fmt.Println(ma5) d := df.Col("d") fmt.Println(d) diff --git a/dataframe_indexes.go b/dataframe_indexes.go index 160d4f0..6030cdf 100644 --- a/dataframe_indexes.go +++ b/dataframe_indexes.go @@ -45,7 +45,7 @@ func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int, // //} // switch s.Type() { // case SERIES_TYPE_INT64: - // return s.AsInt() + // return s.Ints() // case series.Bool: // bools, err := s.Bool() // if err != nil { diff --git a/generic.go b/generic.go index d801ab4..8d23552 100644 --- a/generic.go +++ b/generic.go @@ -152,7 +152,7 @@ func (self *NDFrame) NaN() any { } func (self *NDFrame) Float() []float32 { - return ToFloat32(self) + return stat.SliceToFloat32(self.values) } // DTypes 计算以这个函数为主 diff --git a/generic_number.go b/generic_number.go deleted file mode 100644 index b44187e..0000000 --- a/generic_number.go +++ /dev/null @@ -1,224 +0,0 @@ -package pandas - -import ( - "gitee.com/quant1x/pandas/stat" - "math/big" -) - -type BigFloat = big.Float // 预留将来可能扩展float - -type Number8 interface { - ~int8 | ~uint8 -} - -type Number16 interface { - ~int16 | ~uint16 -} - -type Number32 interface { - ~int32 | ~uint32 | float32 -} - -type Number64 interface { - ~int64 | ~uint64 | float64 | int | uint -} - -// NumberOfCPUBitsRelated The number of CPU bits is related -type NumberOfCPUBitsRelated interface { - ~int | ~uint | ~uintptr -} - -type Integer interface { - Number8 | Number16 | Number32 | Number64 -} - -// Number int和uint的长度取决于CPU是多少位 -type Number interface { - Integer | Float -} - -//type Number interface { -// constraints.Float | constraints.Integer -//} - -// Signed is a constraint that permits any signed integer type. -// If future releases of Go add new predeclared signed integer types, -// this constraint will be modified to include them. -type Signed interface { - ~int | ~int8 | ~int16 | ~int32 | ~int64 -} - -// Unsigned is a constraint that permits any unsigned integer type. -// If future releases of Go add new predeclared unsigned integer types, -// this constraint will be modified to include them. -// TODO:~uintptr应该是没有应用场景 -type Unsigned interface { - ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr -} - -// Integer_old Integer is a constraint that permits any integer type. -// If future releases of Go add new predeclared integer types, -// this constraint will be modified to include them. -type Integer_old interface { - Signed | Unsigned -} - -// Float is a constraint that permits any floating-point type. -// If future releases of Go add new predeclared floating-point types, -// this constraint will be modified to include them. -type Float interface { - ~float32 | ~float64 -} - -// Complex is a constraint that permits any complex numeric type. -// If future releases of Go add new predeclared complex numeric types, -// this constraint will be modified to include them. -type Complex interface { - ~complex64 | ~complex128 -} - -// Ordered is a constraint that permits any ordered type: any type -// that supports the operators < <= >= >. -// If future releases of Go add new ordered types, -// this constraint will be modified to include them. -type Ordered interface { - Integer | Float | ~string -} - -//const ( -// True2Float32 float32 = float32(1) // true转float32 -// False2Float32 float32 = float32(0) // false转float32 -// StringTrue2Float32 float32 = float32(1) // 字符串true转float32 -// StringFalse2Float32 float32 = float32(0) // 字符串false转float32 -//) - -// Mean gonum.org/v1/gonum/stat不支持整型, 每次都要转换有点难受啊 -func Mean[T Number](x []T) float64 { - d := numberToFloat64(x) - s := stat.Mean(d) - return float64(s) -} - -// any转number -func valueToNumber[T Number](v any, nil2t T, bool2t func(b bool) T, string2t func(s string, v any) T) T { - switch val := v.(type) { - case nil: // 这个地方判断nil值 - return nil2t - case int8: - return T(val) - case uint8: - return T(val) - case int16: - return T(val) - case uint16: - return T(val) - case int32: - return T(val) - case uint32: - return T(val) - case int64: - return T(val) - case uint64: - return T(val) - case int: - return T(val) - case uint: - return T(val) - case float32: - return T(val) - case float64: - return T(val) - case bool: - return bool2t(val) - case string: - return string2t(val, v) - } - return T(0) -} - -// 指针转number -func pointToNumber[T Number](v any, nil2t T, bool2t func(b bool) T, string2t func(s string, v any) T) T { - switch val := v.(type) { - case *int8: - if val == nil { - return nil2t - } - return T(*val) - case *uint8: - if val == nil { - return nil2t - } - return T(*val) - case *int16: - if val == nil { - return nil2t - } - return T(*val) - case *uint16: - if val == nil { - return nil2t - } - return T(*val) - case *int32: - if val == nil { - return nil2t - } - return T(*val) - case *uint32: - if val == nil { - return nil2t - } - return T(*val) - case *int64: - if val == nil { - return nil2t - } - return T(*val) - case *uint64: - if val == nil { - return nil2t - } - return T(*val) - case *int: - if val == nil { - return nil2t - } - return T(*val) - case *uint: - if val == nil { - return nil2t - } - return T(*val) - case *float32: - if val == nil { - return nil2t - } - return T(*val) - case *float64: - if val == nil { - return nil2t - } - return T(*val) - case *bool: - if val == nil { - return nil2t - } - return bool2t(*val) - case *string: - if val == nil { - return nil2t - } - return string2t(*val, v) - } - return T(0) -} - -//func anyToNumber(v any) int { -// switch val := v.(type) { -// case nil, int8, uint8, int16, uint16, int32, uint32, int64, uint64, int, uint, float32, float64, bool, string: -// // 基础类型 -// series_append(&frame, idx, size, val) -// default: -// } -// return 0 -//} diff --git a/generic_ref.go b/generic_ref.go index ec87433..b631f2d 100644 --- a/generic_ref.go +++ b/generic_ref.go @@ -14,7 +14,7 @@ func (self *NDFrame) Ref(param any) (s Series) { N = stat.Align(v, Nil2Float32, self.Len()) case Series: vs := v.Values() - N = SliceToFloat32(vs) + N = stat.SliceToFloat32(vs) N = stat.Align(N, Nil2Float32, self.Len()) default: panic(exception.New(1, "error window")) diff --git a/series_int64.go b/series_int64.go index 1a10cab..2c2f2e3 100644 --- a/series_int64.go +++ b/series_int64.go @@ -162,8 +162,8 @@ func (self *SeriesInt64) Mean() float64 { if self.Len() < 1 { return NaN() } - stdDev := Mean(self.Data) - return stdDev + stdDev := stat.Mean(self.Data) + return float64(stdDev) } func (self *SeriesInt64) StdDev() float64 { diff --git a/slice_float32.go b/slice_float32.go deleted file mode 100644 index b9d1bb0..0000000 --- a/slice_float32.go +++ /dev/null @@ -1,67 +0,0 @@ -package pandas - -import "gitee.com/quant1x/pandas/stat" - -func slice_any_to_float32[T Number](s []T) []float32 { - count := len(s) - if count == 0 { - return []float32{} - } - d := make([]float32, count) - for idx, iv := range s { - // 强制转换 - d[idx] = float32(iv) - } - return d -} - -// SliceToFloat32 any输入只能是一维slice或者数组 -func SliceToFloat32(v any) []float32 { - var vs []float32 - switch values := v.(type) { - case []int8: - return slice_any_to_float32(values) - case []uint8: - return slice_any_to_float32(values) - case []int16: - return slice_any_to_float32(values) - case []uint16: - return slice_any_to_float32(values) - case []int32: - return slice_any_to_float32(values) - case []uint32: - return slice_any_to_float32(values) - case []int64: - return slice_any_to_float32(values) - case []uint64: - return slice_any_to_float32(values) - case []int: - return slice_any_to_float32(values) - case []uint: - return slice_any_to_float32(values) - case []float32: - // TODO:直接返回会不会有问题 - return values - case []float64: - return slice_any_to_float32(values) - case []bool: - count := len(values) - if count == 0 { - return []float32{} - } - vs = make([]float32, count) - for idx, iv := range values { - vs[idx] = boolToFloat32(iv) - } - case []string: - count := len(values) - if count == 0 { - return []float32{} - } - vs = make([]float32, count) - for idx, iv := range values { - vs[idx] = float32(stat.AnyToFloat64(iv)) - } - } - return []float32{} -} diff --git a/slice_float64.go b/slice_float64.go deleted file mode 100644 index 5b277a0..0000000 --- a/slice_float64.go +++ /dev/null @@ -1,49 +0,0 @@ -package pandas - -import "gitee.com/quant1x/pandas/stat" - -func slice_any_to_float64[T Number](s []T) []float64 { - count := len(s) - if count == 0 { - return []float64{} - } - d := make([]float64, count) - for idx, iv := range s { - d[idx] = float64(iv) - } - return d -} - -// any输入只能是一维slice或者数组 -func numberToFloat64(v any) []float64 { - var vs []float64 - switch values := v.(type) { - case []float64: - return values - case []int64: - return slice_any_to_float64(values) - case []int32: - return slice_any_to_float64(values) - case []int: - return slice_any_to_float64(values) - case []bool: - count := len(values) - if count == 0 { - return []float64{} - } - vs = make([]float64, count) - for idx, iv := range values { - vs[idx] = stat.AnyToFloat64(iv) - } - case []string: - count := len(values) - if count == 0 { - return []float64{} - } - vs = make([]float64, count) - for idx, iv := range values { - vs[idx] = stat.AnyToFloat64(iv) - } - } - return vs -} -- Gitee From 0672c2507025480a6aafcc6792360050ff245b28 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 13 Feb 2023 21:45:07 +0800 Subject: [PATCH 05/14] =?UTF-8?q?=E6=89=A9=E5=85=85=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- formula/ema.go | 4 +-- formula/ref.go | 2 +- formula/sma.go | 6 ++-- num/Adder.go | 2 +- num/array.go | 2 +- num/array_test.go | 2 +- num/compare.go | 2 +- num/equal.go | 2 +- stat/diff.go | 27 ++++++++++++++++++ stat/diff_test.go | 6 ++++ stat/errors.go | 10 ++++++- stat/fillna.go | 8 +++--- stat/max.go | 61 +++++++++++++++++++++++++-------------- stat/maximum.go | 3 +- stat/mean.go | 39 +++++++++++++++++++++++++ stat/min.go | 41 +++++++++++++++++++++++++- stat/shift.go | 35 ++++++++++++++++++++++- stat/stddev.go | 4 +-- stat/type.go | 7 ++++- stat/type_bool.go | 7 +++++ stat/type_int32.go | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 21 files changed, 298 insertions(+), 43 deletions(-) create mode 100644 stat/type_int32.go diff --git a/formula/ema.go b/formula/ema.go index 0b844a3..8fae77a 100644 --- a/formula/ema.go +++ b/formula/ema.go @@ -43,7 +43,7 @@ func EMA_v2(S pandas.Series, N any) any { X = float32(v) case pandas.Series: vs := v.Values() - fs := pandas.SliceToFloat32(vs) + fs := stat.SliceToFloat32(vs) X = fs[len(fs)-1] default: panic(exception.New(1, "error window")) @@ -60,7 +60,7 @@ func EMA_v0(S pandas.Series, N any) any { X = float32(v) case pandas.Series: vs := v.Values() - fs := pandas.SliceToFloat32(vs) + fs := stat.SliceToFloat32(vs) X = fs[len(fs)-1] default: panic(exception.New(1, "error window")) diff --git a/formula/ref.go b/formula/ref.go index ff6fed8..3e201c2 100644 --- a/formula/ref.go +++ b/formula/ref.go @@ -14,7 +14,7 @@ func REF(S pandas.Series, N any) any { X = stat.Repeat[float32](float32(v), S.Len()) case pandas.Series: vs := v.Values() - X = pandas.SliceToFloat32(vs) + X = stat.SliceToFloat32(vs) X = stat.Align(X, pandas.Nil2Float32, S.Len()) default: panic(exception.New(1, "error window")) diff --git a/formula/sma.go b/formula/sma.go index a836711..ea982e0 100644 --- a/formula/sma.go +++ b/formula/sma.go @@ -17,7 +17,7 @@ func SMA(S pandas.Series, N any, M int) any { X = float32(v) case pandas.Series: vs := v.Values() - fs := pandas.SliceToFloat32(vs) + fs := stat.SliceToFloat32(vs) X = fs[len(fs)-1] default: panic(exception.New(1, "error window")) @@ -37,7 +37,7 @@ func SMA_v5(S pandas.Series, N any, M int) any { X = stat.Repeat[float32](float32(v), S.Len()) case pandas.Series: vs := v.Values() - X = pandas.SliceToFloat32(vs) + X = stat.SliceToFloat32(vs) X = stat.Align(X, pandas.Nil2Float32, S.Len()) default: panic(exception.New(1, "error window")) @@ -64,7 +64,7 @@ func SMA_v4(S pandas.Series, N any, M int) any { X = stat.Repeat[float32](float32(v), S.Len()) case pandas.Series: vs := v.Values() - X = pandas.SliceToFloat32(vs) + X = stat.SliceToFloat32(vs) X = stat.Align(X, pandas.Nil2Float32, S.Len()) default: panic(exception.New(1, "error window")) diff --git a/num/Adder.go b/num/Adder.go index 0a3cc9f..9f24147 100644 --- a/num/Adder.go +++ b/num/Adder.go @@ -1,4 +1,4 @@ -package lambda +package num import ( "fmt" diff --git a/num/array.go b/num/array.go index 3964baf..063ab15 100644 --- a/num/array.go +++ b/num/array.go @@ -1,4 +1,4 @@ -package lambda +package num import ( "errors" diff --git a/num/array_test.go b/num/array_test.go index f04dd3d..b161585 100644 --- a/num/array_test.go +++ b/num/array_test.go @@ -1,4 +1,4 @@ -package lambda +package num import ( "fmt" diff --git a/num/compare.go b/num/compare.go index 3f5382a..287b863 100644 --- a/num/compare.go +++ b/num/compare.go @@ -1,4 +1,4 @@ -package lambda +package num import ( "errors" diff --git a/num/equal.go b/num/equal.go index 8cfd7b2..64f1313 100644 --- a/num/equal.go +++ b/num/equal.go @@ -1,4 +1,4 @@ -package lambda +package num type Equal interface { Equals(obj interface{}) bool diff --git a/stat/diff.go b/stat/diff.go index d8102dd..24d0c08 100644 --- a/stat/diff.go +++ b/stat/diff.go @@ -30,3 +30,30 @@ func Diff[T Number](s []T, param any) []T { return d } + +func Diff2[T BaseType](s []T, param any) []T { + var d any + switch vs := any(s).(type) { + case []float32: + d = Diff(vs, param) + case []float64: + d = Diff(vs, param) + case []int: + d = Diff(vs, param) + case []int8: + d = Diff(vs, param) + case []int16: + d = Diff(vs, param) + case []int32: + d = Diff(vs, param) + case []int64: + d = Diff(vs, param) + //case []uint, []uint8, []uint16, []uint32, []uint64, []uintptr: + // d = xv + default: + // 其它类型原样返回 + panic(Throw(any(s))) + } + + return d.([]T) +} diff --git a/stat/diff_test.go b/stat/diff_test.go index fc6e219..9650631 100644 --- a/stat/diff_test.go +++ b/stat/diff_test.go @@ -19,3 +19,9 @@ func TestDiff(t *testing.T) { r2 := Diff(d1, s1) fmt.Println("序列化结果:", r2) } + +func TestDiff2(t *testing.T) { + d1 := []float64{1, 2, 3, 4, 5} + r1 := Diff2(d1, 1) + fmt.Println(r1) +} diff --git a/stat/errors.go b/stat/errors.go index 0dd7988..92f4252 100644 --- a/stat/errors.go +++ b/stat/errors.go @@ -1,6 +1,9 @@ package stat -import "gitee.com/quant1x/pandas/exception" +import ( + "gitee.com/quant1x/pandas/exception" + "reflect" +) const ( errorTypeBase = 0 @@ -10,3 +13,8 @@ var ( // ErrUnsupportedType 不支持的类型 ErrUnsupportedType = exception.New(errorTypeBase+0, "Unsupported type") ) + +func Throw(tv any) *exception.Exception { + typeName := reflect.TypeOf(tv).String() + return exception.New(errorTypeBase+1, "Unsupported type: "+typeName) +} diff --git a/stat/fillna.go b/stat/fillna.go index f62d150..14e2ef9 100644 --- a/stat/fillna.go +++ b/stat/fillna.go @@ -39,7 +39,7 @@ import "golang.org/x/exp/slices" // Returns // ------- // []T or None -func Fill[T Number | ~string](v []T, d T, args ...any) (rows []T) { +func Fill[T BaseType](v []T, d T, args ...any) (rows []T) { // 默认不替换 var __optInplace = false if len(args) > 0 { @@ -84,7 +84,7 @@ func Fill[T Number | ~string](v []T, d T, args ...any) (rows []T) { } // FillNa NaN填充默认值 -func FillNa[T Number | ~string](v []T, args ...any) []T { +func FillNa[T BaseType](x []T, v any, args ...any) []T { // 默认不copy var __optInplace = false if len(args) > 0 { @@ -95,9 +95,9 @@ func FillNa[T Number | ~string](v []T, args ...any) []T { } var dest []T if __optInplace { - dest = v + dest = x } else { - dest = slices.Clone(v) + dest = slices.Clone(x) } var values any = dest switch rows := values.(type) { diff --git a/stat/max.go b/stat/max.go index 6acbc3c..55c7112 100644 --- a/stat/max.go +++ b/stat/max.go @@ -10,27 +10,7 @@ func Max[T Number](x []T) T { return unaryOperations1[T](x, vek32.Max, vek.Max, __max_go[T]) } -//func Max[T Float](f []T) T { -// if len(f) == 0 { -// return T(0) -// } -// -// var d any -// var s any -// s = f -// switch fs := s.(type) { -// case []float32: -// d = vek32.Max(fs) -// case []float64: -// d = vek.Max(fs) -// default: -// panic(ErrUnsupportedType) -// } -// -// return d.(T) -//} - -func __max_go[T Number](x []T) T { +func __max_go[T Number | ~string](x []T) T { max := x[0] for _, v := range x[1:] { if v > max { @@ -39,3 +19,42 @@ func __max_go[T Number](x []T) T { } return max } + +func Max2[T BaseType](x []T) T { + var d any + switch vs := any(x).(type) { + case []float32: + d = Max(vs) + case []float64: + d = Max(vs) + case []int: + d = Max(vs) + case []int8: + d = Max(vs) + case []int16: + d = Max(vs) + case []int32: + d = Max(vs) + case []int64: + d = Max(vs) + case []uint: + d = Max(vs) + case []uint8: + d = Max(vs) + case []uint16: + d = Max(vs) + case []uint32: + d = Max(vs) + case []uint64: + d = Max(vs) + case []uintptr: + d = Max(vs) + case []string: + d = __max_go(vs) + default: + // 其它类型原样返回 + panic(Throw(any(x))) + } + + return d.(T) +} diff --git a/stat/maximum.go b/stat/maximum.go index 9d066b3..32f6250 100644 --- a/stat/maximum.go +++ b/stat/maximum.go @@ -7,7 +7,8 @@ import ( ) // Maximum AVX2版本, 两个序列横向比较最大值 -// TODO:print(np.maximum(1.4, np.nan)) 输出nan +// +// TODO:print(np.maximum(1.4, np.nan)) 输出nan func Maximum[T Number](f1, f2 []T) []T { xlen := len(f1) ylen := len(f2) diff --git a/stat/mean.go b/stat/mean.go index e5b4491..031dc2c 100644 --- a/stat/mean.go +++ b/stat/mean.go @@ -13,3 +13,42 @@ func Mean[T Number](x []T) T { func __mean_go[T Number](x []T) T { return __sum(x) / T(len(x)) } + +func Mean2[T BaseType](x []T) T { + var d any + switch vs := any(x).(type) { + case []float32: + d = Mean(vs) + case []float64: + d = Mean(vs) + case []int: + d = Mean(vs) + case []int8: + d = Mean(vs) + case []int16: + d = Mean(vs) + case []int32: + d = Mean(vs) + case []int64: + d = Mean(vs) + case []uint: + d = Mean(vs) + case []uint8: + d = Mean(vs) + case []uint16: + d = Mean(vs) + case []uint32: + d = Mean(vs) + case []uint64: + d = Mean(vs) + case []uintptr: + d = Mean(vs) + //case []string: + // d = __max_go(vs) + default: + // 其它类型原样返回 + panic(Throw(any(x))) + } + + return d.(T) +} diff --git a/stat/min.go b/stat/min.go index a91b5f9..230732a 100644 --- a/stat/min.go +++ b/stat/min.go @@ -10,7 +10,7 @@ func Min[T Number](x []T) T { return unaryOperations1[T](x, vek32.Min, vek.Min, __min_go[T]) } -func __min_go[T Number](x []T) T { +func __min_go[T Number | ~string](x []T) T { min := x[0] for _, v := range x[1:] { if v < min { @@ -19,3 +19,42 @@ func __min_go[T Number](x []T) T { } return min } + +func Min2[T BaseType](x []T) T { + var d any + switch vs := any(x).(type) { + case []float32: + d = Min(vs) + case []float64: + d = Min(vs) + case []int: + d = Min(vs) + case []int8: + d = Min(vs) + case []int16: + d = Min(vs) + case []int32: + d = Min(vs) + case []int64: + d = Min(vs) + case []uint: + d = Min(vs) + case []uint8: + d = Min(vs) + case []uint16: + d = Min(vs) + case []uint32: + d = Min(vs) + case []uint64: + d = Min(vs) + case []uintptr: + d = Min(vs) + case []string: + d = __min_go(vs) + default: + // 其它类型原样返回 + panic(Throw(any(x))) + } + + return d.(T) +} diff --git a/stat/shift.go b/stat/shift.go index 72078b7..d8bd265 100644 --- a/stat/shift.go +++ b/stat/shift.go @@ -1,12 +1,13 @@ package stat import ( + "gitee.com/quant1x/pandas/exception" "golang.org/x/exp/slices" "math" ) // Shift series切片, 使用可选的时间频率按所需的周期数移动索引 -func Shift[T GenericType](S []T, periods int) []T { +func Shift[T BaseType](S []T, periods int) []T { d := slices.Clone(S) if periods == 0 { return d @@ -59,3 +60,35 @@ func Shift2[T GenericType](S []T, N []DType) []T { return d } + +// Shift3 series切片, 使用可选的时间频率按所需的周期数移动索引 +// +// param不支持负值 +func Shift3[T BaseType](S []T, param any) []T { + sLen := len(S) + var N []DType + switch v := param.(type) { + case int: + N = Repeat[DType](DType(v), sLen) + case []DType: + N = Align(v, DTypeNaN, sLen) + default: + panic(exception.New(1, "error window")) + } + var d []T + d = slices.Clone(S) + if len(N) == 0 { + return d + } + values := d + for i, _ := range S { + x := N[i] + if DTypeIsNaN(x) || int(x) > i { + values[i] = typeDefault[T]() + continue + } + values[i] = S[i-int(x)] + } + + return d +} diff --git a/stat/stddev.go b/stat/stddev.go index bd9d033..b43a9cf 100644 --- a/stat/stddev.go +++ b/stat/stddev.go @@ -32,9 +32,9 @@ func Std_TODO[T Float](f []T) T { } // Std 计算标准差 -func Std[T Float](f []T) T { +func Std[T BaseType](f []T) T { if len(f) == 0 { - return T(0) + return typeDefault[T]() } var d any var s any diff --git a/stat/type.go b/stat/type.go index e7e3b09..382953a 100644 --- a/stat/type.go +++ b/stat/type.go @@ -47,6 +47,11 @@ type Ordered interface { Integer | Float | ~string } +// NumberOfCPUBitsRelated The number of CPU bits is related +type NumberOfCPUBitsRelated interface { + ~int | ~uint | ~uintptr +} + // /*nil, */ int8, uint8, int16, uint16, int32, uint32, int64, uint64, int, uint, float32, float64 , bool, string // ~int8 | ~uint8 | ~int16 | ~uint16 | ~int32 | ~uint32 | ~int64 | ~uint64 | ~int | ~uint | ~float32 | ~float64 | ~bool | ~string // uintptr @@ -152,7 +157,7 @@ func valueToNumber[T Number](v any, nil2t T, bool2t func(b bool) T, string2t fun case string: return string2t(val, v) default: - panic(ErrUnsupportedType) + panic(Throw(v)) } return T(0) } diff --git a/stat/type_bool.go b/stat/type_bool.go index 6c65d47..e5730b3 100644 --- a/stat/type_bool.go +++ b/stat/type_bool.go @@ -35,6 +35,13 @@ func isFalse(s string) bool { } } +func boolToInt32(b bool) int32 { + if b { + return True2Int32 + } + return False2Int32 +} + func boolToInt64(b bool) int64 { if b { return True2Int64 diff --git a/stat/type_int32.go b/stat/type_int32.go new file mode 100644 index 0000000..e829669 --- /dev/null +++ b/stat/type_int32.go @@ -0,0 +1,71 @@ +package stat + +import ( + "fmt" + "github.com/mymmsc/gox/logger" + "math" + "strconv" +) + +const ( + MaxInt32 = int32(math.MaxInt32) + MinInt32 = int32(math.MinInt32) + Nil2Int32 = int32(0) // 空指针转int32 + Int32NaN = int32(0) // int32 无效值 + True2Int32 = int32(1) // true转int32 + False2Int32 = int32(0) // false 转int32 + StringBad2Int32 = int32(0) // 字符串解析int32异常 + StringTrue2Int32 = int32(1) // 字符串true转int32 + StringFalse2Int32 = int32(0) // 字符串false转int32 +) + +// ParseInt32 解析int字符串, 尝试解析10进制和16进制 +func ParseInt32(s string, v any) int32 { + defer func() { + // 解析失败以后输出日志, 以备检查 + if err := recover(); err != nil { + logger.Errorf("ParseInt32 %+v, error=%+v\n", v, err) + } + }() + if IsEmpty(s) { + return Nil2Int32 + } + if isTrue(s) { + return StringTrue2Int32 + } else if isFalse(s) { + return StringFalse2Int32 + } + i, err := strconv.ParseInt(s, 10, 32) + if err == nil { + return int32(i) + } + // 解析失败继续解析16进制 + i, err = strconv.ParseInt(s, 16, 32) + if err == nil { + return int32(i) + } + logger.Errorf("%s, error=%+v\n", s, err) + if IgnoreParseExceptions { + i = int64(StringBad2Int32) + } else { + _ = v.(int32) // Intentionally panic + } + return int32(i) +} + +func int32ToString(v int32) string { + if Float64IsNaN(float64(v)) { + return StringNaN + } + return fmt.Sprint(v) +} + +// AnyToInt32 any转换int32 +func AnyToInt32(v any) int32 { + if vv, ok := extraceValueFromPointer(v); ok { + v = vv + } + + f := valueToNumber[int32](v, Nil2Int32, boolToInt32, ParseInt32) + return f +} -- Gitee From ab68dcda9adccc8e7c9bc28cd8070f4f31909a30 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 13 Feb 2023 21:45:42 +0800 Subject: [PATCH 06/14] =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=88=86=E7=B1=BB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stat/array.go | 66 ------------------- stat/array_test.go | 13 ---- generic_convert.go => stat/ndarray_convert.go | 12 ++-- .../ndarray_convert_test.go | 2 +- 4 files changed, 6 insertions(+), 87 deletions(-) delete mode 100644 stat/array.go delete mode 100644 stat/array_test.go rename generic_convert.go => stat/ndarray_convert.go (92%) rename generic_convert_test.go => stat/ndarray_convert_test.go (80%) diff --git a/stat/array.go b/stat/array.go deleted file mode 100644 index dc4139b..0000000 --- a/stat/array.go +++ /dev/null @@ -1,66 +0,0 @@ -package stat - -type NDArray []DType - -type Array[T Number] []T - -func (a Array[T]) Len() int { - return len(a) -} - -//type FloatX interface { -// ~float64 | []float64 | int64 | []int64 | int | []int | int32 | []int32 | [][]float64 | [][]int | [][]int64 | [][]int32 -//} -// -//// IsVector checks if a variable is a slice -//func IsVector[T FloatX](obj T) bool { -// switch reflect.TypeOf(obj).Kind() { -// case reflect.Slice: -// return true -// default: -// return false -// } -//} -// -//// AsSlice converts a variable to a slice -//func AsSlice[T FloatX](obj T) []float64 { -// switch reflect.TypeOf(obj).Kind() { -// case reflect.Slice: -// return any(obj).([]float64) -// default: -// return []float64{any(obj).(float64)} -// } -//} -// -//// asFloat64 converts a variable to a float64 -//func asFloat64[T FloatX](obj T) float64 { -// switch reflect.TypeOf(obj).Kind() { -// case reflect.Slice: -// return any(obj).([]float64)[0] -// case reflect.Int: -// return float64(any(obj).(int)) -// case reflect.Int64: -// return float64(any(obj).(int)) -// case reflect.Int32: -// return float64(any(obj).(int)) -// default: -// return any(obj).(float64) -// } -//} -// -//// Round rounds a slice of numbers to a given decimal -//func Round[T FloatX](element T, decimals int) any { -// if IsVector(element) { -// var roundedArray []float64 -// array := AsSlice(element) -// for i := range array { -// roundedNum := math.Round(array[i]*math.Pow(10, float64(decimals))) / math.Pow(10, float64(decimals)) -// roundedArray = append(roundedArray, roundedNum) -// } -// return roundedArray -// } else { -// number := asFloat64(element) -// return math.Round(number*math.Pow(10, float64(decimals))) / math.Pow(10, float64(decimals)) -// } -// -//} diff --git a/stat/array_test.go b/stat/array_test.go deleted file mode 100644 index be63f30..0000000 --- a/stat/array_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package stat - -import ( - "fmt" - "testing" -) - -func TestArray_Len(t *testing.T) { - f1 := []float64{1, 2, 3, 4, 5} - a1 := Array[float64](f1) - fmt.Println(a1) - fmt.Println(a1.Len()) -} diff --git a/generic_convert.go b/stat/ndarray_convert.go similarity index 92% rename from generic_convert.go rename to stat/ndarray_convert.go index d51e6fb..8caecbd 100644 --- a/generic_convert.go +++ b/stat/ndarray_convert.go @@ -1,4 +1,4 @@ -package pandas +package stat import ( "github.com/viterin/vek" @@ -7,17 +7,15 @@ import ( ) // 这里做数组统一转换 -func convert[T GenericType](s Series, v T) { - +func convert[T GenericType](s Array, v T) { values := s.Values() rawType := checkoutRawType(values) values, ok := values.([]T) _ = rawType _ = ok - } -func ToFloat32(s Series) []float32 { +func ToFloat32(s Array) []float32 { length := s.Len() defaultSlice := vek32.Repeat(Nil2Float32, length) values := s.Values() @@ -39,7 +37,7 @@ func ToFloat32(s Series) []float32 { } } -func ToFloat64(s Series) []float64 { +func ToFloat64(s Array) []float64 { length := s.Len() defaultSlice := vek.Repeat(Nil2Float64, length) values := s.Values() @@ -61,7 +59,7 @@ func ToFloat64(s Series) []float64 { } } -func ToBool(s Series) []bool { +func ToBool(s Array) []bool { length := s.Len() defaultSlice := make([]bool, length) values := s.Values() diff --git a/generic_convert_test.go b/stat/ndarray_convert_test.go similarity index 80% rename from generic_convert_test.go rename to stat/ndarray_convert_test.go index 303bd73..bdcccb7 100644 --- a/generic_convert_test.go +++ b/stat/ndarray_convert_test.go @@ -1,4 +1,4 @@ -package pandas +package stat import ( "testing" -- Gitee From 97c75fee874b8c8d18fa8b5858fcfe1329e734f1 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 13 Feb 2023 21:46:16 +0800 Subject: [PATCH 07/14] =?UTF-8?q?NDArray=E5=A2=9E=E5=8A=A0=E6=8E=92?= =?UTF-8?q?=E5=BA=8F=E6=9C=BA=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stat/ndarray_sort.go | 67 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 stat/ndarray_sort.go diff --git a/stat/ndarray_sort.go b/stat/ndarray_sort.go new file mode 100644 index 0000000..cd8ad78 --- /dev/null +++ b/stat/ndarray_sort.go @@ -0,0 +1,67 @@ +package stat + +func (arr NDArray[T]) Len() int { + return len(arr) +} + +// Less 实现sort.Interface接口的比较元素方法 +func (arr NDArray[T]) Less(i, j int) bool { + type_ := arr.Type() + if type_ == SERIES_TYPE_BOOL { + values := arr.Values().([]bool) + var ( + a = int(0) + b = int(0) + ) + if values[i] { + a = 1 + } + if values[j] { + b = 1 + } + return a < b + } else if type_ == SERIES_TYPE_INT64 { + values := arr.Values().([]int64) + return values[i] < values[j] + } else if type_ == SERIES_TYPE_FLOAT32 { + values := arr.Values().([]float32) + return values[i] < values[j] + } else if type_ == SERIES_TYPE_FLOAT64 { + values := arr.Values().([]float64) + return values[i] < values[j] + } else if type_ == SERIES_TYPE_STRING { + values := arr.Values().([]string) + return values[i] < values[j] + } else { + // SERIES_TYPE_INVAILD + // 应该到不了这里, Len()会返回0 + panic(ErrUnsupportedType) + } + return false + +} + +// Swap 实现sort.Interface接口的交换元素方法 +func (arr NDArray[T]) Swap(i, j int) { + type_ := arr.Type() + if type_ == SERIES_TYPE_BOOL { + values := arr.Values().([]bool) + values[i], values[j] = values[j], values[i] + } else if type_ == SERIES_TYPE_INT64 { + values := arr.Values().([]int64) + values[i], values[j] = values[j], values[i] + } else if type_ == SERIES_TYPE_FLOAT32 { + values := arr.Values().([]float32) + values[i], values[j] = values[j], values[i] + } else if type_ == SERIES_TYPE_FLOAT64 { + values := arr.Values().([]float64) + values[i], values[j] = values[j], values[i] + } else if type_ == SERIES_TYPE_STRING { + values := arr.Values().([]string) + values[i], values[j] = values[j], values[i] + } else { + // SERIES_TYPE_INVAILD + // 应该到不了这里, Len()会返回0 + panic(ErrUnsupportedType) + } +} -- Gitee From 552f50b6a9ec479929dd537f0374e96e214db94e Mon Sep 17 00:00:00 2001 From: wangfeng Date: Mon, 13 Feb 2023 21:46:57 +0800 Subject: [PATCH 08/14] =?UTF-8?q?=E5=88=9D=E6=AD=A5=E5=AE=8C=E6=88=90NDArr?= =?UTF-8?q?ay=E6=B3=9B=E5=9E=8B=E6=95=B0=E7=BB=84=E7=9A=84=E5=9F=BA?= =?UTF-8?q?=E7=A1=80=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stat/ndarray.go | 269 +++++++++++++++++++++++++++++++++++++++++++ stat/ndarray_test.go | 54 +++++++++ 2 files changed, 323 insertions(+) create mode 100644 stat/ndarray.go create mode 100644 stat/ndarray_test.go diff --git a/stat/ndarray.go b/stat/ndarray.go new file mode 100644 index 0000000..13e8562 --- /dev/null +++ b/stat/ndarray.go @@ -0,0 +1,269 @@ +package stat + +import ( + gc "github.com/huandu/go-clone" + "reflect" +) + +type Array interface { + // Type returns the type of data the series holds. + // 返回series的数据类型 + Type() Type + // Values 获得全部数据集 + Values() any + + // NaN 输出默认的NaN + NaN() any + // Floats 强制转成[]float32 + Floats() []float32 + // DTypes 强制转[]stat.DType + DTypes() []DType + // Ints 强制转换成整型 + Ints() []Int + + // sort.Interface + + // Len 获得行数, 实现sort.Interface接口的获取元素数量方法 + Len() int + // Less 实现sort.Interface接口的比较元素方法 + Less(i, j int) bool + // Swap 实现sort.Interface接口的交换元素方法 + Swap(i, j int) + + // Empty returns an empty Series of the same type + Empty() Array + // Copy 复制 + Copy() Array + // Records returns the elements of a Series as a []string + Records() []string + // Subset 获取子集 + Subset(start, end int, opt ...any) Array + // Repeat elements of an array. + Repeat(x any, repeats int) Array + // Shift index by desired number of periods with an optional time freq. + // 使用可选的时间频率按所需的周期数移动索引. + Shift(periods int) Array + // Rolling 序列化版本 + //Rolling(param any) RollingAndExpandingMixin + + // Mean calculates the average value of a series + Mean() DType + // StdDev calculates the standard deviation of a series + StdDev() DType + // FillNa Fill NA/NaN values using the specified method. + FillNa(v any, inplace bool) Array + // Max 找出最大值 + Max() any + // Min 找出最小值 + Min() any + // Select 选取一段记录 + Select(r ScopeLimit) Array + // Append 增加一批记录 + Append(values ...any) + // Apply 接受一个回调函数 + Apply(f func(idx int, v any)) + // Logic 逻辑处理 + Logic(f func(idx int, v any) bool) []bool + // Diff 元素的第一个离散差 + Diff(param any) Array + // Ref 引用其它周期的数据 + Ref(param any) Array + // Std 计算标准差 + Std() DType + // Sum 计算累和 + Sum() DType + // EWM Provide exponentially weighted (EW) calculations. + // + // Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be + // provided if ``times`` is not provided. If ``times`` is provided, + // ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. + //EWM(alpha EW) ExponentialMovingWindow + +} + +type NDArray[T BaseType] []T + +func (self NDArray[T]) Type() Type { + return checkoutRawType(self) +} + +func (self NDArray[T]) Values() any { + return []T(self) +} + +func (self NDArray[T]) NaN() any { + switch any(self).(type) { + case []bool: + return BoolNaN + case []string: + return StringNaN + case []int64: + return Nil2Int64 + case []float32: + return Nil2Float32 + case []float64: + return Nil2Float64 + default: + panic(ErrUnsupportedType) + } +} + +func (self NDArray[T]) Floats() []float32 { + return SliceToFloat32([]T(self)) +} + +func (self NDArray[T]) DTypes() []DType { + return SliceToFloat64([]T(self)) +} + +func (self NDArray[T]) Ints() []Int { + d := make([]Int, self.Len()) + for i, v := range self { + d[i] = AnyToInt32(v) + } + return d +} + +func (self NDArray[T]) Empty() Array { + var empty []T + return NDArray[T](empty) +} + +func (self NDArray[T]) Copy() Array { + vlen := self.Len() + return self.Subset(0, vlen, true) +} + +func (self NDArray[T]) Records() []string { + ret := make([]string, self.Len()) + self.Apply(func(idx int, v any) { + ret[idx] = AnyToString(v) + }) + return ret + +} + +func (self NDArray[T]) Subset(start, end int, opt ...any) Array { + // 默认不copy + var __optCopy bool = false + if len(opt) > 0 { + // 第一个参数为是否copy + if _cp, ok := opt[0].(bool); ok { + __optCopy = _cp + } + } + var vs any + var rows int + vv := reflect.ValueOf(self.Values()) + vk := vv.Kind() + switch vk { + case reflect.Slice, reflect.Array: // 切片和数组同样的处理逻辑 + vvs := vv.Slice(start, end) + vs = vvs.Interface() + rows = vv.Len() + if __optCopy && rows > 0 { + vs = gc.Clone(vs) + } + rows = vvs.Len() + var d Array + d = NDArray[T](vs.([]T)) + return d + default: + // 其它类型忽略 + } + return self.Empty() +} + +func (self NDArray[T]) Repeat(x any, repeats int) Array { + var d any + switch values := self.Values().(type) { + case []bool: + _ = values + d = Repeat(AnyToBool(x), repeats) + case []string: + d = Repeat(AnyToString(x), repeats) + case []int64: + d = Repeat(AnyToInt64(x), repeats) + case []float32: + d = Repeat(AnyToFloat32(x), repeats) + default: //case []float64: + d = Repeat(AnyToFloat64(x), repeats) + } + return NDArray[T](d.([]T)) +} + +func (self NDArray[T]) Shift(periods int) Array { + values := self.Values().([]T) + d := Shift(values, periods) + return NDArray[T](d) +} + +func (self NDArray[T]) Mean() DType { + d := Mean2(self) + return Any2DType(d) +} + +func (self NDArray[T]) StdDev() DType { + return self.Std() +} + +func (self NDArray[T]) FillNa(v any, inplace bool) Array { + d := FillNa(self, v, inplace) + return NDArray[T](d) +} + +func (self NDArray[T]) Max() any { + d := Min2(self) + return d +} + +func (self NDArray[T]) Min() any { + d := Min2(self) + return d +} + +func (self NDArray[T]) Select(r ScopeLimit) Array { + //TODO implement me + panic("implement me") +} + +func (self NDArray[T]) Append(values ...any) { + //TODO implement me + panic("implement me") +} + +func (self NDArray[T]) Apply(f func(idx int, v any)) { + for i, v := range self { + f(i, v) + } +} + +func (self NDArray[T]) Logic(f func(idx int, v any) bool) []bool { + d := make([]bool, self.Len()) + for i, v := range self { + d[i] = f(i, v) + } + return d +} + +func (self NDArray[T]) Diff(param any) Array { + d := Diff2(self, param) + return NDArray[T](d) +} + +func (self NDArray[T]) Ref(param any) Array { + values := self.Values().([]T) + d := Shift3(values, param) + return NDArray[T](d) +} + +func (self NDArray[T]) Std() DType { + d := Std(self) + return Any2DType(d) +} + +func (self NDArray[T]) Sum() DType { + values := Slice2DType(self) + d := Sum(values) + return Any2DType(d) +} diff --git a/stat/ndarray_test.go b/stat/ndarray_test.go new file mode 100644 index 0000000..d9e8821 --- /dev/null +++ b/stat/ndarray_test.go @@ -0,0 +1,54 @@ +package stat + +import ( + "fmt" + "reflect" + "testing" + "unsafe" +) + +func TestNDArray_Len(t *testing.T) { + f1 := []float64{1, 2, 3, 4, 5} + a1 := NDArray[float64](f1) + fmt.Println(a1) + fmt.Println(a1.Len()) +} + +type X int + +func TestNDArrayAll(t *testing.T) { + var x1 X = 5 + var x2 int + x2 = int(x1) + fmt.Println(x2) + d := []float32{1, 2, 3, 4, 5} + sh1 := (*reflect.SliceHeader)(unsafe.Pointer(&d)) + fmt.Printf("s : %#v\n", sh1) + var s Array + s = NDArray[float32](d) + //s3 := []float32(s) + //fmt.Println(s3) + sh2 := (*reflect.SliceHeader)(unsafe.Pointer(&s)) + fmt.Printf("s : %#v\n", sh2.Data) + fmt.Println(s.Len()) + s4 := s.Values() + fmt.Println(s.Type()) + fmt.Println(s.Floats()) + + f32 := ToFloat32(s) + fmt.Println(f32) + + a1 := s.Diff(1) + fmt.Println(a1) + a2 := s.Ref(1) + fmt.Println(a2) + a2 = a2.FillNa(9, true) + fmt.Println(a2) + + a3 := s.Mean() + fmt.Println(a3) + + a4 := s.Shift(-1) + fmt.Println(a4) + _ = s4 +} -- Gitee From 40a074adc22d40a8e5e21b3d6b4704242204e066 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Tue, 14 Feb 2023 05:22:34 +0800 Subject: [PATCH 09/14] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=BA=9F=E5=BC=83?= =?UTF-8?q?=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stat/unsafe.go | 8 -------- stat/unsafe_test.go | 12 ------------ 2 files changed, 20 deletions(-) delete mode 100644 stat/unsafe.go delete mode 100644 stat/unsafe_test.go diff --git a/stat/unsafe.go b/stat/unsafe.go deleted file mode 100644 index 45766af..0000000 --- a/stat/unsafe.go +++ /dev/null @@ -1,8 +0,0 @@ -package stat - -func math_abs[T StatType](v T) T { - if v < 0 { - return v * -1 - } - return v -} diff --git a/stat/unsafe_test.go b/stat/unsafe_test.go deleted file mode 100644 index e53ad50..0000000 --- a/stat/unsafe_test.go +++ /dev/null @@ -1,12 +0,0 @@ -package stat - -import ( - "fmt" - "testing" -) - -func Test_frombits(t *testing.T) { - i32_1 := float64(-1) - //fmt.Println(math.Abs(i32_1)) - fmt.Println(math_abs(i32_1)) -} -- Gitee From f4a15d4b04861d53811e89da5b518edcd89696c6 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Tue, 14 Feb 2023 07:29:01 +0800 Subject: [PATCH 10/14] =?UTF-8?q?=E6=89=A9=E5=85=85=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stat/abs.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stat/abs.go b/stat/abs.go index 70c84a8..9254341 100644 --- a/stat/abs.go +++ b/stat/abs.go @@ -6,7 +6,7 @@ import ( ) // Abs 泛型绝对值 -func Abs[T Number](x []T) []T { +func Abs[T BaseType](x []T) []T { var d any var v any = x switch xv := v.(type) { @@ -24,8 +24,8 @@ func Abs[T Number](x []T) []T { d = __abs_go(xv) case []int64: d = __abs_go(xv) - //case []uint, []uint8, []uint16, []uint32, []uint64, []uintptr: - // d = xv + case []uint, []uint8, []uint16, []uint32, []uint64, []uintptr: + d = xv default: // 其它类型原样返回 d = xv -- Gitee From 2d130e52ab98cab5ab2ed01ded2fb9fc80c4d744 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Tue, 14 Feb 2023 07:40:22 +0800 Subject: [PATCH 11/14] =?UTF-8?q?=E5=A2=9E=E5=8A=A0bool2int=E5=87=BD?= =?UTF-8?q?=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stat/type_bool.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/stat/type_bool.go b/stat/type_bool.go index e5730b3..7aefcc6 100644 --- a/stat/type_bool.go +++ b/stat/type_bool.go @@ -35,6 +35,13 @@ func isFalse(s string) bool { } } +func bool2Int(b bool) int8 { + if b { + return int8(1) + } + return int8(0) +} + func boolToInt32(b bool) int32 { if b { return True2Int32 -- Gitee From 471bfd9c41022157cf421cc87a4e7d62129697de Mon Sep 17 00:00:00 2001 From: wangfeng Date: Tue, 14 Feb 2023 08:53:21 +0800 Subject: [PATCH 12/14] =?UTF-8?q?=E6=A0=87=E6=B3=A8=E5=8D=B3=E5=B0=86?= =?UTF-8?q?=E5=BA=9F=E5=BC=83=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic_type.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic_type.go b/generic_type.go index 27bc2d7..aa5128a 100644 --- a/generic_type.go +++ b/generic_type.go @@ -159,7 +159,7 @@ func parseType(s string) (Type, error) { return SERIES_TYPE_INVAILD, fmt.Errorf("type (%s) is not supported", s) } -func detectTypes[T GenericType](v T) (Type, any) { +func detectTypes[T stat.GenericType](v T) (Type, any) { var _type = SERIES_TYPE_STRING vv := reflect.ValueOf(v) vk := vv.Kind() -- Gitee From 5b2e0af4eeabdc83b9b0ccbbc23bd03d6f32b2c6 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Tue, 14 Feb 2023 08:53:59 +0800 Subject: [PATCH 13/14] =?UTF-8?q?=E6=A0=87=E6=B3=A8=E5=8D=B3=E5=B0=86?= =?UTF-8?q?=E5=BA=9F=E5=BC=83=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic.go | 29 ++++++++++++----------------- generic_fillna.go | 2 +- generic_shift.go | 4 ++-- series.go | 7 +------ 4 files changed, 16 insertions(+), 26 deletions(-) diff --git a/generic.go b/generic.go index 8d23552..90c5605 100644 --- a/generic.go +++ b/generic.go @@ -7,21 +7,16 @@ import ( "sync" ) -// GenericType Series支持的所有类型 -type GenericType interface { - ~bool | ~int64 | ~float32 | ~float64 | ~string -} - // NDFrame 这里本意是想做一个父类, 实际的效果是一个抽象类 type NDFrame struct { - lock sync.RWMutex // 读写锁 - formatter StringFormatter // 字符串格式化工具 - name string // 帧名称 - type_ Type // values元素类型 - copy_ bool // 是否副本 - nilCount int // nil和nan的元素有多少, 这种统计在bool和int64类型中不会大于0, 只对float64及string有效 - rows int // 行数 - values any // 只能是一个一维slice, 在所有的运算中, values强制转换成float64切片 + lock sync.RWMutex // 读写锁 + formatter stat.StringFormatter // 字符串格式化工具 + name string // 帧名称 + type_ Type // values元素类型 + copy_ bool // 是否副本 + nilCount int // nil和nan的元素有多少, 这种统计在bool和int64类型中不会大于0, 只对float64及string有效 + rows int // 行数 + values any // 只能是一个一维slice, 在所有的运算中, values强制转换成float64切片 } @@ -36,7 +31,7 @@ type NDFrame struct { //copy : bool, default False //""" -func NewNDFrame[E GenericType](name string, rows ...E) *NDFrame { +func NewNDFrame[E stat.GenericType](name string, rows ...E) *NDFrame { frame := NDFrame{ formatter: stat.DefaultFormatter, name: name, @@ -57,7 +52,7 @@ func NewNDFrame[E GenericType](name string, rows ...E) *NDFrame { } // 赋值 -func assign[T GenericType](frame *NDFrame, idx, size int, v T) { +func assign[T stat.GenericType](frame *NDFrame, idx, size int, v T) { // 检测类型 if frame.type_ == SERIES_TYPE_INVAILD { _type, _ := detectTypes(v) @@ -101,7 +96,7 @@ func assign[T GenericType](frame *NDFrame, idx, size int, v T) { } // Repeat 重复生成a -func Repeat[T GenericType](a T, n int) []T { +func Repeat[T stat.GenericType](a T, n int) []T { dst := make([]T, n) for i := 0; i < n; i++ { dst[i] = a @@ -110,7 +105,7 @@ func Repeat[T GenericType](a T, n int) []T { } // Repeat2 重复生成a -func Repeat2[T GenericType](dst []T, a T, n int) []T { +func Repeat2[T stat.GenericType](dst []T, a T, n int) []T { for i := 0; i < n; i++ { dst[i] = a } diff --git a/generic_fillna.go b/generic_fillna.go index ce57ce3..e2a89be 100644 --- a/generic_fillna.go +++ b/generic_fillna.go @@ -5,7 +5,7 @@ import "gitee.com/quant1x/pandas/stat" // FillNa 填充NaN的元素为v // inplace为真是修改series元素的值 // 如果v和Values()返回值的slice类型不一致就会panic -func FillNa[T GenericType](s *NDFrame, v T, inplace bool) *NDFrame { +func FillNa[T stat.GenericType](s *NDFrame, v T, inplace bool) *NDFrame { values := s.Values() switch rows := values.(type) { case []string: diff --git a/generic_shift.go b/generic_shift.go index d137fcd..5ed85b3 100644 --- a/generic_shift.go +++ b/generic_shift.go @@ -6,7 +6,7 @@ import ( ) // Shift series切片, 使用可选的时间频率按所需的周期数移动索引 -func Shift[T GenericType](s *Series, periods int, cbNan func() T) Series { +func Shift[T stat.GenericType](s *Series, periods int, cbNan func() T) Series { var d Series d = clone(*s).(Series) if periods == 0 { @@ -43,7 +43,7 @@ func Shift[T GenericType](s *Series, periods int, cbNan func() T) Series { } // Shift2 series切片, 使用可选的时间频率按所需的周期数移动索引 -func Shift2[T GenericType](s *Series, N []float32, cbNan func() T) Series { +func Shift2[T stat.GenericType](s *Series, N []float32, cbNan func() T) Series { var d Series d = clone(*s).(Series) if len(N) == 0 { diff --git a/series.go b/series.go index d37599f..a5b30ab 100644 --- a/series.go +++ b/series.go @@ -21,11 +21,6 @@ const ( SERIES_TYPE_STRING = reflect.String // string ) -// StringFormatter is used to convert a value -// into a string. Val can be nil or the concrete -// type stored by the series. -type StringFormatter func(val interface{}) string - type Series interface { // Name 取得series名称 Name() string @@ -141,7 +136,7 @@ func NewSeries(t Type, name string, vals any) Series { //} // GenericSeries 泛型方法, 构造序列, 比其它方式对类型的统一性要求更严格 -func GenericSeries[T GenericType](name string, values ...T) Series { +func GenericSeries[T stat.GenericType](name string, values ...T) Series { // 第一遍, 确定类型, 找到第一个非nil的值 var _type Type = SERIES_TYPE_STRING for _, v := range values { -- Gitee From 17670f3819bcc1fe255b0aebc7a1779892295a0f Mon Sep 17 00:00:00 2001 From: wangfeng Date: Tue, 14 Feb 2023 08:58:13 +0800 Subject: [PATCH 14/14] =?UTF-8?q?#I6EVCO=20=E5=AE=9E=E7=8E=B0=E4=BA=86NDAr?= =?UTF-8?q?ray?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- stat/frame.go | 159 +++++++++----------- stat/frame_test.go | 24 --- stat/ndarray.go | 115 +++----------- stat/{frame_assign.go => ndarray_append.go} | 94 ++++++++---- stat/ndarray_convert.go | 8 +- stat/ndarray_test.go | 4 +- stat/{frame_xstring.go => strings.go} | 0 stat/type.go | 107 +++++++++++++ stat/type_test.go | 13 ++ 9 files changed, 284 insertions(+), 240 deletions(-) delete mode 100644 stat/frame_test.go rename stat/{frame_assign.go => ndarray_append.go} (31%) rename stat/{frame_xstring.go => strings.go} (100%) diff --git a/stat/frame.go b/stat/frame.go index 8aa6399..77b8130 100644 --- a/stat/frame.go +++ b/stat/frame.go @@ -1,107 +1,82 @@ package stat -import ( - "reflect" - "sync" -) - -type Frame[T GenericType] interface { +type Frame interface { // Name 取得series名称 Name() string - // ReName renames the series. - ReName(name string) + // Rename renames the series. + Rename(name string) + // Type returns the type of data the series holds. // 返回series的数据类型 Type() Type - // Len 获得行数 - Len() int // Values 获得全部数据集 - Values() []T // 如果确定类型, 后面可能无法自动调整 -} - -type GenericFrame[T GenericType] struct { - lock sync.RWMutex // 读写锁 - formatter StringFormatter // 字符串格式化工具 - name string // 帧名称 - type_ Type // values元素类型 - nilCount int // nil和nan的元素有多少, 这种统计在bool和int64类型中不会大于0, 只对float64及string有效 - rows int // 行数 - values []T // 只能是一个一维slice, 在所有的运算中, values强制转换成float64切片 -} + Values() any -func NewFrame[T GenericType](name string, values ...any) Frame[T] { - frame := GenericFrame[T]{ - formatter: DefaultFormatter, - name: name, - type_: reflect.Invalid, - nilCount: 0, - rows: 0, - values: nil, - } - // 确定泛型的具体类型, 以便后面创建slice - kind := checkoutRawType(&frame) - if kind == reflect.Invalid { - return &frame - } - frame.type_ = kind - if frame.type_ == SERIES_TYPE_BOOL { - // bool - frame.values = reflect.MakeSlice(typeBool, 0, 0).Interface().([]T) - } else if frame.type_ == SERIES_TYPE_INT64 { - // int64 - frame.values = reflect.MakeSlice(typeInt64, 0, 0).Interface().([]T) - } else if frame.type_ == SERIES_TYPE_FLOAT32 { - // float32 - frame.values = reflect.MakeSlice(typeFloat32, 0, 0).Interface().([]T) - } else if frame.type_ == SERIES_TYPE_FLOAT64 { - // float64 - frame.values = reflect.MakeSlice(typeFloat64, 0, 0).Interface().([]T) - } else { - // string, 字符串最后容错使用 - frame.values = reflect.MakeSlice(typeString, 0, 0).Interface().([]T) - } - size := 0 - for idx, v := range values { - vv := reflect.ValueOf(v) - vk := vv.Kind() - switch vk { - case reflect.Invalid: // {interface} nil - frame.assign(idx, size, nil) - case reflect.Slice, reflect.Array: // 切片或者数组 - for i := 0; i < vv.Len(); i++ { - tv := vv.Index(i).Interface() - frame.assign(idx, size, tv) - } - default: - // 默认为基础数据类型 - tv := vv.Interface() - frame.assign(idx, size, tv) - } - } - return &frame -} + // NaN 输出默认的NaN + NaN() any + // Floats 强制转成[]float32 + Floats() []float32 + // DTypes 强制转[]stat.DType + DTypes() []DType + // Ints 强制转换成整型 + Ints() []Int -func (self *GenericFrame[T]) Name() string { - //TODO implement me - panic("implement me") -} + // sort.Interface -func (self *GenericFrame[T]) ReName(name string) { - //TODO implement me - panic("implement me") -} + // Len 获得行数, 实现sort.Interface接口的获取元素数量方法 + Len() int + // Less 实现sort.Interface接口的比较元素方法 + Less(i, j int) bool + // Swap 实现sort.Interface接口的交换元素方法 + Swap(i, j int) -func (self *GenericFrame[T]) Type() Type { - //TODO implement me - panic("implement me") -} + // Empty returns an empty Series of the same type + Empty() Frame + // Copy 复制 + Copy() Frame + // Records returns the elements of a Series as a []string + Records() []string + // Subset 获取子集 + Subset(start, end int, opt ...any) Frame + // Repeat elements of an array. + Repeat(x any, repeats int) Frame + // Shift index by desired number of periods with an optional time freq. + // 使用可选的时间频率按所需的周期数移动索引. + Shift(periods int) Frame + // Rolling 序列化版本 + //Rolling(param any) RollingAndExpandingMixin -func (self *GenericFrame[T]) Len() int { - //TODO implement me - panic("implement me") -} + // Mean calculates the average value of a series + Mean() DType + // StdDev calculates the standard deviation of a series + StdDev() DType + // FillNa Fill NA/NaN values using the specified method. + FillNa(v any, inplace bool) Frame + // Max 找出最大值 + Max() any + // Min 找出最小值 + Min() any + // Select 选取一段记录 + Select(r ScopeLimit) Frame + // Append 增加一批记录 + Append(values ...any) Frame + // Apply 接受一个回调函数 + Apply(f func(idx int, v any)) + // Logic 逻辑处理 + Logic(f func(idx int, v any) bool) []bool + // Diff 元素的第一个离散差 + Diff(param any) Frame + // Ref 引用其它周期的数据 + Ref(param any) Frame + // Std 计算标准差 + Std() DType + // Sum 计算累和 + Sum() DType + // EWM Provide exponentially weighted (EW) calculations. + // + // Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be + // provided if ``times`` is not provided. If ``times`` is provided, + // ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. + //EWM(alpha EW) ExponentialMovingWindow -func (self *GenericFrame[T]) Values() []T { - //TODO implement me - panic("implement me") } diff --git a/stat/frame_test.go b/stat/frame_test.go deleted file mode 100644 index 260f7d9..0000000 --- a/stat/frame_test.go +++ /dev/null @@ -1,24 +0,0 @@ -package stat - -import ( - "fmt" - "testing" -) - -func TestNewFrameT1(t *testing.T) { - f1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, NaN(), 12} - f2 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} - s1 := []string{"a", "b", "c"} - i1 := []int64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20} - gf1 := NewFrame[float64]("x", f1) - fmt.Printf("%+v\n", gf1) - - t0 := []any{nil, 1, true, "abc", 3.45, NaN()} - gf2 := NewFrame[float64]("x", t0...) - fmt.Printf("%+v\n", gf2) - _ = f1 - _ = f2 - _ = s1 - _ = i1 - _ = gf1 -} diff --git a/stat/ndarray.go b/stat/ndarray.go index 13e8562..ef0a6c3 100644 --- a/stat/ndarray.go +++ b/stat/ndarray.go @@ -5,83 +5,17 @@ import ( "reflect" ) -type Array interface { - // Type returns the type of data the series holds. - // 返回series的数据类型 - Type() Type - // Values 获得全部数据集 - Values() any - - // NaN 输出默认的NaN - NaN() any - // Floats 强制转成[]float32 - Floats() []float32 - // DTypes 强制转[]stat.DType - DTypes() []DType - // Ints 强制转换成整型 - Ints() []Int - - // sort.Interface - - // Len 获得行数, 实现sort.Interface接口的获取元素数量方法 - Len() int - // Less 实现sort.Interface接口的比较元素方法 - Less(i, j int) bool - // Swap 实现sort.Interface接口的交换元素方法 - Swap(i, j int) - - // Empty returns an empty Series of the same type - Empty() Array - // Copy 复制 - Copy() Array - // Records returns the elements of a Series as a []string - Records() []string - // Subset 获取子集 - Subset(start, end int, opt ...any) Array - // Repeat elements of an array. - Repeat(x any, repeats int) Array - // Shift index by desired number of periods with an optional time freq. - // 使用可选的时间频率按所需的周期数移动索引. - Shift(periods int) Array - // Rolling 序列化版本 - //Rolling(param any) RollingAndExpandingMixin - - // Mean calculates the average value of a series - Mean() DType - // StdDev calculates the standard deviation of a series - StdDev() DType - // FillNa Fill NA/NaN values using the specified method. - FillNa(v any, inplace bool) Array - // Max 找出最大值 - Max() any - // Min 找出最小值 - Min() any - // Select 选取一段记录 - Select(r ScopeLimit) Array - // Append 增加一批记录 - Append(values ...any) - // Apply 接受一个回调函数 - Apply(f func(idx int, v any)) - // Logic 逻辑处理 - Logic(f func(idx int, v any) bool) []bool - // Diff 元素的第一个离散差 - Diff(param any) Array - // Ref 引用其它周期的数据 - Ref(param any) Array - // Std 计算标准差 - Std() DType - // Sum 计算累和 - Sum() DType - // EWM Provide exponentially weighted (EW) calculations. - // - // Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be - // provided if ``times`` is not provided. If ``times`` is provided, - // ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. - //EWM(alpha EW) ExponentialMovingWindow +type NDArray[T BaseType] []T +func (self NDArray[T]) Name() string { + //TODO implement me + panic("implement me") } -type NDArray[T BaseType] []T +func (self NDArray[T]) Rename(name string) { + //TODO implement me + panic("implement me") +} func (self NDArray[T]) Type() Type { return checkoutRawType(self) @@ -124,12 +58,12 @@ func (self NDArray[T]) Ints() []Int { return d } -func (self NDArray[T]) Empty() Array { +func (self NDArray[T]) Empty() Frame { var empty []T return NDArray[T](empty) } -func (self NDArray[T]) Copy() Array { +func (self NDArray[T]) Copy() Frame { vlen := self.Len() return self.Subset(0, vlen, true) } @@ -143,7 +77,7 @@ func (self NDArray[T]) Records() []string { } -func (self NDArray[T]) Subset(start, end int, opt ...any) Array { +func (self NDArray[T]) Subset(start, end int, opt ...any) Frame { // 默认不copy var __optCopy bool = false if len(opt) > 0 { @@ -165,7 +99,7 @@ func (self NDArray[T]) Subset(start, end int, opt ...any) Array { vs = gc.Clone(vs) } rows = vvs.Len() - var d Array + var d Frame d = NDArray[T](vs.([]T)) return d default: @@ -174,7 +108,7 @@ func (self NDArray[T]) Subset(start, end int, opt ...any) Array { return self.Empty() } -func (self NDArray[T]) Repeat(x any, repeats int) Array { +func (self NDArray[T]) Repeat(x any, repeats int) Frame { var d any switch values := self.Values().(type) { case []bool: @@ -192,7 +126,7 @@ func (self NDArray[T]) Repeat(x any, repeats int) Array { return NDArray[T](d.([]T)) } -func (self NDArray[T]) Shift(periods int) Array { +func (self NDArray[T]) Shift(periods int) Frame { values := self.Values().([]T) d := Shift(values, periods) return NDArray[T](d) @@ -207,7 +141,7 @@ func (self NDArray[T]) StdDev() DType { return self.Std() } -func (self NDArray[T]) FillNa(v any, inplace bool) Array { +func (self NDArray[T]) FillNa(v any, inplace bool) Frame { d := FillNa(self, v, inplace) return NDArray[T](d) } @@ -222,14 +156,13 @@ func (self NDArray[T]) Min() any { return d } -func (self NDArray[T]) Select(r ScopeLimit) Array { - //TODO implement me - panic("implement me") -} - -func (self NDArray[T]) Append(values ...any) { - //TODO implement me - panic("implement me") +func (self NDArray[T]) Select(r ScopeLimit) Frame { + start, end, err := r.Limits(self.Len()) + if err != nil { + return nil + } + series := self.Subset(start, end+1) + return series } func (self NDArray[T]) Apply(f func(idx int, v any)) { @@ -246,12 +179,12 @@ func (self NDArray[T]) Logic(f func(idx int, v any) bool) []bool { return d } -func (self NDArray[T]) Diff(param any) Array { +func (self NDArray[T]) Diff(param any) Frame { d := Diff2(self, param) return NDArray[T](d) } -func (self NDArray[T]) Ref(param any) Array { +func (self NDArray[T]) Ref(param any) Frame { values := self.Values().([]T) d := Shift3(values, param) return NDArray[T](d) diff --git a/stat/frame_assign.go b/stat/ndarray_append.go similarity index 31% rename from stat/frame_assign.go rename to stat/ndarray_append.go index fa83556..8ae823a 100644 --- a/stat/frame_assign.go +++ b/stat/ndarray_append.go @@ -1,35 +1,18 @@ package stat -import ( - "reflect" -) +import "reflect" // 赋值 -func (self *GenericFrame[T]) assign(idx, size int, val any) { - var v any - if self.type_ == SERIES_TYPE_BOOL { - v = AnyToBool(val) - } else if self.type_ == SERIES_TYPE_INT64 { - v = AnyToInt64(val) - } else if self.type_ == SERIES_TYPE_FLOAT64 { - v = AnyToFloat64(val) - } else { - v = AnyToString(val) - } - //// 检测类型 - //if self.type_ == SERIES_TYPE_INVAILD { - // _type, _ := detectTypes(v) - // if _type != SERIES_TYPE_INVAILD { - // self.type_ = _type - // } - //} +func assign[T BaseType](type_ Type, array Frame, idx, size int, v T) Frame { _vv := reflect.ValueOf(v) _vi := _vv.Interface() // float和string类型有可能是NaN, 对nil和NaN进行计数 - if self.type_ == SERIES_TYPE_FLOAT64 && Float64IsNaN(_vi.(float64)) { - self.nilCount++ - } else if self.type_ == SERIES_TYPE_STRING && StringIsNaN(_vi.(string)) { - self.nilCount++ + if type_ == SERIES_TYPE_FLOAT32 && Float32IsNaN(_vi.(float32)) { + //array.nilCount++ + } else if type_ == SERIES_TYPE_FLOAT64 && Float64IsNaN(_vi.(float64)) { + //array.nilCount++ + } else if type_ == SERIES_TYPE_STRING && StringIsNaN(_vi.(string)) { + //array.nilCount++ // 以下修正string的NaN值, 统一为"NaN" //_rv := reflect.ValueOf(StringNaN) //_vv.Set(_rv) // 这样赋值会崩溃 @@ -46,12 +29,67 @@ func (self *GenericFrame[T]) assign(idx, size int, val any) { // 执行之后, 通过debug可以看到assign入参的v已经变成了"NaN" } } + values := (array).Values().([]T) // 确保只添加了1个元素 if idx < size { - self.values[idx] = v.(T) + values[idx] = v } else { - self.values = append(self.values, v.(T)) + values = append(values, v) } // 行数+1 - self.rows += 1 + //array.rows += 1 + return NDArray[T](values) +} + +// 插入一条记录 +func (self NDArray[T]) insert(idx, size int, v any) NDArray[T] { + type_ := checkoutRawType(self) + if type_ == SERIES_TYPE_BOOL { + val := AnyToBool(v) + an := assign[bool](type_, self, idx, size, val) + self = an.(NDArray[T]) + } else if type_ == SERIES_TYPE_INT64 { + val := AnyToInt64(v) + an := assign[int64](type_, self, idx, size, val) + self = an.(NDArray[T]) + } else if type_ == SERIES_TYPE_FLOAT32 { + val := AnyToFloat32(v) + an := assign[float32](type_, self, idx, size, val) + self = an.(NDArray[T]) + } else if type_ == SERIES_TYPE_FLOAT64 { + val := AnyToFloat64(v) + an := assign[float64](type_, self, idx, size, val) + self = an.(NDArray[T]) + } else { + val := AnyToString(v) + an := assign[string](type_, self, idx, size, val) + self = an.(NDArray[T]) + } + return self +} + +func (self NDArray[T]) Append(values ...any) Frame { + size := 0 + for idx, v := range values { + switch val := v.(type) { + case nil, int8, uint8, int16, uint16, int32, uint32, int64, uint64, int, uint, float32, float64, bool, string: + // 基础类型 + self = self.insert(idx, size, val) + default: + vv := reflect.ValueOf(val) + vk := vv.Kind() + switch vk { + case reflect.Slice, reflect.Array: // 切片或数组 + for i := 0; i < vv.Len(); i++ { + tv := vv.Index(i).Interface() + self = self.insert(idx, size, tv) + } + case reflect.Struct: // 忽略结构体 + continue + default: + self = self.insert(idx, size, nil) + } + } + } + return self } diff --git a/stat/ndarray_convert.go b/stat/ndarray_convert.go index 8caecbd..8c120d7 100644 --- a/stat/ndarray_convert.go +++ b/stat/ndarray_convert.go @@ -7,7 +7,7 @@ import ( ) // 这里做数组统一转换 -func convert[T GenericType](s Array, v T) { +func convert[T GenericType](s Frame, v T) { values := s.Values() rawType := checkoutRawType(values) values, ok := values.([]T) @@ -15,7 +15,7 @@ func convert[T GenericType](s Array, v T) { _ = ok } -func ToFloat32(s Array) []float32 { +func ToFloat32(s Frame) []float32 { length := s.Len() defaultSlice := vek32.Repeat(Nil2Float32, length) values := s.Values() @@ -37,7 +37,7 @@ func ToFloat32(s Array) []float32 { } } -func ToFloat64(s Array) []float64 { +func ToFloat64(s Frame) []float64 { length := s.Len() defaultSlice := vek.Repeat(Nil2Float64, length) values := s.Values() @@ -59,7 +59,7 @@ func ToFloat64(s Array) []float64 { } } -func ToBool(s Array) []bool { +func ToBool(s Frame) []bool { length := s.Len() defaultSlice := make([]bool, length) values := s.Values() diff --git a/stat/ndarray_test.go b/stat/ndarray_test.go index d9e8821..93096ae 100644 --- a/stat/ndarray_test.go +++ b/stat/ndarray_test.go @@ -24,7 +24,7 @@ func TestNDArrayAll(t *testing.T) { d := []float32{1, 2, 3, 4, 5} sh1 := (*reflect.SliceHeader)(unsafe.Pointer(&d)) fmt.Printf("s : %#v\n", sh1) - var s Array + var s Frame s = NDArray[float32](d) //s3 := []float32(s) //fmt.Println(s3) @@ -50,5 +50,7 @@ func TestNDArrayAll(t *testing.T) { a4 := s.Shift(-1) fmt.Println(a4) + s = s.Append(10, 11) + fmt.Println(s) _ = s4 } diff --git a/stat/frame_xstring.go b/stat/strings.go similarity index 100% rename from stat/frame_xstring.go rename to stat/strings.go diff --git a/stat/type.go b/stat/type.go index 382953a..8c896f3 100644 --- a/stat/type.go +++ b/stat/type.go @@ -2,6 +2,7 @@ package stat import ( "math/big" + "reflect" ) // Signed is a constraint that permits any signed integer type. @@ -48,6 +49,7 @@ type Ordered interface { } // NumberOfCPUBitsRelated The number of CPU bits is related +// Deprecated: 不推荐使用 type NumberOfCPUBitsRelated interface { ~int | ~uint | ~uintptr } @@ -62,33 +64,40 @@ type BaseType interface { } // GenericType Series支持的所有类型 +// Deprecated: 不推荐使用 type GenericType interface { ~bool | ~int32 | ~int64 | ~int | ~float32 | ~float64 | ~string } // StatType 可以统计的类型 +// Deprecated: 不推荐使用 type StatType interface { ~int32 | ~int64 | ~float32 | ~float64 } type BigFloat = big.Float // 预留将来可能扩展float +// Deprecated: 不推荐使用 type Number8 interface { ~int8 | ~uint8 } +// Deprecated: 不推荐使用 type Number16 interface { ~int16 | ~uint16 } +// Deprecated: 不推荐使用 type Number32 interface { ~int32 | ~uint32 | float32 } +// Deprecated: 不推荐使用 type Number64 interface { ~int64 | ~uint64 | float64 | int | uint } +// Deprecated: 已弃用 type MoveType interface { StatType | ~bool | ~string } @@ -161,3 +170,101 @@ func valueToNumber[T Number](v any, nil2t T, bool2t func(b bool) T, string2t fun } return T(0) } + +// any转number +func __anyToNumber[T Number](v any) T { + switch val := v.(type) { + case nil: // 这个地方判断nil值 + return typeDefault[T]() + case int8: + return T(val) + case uint8: + return T(val) + case int16: + return T(val) + case uint16: + return T(val) + case int32: + return T(val) + case uint32: + return T(val) + case int64: + return T(val) + case uint64: + return T(val) + case int: + return T(val) + case uint: + return T(val) + case uintptr: + return T(val) + case float32: + return T(val) + case float64: + return T(val) + case bool: + return T(bool2Int(val)) + case string: + vt := ParseFloat64(val, v) + if Float64IsNaN(vt) { + td := T(0) + //rawType :=checkoutRawType(td) + if !reflect.ValueOf(td).CanFloat() { + return td + } + } + return T(vt) + default: + panic(Throw(v)) + } + return T(0) +} + +// any转其它类型 +// 支持3个方向: any到number, any到bool, any到string +func anyToGeneric[T BaseType](v any) T { + var d any + var to T + switch any(to).(type) { + case int8: + d = __anyToNumber[int8](v) + case uint8: + d = __anyToNumber[uint8](v) + case int16: + d = __anyToNumber[int16](v) + case uint16: + d = __anyToNumber[uint16](v) + case int32: + d = __anyToNumber[int32](v) + case uint32: + d = __anyToNumber[uint32](v) + case int64: + d = __anyToNumber[int64](v) + case uint64: + d = __anyToNumber[uint64](v) + case int: + d = __anyToNumber[int](v) + case uint: + d = __anyToNumber[uint](v) + case uintptr: + d = __anyToNumber[uintptr](v) + case float32: + d = __anyToNumber[float32](v) + case float64: + d = __anyToNumber[float64](v) + case bool: + d = AnyToBool(v) + case string: + d = AnyToString(v) + case []int8, []uint8, []int16, []uint16, []int32, []uint32, []int64, []uint64, []int, []uint, []uintptr, []float32, []float64: + // 什么也不处理, 给个默认值 + d = to + case []bool: + d = to + case []string: + d = to + default: + panic(Throw(v)) + } + return d.(T) +} diff --git a/stat/type_test.go b/stat/type_test.go index 84493a8..345b47b 100644 --- a/stat/type_test.go +++ b/stat/type_test.go @@ -23,3 +23,16 @@ func Test_typeDefault(t *testing.T) { fmt.Println(typeDefault[int]()) fmt.Println(typeDefault[uint]()) } + +func Test_Number(t *testing.T) { + +} + +func Test_anyToGeneric(t *testing.T) { + fmt.Println(anyToGeneric[int](true)) + fmt.Println(anyToGeneric[int]("true")) + fmt.Println(anyToGeneric[int]("false")) + fmt.Println(anyToGeneric[int]("aa")) + fmt.Println(anyToGeneric[int]("tt")) + fmt.Println(anyToGeneric[int](3.00)) +} -- Gitee