diff --git a/builtin_test.go b/builtin_test.go index 543cb060677af60f772add15ffc84bcfcfa102cf..6c208502ce29546c8164dea98b0b5e42f84ae0d2 100644 --- a/builtin_test.go +++ b/builtin_test.go @@ -2,6 +2,7 @@ package pandas import ( "fmt" + "gitee.com/quant1x/pandas/stat" "testing" ) @@ -80,7 +81,7 @@ func TestPoint(t *testing.T) { p1 = &a fmt.Printf("*int = nil, result=%v\n", isPoint(p1)) - var p2 *BigFloat + var p2 *stat.BigFloat fmt.Printf("*BigFloat = nil, result=%v\n", isPoint(p2)) } diff --git a/dataframe_csv_test.go b/dataframe_csv_test.go index b6faa2f1228f1c336a6ea871e1e0d7b4bf43a18b..745f39fd9ca08bad66c8d29d00216939d849c4af 100644 --- a/dataframe_csv_test.go +++ b/dataframe_csv_test.go @@ -34,7 +34,7 @@ Spain,2012-02-01,66,555.42,00241,1.23 // //closes := df.Col("d") //ma5 := closes.RollingV1(5).Mean() - //dframe.NewSeries(closes, dframe.Float, "") + //dframe.NewSeries(closes, dframe.Floats, "") //fmt.Println(ma5) d := df.Col("d") fmt.Println(d) diff --git a/dataframe_indexes.go b/dataframe_indexes.go index 160d4f0943f13e541661c0aec941beec51acfbfb..6030cdfff44260402f9cdcd019dd21d1b8f8d8e0 100644 --- a/dataframe_indexes.go +++ b/dataframe_indexes.go @@ -45,7 +45,7 @@ func parseSelectIndexes(l int, indexes SelectIndexes, colnames []string) ([]int, // //} // switch s.Type() { // case SERIES_TYPE_INT64: - // return s.AsInt() + // return s.Ints() // case series.Bool: // bools, err := s.Bool() // if err != nil { diff --git a/dataframe_remove.go b/dataframe_remove.go index 095d6f47ad19f7ee4b89e29e51410a36601bddb4..107c5af54d639c78c7b38030fc560a1bec93d81a 100644 --- a/dataframe_remove.go +++ b/dataframe_remove.go @@ -1,7 +1,9 @@ package pandas +import "gitee.com/quant1x/pandas/stat" + // Remove 删除一段范围内的记录 -func (self DataFrame) Remove(p ScopeLimit) DataFrame { +func (self DataFrame) Remove(p stat.ScopeLimit) DataFrame { rowLen := self.Nrow() start, end, err := p.Limits(rowLen) if err != nil { diff --git a/dataframe_remove_test.go b/dataframe_remove_test.go index dac89974661e576e5457779d0aae65a5dea24dc5..222f364b143c25d514035adcdc2a42f2e316d0fd 100644 --- a/dataframe_remove_test.go +++ b/dataframe_remove_test.go @@ -2,6 +2,7 @@ package pandas import ( "fmt" + "gitee.com/quant1x/pandas/stat" "testing" ) @@ -23,7 +24,7 @@ func TestDataFrame_Remove(t *testing.T) { s_e := GenericSeries[string]("x", "a0", "a1", "a2", "a3", "a4") df2 := df1.Join(s_e) fmt.Println(df2) - r := RangeFinite(3, 3) + r := stat.RangeFinite(3, 3) df3 := df2.Remove(r) fmt.Println(df3) diff --git a/dataframe_subset.go b/dataframe_subset.go index 276b90b1a5744ecb793698a6553700f92286adf8..60089324dce9ba349577244b36987f00b33efb1b 100644 --- a/dataframe_subset.go +++ b/dataframe_subset.go @@ -1,5 +1,7 @@ package pandas +import "gitee.com/quant1x/pandas/stat" + // Subset returns a subset of the rows of the original DataFrame based on the // Series subsetting indexes. func (self DataFrame) Subset(start, end int) DataFrame { @@ -23,7 +25,7 @@ func (self DataFrame) Subset(start, end int) DataFrame { } // Select 选择一段记录 -func (self DataFrame) SelectRows(p ScopeLimit) DataFrame { +func (self DataFrame) SelectRows(p stat.ScopeLimit) DataFrame { columns := []Series{} for i := range self.columns { columns = append(columns, self.columns[i].Select(p)) diff --git a/formula/ema.go b/formula/ema.go index 0b844a3e473c73614e587742b322c18f82edaa42..8fae77a016871bbd26df5fc0bdf69b31da4e3157 100644 --- a/formula/ema.go +++ b/formula/ema.go @@ -43,7 +43,7 @@ func EMA_v2(S pandas.Series, N any) any { X = float32(v) case pandas.Series: vs := v.Values() - fs := pandas.SliceToFloat32(vs) + fs := stat.SliceToFloat32(vs) X = fs[len(fs)-1] default: panic(exception.New(1, "error window")) @@ -60,7 +60,7 @@ func EMA_v0(S pandas.Series, N any) any { X = float32(v) case pandas.Series: vs := v.Values() - fs := pandas.SliceToFloat32(vs) + fs := stat.SliceToFloat32(vs) X = fs[len(fs)-1] default: panic(exception.New(1, "error window")) diff --git a/formula/ref.go b/formula/ref.go index ff6fed81613357addc5d4bf046432237aa0e87f2..3e201c29b3dfac21efd2788221eed549230f6c47 100644 --- a/formula/ref.go +++ b/formula/ref.go @@ -14,7 +14,7 @@ func REF(S pandas.Series, N any) any { X = stat.Repeat[float32](float32(v), S.Len()) case pandas.Series: vs := v.Values() - X = pandas.SliceToFloat32(vs) + X = stat.SliceToFloat32(vs) X = stat.Align(X, pandas.Nil2Float32, S.Len()) default: panic(exception.New(1, "error window")) diff --git a/formula/sma.go b/formula/sma.go index a836711840b0b93c52aac854656785fb98993876..ea982e0b78a8e931af9f8ca10c0ace0695f615f0 100644 --- a/formula/sma.go +++ b/formula/sma.go @@ -17,7 +17,7 @@ func SMA(S pandas.Series, N any, M int) any { X = float32(v) case pandas.Series: vs := v.Values() - fs := pandas.SliceToFloat32(vs) + fs := stat.SliceToFloat32(vs) X = fs[len(fs)-1] default: panic(exception.New(1, "error window")) @@ -37,7 +37,7 @@ func SMA_v5(S pandas.Series, N any, M int) any { X = stat.Repeat[float32](float32(v), S.Len()) case pandas.Series: vs := v.Values() - X = pandas.SliceToFloat32(vs) + X = stat.SliceToFloat32(vs) X = stat.Align(X, pandas.Nil2Float32, S.Len()) default: panic(exception.New(1, "error window")) @@ -64,7 +64,7 @@ func SMA_v4(S pandas.Series, N any, M int) any { X = stat.Repeat[float32](float32(v), S.Len()) case pandas.Series: vs := v.Values() - X = pandas.SliceToFloat32(vs) + X = stat.SliceToFloat32(vs) X = stat.Align(X, pandas.Nil2Float32, S.Len()) default: panic(exception.New(1, "error window")) diff --git a/generic.go b/generic.go index d801ab4625901030188b535c14cd49432d744b21..90c5605ff757ea92e3e140fa0f1ca6132c00b9d9 100644 --- a/generic.go +++ b/generic.go @@ -7,21 +7,16 @@ import ( "sync" ) -// GenericType Series支持的所有类型 -type GenericType interface { - ~bool | ~int64 | ~float32 | ~float64 | ~string -} - // NDFrame 这里本意是想做一个父类, 实际的效果是一个抽象类 type NDFrame struct { - lock sync.RWMutex // 读写锁 - formatter StringFormatter // 字符串格式化工具 - name string // 帧名称 - type_ Type // values元素类型 - copy_ bool // 是否副本 - nilCount int // nil和nan的元素有多少, 这种统计在bool和int64类型中不会大于0, 只对float64及string有效 - rows int // 行数 - values any // 只能是一个一维slice, 在所有的运算中, values强制转换成float64切片 + lock sync.RWMutex // 读写锁 + formatter stat.StringFormatter // 字符串格式化工具 + name string // 帧名称 + type_ Type // values元素类型 + copy_ bool // 是否副本 + nilCount int // nil和nan的元素有多少, 这种统计在bool和int64类型中不会大于0, 只对float64及string有效 + rows int // 行数 + values any // 只能是一个一维slice, 在所有的运算中, values强制转换成float64切片 } @@ -36,7 +31,7 @@ type NDFrame struct { //copy : bool, default False //""" -func NewNDFrame[E GenericType](name string, rows ...E) *NDFrame { +func NewNDFrame[E stat.GenericType](name string, rows ...E) *NDFrame { frame := NDFrame{ formatter: stat.DefaultFormatter, name: name, @@ -57,7 +52,7 @@ func NewNDFrame[E GenericType](name string, rows ...E) *NDFrame { } // 赋值 -func assign[T GenericType](frame *NDFrame, idx, size int, v T) { +func assign[T stat.GenericType](frame *NDFrame, idx, size int, v T) { // 检测类型 if frame.type_ == SERIES_TYPE_INVAILD { _type, _ := detectTypes(v) @@ -101,7 +96,7 @@ func assign[T GenericType](frame *NDFrame, idx, size int, v T) { } // Repeat 重复生成a -func Repeat[T GenericType](a T, n int) []T { +func Repeat[T stat.GenericType](a T, n int) []T { dst := make([]T, n) for i := 0; i < n; i++ { dst[i] = a @@ -110,7 +105,7 @@ func Repeat[T GenericType](a T, n int) []T { } // Repeat2 重复生成a -func Repeat2[T GenericType](dst []T, a T, n int) []T { +func Repeat2[T stat.GenericType](dst []T, a T, n int) []T { for i := 0; i < n; i++ { dst[i] = a } @@ -152,7 +147,7 @@ func (self *NDFrame) NaN() any { } func (self *NDFrame) Float() []float32 { - return ToFloat32(self) + return stat.SliceToFloat32(self.values) } // DTypes 计算以这个函数为主 diff --git a/generic_fillna.go b/generic_fillna.go index ce57ce395faf79a8b91d75ab08cd3c8aa7cf2c98..e2a89be0aad46b19c01b7c9ce19efa38d3e75ab4 100644 --- a/generic_fillna.go +++ b/generic_fillna.go @@ -5,7 +5,7 @@ import "gitee.com/quant1x/pandas/stat" // FillNa 填充NaN的元素为v // inplace为真是修改series元素的值 // 如果v和Values()返回值的slice类型不一致就会panic -func FillNa[T GenericType](s *NDFrame, v T, inplace bool) *NDFrame { +func FillNa[T stat.GenericType](s *NDFrame, v T, inplace bool) *NDFrame { values := s.Values() switch rows := values.(type) { case []string: diff --git a/generic_number.go b/generic_number.go deleted file mode 100644 index b44187e2234988bf9c158fd232db8ef4d90bdd04..0000000000000000000000000000000000000000 --- a/generic_number.go +++ /dev/null @@ -1,224 +0,0 @@ -package pandas - -import ( - "gitee.com/quant1x/pandas/stat" - "math/big" -) - -type BigFloat = big.Float // 预留将来可能扩展float - -type Number8 interface { - ~int8 | ~uint8 -} - -type Number16 interface { - ~int16 | ~uint16 -} - -type Number32 interface { - ~int32 | ~uint32 | float32 -} - -type Number64 interface { - ~int64 | ~uint64 | float64 | int | uint -} - -// NumberOfCPUBitsRelated The number of CPU bits is related -type NumberOfCPUBitsRelated interface { - ~int | ~uint | ~uintptr -} - -type Integer interface { - Number8 | Number16 | Number32 | Number64 -} - -// Number int和uint的长度取决于CPU是多少位 -type Number interface { - Integer | Float -} - -//type Number interface { -// constraints.Float | constraints.Integer -//} - -// Signed is a constraint that permits any signed integer type. -// If future releases of Go add new predeclared signed integer types, -// this constraint will be modified to include them. -type Signed interface { - ~int | ~int8 | ~int16 | ~int32 | ~int64 -} - -// Unsigned is a constraint that permits any unsigned integer type. -// If future releases of Go add new predeclared unsigned integer types, -// this constraint will be modified to include them. -// TODO:~uintptr应该是没有应用场景 -type Unsigned interface { - ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr -} - -// Integer_old Integer is a constraint that permits any integer type. -// If future releases of Go add new predeclared integer types, -// this constraint will be modified to include them. -type Integer_old interface { - Signed | Unsigned -} - -// Float is a constraint that permits any floating-point type. -// If future releases of Go add new predeclared floating-point types, -// this constraint will be modified to include them. -type Float interface { - ~float32 | ~float64 -} - -// Complex is a constraint that permits any complex numeric type. -// If future releases of Go add new predeclared complex numeric types, -// this constraint will be modified to include them. -type Complex interface { - ~complex64 | ~complex128 -} - -// Ordered is a constraint that permits any ordered type: any type -// that supports the operators < <= >= >. -// If future releases of Go add new ordered types, -// this constraint will be modified to include them. -type Ordered interface { - Integer | Float | ~string -} - -//const ( -// True2Float32 float32 = float32(1) // true转float32 -// False2Float32 float32 = float32(0) // false转float32 -// StringTrue2Float32 float32 = float32(1) // 字符串true转float32 -// StringFalse2Float32 float32 = float32(0) // 字符串false转float32 -//) - -// Mean gonum.org/v1/gonum/stat不支持整型, 每次都要转换有点难受啊 -func Mean[T Number](x []T) float64 { - d := numberToFloat64(x) - s := stat.Mean(d) - return float64(s) -} - -// any转number -func valueToNumber[T Number](v any, nil2t T, bool2t func(b bool) T, string2t func(s string, v any) T) T { - switch val := v.(type) { - case nil: // 这个地方判断nil值 - return nil2t - case int8: - return T(val) - case uint8: - return T(val) - case int16: - return T(val) - case uint16: - return T(val) - case int32: - return T(val) - case uint32: - return T(val) - case int64: - return T(val) - case uint64: - return T(val) - case int: - return T(val) - case uint: - return T(val) - case float32: - return T(val) - case float64: - return T(val) - case bool: - return bool2t(val) - case string: - return string2t(val, v) - } - return T(0) -} - -// 指针转number -func pointToNumber[T Number](v any, nil2t T, bool2t func(b bool) T, string2t func(s string, v any) T) T { - switch val := v.(type) { - case *int8: - if val == nil { - return nil2t - } - return T(*val) - case *uint8: - if val == nil { - return nil2t - } - return T(*val) - case *int16: - if val == nil { - return nil2t - } - return T(*val) - case *uint16: - if val == nil { - return nil2t - } - return T(*val) - case *int32: - if val == nil { - return nil2t - } - return T(*val) - case *uint32: - if val == nil { - return nil2t - } - return T(*val) - case *int64: - if val == nil { - return nil2t - } - return T(*val) - case *uint64: - if val == nil { - return nil2t - } - return T(*val) - case *int: - if val == nil { - return nil2t - } - return T(*val) - case *uint: - if val == nil { - return nil2t - } - return T(*val) - case *float32: - if val == nil { - return nil2t - } - return T(*val) - case *float64: - if val == nil { - return nil2t - } - return T(*val) - case *bool: - if val == nil { - return nil2t - } - return bool2t(*val) - case *string: - if val == nil { - return nil2t - } - return string2t(*val, v) - } - return T(0) -} - -//func anyToNumber(v any) int { -// switch val := v.(type) { -// case nil, int8, uint8, int16, uint16, int32, uint32, int64, uint64, int, uint, float32, float64, bool, string: -// // 基础类型 -// series_append(&frame, idx, size, val) -// default: -// } -// return 0 -//} diff --git a/generic_range.go b/generic_range.go index bad504039fcd8cfd5ecb2688bc9542cfe9d45415..118b97068df7860f25825790f16be18ce6566def 100644 --- a/generic_range.go +++ b/generic_range.go @@ -1,6 +1,7 @@ package pandas import ( + "gitee.com/quant1x/pandas/stat" gc "github.com/huandu/go-clone" "reflect" ) @@ -115,7 +116,7 @@ func (self *NDFrame) oldSubset(start, end int, opt ...any) Series { } // Select 选取一段记录 -func (self *NDFrame) Select(r ScopeLimit) Series { +func (self *NDFrame) Select(r stat.ScopeLimit) Series { start, end, err := r.Limits(self.Len()) if err != nil { return nil diff --git a/generic_ref.go b/generic_ref.go index ec874336824bef87c67d0a374950f7d02dc649a5..b631f2df411e782c56cab433560a4685550b7d6f 100644 --- a/generic_ref.go +++ b/generic_ref.go @@ -14,7 +14,7 @@ func (self *NDFrame) Ref(param any) (s Series) { N = stat.Align(v, Nil2Float32, self.Len()) case Series: vs := v.Values() - N = SliceToFloat32(vs) + N = stat.SliceToFloat32(vs) N = stat.Align(N, Nil2Float32, self.Len()) default: panic(exception.New(1, "error window")) diff --git a/generic_shift.go b/generic_shift.go index d137fcd97e61e66136d3f09f25914c1e306f496a..5ed85b3e58c3e83698e03c9be3f7c17cbbc2bc97 100644 --- a/generic_shift.go +++ b/generic_shift.go @@ -6,7 +6,7 @@ import ( ) // Shift series切片, 使用可选的时间频率按所需的周期数移动索引 -func Shift[T GenericType](s *Series, periods int, cbNan func() T) Series { +func Shift[T stat.GenericType](s *Series, periods int, cbNan func() T) Series { var d Series d = clone(*s).(Series) if periods == 0 { @@ -43,7 +43,7 @@ func Shift[T GenericType](s *Series, periods int, cbNan func() T) Series { } // Shift2 series切片, 使用可选的时间频率按所需的周期数移动索引 -func Shift2[T GenericType](s *Series, N []float32, cbNan func() T) Series { +func Shift2[T stat.GenericType](s *Series, N []float32, cbNan func() T) Series { var d Series d = clone(*s).(Series) if len(N) == 0 { diff --git a/generic_test.go b/generic_test.go index 381e43ad64ee96e8cead9e8fadffb44527a8f950..a916f1996b85b861bbe1355c3b8b8fe5802adb3f 100644 --- a/generic_test.go +++ b/generic_test.go @@ -2,6 +2,7 @@ package pandas import ( "fmt" + "gitee.com/quant1x/pandas/stat" "testing" ) @@ -34,7 +35,7 @@ func TestNDFrameNew(t *testing.T) { nd1 := NewNDFrame[float64]("x", d1...) fmt.Println(nd1) - r := RangeFinite(-1) + r := stat.RangeFinite(-1) ndr1 := nd1.Select(r) fmt.Println(ndr1.Values()) diff --git a/generic_type.go b/generic_type.go index 27bc2d73b76ece4d41aab06833cb8b3fb24fea3b..aa5128ae5aad9155ea4539cfe27eb28a82262080 100644 --- a/generic_type.go +++ b/generic_type.go @@ -159,7 +159,7 @@ func parseType(s string) (Type, error) { return SERIES_TYPE_INVAILD, fmt.Errorf("type (%s) is not supported", s) } -func detectTypes[T GenericType](v T) (Type, any) { +func detectTypes[T stat.GenericType](v T) (Type, any) { var _type = SERIES_TYPE_STRING vv := reflect.ValueOf(v) vk := vv.Kind() diff --git a/num/Adder.go b/num/Adder.go index 0a3cc9f9678e8cdc3d72bb0a5d454a279a20ae43..9f2414702342cdc07b94cc245d0399a2725a960e 100644 --- a/num/Adder.go +++ b/num/Adder.go @@ -1,4 +1,4 @@ -package lambda +package num import ( "fmt" diff --git a/num/array.go b/num/array.go index 3964baf76c84ba6ba06923cdfaf734bbc66a21ed..063ab1554e9b4d56dab146b91f4fe4d4627068d2 100644 --- a/num/array.go +++ b/num/array.go @@ -1,4 +1,4 @@ -package lambda +package num import ( "errors" diff --git a/num/array_test.go b/num/array_test.go index f04dd3d7ff07e9d7d70708f24be9fca52a0d965e..b161585ca2915eaf42698e2d0a96841511058c49 100644 --- a/num/array_test.go +++ b/num/array_test.go @@ -1,4 +1,4 @@ -package lambda +package num import ( "fmt" diff --git a/num/compare.go b/num/compare.go index 3f5382ad54544c57b8545f5200e8f22aac792c48..287b86374b2710a6fd6e6059e31d4247dbbc27d4 100644 --- a/num/compare.go +++ b/num/compare.go @@ -1,4 +1,4 @@ -package lambda +package num import ( "errors" diff --git a/num/equal.go b/num/equal.go index 8cfd7b2f58fd065b467e0b5b210488dcbfdc7e25..64f13139516ec746db5a39d878b3da07b7c02b8a 100644 --- a/num/equal.go +++ b/num/equal.go @@ -1,4 +1,4 @@ -package lambda +package num type Equal interface { Equals(obj interface{}) bool diff --git a/series.go b/series.go index 0d026c4925d2dd2915b64dfe074a8b10d8c23899..a5b30abdf00cff082a1d0519a3f6a4a8fcf71d61 100644 --- a/series.go +++ b/series.go @@ -21,11 +21,6 @@ const ( SERIES_TYPE_STRING = reflect.String // string ) -// StringFormatter is used to convert a value -// into a string. Val can be nil or the concrete -// type stored by the series. -type StringFormatter func(val interface{}) string - type Series interface { // Name 取得series名称 Name() string @@ -84,7 +79,7 @@ type Series interface { // Min 找出最小值 Min() any // Select 选取一段记录 - Select(r ScopeLimit) Series + Select(r stat.ScopeLimit) Series // Append 增加一批记录 Append(values ...any) // Apply 接受一个回调函数 @@ -141,7 +136,7 @@ func NewSeries(t Type, name string, vals any) Series { //} // GenericSeries 泛型方法, 构造序列, 比其它方式对类型的统一性要求更严格 -func GenericSeries[T GenericType](name string, values ...T) Series { +func GenericSeries[T stat.GenericType](name string, values ...T) Series { // 第一遍, 确定类型, 找到第一个非nil的值 var _type Type = SERIES_TYPE_STRING for _, v := range values { diff --git a/series_int64.go b/series_int64.go index 1a10cab04f3e01c30d06413e8c8273883955cb65..2c2f2e3242089701f07b91e9f8d877c01eac517e 100644 --- a/series_int64.go +++ b/series_int64.go @@ -162,8 +162,8 @@ func (self *SeriesInt64) Mean() float64 { if self.Len() < 1 { return NaN() } - stdDev := Mean(self.Data) - return stdDev + stdDev := stat.Mean(self.Data) + return float64(stdDev) } func (self *SeriesInt64) StdDev() float64 { diff --git a/slice_float32.go b/slice_float32.go deleted file mode 100644 index b9d1bb0422f5a393f71d90d40d72a62fbc1c28ac..0000000000000000000000000000000000000000 --- a/slice_float32.go +++ /dev/null @@ -1,67 +0,0 @@ -package pandas - -import "gitee.com/quant1x/pandas/stat" - -func slice_any_to_float32[T Number](s []T) []float32 { - count := len(s) - if count == 0 { - return []float32{} - } - d := make([]float32, count) - for idx, iv := range s { - // 强制转换 - d[idx] = float32(iv) - } - return d -} - -// SliceToFloat32 any输入只能是一维slice或者数组 -func SliceToFloat32(v any) []float32 { - var vs []float32 - switch values := v.(type) { - case []int8: - return slice_any_to_float32(values) - case []uint8: - return slice_any_to_float32(values) - case []int16: - return slice_any_to_float32(values) - case []uint16: - return slice_any_to_float32(values) - case []int32: - return slice_any_to_float32(values) - case []uint32: - return slice_any_to_float32(values) - case []int64: - return slice_any_to_float32(values) - case []uint64: - return slice_any_to_float32(values) - case []int: - return slice_any_to_float32(values) - case []uint: - return slice_any_to_float32(values) - case []float32: - // TODO:直接返回会不会有问题 - return values - case []float64: - return slice_any_to_float32(values) - case []bool: - count := len(values) - if count == 0 { - return []float32{} - } - vs = make([]float32, count) - for idx, iv := range values { - vs[idx] = boolToFloat32(iv) - } - case []string: - count := len(values) - if count == 0 { - return []float32{} - } - vs = make([]float32, count) - for idx, iv := range values { - vs[idx] = float32(stat.AnyToFloat64(iv)) - } - } - return []float32{} -} diff --git a/slice_float64.go b/slice_float64.go deleted file mode 100644 index 5b277a0a8df8420f4937fbe3f04b4c26543fc202..0000000000000000000000000000000000000000 --- a/slice_float64.go +++ /dev/null @@ -1,49 +0,0 @@ -package pandas - -import "gitee.com/quant1x/pandas/stat" - -func slice_any_to_float64[T Number](s []T) []float64 { - count := len(s) - if count == 0 { - return []float64{} - } - d := make([]float64, count) - for idx, iv := range s { - d[idx] = float64(iv) - } - return d -} - -// any输入只能是一维slice或者数组 -func numberToFloat64(v any) []float64 { - var vs []float64 - switch values := v.(type) { - case []float64: - return values - case []int64: - return slice_any_to_float64(values) - case []int32: - return slice_any_to_float64(values) - case []int: - return slice_any_to_float64(values) - case []bool: - count := len(values) - if count == 0 { - return []float64{} - } - vs = make([]float64, count) - for idx, iv := range values { - vs[idx] = stat.AnyToFloat64(iv) - } - case []string: - count := len(values) - if count == 0 { - return []float64{} - } - vs = make([]float64, count) - for idx, iv := range values { - vs[idx] = stat.AnyToFloat64(iv) - } - } - return vs -} diff --git a/stat/abs.go b/stat/abs.go index 70c84a8c8273cef85d48a6f58aacba96587bf56a..92543410af065964f5fb97cd7d6e2cd86302df04 100644 --- a/stat/abs.go +++ b/stat/abs.go @@ -6,7 +6,7 @@ import ( ) // Abs 泛型绝对值 -func Abs[T Number](x []T) []T { +func Abs[T BaseType](x []T) []T { var d any var v any = x switch xv := v.(type) { @@ -24,8 +24,8 @@ func Abs[T Number](x []T) []T { d = __abs_go(xv) case []int64: d = __abs_go(xv) - //case []uint, []uint8, []uint16, []uint32, []uint64, []uintptr: - // d = xv + case []uint, []uint8, []uint16, []uint32, []uint64, []uintptr: + d = xv default: // 其它类型原样返回 d = xv diff --git a/stat/array.go b/stat/array.go deleted file mode 100644 index dc4139bbaac28b7a7a856769107cc6ae12063aa7..0000000000000000000000000000000000000000 --- a/stat/array.go +++ /dev/null @@ -1,66 +0,0 @@ -package stat - -type NDArray []DType - -type Array[T Number] []T - -func (a Array[T]) Len() int { - return len(a) -} - -//type FloatX interface { -// ~float64 | []float64 | int64 | []int64 | int | []int | int32 | []int32 | [][]float64 | [][]int | [][]int64 | [][]int32 -//} -// -//// IsVector checks if a variable is a slice -//func IsVector[T FloatX](obj T) bool { -// switch reflect.TypeOf(obj).Kind() { -// case reflect.Slice: -// return true -// default: -// return false -// } -//} -// -//// AsSlice converts a variable to a slice -//func AsSlice[T FloatX](obj T) []float64 { -// switch reflect.TypeOf(obj).Kind() { -// case reflect.Slice: -// return any(obj).([]float64) -// default: -// return []float64{any(obj).(float64)} -// } -//} -// -//// asFloat64 converts a variable to a float64 -//func asFloat64[T FloatX](obj T) float64 { -// switch reflect.TypeOf(obj).Kind() { -// case reflect.Slice: -// return any(obj).([]float64)[0] -// case reflect.Int: -// return float64(any(obj).(int)) -// case reflect.Int64: -// return float64(any(obj).(int)) -// case reflect.Int32: -// return float64(any(obj).(int)) -// default: -// return any(obj).(float64) -// } -//} -// -//// Round rounds a slice of numbers to a given decimal -//func Round[T FloatX](element T, decimals int) any { -// if IsVector(element) { -// var roundedArray []float64 -// array := AsSlice(element) -// for i := range array { -// roundedNum := math.Round(array[i]*math.Pow(10, float64(decimals))) / math.Pow(10, float64(decimals)) -// roundedArray = append(roundedArray, roundedNum) -// } -// return roundedArray -// } else { -// number := asFloat64(element) -// return math.Round(number*math.Pow(10, float64(decimals))) / math.Pow(10, float64(decimals)) -// } -// -//} diff --git a/stat/array_test.go b/stat/array_test.go deleted file mode 100644 index be63f3075bfe08de781e7854b47461f0890ea9dc..0000000000000000000000000000000000000000 --- a/stat/array_test.go +++ /dev/null @@ -1,13 +0,0 @@ -package stat - -import ( - "fmt" - "testing" -) - -func TestArray_Len(t *testing.T) { - f1 := []float64{1, 2, 3, 4, 5} - a1 := Array[float64](f1) - fmt.Println(a1) - fmt.Println(a1.Len()) -} diff --git a/stat/diff.go b/stat/diff.go index d8102dde2439d3c23cc31e7d415084ff147205ff..24d0c084495ae70edf388c4785e42c88c48307f7 100644 --- a/stat/diff.go +++ b/stat/diff.go @@ -30,3 +30,30 @@ func Diff[T Number](s []T, param any) []T { return d } + +func Diff2[T BaseType](s []T, param any) []T { + var d any + switch vs := any(s).(type) { + case []float32: + d = Diff(vs, param) + case []float64: + d = Diff(vs, param) + case []int: + d = Diff(vs, param) + case []int8: + d = Diff(vs, param) + case []int16: + d = Diff(vs, param) + case []int32: + d = Diff(vs, param) + case []int64: + d = Diff(vs, param) + //case []uint, []uint8, []uint16, []uint32, []uint64, []uintptr: + // d = xv + default: + // 其它类型原样返回 + panic(Throw(any(s))) + } + + return d.([]T) +} diff --git a/stat/diff_test.go b/stat/diff_test.go index fc6e21956745f721d508f5e9fa3d112e301eba7c..9650631b262aec402e4d1832fa2f3003cbcba38b 100644 --- a/stat/diff_test.go +++ b/stat/diff_test.go @@ -19,3 +19,9 @@ func TestDiff(t *testing.T) { r2 := Diff(d1, s1) fmt.Println("序列化结果:", r2) } + +func TestDiff2(t *testing.T) { + d1 := []float64{1, 2, 3, 4, 5} + r1 := Diff2(d1, 1) + fmt.Println(r1) +} diff --git a/stat/errors.go b/stat/errors.go index 0dd79882a7d3a6e5a19ee5976b66d61758b9e6b9..92f4252ede495560d7789dbcbb45720061a194ac 100644 --- a/stat/errors.go +++ b/stat/errors.go @@ -1,6 +1,9 @@ package stat -import "gitee.com/quant1x/pandas/exception" +import ( + "gitee.com/quant1x/pandas/exception" + "reflect" +) const ( errorTypeBase = 0 @@ -10,3 +13,8 @@ var ( // ErrUnsupportedType 不支持的类型 ErrUnsupportedType = exception.New(errorTypeBase+0, "Unsupported type") ) + +func Throw(tv any) *exception.Exception { + typeName := reflect.TypeOf(tv).String() + return exception.New(errorTypeBase+1, "Unsupported type: "+typeName) +} diff --git a/stat/fillna.go b/stat/fillna.go index f62d150af71aafca44c6f211ff939b2e1b983376..14e2ef9d316609275274dfcb3314497dd1fe8d88 100644 --- a/stat/fillna.go +++ b/stat/fillna.go @@ -39,7 +39,7 @@ import "golang.org/x/exp/slices" // Returns // ------- // []T or None -func Fill[T Number | ~string](v []T, d T, args ...any) (rows []T) { +func Fill[T BaseType](v []T, d T, args ...any) (rows []T) { // 默认不替换 var __optInplace = false if len(args) > 0 { @@ -84,7 +84,7 @@ func Fill[T Number | ~string](v []T, d T, args ...any) (rows []T) { } // FillNa NaN填充默认值 -func FillNa[T Number | ~string](v []T, args ...any) []T { +func FillNa[T BaseType](x []T, v any, args ...any) []T { // 默认不copy var __optInplace = false if len(args) > 0 { @@ -95,9 +95,9 @@ func FillNa[T Number | ~string](v []T, args ...any) []T { } var dest []T if __optInplace { - dest = v + dest = x } else { - dest = slices.Clone(v) + dest = slices.Clone(x) } var values any = dest switch rows := values.(type) { diff --git a/stat/frame.go b/stat/frame.go index 8aa6399ffec2cf2e7b92cdcefdcbeca4ee9eebf3..77b8130aa4ceb21346987ea812f8fd5f53720bda 100644 --- a/stat/frame.go +++ b/stat/frame.go @@ -1,107 +1,82 @@ package stat -import ( - "reflect" - "sync" -) - -type Frame[T GenericType] interface { +type Frame interface { // Name 取得series名称 Name() string - // ReName renames the series. - ReName(name string) + // Rename renames the series. + Rename(name string) + // Type returns the type of data the series holds. // 返回series的数据类型 Type() Type - // Len 获得行数 - Len() int // Values 获得全部数据集 - Values() []T // 如果确定类型, 后面可能无法自动调整 -} - -type GenericFrame[T GenericType] struct { - lock sync.RWMutex // 读写锁 - formatter StringFormatter // 字符串格式化工具 - name string // 帧名称 - type_ Type // values元素类型 - nilCount int // nil和nan的元素有多少, 这种统计在bool和int64类型中不会大于0, 只对float64及string有效 - rows int // 行数 - values []T // 只能是一个一维slice, 在所有的运算中, values强制转换成float64切片 -} + Values() any -func NewFrame[T GenericType](name string, values ...any) Frame[T] { - frame := GenericFrame[T]{ - formatter: DefaultFormatter, - name: name, - type_: reflect.Invalid, - nilCount: 0, - rows: 0, - values: nil, - } - // 确定泛型的具体类型, 以便后面创建slice - kind := checkoutRawType(&frame) - if kind == reflect.Invalid { - return &frame - } - frame.type_ = kind - if frame.type_ == SERIES_TYPE_BOOL { - // bool - frame.values = reflect.MakeSlice(typeBool, 0, 0).Interface().([]T) - } else if frame.type_ == SERIES_TYPE_INT64 { - // int64 - frame.values = reflect.MakeSlice(typeInt64, 0, 0).Interface().([]T) - } else if frame.type_ == SERIES_TYPE_FLOAT32 { - // float32 - frame.values = reflect.MakeSlice(typeFloat32, 0, 0).Interface().([]T) - } else if frame.type_ == SERIES_TYPE_FLOAT64 { - // float64 - frame.values = reflect.MakeSlice(typeFloat64, 0, 0).Interface().([]T) - } else { - // string, 字符串最后容错使用 - frame.values = reflect.MakeSlice(typeString, 0, 0).Interface().([]T) - } - size := 0 - for idx, v := range values { - vv := reflect.ValueOf(v) - vk := vv.Kind() - switch vk { - case reflect.Invalid: // {interface} nil - frame.assign(idx, size, nil) - case reflect.Slice, reflect.Array: // 切片或者数组 - for i := 0; i < vv.Len(); i++ { - tv := vv.Index(i).Interface() - frame.assign(idx, size, tv) - } - default: - // 默认为基础数据类型 - tv := vv.Interface() - frame.assign(idx, size, tv) - } - } - return &frame -} + // NaN 输出默认的NaN + NaN() any + // Floats 强制转成[]float32 + Floats() []float32 + // DTypes 强制转[]stat.DType + DTypes() []DType + // Ints 强制转换成整型 + Ints() []Int -func (self *GenericFrame[T]) Name() string { - //TODO implement me - panic("implement me") -} + // sort.Interface -func (self *GenericFrame[T]) ReName(name string) { - //TODO implement me - panic("implement me") -} + // Len 获得行数, 实现sort.Interface接口的获取元素数量方法 + Len() int + // Less 实现sort.Interface接口的比较元素方法 + Less(i, j int) bool + // Swap 实现sort.Interface接口的交换元素方法 + Swap(i, j int) -func (self *GenericFrame[T]) Type() Type { - //TODO implement me - panic("implement me") -} + // Empty returns an empty Series of the same type + Empty() Frame + // Copy 复制 + Copy() Frame + // Records returns the elements of a Series as a []string + Records() []string + // Subset 获取子集 + Subset(start, end int, opt ...any) Frame + // Repeat elements of an array. + Repeat(x any, repeats int) Frame + // Shift index by desired number of periods with an optional time freq. + // 使用可选的时间频率按所需的周期数移动索引. + Shift(periods int) Frame + // Rolling 序列化版本 + //Rolling(param any) RollingAndExpandingMixin -func (self *GenericFrame[T]) Len() int { - //TODO implement me - panic("implement me") -} + // Mean calculates the average value of a series + Mean() DType + // StdDev calculates the standard deviation of a series + StdDev() DType + // FillNa Fill NA/NaN values using the specified method. + FillNa(v any, inplace bool) Frame + // Max 找出最大值 + Max() any + // Min 找出最小值 + Min() any + // Select 选取一段记录 + Select(r ScopeLimit) Frame + // Append 增加一批记录 + Append(values ...any) Frame + // Apply 接受一个回调函数 + Apply(f func(idx int, v any)) + // Logic 逻辑处理 + Logic(f func(idx int, v any) bool) []bool + // Diff 元素的第一个离散差 + Diff(param any) Frame + // Ref 引用其它周期的数据 + Ref(param any) Frame + // Std 计算标准差 + Std() DType + // Sum 计算累和 + Sum() DType + // EWM Provide exponentially weighted (EW) calculations. + // + // Exactly one of ``com``, ``span``, ``halflife``, or ``alpha`` must be + // provided if ``times`` is not provided. If ``times`` is provided, + // ``halflife`` and one of ``com``, ``span`` or ``alpha`` may be provided. + //EWM(alpha EW) ExponentialMovingWindow -func (self *GenericFrame[T]) Values() []T { - //TODO implement me - panic("implement me") } diff --git a/stat/frame_test.go b/stat/frame_test.go deleted file mode 100644 index 260f7d99c3edb5a062d938cd3e71944ec77bb0bd..0000000000000000000000000000000000000000 --- a/stat/frame_test.go +++ /dev/null @@ -1,24 +0,0 @@ -package stat - -import ( - "fmt" - "testing" -) - -func TestNewFrameT1(t *testing.T) { - f1 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, NaN(), 12} - f2 := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} - s1 := []string{"a", "b", "c"} - i1 := []int64{11, 12, 13, 14, 15, 16, 17, 18, 19, 20} - gf1 := NewFrame[float64]("x", f1) - fmt.Printf("%+v\n", gf1) - - t0 := []any{nil, 1, true, "abc", 3.45, NaN()} - gf2 := NewFrame[float64]("x", t0...) - fmt.Printf("%+v\n", gf2) - _ = f1 - _ = f2 - _ = s1 - _ = i1 - _ = gf1 -} diff --git a/stat/max.go b/stat/max.go index 6acbc3c0f58cfe8ced38e67a534ec9bda97436f1..55c711242a0fb2e68f0be96bf2ebaba4c9079aee 100644 --- a/stat/max.go +++ b/stat/max.go @@ -10,27 +10,7 @@ func Max[T Number](x []T) T { return unaryOperations1[T](x, vek32.Max, vek.Max, __max_go[T]) } -//func Max[T Float](f []T) T { -// if len(f) == 0 { -// return T(0) -// } -// -// var d any -// var s any -// s = f -// switch fs := s.(type) { -// case []float32: -// d = vek32.Max(fs) -// case []float64: -// d = vek.Max(fs) -// default: -// panic(ErrUnsupportedType) -// } -// -// return d.(T) -//} - -func __max_go[T Number](x []T) T { +func __max_go[T Number | ~string](x []T) T { max := x[0] for _, v := range x[1:] { if v > max { @@ -39,3 +19,42 @@ func __max_go[T Number](x []T) T { } return max } + +func Max2[T BaseType](x []T) T { + var d any + switch vs := any(x).(type) { + case []float32: + d = Max(vs) + case []float64: + d = Max(vs) + case []int: + d = Max(vs) + case []int8: + d = Max(vs) + case []int16: + d = Max(vs) + case []int32: + d = Max(vs) + case []int64: + d = Max(vs) + case []uint: + d = Max(vs) + case []uint8: + d = Max(vs) + case []uint16: + d = Max(vs) + case []uint32: + d = Max(vs) + case []uint64: + d = Max(vs) + case []uintptr: + d = Max(vs) + case []string: + d = __max_go(vs) + default: + // 其它类型原样返回 + panic(Throw(any(x))) + } + + return d.(T) +} diff --git a/stat/maximum.go b/stat/maximum.go index 9d066b3d48a42cfba3db2f865da4d5cd3c21f65f..32f62508967dc7faff12d6d5e612abb39d9043ae 100644 --- a/stat/maximum.go +++ b/stat/maximum.go @@ -7,7 +7,8 @@ import ( ) // Maximum AVX2版本, 两个序列横向比较最大值 -// TODO:print(np.maximum(1.4, np.nan)) 输出nan +// +// TODO:print(np.maximum(1.4, np.nan)) 输出nan func Maximum[T Number](f1, f2 []T) []T { xlen := len(f1) ylen := len(f2) diff --git a/stat/mean.go b/stat/mean.go index e5b44919d706bc04eca938afd0b62e2a466732cd..031dc2ca635fb7a0bc4e8410a7b9c29c24d7fb2b 100644 --- a/stat/mean.go +++ b/stat/mean.go @@ -13,3 +13,42 @@ func Mean[T Number](x []T) T { func __mean_go[T Number](x []T) T { return __sum(x) / T(len(x)) } + +func Mean2[T BaseType](x []T) T { + var d any + switch vs := any(x).(type) { + case []float32: + d = Mean(vs) + case []float64: + d = Mean(vs) + case []int: + d = Mean(vs) + case []int8: + d = Mean(vs) + case []int16: + d = Mean(vs) + case []int32: + d = Mean(vs) + case []int64: + d = Mean(vs) + case []uint: + d = Mean(vs) + case []uint8: + d = Mean(vs) + case []uint16: + d = Mean(vs) + case []uint32: + d = Mean(vs) + case []uint64: + d = Mean(vs) + case []uintptr: + d = Mean(vs) + //case []string: + // d = __max_go(vs) + default: + // 其它类型原样返回 + panic(Throw(any(x))) + } + + return d.(T) +} diff --git a/stat/min.go b/stat/min.go index a91b5f9eda91df8ecc2c3809d07aa7847e35842c..230732a69d0e565ea75b6996d2190353c32aeae9 100644 --- a/stat/min.go +++ b/stat/min.go @@ -10,7 +10,7 @@ func Min[T Number](x []T) T { return unaryOperations1[T](x, vek32.Min, vek.Min, __min_go[T]) } -func __min_go[T Number](x []T) T { +func __min_go[T Number | ~string](x []T) T { min := x[0] for _, v := range x[1:] { if v < min { @@ -19,3 +19,42 @@ func __min_go[T Number](x []T) T { } return min } + +func Min2[T BaseType](x []T) T { + var d any + switch vs := any(x).(type) { + case []float32: + d = Min(vs) + case []float64: + d = Min(vs) + case []int: + d = Min(vs) + case []int8: + d = Min(vs) + case []int16: + d = Min(vs) + case []int32: + d = Min(vs) + case []int64: + d = Min(vs) + case []uint: + d = Min(vs) + case []uint8: + d = Min(vs) + case []uint16: + d = Min(vs) + case []uint32: + d = Min(vs) + case []uint64: + d = Min(vs) + case []uintptr: + d = Min(vs) + case []string: + d = __min_go(vs) + default: + // 其它类型原样返回 + panic(Throw(any(x))) + } + + return d.(T) +} diff --git a/stat/ndarray.go b/stat/ndarray.go new file mode 100644 index 0000000000000000000000000000000000000000..ef0a6c3457aed48c7b1024f7bc8f5cc4356e74e7 --- /dev/null +++ b/stat/ndarray.go @@ -0,0 +1,202 @@ +package stat + +import ( + gc "github.com/huandu/go-clone" + "reflect" +) + +type NDArray[T BaseType] []T + +func (self NDArray[T]) Name() string { + //TODO implement me + panic("implement me") +} + +func (self NDArray[T]) Rename(name string) { + //TODO implement me + panic("implement me") +} + +func (self NDArray[T]) Type() Type { + return checkoutRawType(self) +} + +func (self NDArray[T]) Values() any { + return []T(self) +} + +func (self NDArray[T]) NaN() any { + switch any(self).(type) { + case []bool: + return BoolNaN + case []string: + return StringNaN + case []int64: + return Nil2Int64 + case []float32: + return Nil2Float32 + case []float64: + return Nil2Float64 + default: + panic(ErrUnsupportedType) + } +} + +func (self NDArray[T]) Floats() []float32 { + return SliceToFloat32([]T(self)) +} + +func (self NDArray[T]) DTypes() []DType { + return SliceToFloat64([]T(self)) +} + +func (self NDArray[T]) Ints() []Int { + d := make([]Int, self.Len()) + for i, v := range self { + d[i] = AnyToInt32(v) + } + return d +} + +func (self NDArray[T]) Empty() Frame { + var empty []T + return NDArray[T](empty) +} + +func (self NDArray[T]) Copy() Frame { + vlen := self.Len() + return self.Subset(0, vlen, true) +} + +func (self NDArray[T]) Records() []string { + ret := make([]string, self.Len()) + self.Apply(func(idx int, v any) { + ret[idx] = AnyToString(v) + }) + return ret + +} + +func (self NDArray[T]) Subset(start, end int, opt ...any) Frame { + // 默认不copy + var __optCopy bool = false + if len(opt) > 0 { + // 第一个参数为是否copy + if _cp, ok := opt[0].(bool); ok { + __optCopy = _cp + } + } + var vs any + var rows int + vv := reflect.ValueOf(self.Values()) + vk := vv.Kind() + switch vk { + case reflect.Slice, reflect.Array: // 切片和数组同样的处理逻辑 + vvs := vv.Slice(start, end) + vs = vvs.Interface() + rows = vv.Len() + if __optCopy && rows > 0 { + vs = gc.Clone(vs) + } + rows = vvs.Len() + var d Frame + d = NDArray[T](vs.([]T)) + return d + default: + // 其它类型忽略 + } + return self.Empty() +} + +func (self NDArray[T]) Repeat(x any, repeats int) Frame { + var d any + switch values := self.Values().(type) { + case []bool: + _ = values + d = Repeat(AnyToBool(x), repeats) + case []string: + d = Repeat(AnyToString(x), repeats) + case []int64: + d = Repeat(AnyToInt64(x), repeats) + case []float32: + d = Repeat(AnyToFloat32(x), repeats) + default: //case []float64: + d = Repeat(AnyToFloat64(x), repeats) + } + return NDArray[T](d.([]T)) +} + +func (self NDArray[T]) Shift(periods int) Frame { + values := self.Values().([]T) + d := Shift(values, periods) + return NDArray[T](d) +} + +func (self NDArray[T]) Mean() DType { + d := Mean2(self) + return Any2DType(d) +} + +func (self NDArray[T]) StdDev() DType { + return self.Std() +} + +func (self NDArray[T]) FillNa(v any, inplace bool) Frame { + d := FillNa(self, v, inplace) + return NDArray[T](d) +} + +func (self NDArray[T]) Max() any { + d := Min2(self) + return d +} + +func (self NDArray[T]) Min() any { + d := Min2(self) + return d +} + +func (self NDArray[T]) Select(r ScopeLimit) Frame { + start, end, err := r.Limits(self.Len()) + if err != nil { + return nil + } + series := self.Subset(start, end+1) + return series +} + +func (self NDArray[T]) Apply(f func(idx int, v any)) { + for i, v := range self { + f(i, v) + } +} + +func (self NDArray[T]) Logic(f func(idx int, v any) bool) []bool { + d := make([]bool, self.Len()) + for i, v := range self { + d[i] = f(i, v) + } + return d +} + +func (self NDArray[T]) Diff(param any) Frame { + d := Diff2(self, param) + return NDArray[T](d) +} + +func (self NDArray[T]) Ref(param any) Frame { + values := self.Values().([]T) + d := Shift3(values, param) + return NDArray[T](d) +} + +func (self NDArray[T]) Std() DType { + d := Std(self) + return Any2DType(d) +} + +func (self NDArray[T]) Sum() DType { + values := Slice2DType(self) + d := Sum(values) + return Any2DType(d) +} diff --git a/stat/frame_assign.go b/stat/ndarray_append.go similarity index 31% rename from stat/frame_assign.go rename to stat/ndarray_append.go index fa835567348de4eeebf8e04bb999593b0a041afa..8ae823aa413eea93c8fde8018cb6629a5ceba324 100644 --- a/stat/frame_assign.go +++ b/stat/ndarray_append.go @@ -1,35 +1,18 @@ package stat -import ( - "reflect" -) +import "reflect" // 赋值 -func (self *GenericFrame[T]) assign(idx, size int, val any) { - var v any - if self.type_ == SERIES_TYPE_BOOL { - v = AnyToBool(val) - } else if self.type_ == SERIES_TYPE_INT64 { - v = AnyToInt64(val) - } else if self.type_ == SERIES_TYPE_FLOAT64 { - v = AnyToFloat64(val) - } else { - v = AnyToString(val) - } - //// 检测类型 - //if self.type_ == SERIES_TYPE_INVAILD { - // _type, _ := detectTypes(v) - // if _type != SERIES_TYPE_INVAILD { - // self.type_ = _type - // } - //} +func assign[T BaseType](type_ Type, array Frame, idx, size int, v T) Frame { _vv := reflect.ValueOf(v) _vi := _vv.Interface() // float和string类型有可能是NaN, 对nil和NaN进行计数 - if self.type_ == SERIES_TYPE_FLOAT64 && Float64IsNaN(_vi.(float64)) { - self.nilCount++ - } else if self.type_ == SERIES_TYPE_STRING && StringIsNaN(_vi.(string)) { - self.nilCount++ + if type_ == SERIES_TYPE_FLOAT32 && Float32IsNaN(_vi.(float32)) { + //array.nilCount++ + } else if type_ == SERIES_TYPE_FLOAT64 && Float64IsNaN(_vi.(float64)) { + //array.nilCount++ + } else if type_ == SERIES_TYPE_STRING && StringIsNaN(_vi.(string)) { + //array.nilCount++ // 以下修正string的NaN值, 统一为"NaN" //_rv := reflect.ValueOf(StringNaN) //_vv.Set(_rv) // 这样赋值会崩溃 @@ -46,12 +29,67 @@ func (self *GenericFrame[T]) assign(idx, size int, val any) { // 执行之后, 通过debug可以看到assign入参的v已经变成了"NaN" } } + values := (array).Values().([]T) // 确保只添加了1个元素 if idx < size { - self.values[idx] = v.(T) + values[idx] = v } else { - self.values = append(self.values, v.(T)) + values = append(values, v) } // 行数+1 - self.rows += 1 + //array.rows += 1 + return NDArray[T](values) +} + +// 插入一条记录 +func (self NDArray[T]) insert(idx, size int, v any) NDArray[T] { + type_ := checkoutRawType(self) + if type_ == SERIES_TYPE_BOOL { + val := AnyToBool(v) + an := assign[bool](type_, self, idx, size, val) + self = an.(NDArray[T]) + } else if type_ == SERIES_TYPE_INT64 { + val := AnyToInt64(v) + an := assign[int64](type_, self, idx, size, val) + self = an.(NDArray[T]) + } else if type_ == SERIES_TYPE_FLOAT32 { + val := AnyToFloat32(v) + an := assign[float32](type_, self, idx, size, val) + self = an.(NDArray[T]) + } else if type_ == SERIES_TYPE_FLOAT64 { + val := AnyToFloat64(v) + an := assign[float64](type_, self, idx, size, val) + self = an.(NDArray[T]) + } else { + val := AnyToString(v) + an := assign[string](type_, self, idx, size, val) + self = an.(NDArray[T]) + } + return self +} + +func (self NDArray[T]) Append(values ...any) Frame { + size := 0 + for idx, v := range values { + switch val := v.(type) { + case nil, int8, uint8, int16, uint16, int32, uint32, int64, uint64, int, uint, float32, float64, bool, string: + // 基础类型 + self = self.insert(idx, size, val) + default: + vv := reflect.ValueOf(val) + vk := vv.Kind() + switch vk { + case reflect.Slice, reflect.Array: // 切片或数组 + for i := 0; i < vv.Len(); i++ { + tv := vv.Index(i).Interface() + self = self.insert(idx, size, tv) + } + case reflect.Struct: // 忽略结构体 + continue + default: + self = self.insert(idx, size, nil) + } + } + } + return self } diff --git a/generic_convert.go b/stat/ndarray_convert.go similarity index 92% rename from generic_convert.go rename to stat/ndarray_convert.go index d51e6fb579bc962a39cec7b7af5d81f6e121c265..8c120d744975140c17d406b99166f0d14bf760a4 100644 --- a/generic_convert.go +++ b/stat/ndarray_convert.go @@ -1,4 +1,4 @@ -package pandas +package stat import ( "github.com/viterin/vek" @@ -7,17 +7,15 @@ import ( ) // 这里做数组统一转换 -func convert[T GenericType](s Series, v T) { - +func convert[T GenericType](s Frame, v T) { values := s.Values() rawType := checkoutRawType(values) values, ok := values.([]T) _ = rawType _ = ok - } -func ToFloat32(s Series) []float32 { +func ToFloat32(s Frame) []float32 { length := s.Len() defaultSlice := vek32.Repeat(Nil2Float32, length) values := s.Values() @@ -39,7 +37,7 @@ func ToFloat32(s Series) []float32 { } } -func ToFloat64(s Series) []float64 { +func ToFloat64(s Frame) []float64 { length := s.Len() defaultSlice := vek.Repeat(Nil2Float64, length) values := s.Values() @@ -61,7 +59,7 @@ func ToFloat64(s Series) []float64 { } } -func ToBool(s Series) []bool { +func ToBool(s Frame) []bool { length := s.Len() defaultSlice := make([]bool, length) values := s.Values() diff --git a/generic_convert_test.go b/stat/ndarray_convert_test.go similarity index 80% rename from generic_convert_test.go rename to stat/ndarray_convert_test.go index 303bd73eda93aa58509d7992c998d3dbfb79980d..bdcccb7b05fbf2d9f12d4d1a0f99aefd99707520 100644 --- a/generic_convert_test.go +++ b/stat/ndarray_convert_test.go @@ -1,4 +1,4 @@ -package pandas +package stat import ( "testing" diff --git a/stat/ndarray_sort.go b/stat/ndarray_sort.go new file mode 100644 index 0000000000000000000000000000000000000000..cd8ad787c0ae50d7a694e8abbe24bbd3df1bc70b --- /dev/null +++ b/stat/ndarray_sort.go @@ -0,0 +1,67 @@ +package stat + +func (arr NDArray[T]) Len() int { + return len(arr) +} + +// Less 实现sort.Interface接口的比较元素方法 +func (arr NDArray[T]) Less(i, j int) bool { + type_ := arr.Type() + if type_ == SERIES_TYPE_BOOL { + values := arr.Values().([]bool) + var ( + a = int(0) + b = int(0) + ) + if values[i] { + a = 1 + } + if values[j] { + b = 1 + } + return a < b + } else if type_ == SERIES_TYPE_INT64 { + values := arr.Values().([]int64) + return values[i] < values[j] + } else if type_ == SERIES_TYPE_FLOAT32 { + values := arr.Values().([]float32) + return values[i] < values[j] + } else if type_ == SERIES_TYPE_FLOAT64 { + values := arr.Values().([]float64) + return values[i] < values[j] + } else if type_ == SERIES_TYPE_STRING { + values := arr.Values().([]string) + return values[i] < values[j] + } else { + // SERIES_TYPE_INVAILD + // 应该到不了这里, Len()会返回0 + panic(ErrUnsupportedType) + } + return false + +} + +// Swap 实现sort.Interface接口的交换元素方法 +func (arr NDArray[T]) Swap(i, j int) { + type_ := arr.Type() + if type_ == SERIES_TYPE_BOOL { + values := arr.Values().([]bool) + values[i], values[j] = values[j], values[i] + } else if type_ == SERIES_TYPE_INT64 { + values := arr.Values().([]int64) + values[i], values[j] = values[j], values[i] + } else if type_ == SERIES_TYPE_FLOAT32 { + values := arr.Values().([]float32) + values[i], values[j] = values[j], values[i] + } else if type_ == SERIES_TYPE_FLOAT64 { + values := arr.Values().([]float64) + values[i], values[j] = values[j], values[i] + } else if type_ == SERIES_TYPE_STRING { + values := arr.Values().([]string) + values[i], values[j] = values[j], values[i] + } else { + // SERIES_TYPE_INVAILD + // 应该到不了这里, Len()会返回0 + panic(ErrUnsupportedType) + } +} diff --git a/stat/ndarray_test.go b/stat/ndarray_test.go new file mode 100644 index 0000000000000000000000000000000000000000..93096aeb2248cad1e1fcdf3e539877e69e22cbfb --- /dev/null +++ b/stat/ndarray_test.go @@ -0,0 +1,56 @@ +package stat + +import ( + "fmt" + "reflect" + "testing" + "unsafe" +) + +func TestNDArray_Len(t *testing.T) { + f1 := []float64{1, 2, 3, 4, 5} + a1 := NDArray[float64](f1) + fmt.Println(a1) + fmt.Println(a1.Len()) +} + +type X int + +func TestNDArrayAll(t *testing.T) { + var x1 X = 5 + var x2 int + x2 = int(x1) + fmt.Println(x2) + d := []float32{1, 2, 3, 4, 5} + sh1 := (*reflect.SliceHeader)(unsafe.Pointer(&d)) + fmt.Printf("s : %#v\n", sh1) + var s Frame + s = NDArray[float32](d) + //s3 := []float32(s) + //fmt.Println(s3) + sh2 := (*reflect.SliceHeader)(unsafe.Pointer(&s)) + fmt.Printf("s : %#v\n", sh2.Data) + fmt.Println(s.Len()) + s4 := s.Values() + fmt.Println(s.Type()) + fmt.Println(s.Floats()) + + f32 := ToFloat32(s) + fmt.Println(f32) + + a1 := s.Diff(1) + fmt.Println(a1) + a2 := s.Ref(1) + fmt.Println(a2) + a2 = a2.FillNa(9, true) + fmt.Println(a2) + + a3 := s.Mean() + fmt.Println(a3) + + a4 := s.Shift(-1) + fmt.Println(a4) + s = s.Append(10, 11) + fmt.Println(s) + _ = s4 +} diff --git a/stat/frame_type.go b/stat/ndarray_type.go similarity index 93% rename from stat/frame_type.go rename to stat/ndarray_type.go index cbf7246a69f9ca0cbf8fdb75faa7d4a452ada439..e0af29018e3f07a8443a25da8bd90813eafd2c1f 100644 --- a/stat/frame_type.go +++ b/stat/ndarray_type.go @@ -106,11 +106,15 @@ func checkoutRawType(frame any) reflect.Kind { if pos < 0 { return reflect.Invalid } - strType = strings.TrimSpace(strType[:pos]) - if len(strType) < 1 { - return reflect.Invalid + rawType := strings.TrimSpace(strType[:pos]) + // 如果是0, 这个应该是个slice + if len(rawType) < 1 { + rawType = strings.TrimSpace(strType[pos+1:]) + if len(rawType) < 1 { + return reflect.Invalid + } } - if t, ok := mapKind[strType]; ok { + if t, ok := mapKind[rawType]; ok { return t } return reflect.Invalid diff --git a/scope_limit.go b/stat/scope_limit.go similarity index 99% rename from scope_limit.go rename to stat/scope_limit.go index 917dad0bb2ca4ee33a9b295b4e632381c4603a41..4e6cca6a097824078c6f2f260008b7fd8ce9ab40 100644 --- a/scope_limit.go +++ b/stat/scope_limit.go @@ -1,6 +1,6 @@ // Copyright 2018-20 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved. -package pandas +package stat import ( "errors" diff --git a/scope_limit_test.go b/stat/scope_limit_test.go similarity index 98% rename from scope_limit_test.go rename to stat/scope_limit_test.go index 1e2e5a181181d769ca14107e5719c7ac633845ca..04d2b9f1d3190f14e1d14f454ef15c69cb109a15 100644 --- a/scope_limit_test.go +++ b/stat/scope_limit_test.go @@ -1,6 +1,6 @@ // Copyright 2018-20 PJ Engineering and Business Solutions Pty. Ltd. All rights reserved. -package pandas +package stat import ( "testing" diff --git a/stat/shift.go b/stat/shift.go index 72078b766e1ac4a8e32de4503f7febd2daa23690..d8bd265b052ff97ecc9cff9f47d3b3a88618cb83 100644 --- a/stat/shift.go +++ b/stat/shift.go @@ -1,12 +1,13 @@ package stat import ( + "gitee.com/quant1x/pandas/exception" "golang.org/x/exp/slices" "math" ) // Shift series切片, 使用可选的时间频率按所需的周期数移动索引 -func Shift[T GenericType](S []T, periods int) []T { +func Shift[T BaseType](S []T, periods int) []T { d := slices.Clone(S) if periods == 0 { return d @@ -59,3 +60,35 @@ func Shift2[T GenericType](S []T, N []DType) []T { return d } + +// Shift3 series切片, 使用可选的时间频率按所需的周期数移动索引 +// +// param不支持负值 +func Shift3[T BaseType](S []T, param any) []T { + sLen := len(S) + var N []DType + switch v := param.(type) { + case int: + N = Repeat[DType](DType(v), sLen) + case []DType: + N = Align(v, DTypeNaN, sLen) + default: + panic(exception.New(1, "error window")) + } + var d []T + d = slices.Clone(S) + if len(N) == 0 { + return d + } + values := d + for i, _ := range S { + x := N[i] + if DTypeIsNaN(x) || int(x) > i { + values[i] = typeDefault[T]() + continue + } + values[i] = S[i-int(x)] + } + + return d +} diff --git a/stat/stddev.go b/stat/stddev.go index bd9d0336cc59f2e8e691183baac5616dbad3f83f..b43a9cfaa865d389a6491fba4f1949385323a290 100644 --- a/stat/stddev.go +++ b/stat/stddev.go @@ -32,9 +32,9 @@ func Std_TODO[T Float](f []T) T { } // Std 计算标准差 -func Std[T Float](f []T) T { +func Std[T BaseType](f []T) T { if len(f) == 0 { - return T(0) + return typeDefault[T]() } var d any var s any diff --git a/stat/frame_xstring.go b/stat/strings.go similarity index 100% rename from stat/frame_xstring.go rename to stat/strings.go diff --git a/stat/type.go b/stat/type.go index e7e3b0991acaf2bf5ba476c09667ce30740aecb4..8c896f3efa24621fb9bf14cf725378540490aa59 100644 --- a/stat/type.go +++ b/stat/type.go @@ -2,6 +2,7 @@ package stat import ( "math/big" + "reflect" ) // Signed is a constraint that permits any signed integer type. @@ -47,6 +48,12 @@ type Ordered interface { Integer | Float | ~string } +// NumberOfCPUBitsRelated The number of CPU bits is related +// Deprecated: 不推荐使用 +type NumberOfCPUBitsRelated interface { + ~int | ~uint | ~uintptr +} + // /*nil, */ int8, uint8, int16, uint16, int32, uint32, int64, uint64, int, uint, float32, float64 , bool, string // ~int8 | ~uint8 | ~int16 | ~uint16 | ~int32 | ~uint32 | ~int64 | ~uint64 | ~int | ~uint | ~float32 | ~float64 | ~bool | ~string // uintptr @@ -57,33 +64,40 @@ type BaseType interface { } // GenericType Series支持的所有类型 +// Deprecated: 不推荐使用 type GenericType interface { ~bool | ~int32 | ~int64 | ~int | ~float32 | ~float64 | ~string } // StatType 可以统计的类型 +// Deprecated: 不推荐使用 type StatType interface { ~int32 | ~int64 | ~float32 | ~float64 } type BigFloat = big.Float // 预留将来可能扩展float +// Deprecated: 不推荐使用 type Number8 interface { ~int8 | ~uint8 } +// Deprecated: 不推荐使用 type Number16 interface { ~int16 | ~uint16 } +// Deprecated: 不推荐使用 type Number32 interface { ~int32 | ~uint32 | float32 } +// Deprecated: 不推荐使用 type Number64 interface { ~int64 | ~uint64 | float64 | int | uint } +// Deprecated: 已弃用 type MoveType interface { StatType | ~bool | ~string } @@ -152,7 +166,105 @@ func valueToNumber[T Number](v any, nil2t T, bool2t func(b bool) T, string2t fun case string: return string2t(val, v) default: - panic(ErrUnsupportedType) + panic(Throw(v)) } return T(0) } + +// any转number +func __anyToNumber[T Number](v any) T { + switch val := v.(type) { + case nil: // 这个地方判断nil值 + return typeDefault[T]() + case int8: + return T(val) + case uint8: + return T(val) + case int16: + return T(val) + case uint16: + return T(val) + case int32: + return T(val) + case uint32: + return T(val) + case int64: + return T(val) + case uint64: + return T(val) + case int: + return T(val) + case uint: + return T(val) + case uintptr: + return T(val) + case float32: + return T(val) + case float64: + return T(val) + case bool: + return T(bool2Int(val)) + case string: + vt := ParseFloat64(val, v) + if Float64IsNaN(vt) { + td := T(0) + //rawType :=checkoutRawType(td) + if !reflect.ValueOf(td).CanFloat() { + return td + } + } + return T(vt) + default: + panic(Throw(v)) + } + return T(0) +} + +// any转其它类型 +// 支持3个方向: any到number, any到bool, any到string +func anyToGeneric[T BaseType](v any) T { + var d any + var to T + switch any(to).(type) { + case int8: + d = __anyToNumber[int8](v) + case uint8: + d = __anyToNumber[uint8](v) + case int16: + d = __anyToNumber[int16](v) + case uint16: + d = __anyToNumber[uint16](v) + case int32: + d = __anyToNumber[int32](v) + case uint32: + d = __anyToNumber[uint32](v) + case int64: + d = __anyToNumber[int64](v) + case uint64: + d = __anyToNumber[uint64](v) + case int: + d = __anyToNumber[int](v) + case uint: + d = __anyToNumber[uint](v) + case uintptr: + d = __anyToNumber[uintptr](v) + case float32: + d = __anyToNumber[float32](v) + case float64: + d = __anyToNumber[float64](v) + case bool: + d = AnyToBool(v) + case string: + d = AnyToString(v) + case []int8, []uint8, []int16, []uint16, []int32, []uint32, []int64, []uint64, []int, []uint, []uintptr, []float32, []float64: + // 什么也不处理, 给个默认值 + d = to + case []bool: + d = to + case []string: + d = to + default: + panic(Throw(v)) + } + return d.(T) +} diff --git a/stat/type_bool.go b/stat/type_bool.go index 6c65d47f01e447f3b9d0fb941c1d7850a1f4b591..7aefcc6c5fbd82ba885ca850d9c84e5b26296d8a 100644 --- a/stat/type_bool.go +++ b/stat/type_bool.go @@ -35,6 +35,20 @@ func isFalse(s string) bool { } } +func bool2Int(b bool) int8 { + if b { + return int8(1) + } + return int8(0) +} + +func boolToInt32(b bool) int32 { + if b { + return True2Int32 + } + return False2Int32 +} + func boolToInt64(b bool) int64 { if b { return True2Int64 diff --git a/stat/type_int32.go b/stat/type_int32.go new file mode 100644 index 0000000000000000000000000000000000000000..e8296690ffc768de067fb659367b3bf197016269 --- /dev/null +++ b/stat/type_int32.go @@ -0,0 +1,71 @@ +package stat + +import ( + "fmt" + "github.com/mymmsc/gox/logger" + "math" + "strconv" +) + +const ( + MaxInt32 = int32(math.MaxInt32) + MinInt32 = int32(math.MinInt32) + Nil2Int32 = int32(0) // 空指针转int32 + Int32NaN = int32(0) // int32 无效值 + True2Int32 = int32(1) // true转int32 + False2Int32 = int32(0) // false 转int32 + StringBad2Int32 = int32(0) // 字符串解析int32异常 + StringTrue2Int32 = int32(1) // 字符串true转int32 + StringFalse2Int32 = int32(0) // 字符串false转int32 +) + +// ParseInt32 解析int字符串, 尝试解析10进制和16进制 +func ParseInt32(s string, v any) int32 { + defer func() { + // 解析失败以后输出日志, 以备检查 + if err := recover(); err != nil { + logger.Errorf("ParseInt32 %+v, error=%+v\n", v, err) + } + }() + if IsEmpty(s) { + return Nil2Int32 + } + if isTrue(s) { + return StringTrue2Int32 + } else if isFalse(s) { + return StringFalse2Int32 + } + i, err := strconv.ParseInt(s, 10, 32) + if err == nil { + return int32(i) + } + // 解析失败继续解析16进制 + i, err = strconv.ParseInt(s, 16, 32) + if err == nil { + return int32(i) + } + logger.Errorf("%s, error=%+v\n", s, err) + if IgnoreParseExceptions { + i = int64(StringBad2Int32) + } else { + _ = v.(int32) // Intentionally panic + } + return int32(i) +} + +func int32ToString(v int32) string { + if Float64IsNaN(float64(v)) { + return StringNaN + } + return fmt.Sprint(v) +} + +// AnyToInt32 any转换int32 +func AnyToInt32(v any) int32 { + if vv, ok := extraceValueFromPointer(v); ok { + v = vv + } + + f := valueToNumber[int32](v, Nil2Int32, boolToInt32, ParseInt32) + return f +} diff --git a/stat/type_test.go b/stat/type_test.go index 84493a87fdab996b5487d858da126bed7416328c..345b47bbe930d7b8c1859ebe572f747a5dbdcb88 100644 --- a/stat/type_test.go +++ b/stat/type_test.go @@ -23,3 +23,16 @@ func Test_typeDefault(t *testing.T) { fmt.Println(typeDefault[int]()) fmt.Println(typeDefault[uint]()) } + +func Test_Number(t *testing.T) { + +} + +func Test_anyToGeneric(t *testing.T) { + fmt.Println(anyToGeneric[int](true)) + fmt.Println(anyToGeneric[int]("true")) + fmt.Println(anyToGeneric[int]("false")) + fmt.Println(anyToGeneric[int]("aa")) + fmt.Println(anyToGeneric[int]("tt")) + fmt.Println(anyToGeneric[int](3.00)) +} diff --git a/stat/unsafe.go b/stat/unsafe.go deleted file mode 100644 index 45766af889f148b4bb7d4bc65d084e19a2ebe0d9..0000000000000000000000000000000000000000 --- a/stat/unsafe.go +++ /dev/null @@ -1,8 +0,0 @@ -package stat - -func math_abs[T StatType](v T) T { - if v < 0 { - return v * -1 - } - return v -} diff --git a/stat/unsafe_test.go b/stat/unsafe_test.go deleted file mode 100644 index e53ad50737f46ea9690f6b5ad51ad313de33e3ab..0000000000000000000000000000000000000000 --- a/stat/unsafe_test.go +++ /dev/null @@ -1,12 +0,0 @@ -package stat - -import ( - "fmt" - "testing" -) - -func Test_frombits(t *testing.T) { - i32_1 := float64(-1) - //fmt.Println(math.Abs(i32_1)) - fmt.Println(math_abs(i32_1)) -}