From f48afd67dc577dedde4032cabd0e6450be578d55 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sun, 5 Feb 2023 11:45:57 +0800 Subject: [PATCH 1/6] =?UTF-8?q?=E5=88=A0=E9=99=A4=E5=BA=9F=E5=BC=83?= =?UTF-8?q?=E7=9A=84=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- generic_diff.go | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/generic_diff.go b/generic_diff.go index 9cc1a16..35bac28 100644 --- a/generic_diff.go +++ b/generic_diff.go @@ -5,19 +5,6 @@ import ( "reflect" ) -//func (self *NDFrame) Diff_() float64 { -// if self.Len() < 1 { -// return NaN() -// } -// fs := make([]float64, 0) -// self.Apply(func(idx int, v any) { -// f := AnyToFloat64(v) -// fs = append(fs, f) -// }) -// stdDev := avx2.Mean(fs) -// return stdDev -//} - // Diff 元素的第一个离散差 // First discrete difference of element. // Calculates the difference of a {klass} element compared with another @@ -32,7 +19,7 @@ func (self *NDFrame) Diff(param any) (s Series) { N = stat.Repeat[float32](float32(v), self.Len()) case Series: vs := v.Values() - N = sliceToFloat32(vs) + N = SliceToFloat32(vs) N = stat.Align(N, Nil2Float32, self.Len()) default: //periods = 1 -- Gitee From d67917bc701c5283eed43bf771c29796b0ecc987 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sun, 5 Feb 2023 14:54:29 +0800 Subject: [PATCH 2/6] =?UTF-8?q?=E4=BB=8E=E5=8F=98=E9=87=8F=E5=90=8D?= =?UTF-8?q?=E4=B8=8A=E6=98=8E=E7=A1=AEint=E6=98=AF64=E4=BD=8D=E7=9A=84int6?= =?UTF-8?q?4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- type_int64.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/type_int64.go b/type_int64.go index 92ef0a1..3f1548e 100644 --- a/type_int64.go +++ b/type_int64.go @@ -8,7 +8,7 @@ import ( const ( Nil2Int64 = int64(0) // 空指针转int64 - IntNaN = int64(0) // int64 无效值 + Int64NaN = int64(0) // int64 无效值 True2Int64 = int64(1) // true转int64 False2Int64 = int64(0) // false 转int64 StringBad2Int64 = int64(0) // 字符串解析int64异常 -- Gitee From 0766cc8d45a4269487818f865a2c6fd2e51b7315 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sun, 5 Feb 2023 14:55:12 +0800 Subject: [PATCH 3/6] =?UTF-8?q?=E6=9A=B4=E9=9C=B2=E4=B8=BA=E5=85=AC?= =?UTF-8?q?=E5=85=B1=E5=87=BD=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- slice_float32.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/slice_float32.go b/slice_float32.go index e27acaf..db95443 100644 --- a/slice_float32.go +++ b/slice_float32.go @@ -13,8 +13,8 @@ func slice_any_to_float32[T Number](s []T) []float32 { return d } -// any输入只能是一维slice或者数组 -func sliceToFloat32(v any) []float32 { +// SliceToFloat32 any输入只能是一维slice或者数组 +func SliceToFloat32(v any) []float32 { var vs []float32 switch values := v.(type) { case []int8: -- Gitee From c7d56d05e0c8c08e2de0423eb7dd64a336accac4 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sun, 5 Feb 2023 14:56:02 +0800 Subject: [PATCH 4/6] =?UTF-8?q?=E8=B0=83=E6=95=B4shift=E5=87=BD=E6=95=B0?= =?UTF-8?q?=E4=B8=BA=E7=8B=AC=E7=AB=8B=E4=B8=80=E4=B8=AA=E6=BA=90=E6=96=87?= =?UTF-8?q?=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- series.go | 122 +++++++++++++----------------------------------- series_shift.go | 61 ++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 89 deletions(-) create mode 100644 series_shift.go diff --git a/series.go b/series.go index eadad4a..a47a980 100644 --- a/series.go +++ b/series.go @@ -2,7 +2,6 @@ package pandas import ( "fmt" - "math" "reflect" ) @@ -11,19 +10,12 @@ import ( type Type = reflect.Kind // Supported Series Types -//const ( -// SERIES_TYPE_INVAILD = "unknown" // 未知类型 -// SERIES_TYPE_BOOL = "bool" // 布尔类型 -// SERIES_TYPE_INT = "int" // int64 -// SERIES_TYPE_FLOAT = "float" // float64 -// SERIES_TYPE_STRING = "string" // string -//) - const ( SERIES_TYPE_INVAILD = reflect.Invalid // 无效类型 SERIES_TYPE_BOOL = reflect.Bool // 布尔类型 - SERIES_TYPE_INT = reflect.Int64 // int64 - SERIES_TYPE_FLOAT = reflect.Float64 // float64 + SERIES_TYPE_INT64 = reflect.Int64 // int64 + SERIES_TYPE_FLOAT32 = reflect.Float32 // float32 + SERIES_TYPE_FLOAT64 = reflect.Float64 // float64 SERIES_TYPE_STRING = reflect.String // string ) @@ -79,6 +71,8 @@ type Series interface { Apply(f func(idx int, v any)) // Diff 元素的第一个离散差 Diff(param any) (s Series) + // 引用其它周期的数据 + Ref(param any) (s Series) } // NewSeries 指定类型创建序列 @@ -86,31 +80,33 @@ func NewSeries(t Type, name string, vals any) Series { var series Series if t == SERIES_TYPE_BOOL { series = NewSeriesWithType(SERIES_TYPE_BOOL, name, vals) - } else if t == SERIES_TYPE_INT { - series = NewSeriesWithType(SERIES_TYPE_INT, name, vals) + } else if t == SERIES_TYPE_INT64 { + series = NewSeriesWithType(SERIES_TYPE_INT64, name, vals) } else if t == SERIES_TYPE_STRING { series = NewSeriesWithType(SERIES_TYPE_STRING, name, vals) + } else if t == SERIES_TYPE_FLOAT64 { + series = NewSeriesWithType(SERIES_TYPE_FLOAT64, name, vals) } else { - // 默认全部强制转换成float64 - series = NewSeriesWithType(SERIES_TYPE_FLOAT, name, vals) + // 默认全部强制转换成float32 + series = NewSeriesWithType(SERIES_TYPE_FLOAT32, name, vals) } return series } -func NewSeries_old(t Type, name string, vals ...interface{}) *Series { - var series Series - if t == SERIES_TYPE_BOOL { - series = NewSeriesBool(name, vals...) - } else if t == SERIES_TYPE_INT { - series = NewSeriesInt64(name, vals...) - } else if t == SERIES_TYPE_STRING { - series = NewSeriesString(name, vals...) - } else { - // 默认全部强制转换成float64 - series = NewSeriesFloat64(name, vals...) - } - return &series -} +//func NewSeries_old(t Type, name string, vals ...interface{}) *Series { +// var series Series +// if t == SERIES_TYPE_BOOL { +// series = NewSeriesBool(name, vals...) +// } else if t == SERIES_TYPE_INT64 { +// series = NewSeriesInt64(name, vals...) +// } else if t == SERIES_TYPE_STRING { +// series = NewSeriesString(name, vals...) +// } else { +// // 默认全部强制转换成float64 +// series = NewSeriesFloat64(name, vals...) +// } +// return &series +//} // GenericSeries 泛型方法, 构造序列, 比其它方式对类型的统一性要求更严格 func GenericSeries[T GenericType](name string, values ...T) Series { @@ -124,31 +120,14 @@ func GenericSeries[T GenericType](name string, values ...T) Series { vv := reflect.ValueOf(v) vk := vv.Kind() switch vk { - //case reflect.Invalid: // {interface} nil - // series.assign(idx, size, Nil2Float64) - //case reflect.Slice: // 切片, 不定长 - // for i := 0; i < vv.Len(); i++ { - // tv := vv.Index(i).Interface() - // str := AnyToFloat64(tv) - // series.assign(idx, size, str) - // } - //case reflect.Array: // 数组, 定长 - // for i := 0; i < vv.Len(); i++ { - // tv := vv.Index(i).Interface() - // av := AnyToFloat64(tv) - // series.assign(idx, size, av) - // } - //case reflect.Struct: // 忽略结构体 - // continue - //default: - // vv := AnyToFloat64(val) - // series.assign(idx, size, vv) case reflect.Bool: _type = SERIES_TYPE_BOOL case reflect.Int64: - _type = SERIES_TYPE_INT + _type = SERIES_TYPE_INT64 + case reflect.Float32: + _type = SERIES_TYPE_FLOAT32 case reflect.Float64: - _type = SERIES_TYPE_FLOAT + _type = SERIES_TYPE_FLOAT64 case reflect.String: _type = SERIES_TYPE_STRING default: @@ -169,9 +148,11 @@ func detectTypes[T GenericType](v T) (Type, any) { case reflect.Bool: _type = SERIES_TYPE_BOOL case reflect.Int64: - _type = SERIES_TYPE_INT + _type = SERIES_TYPE_INT64 + case reflect.Float32: + _type = SERIES_TYPE_FLOAT32 case reflect.Float64: - _type = SERIES_TYPE_FLOAT + _type = SERIES_TYPE_FLOAT64 case reflect.String: _type = SERIES_TYPE_STRING default: @@ -180,43 +161,6 @@ func detectTypes[T GenericType](v T) (Type, any) { return _type, vv.Interface() } -// Shift series切片, 使用可选的时间频率按所需的周期数移动索引 -func Shift[T GenericType](s *Series, periods int, cbNan func() T) Series { - var d Series - d = clone(*s).(Series) - if periods == 0 { - return d - } - - values := d.Values().([]T) - - var ( - naVals []T - dst []T - src []T - ) - - if shlen := int(math.Abs(float64(periods))); shlen < len(values) { - if periods > 0 { - naVals = values[:shlen] - dst = values[shlen:] - src = values - } else { - naVals = values[len(values)-shlen:] - dst = values[:len(values)-shlen] - src = values[shlen:] - } - copy(dst, src) - } else { - naVals = values - } - for i := range naVals { - naVals[i] = cbNan() - } - _ = naVals - return d -} - // FillNa 填充NaN的元素为v // inplace为真是修改series元素的值 // 如果v和Values()返回值的slice类型不一致就会panic diff --git a/series_shift.go b/series_shift.go new file mode 100644 index 0000000..0111cc0 --- /dev/null +++ b/series_shift.go @@ -0,0 +1,61 @@ +package pandas + +import "math" + +// Shift series切片, 使用可选的时间频率按所需的周期数移动索引 +func Shift[T GenericType](s *Series, periods int, cbNan func() T) Series { + var d Series + d = clone(*s).(Series) + if periods == 0 { + return d + } + + values := d.Values().([]T) + + var ( + naVals []T + dst []T + src []T + ) + + if shlen := int(math.Abs(float64(periods))); shlen < len(values) { + if periods > 0 { + naVals = values[:shlen] + dst = values[shlen:] + src = values + } else { + naVals = values[len(values)-shlen:] + dst = values[:len(values)-shlen] + src = values[shlen:] + } + copy(dst, src) + } else { + naVals = values + } + for i := range naVals { + naVals[i] = cbNan() + } + _ = naVals + return d +} + +// Shift2 series切片, 使用可选的时间频率按所需的周期数移动索引 +func Shift2[T GenericType](s *Series, N []float32, cbNan func() T) Series { + var d Series + d = clone(*s).(Series) + if len(N) == 0 { + return d + } + S := (*s).Values().([]T) + values := d.Values().([]T) + for i, _ := range S { + x := N[i] + if Float32IsNaN(x) || int(x) > i { + values[i] = cbNan() + continue + } + values[i] = S[i-int(x)] + } + + return d +} -- Gitee From 8785a1794fe8aedaa7195d8f1fc11ca19aa4cbfd Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sun, 5 Feb 2023 14:57:52 +0800 Subject: [PATCH 5/6] =?UTF-8?q?=E6=96=B0=E5=A2=9Efloat32=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dataframe.go | 22 ++++----------- dataframe_join.go | 6 +++-- dataframe_join_test.go | 26 ++++++++++++++++++ dataframe_matrix.go | 2 +- dataframe_struct.go | 12 +++++---- dataframe_test.go | 20 -------------- dataframe_type.go | 61 +++++++++++++++++++++++++++++++++++------- frame.go | 7 +++-- frame_assign.go | 6 ++--- generic.go | 32 +++++++++++++++++++--- generic_append.go | 7 +++-- generic_diff.go | 4 +-- generic_diff_test.go | 2 +- generic_rolling.go | 4 ++- generic_test.go | 8 +++--- rolling_mean.go | 2 +- series_float64.go | 2 +- series_generic.go | 7 +++-- series_int64.go | 6 ++--- 19 files changed, 156 insertions(+), 80 deletions(-) create mode 100644 dataframe_join_test.go diff --git a/dataframe.go b/dataframe.go index 3edba58..6bd625c 100644 --- a/dataframe.go +++ b/dataframe.go @@ -24,14 +24,16 @@ func NewDataFrame(se ...Series) DataFrame { columns := make([]Series, len(se)) for i, s := range se { var d Series - if s.Type() == SERIES_TYPE_INT { - d = NewSeries(SERIES_TYPE_INT, s.Name(), s.Values()) + if s.Type() == SERIES_TYPE_INT64 { + d = NewSeries(SERIES_TYPE_INT64, s.Name(), s.Values()) } else if s.Type() == SERIES_TYPE_BOOL { d = NewSeries(SERIES_TYPE_BOOL, s.Name(), s.Values()) } else if s.Type() == SERIES_TYPE_STRING { d = NewSeries(SERIES_TYPE_STRING, s.Name(), s.Values()) + } else if s.Type() == SERIES_TYPE_FLOAT32 { + d = NewSeries(SERIES_TYPE_FLOAT32, s.Name(), s.Values()) } else { - d = NewSeries(SERIES_TYPE_FLOAT, s.Name(), s.Values()) + d = NewSeries(SERIES_TYPE_FLOAT64, s.Name(), s.Values()) } columns[i] = d } @@ -228,17 +230,3 @@ func findInStringSlice(str string, s []string) int { // LoadOption is the type used to configure the load of elements type LoadOption func(*loadOptions) - -func parseType(s string) (Type, error) { - switch s { - case "float", "float64", "float32": - return SERIES_TYPE_FLOAT, nil - case "int", "int64", "int32", "int16", "int8": - return SERIES_TYPE_INT, nil - case "string": - return SERIES_TYPE_STRING, nil - case "bool": - return SERIES_TYPE_BOOL, nil - } - return SERIES_TYPE_INVAILD, fmt.Errorf("type (%s) is not supported", s) -} diff --git a/dataframe_join.go b/dataframe_join.go index 063d4be..0f77b4b 100644 --- a/dataframe_join.go +++ b/dataframe_join.go @@ -27,11 +27,13 @@ func (self DataFrame) align(ss ...Series) []Series { var ns any if vt == SERIES_TYPE_BOOL { ns = stat.Align(vs.([]bool), Nil2Bool, int(maxLength)) - } else if vt == SERIES_TYPE_INT { + } else if vt == SERIES_TYPE_INT64 { ns = stat.Align(vs.([]int64), Nil2Int64, int(maxLength)) } else if vt == SERIES_TYPE_STRING { ns = stat.Align(vs.([]string), Nil2String, int(maxLength)) - } else if vt == SERIES_TYPE_FLOAT { + } else if vt == SERIES_TYPE_FLOAT32 { + ns = stat.Align(vs.([]float32), Nil2Float32, int(maxLength)) + } else if vt == SERIES_TYPE_FLOAT64 { ns = stat.Align(vs.([]float64), Nil2Float64, int(maxLength)) } cols[i] = NewSeries(vt, vn, ns) diff --git a/dataframe_join_test.go b/dataframe_join_test.go new file mode 100644 index 0000000..a6d6e28 --- /dev/null +++ b/dataframe_join_test.go @@ -0,0 +1,26 @@ +package pandas + +import ( + "fmt" + "testing" +) + +func TestDataFrame_Join(t *testing.T) { + type testStruct struct { + A string + B int + C bool + D float32 + } + data := []testStruct{ + {"a", 1, true, 0.0}, + {"b", 2, false, 0.5}, + } + df1 := LoadStructs(data) + fmt.Println(df1) + + // 增加1列 + s_e := GenericSeries[string]("", "a0", "a1", "a2", "a3") + df2 := df1.Join(s_e) + fmt.Println(df2) +} diff --git a/dataframe_matrix.go b/dataframe_matrix.go index 58a06f0..66c0c97 100644 --- a/dataframe_matrix.go +++ b/dataframe_matrix.go @@ -12,7 +12,7 @@ func LoadMatrix(mat mat.Matrix) DataFrame { for j := 0; j < nrows; j++ { floats[j] = mat.At(j, i) } - columns[i] = NewSeries(SERIES_TYPE_FLOAT, "", floats) + columns[i] = NewSeries(SERIES_TYPE_FLOAT64, "", floats) } nrows, ncols, err := checkColumnsDimensions(columns...) if err != nil { diff --git a/dataframe_struct.go b/dataframe_struct.go index cdd390f..86432f3 100644 --- a/dataframe_struct.go +++ b/dataframe_struct.go @@ -136,14 +136,16 @@ func LoadStructs(i interface{}, options ...LoadOption) DataFrame { fieldName = "" } if t == SERIES_TYPE_STRING { - columns = append(columns, NewSeriesString(fieldName, elements)) + columns = append(columns, NewSeries(SERIES_TYPE_STRING, fieldName, elements)) } else if t == SERIES_TYPE_BOOL { - columns = append(columns, NewSeriesBool(fieldName, elements)) - } else if t == SERIES_TYPE_INT { - columns = append(columns, NewSeriesInt64(fieldName, elements)) + columns = append(columns, NewSeries(SERIES_TYPE_BOOL, fieldName, elements)) + } else if t == SERIES_TYPE_INT64 { + columns = append(columns, NewSeries(SERIES_TYPE_INT64, fieldName, elements)) + } else if t == SERIES_TYPE_FLOAT32 { + columns = append(columns, NewSeries(SERIES_TYPE_FLOAT32, fieldName, elements)) } else { // 默认float - columns = append(columns, NewSeriesFloat64(fieldName, elements)) + columns = append(columns, NewSeries(SERIES_TYPE_FLOAT64, fieldName, elements)) } } return NewDataFrame(columns...) diff --git a/dataframe_test.go b/dataframe_test.go index d1f4230..f1ef073 100644 --- a/dataframe_test.go +++ b/dataframe_test.go @@ -52,23 +52,3 @@ func TestLoadStructs(t *testing.T) { df2 := LoadStructs(dataTags) fmt.Println(df2) } - -func TestDataFrame_Join(t *testing.T) { - type testStruct struct { - A string - B int - C bool - D float64 - } - data := []testStruct{ - {"a", 1, true, 0.0}, - {"b", 2, false, 0.5}, - } - df1 := LoadStructs(data) - fmt.Println(df1) - - // 增加1列 - s_e := GenericSeries[string]("", "a0", "a1", "a2", "a3") - df2 := df1.Join(s_e) - fmt.Println(df2) -} diff --git a/dataframe_type.go b/dataframe_type.go index ed82a10..4831d6f 100644 --- a/dataframe_type.go +++ b/dataframe_type.go @@ -6,8 +6,20 @@ import ( "strings" ) +const ( + MAX_FLOAT32_PRICE = float32(9999.9999) // float32的价最大阀值触发扩展到float64 +) + +func mustFloat64(f float32) bool { + if f > MAX_FLOAT32_PRICE { + return true + } + return false +} + func findTypeByString(arr []string) (Type, error) { var hasFloats, hasInts, hasBools, hasStrings bool + var useFloat32, useFloat64 bool var stringLengthEqual = -1 var stringLenth = -1 for _, str := range arr { @@ -30,8 +42,15 @@ func findTypeByString(arr []string) (Type, error) { hasInts = true continue } - if _, err := strconv.ParseFloat(str, 64); err == nil { + if f, err := strconv.ParseFloat(str, 64); err == nil { hasFloats = true + if f < MaxFloat32 { + if mustFloat64(float32(f)) { + useFloat64 = true + } else { + useFloat32 = true + } + } continue } if str == "true" || str == "false" { @@ -49,25 +68,31 @@ func findTypeByString(arr []string) (Type, error) { return SERIES_TYPE_STRING, nil case hasBools: return SERIES_TYPE_BOOL, nil + case useFloat32 && !useFloat64: + return SERIES_TYPE_FLOAT32, nil case hasFloats: - return SERIES_TYPE_FLOAT, nil + return SERIES_TYPE_FLOAT64, nil case hasInts: - return SERIES_TYPE_INT, nil + return SERIES_TYPE_INT64, nil default: return SERIES_TYPE_STRING, fmt.Errorf("couldn't detect type") } + } // 检测类型 func detectTypeBySlice(arr []any) (Type, error) { - var hasFloats, hasInts, hasBools, hasStrings bool + var hasFloat32s, hasFloat64s, hasInts, hasBools, hasStrings bool for _, v := range arr { switch value := v.(type) { case string: hasStrings = true continue - case float32, float64: - hasFloats = true + case float32: + hasFloat32s = true + continue + case float64: + hasFloat64s = true continue case int, int32, int64: hasInts = true @@ -85,11 +110,29 @@ func detectTypeBySlice(arr []any) (Type, error) { return SERIES_TYPE_STRING, nil case hasBools: return SERIES_TYPE_BOOL, nil - case hasFloats: - return SERIES_TYPE_FLOAT, nil + case hasFloat32s: + return SERIES_TYPE_FLOAT32, nil + case hasFloat64s: + return SERIES_TYPE_FLOAT64, nil case hasInts: - return SERIES_TYPE_INT, nil + return SERIES_TYPE_INT64, nil default: return SERIES_TYPE_STRING, fmt.Errorf("couldn't detect type") } } + +func parseType(s string) (Type, error) { + switch s { + case "float", "float32": + return SERIES_TYPE_FLOAT32, nil + case "float64": + return SERIES_TYPE_FLOAT64, nil + case "int", "int64", "int32", "int16", "int8": + return SERIES_TYPE_INT64, nil + case "string": + return SERIES_TYPE_STRING, nil + case "bool": + return SERIES_TYPE_BOOL, nil + } + return SERIES_TYPE_INVAILD, fmt.Errorf("type (%s) is not supported", s) +} diff --git a/frame.go b/frame.go index ae9b41b..06f2565 100644 --- a/frame.go +++ b/frame.go @@ -47,10 +47,13 @@ func NewFrame[T GenericType](name string, values ...any) Frame[T] { if frame.type_ == SERIES_TYPE_BOOL { // bool frame.values = reflect.MakeSlice(typeBool, 0, 0).Interface().([]T) - } else if frame.type_ == SERIES_TYPE_INT { + } else if frame.type_ == SERIES_TYPE_INT64 { // int64 frame.values = reflect.MakeSlice(typeInt64, 0, 0).Interface().([]T) - } else if frame.type_ == SERIES_TYPE_FLOAT { + } else if frame.type_ == SERIES_TYPE_FLOAT32 { + // float32 + frame.values = reflect.MakeSlice(typeFloat32, 0, 0).Interface().([]T) + } else if frame.type_ == SERIES_TYPE_FLOAT64 { // float64 frame.values = reflect.MakeSlice(typeFloat64, 0, 0).Interface().([]T) } else { diff --git a/frame_assign.go b/frame_assign.go index 13709b2..48a4cc3 100644 --- a/frame_assign.go +++ b/frame_assign.go @@ -7,9 +7,9 @@ func (self *GenericFrame[T]) assign(idx, size int, val any) { var v any if self.type_ == SERIES_TYPE_BOOL { v = AnyToBool(val) - } else if self.type_ == SERIES_TYPE_INT { + } else if self.type_ == SERIES_TYPE_INT64 { v = AnyToInt64(val) - } else if self.type_ == SERIES_TYPE_FLOAT { + } else if self.type_ == SERIES_TYPE_FLOAT64 { v = AnyToFloat64(val) } else { v = AnyToString(val) @@ -24,7 +24,7 @@ func (self *GenericFrame[T]) assign(idx, size int, val any) { _vv := reflect.ValueOf(v) _vi := _vv.Interface() // float和string类型有可能是NaN, 对nil和NaN进行计数 - if self.type_ == SERIES_TYPE_FLOAT && Float64IsNaN(_vi.(float64)) { + if self.type_ == SERIES_TYPE_FLOAT64 && Float64IsNaN(_vi.(float64)) { self.nilCount++ } else if self.type_ == SERIES_TYPE_STRING && StringIsNaN(_vi.(string)) { self.nilCount++ diff --git a/generic.go b/generic.go index e89f256..ddf9b17 100644 --- a/generic.go +++ b/generic.go @@ -9,7 +9,7 @@ import ( // GenericType Series支持的所有类型 type GenericType interface { - ~bool | ~int64 | ~float64 | ~string + ~bool | ~int64 | ~float32 | ~float64 | ~string } // NDFrame 这里本意是想做一个父类, 实际的效果是一个抽象类 @@ -67,7 +67,9 @@ func assign[T GenericType](frame *NDFrame, idx, size int, v T) { _vv := reflect.ValueOf(v) _vi := _vv.Interface() // float和string类型有可能是NaN, 对nil和NaN进行计数 - if frame.Type() == SERIES_TYPE_FLOAT && Float64IsNaN(_vi.(float64)) { + if frame.Type() == SERIES_TYPE_FLOAT32 && Float32IsNaN(_vi.(float32)) { + frame.nilCount++ + } else if frame.Type() == SERIES_TYPE_FLOAT64 && Float64IsNaN(_vi.(float64)) { frame.nilCount++ } else if frame.Type() == SERIES_TYPE_STRING && StringIsNaN(_vi.(string)) { frame.nilCount++ @@ -154,7 +156,7 @@ func (self *NDFrame) Empty() Series { rows: 0, values: []bool{}, } - } else if self.type_ == SERIES_TYPE_INT { + } else if self.type_ == SERIES_TYPE_INT64 { frame = NDFrame{ formatter: self.formatter, name: self.name, @@ -163,7 +165,16 @@ func (self *NDFrame) Empty() Series { rows: 0, values: []int64{}, } - } else if self.type_ == SERIES_TYPE_FLOAT { + } else if self.type_ == SERIES_TYPE_FLOAT32 { + frame = NDFrame{ + formatter: self.formatter, + name: self.name, + type_: self.type_, + nilCount: 0, + rows: 0, + values: []float32{}, + } + } else if self.type_ == SERIES_TYPE_FLOAT64 { frame = NDFrame{ formatter: self.formatter, name: self.name, @@ -198,6 +209,9 @@ func (self *NDFrame) Repeat(x any, repeats int) Series { case []int64: vs := Repeat(AnyToInt64(x), repeats) return NewNDFrame(self.name, vs...) + case []float32: + vs := Repeat(AnyToFloat32(x), repeats) + return NewNDFrame(self.name, vs...) default: //case []float64: vs := Repeat(AnyToFloat64(x), repeats) return NewNDFrame(self.name, vs...) @@ -224,6 +238,10 @@ func (self *NDFrame) Shift(periods int) Series { return Shift[int64](&d, periods, func() int64 { return Nil2Int64 }) + case []float32: + return Shift[float32](&d, periods, func() float32 { + return Nil2Float32 + }) default: //case []float64: return Shift[float64](&d, periods, func() float64 { return Nil2Float64 @@ -271,6 +289,12 @@ func (self *NDFrame) FillNa(v any, inplace bool) { rows[idx] = AnyToInt64(v) } } + case []float32: + for idx, iv := range rows { + if Float32IsNaN(iv) && inplace { + rows[idx] = AnyToFloat32(v) + } + } case []float64: for idx, iv := range rows { if Float64IsNaN(iv) && inplace { diff --git a/generic_append.go b/generic_append.go index d12f32b..bac368b 100644 --- a/generic_append.go +++ b/generic_append.go @@ -7,10 +7,13 @@ func (self *NDFrame) insert(idx, size int, v any) { if self.type_ == SERIES_TYPE_BOOL { val := AnyToBool(v) assign[bool](self, idx, size, val) - } else if self.type_ == SERIES_TYPE_INT { + } else if self.type_ == SERIES_TYPE_INT64 { val := AnyToInt64(v) assign[int64](self, idx, size, val) - } else if self.type_ == SERIES_TYPE_FLOAT { + } else if self.type_ == SERIES_TYPE_FLOAT32 { + val := AnyToFloat32(v) + assign[float32](self, idx, size, val) + } else if self.type_ == SERIES_TYPE_FLOAT64 { val := AnyToFloat64(v) assign[float64](self, idx, size, val) } else { diff --git a/generic_diff.go b/generic_diff.go index 35bac28..ec24539 100644 --- a/generic_diff.go +++ b/generic_diff.go @@ -10,7 +10,7 @@ import ( // Calculates the difference of a {klass} element compared with another // element in the {klass} (default is element in previous row). func (self *NDFrame) Diff(param any) (s Series) { - if !(self.type_ == SERIES_TYPE_INT || self.type_ == SERIES_TYPE_FLOAT) { + if !(self.type_ == SERIES_TYPE_INT64 || self.type_ == SERIES_TYPE_FLOAT32 || self.type_ == SERIES_TYPE_FLOAT64) { return NewSeries(SERIES_TYPE_INVAILD, "", "") } var N []float32 @@ -51,6 +51,6 @@ func (self *NDFrame) Diff(param any) (s Series) { d = append(d, diff) front = cf } - s = NewSeries(SERIES_TYPE_FLOAT, r.series.Name(), d) + s = NewSeries(SERIES_TYPE_FLOAT64, r.series.Name(), d) return } diff --git a/generic_diff_test.go b/generic_diff_test.go index deff2d7..ee29550 100644 --- a/generic_diff_test.go +++ b/generic_diff_test.go @@ -17,7 +17,7 @@ func TestNDFrame_Diff(t *testing.T) { fmt.Println("序列化结果:", r1) fmt.Println("------------------------------------------------------------") d2 := []float64{1, 2, 3, 4, 3, 3, 2, 1, Nil2Float64, Nil2Float64, Nil2Float64, Nil2Float64} - s2 := NewSeries(SERIES_TYPE_FLOAT, "x", d2) + s2 := NewSeries(SERIES_TYPE_FLOAT64, "x", d2) fmt.Printf("序列化参数: %+v\n", s2.Values()) r2 := df.Col("x").Diff(s2).Values() fmt.Println("序列化结果:", r2) diff --git a/generic_rolling.go b/generic_rolling.go index 80d2653..72e7d22 100644 --- a/generic_rolling.go +++ b/generic_rolling.go @@ -25,9 +25,11 @@ func (self *NDFrame) Rolling2(param any) RollingAndExpandingMixin { switch v := param.(type) { case int: N = stat.Repeat[float32](float32(v), self.Len()) + case []float32: + N = stat.Align(v, Nil2Float32, self.Len()) case Series: vs := v.Values() - N = sliceToFloat32(vs) + N = SliceToFloat32(vs) N = stat.Align(N, Nil2Float32, self.Len()) default: panic(exception.New(1, "error window")) diff --git a/generic_test.go b/generic_test.go index 2a25749..de9869e 100644 --- a/generic_test.go +++ b/generic_test.go @@ -7,12 +7,12 @@ import ( func TestSeriesFrame(t *testing.T) { data := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10} - s1 := NewSeries(SERIES_TYPE_FLOAT, "x", data) + s1 := NewSeries(SERIES_TYPE_FLOAT64, "x", data) fmt.Printf("%+v\n", s1) var d1 any d1 = data - s2 := NewSeries(SERIES_TYPE_FLOAT, "x", d1) + s2 := NewSeries(SERIES_TYPE_FLOAT64, "x", d1) fmt.Printf("%+v\n", s2) var s3 Series @@ -22,7 +22,7 @@ func TestSeriesFrame(t *testing.T) { var s4 Series ts4 := GenericSeries[float64]("x", data...) - ts4 = NewSeries(SERIES_TYPE_FLOAT, "x", data) + ts4 = NewSeries(SERIES_TYPE_FLOAT64, "x", data) s4 = ts4 fmt.Printf("%+v\n", s4.Values()) } @@ -79,7 +79,7 @@ func TestRolling2(t *testing.T) { fmt.Println("序列化结果:", r1) fmt.Println("------------------------------------------------------------") d2 := []float64{1, 2, 3, 4, 3, 3, 2, 1, Nil2Float64, Nil2Float64, Nil2Float64, Nil2Float64} - s2 := NewSeries(SERIES_TYPE_FLOAT, "x", d2) + s2 := NewSeries(SERIES_TYPE_FLOAT64, "x", d2) fmt.Printf("序列化参数: %+v\n", s2.Values()) r2 := df.Col("x").Rolling2(s2).Mean().Values() fmt.Println("序列化结果:", r2) diff --git a/rolling_mean.go b/rolling_mean.go index 62dcf2c..b0405c5 100644 --- a/rolling_mean.go +++ b/rolling_mean.go @@ -6,6 +6,6 @@ func (r RollingAndExpandingMixin) Mean() (s Series) { for _, block := range r.getBlocks() { d = append(d, block.Mean()) } - s = NewSeries(SERIES_TYPE_FLOAT, r.series.Name(), d) + s = NewSeries(SERIES_TYPE_FLOAT64, r.series.Name(), d) return } diff --git a/series_float64.go b/series_float64.go index 63a2a13..8d5661f 100644 --- a/series_float64.go +++ b/series_float64.go @@ -99,7 +99,7 @@ func (self *SeriesFloat64) Rename(n string) { // Type returns the type of data the series holds. func (self *SeriesFloat64) Type() Type { - return SERIES_TYPE_FLOAT + return SERIES_TYPE_FLOAT64 } func (self *SeriesFloat64) Len() int { diff --git a/series_generic.go b/series_generic.go index 6441cab..cf320b0 100644 --- a/series_generic.go +++ b/series_generic.go @@ -58,10 +58,13 @@ func NewSeriesWithType(_type Type, name string, values ...interface{}) Series { if frame.type_ == SERIES_TYPE_BOOL { // bool frame.values = reflect.MakeSlice(typeBool, 0, 0).Interface() - } else if frame.type_ == SERIES_TYPE_INT { + } else if frame.type_ == SERIES_TYPE_INT64 { // int64 frame.values = reflect.MakeSlice(typeInt64, 0, 0).Interface() - } else if frame.type_ == SERIES_TYPE_FLOAT { + } else if frame.type_ == SERIES_TYPE_FLOAT32 { + // float32 + frame.values = reflect.MakeSlice(typeFloat32, 0, 0).Interface() + } else if frame.type_ == SERIES_TYPE_FLOAT64 { // float64 frame.values = reflect.MakeSlice(typeFloat64, 0, 0).Interface() } else { diff --git a/series_int64.go b/series_int64.go index 750f9d7..1219919 100644 --- a/series_int64.go +++ b/series_int64.go @@ -36,7 +36,7 @@ func NewSeriesInt64(name string, vals ...interface{}) *SeriesInt64 { vk := vv.Kind() switch vk { case reflect.Invalid: // {interface} nil - series.assign(idx, size, IntNaN) + series.assign(idx, size, Int64NaN) case reflect.Slice: // 切片, 不定长 for i := 0; i < vv.Len(); i++ { tv := vv.Index(i).Interface() @@ -96,14 +96,14 @@ func (self *SeriesInt64) Rename(n string) { } func (self *SeriesInt64) Type() Type { - return SERIES_TYPE_INT + return SERIES_TYPE_INT64 } func (self *SeriesInt64) Shift(periods int) Series { var d Series d = clone(self).(Series) return Shift[int64](&d, periods, func() int64 { - return IntNaN + return Int64NaN }) } -- Gitee From 2fd735e18968ca35c5b227db28616b1a2f165ea5 Mon Sep 17 00:00:00 2001 From: wangfeng Date: Sun, 5 Feb 2023 15:00:43 +0800 Subject: [PATCH 6/6] =?UTF-8?q?=E6=96=B0=E5=A2=9Efloat32=E6=95=B0=E6=8D=AE?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- formula/ma.go | 24 ++++++++++++++++++++++++ formula/ma_test.go | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 formula/ma.go create mode 100644 formula/ma_test.go diff --git a/formula/ma.go b/formula/ma.go new file mode 100644 index 0000000..0bada94 --- /dev/null +++ b/formula/ma.go @@ -0,0 +1,24 @@ +package formula + +import ( + "gitee.com/quant1x/pandas" + "gitee.com/quant1x/pandas/exception" + "gitee.com/quant1x/pandas/stat" +) + +// MA 计算移动均线 +// 求序列的N日简单移动平均值, 返回序列 +func MA(S pandas.Series, N any) any { + var X []float32 + switch v := N.(type) { + case int: + X = stat.Repeat[float32](float32(v), S.Len()) + case pandas.Series: + vs := v.Values() + X = pandas.SliceToFloat32(vs) + X = stat.Align(X, pandas.Nil2Float32, S.Len()) + default: + panic(exception.New(1, "error window")) + } + return S.Rolling2(X).Mean().Values() +} diff --git a/formula/ma_test.go b/formula/ma_test.go new file mode 100644 index 0000000..0ff2fec --- /dev/null +++ b/formula/ma_test.go @@ -0,0 +1,33 @@ +package formula + +import ( + "fmt" + "gitee.com/quant1x/pandas" + "testing" +) + +func TestMA(t *testing.T) { + type testStruct struct { + A string + B int + C bool + D float32 + } + data := []testStruct{ + {"a", 1, true, 0.0}, + {"b", 2, false, 0.5}, + } + df1 := pandas.LoadStructs(data) + fmt.Println(df1) + // 修改列名 + _ = df1.SetNames("a", "b", "c", "d") + // 增加1列 + s_e := pandas.GenericSeries[string]("", "a0", "a1", "a2", "a3") + df2 := df1.Join(s_e) + fmt.Println(df2) + B := df2.Col("b") + + // 2日均线 + r2 := MA(B, 2) + fmt.Println(r2) +} -- Gitee