diff --git a/dataframe_group.go b/dataframe_group.go
new file mode 100644
index 0000000000000000000000000000000000000000..e464447624a62296567b9d731b76e2e28742b786
--- /dev/null
+++ b/dataframe_group.go
@@ -0,0 +1,34 @@
+package pandas
+
+import (
+	"gitee.com/quant1x/pandas/stat"
+)
+
+// Group returns the rows whose value in column columnName is accepted by filter.
+//
+// filter is handed to series.Apply and receives the column's stat.Type together
+// with each element value; the indexes for which it returns true are kept.
+// If the column has no elements, the receiver is returned unchanged.
+//
+// NOTE(review): if stat.IntsToRanges yields no ranges (presumably when nothing
+// matches), the zero-value DataFrame{} is returned; confirm this is intended.
+func (self DataFrame) Group(columnName string, filter func(kind stat.Type, e any) bool) DataFrame {
+	series := self.Col(columnName)
+	if series.Len() == 0 {
+		return self
+	}
+	t := series.Type()
+	indexes := []int{}
+	// Record the index of every element accepted by filter.
+	series.Apply(func(idx int, v any) {
+		if filter(t, v) {
+			indexes = append(indexes, idx)
+		}
+	})
+	// Select each range returned by stat.IntsToRanges and concatenate the pieces.
+	df := DataFrame{}
+	for _, r := range stat.IntsToRanges(indexes) {
+		df = df.Concat(self.SelectRows(r))
+	}
+	return df
+}
diff --git a/dataframe_group_test.go b/dataframe_group_test.go
new file mode 100644
index 0000000000000000000000000000000000000000..0317de459d9167556d19f5437604c83794507c8f
--- /dev/null
+++ b/dataframe_group_test.go
@@ -0,0 +1,40 @@
+package pandas
+
+import (
+	"fmt"
+	"testing"
+
+	"gitee.com/quant1x/pandas/stat"
+)
+
+// TestDataFrame_Group loads a small frame and prints the rows whose column A equals "b".
+//
+// NOTE(review): the test only prints its results; consider asserting on df1.
+func TestDataFrame_Group(t *testing.T) {
+	type testStruct struct {
+		A string
+		B int
+		C bool
+		D float64
+	}
+	data := []testStruct{
+		{"a", 1, true, 0.0},
+		{"b", 2, false, 0.5},
+		{"b", 3, false, 3.5},
+		{"b", 4, false, 2.5},
+		{"b", 5, false, 1.5},
+		{"a", 6, true, 0.0},
+		{"a", 7, true, 0.0},
+		{"a", 8, true, 0.0},
+		{"a", 9, true, 0.0},
+		{"a", 10, true, 0.0},
+		{"a", 11, true, 0.0},
+	}
+	df := LoadStructs(data)
+	fmt.Println(df)
+
+	df1 := df.Group("A", func(kind stat.Type, e any) bool {
+		return stat.AnyToString(e) == "b"
+	})
+	fmt.Println(df1)
+}
diff --git a/dataframe_subset.go b/dataframe_subset.go
index 747f2778b9ca163588bada5fa8fb5aedb3c99262..b62ea09764946b4b53595c5d3b12f10bd5475077 100644
--- a/dataframe_subset.go
+++ b/dataframe_subset.go
@@ -26,7 +26,13 @@ func (self DataFrame) Subset(start, end int) DataFrame {
 	}
 }
 
-// Select 选择一段记录
+// Sub selects a subset of rows; start and end also support selecting from the end toward the front.
+func (self DataFrame) Sub(start, end int) DataFrame {
+	sl := stat.RangeFinite(start, end)
+	return self.SelectRows(sl)
+}
+
+// SelectRows selects a range of records.
 func (self DataFrame) SelectRows(p stat.ScopeLimit) DataFrame {
 	columns := []stat.Series{}
 	for i := range self.columns {
diff --git a/dataframe_xstring.go b/dataframe_xstring.go
index 78fbd087e0289b060a001fd260e0d8d2daa24cb0..c21015d1a42d2431a022137aa66dadb3a671915c 100644
--- a/dataframe_xstring.go
+++ b/dataframe_xstring.go
@@ -36,7 +36,7 @@ func (self DataFrame) print(
 		str = fmt.Sprintf("%s error: %v", class, self.Err)
 		return
 	}
-	nMinRows := int(maxRows / 2)
+	nMinRows := maxRows / 2
 	nTotal := 0
 	nrows, ncols := self.Dims()
 	if nrows == 0 || ncols == 0 {
@@ -80,10 +80,10 @@ func (self DataFrame) print(
 		} else if i == nMinRows+1 && shortening {
 			// 跳过
 			rowNumbersOffset -= 1
-		} else if i < nMinRows+1 {
-			add = strconv.Itoa(i-1+rowNumbersOffset) + ":"
-		} else {
+		} else if i >= nMinRows+1 && shortening {
 			add = strconv.Itoa(nrows-maxRows+i-1+rowNumbersOffset) + ":"
+		} else {
+			add = strconv.Itoa(i-1+rowNumbersOffset) + ":"
 		}
 		//fmt.Println(i)
 		records[i] = append([]string{add}, records[i]...)