diff --git a/.gitignore b/.gitignore index da13d98f358bb2c563548e899e4b260561cd5327..040c7f787c5556b351d0e10375c05ce11a45e321 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,4 @@ assembly/version.properties coverage.txt # test -tutorials.csv +test-*-w* diff --git a/builtin.go b/builtin.go index 94d82328e15780f2d9b70199c4401dca3fe560c9..342d9ef797f476a42d4ddb1aefc273a8b1bf24b0 100644 --- a/builtin.go +++ b/builtin.go @@ -8,6 +8,10 @@ import ( ) // 收敛统一初始化 +const ( + quant1xPath = "~/.quant1x" // quant1x默认 + tmpDir = quant1xPath + "/tmp" // 临时路径 +) // 全局变量定义 diff --git a/dataframe.go b/dataframe.go index 784b507304038fdf431a8109db127a6377fef664..3edba58dc5c41ea7efe8ec394006781c459cacae 100644 --- a/dataframe.go +++ b/dataframe.go @@ -229,100 +229,6 @@ func findInStringSlice(str string, s []string) int { // LoadOption is the type used to configure the load of elements type LoadOption func(*loadOptions) -type loadOptions struct { - // Specifies which is the default type in case detectTypes is disabled. - defaultType Type - - // If set, the type of each column will be automatically detected unless - // otherwise specified. - detectTypes bool - - // If set, the first row of the tabular structure will be used as column - // names. - hasHeader bool - - // The names to set as columns names. - names []string - - // Defines which values are going to be considered as NaN when parsing from string. - nanValues []string - - // Defines the csv delimiter - delimiter rune - - // EnablesLazyQuotes - lazyQuotes bool - - // Defines the comment delimiter - comment rune - - // The types of specific columns can be specified via column name. - types map[string]Type -} - -// DefaultType sets the defaultType option for loadOptions. -func DefaultType(t Type) LoadOption { - return func(c *loadOptions) { - c.defaultType = t - } -} - -// DetectTypes sets the detectTypes option for loadOptions. -func DetectTypes(b bool) LoadOption { - return func(c *loadOptions) { - c.detectTypes = b - } -} - -// HasHeader sets the hasHeader option for loadOptions. -func HasHeader(b bool) LoadOption { - return func(c *loadOptions) { - c.hasHeader = b - } -} - -// Names sets the names option for loadOptions. -func Names(names ...string) LoadOption { - return func(c *loadOptions) { - c.names = names - } -} - -// NaNValues sets the nanValues option for loadOptions. -func NaNValues(nanValues []string) LoadOption { - return func(c *loadOptions) { - c.nanValues = nanValues - } -} - -// WithTypes sets the types option for loadOptions. -func WithTypes(coltypes map[string]Type) LoadOption { - return func(c *loadOptions) { - c.types = coltypes - } -} - -// WithDelimiter sets the csv delimiter other than ',', for example '\t' -func WithDelimiter(b rune) LoadOption { - return func(c *loadOptions) { - c.delimiter = b - } -} - -// WithLazyQuotes sets csv parsing option to LazyQuotes -func WithLazyQuotes(b bool) LoadOption { - return func(c *loadOptions) { - c.lazyQuotes = b - } -} - -// WithComments sets the csv comment line detect to remove lines -func WithComments(b rune) LoadOption { - return func(c *loadOptions) { - c.comment = b - } -} - func parseType(s string) (Type, error) { switch s { case "float", "float64", "float32": diff --git a/dataframe_csv.go b/dataframe_csv.go index e8bc3278280777eab8bfdfc9ad4164c97a8f6327..19a7a6fa0cf0ba78890f969fb7b8fe2eec3b808e 100644 --- a/dataframe_csv.go +++ b/dataframe_csv.go @@ -60,21 +60,6 @@ func ReadCSV(in any, options ...LoadOption) DataFrame { return LoadRecords(records, options...) } -// WriteOption is the type used to configure the writing of elements -type WriteOption func(*writeOptions) - -type writeOptions struct { - // Specifies whether the header is also written - writeHeader bool -} - -// WriteHeader sets the writeHeader option for writeOptions. -func WriteHeader(b bool) WriteOption { - return func(c *writeOptions) { - c.writeHeader = b - } -} - // WriteCSV writes the DataFrame to the given io.Writer as a CSV file. // 支持文件名和io两种方式写入数据 func (self DataFrame) WriteCSV(out any, options ...WriteOption) error { @@ -118,18 +103,3 @@ func (self DataFrame) WriteCSV(out any, options ...WriteOption) error { return csv.NewWriter(writer).WriteAll(records) } - -// ToCSV 写csv格式文件 -func (self DataFrame) oldToCSV(filename string, options ...WriteOption) error { - filepath, err := homedir.Expand(filename) - if err != nil { - return err - } - csvFile, err := os.Create(filepath) - if err != nil { - return err - } - defer api.CloseQuietly(csvFile) - err = self.WriteCSV(csvFile, options...) - return err -} diff --git a/dataframe_excel.go b/dataframe_excel.go new file mode 100644 index 0000000000000000000000000000000000000000..33a86f04623a64f4c4c531d60dbb966da04422b9 --- /dev/null +++ b/dataframe_excel.go @@ -0,0 +1,78 @@ +package pandas + +import ( + "fmt" + "github.com/mymmsc/gox/logger" + "github.com/mymmsc/gox/util/homedir" + "github.com/tealeg/xlsx" +) + +// 读取excel文件 +func ReadExcel(filename string, options ...LoadOption) DataFrame { + if IsEmpty(filename) { + return DataFrame{Err: fmt.Errorf("filaname is empty")} + } + + filepath, err := homedir.Expand(filename) + if err != nil { + logger.Errorf("%s, error=%+v\n", filename, err) + return DataFrame{Err: err} + } + //filename := "test.xlsx" + xlFile, err := xlsx.OpenFile(filepath) + if err != nil { + return DataFrame{Err: err} + } + colnums := make([][]string, 0) + for _, sheet := range xlFile.Sheets { + //fmt.Printf("Sheet Name: %s\n", sheet.Name) + for _, row := range sheet.Rows { + col := make([]string, 0) + for _, cell := range row.Cells { + text := cell.String() + col = append(col, text) + } + colnums = append(colnums, col) + } + // 只展示第一个sheet + break + } + + return LoadRecords(colnums, options...) +} + +// WriteExcel 支持文件名和io两种方式写入数据 +func (self DataFrame) WriteExcel(filename string, options ...WriteOption) error { + filepath, err := homedir.Expand(filename) + if err != nil { + return err + } + xlFile := xlsx.NewFile() + sheet, err := xlFile.AddSheet("Sheet(pandas)") + if err != nil { + return err + } + // Set the default write options + cfg := writeOptions{ + writeHeader: true, + } + + // Set any custom write options + for _, option := range options { + option(&cfg) + } + + records := self.Records() + if !cfg.writeHeader { + records = records[1:] + } + for _, cols := range records { + row := sheet.AddRow() + for _, col := range cols { + cell := row.AddCell() + cell.SetString(col) + } + } + + return xlFile.Save(filepath) +} diff --git a/dataframe_excel_test.go b/dataframe_excel_test.go new file mode 100644 index 0000000000000000000000000000000000000000..2862e398181c28edd194ac5916c15334b8fee4c7 --- /dev/null +++ b/dataframe_excel_test.go @@ -0,0 +1,17 @@ +package pandas + +import ( + "fmt" + "testing" +) + +func TestReadExcel(t *testing.T) { + filename := "./testfiles/test-excel-r01.xlsx" + df := ReadExcel(filename) + fmt.Println(df) + toFile := "./testfiles/test-excel-w01.xlsx" + err := df.WriteExcel(toFile) + if err != nil { + t.Errorf("write excel=%s, failed", toFile) + } +} diff --git a/dataframe_map.go b/dataframe_map.go new file mode 100644 index 0000000000000000000000000000000000000000..ca19279a0d6a22d5540c3034ed336b4e13636919 --- /dev/null +++ b/dataframe_map.go @@ -0,0 +1,47 @@ +package pandas + +import ( + "fmt" + "sort" +) + +// LoadMaps creates a new DataFrame based on the given maps. This function assumes +// that every map on the array represents a row of observations. +func LoadMaps(maps []map[string]interface{}, options ...LoadOption) DataFrame { + if len(maps) == 0 { + return DataFrame{Err: fmt.Errorf("load maps: empty array")} + } + inStrSlice := func(i string, s []string) bool { + for _, v := range s { + if v == i { + return true + } + } + return false + } + // Detect all colnames + var colnames []string + for _, v := range maps { + for k := range v { + if exists := inStrSlice(k, colnames); !exists { + colnames = append(colnames, k) + } + } + } + sort.Strings(colnames) + records := make([][]string, len(maps)+1) + records[0] = colnames + for k, m := range maps { + row := make([]string, len(colnames)) + for i, colname := range colnames { + element := "" + val, ok := m[colname] + if ok { + element = fmt.Sprint(val) + } + row[i] = element + } + records[k+1] = row + } + return LoadRecords(records, options...) +} diff --git a/dataframe_matrix.go b/dataframe_matrix.go new file mode 100644 index 0000000000000000000000000000000000000000..58a06f0ee61165aaf5580d83aeb0585270ad3b87 --- /dev/null +++ b/dataframe_matrix.go @@ -0,0 +1,32 @@ +package pandas + +import "gonum.org/v1/gonum/mat" + +// LoadMatrix loads the given Matrix as a DataFrame +// TODO: Add Loadoptions +func LoadMatrix(mat mat.Matrix) DataFrame { + nrows, ncols := mat.Dims() + columns := make([]Series, ncols) + for i := 0; i < ncols; i++ { + floats := make([]float64, nrows) + for j := 0; j < nrows; j++ { + floats[j] = mat.At(j, i) + } + columns[i] = NewSeries(SERIES_TYPE_FLOAT, "", floats) + } + nrows, ncols, err := checkColumnsDimensions(columns...) + if err != nil { + return DataFrame{Err: err} + } + df := DataFrame{ + columns: columns, + ncols: ncols, + nrows: nrows, + } + colnames := df.Names() + fixColnames(colnames) + for i, colname := range colnames { + df.columns[i].Rename(colname) + } + return df +} diff --git a/dataframe_options.go b/dataframe_options.go new file mode 100644 index 0000000000000000000000000000000000000000..0d2e6a3d0a29c18763f66572073c352d3a347181 --- /dev/null +++ b/dataframe_options.go @@ -0,0 +1,110 @@ +package pandas + +type loadOptions struct { + // Specifies which is the default type in case detectTypes is disabled. + defaultType Type + + // If set, the type of each column will be automatically detected unless + // otherwise specified. + detectTypes bool + + // If set, the first row of the tabular structure will be used as column + // names. + hasHeader bool + + // The names to set as columns names. + names []string + + // Defines which values are going to be considered as NaN when parsing from string. + nanValues []string + + // Defines the csv delimiter + delimiter rune + + // EnablesLazyQuotes + lazyQuotes bool + + // Defines the comment delimiter + comment rune + + // The types of specific columns can be specified via column name. + types map[string]Type +} + +// DefaultType sets the defaultType option for loadOptions. +func DefaultType(t Type) LoadOption { + return func(c *loadOptions) { + c.defaultType = t + } +} + +// DetectTypes sets the detectTypes option for loadOptions. +func DetectTypes(b bool) LoadOption { + return func(c *loadOptions) { + c.detectTypes = b + } +} + +// HasHeader sets the hasHeader option for loadOptions. +func HasHeader(b bool) LoadOption { + return func(c *loadOptions) { + c.hasHeader = b + } +} + +// Names sets the names option for loadOptions. +func Names(names ...string) LoadOption { + return func(c *loadOptions) { + c.names = names + } +} + +// NaNValues sets the nanValues option for loadOptions. +func NaNValues(nanValues []string) LoadOption { + return func(c *loadOptions) { + c.nanValues = nanValues + } +} + +// WithTypes sets the types option for loadOptions. +func WithTypes(coltypes map[string]Type) LoadOption { + return func(c *loadOptions) { + c.types = coltypes + } +} + +// WithDelimiter sets the csv delimiter other than ',', for example '\t' +func WithDelimiter(b rune) LoadOption { + return func(c *loadOptions) { + c.delimiter = b + } +} + +// WithLazyQuotes sets csv parsing option to LazyQuotes +func WithLazyQuotes(b bool) LoadOption { + return func(c *loadOptions) { + c.lazyQuotes = b + } +} + +// WithComments sets the csv comment line detect to remove lines +func WithComments(b rune) LoadOption { + return func(c *loadOptions) { + c.comment = b + } +} + +// WriteOption is the type used to configure the writing of elements +type WriteOption func(*writeOptions) + +type writeOptions struct { + // Specifies whether the header is also written + writeHeader bool +} + +// WriteHeader sets the writeHeader option for writeOptions. +func WriteHeader(b bool) WriteOption { + return func(c *writeOptions) { + c.writeHeader = b + } +} diff --git a/go.mod b/go.mod index cf538904a47c6bd834b682cb7b098072bea2a753..61ff1684764f645feb419d80e99e94f997f6d456 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/google/go-cmp v0.5.8 github.com/huandu/go-clone v1.4.1 github.com/mymmsc/gox v1.3.1 + github.com/tealeg/xlsx v1.0.5 github.com/viterin/vek v0.4.0 gonum.org/v1/gonum v0.12.0 ) diff --git a/go.sum b/go.sum index 05d0989cc9efd1ab1d306b544e750093ec0f3730..0d0b4f6be2d01dcceab62b9297d59700e00ecb92 100644 --- a/go.sum +++ b/go.sum @@ -9,8 +9,11 @@ github.com/huandu/go-assert v1.1.5 h1:fjemmA7sSfYHJD7CUqs9qTwwfdNAx7/j2/ZlHXzNB3 github.com/huandu/go-assert v1.1.5/go.mod h1:yOLvuqZwmcHIC5rIzrBhT7D3Q9c3GFnd0JrPVhn/06U= github.com/huandu/go-clone v1.4.1 h1:QQYjiLadyxOvdwgZoH8f1xGkvvf4+Cm8be7fo9W2QQA= github.com/huandu/go-clone v1.4.1/go.mod h1:ReGivhG6op3GYr+UY3lS6mxjKp7MIGTknuU5TbTVaXE= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI= @@ -22,6 +25,8 @@ github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZN github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= +github.com/tealeg/xlsx v1.0.5 h1:+f8oFmvY8Gw1iUXzPk+kz+4GpbDZPK1FhPiQRd+ypgE= +github.com/tealeg/xlsx v1.0.5/go.mod h1:btRS8dz54TDnvKNosuAqxrM1QgN1udgk9O34bDCnORM= github.com/viterin/partial v1.0.0 h1:e6z0cWJ+SddpXHoLU4ikIDrsI/ZE+p+hqMsB++8IfwE= github.com/viterin/partial v1.0.0/go.mod h1:K9y+kVePpmfZN510YNHoUs+6scZ2K7BLojfI8aW2nw0= github.com/viterin/vek v0.4.0 h1:P34BWVGd3pSZFma9SE+G1pTucMGtw9p79I+Hull/+Ao= @@ -33,6 +38,7 @@ golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= gonum.org/v1/gonum v0.12.0 h1:xKuo6hzt+gMav00meVPUlXwSdoEJP46BR+wdxQEFK2o= gonum.org/v1/gonum v0.12.0/go.mod h1:73TDxJfAAHeA8Mk9mf8NlIppyhQNo5GLTcYeqgo2lvY= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/testfiles/test-excel-r01.xlsx b/testfiles/test-excel-r01.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..fc4298e87d21d4f8b801874bb8cde78b987fe87c Binary files /dev/null and b/testfiles/test-excel-r01.xlsx differ diff --git a/tests/series1_test.go b/tests/series1_test.go index 61494a57f9df5e13fe77e091e4f277c28b94ed83..6c045f086d8e57bf181cc4298866dc360bb4b3a5 100644 --- a/tests/series1_test.go +++ b/tests/series1_test.go @@ -23,7 +23,7 @@ Spain,2012-02-01,66,555.42,00241 ` df := pandas.ReadCSV(strings.NewReader(csvStr)) fmt.Println(df) - filename := "tutorials.csv" + filename := "test-tutorials-w01.csv" _ = df.WriteCSV(filename) buf := new(bytes.Buffer) _ = df.WriteCSV(buf)