From dc76a0a6fdd59cc7878d2a86f5555376213c4b6b Mon Sep 17 00:00:00 2001
From: archer0258 <627776371@qq.com>
Date: Fri, 24 Jun 2022 02:46:45 +0000
Subject: [PATCH] =?UTF-8?q?add=20contributors/fake2excel.=20for=20column,?=
 =?UTF-8?q?=20name=20in=20zip(columns,=20names):=20=20=20=20=20=20=20=20?=
 =?UTF-8?q?=20excel=5Fdict[name]=20=3D=20[]=20=20=20=20=20=20=20=20=20whil?=
 =?UTF-8?q?e=20len(excel=5Fdict[name])=20<=20rows:=20=20#=20=E5=BE=AA?=
 =?UTF-8?q?=E7=8E=AF=E6=AF=8F=E4=B8=80=E5=88=97=E7=9A=84=E6=AF=8F=E4=B8=80?=
 =?UTF-8?q?=E8=A1=8C=20=20=20=20=20=20=20=20=20=20=20=20=20excel=5Fdict[na?=
 =?UTF-8?q?me].append(eval(f'fake.{column}'))=20=20#=20=E5=BE=80=E6=AF=8F?=
 =?UTF-8?q?=E4=B8=80=E5=88=97=E7=9A=84=E6=AF=8F=E4=B8=80=E8=A1=8C=E9=87=8C?=
 =?UTF-8?q?=E9=9D=A2=E6=B7=BB=E5=8A=A0=E6=95=B0=E6=8D=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 contributors/fake2excel | 114 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 contributors/fake2excel

diff --git a/contributors/fake2excel b/contributors/fake2excel
new file mode 100644
index 0000000..19cbd97
--- /dev/null
+++ b/contributors/fake2excel
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+
+#############################################
+# File Name: excel.py
+# Mail: 1957875073@qq.com
+# Created Time: 2022-4-25 10:17:34
+# Description: automation helpers for Excel
+#############################################
+
+from faker import Faker
+import pandas as pd
+from alive_progress import alive_bar
+
+import numpy as np
+
+# Signed integer dtypes, narrowest first, tried in this order when downcasting.
+_INT_DTYPES = (np.int8, np.int16, np.int32, np.int64)
+
+
+def reduce_pandas_mem_usage(df):
+    """Shrink the memory footprint of ``df`` in place and return it.
+
+    Integer columns are downcast to the narrowest signed integer dtype whose
+    range holds every value. Object columns are converted to ``category``
+    unless the column label contains ``'date'``. All other columns are left
+    untouched.
+
+    :param df: the DataFrame to compact; it is modified in place.
+    :return: the same DataFrame object.
+    """
+    for col in df.columns:
+        col_type = df[col].dtype
+
+        if col_type != object:
+            if str(col_type)[:3] == 'int':
+                c_min = df[col].min()
+                c_max = df[col].max()
+                for candidate in _INT_DTYPES:
+                    limits = np.iinfo(candidate)
+                    # Inclusive bounds: the extreme values of a dtype
+                    # (e.g. -128 and 127 for int8) still fit in it.
+                    if limits.min <= c_min and c_max <= limits.max:
+                        df[col] = df[col].astype(candidate)
+                        break
+        elif 'date' not in str(col):
+            # str() keeps non-string column labels from raising TypeError.
+            df[col] = df[col].astype('category')
+
+    return df
+
+
+def fake2excel(columns=None, rows=1, language='zh_CN', path='./fake2excel.xlsx'):
+    """Generate fake data with Faker and write it to an Excel file.
+
+    Author & Date: CoderWanFeng 2022/5/13 0:12
+
+    :param columns: list of Faker provider method names, one per output
+        column (each name is also used as the column header); default ``['name']``.
+    :param rows: number of rows to generate; defaults to 1.
+    :param language: Faker locale; ``'english'`` (any case) is accepted as an
+        alias for ``'en_US'``; defaults to ``'zh_CN'``.
+    :param path: where to write the workbook; defaults to
+        ``'./fake2excel.xlsx'``.
+    :raises AttributeError: if a column is not an attribute of the Faker
+        instance.
+    """
+    if columns is None:
+        columns = ['name']
+    if language.lower() == 'english':
+        language = 'en_US'
+
+    fake = Faker(language)
+    excel_dict = {}
+    with alive_bar(len(columns) * rows) as bar:
+        for column in columns:
+            # Look the provider up once per column instead of eval()-ing a
+            # string for every cell; eval would execute arbitrary code.
+            provider = getattr(fake, column)
+            values = []
+            for _ in range(rows):
+                values.append(provider())
+                bar()  # advance the progress bar by one cell
+            excel_dict[column] = values
+
+    data = reduce_pandas_mem_usage(pd.DataFrame(excel_dict))
+    # The context manager saves and closes the workbook on exit;
+    # ExcelWriter.save() was removed in pandas 2.0.
+    with pd.ExcelWriter(path) as writer:
+        data.to_excel(writer, index=False)
+
+
+def fake2excel_dateframe(columns, names, rows=1, language='zh_CN'):
+    """Generate fake data with Faker and return it as a DataFrame.
+
+    :param columns: expressions evaluated as ``fake.<expression>``; no call is
+        appended, so an entry must carry its own call, e.g. ``'name()'``.
+    :param names: output column labels, paired with ``columns`` by position;
+        ``zip`` stops at the shorter of the two sequences.
+    :param rows: number of rows to generate; defaults to 1.
+    :param language: Faker locale; defaults to ``'zh_CN'``.
+    :return: a DataFrame with one column per ``(column, name)`` pair.
+    """
+    fake = Faker(language)
+    excel_dict = {}
+    for column, name in zip(columns, names):
+        values = []
+        for _ in range(rows):
+            # SECURITY: eval() runs ``column`` as arbitrary Python; only pass
+            # trusted expressions. Kept because callers may rely on passing
+            # arguments, e.g. ``'random_int(1, 9)'`` -- confirm before replacing.
+            values.append(eval(f'fake.{column}'))
+        excel_dict[name] = values
+    return pd.DataFrame(excel_dict)
-- 
Gitee