From 615278958c7bb79bfa27fc1126af49980f042449 Mon Sep 17 00:00:00 2001 From: alexad <643757595@qq.com> Date: Fri, 18 Oct 2024 08:43:39 +0000 Subject: [PATCH] python post_processing.py --input_csv /home/xieyukai/ceshi/evaluation_results_20241018_105411.csv --output_path /home/xieyukai/ceshi/ Signed-off-by: alexad <643757595@qq.com> --- .../post_processing.py | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 app/customer_service_evaluation_system/post_processing.py diff --git a/app/customer_service_evaluation_system/post_processing.py b/app/customer_service_evaluation_system/post_processing.py new file mode 100644 index 0000000..d9f20bb --- /dev/null +++ b/app/customer_service_evaluation_system/post_processing.py @@ -0,0 +1,88 @@ +import glob +import pymysql +import pandas as pd +import argparse +from rich.console import Console +from rich.table import Table +import cfg +parser = argparse.ArgumentParser(description="Process Excel files and update with database information.") +parser.add_argument('--input_csv', required=True, help='Path to input .xlsx file or directory containing .xlsx files.') +parser.add_argument('--output_path', required=True, help='Path to save the updated .xlsx files.') +args = parser.parse_args() + +db = pymysql.connect( + host=cfg.db["host"], + port=cfg.db["port"], + user=cfg.db["user"], + password=cfg.db["password"], + database=cfg.db["database"] +) +cursor = db.cursor() + +file_paths = glob.glob(f"{args.input_csv}/*.xlsx") if args.input_csv.endswith('/') else [args.input_csv] +console = Console() + +for file_path in file_paths: + df = pd.read_csv(file_path) + print("Excel 文件加载完成,共有行数:", len(df)) + df['ID'] = df['ID'].astype(str) + df[['caller_num', 'callee_num', 'url']] = None + + begintimes = df['ID'].str[:14].unique() + begintime_str = ','.join([f"'{bt[:4]}-{bt[4:6]}-{bt[6:8]} {bt[8:10]}:{bt[10:12]}:{bt[12:]}'" for bt in begintimes]) + query1 = f"SELECT begintime, customer_uuid, record_file_name FROM cti_record WHERE begintime IN ({begintime_str});" + cursor.execute(query1) + results1 = cursor.fetchall() + begintime_map = {str(row[0]): (row[1], row[2]) for row in results1} + customer_uuids = [row[1] for row in results1 if row[1] is not None] + customer_uuid_str = ','.join([f"'{uuid}'" for uuid in customer_uuids]) + + if customer_uuid_str: + query2 = f"SELECT call_uuid, caller_num, callee_num FROM cti_cdr_call WHERE call_uuid IN ({customer_uuid_str});" + cursor.execute(query2) + results2 = cursor.fetchall() + customer_uuid_map = {row[0]: (row[1], row[2]) for row in results2} + else: + customer_uuid_map = {} + print("未找到任何有效的 customer_uuid,跳过相关查询。") + + for index, row in df.iterrows(): + begintime = row['ID'][:14] + formatted_begintime = f"{begintime[:4]}-{begintime[4:6]}-{begintime[6:8]} {begintime[8:10]}:{begintime[10:12]}:{begintime[12:]}" + if formatted_begintime in begintime_map: + customer_uuid, record_file_name = begintime_map[formatted_begintime] + if customer_uuid in customer_uuid_map: + caller_num, callee_num = customer_uuid_map[customer_uuid] + df.at[index, 'caller_num'] = caller_num + df.at[index, 'callee_num'] = callee_num + df.at[index, 'url'] = f"http://116.62.120.233{record_file_name}" + else: + print(f"未找到 customer_uuid {customer_uuid} 对应的 caller_num 和 callee_num。") + else: + print(f"未找到 begintime {formatted_begintime} 对应的 customer_uuid 和 record_file_name。") + + output_file_path = f"{args.output_path}/{file_path.split('/')[-1].replace('.csv', '.xlsx')}" + df.to_excel(output_file_path, index=False) + + filtered_columns = [col for col in df.columns if 'ID' not in col and '原因' not in col and '关键' in col] + main_table = Table(title="Evaluation Results", show_header=True) + main_table.add_column("Category", justify="left", style="cyan", width=20) + main_table.add_column("Value", justify="left", style="cyan", width=45) + main_table.add_column("Count", justify="right", style="magenta", width=10) + main_table.add_column("Percentage", justify="right", style="green", width=15) + + for col in filtered_columns: + value_counts = df[col].value_counts() + percentages = value_counts / value_counts.sum() * 100 + for i, (value, count) in enumerate(value_counts.items()): + percentage = '{:.2f}%'.format(percentages[value]) + if i == 0: + main_table.add_row(col, str(value), str(count), percentage) + else: + main_table.add_row("", str(value), str(count), percentage) + + console.print(main_table) + +cursor.close() +db.close() +print("数据库连接已关闭") -- Gitee