"""Filter field-return analysis workbooks and copy the matching event folders.

Recovered from a git format-patch (commit
036a5ab0a7eecd75b2b6944283c6bb619d01c331, author jiajie555,
Sun, 27 Apr 2025 16:45:31 +0800, subject: "change how the returned data is
filtered") that adds ``read_xlsx_filter_events_multiCondition.py``.

Directory layout walked by the ``__main__`` driver::

    <root>/<person>/<date>/<xlsx_name>      analysis workbook
    <root>/<person>/<date>/<event>/         one folder per event

Supported condition shapes (each condition is a list):

* ``['赠品']``  - group rows by the first two '&'-separated parts of the
  gift column (cells containing '/' are ignored).
* ``['散称']``  - group rows by the second '&'-separated part of the
  bulk-goods column (e.g. ``网装&西瓜`` -> ``西瓜``).
* ``[column, number, '小于']`` - keep rows with ``column < number``; any
  other third element keeps rows with ``column > number``.
"""
import os
import shutil
from collections import OrderedDict

import pandas as pd


def _legacy_global(name):
    """Return the module-level variable *name* that older callers relied on."""
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            f'{name!r} was not passed to filter_condition and is not '
            f'defined at module level'
        ) from None


def _is_number(value):
    """Return True for int/float thresholds (bool is deliberately excluded)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _split_cells(values):
    """Yield ``(text, parts)`` for every cell with at least two '&' parts.

    Blank cells become the text 'nan' after ``str()`` and therefore never
    have two parts, so they are dropped here without a separate NaN check.
    """
    for value in values:
        text = str(value)
        parts = text.split('&')
        if len(parts) > 1:
            yield text, parts


def _gift_keys(values):
    """Return the sorted, de-duplicated '<part0>&<part1>' keys of gift cells."""
    return sorted({
        parts[0] + '&' + parts[1]
        for text, parts in _split_cells(values)
        if '/' not in text
    })


def _bulk_keys(values):
    """Return the sorted, de-duplicated product names (second part) of bulk cells."""
    # An empty name would match every row as a substring, so it is skipped.
    return sorted({parts[1] for _, parts in _split_cells(values) if parts[1]})


def copy_events(date_path, events, save_event):
    """Copy each event folder from *date_path* into *save_event*.

    Args:
        date_path: Directory that contains one sub-directory per event.
        events: Iterable of event names; entries whose text is 'nan'
            (blank spreadsheet cells) are skipped.
        save_event: Destination directory; each event is copied to
            ``<save_event>/<event>``.

    A failed copy (missing source, destination already present, ...) is
    appended to ``error_events.txt`` in the current working directory and
    does not stop the remaining copies.
    """
    for event in events:
        event_name = str(event)
        if event_name == 'nan':
            continue
        event_path = os.path.join(date_path, event_name)
        save_event_ = os.path.join(save_event, event_name)
        print(f'copy {event_path} to {save_event_}')
        try:
            shutil.copytree(event_path, save_event_)
        except OSError as e:
            # Best effort: record the failure and carry on with the batch.
            with open('error_events.txt', 'a', encoding='utf-8') as f:
                f.write(f'{e}: {event}\n')


def write_excel(excel_file_path, sheet_name, df):
    """Write *df* to *sheet_name* of the workbook at *excel_file_path*.

    A missing workbook is created. An existing workbook is opened in append
    mode with ``if_sheet_exists='new'``, so the data goes to a new sheet
    rather than replacing an existing one.
    """
    if os.path.exists(excel_file_path):
        writer_options = {'mode': 'a', 'if_sheet_exists': 'new'}
    else:
        writer_options = {}
    with pd.ExcelWriter(excel_file_path, engine='openpyxl', **writer_options) as writer:
        df.to_excel(writer, sheet_name=sheet_name, index=False)


def filter_condition(con, filter_contents_, filter_df,
                     save_path=None, date=None, date_path=None):
    """Export one workbook and one set of event folders per filter value.

    Args:
        con: Condition list; ``con[0]`` is the column to match on.
        filter_contents_: Text values; a row matches a value when the
            column text contains it as a literal substring.
        filter_df: DataFrame to select rows from; must have a '事件名'
            (event name) column.
        save_path: Root output directory.
        date: Sheet name used for the exported rows.
        date_path: Directory holding the event folders to copy.

    For backward compatibility, ``save_path``, ``date`` and ``date_path``
    fall back to module-level variables of the same name when omitted.

    Raises:
        NameError: If one of those three values is neither passed nor
            defined at module level (only checked when there is work to do).
    """
    if not filter_contents_:
        return
    if save_path is None:
        save_path = _legacy_global('save_path')
    if date is None:
        date = _legacy_global('date')
    if date_path is None:
        date_path = _legacy_global('date_path')

    column = con[0]
    for filter_content in filter_contents_:
        filter_name = column + '-' + filter_content  # e.g. 散称-苹果
        excel_path = os.path.join(save_path, column, filter_name)
        print('excel_path', excel_path)
        os.makedirs(excel_path, exist_ok=True)
        excel_file = os.path.join(excel_path, filter_name + '.xlsx')

        # regex=False: cell text may contain '(', '+', ... which must be
        # matched literally, not interpreted as a regular expression.
        mask = filter_df[column].str.contains(filter_content, na=False, regex=False)
        temp_df = filter_df[mask]
        write_excel(excel_file, date, temp_df)
        events = list(set(temp_df['事件名'].tolist()))
        copy_events(date_path, events, excel_path)


def anlay_xlsx_filter_events(file_path, condition, save_path):
    """Apply every condition to one workbook and export the matching events.

    Args:
        file_path: Path of the analysis workbook. Its parent directory is
            taken as the date directory (event folders live there) and the
            parent's base name is used as the sheet name of the exports.
        condition: List of conditions (see the module docstring).
        save_path: Root directory for the exported workbooks and events.

    Raises:
        Exception: For a one-element condition whose column is neither
            '赠品' nor '散称'.

    Note:
        A threshold condition narrows the working rows for all conditions
        that follow it in *condition*; gift/bulk conditions do not.
    """
    date_path = os.path.dirname(file_path)
    date = os.path.basename(date_path)

    df = pd.read_excel(file_path)
    column_names = df.columns.tolist()
    print('column_names', column_names)

    # astype(str) makes blank cells the text 'nan', so the mask is strictly
    # boolean even when the column has gaps or is entirely empty.
    # NOTE(review): the original comment said this selects shopping-site
    # videos, yet the code keeps rows whose value contains '否' ("no") --
    # confirm which one is intended.
    site_mask = df['是否购物现场'].astype(str).str.contains('否', regex=False)
    filter_df = df[site_mask]
    print('000***********', filter_df.columns.tolist())

    for con in condition:
        if len(con) == 1:
            column = con[0]
            if column == '赠品':
                # Cell format: product relation & pack colour & pack form & gift.
                filter_contents_ = _gift_keys(filter_df[column].tolist())
            elif column == '散称':
                # Cell format: pack form & product name, e.g. 网装&西瓜.
                filter_contents_ = _bulk_keys(filter_df[column].tolist())
            else:
                raise Exception(f'{con[0]} 不是赠品和散称')
            print('filter_contents_', filter_contents_)
            if filter_contents_:
                filter_condition(con, filter_contents_, filter_df,
                                 save_path=save_path, date=date,
                                 date_path=date_path)
        elif len(con) == 3 and _is_number(con[1]):
            column, threshold, direction = con
            filter_name = column + '-' + str(threshold) + '_' + str(direction)  # e.g. one2one-0.8_小于
            if direction == '小于':
                filter_df = filter_df[filter_df[column] < threshold]
            else:
                filter_df = filter_df[filter_df[column] > threshold]
            if len(filter_df) > 0:
                excel_path = os.path.join(save_path, column, filter_name)
                os.makedirs(excel_path, exist_ok=True)
                excel_file = os.path.join(excel_path, filter_name + '.xlsx')
                write_excel(excel_file, date, filter_df)
                events = list(set(filter_df['事件名'].tolist()))
                copy_events(date_path, events, excel_path)
        else:
            # Previously such conditions were dropped without any message.
            print(f'skip unsupported condition: {con}')


def _run_batch(root, xlsx_name, condition, save_path):
    """Filter the workbook of every <root>/<person>/<date>/ directory."""
    for name in sorted(os.listdir(root)):  # person
        name_path = os.path.join(root, name)
        if not os.path.isdir(name_path):
            continue
        for date in sorted(os.listdir(name_path)):  # date, e.g. 2025-02-04
            date_path = os.path.join(name_path, date)
            data_xlsx = os.path.join(date_path, xlsx_name)
            if not os.path.isfile(data_xlsx):
                print(f'skip {date_path}: {xlsx_name} not found')
                continue
            anlay_xlsx_filter_events(data_xlsx, condition, save_path)


if __name__ == "__main__":
    file_path = '/shareData/data/temp_data/tengXunCloud_data/code_test_0427/'  # data to filter
    save_path = '/shareData/data/temp_data/tengXunCloud_data/filter_result_0427/'  # where results go
    xlsx_name = '现场回传数据分析表_all.xlsx'

    # Columns listed by the original author for the workbook:
    # 'barcode', '有无商品操作', '赠品', '散称', '私人物品', '大号购物袋',
    # '遮挡/模糊', '单次/多次事件切分不准确', '复杂操作', '是否购物现场', '备注',
    # 'algroStartToEnd', 'one2one', 'one2SN', 'one2n'

    # Alternatives: [['赠品']] or [['散称']]
    condition = [['one2one', 0.8, '小于']]

    _run_batch(file_path, xlsx_name, condition, save_path)