修改回传数据筛选方式

This commit is contained in:
jiajie555
2025-04-27 16:45:31 +08:00
parent 58aaada519
commit 036a5ab0a7

View File

@@ -0,0 +1,132 @@
import pandas as pd
import os
import shutil
from collections import OrderedDict
def copy_events(date_path, events, save_event):
    """Copy event directories from ``date_path`` into ``save_event``.

    Each entry of ``events`` is converted with ``str()`` and used as a
    directory name under ``date_path``; the directory tree is copied to a
    directory of the same name under ``save_event``.  Entries whose string
    form is ``'nan'`` are skipped.

    Any exception raised while handling one entry (for example a missing
    source directory or an already existing destination) is appended to
    ``error_events.txt`` in the current working directory, and processing
    continues with the next entry.
    """
    for event in events:
        try:
            event_name = str(event)
            if event_name == 'nan':
                # Missing cell value: there is nothing to copy for it.
                continue
            source_dir = os.path.join(date_path, event_name)
            target_dir = os.path.join(save_event, event_name)
            print(f'copy {source_dir} to {target_dir}')
            shutil.copytree(source_dir, target_dir)
        except Exception as e:
            # Best effort: record the failure and keep going.
            with open('error_events.txt', 'a') as f:
                f.write(f'{e}: {event}\n')
def write_excel(excel_file_path, sheet_name, df):
    """Write ``df`` to a sheet of the workbook at ``excel_file_path``.

    When the workbook does not exist yet it is created.  When it already
    exists it is opened in append mode with ``if_sheet_exists='new'``, so
    the data is always written to an additional sheet instead of replacing
    an existing one.  The DataFrame index is not written.
    """
    writer_options = {'engine': 'openpyxl'}
    if os.path.exists(excel_file_path):
        # Existing workbook: append a sheet rather than recreating the file.
        writer_options.update(mode='a', if_sheet_exists='new')
    with pd.ExcelWriter(excel_file_path, **writer_options) as writer:
        df.to_excel(writer, sheet_name=sheet_name, index=False)
def filter_condition(con, filter_contents_, filter_df):
    """Export the rows matching each filter value and copy their event folders.

    For every value in ``filter_contents_`` the rows of ``filter_df`` whose
    ``con[0]`` column contains that value as a substring are written to
    ``<save_path>/<con[0]>/<con[0]>-<value>/<con[0]>-<value>.xlsx`` and the
    event directories named in the ``事件名`` column of those rows are copied
    into the same folder.

    NOTE: ``save_path``, ``date`` and ``date_path`` are not parameters; they
    are read as module-level globals, which the ``__main__`` driver of this
    file assigns before the call chain reaches this function.

    Args:
        con: Condition list; only ``con[0]`` (the column name) is used.
        filter_contents_: Iterable of strings to look for in that column.
        filter_df: DataFrame to select rows from.
    """
    column = con[0]
    for filter_content in filter_contents_:
        filter_name = column + '-' + filter_content  # e.g. 散称-苹果
        excel_path = os.path.join(save_path, column, filter_name)
        print('excel_path', excel_path)
        os.makedirs(excel_path, exist_ok=True)
        excel_file = os.path.join(excel_path, filter_name + '.xlsx')
        # regex=False: the filter value comes from cell text and must be
        # matched literally; characters such as '(' or '+' would otherwise be
        # interpreted as regular-expression syntax.
        matches = filter_df[column].str.contains(filter_content, regex=False, na=False)
        temp_df = filter_df[matches]
        write_excel(excel_file, date, temp_df)
        events = list(set(temp_df['事件名'].tolist()))
        copy_events(date_path, events, excel_path)
def anlay_xlsx_filter_events(file_path, condition, save_path):
    """Filter one analysis workbook by each condition and export the matches.

    The folder containing ``file_path`` is taken as the event root and its
    base name is used as the sheet name for exported data.

    Supported condition shapes:

    * ``['赠品']`` - cells are split on ``'&'``; the first two fields joined by
      ``'&'`` form a filter value.  Cells containing ``'/'`` or fewer than two
      fields are ignored.
    * ``['散称']`` - cells are split on ``'&'``; the second field is the filter
      value.  Cells with fewer than two fields are ignored.
    * ``[column, <float>, '小于']`` - keep rows where ``column`` is below the
      threshold; any other third element keeps rows above the threshold.

    Args:
        file_path: Path of the ``.xlsx`` file to read.
        condition: List of condition lists as described above.
        save_path: Root folder for the output of numeric conditions.

    Raises:
        Exception: If a one-element condition names a column other than
            ``赠品`` or ``散称``.
    """
    date_path = os.path.dirname(file_path)
    date = os.path.basename(date_path)
    df = pd.read_excel(file_path)
    column_names = df.columns.tolist()
    print('column_names', column_names)
    # The empty pattern matches every string cell, so this keeps the rows that
    # have a text value in this column.  na=False drops rows with a missing
    # value; without it the mask contains NaN and the indexing raises.
    # NOTE(review): the empty pattern looks like a placeholder - confirm the
    # intended marker text for on-site shopping rows.
    filter_df = df[df['是否购物现场'].str.contains('', na=False)]
    print('000***********', filter_df.columns.tolist())
    for con in condition:
        if len(con) == 1:
            column = con[0]
            if column == '赠品':
                # Cell layout (per original note): same/different product &
                # gift package colour & gift package form & concrete gift.
                filter_contents = filter_df[column].tolist()
                keys = set()
                for value in filter_contents:
                    text = str(value)
                    parts = text.split('&')
                    if len(parts) > 1 and '/' not in text:
                        keys.add(parts[0] + '&' + parts[1])
                filter_contents_ = sorted(keys)
                print('filter_contents_', filter_contents_)
            elif column == '散称':
                # Cell layout (per original note): package form & product
                # name, for example 网装&西瓜.
                filter_contents = filter_df[column].tolist()
                print('filter_contents', filter_contents)
                keys = set()
                for value in filter_contents:
                    parts = str(value).split('&')
                    if len(parts) > 1:
                        keys.add(parts[1])
                filter_contents_ = sorted(keys)
            else:
                raise Exception(f'{con[0]} 不是赠品和散称')
            if filter_contents_:
                # filter_condition reads save_path/date/date_path from module
                # globals rather than from this function's locals.
                filter_condition(con, filter_contents_, filter_df)
        elif len(con) == 3 and isinstance(con[1], float):
            filter_name = con[0] + '-' + str(con[1]) + '_' + str(con[2])  # e.g. one2one-0.8_小于
            # NOTE(review): filter_df is narrowed in place, so any later
            # condition in the list only sees the rows kept here - confirm
            # that this cumulative behaviour is intended.
            if con[2] == '小于':
                filter_df = filter_df[filter_df[con[0]] < con[1]]
            else:
                filter_df = filter_df[filter_df[con[0]] > con[1]]
            if len(filter_df) > 0:
                excel_path = os.path.join(save_path, con[0], filter_name)
                os.makedirs(excel_path, exist_ok=True)
                excel_file = os.path.join(excel_path, filter_name + '.xlsx')
                write_excel(excel_file, date, filter_df)
                events = list(set(filter_df['事件名'].tolist()))
                copy_events(date_path, events, excel_path)
        else:
            # Previously such conditions were ignored without any notice.
            print(f'skip unsupported condition: {con}')
if __name__ == "__main__":
    # Driver: walk <file_path>/<person>/<date>/ and run the filter on each
    # date folder's analysis workbook.
    #
    # NOTE: save_path, date and date_path must remain module-level names
    # (do not move this into a function): filter_condition() reads them as
    # globals.
    file_path = '/shareData/data/temp_data/tengXunCloud_data/code_test_0427/'  # data to filter
    save_path = '/shareData/data/temp_data/tengXunCloud_data/filter_result_0427/'  # output root
    xlsx_name = '现场回传数据分析表_all.xlsx'
    # Column titles noted by the original author (presumably the workbook's
    # columns - verify against the actual file):
    #   barcode, 有无商品操作, 赠品, 散称, 私人物品, 大号购物袋, 遮挡/模糊,
    #   单次/多次事件切分不准确, 复杂操作, 是否购物现场, 备注,
    #   algroStartToEnd, one2one, one2SN, one2n
    condition = [['one2one', 0.8, '小于']]
    # Alternatives: condition = [['赠品']] or condition = [['散称']]
    for name in sorted(os.listdir(file_path)):  # person folders
        name_path = os.path.join(file_path, name)
        if not os.path.isdir(name_path):
            # Stray files next to the person folders would break listdir below.
            continue
        for date in sorted(os.listdir(name_path)):  # date folders
            date_path = os.path.join(name_path, date)
            data_xlsx = os.path.join(date_path, xlsx_name)
            if not os.path.isfile(data_xlsx):
                # Skip instead of aborting the whole run on one incomplete folder.
                print(f'skip {date_path}: {xlsx_name} not found')
                continue
            anlay_xlsx_filter_events(data_xlsx, condition, save_path)