修改回传数据筛选方式
This commit is contained in:
132
read_xlsx_filter_events_multiCondition.py
Normal file
132
read_xlsx_filter_events_multiCondition.py
Normal file
@ -0,0 +1,132 @@
|
||||
import pandas as pd
|
||||
import os
|
||||
import shutil
|
||||
from collections import OrderedDict
|
||||
|
||||
def copy_events(date_path, events, save_event):
    """Copy each event directory from ``date_path`` into ``save_event``.

    The copy is best-effort: a failure for one event never aborts the batch.
    Each failure is appended to ``error_events.txt`` in the current working
    directory as ``<error>: <event>``.

    Args:
        date_path: Directory that holds one sub-directory per event.
        events: Iterable of event names. Each entry is converted with
            ``str()``; entries whose string form is ``'nan'`` (e.g. float NaN,
            presumably from empty spreadsheet cells) are skipped.
        save_event: Destination directory; every event is copied to
            ``save_event/<event>``.
    """
    for event in events:
        event_name = str(event)
        if event_name == 'nan':
            continue

        event_path = os.path.join(date_path, event_name)
        save_event_ = os.path.join(save_event, event_name)
        print(f'copy {event_path} to {save_event_}')

        # Only the copy itself is guarded, so unrelated bugs are not hidden.
        try:
            shutil.copytree(event_path, save_event_)
        except (OSError, ValueError) as e:
            # OSError covers missing sources, existing destinations, permission
            # problems and shutil.Error; ValueError covers invalid path strings.
            # UTF-8 keeps logging from failing on non-ASCII event names.
            with open('error_events.txt', 'a', encoding='utf-8') as f:
                f.write(f'{e}: {event}\n')
|
||||
|
||||
def write_excel(excel_file_path, sheet_name, df):
    """Write ``df`` into a worksheet of the workbook at ``excel_file_path``.

    A missing workbook is created. An existing workbook is opened in append
    mode with ``if_sheet_exists='new'`` and the frame is written to it under
    the requested ``sheet_name``.

    Args:
        excel_file_path: Path of the ``.xlsx`` workbook.
        sheet_name: Worksheet name requested for the data.
        df: DataFrame to write; the index is not written.
    """
    # Append-specific options apply only when the workbook already exists.
    if os.path.exists(excel_file_path):
        writer_options = {'mode': 'a', 'if_sheet_exists': 'new'}
    else:
        writer_options = {}

    with pd.ExcelWriter(excel_file_path, engine='openpyxl', **writer_options) as writer:
        df.to_excel(writer, sheet_name=sheet_name, index=False)
|
||||
|
||||
def _context_value(name, value):
    """Return ``value``, or the module-level global ``name`` when it is None."""
    if value is not None:
        return value
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            f"filter_condition needs '{name}': pass it as an argument "
            f"or define it at module level"
        ) from None


def _is_number(value):
    """Tell whether ``value`` is an int or float threshold (bool excluded)."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _gift_keys(values):
    """Build the distinct ``part0&part1`` keys from '&'-separated gift cells."""
    keys = set()
    for value in values:
        text = str(value)
        parts = text.split('&')
        # Cells without '&' (including NaN) and cells containing '/' are ignored.
        if len(parts) > 1 and '/' not in text:
            keys.add(parts[0] + '&' + parts[1])
    return sorted(keys)


def _bulk_names(values):
    """Collect the distinct second '&'-separated field of each bulk cell."""
    names = set()
    for value in values:
        parts = str(value).split('&')
        # An empty name would match every row as a substring, so it is skipped.
        if len(parts) > 1 and parts[1]:
            names.add(parts[1])
    return sorted(names)


def filter_condition(con, filter_contents_, filter_df, save_path=None, date=None, date_path=None):
    """Export the rows and event folders that match each filter label.

    For every label in ``filter_contents_`` the rows whose ``con[0]`` cell
    contains the label (literal substring match) are written to
    ``<save_path>/<con[0]>/<con[0]>-<label>/<con[0]>-<label>.xlsx`` in a sheet
    named ``date``. The event directories listed in the ``事件名`` column are
    then copied from ``date_path`` into that same folder.

    Args:
        con: Condition whose first element is the column name to filter on.
        filter_contents_: Labels to look for in that column.
        filter_df: DataFrame to filter; must contain ``con[0]`` and ``事件名``.
        save_path: Root output directory.
        date: Sheet name used for the written rows.
        date_path: Directory that holds the event sub-directories.

    Raises:
        NameError: If one of ``save_path``, ``date`` or ``date_path`` is
            omitted and no module-level global of that name exists.

    When the last three arguments are omitted they are read from module-level
    globals of the same name, which is how three-argument callers worked.
    """
    save_path = _context_value('save_path', save_path)
    date = _context_value('date', date)
    date_path = _context_value('date_path', date_path)

    column = con[0]
    for filter_content in filter_contents_:
        filter_name = column + '-' + filter_content  # e.g. 散称-苹果
        excel_path = os.path.join(save_path, column, filter_name)
        print('excel_path', excel_path)
        os.makedirs(excel_path, exist_ok=True)
        excel_file = os.path.join(excel_path, filter_name + '.xlsx')

        # regex=False: labels are plain text and may contain regex
        # metacharacters such as parentheses.
        mask = filter_df[column].str.contains(filter_content, na=False, regex=False)
        temp_df = filter_df[mask]
        write_excel(excel_file, date, temp_df)

        events = temp_df['事件名'].unique().tolist()
        copy_events(date_path, events, excel_path)


def anlay_xlsx_filter_events(file_path, condition, save_path):
    """Filter one analysis workbook and export the matching rows and events.

    Args:
        file_path: Path of the workbook. Its parent directory is the event
            root, and that directory's name is used as the sheet name.
        condition: List of conditions, each one of:
            ``['赠品']`` or ``['散称']`` - one export per distinct label found
            in that column;
            ``[column, threshold, relation]`` with a numeric threshold - rows
            with ``column < threshold`` when ``relation == '小于'``, otherwise
            ``column > threshold``.
            Conditions of any other shape are ignored.
        save_path: Root directory for the exported workbooks and event copies.

    Raises:
        Exception: If a one-element condition names a column other than
            ``赠品`` or ``散称``.
    """
    date_path = os.path.dirname(file_path)
    date = os.path.basename(date_path)

    df = pd.read_excel(file_path)
    print('column_names', df.columns.tolist())

    # Keep rows whose '是否购物现场' cell contains '否'. astype(str) keeps empty
    # (NaN) cells from breaking the boolean mask.
    # NOTE(review): the original comment said this selects shopping-scene
    # videos, yet '否' reads as "no" - confirm which is intended.
    scene_mask = df['是否购物现场'].astype(str).str.contains('否', regex=False)
    filter_df = df[scene_mask]
    print('000***********', filter_df.columns.tolist())

    for con in condition:
        if len(con) == 1:
            column = con[0]
            if column == '赠品':
                # Cell layout: same/different product & gift package colour &
                # gift package form & concrete gift. The key is the first two.
                filter_contents_ = _gift_keys(filter_df[column].tolist())
                print('filter_contents_', filter_contents_)
            elif column == '散称':
                # Cell layout: package form & product name, e.g. 网装&西瓜.
                filter_contents = filter_df[column].tolist()
                print('filter_contents', filter_contents)
                filter_contents_ = _bulk_names(filter_contents)
            else:
                raise Exception(f'{con[0]} 不是赠品和散称')

            if filter_contents_:
                filter_condition(
                    con,
                    filter_contents_,
                    filter_df,
                    save_path=save_path,
                    date=date,
                    date_path=date_path,
                )

        elif len(con) == 3 and _is_number(con[1]):
            column, threshold, relation = con
            filter_name = column + '-' + str(threshold) + '_' + str(relation)  # e.g. one2one-0.8_小于

            # NOTE(review): filter_df is rebound here, so later conditions in
            # the list only see the rows kept by this one - confirm intended.
            if relation == '小于':
                filter_df = filter_df[filter_df[column] < threshold]
            else:
                filter_df = filter_df[filter_df[column] > threshold]

            if not filter_df.empty:
                excel_path = os.path.join(save_path, column, filter_name)
                os.makedirs(excel_path, exist_ok=True)
                excel_file = os.path.join(excel_path, filter_name + '.xlsx')
                write_excel(excel_file, date, filter_df)

                events = filter_df['事件名'].unique().tolist()
                copy_events(date_path, events, excel_path)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: walk <file_path>/<person>/<date>/ and filter the
    # analysis workbook found in every date folder.
    file_path = '/shareData/data/temp_data/tengXunCloud_data/code_test_0427/'  # root of the data to filter
    save_path = '/shareData/data/temp_data/tengXunCloud_data/filter_result_0427/'  # root for the filter results
    xlsx_name = '现场回传数据分析表_all.xlsx'

    # Column titles listed by the original author for reference:
    #   'barcode', '有无商品操作', '赠品', '散称', '私人物品', '大号购物袋',
    #   '遮挡/模糊', '单次/多次事件切分不准确', '复杂操作', '是否购物现场', '备注',
    #   'algroStartToEnd', 'one2one', 'one2SN', 'one2n'

    condition = [['one2one', 0.8, '小于']]
    # condition = [['赠品']]
    # condition = [['散称']]

    # NOTE: `save_path`, `date` and `date_path` stay module-level names on
    # purpose; filter_condition may read them as globals.
    for name in sorted(os.listdir(file_path)):  # person
        name_path = os.path.join(file_path, name)
        if not os.path.isdir(name_path):
            # Stray files next to the person folders would break os.listdir.
            continue

        for date in sorted(os.listdir(name_path)):  # date
            date_path = os.path.join(name_path, date)
            data_xlsx = os.path.join(date_path, xlsx_name)
            if not os.path.isfile(data_xlsx):
                # A date folder without the workbook must not abort the run.
                print(f'skip {date_path}: {xlsx_name} not found')
                continue

            anlay_xlsx_filter_events(data_xlsx, condition, save_path)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user