|
@@ -0,0 +1,65 @@
|
|
|
# %%
import os
import re
import json
# NOTE(review): pandas is no longer used below (rows are iterated directly);
# the import is kept only in case other cells/sessions rely on `pd` --
# remove once confirmed unused.
import pandas as pd

# Locale file that is read, patched in memory and written back in place.
ASSETS_PATH = 'assets/en_US.json'
# Directory scanned for `<module>[-suffix].md` files containing Markdown tables.
OUTPUTS_DIR = 'gpt-outputs'


def parse_table_rows(content):
    """Return ``(entry_key, optimized_text)`` pairs found in Markdown tables.

    Each line of *content* that contains at least two ``|`` characters is
    split on ``|``; only rows with exactly four cells are kept, and the first
    and fourth cell (whitespace-stripped) form the returned pair.  Header and
    separator rows also have four cells and are therefore returned as well;
    they are filtered out later because their "key" does not exist in the
    module being updated.
    """
    pairs = []
    # `.` does not match newlines, so the greedy pattern captures everything
    # between the first and the last pipe of a single line.
    for row in re.findall(r'\|(.+)\|', content):
        cells = [cell.strip() for cell in row.split('|')]
        if len(cells) != 4:
            continue
        pairs.append((cells[0], cells[3]))
    return pairs


def apply_updates(module_data, pairs):
    """Apply ``(key, value)`` pairs to *module_data* in place.

    Only keys that already exist in *module_data* are touched, and empty
    values are ignored so that a blank table cell can never wipe an existing
    translation.  Every effective change is printed.

    Returns the number of entries whose value actually changed.
    """
    changed = 0
    for key, value in pairs:
        # Skip entries the module does not define (this also drops table
        # header / separator rows) and blank replacement cells.
        if key not in module_data or not value:
            continue
        if module_data[key] != value:
            print(f"更新 {module_data[key]} -> {value} ")
            module_data[key] = value
            changed += 1
    return changed


def main():
    """Patch the locale JSON with the optimized strings from the Markdown files.

    Step 1 loads every module from ``ASSETS_PATH``.  Step 2 walks the ``.md``
    files in ``OUTPUTS_DIR`` (sorted, so that several files targeting the same
    module are applied in a deterministic order), derives the module name from
    the part of the file name before the first ``-`` and applies the table
    rows to that module.  Step 3 writes all modules back to ``ASSETS_PATH``.
    """
    # Step 1: load all modules from the JSON file.
    with open(ASSETS_PATH, encoding='utf-8') as f:
        json_modules = json.load(f)
    print(f"已加载 {len(json_modules)} 个模块\n")

    for filename in sorted(os.listdir(OUTPUTS_DIR)):
        if not filename.endswith('.md'):
            continue

        # Module name = file stem up to the first '-'.
        module_name = os.path.splitext(filename)[0].split('-')[0]

        # A file without a matching (dict-valued) module must not abort the
        # whole run; report it and move on to the next file.
        module_data = json_modules.get(module_name)
        if not isinstance(module_data, dict):
            print(f"⚠️ 跳过文件 {filename}:未找到模块 {module_name}\n")
            continue

        with open(os.path.join(OUTPUTS_DIR, filename), 'r', encoding='utf-8') as f:
            content = f.read()

        print(f"正在处理文件 {filename} ...")

        # module_data is the very object stored in json_modules, so in-place
        # updates are automatically reflected there.
        apply_updates(module_data, parse_table_rows(content))

        print(f"✅ 已更新模块 {module_name}\n")

    print("正在写回数据...")

    # Step 3: write the updated modules back to the JSON file.
    with open(ASSETS_PATH, 'w', encoding='utf-8') as f:
        json.dump(json_modules, f, indent=2, ensure_ascii=False)

    print("替换已完成")


if __name__ == "__main__":
    main()
|