import os import re import json import pandas as pd
# Matches one Markdown table line: everything between the first and the last
# pipe on that line ('.' does not cross newlines, so rows never merge).
_TABLE_ROW_RE = re.compile(r'\|(.+)\|')


def _parse_table_rows(content):
    """Extract (entry key, optimized text) pairs from Markdown table text.

    Only rows that split into exactly four cells are kept; the first cell is
    used as the entry key and the fourth as the optimized text. Header and
    separator rows also have four cells and are returned as well; callers
    drop them because their "keys" do not exist in the module data.

    Args:
        content: Full text of a Markdown file.

    Returns:
        A list of ``(key, value)`` tuples in document order.
    """
    pairs = []
    for row in _TABLE_ROW_RE.findall(content):
        cells = [cell.strip() for cell in row.split('|')]
        if len(cells) != 4:
            continue
        pairs.append((cells[0], cells[3]))
    return pairs


def _apply_updates(module_data, pairs):
    """Overwrite existing entries of ``module_data`` with the given values.

    Keys that are not already present in ``module_data`` are skipped, so no
    new entries are ever created. Each actual change is printed.

    Args:
        module_data: Dict of entry key -> text; mutated in place.
        pairs: Iterable of ``(key, value)`` tuples.

    Returns:
        The number of entries whose value changed.
    """
    changed = 0
    for key, value in pairs:
        # Skip rows whose key has no matching entry in this module.
        if key not in module_data:
            continue
        if module_data[key] != value:
            print(f"更新 {module_data[key]} -> {value} ")
            module_data[key] = value
            changed += 1
    return changed


def main(json_path='assets/en_US.json', outputs_dir='gpt-outputs'):
    """Merge optimized texts from Markdown tables back into the locale JSON.

    Every ``*.md`` file in ``outputs_dir`` is matched to a top-level module of
    the JSON file by the part of its file name before the first ``-``. The
    table rows found in the file update that module's existing entries, and
    the whole JSON document is then written back to ``json_path``.

    Args:
        json_path: Path of the JSON file to read and rewrite.
        outputs_dir: Directory containing the Markdown files.
    """
    with open(json_path, encoding='utf-8') as f:
        data = json.load(f)
    json_modules = dict(data)
    print(f"已加载 {len(json_modules)} 个模块\n")

    # os.listdir order is arbitrary; sort so that several files targeting the
    # same module are always applied in the same order.
    for filename in sorted(os.listdir(outputs_dir)):
        if not filename.endswith('.md'):
            continue
        # The module name is the file stem up to the first '-'.
        module_name = os.path.splitext(filename)[0].split('-')[0]
        module_data = json_modules.get(module_name)
        # A file without a matching dict-valued module must not abort the
        # run: a crash here would discard every update made so far, because
        # the JSON is only written back at the very end.
        if not isinstance(module_data, dict):
            print(f"⚠️ 已跳过文件 {filename}:模块 {module_name} 不存在或不是词条字典\n")
            continue
        with open(os.path.join(outputs_dir, filename), 'r', encoding='utf-8') as f:
            content = f.read()
        print(f"正在处理文件 {filename} ...")
        # module_data is the same dict object stored in json_modules, so the
        # in-place update is all that is needed.
        _apply_updates(module_data, _parse_table_rows(content))
        print(f"✅ 已更新模块 {module_name}\n")

    print("正在写回数据...")
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(json_modules, f, indent=2, ensure_ascii=False)
    print("替换已完成")


if __name__ == "__main__":
    main()