mcu_ab568x/app/projects/AB5681F_240_32M/i18n/copy_string.py
2025-05-30 18:03:10 +08:00

48 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
# Read the spreadsheet, keeping only the lang_* columns listed below.
language_columns = [
    'lang_en', 'lang_zh', 'lang_es', 'lang_fr', 'lang_de',
    'lang_ru', 'lang_ro', 'lang_pt', 'lang_it', 'lang_pl',
    'lang_he', 'lang_ms', 'lang_tr', 'lang_vi', 'lang_cs',
    'lang_hu', 'lang_th',
]
df = pd.read_excel('language.xlsx', usecols=language_columns)
# Accumulator for every distinct character collected so far.
unique_chars = set()
# Punctuation and symbols that are accepted in addition to letters and digits.
# NOTE(review): the repeated '' entries all collapse into a single empty-string
# member of the set; they look like characters that were lost in an
# encoding/copy step -- confirm against the authoritative symbol list.
punctuation = {
    '!', '"', '#', '$', '%', '&', "'", '(', ')', '*',
    '+', ',', '-', '.', '/', ':', ';', '<', '=', '>',
    '?', '@', '[', '\\', ']', '^', '_', '`', '{', '|', '}', '~',
    '', '', '', '', '', ' ', '', '',
    '', '', '', '', '', '', '', '', '', '·',
    '', '', '', '', '', '', '', '', '',
    '', '', ' ', '', '', '', '¥', '', '|',
}
# Fixed strings that are always appended to the output, whether or not their
# characters occur in the spreadsheet. Order and duplicates are kept exactly
# as written because the list is later written out verbatim.
_language_names = [
    "简体中文", "Español", "Français", "ภาษาไทย", "العربية", "한국어", "Română", "Português",
    "日本語.", "Ελληνικά", "Українська", "Български", "Հայերեն", "עברית", "Türkçe", "हिंदी",
    "বাङ্গালি", "Tiếng Việt", "فارسی", "繁體中文", "Čeština", "Magyarország", "Slovenská",
]
_unit_and_month_labels = [
    "", "千卡", "英里", "千米",
    "一月", "一月", "二月", "三月", "四月", "五月", "六月", "七月",
    "八月", "九月", "十月", "十一月", "十二月",
    "毫米汞柱", "次/分",
]
_cjk_punctuation = "。?!,、;:「」『』‘’“”()【】〔〕—…–《》〈〉"
extra_texts = _language_names + _unit_and_month_labels + [_cjk_punctuation]
# Walk every cell of the loaded columns and keep each character that is a
# letter, a digit, or one of the whitelisted punctuation marks.
for col in df.columns:
    # dropna() skips empty cells; str() on a missing value would yield the
    # text "nan" and leak the letters 'n' and 'a' into the result.
    for cell in df[col].dropna():
        # str() because a cell is not guaranteed to hold text (e.g. a number).
        for char in str(cell):
            if char.isalpha() or char.isdigit() or char in punctuation:
                unique_chars.add(char)
# Always include the whitelisted punctuation, even if no cell uses it.
unique_chars.update(punctuation)
# Write the result as one unbroken run of text: first the distinct characters
# in code-point order, then the extra texts.
with open('output_chars.txt', 'w', encoding='utf-8') as f:
    f.write(''.join(sorted(unique_chars)))
    # The extra texts are concatenated as-is; no separator or newline is
    # inserted between them.
    f.write(''.join(extra_texts))
print('字符包括数字、英文和指定符号以及额外文本已提取并写入output_chars.txt文件。')