Commit 99cf160d authored by myhloli

fix(dict2md): improve text concatenation logic

- Optimize content stripping and checking logic
- Add special case handling for single-character content
- Adjust spacing rules for different content types
parent cf0d76c0
......@@ -145,7 +145,8 @@ def merge_para_with_text(para_block):
elif span_type == ContentType.InterlineEquation:
content = f"\n$$\n{span['content']}\n$$\n"
if content.strip() != '':
content = content.strip()
if content != '':
langs = ['zh', 'ja', 'ko']
if line_lang in langs: # 遇到一些一个字一个span的文档,这种单字语言判断不准,需要用整行文本判断
if span_type in [ContentType.Text, ContentType.InterlineEquation]:
......@@ -157,8 +158,10 @@ def merge_para_with_text(para_block):
# 如果是前一行带有-连字符,那么末尾不应该加空格
if __is_hyphen_at_line_end(content):
para_text += content[:-1]
elif len(content) == 1 and content not in ['A', 'I', 'a', 'i']:
para_text += content
else: # 西方文本语境下 content间需要空格分隔
para_text += f"{content.strip()} "
para_text += f"{content} "
elif span_type == ContentType.InterlineEquation:
para_text += content
else:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment