Commit d67af17f authored by quyuan

add ci

parent 7560e128
......@@ -4,12 +4,12 @@ calculate_score
import os
import re
import json
from Levenshtein import distance
from lib import scoring
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize
import nltk
nltk.download('punkt')
from Levenshtein import distance
class Scoring:
"""
......
......@@ -118,9 +118,6 @@ def clean_data(prod_type, download_dir):
with open(input_file, 'r', encoding='utf-8') as fr:
content = fr.read()
new_content = clean_markdown_images(content)
new_content = convert_html_table_to_md(new_content)
new_content = convert_latext_to_md(new_content)
new_content = convert_htmltale_to_md(new_content)
with open(output_file, 'w', encoding='utf-8') as fw:
fw.write(new_content)
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment