Commit d67af17f authored by quyuan

add ci

parent 7560e128
...@@ -4,12 +4,12 @@ calculate_score ...@@ -4,12 +4,12 @@ calculate_score
import os import os
import re import re
import json import json
from Levenshtein import distance
from lib import scoring from lib import scoring
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.tokenize import word_tokenize from nltk.tokenize import word_tokenize
import nltk import nltk
nltk.download('punkt') nltk.download('punkt')
from Levenshtein import distance
class Scoring: class Scoring:
""" """
......
...@@ -118,9 +118,6 @@ def clean_data(prod_type, download_dir): ...@@ -118,9 +118,6 @@ def clean_data(prod_type, download_dir):
with open(input_file, 'r', encoding='utf-8') as fr: with open(input_file, 'r', encoding='utf-8') as fr:
content = fr.read() content = fr.read()
new_content = clean_markdown_images(content) new_content = clean_markdown_images(content)
new_content = convert_html_table_to_md(new_content)
new_content = convert_latext_to_md(new_content)
new_content = convert_htmltale_to_md(new_content)
with open(output_file, 'w', encoding='utf-8') as fw: with open(output_file, 'w', encoding='utf-8') as fw:
fw.write(new_content) fw.write(new_content)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment