Commit 2fb4b2ef authored by liusilu

add pdf tools

parent d3e6853a
...@@ -22,9 +22,9 @@ def indicator_cal(json_standard,json_test): ...@@ -22,9 +22,9 @@ def indicator_cal(json_standard,json_test):
'''数据集总体指标''' '''数据集总体指标'''
a=json_test[['id','mid_json']] a=json_test[['id','mid_json']]
b=json_standard[['id','mid_json']] b=json_standard[['id','mid_json','pass_label']]
outer_merge=pd.merge(a,b,on='id',how='outer') outer_merge=pd.merge(a,b,on='id',how='outer')
outer_merge.columns=['id','standard_mid_json','test_mid_json'] outer_merge.columns=['id','standard_mid_json','test_mid_json','pass_label']
standard_exist=outer_merge.standard_mid_json.apply(lambda x: not isnull(x)) standard_exist=outer_merge.standard_mid_json.apply(lambda x: not isnull(x))
test_exist=outer_merge.test_mid_json.apply(lambda x: not isnull(x)) test_exist=outer_merge.test_mid_json.apply(lambda x: not isnull(x))
...@@ -36,7 +36,7 @@ def indicator_cal(json_standard,json_test): ...@@ -36,7 +36,7 @@ def indicator_cal(json_standard,json_test):
inner_merge=pd.merge(a,b,on='id',how='inner') inner_merge=pd.merge(a,b,on='id',how='inner')
inner_merge.columns=['id','standard_mid_json','test_mid_json'] inner_merge.columns=['id','standard_mid_json','test_mid_json','pass_label']
json_standard = inner_merge['standard_mid_json']#check一下是否对齐 json_standard = inner_merge['standard_mid_json']#check一下是否对齐
json_test = inner_merge['test_mid_json'] json_test = inner_merge['test_mid_json']
...@@ -156,7 +156,14 @@ def indicator_cal(json_standard,json_test): ...@@ -156,7 +156,14 @@ def indicator_cal(json_standard,json_test):
""" """
'''计算pdf之间的总体编辑距离和bleu''' '''
计算pdf之间的总体编辑距离和bleu
这里只计算正例的pdf
'''
test_para_text=np.asarray(test_para_text, dtype = object)[inner_merge['pass_label']=='yes']
standard_para_text=np.asarray(standard_para_text, dtype = object)[inner_merge['pass_label']=='yes']
pdf_dis=[] pdf_dis=[]
pdf_bleu=[] pdf_bleu=[]
for a,b in zip(test_para_text,standard_para_text): for a,b in zip(test_para_text,standard_para_text):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment