Unverified commit 3d2fb836, authored by yyy, committed by GitHub

feat: add test case (#499)

Co-authored-by: quyuan <quyuan@pjlab.org>
parent f0a8886c
......@@ -6,20 +6,22 @@ on:
push:
branches:
- "master"
- "dev"
paths-ignore:
- "cmds/**"
- "**.md"
pull_request:
branches:
- "master"
- "dev"
paths-ignore:
- "cmds/**"
- "**.md"
workflow_dispatch:
jobs:
cli-test:
runs-on: ubuntu-latest
timeout-minutes: 40
runs-on: pdf
timeout-minutes: 120
strategy:
fail-fast: true
......@@ -28,27 +30,22 @@ jobs:
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: check-requirements
run: |
pip install -r requirements.txt
pip install -r requirements-qa.txt
pip install magic-pdf
- name: test_cli
- name: install
run: |
cp magic-pdf.template.json ~/magic-pdf.json
echo $GITHUB_WORKSPACE
cd $GITHUB_WORKSPACE && export PYTHONPATH=. && pytest -s -v tests/test_unit.py
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli.py
- name: benchmark
echo $GITHUB_WORKSPACE && sh tests/retry_env.sh
- name: unit test
run: |
cd $GITHUB_WORKSPACE && export PYTHONPATH=. && coverage run -m pytest tests/test_unit.py --cov=magic_pdf/ --cov-report term-missing --cov-report html
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
- name: cli test
run: |
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_bench.py
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
needs: [cli-test]
runs-on: ubuntu-latest
needs: cli-test
runs-on: pdf
steps:
- name: get_actor
run: |
......@@ -67,9 +64,5 @@ jobs:
- name: notify
run: |
curl ${{ secrets.WEBHOOK_URL }} -H 'Content-Type: application/json' -d '{
"msgtype": "text",
"text": {
"mentioned_list": ["${{ env.METIONS }}"] , "content": "'${{ github.repository }}' GitHubAction Failed!\n 细节请查看:https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"
}
}'
\ No newline at end of file
echo ${{ secrets.USER_ID }}
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}
\ No newline at end of file
......@@ -14,4 +14,6 @@ tqdm
htmltabletomd
pypandoc
pyopenssl==24.0.0
struct-eqtable==0.1.0
\ No newline at end of file
struct-eqtable==0.1.0
pytest-cov
beautifulsoup4
\ No newline at end of file
"""
get cov
"""
from bs4 import BeautifulSoup
def get_covrage():
    """Read the total coverage from ``htmlcov/index.html`` and assert on it.

    The HTML report is parsed with BeautifulSoup, the text of the first
    ``<span class="pc_cov">`` element is taken, its trailing ``%`` is stripped
    and the remainder is converted to ``float``.

    Raises:
        AssertionError: if the report has no ``pc_cov`` span, or if the parsed
            value is below the threshold.
        OSError: if ``htmlcov/index.html`` cannot be opened.
    """
    # Read the report through a context manager so the handle is always closed.
    with open("htmlcov/index.html", "r", encoding="utf-8") as report:
        html_content = report.read()
    soup = BeautifulSoup(html_content, 'html.parser')
    # Locate the <span> tag that carries the "pc_cov" class.
    pc_cov_span = soup.find('span', class_='pc_cov')
    # Fail with a readable message instead of an AttributeError on None.
    assert pc_cov_span is not None, \
        'no <span class="pc_cov"> element found in htmlcov/index.html'
    # Extract the percentage value.
    percentage_value = pc_cov_span.text.strip()
    percentage_float = float(percentage_value.rstrip('%'))
    print("percentage_float:", percentage_float)
    # NOTE(review): the parsed number is a percentage (the '%' sign was
    # stripped), so this threshold means 0.2 % - confirm that is intended.
    assert percentage_float >= 0.2


if __name__ == '__main__':
    get_covrage()
\ No newline at end of file
import json
import pandas as pd
import numpy as np
from nltk.translate.bleu_score import sentence_bleu
import argparse
from sklearn.metrics import classification_report
from collections import Counter
from sklearn import metrics
from pandas import isnull
def _collect_mid_json_fields(mid_json_docs):
    """Flatten the per-page fields of every mid-json document into lists.

    Args:
        mid_json_docs: iterable of mid-json objects; each one is turned into a
            ``pandas.DataFrame`` whose rows are looked up by field name.

    Returns:
        dict mapping a field name to a list. All lists hold one entry per page,
        except ``para_text`` which holds one list of paragraph texts per
        document.
    """
    fields = {
        'inline_equations': [],
        'inline_equations_bboxs': [],
        'interline_equations': [],
        'interline_equations_bboxs': [],
        'dropped_text_bboxes': [],
        'dropped_text_tag': [],
        'dropped_image_bboxes': [],
        'dropped_table_bboxes': [],
        'preproc_num': [],  # reading order
        'para_num': [],
        'para_text': [],
    }
    for doc in mid_json_docs:
        mid_json = pd.DataFrame(doc)
        # The last column is discarded before the per-page rows are read.
        # NOTE(review): presumably a non-page metadata column - confirm.
        mid_json = mid_json.iloc[:, :-1]

        for row_name in ('inline_equations', 'interline_equations'):
            for page in mid_json.loc[row_name, :]:
                fields[row_name].append([eq['latex_text'] for eq in page])
                fields[row_name + '_bboxs'].append([eq['bbox'] for eq in page])

        for page in mid_json.loc['droped_text_block', :]:
            fields['dropped_text_bboxes'].append([blk['bbox'] for blk in page])
            # Blocks without a 'tag' key are recorded with the tag 'None'.
            fields['dropped_text_tag'].append(
                [blk['tag'] if 'tag' in blk else 'None' for blk in page])

        fields['dropped_image_bboxes'].extend(
            mid_json.loc['droped_image_block', :])
        fields['dropped_table_bboxes'].extend(
            mid_json.loc['droped_table_block', :])

        for page in mid_json.loc['preproc_blocks', :]:
            fields['preproc_num'].append([blk['number'] for blk in page])

        pdf_text = []
        for page in mid_json.loc['para_blocks', :]:
            fields['para_num'].append(len(page))
            pdf_text.extend(blk['text'] for blk in page)
        fields['para_text'].append(pdf_text)
    return fields


def indicator_cal(json_standard,json_test):
    """Compare test mid-json output against the standard and build a report.

    Args:
        json_standard: records with the keys ``id``, ``mid_json`` and
            ``pass_label`` (anything ``pandas.DataFrame`` accepts).
        json_test: records with the keys ``id`` and ``mid_json``.

    Returns:
        A one-row ``pandas.DataFrame`` whose columns hold the individual
        indicators (overall existence metrics, equation edit distance / BLEU,
        bbox matching reports, reading-order edit distance, paragraph count
        accuracy and dropped-block reports).
    """
    json_standard = pd.DataFrame(json_standard)
    json_test = pd.DataFrame(json_test)

    # ---- Dataset-level indicators -------------------------------------
    test_docs = json_test[['id','mid_json']]
    standard_docs = json_standard[['id','mid_json','pass_label']]
    test_docs = test_docs.drop_duplicates(subset='id',keep='first')
    test_docs.index = range(len(test_docs))
    standard_docs = standard_docs.drop_duplicates(subset='id',keep='first')
    standard_docs.index = range(len(standard_docs))

    # The merge puts the LEFT frame's mid_json first, and the left frame is the
    # test data, so the second column is the test mid_json and the third one is
    # the standard mid_json.
    merged_columns = ['id','test_mid_json','standard_mid_json','pass_label']
    outer_merge = pd.merge(test_docs,standard_docs,on='id',how='outer')
    outer_merge.columns = merged_columns
    standard_exist = outer_merge['standard_mid_json'].notnull()
    test_exist = outer_merge['test_mid_json'].notnull()

    overall_report = {}
    overall_report['accuracy']=metrics.accuracy_score(standard_exist,test_exist)
    overall_report['precision']=metrics.precision_score(standard_exist,test_exist)
    overall_report['recall']=metrics.recall_score(standard_exist,test_exist)
    overall_report['f1_score']=metrics.f1_score(standard_exist,test_exist)

    inner_merge = pd.merge(test_docs,standard_docs,on='id',how='inner')
    inner_merge.columns = merged_columns

    # ---- Read the intermediate json of both sides ---------------------
    test = _collect_mid_json_fields(inner_merge['test_mid_json'])
    standard = _collect_mid_json_fields(inner_merge['standard_mid_json'])

    # It is advisable to confirm that the basic statistics of both sides agree
    # before trusting the indicators below.

    # ---- Whole-pdf edit distance and BLEU (positive samples only) -----
    is_positive = list(inner_merge['pass_label']=='yes')
    pdf_dis=[]
    pdf_bleu=[]
    for test_text, standard_text, keep in zip(
            test['para_text'], standard['para_text'], is_positive):
        if not keep:
            continue
        hypothesis = [''.join(para) for para in test_text]
        reference = [''.join(para) for para in standard_text]
        pdf_dis.append(Levenshtein_Distance(hypothesis,reference))
        # sentence_bleu takes the references first, then the hypothesis.
        pdf_bleu.append(sentence_bleu([reference],hypothesis))
    overall_report['pdf间的平均编辑距离']=np.mean(pdf_dis)
    overall_report['pdf间的平均bleu']=np.mean(pdf_bleu)

    # ---- Inline / interline equations: edit distance and BLEU ---------
    inline_equations_edit_bleu=equations_indicator(
        test['inline_equations_bboxs'],standard['inline_equations_bboxs'],
        test['inline_equations'],standard['inline_equations'])
    interline_equations_edit_bleu=equations_indicator(
        test['interline_equations_bboxs'],standard['interline_equations_bboxs'],
        test['interline_equations'],standard['interline_equations'])

    # ---- Inline / interline equations: bbox matching ------------------
    inline_equations_bbox_report=bbox_match_indicator(
        test['inline_equations_bboxs'],standard['inline_equations_bboxs'])
    interline_equations_bbox_report=bbox_match_indicator(
        test['interline_equations_bboxs'],standard['interline_equations_bboxs'])

    # ---- dropped_text_block: bbox matching and tags -------------------
    # (Page and bbox counts can be checked for consistency beforehand.)
    test_text_bbox=[]
    standard_text_bbox=[]
    test_tag=[]
    standard_tag=[]
    page_pairs = zip(test['dropped_text_bboxes'],standard['dropped_text_bboxes'])
    for index,(test_boxes,standard_boxes) in enumerate(page_pairs):
        page_test_tags = test['dropped_text_tag'][index]
        page_standard_tags = standard['dropped_text_tag'][index]
        test_page_tag=[]
        standard_page_tag=[]
        test_page_bbox=[]
        standard_page_bbox=[]
        # Tags of the test blocks that were matched to a standard block. They
        # are tracked separately from test_page_tag so that the 'None'
        # placeholders of unmatched standard blocks are never mixed up with
        # real tags.
        matched_tags=[]
        for i,standard_box in enumerate(standard_boxes):
            standard_page_tag.append(page_standard_tags[i])
            standard_page_bbox.append(1)
            for j,test_box in enumerate(test_boxes):
                if bbox_offset(standard_box,test_box):
                    matched_tags.append(page_test_tags[j])
                    test_page_tag.append(page_test_tags[j])
                    test_page_bbox.append(1)
                    break
            else:
                # No test block matches this standard block.
                test_page_tag.append('None')
                test_page_bbox.append(0)
        if len(page_test_tags)>len(matched_tags):
            # The test side dropped blocks the standard did not drop.
            diff=list((Counter(page_test_tags)-Counter(matched_tags)).elements())
            test_page_tag.extend(diff)
            standard_page_tag.extend(['None']*len(diff))
            test_page_bbox.extend([1]*len(diff))
            standard_page_bbox.extend([0]*len(diff))
        test_tag.extend(test_page_tag)
        standard_tag.extend(standard_page_tag)
        test_text_bbox.extend(test_page_bbox)
        standard_text_bbox.extend(standard_page_bbox)

    text_block_report = {}
    text_block_report['accuracy']=metrics.accuracy_score(standard_text_bbox,test_text_bbox)
    text_block_report['precision']=metrics.precision_score(standard_text_bbox,test_text_bbox)
    text_block_report['recall']=metrics.recall_score(standard_text_bbox,test_text_bbox)
    text_block_report['f1_score']=metrics.f1_score(standard_text_bbox,test_text_bbox)

    # Precision, recall and f1-score of the tags of the dropped text blocks.
    text_block_tag_report = classification_report(y_true=standard_tag , y_pred=test_tag,output_dict=True)
    # Remove the placeholder class and the averages; pop() tolerates a report
    # in which one of these keys does not occur.
    for unwanted_key in ('None',"macro avg","weighted avg"):
        text_block_tag_report.pop(unwanted_key,None)

    # ---- dropped_image_block / dropped_table_block: bbox matching -----
    # (The image data is known to have inconsistent formats.)
    image_block_report=bbox_match_indicator(
        test['dropped_image_bboxes'],standard['dropped_image_bboxes'])
    table_block_report=bbox_match_indicator(
        test['dropped_table_bboxes'],standard['dropped_table_bboxes'])

    # ---- Mean edit distance of the reading order ----------------------
    preproc_num_dis=[
        Levenshtein_Distance(test_order,standard_order)
        for test_order,standard_order in zip(test['preproc_num'],standard['preproc_num'])
    ]
    preproc_num_edit=np.mean(preproc_num_dis)

    # ---- Paragraph segmentation accuracy ------------------------------
    test_para_num=np.array(test['para_num'])
    standard_para_num=np.array(standard['para_num'])
    acc_para=np.mean(test_para_num==standard_para_num)

    output=pd.DataFrame()
    output['总体指标']=[overall_report]
    output['行内公式平均编辑距离']=[inline_equations_edit_bleu[0]]
    output['行内公式平均bleu']=[inline_equations_edit_bleu[1]]
    output['行间公式平均编辑距离']=[interline_equations_edit_bleu[0]]
    output['行间公式平均bleu']=[interline_equations_edit_bleu[1]]
    output['行内公式识别相关指标']=[inline_equations_bbox_report]
    output['行间公式识别相关指标']=[interline_equations_bbox_report]
    output['阅读顺序平均编辑距离']=[preproc_num_edit]
    output['分段准确率']=[acc_para]
    output['删除的text block的相关指标']=[text_block_report]
    output['删除的image block的相关指标']=[image_block_report]
    output['删除的table block的相关指标']=[table_block_report]
    output['删除的text block的tag相关指标']=[text_block_tag_report]
    return output
"""
计算编辑距离
"""
def Levenshtein_Distance(str1, str2):
    """Return the edit distance between two sequences.

    Insertions, deletions and substitutions each cost 1. The arguments only
    need to support ``len()`` and element comparison, so strings and lists
    both work.
    """
    rows = len(str1) + 1
    cols = len(str2) + 1
    # table[r][c] is the distance between str1[:r] and str2[:c].
    table = [[0] * cols for _ in range(rows)]
    for r in range(rows):
        table[r][0] = r
    for c in range(cols):
        table[0][c] = c
    for r, left in enumerate(str1, start=1):
        for c, right in enumerate(str2, start=1):
            substitution_cost = 0 if left == right else 1
            table[r][c] = min(
                table[r - 1][c] + 1,
                table[r][c - 1] + 1,
                table[r - 1][c - 1] + substitution_cost,
            )
    return table[rows - 1][cols - 1]
'''
计算bbox偏移量是否符合标准的函数
'''
def bbox_offset(b_t,b_s,threshold=0.95):
    """Decide whether two bounding boxes overlap closely enough to match.

    Args:
        b_t: bbox from the test document as ``(x1, y1, x2, y2)``.
        b_s: bbox from the standard document as ``(x1, y1, x2, y2)``.
        threshold: minimum ratio of the overlapping area to the
            non-overlapping part of the union. Defaults to 0.95.

    Returns:
        ``True`` when the union has no non-overlapping part, or when the ratio
        exceeds ``threshold``; ``False`` otherwise.
    """
    x1_t,y1_t,x2_t,y2_t=b_t
    x1_s,y1_s,x2_s,y2_s=b_s
    # Clamp each side at 0: for disjoint boxes both differences can be
    # negative, and their product would be a positive, bogus overlap.
    overlap_width=max(0,min(x2_t,x2_s)-max(x1_t,x1_s))
    overlap_height=max(0,min(y2_t,y2_s)-max(y1_t,y1_s))
    area_overlap=overlap_width*overlap_height
    area_union=(x2_t-x1_t)*(y2_t-y1_t)+(x2_s-x1_s)*(y2_s-y1_s)-area_overlap
    non_overlap=area_union-area_overlap
    if non_overlap==0:
        return True
    return bool(area_overlap/non_overlap>threshold)
'''bbox匹配和对齐函数,输出相关指标'''
'''输入的是以page为单位的bbox列表'''
def bbox_match_indicator(test_bbox_list,standard_bbox_list):
    """Match per-page bboxes and report accuracy / precision / recall / f1.

    Both arguments are lists with one list of bboxes per page. Every standard
    bbox of length 4 contributes a 1 to the expected labels and a 1 or 0 to
    the predicted labels, depending on whether any test bbox on the same page
    satisfies ``bbox_offset``. Surplus test bboxes are added as predicted 1 /
    expected 0.
    """
    predicted=[]
    expected=[]
    for test_page,standard_page in zip(test_bbox_list,standard_bbox_list):
        page_predicted=[]
        page_expected=[]
        if len(test_page)!=0 or len(standard_page)!=0:
            for standard_box in standard_page:
                # Entries that are not 4-element boxes are skipped.
                if len(standard_box)!=4:
                    continue
                page_expected.append(1)
                found=any(bbox_offset(standard_box,test_box) for test_box in test_page)
                page_predicted.append(1 if found else 0)
            surplus=len(test_page)+page_predicted.count(0)-len(standard_page)
            if surplus>0:
                # The test side has boxes the standard side does not have.
                page_predicted.extend([1]*surplus)
                page_expected.extend([0]*surplus)
        predicted.extend(page_predicted)
        expected.extend(page_expected)
    return {
        'accuracy': metrics.accuracy_score(expected,predicted),
        'precision': metrics.precision_score(expected,predicted),
        'recall': metrics.recall_score(expected,predicted),
        'f1_score': metrics.f1_score(expected,predicted),
    }
'''公式编辑距离和bleu'''
def equations_indicator(test_euqations_bboxs,standard_euqations_bboxs,test_equations,standard_equations):
    """Mean edit distance and mean BLEU of bbox-matched equations.

    Args:
        test_euqations_bboxs: per-page lists of test equation bboxes.
        standard_euqations_bboxs: per-page lists of standard equation bboxes.
        test_equations: per-page lists of test LaTeX strings, parallel to
            ``test_euqations_bboxs``.
        standard_equations: per-page lists of standard LaTeX strings, parallel
            to ``standard_euqations_bboxs``.

    Returns:
        ``(mean_edit_distance, mean_bleu)``. Both are ``nan`` when no pair of
        equations was scored.
    """
    # Pair each standard equation with the first test equation on the same
    # page whose bbox satisfies bbox_offset.
    matched_pairs=[]
    page_pairs=zip(test_euqations_bboxs,standard_euqations_bboxs)
    for index,(test_boxes,standard_boxes) in enumerate(page_pairs):
        for i,standard_box in enumerate(standard_boxes):
            for j,test_box in enumerate(test_boxes):
                if bbox_offset(standard_box,test_box):
                    matched_pairs.append(
                        (test_equations[index][j],standard_equations[index][i]))
                    break

    dis=[]
    bleu=[]
    for test_latex,standard_latex in matched_pairs:
        if len(test_latex)==0 and len(standard_latex)==0:
            continue
        if test_latex==standard_latex:
            dis.append(0)
            bleu.append(1)
        else:
            dis.append(Levenshtein_Distance(test_latex,standard_latex))
            # sentence_bleu(references, hypothesis): the standard text is the
            # reference and the test text is the hypothesis.
            bleu.append(sentence_bleu([standard_latex],test_latex))
    if not dis:
        # Nothing was scored; skip np.mean on an empty list and its warning.
        return (np.nan,np.nan)
    equations_edit=np.mean(dis)
    equations_bleu=np.mean(bleu)
    return (equations_edit,equations_bleu)
def _load_json_lines(path):
    """Parse a JSON-lines file into a list, closing the file afterwards."""
    with open(path, 'r', encoding='utf-8') as handle:
        return [json.loads(line) for line in handle]


if __name__ == '__main__':
    # Parse the command line only when run as a script, so that importing this
    # module for its functions does not consume the importer's sys.argv.
    parser = argparse.ArgumentParser()
    parser.add_argument('--test', type=str, required=True)
    parser.add_argument('--standard', type=str, required=True)
    args = parser.parse_args()
    pdf_json_test = _load_json_lines(args.test)
    pdf_json_standard = _load_json_lines(args.standard)
    overall_indicator=indicator_cal(pdf_json_standard,pdf_json_test)
    # Write the computed indicators to overall_indicator_output.json.
    overall_indicator.to_json('overall_indicator_output.json',orient='records',lines=True,force_ascii=False)
\ No newline at end of file
#!/bin/bash
# Prepare the test environment, retrying the whole setup when a step fails.

# Maximum number of attempts.
max_retries=5
retry_count=0

while true; do
    # prepare env
    # The steps are chained with && so that a failure in ANY of them (not just
    # the last pip install) is reflected in $? and triggers a retry.
    # "magic-pdf[full]..." is quoted so the shell cannot glob-expand the brackets.
    source activate MinerU \
        && pip install -r requirements-qa.txt \
        && pip install "magic-pdf[full]==0.7.0b1" --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple \
        && pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
    exit_code=$?

    if [ "$exit_code" -eq 0 ]; then
        echo "test.sh 成功执行!"
        break
    fi

    retry_count=$((retry_count + 1))
    if [ "$retry_count" -ge "$max_retries" ]; then
        echo "达到最大重试次数 ($max_retries),放弃重试。"
        exit 1
    fi
    echo "test.sh 执行失败 (退出码: $exit_code)。尝试第 $retry_count 次重试..."
    sleep 5  # wait 5 seconds before retrying
done
import subprocess
"""common definitions."""
import os
import shutil
def check_shell(cmd):
    """Run ``cmd`` via ``os.system`` and assert that it returned 0."""
    exit_status = os.system(cmd)
    assert exit_status == 0
def count_folders_and_check_contents(file_path):
    """Get the folder size.

    NOTE(review): in the visible source nothing follows this docstring before
    the next ``def``, so the function as shown does no work and returns
    ``None`` - confirm whether a body is missing.
    """
def cli_count_folders_and_check_contents(file_path):
    """Check the files produced by a CLI run.

    Asserts that ``file_path`` exists, that every entry directly inside it has
    a size greater than 0, and that it holds more than 5 entries.

    Raises:
        AssertionError: if any of these checks fails. A missing ``file_path``
            is reported as a failure rather than silently passing.
    """
    assert os.path.exists(file_path), f"output path does not exist: {file_path}"
    # List the directory once and reuse the result for both checks.
    entries = os.listdir(file_path)
    for entry in entries:
        entry_size = os.path.getsize(os.path.join(file_path, entry))
        assert entry_size > 0, f"empty output entry: {entry}"
    assert len(entries) > 5, f"expected more than 5 entries, found {len(entries)}"
def sdk_count_folders_and_check_contents(file_path):
    """Check that ``file_path`` exists and has a size greater than 0.

    Raises:
        SystemExit: with code 1 when ``file_path`` does not exist.
        AssertionError: when ``os.path.getsize(file_path)`` is not positive.
    """
    if not os.path.exists(file_path):
        # Raised directly instead of calling exit(), which is only available
        # when the ``site`` module has been loaded.
        raise SystemExit(1)
    # One size check is enough; the source repeated the same check twice.
    file_count = os.path.getsize(file_path)
    assert file_count > 0
# When run as a script, call count_folders_and_check_contents on a hard-coded
# absolute path.
if __name__ == "__main__":
    count_folders_and_check_contents("/home/quyuan/code/Magic-PDF/Magic-PDF/Magic-PDF/ci")
\ No newline at end of file
def delete_file(path):
    """Delete the file or directory tree at ``path`` if it exists.

    A path that does not exist is ignored. Deletion is best-effort: an
    ``OSError`` raised while deleting is printed, not propagated.

    Args:
        path: file or directory to remove.
    """
    # Only an existing path can be deleted. The previous ``not exists`` check
    # meant that neither branch below could ever run.
    if not os.path.exists(path):
        return
    if os.path.isfile(path):
        try:
            os.remove(path)
            print(f"File '{path}' deleted.")
        except OSError as e:
            # os.remove reports failures (permissions, busy file...) as OSError.
            print(f"Error deleting file '{path}': {e}")
    elif os.path.isdir(path):
        try:
            shutil.rmtree(path)
            print(f"Directory '{path}' and its contents deleted.")
        except OSError as e:
            print(f"Error deleting directory '{path}': {e}")
\ No newline at end of file
This source diff could not be displayed because it is too large. You can view the blob instead.
[
{
"layout_dets": [
{
"category_id": 2,
"poly": [
126.50015258789062,
128.93304443359375,
540.679931640625,
128.93304443359375,
540.679931640625,
226.92637634277344,
126.50015258789062,
226.92637634277344
],
"score": 0.9999887347221375
},
{
"category_id": 0,
"poly": [
130.72247314453125,
532.6777954101562,
1501.8043212890625,
532.6777954101562,
1501.8043212890625,
690.7334594726562,
130.72247314453125,
690.7334594726562
],
"score": 0.9999832510948181
},
{
"category_id": 1,
"poly": [
854.5001831054688,
1298.847412109375,
1522.951904296875,
1298.847412109375,
1522.951904296875,
1908.7020263671875,
854.5001831054688,
1908.7020263671875
],
"score": 0.9999802112579346
},
{
"category_id": 1,
"poly": [
854.1588134765625,
1057.34716796875,
1522.5185546875,
1057.34716796875,
1522.5185546875,
1296.958251953125,
854.1588134765625,
1296.958251953125
],
"score": 0.999922513961792
},
{
"category_id": 1,
"poly": [
129.9320526123047,
995.6026611328125,
811.706298828125,
995.6026611328125,
811.706298828125,
1205.6361083984375,
129.9320526123047,
1205.6361083984375
],
"score": 0.9998705387115479
},
{
"category_id": 1,
"poly": [
854.8023071289062,
1914.2344970703125,
1523.3448486328125,
1914.2344970703125,
1523.3448486328125,
2062.06005859375,
854.8023071289062,
2062.06005859375
],
"score": 0.9998676180839539
},
{
"category_id": 1,
"poly": [
129.7725830078125,
771.8756713867188,
1463.919189453125,
771.8756713867188,
1463.919189453125,
829.7714233398438,
129.7725830078125,
829.7714233398438
],
"score": 0.9998531341552734
},
{
"category_id": 1,
"poly": [
131.63143920898438,
1848.7064208984375,
813.7200927734375,
1848.7064208984375,
813.7200927734375,
1908.3885498046875,
131.63143920898438,
1908.3885498046875
],
"score": 0.9997979998588562
},
{
"category_id": 1,
"poly": [
131.2013702392578,
711.3101806640625,
974.0772705078125,
711.3101806640625,
974.0772705078125,
746.839111328125,
131.2013702392578,
746.839111328125
],
"score": 0.9996878504753113
},
{
"category_id": 1,
"poly": [
129.92178344726562,
1453.175537109375,
812.6341552734375,
1453.175537109375,
812.6341552734375,
1632.7532958984375,
129.92178344726562,
1632.7532958984375
],
"score": 0.9996528625488281
},
{
"category_id": 0,
"poly": [
854.4719848632812,
997.0496215820312,
1004.6527099609375,
997.0496215820312,
1004.6527099609375,
1020.6658935546875,
854.4719848632812,
1020.6658935546875
],
"score": 0.99927818775177
},
{
"category_id": 1,
"poly": [
129.71356201171875,
1300.873779296875,
812.8416137695312,
1300.873779296875,
812.8416137695312,
1450.150146484375,
129.71356201171875,
1450.150146484375
],
"score": 0.9991269111633301
},
{
"category_id": 1,
"poly": [
129.04617309570312,
1208.1441650390625,
812.42919921875,
1208.1441650390625,
812.42919921875,
1298.6868896484375,
129.04617309570312,
1298.6868896484375
],
"score": 0.9990298748016357
},
{
"category_id": 1,
"poly": [
129.8084716796875,
1636.7369384765625,
812.437255859375,
1636.7369384765625,
812.437255859375,
1816.5880126953125,
129.8084716796875,
1816.5880126953125
],
"score": 0.9989234805107117
},
{
"category_id": 2,
"poly": [
133.50637817382812,
2036.548583984375,
350.5669860839844,
2036.548583984375,
350.5669860839844,
2059.908203125,
133.50637817382812,
2059.908203125
],
"score": 0.9984697699546814
},
{
"category_id": 2,
"poly": [
1034.2279052734375,
131.83363342285156,
1528.302001953125,
131.83363342285156,
1528.302001953125,
184.0697784423828,
1034.2279052734375,
184.0697784423828
],
"score": 0.9977995753288269
},
{
"category_id": 1,
"poly": [
129.7623748779297,
855.4282836914062,
858.1234741210938,
855.4282836914062,
858.1234741210938,
880.0763549804688,
129.7623748779297,
880.0763549804688
],
"score": 0.9964384436607361
},
{
"category_id": 1,
"poly": [
131.41041564941406,
429.4252624511719,
484.5693054199219,
429.4252624511719,
484.5693054199219,
474.6931457519531,
131.41041564941406,
474.6931457519531
],
"score": 0.8408285975456238
},
{
"category_id": 0,
"poly": [
131.45191955566406,
429.0645446777344,
484.478271484375,
429.0645446777344,
484.478271484375,
474.9486083984375,
131.45191955566406,
474.9486083984375
],
"score": 0.3361666798591614
},
{
"category_id": 13,
"poly": [
129,
1329,
317,
1329,
317,
1361,
129,
1361
],
"score": 0.88,
"latex": "10{\\cdot}300\\,\\upmu\\mathrm{mol}/\\mathrm{kg})"
},
{
"category_id": 13,
"poly": [
408,
1605,
499,
1605,
499,
1634,
408,
1634
],
"score": 0.86,
"latex": "(l7\\pm4)"
},
{
"category_id": 13,
"poly": [
450,
1574,
542,
1574,
542,
1604,
450,
1604
],
"score": 0.8,
"latex": "(\\mathsf{p}\\!\\!<\\!\\!0.01)"
},
{
"category_id": 13,
"poly": [
126,
1605,
173,
1605,
173,
1634,
126,
1634
],
"score": 0.68,
"latex": "\\pm\\nobreakspace2\\nobreakspace"
},
{
"category_id": 13,
"poly": [
487,
1358,
616,
1358,
616,
1391,
487,
1391
],
"score": 0.65,
"latex": "(^{51}\\mathrm{CrEDTA})"
},
{
"category_id": 13,
"poly": [
127,
203,
149,
203,
149,
225,
127,
225
],
"score": 0.53,
"latex": "\\copyright"
}
],
"page_info": {
"page_no": 0,
"height": 2181,
"width": 1653
}
},
{
"layout_dets": [
{
"category_id": 0,
"poly": [
131.0747528076172,
1646.9365234375,
232.2142333984375,
1646.9365234375,
232.2142333984375,
1674.91015625,
131.0747528076172,
1674.91015625
],
"score": 0.999990701675415
},
{
"category_id": 1,
"poly": [
854.1908569335938,
457.566650390625,
1522.8731689453125,
457.566650390625,
1522.8731689453125,
716.369873046875,
854.1908569335938,
716.369873046875
],
"score": 0.9999818801879883
},
{
"category_id": 1,
"poly": [
854.4945678710938,
199.6878662109375,
1523.6170654296875,
199.6878662109375,
1523.6170654296875,
327.42291259765625,
854.4945678710938,
327.42291259765625
],
"score": 0.999980628490448
},
{
"category_id": 1,
"poly": [
853.7386474609375,
843.7147216796875,
1524.1510009765625,
843.7147216796875,
1524.1510009765625,
1494.796630859375,
853.7386474609375,
1494.796630859375
],
"score": 0.9999773502349854
},
{
"category_id": 1,
"poly": [
129.98367309570312,
1775.478271484375,
798.7672119140625,
1775.478271484375,
798.7672119140625,
2061.471923828125,
129.98367309570312,
2061.471923828125
],
"score": 0.9999737739562988
},
{
"category_id": 1,
"poly": [
854.2423706054688,
1623.10107421875,
1523.447998046875,
1623.10107421875,
1523.447998046875,
1828.6688232421875,
854.2423706054688,
1828.6688232421875
],
"score": 0.9999676942825317
},
{
"category_id": 2,
"poly": [
1117.2716064453125,
131.07754516601562,
1525.3043212890625,
131.07754516601562,
1525.3043212890625,
153.0941925048828,
1117.2716064453125,
153.0941925048828
],
"score": 0.999963641166687
},
{
"category_id": 1,
"poly": [
129.4814910888672,
200.13360595703125,
798.3907470703125,
200.13360595703125,
798.3907470703125,
748.1752319335938,
129.4814910888672,
748.1752319335938
],
"score": 0.9999613761901855
},
{
"category_id": 1,
"poly": [
854.3500366210938,
1960.907470703125,
1521.8297119140625,
1960.907470703125,
1521.8297119140625,
2059.875732421875,
854.3500366210938,
2059.875732421875
],
"score": 0.9999498128890991
},
{
"category_id": 0,
"poly": [
855.2539672851562,
785.2300415039062,
1112.8525390625,
785.2300415039062,
1112.8525390625,
809.8038940429688,
855.2539672851562,
809.8038940429688
],
"score": 0.9999380111694336
},
{
"category_id": 1,
"poly": [
129.95936584472656,
1060.7459716796875,
798.635986328125,
1060.7459716796875,
798.635986328125,
1576.5565185546875,
129.95936584472656,
1576.5565185546875
],
"score": 0.9999300837516785
},
{
"category_id": 0,
"poly": [
854.482666015625,
396.4778137207031,
1104.807373046875,
396.4778137207031,
1104.807373046875,
421.73834228515625,
854.482666015625,
421.73834228515625
],
"score": 0.9999269247055054
},
{
"category_id": 0,
"poly": [
854.3327026367188,
1897.323486328125,
1061.5657958984375,
1897.323486328125,
1061.5657958984375,
1925.312255859375,
854.3327026367188,
1925.312255859375
],
"score": 0.9999227523803711
},
{
"category_id": 1,
"poly": [
130.46299743652344,
752.8681030273438,
798.3680419921875,
752.8681030273438,
798.3680419921875,
1056.0352783203125,
130.46299743652344,
1056.0352783203125
],
"score": 0.9999172687530518
},
{
"category_id": 0,
"poly": [
854.9552001953125,
1562.7789306640625,
1080.6656494140625,
1562.7789306640625,
1080.6656494140625,
1591.3477783203125,
854.9552001953125,
1591.3477783203125
],
"score": 0.9999111890792847
},
{
"category_id": 2,
"poly": [
130.90972900390625,
130.28379821777344,
167.29371643066406,
130.28379821777344,
167.29371643066406,
150.61325073242188,
130.90972900390625,
150.61325073242188
],
"score": 0.9997210502624512
},
{
"category_id": 0,
"poly": [
130.5743408203125,
1713.660888671875,
219.01223754882812,
1713.660888671875,
219.01223754882812,
1739.568115234375,
130.5743408203125,
1739.568115234375
],
"score": 0.9326722025871277
},
{
"category_id": 13,
"poly": [
852,
224,
973,
224,
973,
252,
852,
252
],
"score": 0.89,
"latex": "300\\,\\upmu\\mathrm{mol}/\\mathrm{kg}"
},
{
"category_id": 13,
"poly": [
1009,
1286,
1069,
1286,
1069,
1313,
1009,
1313
],
"score": 0.89,
"latex": "100\\,\\upmu\\mathrm{l}"
},
{
"category_id": 13,
"poly": [
881,
1649,
925,
1649,
925,
1675,
881,
1675
],
"score": 0.89,
"latex": "\\mathrm{CO}_{2}"
},
{
"category_id": 13,
"poly": [
975,
1208,
1025,
1208,
1025,
1235,
975,
1235
],
"score": 0.87,
"latex": "50\\,\\upmu\\mathrm{l}"
},
{
"category_id": 13,
"poly": [
1179,
1700,
1235,
1700,
1235,
1725,
1179,
1725
],
"score": 0.87,
"latex": "0.9\\,\\%"
},
{
"category_id": 13,
"poly": [
1375,
1025,
1425,
1025,
1425,
1052,
1375,
1052
],
"score": 0.86,
"latex": "50\\,\\upmu\\mathrm{l}"
},
{
"category_id": 13,
"poly": [
127,
1800,
225,
1800,
225,
1827,
127,
1827
],
"score": 0.86,
"latex": "_{200-250\\mathrm{g}}"
},
{
"category_id": 13,
"poly": [
958,
586,
1007,
586,
1007,
610,
958,
610
],
"score": 0.85,
"latex": "10\\,\\%"
},
{
"category_id": 13,
"poly": [
1042,
1313,
1091,
1313,
1091,
1337,
1042,
1337
],
"score": 0.85,
"latex": "10\\,\\%"
},
{
"category_id": 13,
"poly": [
1325,
1155,
1391,
1155,
1391,
1182,
1325,
1182
],
"score": 0.83,
"latex": "\\mathrm{{MgCl}}_{2}"
},
{
"category_id": 13,
"poly": [
1003,
894,
1064,
894,
1064,
921,
1003,
921
],
"score": 0.82,
"latex": "\\mathrm{CaCl}_{2}"
},
{
"category_id": 13,
"poly": [
984,
225,
1044,
225,
1044,
249,
984,
249
],
"score": 0.81,
"latex": "\\mathbf{\\tilde{n}}=8\\mathbf{\\tilde{\\ n}}"
},
{
"category_id": 13,
"poly": [
1285,
1779,
1354,
1779,
1354,
1803,
1285,
1803
],
"score": 0.81,
"latex": "{>}5\\,\\mathrm{mm}"
},
{
"category_id": 13,
"poly": [
1441,
1287,
1518,
1287,
1518,
1313,
1441,
1313
],
"score": 0.81,
"latex": "1\\,\\mathrm{mg/ml}"
},
{
"category_id": 13,
"poly": [
1451,
843,
1482,
843,
1482,
869,
1451,
869
],
"score": 0.8,
"latex": "1\\,\\mathrm{g}"
},
{
"category_id": 13,
"poly": [
1037,
480,
1098,
480,
1098,
508,
1037,
508
],
"score": 0.8,
"latex": "10\\upmu\\mathrm{Ci}"
},
{
"category_id": 13,
"poly": [
1148,
1390,
1199,
1390,
1199,
1417,
1148,
1417
],
"score": 0.76,
"latex": "50\\,\\upmu\\mathrm{l}"
},
{
"category_id": 13,
"poly": [
1048,
1182,
1134,
1182,
1134,
1207,
1048,
1207
],
"score": 0.76,
"latex": "0.25\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
1464,
481,
1525,
481,
1525,
506,
1464,
506
],
"score": 0.74,
"latex": "0.5\\,\\mathrm{ml}"
},
{
"category_id": 13,
"poly": [
851,
1182,
926,
1182,
926,
1207,
851,
1207
],
"score": 0.72,
"latex": "2.5\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
1152,
869,
1196,
869,
1196,
893,
1152,
893
],
"score": 0.69,
"latex": "4\\,\\mathrm{{m}l}"
},
{
"category_id": 13,
"poly": [
1122,
974,
1229,
974,
1229,
1000,
1122,
1000
],
"score": 0.68,
"latex": "13{,}000\\,\\mathrm{rpm}"
},
{
"category_id": 13,
"poly": [
852,
1338,
938,
1338,
938,
1364,
852,
1364
],
"score": 0.67,
"latex": "0.25\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
1095,
1156,
1164,
1156,
1164,
1180,
1095,
1180
],
"score": 0.65,
"latex": "50\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
1260,
1313,
1326,
1313,
1326,
1339,
1260,
1339
],
"score": 0.64,
"latex": "\\mathrm{{MgCl}}_{2}"
},
{
"category_id": 13,
"poly": [
1214,
1182,
1309,
1182,
1309,
1207,
1214,
1207
],
"score": 0.6,
"latex": "20\\;0.05\\,\\%"
},
{
"category_id": 13,
"poly": [
973,
1418,
1045,
1418,
1045,
1442,
973,
1442
],
"score": 0.6,
"latex": "405\\,\\mathrm{nm}"
},
{
"category_id": 13,
"poly": [
960,
1779,
1028,
1779,
1028,
1803,
960,
1803
],
"score": 0.6,
"latex": "{\\tt\\le}5\\,\\mathrm{mm}"
},
{
"category_id": 13,
"poly": [
1450,
869,
1519,
869,
1519,
893,
1450,
893
],
"score": 0.57,
"latex": "50\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
1266,
894,
1353,
894,
1353,
919,
1266,
919
],
"score": 0.56,
"latex": "0.25\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
1235,
1155,
1314,
1155,
1314,
1180,
1235,
1180
],
"score": 0.54,
"latex": "150\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
1478,
1443,
1526,
1443,
1526,
1471,
1478,
1471
],
"score": 0.54,
"latex": "\\mathrm{mg/l}"
},
{
"category_id": 13,
"poly": [
1331,
922,
1440,
922,
1440,
947,
1331,
947
],
"score": 0.54,
"latex": "_{20,000\\,\\mathrm{rpm}}"
},
{
"category_id": 13,
"poly": [
690,
2035,
801,
2035,
801,
2063,
690,
2063
],
"score": 0.52,
"latex": "60\\,\\upmu\\mathrm{mol}/\\mathrm{mol}"
},
{
"category_id": 13,
"poly": [
664,
1453,
788,
1453,
788,
1486,
664,
1486
],
"score": 0.52,
"latex": "^{51}\\mathrm{CrEDTA})"
},
{
"category_id": 13,
"poly": [
1396,
1155,
1470,
1155,
1470,
1181,
1396,
1181
],
"score": 0.47,
"latex": "0.5\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
912,
894,
991,
894,
991,
920,
912,
920
],
"score": 0.46,
"latex": "150\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
1070,
894,
1138,
894,
1138,
920,
1070,
920
],
"score": 0.41,
"latex": "10\\,\\mathrm{mM}"
},
{
"category_id": 13,
"poly": [
1029,
509,
1073,
509,
1073,
532,
1029,
532
],
"score": 0.39,
"latex": "1\\,\\mathrm{mol}"
},
{
"category_id": 13,
"poly": [
947,
1338,
1021,
1338,
1021,
1365,
947,
1365
],
"score": 0.34,
"latex": "\\mathrm{pH}\\ 9.6)"
},
{
"category_id": 13,
"poly": [
1102,
479,
1204,
479,
1204,
507,
1102,
507
],
"score": 0.33,
"latex": "^{51}\\mathrm{CrEDTA}"
},
{
"category_id": 13,
"poly": [
663,
1453,
704,
1453,
704,
1484,
663,
1484
],
"score": 0.31,
"latex": "^{51}\\mathrm{C}"
},
{
"category_id": 13,
"poly": [
1260,
1312,
1405,
1312,
1405,
1339,
1260,
1339
],
"score": 0.28,
"latex": "\\mathrm{MgCl_{2}\\ 0.5\\,m M}"
},
{
"category_id": 13,
"poly": [
1198,
1290,
1212,
1290,
1212,
1313,
1198,
1313
],
"score": 0.27,
"latex": "\\mathrm{\\bfp}"
}
],
"page_info": {
"page_no": 1,
"height": 2181,
"width": 1653
}
},
{
"layout_dets": [
{
"category_id": 3,
"poly": [
854.8114624023438,
1000.0735473632812,
1526.2498779296875,
1000.0735473632812,
1526.2498779296875,
1602.6619873046875,
854.8114624023438,
1602.6619873046875
],
"score": 0.9999911189079285
},
{
"category_id": 4,
"poly": [
849.4373779296875,
817.45849609375,
1530.512451171875,
817.45849609375,
1530.512451171875,
956.3124389648438,
849.4373779296875,
956.3124389648438
],
"score": 0.9999908208847046
},
{
"category_id": 4,
"poly": [
127.44953918457031,
1929.2335205078125,
801.6481323242188,
1929.2335205078125,
801.6481323242188,
2066.464599609375,
127.44953918457031,
2066.464599609375
],
"score": 0.9999856948852539
},
{
"category_id": 1,
"poly": [
128.18153381347656,
321.1024169921875,
802.0577392578125,
321.1024169921875,
802.0577392578125,
506.3064270019531,
128.18153381347656,
506.3064270019531
],
"score": 0.9999765753746033
},
{
"category_id": 4,
"poly": [
850.1302490234375,
1621.6951904296875,
1530.3858642578125,
1621.6951904296875,
1530.3858642578125,
1736.14794921875,
850.1302490234375,
1736.14794921875
],
"score": 0.9999746680259705
},
{
"category_id": 1,
"poly": [
127.43219757080078,
625.2225952148438,
801.4795532226562,
625.2225952148438,
801.4795532226562,
903.7083129882812,
127.43219757080078,
903.7083129882812
],
"score": 0.9999739527702332
},
{
"category_id": 1,
"poly": [
127.66834259033203,
1022.7542724609375,
802.6483764648438,
1022.7542724609375,
802.6483764648438,
1180.9302978515625,
127.66834259033203,
1180.9302978515625
],
"score": 0.999945342540741
},
{
"category_id": 0,
"poly": [
852.8805541992188,
1816.059326171875,
981.3207397460938,
1816.059326171875,
981.3207397460938,
1848.59228515625,
852.8805541992188,
1848.59228515625
],
"score": 0.9999176263809204
},
{
"category_id": 3,
"poly": [
861.9379272460938,
198.4549560546875,
1519.971923828125,
198.4549560546875,
1519.971923828125,
801.3720703125,
861.9379272460938,
801.3720703125
],
"score": 0.9999100565910339
},
{
"category_id": 3,
"poly": [
129.3984375,
1296.743408203125,
801.4290771484375,
1296.743408203125,
801.4290771484375,
1911.7489013671875,
129.3984375,
1911.7489013671875
],
"score": 0.9999092817306519
},
{
"category_id": 1,
"poly": [
850.8916015625,
1877.3216552734375,
1527.890625,
1877.3216552734375,
1527.890625,
2067.06494140625,
850.8916015625,
2067.06494140625
],
"score": 0.9998919367790222
},
{
"category_id": 2,
"poly": [
124.14147186279297,
130.1715850830078,
1190.90234375,
130.1715850830078,
1190.90234375,
157.70138549804688,
124.14147186279297,
157.70138549804688
],
"score": 0.9996301531791687
},
{
"category_id": 2,
"poly": [
1487.102294921875,
128.76255798339844,
1524.0087890625,
128.76255798339844,
1524.0087890625,
152.51942443847656,
1487.102294921875,
152.51942443847656
],
"score": 0.9978102445602417
},
{
"category_id": 1,
"poly": [
128.83859252929688,
260.12615966796875,
385.3800964355469,
260.12615966796875,
385.3800964355469,
292.27276611328125,
128.83859252929688,
292.27276611328125
],
"score": 0.9885663986206055
},
{
"category_id": 0,
"poly": [
128.97781372070312,
963.9873657226562,
418.56024169921875,
963.9873657226562,
418.56024169921875,
992.4894409179688,
128.97781372070312,
992.4894409179688
],
"score": 0.9732756614685059
},
{
"category_id": 0,
"poly": [
128.39158630371094,
567.232421875,
392.3690185546875,
567.232421875,
392.3690185546875,
596.4879150390625,
128.39158630371094,
596.4879150390625
],
"score": 0.830464243888855
},
{
"category_id": 0,
"poly": [
129.43605041503906,
198.31634521484375,
217.4158477783203,
198.31634521484375,
217.4158477783203,
229.57237243652344,
129.43605041503906,
229.57237243652344
],
"score": 0.6879984140396118
},
{
"category_id": 4,
"poly": [
129.5968475341797,
198.34027099609375,
217.13697814941406,
198.34027099609375,
217.13697814941406,
229.41558837890625,
129.5968475341797,
229.41558837890625
],
"score": 0.3450572192668915
},
{
"category_id": 1,
"poly": [
129.5113983154297,
198.30056762695312,
217.10781860351562,
198.30056762695312,
217.10781860351562,
229.45274353027344,
129.5113983154297,
229.45274353027344
],
"score": 0.32501277327537537
},
{
"category_id": 1,
"poly": [
128.48004150390625,
567.8143310546875,
392.4095153808594,
567.8143310546875,
392.4095153808594,
596.3490600585938,
128.48004150390625,
596.3490600585938
],
"score": 0.23228876292705536
},
{
"category_id": 13,
"poly": [
161,
748,
304,
748,
304,
780,
161,
780
],
"score": 0.91,
"latex": "300\\,\\upmu\\mathrm{mol}/\\mathrm{kg}"
},
{
"category_id": 13,
"poly": [
415,
1054,
585,
1054,
585,
1085,
415,
1085
],
"score": 0.89,
"latex": "0{-}100\\,\\upmu\\mathrm{mol}/\\mathrm{kg}"
},
{
"category_id": 13,
"poly": [
127,
656,
258,
656,
258,
688,
127,
688
],
"score": 0.89,
"latex": "30\\,\\upmu\\mathrm{mol}/\\mathrm{kg}"
},
{
"category_id": 13,
"poly": [
1389,
1702,
1509,
1702,
1509,
1730,
1389,
1730
],
"score": 0.88,
"latex": "300\\,\\upmu\\mathrm{mol}/\\mathrm{kg}"
},
{
"category_id": 13,
"poly": [
403,
687,
545,
687,
545,
719,
403,
719
],
"score": 0.88,
"latex": "100\\,\\upmu\\mathrm{mol}/\\mathrm{kg}"
},
{
"category_id": 13,
"poly": [
407,
1086,
554,
1086,
554,
1116,
407,
1116
],
"score": 0.87,
"latex": "300\\,\\upmu\\mathrm{mol}/\\mathrm{kg})"
},
{
"category_id": 13,
"poly": [
665,
412,
770,
412,
770,
444,
665,
444
],
"score": 0.83,
"latex": "\\mathrm{(p>}0.05)"
},
{
"category_id": 13,
"poly": [
583,
718,
686,
718,
686,
750,
583,
750
],
"score": 0.82,
"latex": "\\left(\\mathrm{p}<\\!\\!0.05\\right)"
},
{
"category_id": 13,
"poly": [
476,
810,
578,
810,
578,
842,
476,
842
],
"score": 0.82,
"latex": "({\\tt p}<\\!0.01)"
},
{
"category_id": 13,
"poly": [
205,
1146,
319,
1146,
319,
1178,
205,
1178
],
"score": 0.8,
"latex": "(\\mathfrak{p}\\!<\\!0.001)"
},
{
"category_id": 13,
"poly": [
131,
412,
244,
412,
244,
444,
131,
444
],
"score": 0.76,
"latex": "\\scriptstyle(\\mathtt{p}<0.001)"
},
{
"category_id": 13,
"poly": [
755,
1085,
803,
1085,
803,
1114,
755,
1114
],
"score": 0.38,
"latex": "10-"
}
],
"page_info": {
"page_no": 2,
"height": 2181,
"width": 1653
}
},
{
"layout_dets": [
{
"category_id": 1,
"poly": [
849.5838623046875,
878.706787109375,
1529.954833984375,
878.706787109375,
1529.954833984375,
2062.25048828125,
849.5838623046875,
2062.25048828125
],
"score": 0.9999964237213135
},
{
"category_id": 1,
"poly": [
851.5059204101562,
197.32901000976562,
1526.1309814453125,
197.32901000976562,
1526.1309814453125,
412.517578125,
851.5059204101562,
412.517578125
],
"score": 0.9999958276748657
},
{
"category_id": 1,
"poly": [
127.03380584716797,
320.2677307128906,
802.3373413085938,
320.2677307128906,
802.3373413085938,
778.7638549804688,
127.03380584716797,
778.7638549804688
],
"score": 0.9999817609786987
},
{
"category_id": 0,
"poly": [
854.364990234375,
817.7249755859375,
983.921875,
817.7249755859375,
983.921875,
848.59765625,
854.364990234375,
848.59765625
],
"score": 0.999971866607666
},
{
"category_id": 1,
"poly": [
126.02391815185547,
1573.8297119140625,
802.7295532226562,
1573.8297119140625,
802.7295532226562,
2063.943603515625,
126.02391815185547,
2063.943603515625
],
"score": 0.9999658465385437
},
{
"category_id": 1,
"poly": [
126.4128189086914,
779.7554321289062,
801.98388671875,
779.7554321289062,
801.98388671875,
1236.2952880859375,
126.4128189086914,
1236.2952880859375
],
"score": 0.9999641180038452
},
{
"category_id": 1,
"poly": [
128.8470001220703,
198.27818298339844,
801.576416015625,
198.27818298339844,
801.576416015625,
320.2906799316406,
128.8470001220703,
320.2906799316406
],
"score": 0.9999555349349976
},
{
"category_id": 1,
"poly": [
852.162841796875,
411.9776916503906,
1527.426513671875,
411.9776916503906,
1527.426513671875,
627.5949096679688,
852.162841796875,
627.5949096679688
],
"score": 0.9999544024467468
},
{
"category_id": 1,
"poly": [
126.1973876953125,
1237.482177734375,
803.0906372070312,
1237.482177734375,
803.0906372070312,
1573.64111328125,
126.1973876953125,
1573.64111328125
],
"score": 0.9998923540115356
},
{
"category_id": 1,
"poly": [
852.0659790039062,
685.3585815429688,
1526.1573486328125,
685.3585815429688,
1526.1573486328125,
751.5690307617188,
852.0659790039062,
751.5690307617188
],
"score": 0.9998598098754883
},
{
"category_id": 2,
"poly": [
1116.092041015625,
129.10800170898438,
1527.0518798828125,
129.10800170898438,
1527.0518798828125,
154.35687255859375,
1116.092041015625,
154.35687255859375
],
"score": 0.9996548891067505
},
{
"category_id": 2,
"poly": [
130.6074981689453,
129.7509765625,
166.90464782714844,
129.7509765625,
166.90464782714844,
150.73068237304688,
130.6074981689453,
150.73068237304688
],
"score": 0.9994045495986938
},
{
"category_id": 13,
"poly": [
551,
840,
689,
840,
689,
872,
551,
872
],
"score": 0.89,
"latex": "30\\,\\upmu\\mathrm{mol}/\\mathrm{kg})"
},
{
"category_id": 13,
"poly": [
333,
1023,
479,
1023,
479,
1055,
333,
1055
],
"score": 0.87,
"latex": "300\\,\\upmu\\mathrm{mol}/\\mathrm{kg}"
}
],
"page_info": {
"page_no": 3,
"height": 2181,
"width": 1653
}
},
{
"layout_dets": [
{
"category_id": 1,
"poly": [
850.2716064453125,
197.65927124023438,
1530.317626953125,
197.65927124023438,
1530.317626953125,
641.234619140625,
850.2716064453125,
641.234619140625
],
"score": 0.999996542930603
},
{
"category_id": 1,
"poly": [
126.53795623779297,
197.20611572265625,
801.671875,
197.20611572265625,
801.671875,
672.7994384765625,
126.53795623779297,
672.7994384765625
],
"score": 0.9999955892562866
},
{
"category_id": 2,
"poly": [
125.36932373046875,
128.70034790039062,
1188.8201904296875,
128.70034790039062,
1188.8201904296875,
155.23692321777344,
125.36932373046875,
155.23692321777344
],
"score": 0.9999717473983765
},
{
"category_id": 2,
"poly": [
1489.344482421875,
129.5157012939453,
1526.055419921875,
129.5157012939453,
1526.055419921875,
151.8965606689453,
1489.344482421875,
151.8965606689453
],
"score": 0.9999598264694214
},
{
"category_id": 2,
"poly": [
587.8172607421875,
1163.7890625,
1065.035888671875,
1163.7890625,
1065.035888671875,
1266.2200927734375,
587.8172607421875,
1266.2200927734375
],
"score": 0.9998905658721924
}
],
"page_info": {
"page_no": 4,
"height": 2181,
"width": 1653
}
}
]
\ No newline at end of file
[
{
"layout_dets": [
{
"category_id": 4,
"poly": [
863.2782592773438,
1035.4449462890625,
1566.4375,
1035.4449462890625,
1566.4375,
1110.1534423828125,
863.2782592773438,
1110.1534423828125
],
"score": 0.9999994039535522
},
{
"category_id": 0,
"poly": [
374.12786865234375,
1095.8162841796875,
595.0630493164062,
1095.8162841796875,
595.0630493164062,
1123.12060546875,
374.12786865234375,
1123.12060546875
],
"score": 0.9999938011169434
},
{
"category_id": 1,
"poly": [
865.3327026367188,
1511.36181640625,
1567.931640625,
1511.36181640625,
1567.931640625,
1908.5230712890625,
865.3327026367188,
1908.5230712890625
],
"score": 0.999992847442627
},
{
"category_id": 3,
"poly": [
899.0333862304688,
516.339111328125,
1500.767578125,
516.339111328125,
1500.767578125,
1002.146240234375,
899.0333862304688,
1002.146240234375
],
"score": 0.9999920725822449
},
{
"category_id": 0,
"poly": [
140.3105010986328,
160.29049682617188,
1558.3450927734375,
160.29049682617188,
1558.3450927734375,
301.54150390625,
140.3105010986328,
301.54150390625
],
"score": 0.9999915361404419
},
{
"category_id": 1,
"poly": [
132.46669006347656,
488.9729919433594,
836.7824096679688,
488.9729919433594,
836.7824096679688,
1014.5713500976562,
132.46669006347656,
1014.5713500976562
],
"score": 0.9999898672103882
},
{
"category_id": 1,
"poly": [
864.4011840820312,
1206.3807373046875,
1566.180419921875,
1206.3807373046875,
1566.180419921875,
1502.9554443359375,
864.4011840820312,
1502.9554443359375
],
"score": 0.9999885559082031
},
{
"category_id": 2,
"poly": [
46.35005569458008,
583.8515014648438,
98.920654296875,
583.8515014648438,
98.920654296875,
1574.2994384765625,
46.35005569458008,
1574.2994384765625
],
"score": 0.9999722242355347
},
{
"category_id": 1,
"poly": [
133.71018981933594,
1134.7393798828125,
837.6100463867188,
1134.7393798828125,
837.6100463867188,
1733.16015625,
133.71018981933594,
1733.16015625
],
"score": 0.9999712705612183
},
{
"category_id": 1,
"poly": [
863.2889404296875,
1915.9327392578125,
1565.4844970703125,
1915.9327392578125,
1565.4844970703125,
2079.54345703125,
863.2889404296875,
2079.54345703125
],
"score": 0.9999582767486572
},
{
"category_id": 1,
"poly": [
141.26788330078125,
329.41650390625,
1547.88134765625,
329.41650390625,
1547.88134765625,
364.2337951660156,
141.26788330078125,
364.2337951660156
],
"score": 0.9995179176330566
},
{
"category_id": 2,
"poly": [
132.21490478515625,
1753.2657470703125,
836.714599609375,
1753.2657470703125,
836.714599609375,
2079.021240234375,
132.21490478515625,
2079.021240234375
],
"score": 0.9935375452041626
},
{
"category_id": 2,
"poly": [
1548.62744140625,
67.1996841430664,
1566.5760498046875,
67.1996841430664,
1566.5760498046875,
91.99691009521484,
1548.62744140625,
91.99691009521484
],
"score": 0.8435658812522888
},
{
"category_id": 0,
"poly": [
161.2336883544922,
1031.1558837890625,
747.5531616210938,
1031.1558837890625,
747.5531616210938,
1057.9443359375,
161.2336883544922,
1057.9443359375
],
"score": 0.8226985335350037
},
{
"category_id": 1,
"poly": [
161.42782592773438,
1031.3416748046875,
747.2906494140625,
1031.3416748046875,
747.2906494140625,
1058.0198974609375,
161.42782592773438,
1058.0198974609375
],
"score": 0.5235136151313782
},
{
"category_id": 13,
"poly": [
135,
1400,
249,
1400,
249,
1432,
135,
1432
],
"score": 0.68,
"latex": "(\\approx1\\,\\mathrm{Tb/s})"
},
{
"category_id": 13,
"poly": [
280,
1333,
420,
1333,
420,
1364,
280,
1364
],
"score": 0.57,
"latex": "\\left(0.1{\\cdot}10\\,\\mathrm{THz}\\right)"
},
{
"category_id": 13,
"poly": [
347,
1880,
366,
1880,
366,
1900,
347,
1900
],
"score": 0.56,
"latex": "@"
},
{
"category_id": 13,
"poly": [
44,
815,
96,
815,
96,
851,
44,
851
],
"score": 0.37,
"latex": "\\cap"
},
{
"category_id": 13,
"poly": [
345,
1829,
365,
1829,
365,
1851,
345,
1851
],
"score": 0.27,
"latex": "@"
}
],
"page_info": {
"page_no": 0,
"height": 2200,
"width": 1700
}
},
{
"layout_dets": [
{
"category_id": 1,
"poly": [
894.3416137695312,
1848.383544921875,
1566.903564453125,
1848.383544921875,
1566.903564453125,
2079.466064453125,
894.3416137695312,
2079.466064453125
],
"score": 0.9999867677688599
},
{
"category_id": 1,
"poly": [
866.365234375,
1705.896484375,
1564.6666259765625,
1705.896484375,
1564.6666259765625,
1835.31396484375,
866.365234375,
1835.31396484375
],
"score": 0.9999860525131226
},
{
"category_id": 3,
"poly": [
297.703369140625,
157.119873046875,
1303.635009765625,
157.119873046875,
1303.635009765625,
1399.029052734375,
297.703369140625,
1399.029052734375
],
"score": 0.9999844431877136
},
{
"category_id": 1,
"poly": [
136.4300537109375,
1705.1046142578125,
833.2474365234375,
1705.1046142578125,
833.2474365234375,
1902.489990234375,
136.4300537109375,
1902.489990234375
],
"score": 0.9999755620956421
},
{
"category_id": 1,
"poly": [
135.40646362304688,
1915.3026123046875,
833.69091796875,
1915.3026123046875,
833.69091796875,
2079.314453125,
135.40646362304688,
2079.314453125
],
"score": 0.999956488609314
},
{
"category_id": 4,
"poly": [
134.93357849121094,
1603.8221435546875,
969.350830078125,
1603.8221435546875,
969.350830078125,
1631.3472900390625,
134.93357849121094,
1631.3472900390625
],
"score": 0.997449517250061
},
{
"category_id": 2,
"poly": [
1551.055908203125,
69.2196273803711,
1568.1268310546875,
69.2196273803711,
1568.1268310546875,
91.64757537841797,
1551.055908203125,
91.64757537841797
],
"score": 0.9901553392410278
}
],
"page_info": {
"page_no": 1,
"height": 2200,
"width": 1700
}
},
{
"layout_dets": [
{
"category_id": 1,
"poly": [
133.6353759765625,
1413.207275390625,
836.9700927734375,
1413.207275390625,
836.9700927734375,
1577.2298583984375,
133.6353759765625,
1577.2298583984375
],
"score": 0.9999912977218628
},
{
"category_id": 1,
"poly": [
133.57623291015625,
796.8501586914062,
836.7691650390625,
796.8501586914062,
836.7691650390625,
961.833984375,
133.57623291015625,
961.833984375
],
"score": 0.9999896883964539
},
{
"category_id": 1,
"poly": [
863.9443969726562,
1264.5728759765625,
1567.9918212890625,
1264.5728759765625,
1567.9918212890625,
1857.951904296875,
863.9443969726562,
1857.951904296875
],
"score": 0.9999896287918091
},
{
"category_id": 1,
"poly": [
864.5279541015625,
1031.6209716796875,
1566.75146484375,
1031.6209716796875,
1566.75146484375,
1262.1263427734375,
864.5279541015625,
1262.1263427734375
],
"score": 0.9999868273735046
},
{
"category_id": 1,
"poly": [
133.58103942871094,
963.8072509765625,
836.1664428710938,
963.8072509765625,
836.1664428710938,
1126.9761962890625,
133.58103942871094,
1126.9761962890625
],
"score": 0.9999858736991882
},
{
"category_id": 1,
"poly": [
915.4673461914062,
154.09107971191406,
1566.4822998046875,
154.09107971191406,
1566.4822998046875,
252.11843872070312,
915.4673461914062,
252.11843872070312
],
"score": 0.9999622106552124
},
{
"category_id": 1,
"poly": [
133.48443603515625,
297.8970642089844,
837.02978515625,
297.8970642089844,
837.02978515625,
563.48193359375,
133.48443603515625,
563.48193359375
],
"score": 0.9999534487724304
},
{
"category_id": 1,
"poly": [
134.178466796875,
1129.2037353515625,
835.380615234375,
1129.2037353515625,
835.380615234375,
1326.577392578125,
134.178466796875,
1326.577392578125
],
"score": 0.9999504089355469
},
{
"category_id": 1,
"poly": [
863.1788940429688,
1947.821044921875,
1567.12744140625,
1947.821044921875,
1567.12744140625,
2081.134765625,
863.1788940429688,
2081.134765625
],
"score": 0.9999489188194275
},
{
"category_id": 1,
"poly": [
133.19537353515625,
565.38818359375,
837.0946044921875,
565.38818359375,
837.0946044921875,
795.4552612304688,
133.19537353515625,
795.4552612304688
],
"score": 0.9999396800994873
},
{
"category_id": 1,
"poly": [
889.4902954101562,
632.3302001953125,
1567.4151611328125,
632.3302001953125,
1567.4151611328125,
760.7906494140625,
889.4902954101562,
760.7906494140625
],
"score": 0.9999313354492188
},
{
"category_id": 1,
"poly": [
864.2963256835938,
559.0233764648438,
1565.3338623046875,
559.0233764648438,
1565.3338623046875,
623.64453125,
864.2963256835938,
623.64453125
],
"score": 0.9999247789382935
},
{
"category_id": 0,
"poly": [
956.4207763671875,
800.2708740234375,
1473.8553466796875,
800.2708740234375,
1473.8553466796875,
832.0525512695312,
956.4207763671875,
832.0525512695312
],
"score": 0.9999191164970398
},
{
"category_id": 1,
"poly": [
890.3426513671875,
254.0496368408203,
1567.084228515625,
254.0496368408203,
1567.084228515625,
550.75048828125,
890.3426513671875,
550.75048828125
],
"score": 0.9999065399169922
},
{
"category_id": 0,
"poly": [
863.724365234375,
1905.3587646484375,
1263.0279541015625,
1905.3587646484375,
1263.0279541015625,
1937.046875,
863.724365234375,
1937.046875
],
"score": 0.9998754262924194
},
{
"category_id": 0,
"poly": [
865.344970703125,
990.8092651367188,
1077.787353515625,
990.8092651367188,
1077.787353515625,
1020.8342895507812,
865.344970703125,
1020.8342895507812
],
"score": 0.9998668432235718
},
{
"category_id": 0,
"poly": [
132.80747985839844,
1371.076171875,
476.3838195800781,
1371.076171875,
476.3838195800781,
1401.41552734375,
132.80747985839844,
1401.41552734375
],
"score": 0.9998248815536499
},
{
"category_id": 1,
"poly": [
158.07598876953125,
1580.614013671875,
837.52490234375,
1580.614013671875,
837.52490234375,
2084.036376953125,
158.07598876953125,
2084.036376953125
],
"score": 0.999720573425293
},
{
"category_id": 1,
"poly": [
864.1918334960938,
846.3515625,
1565.4425048828125,
846.3515625,
1565.4425048828125,
941.529541015625,
864.1918334960938,
941.529541015625
],
"score": 0.999374270439148
},
{
"category_id": 1,
"poly": [
187.75222778320312,
155.8234405517578,
763.4747314453125,
155.8234405517578,
763.4747314453125,
187.12890625,
187.75222778320312,
187.12890625
],
"score": 0.9937731623649597
},
{
"category_id": 2,
"poly": [
1551.6641845703125,
70.21305084228516,
1566.66748046875,
70.21305084228516,
1566.66748046875,
90.993408203125,
1551.6641845703125,
90.993408203125
],
"score": 0.9599642753601074
},
{
"category_id": 0,
"poly": [
187.70907592773438,
219.8146209716797,
785.0242309570312,
219.8146209716797,
785.0242309570312,
285.96136474609375,
187.70907592773438,
285.96136474609375
],
"score": 0.5746440887451172
},
{
"category_id": 8,
"poly": [
187.70443725585938,
219.90408325195312,
785.4927978515625,
219.90408325195312,
785.4927978515625,
286.03607177734375,
187.70443725585938,
286.03607177734375
],
"score": 0.5241892337799072
},
{
"category_id": 13,
"poly": [
1381,
1430,
1428,
1430,
1428,
1459,
1381,
1459
],
"score": 0.53,
"latex": "5\\,\\mathrm{m}"
},
{
"category_id": 13,
"poly": [
864,
1662,
910,
1662,
910,
1691,
864,
1691
],
"score": 0.5,
"latex": "2\\,\\mathrm{m}"
},
{
"category_id": 13,
"poly": [
1193,
1530,
1239,
1530,
1239,
1558,
1193,
1558
],
"score": 0.47,
"latex": "_{\\textrm{1m}}"
},
{
"category_id": 13,
"poly": [
864,
1729,
909,
1729,
909,
1758,
864,
1758
],
"score": 0.44,
"latex": "_{\\textrm{1m}}"
}
],
"page_info": {
"page_no": 2,
"height": 2200,
"width": 1700
}
},
{
"layout_dets": [
{
"category_id": 4,
"poly": [
864.2838745117188,
1281.72265625,
1566.531982421875,
1281.72265625,
1566.531982421875,
1359.263427734375,
864.2838745117188,
1359.263427734375
],
"score": 0.9999990463256836
},
{
"category_id": 1,
"poly": [
133.01622009277344,
837.9033813476562,
835.7518310546875,
837.9033813476562,
835.7518310546875,
1203.0052490234375,
133.01622009277344,
1203.0052490234375
],
"score": 0.9999970197677612
},
{
"category_id": 1,
"poly": [
863.889892578125,
1781.8822021484375,
1566.0186767578125,
1781.8822021484375,
1566.0186767578125,
2081.87939453125,
863.889892578125,
2081.87939453125
],
"score": 0.9999960660934448
},
{
"category_id": 1,
"poly": [
864.3985595703125,
1409.9678955078125,
1565.9906005859375,
1409.9678955078125,
1565.9906005859375,
1710.3426513671875,
864.3985595703125,
1710.3426513671875
],
"score": 0.9999954700469971
},
{
"category_id": 1,
"poly": [
133.8789825439453,
1205.3355712890625,
835.18359375,
1205.3355712890625,
835.18359375,
1634.283935546875,
133.8789825439453,
1634.283935546875
],
"score": 0.9999930262565613
},
{
"category_id": 0,
"poly": [
135.4618377685547,
1839.9617919921875,
331.53729248046875,
1839.9617919921875,
331.53729248046875,
1871.8616943359375,
135.4618377685547,
1871.8616943359375
],
"score": 0.9999902248382568
},
{
"category_id": 4,
"poly": [
132.88088989257812,
679.1465454101562,
838.4302978515625,
679.1465454101562,
838.4302978515625,
785.7191772460938,
132.88088989257812,
785.7191772460938
],
"score": 0.9999885559082031
},
{
"category_id": 3,
"poly": [
193.83837890625,
182.79244995117188,
759.5771484375,
182.79244995117188,
759.5771484375,
653.6824340820312,
193.83837890625,
653.6824340820312
],
"score": 0.9999850988388062
},
{
"category_id": 1,
"poly": [
134.48211669921875,
1636.1163330078125,
834.7794189453125,
1636.1163330078125,
834.7794189453125,
1800.814208984375,
134.48211669921875,
1800.814208984375
],
"score": 0.9999649524688721
},
{
"category_id": 1,
"poly": [
134.12872314453125,
1881.7559814453125,
834.7655029296875,
1881.7559814453125,
834.7655029296875,
1982.255615234375,
134.12872314453125,
1982.255615234375
],
"score": 0.9999020099639893
},
{
"category_id": 0,
"poly": [
976.8430786132812,
1740.978515625,
1452.464111328125,
1740.978515625,
1452.464111328125,
1771.867919921875,
976.8430786132812,
1771.867919921875
],
"score": 0.9998255968093872
},
{
"category_id": 3,
"poly": [
898.9835815429688,
175.76722717285156,
1495.93212890625,
175.76722717285156,
1495.93212890625,
1266.6322021484375,
898.9835815429688,
1266.6322021484375
],
"score": 0.9998016357421875
},
{
"category_id": 1,
"poly": [
133.10968017578125,
1982.9735107421875,
834.5057983398438,
1982.9735107421875,
834.5057983398438,
2080.69677734375,
133.10968017578125,
2080.69677734375
],
"score": 0.9996336698532104
},
{
"category_id": 2,
"poly": [
1553.209716796875,
71.55194854736328,
1565.424560546875,
71.55194854736328,
1565.424560546875,
89.57391357421875,
1553.209716796875,
89.57391357421875
],
"score": 0.9797953367233276
},
{
"category_id": 13,
"poly": [
1306,
1282,
1353,
1282,
1353,
1306,
1306,
1306
],
"score": 0.87,
"latex": "2\\!\\times\\!1"
},
{
"category_id": 13,
"poly": [
307,
731,
367,
731,
367,
755,
307,
755
],
"score": 0.72,
"latex": "\\mathrm{110\\,m}"
},
{
"category_id": 13,
"poly": [
581,
731,
653,
731,
653,
756,
581,
756
],
"score": 0.63,
"latex": "28\\,\\mathrm{GHz}"
},
{
"category_id": 13,
"poly": [
296,
1436,
352,
1436,
352,
1468,
296,
1468
],
"score": 0.34,
"latex": "\\mathbf{gN}\\mathbf{B}"
},
{
"category_id": 13,
"poly": [
463,
1668,
520,
1668,
520,
1700,
463,
1700
],
"score": 0.26,
"latex": "\\mathbf{gN}\\mathbf{B}"
}
],
"page_info": {
"page_no": 3,
"height": 2200,
"width": 1700
}
},
{
"layout_dets": [
{
"category_id": 1,
"poly": [
134.3433837890625,
942.0186157226562,
834.1826171875,
942.0186157226562,
834.1826171875,
1272.496337890625,
134.3433837890625,
1272.496337890625
],
"score": 0.9999979734420776
},
{
"category_id": 1,
"poly": [
134.33851623535156,
1813.447265625,
834.64990234375,
1813.447265625,
834.64990234375,
2080.197021484375,
134.33851623535156,
2080.197021484375
],
"score": 0.9999959468841553
},
{
"category_id": 1,
"poly": [
864.8323364257812,
987.3436889648438,
1566.8726806640625,
987.3436889648438,
1566.8726806640625,
1783.5870361328125,
864.8323364257812,
1783.5870361328125
],
"score": 0.9999951720237732
},
{
"category_id": 1,
"poly": [
134.39962768554688,
1277.7550048828125,
834.7018432617188,
1277.7550048828125,
834.7018432617188,
1809.951171875,
134.39962768554688,
1809.951171875
],
"score": 0.999994158744812
},
{
"category_id": 3,
"poly": [
366.1568908691406,
150.17318725585938,
1329.7593994140625,
150.17318725585938,
1329.7593994140625,
792.8095092773438,
366.1568908691406,
792.8095092773438
],
"score": 0.9999933242797852
},
{
"category_id": 0,
"poly": [
866.7418823242188,
1836.01318359375,
1017.6839599609375,
1836.01318359375,
1017.6839599609375,
1865.512451171875,
866.7418823242188,
1865.512451171875
],
"score": 0.9999814033508301
},
{
"category_id": 1,
"poly": [
864.4174194335938,
1882.203125,
1564.8563232421875,
1882.203125,
1564.8563232421875,
2079.260498046875,
864.4174194335938,
2079.260498046875
],
"score": 0.9999741315841675
},
{
"category_id": 4,
"poly": [
134.15469360351562,
815.9164428710938,
1562.02392578125,
815.9164428710938,
1562.02392578125,
867.7661743164062,
134.15469360351562,
867.7661743164062
],
"score": 0.9999508857727051
},
{
"category_id": 0,
"poly": [
865.704833984375,
942.009033203125,
1059.95556640625,
942.009033203125,
1059.95556640625,
970.8005981445312,
865.704833984375,
970.8005981445312
],
"score": 0.9999415874481201
},
{
"category_id": 2,
"poly": [
1552.128173828125,
71.25533294677734,
1566.6485595703125,
71.25533294677734,
1566.6485595703125,
89.53498840332031,
1552.128173828125,
89.53498840332031
],
"score": 0.9974276423454285
},
{
"category_id": 13,
"poly": [
1388,
1517,
1445,
1517,
1445,
1550,
1388,
1550
],
"score": 0.3,
"latex": "\\tt g N B"
}
],
"page_info": {
"page_no": 4,
"height": 2200,
"width": 1700
}
},
{
"layout_dets": [
{
"category_id": 1,
"poly": [
863.9874877929688,
1815.5633544921875,
1566.741455078125,
1815.5633544921875,
1566.741455078125,
2079.217041015625,
863.9874877929688,
2079.217041015625
],
"score": 0.9999942779541016
},
{
"category_id": 1,
"poly": [
865.5879516601562,
1316.8531494140625,
1566.5723876953125,
1316.8531494140625,
1566.5723876953125,
1813.6834716796875,
865.5879516601562,
1813.6834716796875
],
"score": 0.9999927282333374
},
{
"category_id": 1,
"poly": [
134.6375732421875,
952.2572021484375,
834.2245483398438,
952.2572021484375,
834.2245483398438,
1315.1661376953125,
134.6375732421875,
1315.1661376953125
],
"score": 0.9999904036521912
},
{
"category_id": 1,
"poly": [
134.9095458984375,
1400.1383056640625,
834.28173828125,
1400.1383056640625,
834.28173828125,
1697.2962646484375,
134.9095458984375,
1697.2962646484375
],
"score": 0.9999892115592957
},
{
"category_id": 1,
"poly": [
865.7293090820312,
155.0264434814453,
1565.5443115234375,
155.0264434814453,
1565.5443115234375,
383.32318115234375,
865.7293090820312,
383.32318115234375
],
"score": 0.9999874830245972
},
{
"category_id": 1,
"poly": [
865.7639770507812,
477.6691589355469,
1566.436279296875,
477.6691589355469,
1566.436279296875,
1006.602294921875,
865.7639770507812,
1006.602294921875
],
"score": 0.9999862909317017
},
{
"category_id": 1,
"poly": [
133.97454833984375,
1782.52880859375,
835.3529052734375,
1782.52880859375,
835.3529052734375,
2079.283447265625,
133.97454833984375,
2079.283447265625
],
"score": 0.9999849200248718
},
{
"category_id": 1,
"poly": [
865.3876342773438,
1127.646728515625,
1564.5369873046875,
1127.646728515625,
1564.5369873046875,
1225.0914306640625,
865.3876342773438,
1225.0914306640625
],
"score": 0.9999810457229614
},
{
"category_id": 1,
"poly": [
133.3822021484375,
338.7093505859375,
834.5867919921875,
338.7093505859375,
834.5867919921875,
868.1721801757812,
133.3822021484375,
868.1721801757812
],
"score": 0.9999763369560242
},
{
"category_id": 1,
"poly": [
134.7703857421875,
156.58360290527344,
834.59228515625,
156.58360290527344,
834.59228515625,
250.93218994140625,
134.7703857421875,
250.93218994140625
],
"score": 0.9999515414237976
},
{
"category_id": 0,
"poly": [
134.81646728515625,
296.1169128417969,
301.2171936035156,
296.1169128417969,
301.2171936035156,
327.50225830078125,
134.81646728515625,
327.50225830078125
],
"score": 0.9999410510063171
},
{
"category_id": 0,
"poly": [
865.8960571289062,
433.5700988769531,
1040.275634765625,
433.5700988769531,
1040.275634765625,
463.8289489746094,
865.8960571289062,
463.8289489746094
],
"score": 0.9998129606246948
},
{
"category_id": 0,
"poly": [
865.2048950195312,
1275.2528076171875,
1319.10205078125,
1275.2528076171875,
1319.10205078125,
1305.5203857421875,
865.2048950195312,
1305.5203857421875
],
"score": 0.9997825622558594
},
{
"category_id": 0,
"poly": [
135.78082275390625,
1740.425048828125,
598.881591796875,
1740.425048828125,
598.881591796875,
1770.975830078125,
135.78082275390625,
1770.975830078125
],
"score": 0.9997348189353943
},
{
"category_id": 0,
"poly": [
135.5992889404297,
911.2128295898438,
440.9443054199219,
911.2128295898438,
440.9443054199219,
940.1547241210938,
135.5992889404297,
940.1547241210938
],
"score": 0.9996732473373413
},
{
"category_id": 0,
"poly": [
135.51629638671875,
1360.5496826171875,
630.7794189453125,
1360.5496826171875,
630.7794189453125,
1390.0040283203125,
135.51629638671875,
1390.0040283203125
],
"score": 0.9994310140609741
},
{
"category_id": 2,
"poly": [
1551.8868408203125,
71.72320556640625,
1565.9241943359375,
71.72320556640625,
1565.9241943359375,
91.15934753417969,
1551.8868408203125,
91.15934753417969
],
"score": 0.9937852621078491
},
{
"category_id": 0,
"poly": [
878.8119506835938,
1049.76806640625,
1547.8568115234375,
1049.76806640625,
1547.8568115234375,
1112.1201171875,
878.8119506835938,
1112.1201171875
],
"score": 0.9739665389060974
}
],
"page_info": {
"page_no": 5,
"height": 2200,
"width": 1700
}
},
{
"layout_dets": [
{
"category_id": 1,
"poly": [
865.1703491210938,
1749.82861328125,
1565.8792724609375,
1749.82861328125,
1565.8792724609375,
2080.968017578125,
865.1703491210938,
2080.968017578125
],
"score": 0.9999978542327881
},
{
"category_id": 1,
"poly": [
863.9318237304688,
866.5559692382812,
1567.1939697265625,
866.5559692382812,
1567.1939697265625,
1663.54638671875,
863.9318237304688,
1663.54638671875
],
"score": 0.999996542930603
},
{
"category_id": 1,
"poly": [
134.32009887695312,
1563.3734130859375,
834.4869995117188,
1563.3734130859375,
834.4869995117188,
1926.479248046875,
134.32009887695312,
1926.479248046875
],
"score": 0.9999960660934448
},
{
"category_id": 3,
"poly": [
374.6717529296875,
151.7071990966797,
1326.4266357421875,
151.7071990966797,
1326.4266357421875,
711.5238037109375,
374.6717529296875,
711.5238037109375
],
"score": 0.9999955892562866
},
{
"category_id": 1,
"poly": [
133.2987060546875,
911.025634765625,
834.7653198242188,
911.025634765625,
834.7653198242188,
1473.9681396484375,
133.2987060546875,
1473.9681396484375
],
"score": 0.9999939203262329
},
{
"category_id": 1,
"poly": [
135.3870086669922,
2014.2613525390625,
834.2026977539062,
2014.2613525390625,
834.2026977539062,
2078.903076171875,
135.3870086669922,
2078.903076171875
],
"score": 0.9999884963035583
},
{
"category_id": 0,
"poly": [
136.2907257080078,
1521.8297119140625,
513.5540161132812,
1521.8297119140625,
513.5540161132812,
1552.1356201171875,
136.2907257080078,
1552.1356201171875
],
"score": 0.9999793767929077
},
{
"category_id": 0,
"poly": [
134.11581420898438,
1972.88916015625,
664.4715576171875,
1972.88916015625,
664.4715576171875,
2003.9886474609375,
134.11581420898438,
2003.9886474609375
],
"score": 0.9999605417251587
},
{
"category_id": 4,
"poly": [
133.239990234375,
738.7720336914062,
1567.0321044921875,
738.7720336914062,
1567.0321044921875,
789.1837768554688,
133.239990234375,
789.1837768554688
],
"score": 0.9999586939811707
},
{
"category_id": 0,
"poly": [
866.1632690429688,
1708.7288818359375,
1406.107421875,
1708.7288818359375,
1406.107421875,
1738.577880859375,
866.1632690429688,
1738.577880859375
],
"score": 0.9998884201049805
},
{
"category_id": 0,
"poly": [
134.7357940673828,
871.1494140625,
652.3981323242188,
871.1494140625,
652.3981323242188,
898.2235717773438,
134.7357940673828,
898.2235717773438
],
"score": 0.9853350520133972
},
{
"category_id": 2,
"poly": [
1553.031494140625,
70.53016662597656,
1566.2633056640625,
70.53016662597656,
1566.2633056640625,
89.09037780761719,
1553.031494140625,
89.09037780761719
],
"score": 0.9816321730613708
},
{
"category_id": 13,
"poly": [
595,
1727,
652,
1727,
652,
1760,
595,
1760
],
"score": 0.42,
"latex": "\\mathbf{gN}\\mathbf{B}"
}
],
"page_info": {
"page_no": 6,
"height": 2200,
"width": 1700
}
},
{
"layout_dets": [
{
"category_id": 3,
"poly": [
869.9388427734375,
588.944091796875,
1061.8564453125,
588.944091796875,
1061.8564453125,
838.3399047851562,
869.9388427734375,
838.3399047851562
],
"score": 0.9999961853027344
},
{
"category_id": 1,
"poly": [
864.8761596679688,
156.0197296142578,
1571.390380859375,
156.0197296142578,
1571.390380859375,
484.4344787597656,
864.8761596679688,
484.4344787597656
],
"score": 0.9999923706054688
},
{
"category_id": 1,
"poly": [
135.25286865234375,
1097.9901123046875,
839.7271728515625,
1097.9901123046875,
839.7271728515625,
2085.473388671875,
135.25286865234375,
2085.473388671875
],
"score": 0.9999918937683105
},
{
"category_id": 1,
"poly": [
1088.017578125,
580.5264892578125,
1568.900634765625,
580.5264892578125,
1568.900634765625,
858.09619140625,
1088.017578125,
858.09619140625
],
"score": 0.9999913573265076
},
{
"category_id": 1,
"poly": [
1089.640869140625,
947.9395141601562,
1567.883056640625,
947.9395141601562,
1567.883056640625,
1198.422607421875,
1089.640869140625,
1198.422607421875
],
"score": 0.9999912977218628
},
{
"category_id": 1,
"poly": [
131.48910522460938,
153.96853637695312,
838.3993530273438,
153.96853637695312,
838.3993530273438,
453.02337646484375,
131.48910522460938,
453.02337646484375
],
"score": 0.9999911189079285
},
{
"category_id": 1,
"poly": [
131.714111328125,
541.0372924804688,
838.0123901367188,
541.0372924804688,
838.0123901367188,
1005.5633544921875,
131.714111328125,
1005.5633544921875
],
"score": 0.9999898672103882
},
{
"category_id": 3,
"poly": [
863.97900390625,
1719.6123046875,
1068.673095703125,
1719.6123046875,
1068.673095703125,
1953.03173828125,
863.97900390625,
1953.03173828125
],
"score": 0.9999892115592957
},
{
"category_id": 0,
"poly": [
378.59942626953125,
496.0562744140625,
593.4119873046875,
496.0562744140625,
593.4119873046875,
527.6988525390625,
378.59942626953125,
527.6988525390625
],
"score": 0.9999885559082031
},
{
"category_id": 0,
"poly": [
405.65191650390625,
1052.885498046875,
564.7144775390625,
1052.885498046875,
564.7144775390625,
1081.0938720703125,
405.65191650390625,
1081.0938720703125
],
"score": 0.9999884366989136
},
{
"category_id": 3,
"poly": [
863.9266967773438,
1333.662353515625,
1069.4085693359375,
1333.662353515625,
1069.4085693359375,
1553.8045654296875,
863.9266967773438,
1553.8045654296875
],
"score": 0.9999866485595703
},
{
"category_id": 1,
"poly": [
1089.958740234375,
1313.381591796875,
1569.0367431640625,
1313.381591796875,
1569.0367431640625,
1591.7625732421875,
1089.958740234375,
1591.7625732421875
],
"score": 0.9999823570251465
},
{
"category_id": 3,
"poly": [
863.3223876953125,
990.8128051757812,
1069.0321044921875,
990.8128051757812,
1069.0321044921875,
1166.51708984375,
863.3223876953125,
1166.51708984375
],
"score": 0.9999755620956421
},
{
"category_id": 1,
"poly": [
1087.6712646484375,
1704.9793701171875,
1568.7255859375,
1704.9793701171875,
1568.7255859375,
1983.8875732421875,
1087.6712646484375,
1983.8875732421875
],
"score": 0.9999744296073914
},
{
"category_id": 2,
"poly": [
1548.5992431640625,
68.92752075195312,
1568.561279296875,
68.92752075195312,
1568.561279296875,
91.09439086914062,
1548.5992431640625,
91.09439086914062
],
"score": 0.9984710216522217
},
{
"category_id": 2,
"poly": [
862.5369262695312,
1981.4864501953125,
1158.696533203125,
1981.4864501953125,
1158.696533203125,
2009.3497314453125,
862.5369262695312,
2009.3497314453125
],
"score": 0.9784647822380066
},
{
"category_id": 4,
"poly": [
865.48876953125,
1590.6600341796875,
975.1014404296875,
1590.6600341796875,
975.1014404296875,
1613.881103515625,
865.48876953125,
1613.881103515625
],
"score": 0.6813311576843262
},
{
"category_id": 1,
"poly": [
865.5831298828125,
1590.6781005859375,
974.7860107421875,
1590.6781005859375,
974.7860107421875,
1613.807861328125,
865.5831298828125,
1613.807861328125
],
"score": 0.45190906524658203
},
{
"category_id": 13,
"poly": [
1270,
1706,
1336,
1706,
1336,
1732,
1270,
1732
],
"score": 0.28,
"latex": "[\\mathbf{M}^{\\prime}00]"
}
],
"page_info": {
"page_no": 7,
"height": 2200,
"width": 1700
}
},
{
"layout_dets": [
{
"category_id": 3,
"poly": [
128.92079162597656,
1207.0789794921875,
339.8792419433594,
1207.0789794921875,
339.8792419433594,
1404.8714599609375,
128.92079162597656,
1404.8714599609375
],
"score": 0.9999963641166687
},
{
"category_id": 3,
"poly": [
150.6888427734375,
161.63258361816406,
321.2713928222656,
161.63258361816406,
321.2713928222656,
416.30303955078125,
150.6888427734375,
416.30303955078125
],
"score": 0.9999936819076538
},
{
"category_id": 1,
"poly": [
359.1070251464844,
1166.5550537109375,
838.76220703125,
1166.5550537109375,
838.76220703125,
1445.023681640625,
359.1070251464844,
1445.023681640625
],
"score": 0.9999911785125732
},
{
"category_id": 1,
"poly": [
360.03173828125,
159.8954315185547,
837.4935913085938,
159.8954315185547,
837.4935913085938,
434.51287841796875,
360.03173828125,
434.51287841796875
],
"score": 0.999970555305481
},
{
"category_id": 2,
"poly": [
1551.287841796875,
69.9497299194336,
1566.8572998046875,
69.9497299194336,
1566.8572998046875,
90.27826690673828,
1551.287841796875,
90.27826690673828
],
"score": 0.9870278835296631
},
{
"category_id": 13,
"poly": [
539,
1168,
598,
1168,
598,
1194,
539,
1194
],
"score": 0.81,
"latex": "[\\mathrm{F}^{\\prime}09]"
}
],
"page_info": {
"page_no": 8,
"height": 2200,
"width": 1700
}
}
]
\ No newline at end of file
# Characterization of severely deformed new composites fabricated by powder metallurgy including a stage of mechanical alloying
H. Ashuri, A. Hassani*<br>Faculty of Materials Science and Engineering, Semnan University, Semnan 35131-19111, Iran
## ARTICLE INFO
## Article history:
Received 11 February 2014
Received in revised form 11 June 2014
Accepted 4 August 2014
Available online 12 August 2014
## Keywords:
Nanocomposite
Mechanical alloying
Twist extrusion
Powder metallurgy
#### Abstract
Mechanical properties of new composites having a binary matrix of $\mathrm{Al}-4 \mathrm{Cu}$ reinforced with $\mathrm{TiO}_{2}$ nano particles were investigated. The composites which consisted of $2 \mathrm{wt} \%$ and $8 \mathrm{wt} \%$ of $\mathrm{TiO}_{2}$ reinforcement particles, were fabricated using mechanical alloying and a powder metallurgy route. Morphology, phases and compounds formed during ball milling and densification of samples were studied. With increasing percentages of the reinforcement particles, mechanical properties of the composites were enhanced. Microstructural evolution and mechanical properties changes of the composites after application of twist extrusion (TE), as a severe plastic deformation (SPD) process, were also investigated. It was revealed that the more TE passes the higher hardness and yield strength obtained. In addition, increasing TE passes, led to occurrence of a more homogeneous distribution of the reinforcement particles within the structure, and development of an ultrafine-grained nano-structure. The maximum allowable number of TE passes was found to be four, above which the materials failed.
## 1. Introduction
In recent decades, aluminum matrix composites (AMC) with discontinuous reinforcements have vastly been attracted by different industries due to their good mechanical properties. Large number of manufacture routes have been developed to produce these materials among which powder metallurgy (PM) routes have been more considered with several causes. First, in powder metallurgy a controlled phase microstructure can be achieved. On the other hand, lower temperatures used in PM processes make the interphase kinetics be precisely controlled. In PM routes, the powders of elements and alloys are used which might be more inexpensive, and of course, much more effective in reinforcement of the composites. Traditional stages of PM-AMCs fabrication include mixing and blending the powders; degassing the solidified product in vacuum; homogenizing through hot pressing or hot isostatic pressing (HIP) [1].
AMCs are widely used in automotive, aerospace and transport industries because of their light weight, high elastic modulus, improved strength and good wear resistance. Strength and wear resistance of these materials are strongly dependent on volume fraction, size and type of reinforcement particles. It is well established that they show higher wear resistance than their un-reinforced matrix alloys. AMCs with ceramic particles including $\mathrm{SiC}, \mathrm{TiC}, \mathrm{B}_{4} \mathrm{C}, \mathrm{TiB}_{2}$ and $\mathrm{Al}_{2} \mathrm{O}_{3}$ are relatively easy to process and, in comparison with fiber-reinforced composites, are nearly isotropic [2].
Particulate AMCs have introduced most wide spread applications and hold the greatest promise for future growth because of their tailored properties, low cost-effectiveness and high volume production methods [3]. Aluminum matrix composites are known to be hard materials exhibiting a low forming capacity through the conventional techniques. Nevertheless, many promising attempts have been made to produce Al composites with a high potential of being formed plastically and even superplastically while their strength is retained [4].
Mechanical alloying (MA) is an interesting powder metallurgy route for producing of powders with high homogeneity and uniformity. This technique is very effective in dispersion of reinforcement particles and enhances grain refinement, which induces an increase of strength and hardness [5].
In recent years, manifestation of severe plastic deformation (SPD) methods in material science has shed light on new prospects in achieving a unique combination of high strength and ductility [6] as well as attaining ultrafine-grained materials with improved properties. SPD is a family of metal forming techniques that use extensive hydrostatic pressure to impose a very high strain on bulk solids, producing exceptional grain refinement without introducing any significant change in the overall dimensions of the sample [7]. Several different SPD techniques are now available; these include high-pressure torsion (HPT) [8], equal channel angular pressing (ECAP) [9], multi-directional forging (MDF) [10], accumulative roll-bonding (ARB) [11], repetitive corrugation and straightening (RCS) [12], spread extrusion (SE) [13], simple shear extrusion (SSE) [14] and twist extrusion (TE) [15,16]. The SPD products have much higher structural efficiency in comparison with their coarse-grained counterparts. However, the high cost of most SPD methods is a central drawback to producing such materials in large quantities. Therefore, development of new SPD methods to tackle the cost problem is important.
In 1999, Beygelzimer proposed a severe plastic deformation process that became known as Twist Extrusion (TE) [17]. This process can change the structure of materials, significantly improving some of their physical and mechanical properties and, even in certain cases, gaining new properties. TE works by extruding a prism specimen through a matrix whose profile consists of two prism-like regions separated by a twist passage. The extruded material undergoes an intense shift, with the properties that the final cross-section of the specimen is identical to the initial cross-section [18]. These properties allow for a repeated extrusion that accumulates the value of deformation. TE is carried out under high hydrostatic pressure in the center of deformation which is created by applying anti-pressure (back pressure) to the specimen when it exits the matrix. It is possible to produce more isotropic and homogeneous deformation by turning the samples $90^{\circ}$ in each consecutive deformation or alternatively, make the use of consecutive clockwise-anticlockwise-clockwise twists [19]. A comparison between TE and the two most widely used SPD methods, ECAE and HPT, reveals that firstly, TE provides some advantages over ECAE such as the ability to extrude the hollow parts and the rectangular cross-sections [6]. Secondly, HPT involves order of magnitude higher pressures than in any other SPD process which provides attainment of uniquely high strains and formation of ultrafine grained structures. From another point of view, twist extrusion that combines extrusion with torsion, was introduced to tackle the insufficiency of HPT, that is, its being limited to laboratory conditions due to small size of the samples [19].
There are currently three main application areas of TE: (a) obtaining ultrafine grained crystalline and nano-crystalline structures in bulk specimens, (b) increasing the plasticity of secondary non-ferrous metals and alloys, which allows one to significantly broaden the range of production, (c) obtaining bulk specimens by consolidating porous materials which allows one to create substantially different, new compositions with unique characteristics [20].
In TE, strain distribution along the cross-section of the specimen is inhomogeneous; moving away from the axis, the plastic strain increases and, thus, the grains become finer. The microstructural inhomogeneity leads to inhomogeneities in the mechanical properties of the composite, with the central area of the cross-section having the lowest strength. It is expected that with increasing the number of TE cycles, the microstructure becomes uniform [21].
In the present study, a powder metallurgy route combined with mechanical alloying was employed to produce some particulate $\mathrm{Al}-\mathrm{Cu} / \mathrm{TiO}_{2}$ composites with low $\mathrm{TiO}_{2}$ contents. The products were then severely deformed by twist extrusion technique. The microstructures, densities, wear resistances, hardness and strengths of the resulted composites, in two different $\mathrm{TiO}_{2}$ reinforcement content of 2 and $8 \mathrm{wt} \%$ and at various TE cycles were examined.
## 2. Experimental procedures
To attain a uniform distribution of the $\mathrm{TiO}_{2}$ reinforcement particles in $\mathrm{Al}-\mathrm{Cu}$ matrix, a high-energy planetary ball-mill machine, manufactured by the authors was utilized and, the powder behavior was studied during the process. Milling time and the effects of volume fraction of the reinforcement and its particle size were also investigated. Aluminum powder with mean grain size of $<45 \mu \mathrm{m}$ and commercial purity of $99.9 \%$ and copper powder of $40 \mu \mathrm{m}$ with $99.0 \%$ purity were supplied. Nano-scale anatase $\mathrm{TiO}_{2}$ powder, as the reinforcement, having a mean size of $50 \mathrm{~nm}$ was also obtained. The powders specifications are shown in Table 1.
The appropriate proportions of $\mathrm{Al}$ and $\mathrm{Cu}$ powders were weighed using a digital balance of $0.001 \mathrm{mg}$ accuracy. Internal surfaces of the cups were wetted with a thin layer of glycerin to prevent the powder mixtures from sticking to them. The powders were then mixed and blended in a high energy planetary ball mill to produce the matrix alloy powder. For ball-milling, chromium steel balls with diameters of 17, 19, 22, 25 and $30 \mathrm{~mm}$, ball-to-powder weight ratio of $20:1$, constant rotational speed of $300 \mathrm{rpm}$ and argon atmosphere were used. Ball milling time was $16 \mathrm{~h}$. To reinforce the product, $2 \mathrm{wt} \%$ and $8 \mathrm{wt} \%$ of $\mathrm{TiO}_{2}$ particles were added and blended to a homogeneous mixture. The mixture was cold compacted into a two-piece die of DIN-1.2344 hot die steel having a hole of $15 \times 15 \times 80 \mathrm{~mm}$ dimensions for $15 \mathrm{~min}$ under $600 \mathrm{MPa}$. Then, to enhance apparent densities of powders, they were put into the die under $100 \mathrm{MPa}$ pressure being heated to temperatures of $550^{\circ} \mathrm{C}, 580^{\circ} \mathrm{C}$ and $640^{\circ} \mathrm{C}$. After reaching these temperatures, the pressure was turned up to $700 \mathrm{MPa}$ at which the samples were kept for 30, 60 and $120 \mathrm{~min}$ to obtain three different densities. After sintering, the samples were furnace cooled and homogenized to room temperature at a rate of $21.8^{\circ} \mathrm{C} / \mathrm{h}$.
For twist extrusion testing, the samples were lubricated with $\mathrm{MoS}_{2}$ to reduce friction. Then, they were inserted into the entrance guide of the twist extrusion die being pushed to the distorted channel using a steel plunger with speed of $1.1 \mathrm{~mm} / \mathrm{s}$. The twist extrusion die of $14.6 \times 14.6 \mathrm{~mm}$ internal cross-section with a twist line slope of $\beta=60^{\circ}$ in the counter-clockwise direction was used (Fig. 1). In order to apply a backpressure on the sample, the output channel was built steeped. This channel, itself, acted as a direct extrusion die. Thus, after the specimen passed the twisted channel, it entered a straight output passage of $29 \mathrm{~mm}$ length during which its cross-section changed from $14.6 \times 14.6 \mathrm{~mm}$ to $14.2 \times 14.2 \mathrm{~mm}$. In addition, for preventing deviation of the sample to the sides and making sure of upright entering of the sample into the twisted channel, an $80 \mathrm{~mm}$ channel with $15 \times 15 \mathrm{~mm}$ cross-section was developed at the entrance as the sample guide. Also, to inhibit stress concentration, the right angle corners of the die interior walls were blunted. In this research, two sets of $\mathrm{Al}-4 \mathrm{wt} \% \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and $\mathrm{Al}-4 \mathrm{wt} \% \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ samples were extruded at velocity of $68.4 \mathrm{~mm} / \mathrm{min}$ (maximum velocity of the available press) for 1, 2 and 4 passes. The maximum allowed number of TE passes was found to be four, above which material failure occurred during twist extrusion operation.
To investigate the microstructure changes in the materials due to twist extrusion, the samples were prepared by cutting from the cross-section perpendicular to the axial direction of the extruded billets. The microstructure evolution was then studied in the central, lateral and corner regions of the cross-section using scanning electron microscopy (SEM).
Densities of the compacted powders were determined through Archimedes procedure according to the standard ASTM B93-13 [22]. The microstructures of samples from both composites were studied using SEM model ISI ABT SR-50 equipped with EDX analyzer after their preparation including grinding, polishing and etching with Keller etchant solution. To investigate the formation of deleterious phases like $\mathrm{Al}_{7} \mathrm{Cu}_{2} \mathrm{Fe}$ and $\mathrm{Al}_{4} \mathrm{C}_{3}, \mathrm{X}$-ray diffraction examinations and scanning electron microscopy observations were carried out on the sintered composites. To evaluate grain size and lattice strain, spectroscope system equipped with copper ray lamp (wavelength $1.5405 \AA$ ) was utilized. Williamson-Hall equation was used to determine crystallite size and lattice strain in diffracting domain. For hardness measurements of the sintered samples, Vickers hardness testing machine with the applied force of $1000 \mathrm{~g}$ was utilized.
To evaluate wear resistance of the composite specimens, tribological studies were conducted according to ASTM G99-04 standard [23] using a WAZAU pin-on-disk wear testing machine connected to computer interface from Tribo V4.3L software. The samples were cut from the cross-section perpendicular to the extrusion direction. Hardness testing was performed on the points across cross-section diameter with $1 \mathrm{~mm}$ intervals from one corner to the other. The samples were cut by a Merck lathe from the upper part of the cylindrical samples to make disks of $50 \mathrm{~mm}$ diameter and $4 \mathrm{~mm}$ thickness. To polish the sample surfaces, they were ground against 100, 200, and 500 grit emery papers. As the wearing apparatus, pins of $5 \mathrm{~mm}$ length and $2 \mathrm{~mm}$ diameter from 2160 steel with 60 HRC were prepared. The applied force and sliding distance were selected to be $30 \mathrm{~N}$ and $1000 \mathrm{~m}$, respectively. Wear coefficient, K, was predicted using the Archard equation [24]:
$V=K W L / H$
where $V$ is the lost volume of the worn material, $H$ the Brinell hardness, $W$ the normal applied load (equal to $30 \mathrm{~N}$), $L$ the sliding distance (m) and $K$ the wear coefficient.
Table 1
Specifications of powders used in this study.
| Powder | Particle size | Purity (\%) |
| :--- | :--- | :--- |
| $\mathrm{Al}$ | $<45 \mu \mathrm{m}$ | 99.9 |
| $\mathrm{Cu}$ | $<40 \mu \mathrm{m}$ | 99 |
| $\mathrm{TiO}_{2}$ | $<50 \mathrm{~nm}$ | 99 |
Fig. 1. Twist channel of TE die with $\alpha=90^{\circ}$ and $\beta=60^{\circ}$.
To investigate mechanical properties of the composites and to plot the true stress-true strain relation, compression tests were carried out. The cylindrical compression samples were cut from the centre of the billets for 1, 2 and 4 cycles with ratio of $H / D=1$, separately out of the samples containing 2 and $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ and, then were prepared and polished. The tests were conducted at ambient temperature. The true stress-true strain relations of samples in each pass were inferred from compressive stress-strain curves.
## 3. Results and discussion
### 3.1. Powders specifications
Firstly, to determine the adequate milling duration, aluminum powder and $4 \mathrm{wt} \% \mathrm{Cu}$ powder were mixed and ball-milled for 5, 6 and $8 \mathrm{~h}$. The XRD patterns are compared in Fig. 2a. As seen, with increasing the milling time, the XRD peak intensities for aluminum and copper phases decreased and the XRD peak intensity of $\mathrm{Al}_{2} \mathrm{Cu}$ phase increased; therefore, it is inferred that after $8 \mathrm{~h}$ milling, the phase $\mathrm{Al}_{2} \mathrm{Cu}$ was formed and alloying process was completed. Now, $2 \mathrm{wt} \% \mathrm{TiO}_{2}$ reinforcement powder was added to the mixture and milled for 4 more hours (two-stage alloying). Next, aluminum, copper and $\mathrm{TiO}_{2}$ powders were mixed together and were milled in two portions for 12 and $16 \mathrm{~h}$ (one-stage alloying). Comparative results are shown in Fig. 2b. In two-stage conditions, as observed in Table 2, the subgrain size is smaller, but formation of $\mathrm{Al}_{7} \mathrm{Cu}_{2} \mathrm{Fe}$ brittle phase occurred that might be due to gradual intrusion of $\mathrm{Fe}$ into the mixture during ball-milling through surface erosion of the balls and cups. On the other hand, the weak signs of the formation of that phase were observed in one-stage milling for $16 \mathrm{~h}$. Therefore, to minimize the possible formation of the deleterious brittle phase of $\mathrm{Al}_{7} \mathrm{Cu}_{2} \mathrm{Fe}$ in the final product, all samples were produced through one-stage, $16 \mathrm{~h}$ ball-milling. In Fig. 2c, the results of X-ray diffraction experiments for milling of $\mathrm{Al}-4 \mathrm{wt} \% \mathrm{Cu}$ powder mixture containing $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ for $16 \mathrm{~h}$ are depicted. The results indicated the formation of $\mathrm{Al}_{2} \mathrm{Cu}$ phase which witnesses that the alloying was performed successfully. The deleterious brittle phase of $\mathrm{Al}_{7} \mathrm{Cu}_{2} \mathrm{Fe}$ was not observed in the final product.
Fig. 2. XRD results (a) for different milling times of Al-4Cu, (b) comparison of results for $12 \mathrm{~h} 2$-stage with $12 \mathrm{~h}$ and $16 \mathrm{~h} 1$-stage mechanical alloying and (c) for $16 \mathrm{~h}$ milling of powder mixture containing $8 \mathrm{wt} \% \mathrm{TiO}_{2}$.
Table 2
A comparison of subgrain size and grain strain in different milling times.
| Sample | Type of milling | Milling time $(\mathrm{h})$ | Subgrain size $(\mathrm{nm})$ | Grain strain |
| :--- | :--- | :--- | :--- | :--- |
| $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ | Two stage | 12 | 11 | 0.0017 |
| $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ | One stage | 12 | 33 | 0.0045 |
| $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ | One stage | 16 | 31.51 | 0.00475 |
| $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{TiO}_{2}$ | One stage | 16 | 28.41 | 0.00481 |
Table 3
Relative densities of samples at $700 \mathrm{MPa}$ pressure for $30 \mathrm{~min}$ at different temperatures.
| Sample | Theoretical density $\left(\mathrm{g} / \mathrm{cm}^{3}\right)$ | Bulk density $\left(\mathrm{g} / \mathrm{cm}^{3}\right)$ | Temperature $\left({ }^{\circ} \mathrm{C}\right)$ | Relative density $(\%)$ | Porosity percentage $(\%)$ |
| :--- | :--- | :---: | :--- | :--- | :--- |
| $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ | 2.797 | $2.256 \pm 0.0006$ | 550 | 90.31 | 9.69 |
| | | $2.7095 \pm 0.0004$ | 580 | 96.87 | 3.13 |
| $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{TiO}_{2}$ | 2.856 | $2.7452 \pm 0.003$ | 640 | 98.15 | 1.85 |
| | | $2.759 \pm 0.004$ | 640 | 98.64 | 1.36 |
### 3.2. Evaluation of composites
For hot compacting, the twist extruded samples having $2 \mathrm{wt} \% \mathrm{TiO}_{2}$, first, temperature of $550^{\circ} \mathrm{C}$ and, then $580^{\circ} \mathrm{C}$ were applied. Densities of those samples were calculated using dipping-in-water procedure (Eqs. (2) and (3)) and, since their measured densities at above two temperatures were found to be very low ($<97 \%$), higher temperatures were applied. The applied pressure for all samples was fixed at $700 \mathrm{MPa}$ for $30 \mathrm{~min}$. Porosity volume fraction was also determined using Eq. (3); the results are presented in Table 3. For applying a uniform axial pressure, the ratio of height to diameter $(h / d)$ was about 1.5.
$\rho=\frac{W_{\text {air }}\left[\rho_{\text {water }}-0.0012\right]}{0.99983\left[W_{\text {air }}-W_{\text {water }}\right]}+0.0012$
$\rho_{T}=\sum_{i=1}^{n} f_{i} \rho_{i}$
$\%$ Porosity $=\frac{\rho_{T}-\rho}{\rho_{T}} \times 100$
where $W_{\text {air }}$ is the measured weight of the sample in air, $W_{\text {water }}$ the weight in water, $\rho$ the measured density, $\rho_{\text {water }}$ the density of water and $\rho_{T}$ the theoretical density. In the hot compression test performed on the composite, a temperature of $640^{\circ} \mathrm{C}$ was applied for the other samples, but their holding time in the furnace increased to 60 and $120 \mathrm{~min}$. The final densities are tabulated in Table 4. In hot compression situations at $640^{\circ} \mathrm{C}$ for $120 \mathrm{~min}$, the density of the sintered sample was nearly equal to the theoretical density of the composite. Therefore, the same conditions were repeated for the mixture powder containing $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ reinforcement. Because the density of $\mathrm{TiO}_{2}$ particles $\left(4.5 \mathrm{~g} / \mathrm{cm}^{3}\right)$ was higher than that of the matrix alloy, it was anticipated that with increasing volume fraction of reinforcement particles, the relative density of the composite increased [5] which was consistent with the results depicted in Table 4.
Fig. 4. SEM micrograph of sample $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ showing large agglomerated $\mathrm{TiO}_{2}$ particles.
Table 4
Relative densities of samples at $700 \mathrm{MPa}$ pressure at $640^{\circ} \mathrm{C}$ and different times.
| Sample | Theoretical density $\left(\mathrm{g} / \mathrm{cm}^{3}\right)$ | Bulk density $\left(\mathrm{g} / \mathrm{cm}^{3}\right)$ | Time $(\mathrm{min})$ | Relative density $(\%)$ | Porosity percentage $(\%)$ |
| :--- | :--- | :--- | :--- | :--- | :--- |
| $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ | 2.797 | $2.252 \pm 0.0008$ | 30 | 98.15 | 1.85 |
| | | $2.757 \pm 0.0005$ | 60 | 98.59 | 1.41 |
| $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{TiO}_{2}$ | 2.856 | $2.769 \pm 0.0003$ | 120 | 99.01 | 0.99 |
| | | $2.833 \pm 0.0004$ | 120 | 99.20 | 0.80 |
Fig. 3. SEM images of samples reinforced with (a) 2 and (b) $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ particles prior to $\mathrm{TE}$.
Fig. 5. (a and b) XRD patterns of composites reinforced with $2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ nano-particles and (c and d) EDX analysis results for those composites, respectively.
Table 5
Weight percentage of elements in $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ derived from EDX analysis.
| Element | wt\% of element in $\mathrm{Al}-4 \mathrm{Cu} /$ <br> $2 \mathrm{wt} \% \mathrm{TiO}_{2}$ | wt\% of element in $\mathrm{Al}-4 \mathrm{Cu} /$ <br> $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ |
| :---: | :---: | :---: |
| $\mathrm{Al}$ | 84.1 | 77.14 |
| $\mathrm{Cu}$ | 3.88 | 3.68 |
| $\mathrm{Ti}$ | 2.08 | 7.86 |
| $\mathrm{O}$ | 9.94 | 11.32 |
| Total | 100 | 100 |
Fig. 3 shows SEM micrographs of the samples containing $2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ prior to TE with coarse distinct grains. As seen, the reinforcement particles are distributed uniformly within the matrix. The particles became finer with smooth edges and corners during ball-milling. Uniform distribution of nanoparticles within matrix, because of their high surface to volume ratio, is difficult. In the composites with $8 \mathrm{wt} \% \mathrm{TiO}_{2}$, distribution of these nano-particles was inhomogeneous resulting in formation of their large agglomerates; these usually impair mechanical properties of materials. This is more evident in Fig. 4.
Fig. 6. (a) SE image of composite reinforced with $2 \mathrm{wt} \% \mathrm{TiO}_{2}$ with EDX analysis of points A and B, (b) SE micrograph of composite reinforced with $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ with EDX analysis of points A and B.
Fig. 7. Compression true stress-true strain curves for annealed samples until beginning of barreling.
According to XRD results shown in Fig. 5a, presence of the brittle phase (i.e. $\mathrm{Al}_{7} \mathrm{Cu}_{2} \mathrm{Fe}$) in the sintered $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ sample, which causes brittle fracture of the material, is confirmed. However, X-ray diffraction pattern in Fig. 5b indicates that in the $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{TiO}_{2}$ sample, the brittle phase is absent. In the next stages of experiments, it will be noticed that with application of severe plastic strains, fracture of latter samples occurred more frequently in comparison with the former ones. For performing quantitative analysis of the existing elements in the samples, EDX was utilized. The results are depicted in Fig. 5(c) and (d) as well as in Table 5. Those results were obtained from surfaces of the samples showing the total weight percentage of the elements in the sintered samples. The analysis showed no contamination.
Fig. 6(a) shows secondary electron (SE) micrograph of a sample with composition of $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ together with $\mathrm{EDX}$ analyses of points A and B (specified with circles), which are nearly identical. Therefore, it is concluded that the reinforcing particles are uniformly distributed within the matrix, as mentioned earlier. However, EDX analyses of points A and B of SE micrograph of the sample $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{TiO}_{2}$ shown in Fig. 6 (b) are quite dissimilar. It means that a non-uniform distribution of $\mathrm{TiO}_{2}$ nano-particles coupled with their large agglomerates, which usually contribute to deterioration of mechanical properties of the composite, occurred.
Compression true stress-true strain curves of the annealed samples, in the case of $\mathrm{Al}-4 \mathrm{Cu}, \mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and $\mathrm{Al}-4 \mathrm{Cu} /$ $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ are illustrated and compared in Fig. 7 and the results are shown in Table 6. It is evident that with increasing $\mathrm{TiO}_{2}$ content in the composite, yield stress and Young modulus increase but, ductility decreases. Perhaps, deletion of porosities during hot compression at $640^{\circ} \mathrm{C}$ for $120 \mathrm{~min}$ was effective in enhancement of the sample strength. On the other hand, $\mathrm{TiO}_{2}$ particles are stable thermodynamically, and do not react with the matrix phase at high temperatures. These particles act as barriers against movement of dislocations leading to ductility decrease [25].
Fig. 8 shows variation of lost volume of composites during wear test vs. sliding distance. As previously shown, with increasing the percentage of $\mathrm{TiO}_{2}$ reinforcement, the lost volume decreases. To predict wear coefficient, Eq. (1) was used. The graph in Fig. 9 shows variation of wear coefficient with sliding distance for all samples. As indicated, the lowest wear coefficient belongs to the composite having $8 \mathrm{wt} \% \mathrm{TiO}_{2}$. It is also evident in Table 7 that with increasing weight percentage of $\mathrm{TiO}_{2}$ particles, the hardness of the material increases. Such a hardness increasing can be attributed to the increasing of dislocation densities improving material resistance. Hardness increasing leads to enhancement of wear resistance of the composite [25].
Fig. 8. Variation of lost volume of matrix alloy and composite vs. sliding distance in wear test.
Fig. 9. Variation of wear coefficient for matrix alloy and composites vs. sliding distance at pressure of $30 \mathrm{~N}$.
Table 7
Mean Brinell hardness for different samples.
| Specimen | Hardness (Brinell) |
| :--- | :--- |
| $\mathrm{Al}-4 \mathrm{Cu}$ | 107 |
| $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ | 138 |
| $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{TiO}_{2}$ | 200 |
Fig. 10 shows the backscattered electron (BSE) images of samples $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ from central and lateral regions. EDX examinations revealed that the two discrete dark and light regions correspond to $\mathrm{TiO}_{2}$ clusters and the matrix, respectively. Fig. 10(a), (b), (e) and (f) shows the microstructure of two composites in central area after two passes and four passes of twist extrusion, respectively. It is revealed that grains became finer and the microstructure was more uniform with increasing the number of TE passes and this is same for Fig. 10(c), (d), (g) and (h) that illustrate the microstructure of lateral region of cross-section of both composites after two passes and four passes of TE, respectively. After four passes of TE in both composites, formation of nano-sized grains is evident. Unlike significant effect of billet axial rotations between ECAP passes [26,27], the billet rotations between TE passes have no effect on the plastic flow. This is due to the axial symmetry of the process.
Table 6
Results of uniaxial compression testing for annealed samples.
| Specimen | Young modulus (GPa) | Yield strength (MPa) | Barreling stress (MPa) | Barreling strain (\%) |
| :--- | :--- | :--- | :--- | :--- |
| $\mathrm{Al}-4 \mathrm{Cu}$ | 66 | 256 | 270 | 0.96 |
| $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ | 70 | 246 | 284 | 0.93 |
| $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{TiO}_{2}$ | 79.6 | 278 | 284 | 0.87 |
Fig. 10. BSE images of composites, up: $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and down: $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$, (a, b, e and f) central regions, after 2 and 4 passes, (c, d, g and h) lateral regions, after 2 and 4 passes.
Fig. 11. Grain size measurement for a corner of cross-section of a sample containing $8 \mathrm{wt} \% \mathrm{TiO}_{2}$ extruded for 2 passes.
Clustering of fine particles has been reported by Ritasalo et al. [28]. With increasing twist extrusion passes, $\mathrm{TiO}_{2}$ clusters became smaller having a more homogeneous distribution in the matrix. For the sample $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$, as observed, with increasing the number of TE passes, very fine equiaxed grains are formed. It is also evident that in the centre of the sample, the microstructure is less homogeneous and the formation of $\mathrm{TiO}_{2}$ clusters is observed. As indicated, in both types of samples, the microstructures in the corners and edges of the cross-section are more homogenous than those in the centers. This means that the corners experienced larger strains compared to the centre. In each pass of TE process, applying plastic deformation leads to an increase in dislocation density and, consequently formation of subgrains that impede dislocations glide gradually. With accumulation of plastic strains in subsequent passes, misorientation between neighboring grains increases and elongated grains turn to fine equiaxed ones resulting in a recrystallized microstructure [29]. This is the same phenomenon normally observed in SPD processes and is termed dynamic recrystallization [30].
Mishra et al. [29] suggested that when grains become smaller and also when the total area of grain boundaries increases, discrete dislocations emitted by a boundary are absorbed by the opposite grain boundary. Therefore, in higher passes of TE, dislocation density decreases gradually and high angle grain boundaries form. Shape of grains and rate of converting low angle boundaries to high angle ones in TE process depend on twist path and twist angle $(\alpha)$. It is worth noting that strain distribution and the boundary of deformation zones depend strongly on the geometry of die cross-section, i.e. deviation angle $(\beta)$ and twist angle $(\alpha)$, and by varying these factors, one can change strain intensity in different regions.
Grain sizes of the extruded samples were determined after each TE pass using a scaling-measuring utility installed on the scanning electron microscope, as shown in Fig. 11 and the results are depicted in Table 8. It is inferred from Table 8 that with increasing TE passes, grain size in the centre and in the corners decreases. However, with increase of passes, the amount of strain that can be imposed on the sample, decreases. Such a decrease is further observed at edge regions. Therefore, uniformity of deformed structure increases and gradually, extent of grain refinement increases in central and lateral regions of the sample. This is due to structure stability brought about by saturation of the mechanical properties after the strain exceeds saturation limit. Such stability and saturation are not confined to TE, but are extended to all deformations based on pure shear like ECAP and so on. Mechanisms of this effect in SPD processes are such that with increasing passes (i.e. with increasing the strains), grain boundary surfaces also increase with a rate proportional to deformation state. During plastic deformation, cells or subgrains form and after a rather large strain, a considerable change does not occur in them. Therefore, with increasing strain, amount of high angle boundaries increases [29]. It is then concluded that with increasing the number of passes, the difference in grain sizes decreases in various regions of the sample. To determine extent of this difference in various passes, a variable index $(V)$ is defined as a ratio of standard deviation, SD to a parameter average value $\bar{x}$ as follows [31]:
$V=\frac{S D}{\bar{x}} \times 100 \qquad (5)$
Table 8
Mean grain size $(\mu \mathrm{m})$ of centers and corners of cross sections of two composites at various passes of TE.
| Sample | Position | Annealed | 1-pass | 2-pass | 4-pass |
| :--- | :--- | :--- | :--- | :--- | :--- |
| $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ | Corner | $20.9 \mu \mathrm{m}$ | $11 \mu \mathrm{m}$ | $9 \mu \mathrm{m}$ | $7.1 \mu \mathrm{m}$ |
| | Center | $21.7 \mu \mathrm{m}$ | $20 \mu \mathrm{m}$ | $12 \mu \mathrm{m}$ | $8.4 \mu \mathrm{m}$ |
| $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ | Corner | $18.2 \mu \mathrm{m}$ | $10 \mu \mathrm{m}$ | $7.8 \mu \mathrm{m}$ | $5.6 \mu \mathrm{m}$ |
| | Center | $18.8 \mu \mathrm{m}$ | $14 \mu \mathrm{m}$ | $9.8 \mu \mathrm{m}$ | $6.5 \mu \mathrm{m}$ |
Fig. 12. Grain size heterogeneity index for different passes for two composites.
Fig. 12 shows inhomogeneity of grain size for different passes and various $\mathrm{TiO}_{2}$ contents in composites. As indicated, the annealed sample is more homogeneous in grain size and, the sample extruded for one pass shows highest inhomogeneity. It is evident that with increasing the number of passes, the inhomogeneity of grain size decreases to a minimum of $V=\sim 16 \%$ for four-pass TE operation.
Fig. 13(a) shows the effect of $\mathrm{TiO}_{2}$ content on Vickers microhardness of the composites. A more uniform dispersion of $\mathrm{TiO}_{2}$ particle in the matrix impedes dislocation movements resulting in an increase of the hardness [32]. Fig. 13(b) and (c) shows the variation of hardness in the centre and corners of cross-section as well as mean hardness, in different passes of TE, for both types of composites. As seen, during first pass, the hardness increases dramatically, but in next passes, an obvious decrease in the curve slope is observed. This has been attributed to increase in dislocation density resulted from application of severe plastic deformation [29]. Since corners get higher plastic strains than the centre, they possess higher hardness. However, with increasing the number of passes and gradual saturation of microstructure with strain due to saturation in dislocation density and, then development of a fine substructure, the heterogeneity in hardness distribution on the cross-section of the sample decreases. Therefore, despite occurrence of heterogeneity in deformation, hardness distribution is homogeneous at higher strains. Zendehdel et al. also reported homogeneity of hardness distribution at higher passes when they investigated influence of $\mathrm{TE}$ process on microstructure and mechanical properties of 6063 aluminum alloy [33]. Fig. 14(a) and (b) illustrates Vickers microhardness measured along diagonal line on cross-section of different samples. As indicated, hardness of samples increased noticeably after first pass compared to the annealed specimens. For the samples containing $2 \mathrm{wt} \% \mathrm{TiO}_{2}$, the hardness increased by $52 \%$ on average, but for the sample having $8 \mathrm{wt} \% \mathrm{TiO}_{2}$, the hardness increase was $46 \%$.
The hardness increase is lower for the central regions that undergo lower strain levels and, higher for the lateral areas deformed by higher strains; this is due to higher redundant strains $\left(\varepsilon_{\mathrm{r}}\right)$ at the lateral regions [34]. The variation index, $V$, is also defined for description of heterogeneity level in hardness values. Using the variation index, $V$, calculated through Eq. (5) for different passes, Table 9 for the sample $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and Table 10 for the sample $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ are tabulated. Fig. 15 shows the heterogeneity index of hardness values $(V)$ for different passes of TE. It is inferred from Tables 9 and 10, and Fig. 15 that, in addition to increase in hardness within the central and lateral areas, and also increasing average hardness in whole sample, heterogeneity increased as well. Hardness heterogeneity index value in the sample of $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{TiO}_{2}$ extruded for 4 passes reached 8.44 from 1.37 for the conditions before TE and, also in the sample of $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{TiO}_{2}$ extruded for 4 passes, $V$ value reached 12.45 from 1.42 for the conditions before TE. It seems that heterogeneous distribution of hardness within whole sample was not significant due to application of backpressure during the process, because backpressure is necessary for completion of kinematic conditions of applied plastic flow through tool geometry of $\mathrm{TE}$, and facilitates development of more homogeneous structure and mechanical properties [30].
Fig. 13. (a) Variation of Vickers hardness of composites with different $\mathrm{TiO}_{2}$ contents, prior to TE. (b) Variation of hardness at central and lateral regions of cross-section and mean hardness at different $\mathrm{TE}$ passes for composite $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$. (c) Variation of hardness at central and lateral regions of cross-section and mean hardness at different TE passes for composite $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$.
Fig. 14. Vickers hardness measured along diagonal line over cross section of (a) sample $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and (b) sample $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ at various passes of $\mathrm{TE}$.
Fig. 15. Heterogeneity index of hardness values for various TE passes.
Table 9
Vickers microhardness values for samples $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ at different passes and their heterogeneity values.
| Sample | Hardness in center (HV) | Mean hardness (HV) | Hardness of edge (HV) | Heterogeneity V (\%) |
| :--- | :--- | :--- | :--- | :--- |
| Annealed | 145 | 146 | 147 | 1.37 |
| 1-pass | 217 | 224 | 231 | 6.25 |
| 2-pass | 225 | 234 | 243 | 7.69 |
| 4-pass | 227 | 237 | 247 | 8.44 |
Table 10
Vickers microhardness values for samples $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ at different passes and their heterogeneity values.
| Sample | Hardness in center (HV) | Mean hardness (HV) | Hardness of edge (HV) | Heterogeneity V (\%) |
| :--- | :--- | :--- | :--- | ---: |
| Annealed | 210 | 211.5 | 213 | 1.42 |
| 1-pass | 296 | 308.5 | 321 | 8.10 |
| 2-pass | 318 | 335.5 | 353 | 10.43 |
| 4-pass | 324 | 345.5 | 367 | 12.45 |
Fig. 16. Results of compression tests: (a) true stress-true strain curves for $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$, (b) true stress-true strain curves for $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$. (c) Variation of yield strength at different passes for both composites.
Table 11
Results of uniaxial compression testing on samples $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ (Type 1 and Type 2, respectively) at different passes.
<table><thead><tr><th rowspan="2">Sample</th><th colspan="2">Young modulus (GPa)</th><th colspan="2">Yield strength (MPa)</th><th colspan="2">Barreling stress (MPa)</th><th colspan="2">Barreling strain (%)</th></tr><tr><th>Type 1</th><th>Type 2</th><th>Type 1</th><th>Type 2</th><th>Type 1</th><th>Type 2</th><th>Type 1</th><th>Type 2</th></tr></thead><tr><td>Annealed</td><td>70</td><td>79.6</td><td>246</td><td>278</td><td>284</td><td>298</td><td>0.93</td><td>0.87</td></tr><tr><td>1-pass</td><td>84</td><td>94.9</td><td>304</td><td>325</td><td>241</td><td>347</td><td>0.80</td><td>0.74</td></tr><tr><td>2-pass</td><td>85</td><td>96.6</td><td>334</td><td>354</td><td>358</td><td>366</td><td>0.66</td><td>0.62</td></tr><tr><td>4-pass</td><td>87.6</td><td>97.2</td><td>368</td><td>372</td><td>383</td><td>391</td><td>0.65</td><td>0.61</td></tr></table>
Fig. 16(a) and (b) shows graphs obtained from compression tests including true stress-true strain curves derived from uniaxial compression test on samples $\mathrm{Al}-4 \mathrm{Cu} / 2 \mathrm{wt} \% \mathrm{TiO}_{2}$ and $\mathrm{Al}-4 \mathrm{Cu} / 8 \mathrm{wt} \% \mathrm{TiO}_{2}$ after different TE passes, respectively. The obtained results are compared in Table 11. Fig. 16(c) illustrates variation curves of yield strength for both composites in different TE passes. As indicated, with increasing number of passes, strength increased and ductility decreased. The strength increase extent in the first pass is obviously higher than that in the second and fourth passes which is due to gradual strain saturation in different regions, particularly those closer to the centre. By applying a few number of TE passes, strain exceeds saturation limit and saturation state gradually extends all cross-section area leading to uniformity in changing microstructure and other properties [35].
## 4. Conclusions
Mechanical alloying and powder metallurgy routes were applied to fabricate a new composite with binary matrix of $\mathrm{Al}-4 \mathrm{Cu}$ and reinforced by $2 \mathrm{wt} \% \mathrm{TiO}_{2}$ nano-particles. The annealed materials were subjected to some of the mechanical tests, and hardness, strength and yield strength were measured. With increasing percentages of the reinforcement particles, hardness, yield strength, Young modulus and wear resistance of the composites increased but ductility decreased. Afterwards, the composites were deformed severely through twist extrusion for 1, 2 and 4 passes. The maximum allowable passes of extrusion was four, beyond which the materials did not endure plastic deformation and failed. It was revealed that with increasing the number of passes to 4, a more homogeneous distribution of reinforcement particles occurred and also an ultrafine-grained nano-structure was obtained.
## References
[1] B. Ogel, R. Gurbuz, Microstructural characterization and tensile properties of hot pressed Al-SiC composites prepared from pure Al and Cu powders, Mater. Sci. Eng. A 301 (2001) 213-220.
[2] J. Onoro, M.D. Salvador, L.E.G. Cambronero, High-temperature mechanical properties of aluminium alloys reinforced with boron carbide particles, Mater. Sci. Eng. A 499 (2009) 421-426.
[3] R. Khorshidi, A. Hassani, Comparative analysis between TOPSIS and PSI methods of materials selection to achieve a desirable combination of strength and workability in Al/SiC composite, Mater. Des. 52 (2013) 999-1010.
[4] A. Hassani, M. Zabihi, High strain rate superplasticity in a nano-structured $\mathrm{Al}-\mathrm{Mg} / \mathrm{SiCP}$ composite severely deformed by equal channel angular extrusion, J. Mater. Des. 39 (2012) 140-150.
[5] H. Kaftelena, M.L. Ovecoglua, H. Heneinb, H. Cimenoglua, $\mathrm{ZrC}$ particle reinforced $\mathrm{Al}-4 \mathrm{wt} \% \mathrm{Cu}$ alloy composites fabricated by mechanical alloying and vacuum hot pressing: microstructural evaluation and mechanical properties, Mater. Sci. Eng. A 527 (2010) 5930-5938.
[6] S.A.A. Akbari Mousavi, A.R. Shahab, M. Mastoori, Computational study of $\mathrm{Ti}-6 \mathrm{Al}-4 \mathrm{~V}$ flow behaviors during the twist extrusion process, Mater. Des. 29 (2008) 1316-1329.
[7] R.Z. Valiev, Y. Estrin, Z. Horita, T.G. Langdon, M.J. Zehetbauer, Y.T. Zhu, Producing bulk ultrafine-grained materials by severe plastic deformation, JOM 58 (4) (2006) 33-39.
[8] N.A. Smirnova, V.I. Levit, V.I. Pilyugin, R.I. Kuznetsov, L.S. Davydova, V.A. Sazonova, Evolution of structure of fcc single crystals during strong plastic deformation, Phys. Met. Metallogr. 61 (6) (1989) 127-134.
[9] M.V. Segal, V.I. Reznikov, A.E. Drobyshevskiy, V.I. Kopylov, Plastic metal working by simple shear, Izvestia Akademii nauk SSSR. Metally 1 (1981) 115-123.
[10] G.A. Salishchev, O.R. Valiakhmetov, R.M. Galeyev, Formation of submicrocrystalline structure in the titanium alloy VT8 and its influence on mechanical properties, J. Mater. Sci. 28 (1993) 2898-2903.
[11] Y. Saito, H. Utsunomiya, N. Tsuji, T. Sakai, Novel ultra-high straining process for bulk materials development of the accumulative roll-bonding (ARB) process, Acta Mater. 47 (2) (1999) 579-583.
[12] J.Y. Huang, Y.T. Zhu, H.G. Jiang, T.C. Lowe, Microstructures and dislocation configurations in nanostructured Cu processed by repetitive corrugation and straightening, Acta Mater. 49 (9) (2001) 1497-1505.
[13] Y. Beygelzimer, V.N. Varyukhin, D.V. Orlov, S.G. Son, Twist extrusion - the accumulation of strain, Donetsk firma naukoemnih Technol. Natl. Acad. Sci. Ukraine (2003) 73-75.
[14] N. Pardis, R. Ebrahimi, Deformation behavior in simple shear extrusion (SSE) as a new severe plastic deformation technique, Mater. Sci. Eng. A 527 (1-2) (2009) 355-360.
[15] V. Varyukhin, Y. Beygelzimer, R. Kulagin, O. Prokofeva, A. Reshetov, Twist extrusion: fundamentals and applications, Mater. Sci. Forum 667-669 (2011) 31-37.
[16] Y. Beygelzimer, D. Orlov, V. Varyukhin, Proceedings of the Second International Symposium on Ultrafine Grained Materials, in: Y.T. Zhu (Ed.), Minerals, Metals, and Materials Society, Warren Dale (PA), 2002, pp. 297-301.
[17] Beygelzimer Y, Varukhin V, Synkov S, Sapronov A, Synkov V. New techniques for accumulating large plastic deformations using hydroextrusion, Fizika i Tekhnika Vusokih Davlenii (High Pressure Physics and Technology, in Russian) 1999; 9(3).
[18] Y. Beygelzimer, V. Varyukhin, D. Orlov, S. Sinkov, Twist extrusion: accumulating deformations 56 (2003) 456-465 (in Russian).
[19] Y. Beygelzimer, D. Prilepoa, R. Kulagina, V. Grishaeva, A. Abramovaa, V. Varyukhina, Planar twist extrusion vs. TWIST extrusion, J. Mater. Process Technol. 211 (2011) 522-529.
[20] V. Varukhin, Y. Beygelzimer, S. Synkov, D. Orlov, Applications of twist extrusion, Mater. Sci. Forum 503-504 (2006) 335-340.
[21] Y. Beygelzimer, V. Varyukhin, S. Synkov, D. Orlov, Useful properties of twist extrusion, Mater. Sci. Eng. A 503 (2009) 14-17.
[22] ASTM B962-13 Standard Test Methods for Density of Compacted or Sintered Powder Metallurgy (PM) Products Using Archimedes' Principle West Conshohocken, PA: ASTM International, 2004.
[23] G99-04 A. Standard test method for wear testing with a Pin-on-Disk apparatus. West Conshohocken, PA: ASTM International, 2004.
[24] J.F. Archard, Contact and rubbing of flat surfaces, J. Appl. Phys. 24 (1953) 981.
[25] C.S. Ramesha, A.R. Anwar Khanb, N. Ravikumar, P. Savanprabhu, Prediction of wear coefficient of $\mathrm{Al}_{6061}-\mathrm{TiO}_{2}$ composites, Wear 259 (2005) 602-608.
[26] M. Furukawa, Y. Iwahashi, Z. Horita, M. Nemoto, T.G. Langdon, The shearing characteristics associated with equal-channel angular pressing, Mater. Sci. Eng. A 257 (2) (1998) 328-332.
[27] M.V. Segal, Slip line solutions, deformation mode and loading history during equal channel angular extrusion, Mater. Sci. Eng. A 271 (1-2) (1999) 322-333.
[28] R. Ritasalo, M.E. Cura, X.W. Liu, Y. Ge, T. Kosonen, U. Kanerva, O. Söderberg, S.P. Hannula, Microstructural and mechanical characteristics of $\mathrm{Cu}-\mathrm{Cu}_{2} \mathrm{O}$ composites compacted with pulsed electric current sintering and hot isostatic pressing, Composites: Part A 45 (2013) 61-69.
[29] A. Mishra, V. Richard, F. Gregori, R.J. Asaro, M.A. Meyers, Microstructural evolution in copper processed by severe plastic deformation, Mater. Sci. Eng. A 290 (2005) 410-411.
[30] D. Orlov, Y. Beygelzimer, S. Synkov, V. Varyukhin, N. Tsuji, Z. Horita, Plastic flow, structure and mechanical properties in pure Al deformed by twist extrusion, Mater. Sci. Eng. A 519 (2009) 105-111.
[31] Y. Beygelzimer, D. Orlov, A. Korshunov, S. Synkov, V. Varyukhin, I. Vedernikova, A. Reshetov, A. Synkov, L. Polyakov, I. Korotchenkova, Features of twist extrusion: method, structures and material properties, Solid State Phenomena 114 (2006) 69-78.
[32] G.S. Kataiah, D.P. Girish, The mechanical properties and fractography of aluminium $6061-\mathrm{TiO}_{2}$ composites, IJPSR I (2010) 17-25.
[33] H. Zendehdel, A. Hassani, Influence of twist extrusion process on microstructure and mechanical properties of 6063 aluminum alloy, Mater. Des. 37 (2012) 13-18.
[34] M.S. Mohebbi, A. Akbarzadeh, Experimental study and FEM analysis of redundant strains in flow forming of tubes, J. Mater. Process Technol. 210 (2010) 389-395.
[35] Y. Beygelzimer, A. Reshetov, S. Synkov, O. Prokofeva, R. Kulagin, Kinematics of metal flow during twist extrusion investigated with a new experimental method, J. Mater. Proc. Technol. 209 (2009) 3650-3656.
\ No newline at end of file
# Research Article
## The effects of AZD3582 [4-(nitroxy)butyl-(2S)-2-(6-methoxy-2-naphthyl) propanoate], and naproxen on key pathogenic steps in NSAID-enteropathy in the rat.
M. Walley, G. Sigthorsson, C. Hotz-Behofsits, R. Simpson, I. Bjarnason*
Guy's, King's and St Thomas' School of Medicine, Department of Gastroenterology, King's College Hospital Foundation Trust, Denmark Hill,<br>London SE5 9PJ, London, UK, Tel: ++2032992417, Fax: ++2032996474, e-mail: ingvar.Bjarnason@kcl.ac.uk
Received 9 October 2006; revised 27 January 2007; accepted 1 February 2007
Abstract. Background: The pathogenesis of NSAID-induced enteropathy may involve dual inhibition of the cyclooxygenase (1 and 2) and a topical effect with sequential increased intestinal permeability, development of inflammation and ulcers. It has been suggested that nitric-oxide donating drugs cause significantly less gastrointestinal injury by counteracting NSAID-induced reductions in blood flow.
Aims: To compare the effects of AZD3582 [4-(nitroxy)butyl(2S)-2-(6-methoxy-2-naphthyl) propanoate], and naproxen on key pathogenic steps in NSAID-enteropathy in the rat.
Methods: Single doses of AZD3582, naproxen (dose range $10-300 \mu \mathrm{mol} / \mathrm{kg}$) or vehicle were given to male Sprague Dawley rats. Intestinal permeability (${ }^{51} \mathrm{CrEDTA}$) and intestinal inflammation (granulocyte marker protein) were quantitated and ulcer counts made.
Results: Intestinal permeability (all doses) and inflammation (highest dose of the drugs) increased significantly from control levels following naproxen and AZD3582 and there was no significant difference between the drugs. Median ulcer counts were, however, significantly ($\mathrm{p}<0.01$) lower with AZD3582 $(4 \pm 2)$ than with naproxen $(17 \pm 4)$.
Conclusions: Naproxen and AZD3582 are equally associated with increased small intestinal permeability and inflammation, which is the consequence of their topical effect. The reduced small bowel ulcer counts with AZD3582 accords with the suggestion that vascular factors are the main driving force for NSAID-induced ulcer formation.
Key words: NSAIDs; CINODs; Nitric oxide; Naproxen; AZD3582[^0]
## Introduction
Nonsteroidal anti-inflammatory drugs (NSAIDs) cause gastrointestinal side effects which involve the stomach as well as the small bowel mucosa (Hawkey and Langman, 2003). Although the serious gastric side effects of bleeding and perforation have attracted the most attention, it is increasingly clear that the small bowel is associated with similar types and prevalence of complications (Bjarnason et al., 1993; Laine et al., 2002).
The pathogenesis of NSAID-induced gastrointestinal damage is uncertain. There is substantial evidence to suggest that NSAID-enteropathy in rodents is caused by various combinations of the selective biochemical effects of NSAIDs including cyclooxygenase (COX)-1 and COX-2 inhibition together with the topical effect (Somasundaram et al., 1995). The topical effect is thought to relate to the physicochemical properties of NSAIDs to act as detergents (Lichtenberger et al., 1995) and uncouplers of mitochondrial oxidative phosphorylation (Somasundaram et al., 1995). Collectively, it is suggested that the topical effect results in increased intestinal permeability with mucosal exposure of luminal aggressive factors and hence inflammation. It is also suggested that NSAID-induced inhibition of COX1, with decreased amounts of vasoactive prostaglandins, drives this inflammation to ulcers (Wallace et al., 2000). Interestingly small bowel damage occurs with long-term COX-2 absence or inhibition (small bowel inflammation and ulcers) although the mechanisms are unclear (Sigthorsson et al., 2002).
One of the more recent suggestions for reducing the intestinal side effects of NSAIDs is to attach a nitric oxide (NO) moiety to the NSAID, in the hope that the NO might counteract the effect of prostaglandin deficiency on the intestinal microcirculation. NO donors have been used in patients with cardiovascular disease for more than a century (Burgaud et al., 2002). NO is recognised as an important modulator of a large number of physiological processes (Wallace and Miller, 2000). More specifically, NO increases mucosal blood flow and mucus secretion and decreases leukocyte adherence (Cirino et al., 1996; Wallace and Miller, 2000). As these actions could, in theory, counteract the effects of mucosal prostaglandin deficiency induced by NSAIDs, a class of COX-inhibiting nitric oxide donors (CINODs) have been developed. CINODs are frequently produced by the addition of a nitroxybutyl moiety to the carboxylic group of the NSAID (which mediates the binding to the COX enzymes) by means of an ester linkage (Fiorucci, 2001). As well as potentially offering improved gastrointestinal tolerability, CINODs may result in enhanced anti-inflammatory, anti-pyretic and analgesic effects when compared to NSAIDs (Fiorucci et al., 2002) or COX-2 selective agents, although this is a controversial issue.
Previous animal studies have indicated that CINODs may offer reduced adverse gastrointestinal effects when compared to the parent compounds (Elliott et al., 1995; Davies et al., 1997). More recently, human studies have demonstrated that NO-aspirin (NCX-4016) maintains COX-1 and platelet inhibition whilst nearly avoiding the short-term gastric damage (Fiorucci, et al., 2003) and that AZD3582 [4-(nitroxy)butyl(2S)-2-(6-methoxy-2-naphthyl) propanoate] reduces gastrointestinal toxicity when compared to naproxen (Hawkey et al., 2003).
The precise reason that these CINODs reduce the gastrointestinal damage of the parent drug is controversial. Their ester linkage to the NSAID abolishes their topical effect and their effect on the COX enzymes. In order to maintain therapeutic efficacy the CINOD needs to undergo hydrolysis yielding the parent NSAID and the NO moiety. If the beneficial effects of CINODs are due to their counteracting or compensating for the vascular effects of prostaglandin deficiency it might be expected that CINODs would be equally associated with the permeability and inflammatory changes of the comparator NSAID (consequence of the topical effect) whilst reducing the number of ulcers. We tested this hypothesis by comparing the effects of AZD3582 with those of naproxen on small bowel permeability (using ${ }^{51} \mathrm{CrEDTA}$ ), intestinal inflammation (quantitated by measurement of granulocyte marker protein (GMP)) and small bowel ulcer counts in rats.
## Methods
## Animals
Male Sprague Dawley rats (Charles Rivers), 6-8 weeks old, weighing $200-250 \mathrm{~g}$ were used throughout these studies. Two groups of animals were used. The first group was used for the measurement of intestinal permeability and ulcer counts while the second group was used to assess intestinal inflammation via the measurement of granulocyte marker protein (GMP) in stool samples. Animals were housed singly in metabolism cages for up to 9 days and fed standard laboratory diet and water. For measurement of intestinal inflammation, stool samples were collected each day from day 2 to day 9 (with the drugs being administered on day 5). Following an overnight fast (day 4), animals received AZD3582 or naproxen diluted in solvent (oil in water emulsion) from $60 \mu \mathrm{mol} / \mathrm{ml}$ emulsions and given by gastric gavage at the doses of $10,30,100$ or $300 \mu \mathrm{mol} / \mathrm{kg}$ ( $\mathrm{n}=8$ in each group). Control animals received solvent only. Ulcer counts were performed 48 hours after administration of the drugs or vehicle. The naproxen and AZD3582 were obtained from AstraZeneca, R\&D Sodertalje, Sweden.
## Intestinal Permeability
One hour after administration of the study drugs or vehicle, the rats were administered $10 \mu \mathrm{Ci}{ }^{51} \mathrm{CrEDTA}$ via a tube in a volume of $0.5 \mathrm{ml}$ water followed by $1 \mathrm{ml}$ of water. Animals were allowed food and fluids 2 hours later. All urine passed during the following 5 hours was collected and the samples were assayed for gamma-radioactivity along with standards ( $10 \%$ of the dose given) in a Wallac 1284 gamma counter (Pharmacia, Sweden) for 1 minute. Results are expressed as percentage of the oral dose that was excreted in urine, which provides a measure of intestinal permeability as previously described (Somasundaram et al., 2000).
## Intestinal Inflammation
Stool samples were collected on each day of the study and $1 \mathrm{~g}$ wet weight of each was added to $4 \mathrm{ml}$ of extraction buffer (Tris $50 \mathrm{mM}$, $\mathrm{NaCl}$ $150 \mathrm{mM}$, $\mathrm{CaCl}_{2}$ $10 \mathrm{mM}$, Thiomersal $0.25 \mathrm{mM}$, $\mathrm{pH}$ to 8.4). The samples were then homogenized for 30 seconds at $20,000 \mathrm{rpm}$ using an Ultra Turrax homogenizer (IKE Werke, Germany) and spun in a microcentrifuge for 10 minutes at $13,000 \mathrm{rpm}$. The supernatant was decanted off into an eppendorf tube and the samples were assayed for GMP as previously described (Sigthorsson et al., 2002). In short, $50 \mu \mathrm{l}$ of a 1:200 dilution was added in duplicate to 96-well microtitre plates. The plates were pre-coated with anti-GMP antibody. Equal volumes of 9 standards were also added to the plates in duplicate. The plates were incubated at room temperature on a plate shaker for 45 minutes, washed 4 times with rinsing buffer (Tris $50 \mathrm{mM}$, $\mathrm{NaCl}$ $150 \mathrm{mM}$, $\mathrm{MgCl}_{2}$ $0.5 \mathrm{mM}$, $\mathrm{KCl}$ $2.5 \mathrm{mM}$, Thiomersal $0.25 \mathrm{mM}$, Tween-20 $0.05 \%$, $\mathrm{pH}$ to 8.0), allowed to dry and then $50 \mu \mathrm{l}$ of alkaline phosphatase (ALP) conjugated anti-GMP (diluted 1:800 in assay buffer) was added to each well. The plates were incubated under the same conditions as before, washed and dried as before and then $100 \mu \mathrm{l}$ of substrate (p-nitrophenyl phosphate, $1 \mathrm{mg} / \mathrm{ml}$, in substrate buffer ( $10 \%$ diethanolamine), $\mathrm{MgCl}_{2}$ $0.5 \mathrm{mM}$, Thiomersal $0.25 \mathrm{mM}$, $\mathrm{pH}$ 9.6) was added to each well. The optical density of the highest standard was monitored and when it read between 1.2 and 1.8, the reaction was stopped by adding $50 \mu \mathrm{l}$ of $1 \mathrm{M}$ $\mathrm{NaOH}$ to each well. The plates were read at $405 \mathrm{~nm}$ using an MRX plate reader plus Dynex Revelation software (Dynex Technologies, USA).
The results are expressed in $\mathrm{mg} / \mathrm{l}$ of extract.
## Macroscopic studies
To assess ulceration within the small bowel, animals were euthanized by $\mathrm{CO}_{2}$ inhalation 48 hours after administration of the drugs. The abdomen was opened via a midline incision and the small intestine isolated, removed and gently flushed with $0.9 \%$ saline. The intestinal mucosa was exposed by cutting along the anti-mesenteric side of the intestine. Ulcer counts were performed by noting both the number and size of the ulcers ( $\leq 5 \mathrm{~mm}$ were recorded as pointed, $>5 \mathrm{~mm}$ were recorded as longitudinal).
## Statistical analysis
Results are presented as median and range as not all data was normally distributed. Wilcoxon's rank sum test was used to assess statistical differences between groups and the Wilcoxon's signed rank test for sequential data.
## Results
## Intestinal Permeability
Figure 1 shows that administration of both naproxen and AZD3582 significantly increased intestinal permeability at all doses given when compared with baseline (vehicle only) ( $p<0.001$ ). There was no significant difference ( $p>0.05$ ) in intestinal permeability between AZD3582 and naproxen at any of the doses given.
## Intestinal Inflammation
There was no significant increase in GMP with the 10 or $30 \mu \mathrm{mol} / \mathrm{kg}$ doses of either drug (Figure 2). Rats given AZD3582 at a dose of $100 \mu \mathrm{mol} / \mathrm{kg}$ had GMP values significantly higher than the control group ( $p<0.05$ ). At doses of $300 \mu \mathrm{mol} / \mathrm{kg}$, a significant increase in intestinal inflammation was noted with both naproxen and AZD3582 when compared to the vehicle group ( $\mathrm{p}<0.01$ ). No significant difference was observed between the two drugs at any of the dose range tested.
## Macroscopic examination
On macroscopic examination, no ulcers were seen with either drug over a dose range of $0-100 \mu \mathrm{mol} / \mathrm{kg}$. The mean number of ulcers with naproxen ( $300 \mu \mathrm{mol} / \mathrm{kg}$ ) was 17.1 (range 10-29). The rats treated with AZD3582 had significantly fewer ulcers ( $\mathrm{p}<0.001$ ) (median 2.5; range 0-12) (Figure 3).
Fig. 1 Urinary excretion of ${ }^{51} \mathrm{CrEDTA}$ after Naproxen and AZD3582. The white circles represent median (bars represent range) values obtained from rats dosed with naproxen. The black circles represent values obtained from rats dosed with AZD3582. Urinary excretion of ${ }^{51} \mathrm{CrEDTA}$ was measured 5 hours following dosing.
Fig. 2 GMP concentrations after Naproxen and AZD3582. The white circles represent median (bars represent range) GMP values obtained from rats dosed with naproxen. The black circles represent the GMP values obtained from rats dosed with AZD3582. Data shown is taken from the day following dosing.
Fig. 3 Small bowel ulcer counts after Naproxen and AZD3582. The white circles represent the number of ulcers in rats dosed with naproxen. The black circles represent the number of ulcers in rats dosed with AZD3582. Counts were made 48 hours after dosing with $300 \mu \mathrm{mol} / \mathrm{kg}$.
## Discussion
These studies show that AZD3582 is associated with significantly less small bowel ulcerative damage than naproxen while the postulated consequences of the topical effect, intestinal permeability and inflammation, are equally evident with both drugs. The findings are consistent (assuming that the NO is released prior to or during drug absorption rendering intact naproxen) with the aforementioned pathogenic framework for NSAID-induced small bowel damage and the suggestion that the NO maintains vascular perfusion following the administration of naproxen.
A number of studies show that virtually all acidic NSAIDs increase small intestinal permeability, by virtue of their acidity and lipid solubility, and it is suggested that this is a prerequisite for the development of small intestinal inflammation (Sigthorsson et al., 2000). Unlike non-selective NSAIDs, the NO moiety of AZD3582 renders the molecule non-acidic, and hence it cannot exert a topical effect in this form. Nevertheless it is still associated with increased intestinal permeability in the current experiments, suggesting hydrolysis of the ester bond, presumably by gastric and more importantly pancreatic esterases (Somasundaram et al., 1997). This raises the possibility that the beneficial action of AZD3582 on the stomach (Hawkey et al., 2003; Wilder-Smith et al., 2006) may simply be due to its lack of topical toxicity (Rainsford and Whitehouse, 1980).
While both drugs were associated with similar increases in intestinal permeability there were no inflammatory changes following the lower ( 10 or $30 \mu \mathrm{mol} / \mathrm{kg}$ ) doses of the drugs unlike previous studies where inflammation invariably follows the intestinal permeability changes (Somasundaram et al., 1997; Somasundaram et al., 2000). The reasons for this may be that much higher doses of NSAIDs were administered in previous studies. Indeed at the higher doses of 100 and $300 \mu \mathrm{mol} / \mathrm{kg}$, dose dependent inflammation was seen for naproxen and AZD3582, the inflammation being similar for both drugs. It is noteworthy that NO itself, despite its potentially beneficial effect on microvascular blood flow and healing, may be directly toxic to the epithelial cells at high concentrations (Menconi et al., 1998).
The results of studies on AZD3582 cannot be extrapolated to other NO-NSAIDs as their method of production, stability, pharmacokinetics and rate of hydrolysis may differ. A similar study to the current one (Davies et al., 1997) nevertheless found more contrasting degrees of inflammation with naproxen compared to the CINOD. This study used higher drug doses and a much stricter dosing regime (twice daily dosing for over 2 weeks compared to our single dose over 8 days). However, Somasundaram (1997) using nitroxybutyl-flurbiprofen obtained almost identical results to the current study.
Previous studies have clearly dissociated the consequences of the topical effect (increased intestinal permeability and inflammation) from the COX-1 inhibitory effect, which seems to drive the inflamed mucosa to an ulcerated one (Somasundaram et al., 2000). The ulcerative damage with AZD3582 was significantly less than for naproxen. The precise mechanism for this is nevertheless uncertain. Estimates of the metabolism of CINODs and NO-releasing drugs suggest that the rate of NO release from these compounds both in vitro and in vivo is slow in comparison to other NO donors such as sodium nitroprusside (SNP) and S-nitroso-N-acetyl-D,L-penicillamine (Keeble and Moore, 2002). However NO certainly has the potential to increase microvascular blood flow (Whittle, 2003) and thus reduce the damage (Wallace et al., 2000), but it also increases mucous secretion, reduces secretion and adhesion of neutrophils and reduces cytokine release from macrophages, all of which may be impaired by COX inhibition (Wallace et al., 2000). It has also been suggested that cytochrome P450 may play a role in the metabolism of CINODs (Grosser and Schroder, 2000). Alternatively the bioavailability of naproxen from a dose of AZD3582 may be lower than from a dose of naproxen.
In summary, AZD3582 is associated with the same increases in intestinal permeability and inflammation as equimolar doses of naproxen. At the same time it is associated with significantly less ulcerative small bowel damage. These findings are consistent with the suggestion that NO derived from AZD3582 counteracts the vascular effects of NSAID-induced inhibition of COX.
Acknowledgements. The drugs and solvents were supplied by AstraZeneca, Sweden who supported this project.
## References
Bjarnason, I., Hayllar, J., Macpherson, A. J. et al. (1993). Side effects of nonsteroidal anti-inflammatory drugs on the small and large intestine in humans. Gastroenterology 104: 1832-1847.
Burgaud, J. L., Ongini, E. and Del Soldato, P. (2002). Nitric oxide-releasing drugs - A novel class of effective and safe therapeutic agents. Ann. N. Y. Acad. Sci. 962: 360-371.
Cirino, G., Wheeler-Jones, C. P., Wallace, J. L. et al. (1996). Inhibition of inducible nitric oxide synthase expression by novel nonsteroidal anti-inflammatory derivatives with gastrointestinal-sparing properties. Br. J. of Pharmacol. 117: 1421-1426.
Davies, N. M., Roseth, A. G., Appleyard, C. B. et al. (1997). NO-naproxen vs naproxen: Ulcerogenic, analgesic and anti-inflammatory effects. Aliment. Pharmacol. Ther. 11: 69-79.
Elliott, S. N., McKnight, W., Cirino, G. et al. (1995). A Nitric Oxide-Releasing Nonsteroidal Antiinflammatory Drug Accelerates Gastric Ulcer Healing in Rats. Gastroenterology 109: 524-530.
Fiorucci, S. (2001). NO-releasing NSAIDs are caspase inhibitors. Trends in Immunology 22: 232-235.
Fiorucci, S., Antonelli, E., Mencarelli, A. et al. (2002). A NO-releasing derivative of acetaminophen spares the liver by acting at several checkpoints in the Fas pathway. Br. J. of Pharmacol. 135: 589-599.
Fiorucci, S., Santucci, L., Greasele, P. et al. (2003). Gastrointestinal safety of NO-aspirin (NCX-4016) in healthy human volunteers: a proof of concept endoscopic study. Gastroenterology 124: 600-607.
Grosser, N. and Schroder, H. (2000). A common pathway for nitric oxide release from NO-aspirin and glyceryl trinitrate. Biochem. Biophys. Res. Commun. 274: 255-258.
Hawkey, C. J., Jones, I. J., Atherton, C. T. et al. (2003). Gastrointestinal safety of AZD3582, a cyclooxygenase inhibiting nitric oxide donator: proof of concept study in humans. Gut 52: 1537-1542.
Hawkey, C. J. and Langman, M. J. (2003). Non-steroidal anti-inflammatory drugs: overall risks and management. Complementary roles for COX-2 inhibitors and proton pump inhibitors. Gut 52: 600-608.
Keeble, J. E. and Moore, P. K. (2002). Pharmacology and potential therapeutic applications of nitric oxide-releasing non-steroidal anti-inflammatory and related nitric oxide-donating drugs. Br. J. of Pharmacol. 137: 295-310.
Laine, L., Bombardier, C., Hawkey, C. J. et al. (2002). Stratifying the risk of NSAID-related upper gastrointestinal clinical events: Results of a double-blind outcomes study in patients with rheumatoid arthritis. Gastroenterology 123: 1006-1012.
Lichtenberger, L. M., Wang, Z-M., Romero, J. J. et al. (1995). Non-steroidal anti-inflammatory drugs (NSAIDs) associate with zwitterionic phospholipids: Insight into the mechanism and reversal of NSAID-induced gastrointestinal injury. Nat Med 1: 154-158.
Menconi, M. J., Unno, N., Smith, M. et al. (1998). Nitric oxide donor-induced hyperpermeability of cultured intestinal epithelial monolayers: role of superoxide radical, hydroxyl radical, and peroxynitrite. Biochim. Biophys. Acta. 1425: 189-203.
Rainsford, K. D. and Whitehouse, M. W. (1980). Anti-inflammatory antipyretic salicylic acid esters, with low gastric ulcerogenic activity. Agents Actions. 10: 451-456.
Sigthorsson, G., Tibble, J., Mahmud, T. et al. (2000). NSAID-Induced gastrointestinal damage: the biochemical consequences of the "ion trapping" hypothesis. Inflammopharmacology 8: 31-41.
Sigthorsson, G., Simpson, R. J., Walley, M. et al. (2002). COX-1 and 2, intestinal integrity and pathogenesis of NSAID-enteropathy in mice. Gastroenterology 122: 1913-1923.
Somasundaram, S., Hayllar, J., Rafi S. et al. (1995). The biochemical basis of NSAID-induced damage to the gastrointestinal tract: A review and a hypothesis. Scand J Gastroenterology. 30: 289-299.
Somasundaram, S., Rafi, S., Jacob, M. et al. (1997). Intestinal tolerability of nitroxybutyl-flurbiprofen in rats. Gut 40: 608-613.
Somasundaram, S., Sigthorsson, G., Simpson, R. J. et al. (2000). Uncoupling of intestinal mitochondrial oxidative phosphorylation and inhibition of cyclooxygenase are required for the development of NSAID-enteropathy in the rat. Aliment Pharmacol Ther 14: 639-650.
Wallace, J. L. and Miller, M. J. S. (2000). Nitric oxide in mucosal defense: A little goes a long way. Gastroenterology 119: 512-520.
Wallace, J. L., McKnight, W., Reuter B. K. et al. (2000). NSAID-induced gastric damage in rats: requirement for inhibition of both cyclooxygenase 1 and 2. Gastroenterology. 119: 706-714.
Wilder-Smith CH, J. B., Fornstedt-Wallin, B., Hedman, A. et al. (2006). Dose-effect comparisons of the CINOD AZD3582 and naproxen on upper gastrointestinal tract mucosal injury in healthy subjects. Scand J Gastroenterol 41: 264-273.
Whittle, B. J. (2003). Nitric oxide and the gut injury induced by nonsteroidal anti-inflammatory drugs. Inflammopharmacology 11: 415-422.
# Artificial Intelligence for 6G Networks: Technology Advancement and Standardization
Muhammad K. Shehzad, Luca Rose, M. Majid Butt, István Z. Kovács, Mohamad Assaad, and Mohsen Guizani
Abstract—With the deployment of 5G networks, standards organizations have started working on the design phase for sixth-generation ( $6 \mathrm{G}$ ) networks. $6 \mathrm{G}$ networks will be immensely complex, requiring more deployment time, cost and management efforts. On the other hand, mobile network operators demand these networks to be intelligent, self-organizing, and cost-effective to reduce operating expenses (OPEX). Machine learning (ML), a branch of artificial intelligence (AI), is the answer to many of these challenges providing pragmatic solutions, which can entirely change the future of wireless network technologies. By using some case study examples, we briefly examine the most compelling problems, particularly at the physical (PHY) and link layers in cellular networks where ML can bring significant gains. We also review standardization activities in relation to the use of ML in wireless networks and future timeline on readiness of standardization bodies to adapt to these changes. Finally, we highlight major issues in ML use in the wireless technology, and provide potential directions to mitigate some of them in $6 \mathrm{G}$ wireless networks.
Index Terms-AI, ML, Wireless networks, 3GPP, 6G.
## I. INTRODUCTION
Unprecedented growth in the global cellular traffic (as shown in Fig. 1) and immense data rate demands have become a challenge, leading wireless industry to the next-generation, called 6G. 6G-era will bring digital, physical and biological worlds together with the goal to improve human experience and well-being. $6 \mathrm{G}$ will be operating in TeraHertz $(\mathrm{THz})$ frequencies $(0.1-10 \mathrm{THz})$, hence beneficial for multiple use cases in industrial applications, providing immense data rates $(\approx 1 \mathrm{~Tb} / \mathrm{s})$, accelerating internet-of-things, and wider network coverage. AI/ML will pave the way for $\mathrm{THz}$ communications at different layers [2], e.g., supporting channel acquisition [3] and modulation classification [4] at PHY. Similarly, at the link layer, beamforming design and channel allocation can exploit ML [2]. In $\mathrm{THz}$ systems, a channel can significantly vary at a micrometer scale, resulting in a tremendous increase in channel estimation frequency and corresponding overhead. ML algorithms can counter this issue by using, e.g., improved channel prediction techniques [3], [5].
Fig. 1. Estimation of global mobile subscriptions in machine-to-machine (M2M) and mobile broadband (MBB) from 2020 to 2030. Source: ITU-R Report M.2370-0 [1].
Recently, fast-growing deployment of $5 \mathrm{G}$ has opened up many challenges, including massive complexity in network architecture, low latency, high cost, power consumption, and deployment of hybrid Long-Term Evolution (LTE) new radio $(\mathrm{NR})$, leading to difficulties in network optimization. In such a complex scenario, the network intelligence has become a major focus as it will play a pivotal role in complex problem solving [6], e.g., self-healing, self-optimization, and self-configuration of a network [7].
Future networks will become "cognitive" in a way that many aspects such as spectrum sensing/sharing, slicing, radio resource management (RRM), and mobility management, will be ML-based. Further, it is expected that ML will impact 6G air interface fundamentally and it will be designed to support ML natively [8]. Several recent research attempts, e.g., [9], propose different road maps for 6G, but they do not address standardization timeline and related issues regarding application of ML in 6G. Albeit, to some extent, [10] gives an overview of ML and standardization; nevertheless, ML-related technical challenges and its applications from an industrial and standardization perspective are not addressed.
Reconfigurable intelligent surface (RIS) and non-orthogonal multiple access (NOMA) are two key technologies for 6G [11]. RIS can re-engineer electromagnetic waves, hence beneficial to deliver the information where obstacles block the destination. RIS can be integrated with ML, allowing RIS to acquire environmental information by configuring various sensors, while ML can learn dynamic parameters intelligently, reducing the computation cost of RIS-based networks. Similarly, NOMA is a promising access technique for $6 \mathrm{G}$. In ML-empowered NOMA-based networks, gNodeBs ( $\mathrm{gNB}$ ) can intelligently define their control policy and improve decision-making ability.
Fig. 2. An overview of ML paradigms, major tools, and applications in wireless networks.
Today's networks use model-based methods to optimize various network functions providing characteristics of the process involved. However, these models might be too complex to be implemented in a realistic time frame or they include a great level of abstraction to function in a general environment. In contrast, ML-based solutions can adapt to real-time (RT) scenario changes and localized characteristics, learning the specific environment around the transceivers. The contributions of this article are twofold:
- We look at the above-mentioned problems from an industrial perspective and outline the gap between research and practice.
- We review standardization activities in the context of adopting ML in various aspects of wireless communications, e.g., channel acquisition, positioning. Furthermore, we highlight major issues and possible research directions in relation to the use of ML in wireless networks.
## II. OVERVIEW OF ML TECHNIQUES IN WIRELESS NETWORKS
ML is a process of training machines through data without explicit programming. Broadly speaking, ML consists of three paradigms: unsupervised learning, supervised learning, and reinforcement learning (RL). All these paradigms have a training/exploration phase to optimize a learning algorithm that later can be used in prediction/exploitation phase to infer on unknown inputs. As shown in Fig. 2, we briefly summarize them by providing some use cases in wireless networks.
1) Supervised Learning: Supervised learning exploits a labelled data set to learn a (hidden) function that maps an input to an expected output based on the examples. The standard techniques used to solve supervised learning-based problems are artificial neural networks (ANNs), support vector machines (SVMs), Bayesian networks, recurrent neural networks (RNNs), and convolutional neural networks (CNNs).
2) Unsupervised Learning: Unsupervised learning does not learn from labelled data, instead, training is based on an unlabelled data set. K-means and principal component analysis (PCA) are examples of two major tools used for clustering and dimensionality reduction, respectively.
3) Reinforcement Learning: RL is not based on training but rather the agent/decision-maker learns and decides online, maximizing a long-term reward. RL is beneficial in control problems where the agent adapts to changing environmental conditions, e.g., uplink power control.
Motivated by the considerable benefits of ML in various fields, its applications have also been considered in wireless networks almost at all layers of communication. Here, we focus on its impact on radio access networks (RAN), particularly PHY and link layers. Based on ML tools, given in Fig.2, some case studies will be explained later in Section III.
## A. Machine Learning at PHY
At PHY, many optimization problems are non-convex, e.g., sum-rate maximization. ML is a powerful tool to find good solution(s) for such non-convex optimization problems. Based on advanced learning algorithms, 6G networks provide the following major advantages by using ML.
- ML can be effective to deal with network complexity. 6G networks will be more complex due to numerous network topologies, immense growth in the cellular users, staggering data rate demands, complex air interface, vast network coordination methods, etc. Forecasting considerable complexity of $6 \mathrm{G}$ networks, the derivation of optimum performance solutions is nearly infeasible without ML.
- ML can play a vital role to deal with model deficit problems. Current cellular networks are amenable for mathematical derivation, for instance, information theory gives closed-form expressions for various problems such as Shannon theorem. However, the inherent complexity of $6 \mathrm{G}$ networks hinders the possibility of exploiting closed-form analytical expression(s), which can be due, for instance, to non-linearities either in the channel or network devices. ML offers an efficient way to deal with non-linearities, providing feasible solution(s) in a tractable manner.
- ML can cope with algorithm deficit problems. In current cellular networks, many optimal algorithms, although well-characterized, are impractical to implement. Considering the example of multiple-input multiple-output (MIMO) systems where optimal solutions are known (e.g., dirty paper coding), they are overlooked in favour of linear solutions, e.g., linear minimum mean-squared error. It is envisaged that ML can pave the way to implement more efficient yet practical solutions.
ML has been used to study various PHY issues, and without being exhaustive, some of the recent areas include:
- CNNs are used for modulation classification in [4].
- An RNN-based wireless channel predictor [5] is used in [3], explained in Section III-C to deal with inaccurate channel state information (CSI).
## III. Wireless Networks: Case Studies
In this section, we present three use cases to demonstrate the use of ML techniques in industrial wireless networks. ML tools utilized for these use cases are depicted in Fig. 2.
## A. UE Positioning
Highly accurate user equipment (UE) positioning is one of the prime considerations for Third Generation Partnership Project (3GPP) studies beyond Release 15. Various angle and time-of-arrival-based methods are used to determine UE positioning in today's cellular networks. All of these methods require triangulation techniques to resolve UE position and suffer from time synchronization errors.
We studied UE position by using radio frequency (RF) fingerprinting and two ML techniques, namely deep learning and decision tree, for an outdoor scenario [12]. Serving cell Reference Signal Received Power (RSRP) as well as neighbor cell RSRP values were used as features to train a deep neural network (DNN). As shown in Fig. 3, nearly $5 \mathrm{~m}$ accuracy is achieved for DNN when only 4 serving cell RSRP values and corresponding beam IDs are considered as a feature input, while it improves to nearly $1 \mathrm{~m}$ when 2 more RSRP values from the strongest neighboring cells, respective cell and beam IDs are added to the input feature set. The decision tree, a less complex algorithm as compared to DNN, provides about $2 \mathrm{~m}$ accuracy using data from both serving and neighboring cell beams as an input feature. The mean accuracy of nearly $1 \mathrm{~m}$ obtained from DNN is comparable to the accuracy level achieved with traditional methods without requiring triangulation and does not suffer from signal timing synchronization issues.
## B. ML-Assisted Proactive Mobility
For seamless and efficient mobility, a well optimized network should reduce the number of Handover (HO) events while avoiding Handover Failures (HOF) and Radio Link Failures (RLF). An emerging approach is to utilize ML-based algorithms, which enable proactive and UE specific mobility actions in the gNB. A relatively simple approach to this is to design an ML-based estimator of the radio measurements, such as RSRP of serving and neighbor cells, with a certain minimum accuracy and within a certain time horizon. Radio measurements are traditionally performed at the UEs side and reported to the serving $\mathrm{gNB}$ (or gNB-Centralized Unit) according to specific Radio Resource Control (RRC) configurations. For ML-based prediction purposes, time-traces of RSRP, or Reference Signal Received Quality (RSRQ) values need to be collected either in the UE and/or serving the gNB.
Fig. 3. Comparison of UE position for both DNN and decision tree techniques. The system level parameters for the network includes 8 sites with Inter-site distance $110 \mathrm{~m}$ and carrier frequency $28 \mathrm{GHz}$. For details of the parameters, please refer to [12].
For example, collected time-series of RSRP values are used as input to the ML-based predictor, which provides at the UE, and/or at the serving $\mathrm{gNB}$, a set of sufficiently accurately estimated RSRP values within a given future time horizon. Then, these signal estimations are used for predictive evaluation of possible $\mathrm{HO}$ conditions, thus can trigger proactive measurement reports from the UE and/or proactive $\mathrm{HO}$ actions at the serving $\mathrm{gNB}$. These two steps are repeated with a time periodicity given, e.g., by the sampling rate and time filtering of the input RSRP measurements [13], or alternatively, the steps can also be triggered by the serving $\mathrm{gNB}$ when certain traffic or mobility Quality-of-Service (QoS) conditions are met.
The outlined ML-based mobility algorithm can be implemented in either the UE or gNB or both, depending on the available ML assistance capabilities in each node. Furthermore, the mechanism can be integrated in self-organizing network-based Mobility Robustness Optimization solutions.
## C. CSI Feedback
CSI feedback in the downlink channel is a major challenge in Release 17 and beyond. Currently, CSI precision is affected by compressing the measurements imposed by the standard.
In our study, summarized in Section II-A, we assumed two RNN-based twin channel predictors at the $\mathrm{gNB}$ and UE [3]. The past CSI is utilized for training the RNN at both ends of the communication system. UE's feedback is evaluated with respect to the predicted channel. Fig. 4 depicts the mean-squared error (MSE) between the actual channel and the acquired channel at the $\mathrm{gNB}$, and the precoding gain, when different quantization bits are used to feed back the CSI from the UE. The results are compared with and without using ML for the CSI feedback. A clear benefit of using ML can be observed. We believe that ML-based solutions will improve current performance without increasing signaling overhead.
(a) Trend of MSE.
(b) Trend of precoding gain.
Fig. 4. Performance of MSE and precoding gain. $2 \times 1$ MIMO configuration is considered, and RNN is composed of 1 hidden layer. For parameters' details, refer to [3].
## IV. Role of ML in Standardization
The potential of ML for $5 \mathrm{G}$ has been widely acknowledged in the literature, and applications have even made it into the standard at higher levels, e.g., for networking and security [7]. 3GPP has introduced a specification, named network data analytics function (NWDAF), in Release 15 and 16, as part of the $5 \mathrm{G}$ Core $(5 \mathrm{GC})$ architecture [7]. NWDAF is responsible for providing network analytics when requested by a network function (NF). Data is collected via application function (AF), operation, administration, and maintenance (OAM), NF, and data repositories. The specifications have also addressed the problem of inter-working for automation and data collection, which analytics vendors previously faced. 3GPP NWDAF framework for $5 \mathrm{G}$ systems is depicted in Fig. 5. This automation gives leverage to network vendors for the deployment and testing of non-RT ML-related use cases. In Fig. 5, inward interfaces aggregate data from different network sources, where communication occurs using existing service-based interfaces. Outward interfaces provide decisions (analytics-based, algorithmic) to AF and NF.
Fig. 5. A generalized framework for 5G network automation in Release 16, representing that NWDAF should be able to collect data from the operator OAM, AFs and $5 \mathrm{GC}$ network functions $[7]$.
Regarding PHY, ML techniques lag behind, due to a number of issues. First, PHY makes use of abstractions and mathematical models that are inferred from the physical reality and electromagnetic principles. As long as such models describe the real-world precisely, there is no need for ML. Nevertheless, in practice, models and fixed algorithms are inefficient when facing rapidly changing and heterogeneous environments. For example, using the same channel acquisition scheme to acquire CSI from a laptop in line-of-sight with a $\mathrm{gNB}$, a tablet on a fast train, or a mobile quickly moving in a super densely covered area might not be optimal. Consequently, the standardization efforts of intelligent techniques have gained momentum, and while 3GPP is ready to begin a study item on ML implementations, open-radio access network (O-RAN) will be ML-native, defining a RAN intelligent controller (RIC), which will enhance several RAN functions.
3GPP has started studying the implications of the ML use at layer-1 and a study item on ML for NR air interface has been agreed upon. After the RAN-1 working group studies, protocol aspects will be studied in RAN-2 and subsequently, interoperability and testability aspects will be considered in RAN-4 working group. The remaining part of this section summarizes the status of the standardization of ML techniques for PHY for both 3GPP and O-RAN.
## A. CSI Feedback
CSI feedback for downlink channel in Release 17 is a complex issue in which UE-based beam selection is followed by CSI reference symbols (RS) training and precoding matrix index (PMI) reporting, and lastly by Demodulation Reference Signal (DMRS) and consequent estimation of the precoded channel. Broadly, beam selection aims to establish a sufficiently strong link budget between the UEs and the gNB. The CSI-RS is used for fine channel estimation, which is then fed back to the gNB to compute a precoder (eventually multiuser); finally, DMRS are precoded pilots that the UEs use to implement coherent demodulation. Currently, each of these phases is created following pre-established rules, with little to no room for intelligent behaviour. ML has been envisioned to possibly enhance each phase in a different way. Beam selection can be improved by intelligently correlating the beams with position or identity of the UEs. This would allow for a smart selection of the beams from the gNB side, thus avoiding brute-force selection. The CSI-RS can be enhanced by compressing the pilots and the PMI feedback exploiting ad hoc ML compressors. Furthermore, channel prediction techniques [5] can be used in order to pre-establish a baseline for the CSI feedback [3]. Other aspects that can be improved include frequency of pilots in both CSI-RS and DMRS, power and timing and CSI-RS port selection.
## B. RS-DMRS
Roughly speaking, DMRS are RS used for channel estimation to perform coherent demodulation. The correct estimation of the channel using such pilots has a strong impact on the performance in terms of bit-error-rate and thus block-error-rate. The role of the ML in such domain is twofold. First, it can be used to improve the performance of the channel estimation. Second, the ML can provide a smarter positioning of DMRS in order to reduce their frequency, hence reducing the overhead footprint in $6 \mathrm{G}$.
## C. Positioning
A precise positioning is one of the aspects that sees the largest improvement with respect to LTE's observed time difference of arrival (OTDOA) and uplink time difference of arrival (UTDOA), defined in Release 9 onward. Various aspects of $6 \mathrm{G}$ allow for precise positioning of the UE, such as large number of antenna elements at the $\mathrm{gNB}$, millimeter wave transmissions, dense network deployment. However, the methods based on angle-of-arrival and time-of-arrival fall short when non-line-of-sight scenarios are considered, in interference-limited scenarios. ML techniques, see Fig.2, are expected to help in improving the position by exploiting channel charting, hence learning the likely position of a UE based on a report, and multiplexing together information that carries positioning information but are hard to exploit in a classical way, such as CSI report and sounding reference signal maps.
## D. Mobility Enhancements
In 6G, frequent cell-selection, and frequent RSRP measurement could impact UEs' battery life. Furthermore, load balancing algorithms can use intelligent techniques that exploit the UE specific channel prediction, movement trajectory prediction and traffic demands prediction. Furthermore, the scenarios like fast-trains or non-terrestrial networks, will pose challenges to $\mathrm{HO}$ and conditional-HO operations. Novel solutions envisaged, compared to current 3GPP Release 17, include the use of UE specific ML-based predictive algorithms, addressed in Section III-B, designed to reduce paging errors and HO failures; thus, improve the overall QoS.
## E. Standardization for ML Data Collection
3GPP has started working on data collection for running ML algorithms in 5G networks [14]. The scope of such studies include identifying mechanisms to collect data from the network through minimization of drive test framework or further advanced enhancements. Furthermore, studies will focus on discussing hosting of ML models both for training as well as inference purposes at various network entities for various use cases and defining any new interfaces required for transporting data to the models.
## F. Federated Learning Model Collection
Training and prediction based on ML models will put an extra load on networks already transporting a large volume of data. Therefore, it is important to estimate the effect of model training and inference on network traffic, particularly for federated learning (FL) where UEs will act as distributed hosts [15]. The latency in collecting locally trained models is bounded in FL and network links should be able to meet delay budgets. This is particularly challenging in today's networks where a UE's own QoS requirements are already demanding and the FL model training and collection will further incur an extra burden on the network. Similarly, the split inference, where UEs cooperate with each other to perform joint inference, results in increasing the network traffic. 3GPP studies in Release 18 [15] will focus on the above mentioned issues to support training and inference for ML/FL models over wireless links.
## G. O-RAN-RIC
O-RAN alliance, aims to define a RAN network that is non-vendor specific, and that has an innate support for ML as an enabler for automation and OPEX savings. O-RAN alliance has defined interfaces for exchange of information in the protocol stack. To this end, in the O-RAN architecture, ML-assisted RAN intelligent controller (RIC) is included for network automation, for both scenarios, i.e., non-RT and RT. In the non-RT RIC, ML algorithms' training is done by using the data obtained at lower layers. However, the learning process remains slow; therefore, it is called non-RT RIC. Later, the learner is fed into the RT RIC, which utilizes the RT captured data to perform decisions online. Additionally, the functionality of non-RT includes policy management and higher layer procedure optimization. Therefore, the RAN or core-network can deploy such a mechanism based on the collected data.
## V. Open Challenges and Roadmap for Deploying ML TECHNIQUES
Though ML is a potential technology and enabler for next-generation wireless networks, several challenges related to its practical use are addressed below.
## A. Data Availability and Benchmarking
One of the foremost challenges in wireless networks is data availability. Data availability concerns the problem of identifying a common and accepted set of data (e.g., channel realizations) with the goal of testing and benchmarking ML algorithms. Such a problem is of a pivotal importance for standardization, where normally algorithms and proposals are tested using agreed underlying physical models (e.g., urban macrocells/microcells channel models), evaluation methodologies and calibrated simulators. Contrary to other fields, cellular networks have no standard data set to train and benchmark an ML algorithm. Therefore, a synthetic data set or software generated data set is of a predominant importance to train and benchmark ML algorithm(s), and to agree on a common evaluation methodology to rank proposition and standard algorithms.
Identifying a set of key performance indicators in wireless networks is another crucial task for ML standardization. It is necessary to design a set of metrics to classify and rank ML algorithms and their performance. Classic approaches such as throughput and signal-to-interference-plus-noise ratio (SINR) might not be sufficient since a small improvement in these values might come at the cost of large complexity augmentation and exacerbated energy consumption.
Fig. 6. Model collection for FL in a wireless network when some of the UEs have large blockage and use D2D communication for model transfer. Cluster-based UE selection is another solution for asynchronous model collection to meet network QoS requirements.
## B. Selection of ML versus Non-ML Solutions
ML tools are regarded as an implementation-oriented tool rather than a standard relevant aspect. The idea behind this relies on the fact that each vendor has the freedom to efficiently implement each aspect of the standard as long as the external interfaces are respected. A simple example of this is given in the CSI feedback, where a UE needs to select a specific PMI, but the standard does not specify any specific way in which this selection is performed. Recently, however, the idea of having ML dedicated message exchanges and performance that only an ML-aided algorithm can achieve has paved the way for standardization of ML algorithms [3]. This opens the door for several issues, e.g., will the standard impose a specific ML structure, classifying minimum performance and implementation structure, or will it remain far from the implementation? With regards to NNs, it is still open if hyperparameters are going to be left to vendor-specific implementation or will they be set by the standard.
## C. Complexity of ML Algorithms
Considering the limited battery life, storage, computational capability, and limited communication bandwidth in most cellular network entities, an ML model's cost-performance tradeoff becomes a fundamental issue. Another issue is the speed/time-steps at which the training and inference needs to be performed. Whereas hard-wired gNB have sufficient computational power to run complex ML algorithms, UEs need to face battery, heating and stringent complexity limits. Possible solutions to such issue include, but not limited to implementation of substitute rule-based algorithms at the UE side, migrating the load all on the $\mathrm{gNB}$ side.
## D. Communication-aware Federated Learning
Traditional ML models support centralized learning. Due to difficulties in collecting large amount of training data from the UEs, privacy issues and bandwidth bottleneck, FL has emerged as a promising solution. In FL, training is performed distributively over network devices, called local model hosts, and an application server on the network side acts as a central host to aggregate local models transmitted by the local learners. Typically, an application server host aggregates models only when updates are available from all the local learners, called synchronous model transfer. However, this is highly inefficient in wireless networks where links are unpredictable, local learners (UEs) are energy limited and have their own QoS requirements. Asynchronous model collection is the most viable solution for FL in wireless networks, where a subset of UEs is selected for a local model update in each round of model collection. However, UE selection in each round is a complex problem because UEs are energy limited and the network bandwidth is scarce, hindering collection of local models from all the UEs to represent independent and identically distributed data collection. These mechanisms are usually vendor proprietary, but standardization still needs to define some common mechanisms for efficient model collection. As shown in Fig. 6, UE clustering and local device-to-device (D2D) communication for asynchronous model collection are possible solutions to decrease network communication and will require standardization support.
## E. Stability and Adaptability of ML Techniques
ML algorithms applied to wireless networks must be adaptive as they will have to deal with parameters that change dynamically. Particularly, the weights of the NN are evaluated online based on the trained data. However, this approach may not be applicable in wireless, and specifically in a standard, where coordination among entities belonging to different operators and provided by different vendors have to coexist, and in which the need for quick response could prevent one or the other solution. Possible solutions include: pre-trained $\mathrm{NN}$, or partially trained $\mathrm{NN}$ (i.e., $\mathrm{NN}$ in which the starting point is pre-set); cloud-based downloadable data set for $\mathrm{NN}$ training; codebook-based $\mathrm{NN}$, in which a codebook of different NNs is used and agreed upon between the gNB and UEs. Another related problem is to detect an outdated ML model with high inference error and replace it. Replacing an outdated model with a new model incurs further delay. Thus, there must be a proactive mechanism to adapt the ML model to network conditions such that network functions suffer minimum performance loss.
## VI. Conclusion
Motivated by the promise of the use of ML algorithms, we presented an overview of ML techniques to be used in 5G-Advanced and 6G wireless networks. Furthermore, we discussed the key roles of ML-based solutions from industrial and standardization perspectives. We also highlighted the practical challenges of deploying ML techniques in wireless networks and how to deal with them. Non-RT and higher layer ML-based solutions can be, and are, applied already in today's networks. Implementing RT ML solutions at PHY/MAC in 6G networks is the next big challenge in the research community. We believe that overcoming these challenges, both in research as well as at standardization levels, will pave the way for next-generation wireless communication to be effective and sustainable.
## REFERENCES
[1] I. Union, "IMT traffic estimates for the years 2020 to 2030," Report ITU, pp. 2370-0, 2015.
[2] A.-A. A. Boulogeorgos, E. Yaqub, M. Di Renzo, A. Alexiou, R. Desai, and R. Klinkenberg, "Machine learning: A catalyst for $\mathrm{THz}$ wireless networks," Frontiers in Communications and Networks, p. 37, 2021.
[3] M. K. Shehzad, L. Rose, and M. Assaad, "Dealing with CSI compression to reduce losses and overhead: An artificial intelligence approach," in 2021 IEEE International Conference on Communications Workshops (ICC Workshops), 2021, pp. 1-6.
[4] T. O'Shea and J. Hoydis, "An introduction to deep learning for the physical layer," IEEE Transactions on Cognitive Communications and Networking, vol. 3, no. 4, pp. 563-575, 2017.
[5] M. K. Shehzad, L. Rose, S. Wesemann, and M. Assaad, "ML-based massive MIMO channel prediction: Does it work on real-world data?" IEEE Wireless Communications Letters, pp. 1-5, 2022.
[6] B. Mao, F. Tang, Y. Kawamoto, and N. Kato, "Optimizing computation offloading in satellite-UAV-served 6G IoT: A deep learning approach," IEEE Network, vol. 35, no. 4, pp. 102-108, 2021.
[7] 3GPP, "Study of enablers for network automation for 5G (Release 16)," https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=3252, Technical Report (TR) 23.791, 06 2019.
[8] J. Hoydis, F. A. Aoudia, A. Valcarce, and H. Viswanathan, "Toward a 6G AI-native air interface," IEEE Communications Magazine, vol. 59, no. 5, pp. 76-81, 2021.
[9] F. Tariq, M. R. Khandaker, K.-K. Wong, M. A. Imran, M. Bennis, and M. Debbah, "A speculative study on 6G," IEEE Wireless Communications, vol. 27, no. 4, pp. 118-125, 2020.
[10] R. Shafin, L. Liu, V. Chandrasekhar, H. Chen, J. Reed, and J. C. Zhang, "Artificial intelligence-enabled cellular networks: A critical path to beyond-5G and 6G," IEEE Wireless Communications, vol. 27, no. 2, pp. 212-217, 2020.
[11] R. Zhong, Y. Liu, X. Mu, Y. Chen, and L. Song, "AI empowered RIS-assisted NOMA networks: Deep learning or reinforcement learning?" IEEE Journal on Selected Areas in Communications, vol. 40, no. 1, pp. 182-196, 2022.
[12] M. M. Butt, A. Pantelidou, and I. Z. Kovács, "ML-assisted UE positioning: performance analysis and 5G architecture enhancements," IEEE Open Journal of Vehicular Technology, vol. 2, pp. 377-388, 2021.
[13] 3GPP, "NR; Radio Resource Control (RRC); Protocol specification (Release 15)," https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=3197, Technical report (TR) TS 38.331, 03 2021.
[14] —, "Study on enhancement for data collection for NR and ENDC (Release 17)," https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=3817, Technical report (TR) 37.817, 01 2021.
[15] —, "5G System (5GS); Study on traffic characteristics and performance requirements for AI/ML model transfer (Release 18)," https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=3721, Technical report (TR) 22.874, 03 2021.
Muhammad K. Shehzad [S'21] is working as a Research Engineer and Ph.D. student at Nokia Bell-Labs and CentraleSupelec, Paris, France, respectively. He received his B.Eng. (Hons.) degree in Electrical and Electronic Engineering from the University of Bradford, Bradford, U.K., in 2016, and M.S. in Electrical Engineering from the National University of Sciences \& Technology (NUST), Islamabad, Pakistan, in 2019. His major research interest is in MIMO communication using Artificial Intelligence (AI)/Machine Learning (ML).
Luca Rose [M'11] is Senior research and standardization expert with Nokia Bell-Labs. He received his M.Sc. from University of Pisa, Italy, and his Ph.D. in Physics from Centrale-Supelec. He worked with Huawei France research center and Thales Communications and Security, contributing to several standard organizations. He is currently an ITU-R and ETSI delegate and the lead editor of IEEE Communication magazine series on IoT. His interests span from the field of AI/ML to Game theory.
M. Majid Butt [SM'15] is a Senior Specialist at Nokia Bell-Labs, France, and an adjunct Professor at Trinity College Dublin, Ireland. He has authored more than 70 peer-reviewed conference and journal articles and filed over 30 patents. He is IEEE Comsoc distinguished lecturer for the class 2022-23. He frequently gives invited and technical tutorial talks on various topics in IEEE conferences and serves as an associate editor for IEEE Communication Magazine, IEEE Open Journal of the Communication Society and IEEE Open Journal of Vehicular Technology.
István Z. Kovács [M’00] received his B.Sc. from "Politehnica" Technical University of Timişoara, Romania in 1989, his M.Sc.E.E. from École Nationale Supérieure des Télécommunications de Bretagne, France in 1996, and his Ph.D.E.E. in Wireless Communications from Aalborg University, Denmark in 2002. Currently he is senior research engineer at Nokia, Aalborg, Denmark, where he conducts research on machine learning-driven radio resource management and radio connectivity enhancements for non-terrestrial and aerial vehicle communications, in LTE and 5G networks.
Mohamad Assaad [SM'15] is a Professor at CentraleSupelec, France and a researcher at the Laboratory of Signals and Systems (CNRS). He has coauthored 1 book and more than 120 journal and conference papers and serves regularly as TPC cochair for top-tier international conferences. He is currently an Editor for the IEEE Wireless Communications Letters and Journal of Communications and Information Networks. His research interests include 5G and beyond systems, and Machine Learning in wireless networks.
Mohsen Guizani [F'09] is currently a Professor at the Machine Learning Department at the Mohamed Bin Zayed University of Artificial Intelligence (MBZUAI), Abu Dhabi, UAE. His main research interests are wireless communications and IoT security. He was elevated to the IEEE Fellow in 2009. He was listed as a Clarivate Analytics Highly Cited Researcher in Computer Science in 2019, 2020 and 2021. Dr. Guizani has won several research awards. He is the author of ten books and more than 800 publications.
# 数学新星问题征解
第十五期 (2016.06)
主持: 牟晓生
第一题. 设 $z_{1}, z_{2}, z_{3}$ 是单位复数. 证明存在单位复数 $z$ 使得:
$$
\frac{1}{\left|z-z_{1}\right|^{2}}+\frac{1}{\left|z-z_{2}\right|^{2}}+\frac{1}{\left|z-z_{3}\right|^{2}} \leq \frac{9}{4}
$$
(湖北武钢三中学生 王逸轩, 上海大学冷岗松 供题)
第二题. 如图, $D$ 是正三角形 $A B C$ 的边 $B C$ 上一点, $B D>C D$. 记 $O_{1}, I_{1}$ 为 $\triangle A B D$ 的外心与内心, $O_{2}, I_{2}$ 为 $\triangle A C D$ 的外心与内心. 圆 $I_{1}$ 与圆 $I_{2}$ 除 $B C$外的另一条外公切线交 $A B, A C$ 于 $P, Q$. 设直线 $P I_{1}$与 $Q I_{2}$ 交于 $R$, 而直线 $O_{1} I_{1}$ 与 $O_{2} I_{2}$ 交于 $T$. 证明: $A T^{2}=A R^{2}+A D \cdot B C$.
(广西钦州 卢圣 供题)
第三题. 给定正整数 $m, n$, 考虑在 $m \times n$ 白棋盘上先将一些格染成黑色. 在之后的每一时刻, 若存在一个白格至少与两个黑格相邻, 则可将它也染成黑色. 求最初至少要染多少个黑色格才能在某一时刻染黑整个棋盘?
(哈佛大学 牟晓生 供题)
第四题. $A B C$ 是一个三角形, 而 $P, Q, R$ 分别是 $B C, C A, A B$ 上的点。证明 $\triangle P Q R$ 的周长不小于 $\triangle A Q R, \triangle B R P, \triangle C P Q$ 周长的最小值.
(哈佛大学 牟晓生 供题)
[
{
"layout_dets": [
{
"category_id": 1,
"poly": [
245.17965698242188,
1408.162841796875,
1409.9876708984375,
1408.162841796875,
1409.9876708984375,
1576.8612060546875,
245.17965698242188,
1576.8612060546875
],
"score": 0.9999911189079285
},
{
"category_id": 1,
"poly": [
625.3294067382812,
753.8365478515625,
1410.015380859375,
753.8365478515625,
1410.015380859375,
797.5187377929688,
625.3294067382812,
797.5187377929688
],
"score": 0.9999904632568359
},
{
"category_id": 1,
"poly": [
243.91610717773438,
900.430419921875,
1029.7550048828125,
900.430419921875,
1029.7550048828125,
1246.8853759765625,
243.91610717773438,
1246.8853759765625
],
"score": 0.9999890327453613
},
{
"category_id": 1,
"poly": [
244.826171875,
575.121826171875,
1113.444091796875,
575.121826171875,
1113.444091796875,
624.2438354492188,
244.826171875,
624.2438354492188
],
"score": 0.9999887347221375
},
{
"category_id": 1,
"poly": [
698.5866088867188,
1262.7681884765625,
1032.8016357421875,
1262.7681884765625,
1032.8016357421875,
1304.719970703125,
698.5866088867188,
1304.719970703125
],
"score": 0.9999858736991882
},
{
"category_id": 1,
"poly": [
1047.3941650390625,
1589.7156982421875,
1407.320556640625,
1589.7156982421875,
1407.320556640625,
1635.564453125,
1047.3941650390625,
1635.564453125
],
"score": 0.9999785423278809
},
{
"category_id": 0,
"poly": [
586.237060546875,
268.1336669921875,
1070.578857421875,
268.1336669921875,
1070.578857421875,
333.3851623535156,
586.237060546875,
333.3851623535156
],
"score": 0.9999648332595825
},
{
"category_id": 3,
"poly": [
1064.586669921875,
891.74169921875,
1405.2781982421875,
891.74169921875,
1405.2781982421875,
1323.926513671875,
1064.586669921875,
1323.926513671875
],
"score": 0.9999620318412781
},
{
"category_id": 1,
"poly": [
245.0867156982422,
1737.461181640625,
1407.4088134765625,
1737.461181640625,
1407.4088134765625,
1844.520751953125,
245.0867156982422,
1844.520751953125
],
"score": 0.9999591112136841
},
{
"category_id": 1,
"poly": [
728.7286376953125,
464.164306640625,
925.77294921875,
464.164306640625,
925.77294921875,
507.0546875,
728.7286376953125,
507.0546875
],
"score": 0.9999172687530518
},
{
"category_id": 1,
"poly": [
671.8990478515625,
403.32611083984375,
982.4508666992188,
403.32611083984375,
982.4508666992188,
447.346435546875,
671.8990478515625,
447.346435546875
],
"score": 0.9999128580093384
},
{
"category_id": 1,
"poly": [
1050.064697265625,
1859.377197265625,
1406.635009765625,
1859.377197265625,
1406.635009765625,
1901.196533203125,
1050.064697265625,
1901.196533203125
],
"score": 0.9998365640640259
},
{
"category_id": 8,
"poly": [
559.9688720703125,
640.2896728515625,
1096.220458984375,
640.2896728515625,
1096.220458984375,
732.165283203125,
559.9688720703125,
732.165283203125
],
"score": 0.9991127252578735
},
{
"category_id": 13,
"poly": [
409,
581,
530,
581,
530,
621,
409,
621
],
"score": 0.93,
"latex": "z_{1},z_{2},z_{3}"
},
{
"category_id": 13,
"poly": [
539,
963,
627,
963,
627,
1005,
539,
1005
],
"score": 0.93,
"latex": "O_{1},I_{1}"
},
{
"category_id": 13,
"poly": [
754,
1741,
864,
1741,
864,
1783,
754,
1783
],
"score": 0.93,
"latex": "P,Q,R"
},
{
"category_id": 13,
"poly": [
725,
1144,
798,
1144,
798,
1185,
725,
1185
],
"score": 0.92,
"latex": "O_{2}I_{2}"
},
{
"category_id": 13,
"poly": [
738,
1413,
836,
1413,
836,
1451,
738,
1451
],
"score": 0.92,
"latex": "m\\times n"
},
{
"category_id": 13,
"poly": [
602,
1144,
674,
1144,
674,
1184,
602,
1184
],
"score": 0.92,
"latex": "O_{1}I_{1}"
},
{
"category_id": 13,
"poly": [
246,
1023,
332,
1023,
332,
1065,
246,
1065
],
"score": 0.92,
"latex": "O_{2},I_{2}"
},
{
"category_id": 13,
"poly": [
304,
963,
470,
963,
470,
1002,
304,
1002
],
"score": 0.92,
"latex": "B D\\,>\\,C D"
},
{
"category_id": 13,
"poly": [
289,
1144,
350,
1144,
350,
1186,
289,
1186
],
"score": 0.91,
"latex": "Q I_{2}"
},
{
"category_id": 14,
"poly": [
557,
640,
1093,
640,
1093,
729,
557,
729
],
"score": 0.91,
"latex": "\\frac{1}{|z-z_{1}|^{2}}+\\frac{1}{|z-z_{2}|^{2}}+\\frac{1}{|z-z_{3}|^{2}}\\leq\\frac{9}{4}."
},
{
"category_id": 13,
"poly": [
767,
1083,
835,
1083,
835,
1125,
767,
1125
],
"score": 0.91,
"latex": "P,Q"
},
{
"category_id": 13,
"poly": [
597,
1082,
720,
1082,
720,
1124,
597,
1124
],
"score": 0.9,
"latex": "A B,A C"
},
{
"category_id": 13,
"poly": [
988,
1740,
1176,
1740,
1176,
1783,
988,
1783
],
"score": 0.9,
"latex": "B C,C A,A B"
},
{
"category_id": 13,
"poly": [
968,
1084,
1026,
1084,
1026,
1123,
968,
1123
],
"score": 0.9,
"latex": "P I_{1}"
},
{
"category_id": 13,
"poly": [
546,
1414,
615,
1414,
615,
1453,
546,
1453
],
"score": 0.9,
"latex": "m,n"
},
{
"category_id": 13,
"poly": [
570,
1800,
921,
1800,
921,
1843,
570,
1843
],
"score": 0.89,
"latex": "\\triangle A Q R,\\triangle B R P,\\triangle C P Q"
},
{
"category_id": 13,
"poly": [
771,
1024,
806,
1024,
806,
1064,
771,
1064
],
"score": 0.88,
"latex": "I_{1}"
},
{
"category_id": 13,
"poly": [
887,
1024,
921,
1024,
921,
1063,
887,
1063
],
"score": 0.88,
"latex": "I_{2}"
},
{
"category_id": 13,
"poly": [
996,
585,
1021,
585,
1021,
616,
996,
616
],
"score": 0.82,
"latex": "z"
},
{
"category_id": 13,
"poly": [
475,
904,
510,
904,
510,
941,
475,
941
],
"score": 0.81,
"latex": "D"
},
{
"category_id": 13,
"poly": [
437,
1145,
467,
1145,
467,
1181,
437,
1181
],
"score": 0.8,
"latex": "R"
},
{
"category_id": 13,
"poly": [
884,
1145,
914,
1145,
914,
1181,
884,
1181
],
"score": 0.8,
"latex": "T"
},
{
"category_id": 14,
"poly": [
246,
1203,
593,
1203,
593,
1244,
246,
1244
],
"score": 0.78,
"latex": "A T^{2}=A R^{2}+A D\\cdot B C."
},
{
"category_id": 13,
"poly": [
883,
903,
943,
903,
943,
942,
883,
942
],
"score": 0.74,
"latex": "B C"
},
{
"category_id": 13,
"poly": [
969,
1024,
1028,
1024,
1028,
1061,
969,
1061
],
"score": 0.73,
"latex": "B C"
},
{
"category_id": 13,
"poly": [
380,
1023,
494,
1023,
494,
1062,
380,
1062
],
"score": 0.67,
"latex": "\\triangle A C D"
},
{
"category_id": 13,
"poly": [
246,
1800,
360,
1800,
360,
1842,
246,
1842
],
"score": 0.6,
"latex": "\\triangle P Q R"
},
{
"category_id": 13,
"poly": [
677,
963,
793,
963,
793,
1002,
677,
1002
],
"score": 0.52,
"latex": "\\triangle A B D"
},
{
"category_id": 13,
"poly": [
710,
902,
795,
902,
795,
942,
710,
942
],
"score": 0.41,
"latex": "A B C"
},
{
"category_id": 13,
"poly": [
379,
1740,
463,
1740,
463,
1780,
379,
1780
],
"score": 0.31,
"latex": "A B C"
},
{
"category_id": 13,
"poly": [
1381,
1166,
1393,
1166,
1393,
1178,
1381,
1178
],
"score": 0.26,
"latex": "c"
},
{
"category_id": 15,
"poly": [
254.0,
1476.0,
1400.0,
1476.0,
1400.0,
1511.0,
254.0,
1511.0
],
"score": 0.99,
"text": "的每一时刻,若存在一个白格至少与两个黑格相邻,则可将它也染成黑色.求最初"
},
{
"category_id": 15,
"poly": [
256.0,
1537.0,
1031.0,
1537.0,
1031.0,
1572.0,
256.0,
1572.0
],
"score": 0.98,
"text": "至少要染多少个黑色格才能在某一时刻染黑整个棋盘?"
},
{
"category_id": 15,
"poly": [
837.0,
1418.0,
1403.0,
1418.0,
1403.0,
1452.0,
837.0,
1452.0
],
"score": 0.99,
"text": "白棋盘上先将一些格染成黑色.在之后"
},
{
"category_id": 15,
"poly": [
254.0,
1418.0,
545.0,
1418.0,
545.0,
1452.0,
254.0,
1452.0
],
"score": 1.0,
"text": "第三题.给定正整数"
},
{
"category_id": 15,
"poly": [
616.0,
1418.0,
737.0,
1418.0,
737.0,
1452.0,
616.0,
1452.0
],
"score": 0.94,
"text": ",考虑在"
},
{
"category_id": 15,
"poly": [
645.0,
763.0,
1400.0,
763.0,
1400.0,
797.0,
645.0,
797.0
],
"score": 0.98,
"text": "(湖北武钢三中学生 王逸轩,上海大学冷岗松 供题)"
},
{
"category_id": 15,
"poly": [
675.0,
1150.0,
724.0,
1150.0,
724.0,
1184.0,
675.0,
1184.0
],
"score": 1.0,
"text": "与"
},
{
"category_id": 15,
"poly": [
251.0,
970.0,
303.0,
970.0,
303.0,
1004.0,
251.0,
1004.0
],
"score": 0.99,
"text": "点,"
},
{
"category_id": 15,
"poly": [
471.0,
970.0,
538.0,
970.0,
538.0,
1004.0,
471.0,
1004.0
],
"score": 0.71,
"text": ".记"
},
{
"category_id": 15,
"poly": [
254.0,
1150.0,
288.0,
1150.0,
288.0,
1184.0,
254.0,
1184.0
],
"score": 1.0,
"text": "与"
},
{
"category_id": 15,
"poly": [
251.0,
1089.0,
596.0,
1089.0,
596.0,
1123.0,
251.0,
1123.0
],
"score": 1.0,
"text": "外的另一条外公切线交"
},
{
"category_id": 15,
"poly": [
721.0,
1089.0,
766.0,
1089.0,
766.0,
1123.0,
721.0,
1123.0
],
"score": 1.0,
"text": "于"
},
{
"category_id": 15,
"poly": [
836.0,
1089.0,
967.0,
1089.0,
967.0,
1123.0,
836.0,
1123.0
],
"score": 0.97,
"text": ".设直线"
},
{
"category_id": 15,
"poly": [
807.0,
1026.0,
886.0,
1023.0,
886.0,
1065.0,
807.0,
1067.0
],
"score": 1.0,
"text": "与圆"
},
{
"category_id": 15,
"poly": [
251.0,
906.0,
474.0,
906.0,
474.0,
940.0,
251.0,
940.0
],
"score": 0.95,
"text": "第二题.如图,"
},
{
"category_id": 15,
"poly": [
351.0,
1150.0,
436.0,
1150.0,
436.0,
1184.0,
351.0,
1184.0
],
"score": 1.0,
"text": "交于"
},
{
"category_id": 15,
"poly": [
468.0,
1150.0,
601.0,
1150.0,
601.0,
1184.0,
468.0,
1184.0
],
"score": 1.0,
"text": ",而直线"
},
{
"category_id": 15,
"poly": [
799.0,
1150.0,
883.0,
1150.0,
883.0,
1184.0,
799.0,
1184.0
],
"score": 1.0,
"text": "交于"
},
{
"category_id": 15,
"poly": [
915.0,
1150.0,
1024.0,
1150.0,
1024.0,
1184.0,
915.0,
1184.0
],
"score": 0.86,
"text": ".证明:"
},
{
"category_id": 15,
"poly": [
944.0,
906.0,
1019.0,
906.0,
1019.0,
940.0,
944.0,
940.0
],
"score": 0.99,
"text": "上一"
},
{
"category_id": 15,
"poly": [
922.0,
1026.0,
968.0,
1023.0,
968.0,
1065.0,
922.0,
1067.0
],
"score": 1.0,
"text": "除"
},
{
"category_id": 15,
"poly": [
333.0,
1026.0,
379.0,
1023.0,
379.0,
1065.0,
333.0,
1067.0
],
"score": 1.0,
"text": "为"
},
{
"category_id": 15,
"poly": [
495.0,
1026.0,
770.0,
1023.0,
770.0,
1065.0,
495.0,
1067.0
],
"score": 0.99,
"text": "的外心与内心.圆"
},
{
"category_id": 15,
"poly": [
628.0,
970.0,
676.0,
970.0,
676.0,
1004.0,
628.0,
1004.0
],
"score": 1.0,
"text": "为"
},
{
"category_id": 15,
"poly": [
794.0,
970.0,
1024.0,
970.0,
1024.0,
1004.0,
794.0,
1004.0
],
"score": 0.97,
"text": "的外心与内心,"
},
{
"category_id": 15,
"poly": [
511.0,
906.0,
709.0,
906.0,
709.0,
940.0,
511.0,
940.0
],
"score": 1.0,
"text": "是正三角形"
},
{
"category_id": 15,
"poly": [
796.0,
906.0,
882.0,
906.0,
882.0,
940.0,
796.0,
940.0
],
"score": 1.0,
"text": "的边"
},
{
"category_id": 15,
"poly": [
251.0,
582.0,
408.0,
582.0,
408.0,
624.0,
251.0,
624.0
],
"score": 1.0,
"text": "第一题.设"
},
{
"category_id": 15,
"poly": [
531.0,
582.0,
995.0,
582.0,
995.0,
624.0,
531.0,
624.0
],
"score": 1.0,
"text": "是单位复数.证明存在单位复数"
},
{
"category_id": 15,
"poly": [
1022.0,
582.0,
1105.0,
582.0,
1105.0,
624.0,
1022.0,
624.0
],
"score": 0.98,
"text": "使得:"
},
{
"category_id": 15,
"poly": [
704.0,
1267.0,
1026.0,
1267.0,
1026.0,
1308.0,
704.0,
1308.0
],
"score": 0.95,
"text": "(广西钦州 卢圣 供题)"
},
{
"category_id": 15,
"poly": [
1053.0,
1596.0,
1405.0,
1596.0,
1405.0,
1637.0,
1053.0,
1637.0
],
"score": 0.96,
"text": "(哈佛大学 牟晓生 供题)"
},
{
"category_id": 15,
"poly": [
596.0,
278.0,
1058.0,
278.0,
1058.0,
329.0,
596.0,
329.0
],
"score": 1.0,
"text": "数学新星问题征解"
},
{
"category_id": 15,
"poly": [
865.0,
1745.0,
987.0,
1745.0,
987.0,
1786.0,
865.0,
1786.0
],
"score": 1.0,
"text": "分别是"
},
{
"category_id": 15,
"poly": [
1177.0,
1745.0,
1405.0,
1745.0,
1405.0,
1786.0,
1177.0,
1786.0
],
"score": 1.0,
"text": "上的点。证明"
},
{
"category_id": 15,
"poly": [
922.0,
1808.0,
1130.0,
1808.0,
1130.0,
1842.0,
922.0,
1842.0
],
"score": 1.0,
"text": "周长的最小值"
},
{
"category_id": 15,
"poly": [
361.0,
1808.0,
569.0,
1808.0,
569.0,
1842.0,
361.0,
1842.0
],
"score": 1.0,
"text": "的周长不小于"
},
{
"category_id": 15,
"poly": [
251.0,
1745.0,
378.0,
1745.0,
378.0,
1786.0,
251.0,
1786.0
],
"score": 0.97,
"text": "第四题."
},
{
"category_id": 15,
"poly": [
464.0,
1745.0,
753.0,
1745.0,
753.0,
1786.0,
464.0,
1786.0
],
"score": 1.0,
"text": "是一个三角形,而"
},
{
"category_id": 15,
"poly": [
729.0,
465.0,
923.0,
465.0,
923.0,
509.0,
729.0,
509.0
],
"score": 1.0,
"text": "主持:牟晓生"
},
{
"category_id": 15,
"poly": [
672.0,
404.0,
982.0,
404.0,
982.0,
453.0,
672.0,
453.0
],
"score": 1.0,
"text": "第十五期 (2016.06)"
},
{
"category_id": 15,
"poly": [
1049.0,
1856.0,
1408.0,
1862.0,
1407.0,
1910.0,
1048.0,
1905.0
],
"score": 0.97,
"text": "(哈佛大学 牟晓生 供题)"
}
],
"page_info": {
"page_no": 0,
"height": 2339,
"width": 1654
}
}
]
\ No newline at end of file
[
{
"layout_dets": [
{
"category_id": 1,
"poly": [
578.199951171875,
672.8836669921875,
1579.9771728515625,
672.8836669921875,
1579.9771728515625,
1034.6820068359375,
578.199951171875,
1034.6820068359375
],
"score": 0.9999963641166687
},
{
"category_id": 1,
"poly": [
583.6012573242188,
1067.112548828125,
1579.8231201171875,
1067.112548828125,
1579.8231201171875,
1537.1314697265625,
583.6012573242188,
1537.1314697265625
],
"score": 0.9999961853027344
},
{
"category_id": 1,
"poly": [
585.4329223632812,
1568.2215576171875,
1578.5496826171875,
1568.2215576171875,
1578.5496826171875,
1931.5169677734375,
585.4329223632812,
1931.5169677734375
],
"score": 0.9999949336051941
},
{
"category_id": 1,
"poly": [
578.48388671875,
532.0015869140625,
1577.96337890625,
532.0015869140625,
1577.96337890625,
641.0133056640625,
578.48388671875,
641.0133056640625
],
"score": 0.999992847442627
},
{
"category_id": 1,
"poly": [
66.4359359741211,
1776.6947021484375,
530.4816284179688,
1776.6947021484375,
530.4816284179688,
1883.12841796875,
66.4359359741211,
1883.12841796875
],
"score": 0.9999925494194031
},
{
"category_id": 3,
"poly": [
70.23741149902344,
818.9378662109375,
517.8241577148438,
818.9378662109375,
517.8241577148438,
1076.58251953125,
70.23741149902344,
1076.58251953125
],
"score": 0.9999912977218628
},
{
"category_id": 1,
"poly": [
64.99989318847656,
651.9586791992188,
436.51446533203125,
651.9586791992188,
436.51446533203125,
723.5755615234375,
64.99989318847656,
723.5755615234375
],
"score": 0.9999803900718689
},
{
"category_id": 0,
"poly": [
556.2784423828125,
270.2118835449219,
1577.8243408203125,
270.2118835449219,
1577.8243408203125,
408.96875,
556.2784423828125,
408.96875
],
"score": 0.9999694228172302
},
{
"category_id": 1,
"poly": [
67.8554458618164,
1342.222900390625,
530.5653686523438,
1342.222900390625,
530.5653686523438,
1447.843017578125,
67.8554458618164,
1447.843017578125
],
"score": 0.999964714050293
},
{
"category_id": 1,
"poly": [
65.74972534179688,
1631.3668212890625,
530.32763671875,
1631.3668212890625,
530.32763671875,
1772.4139404296875,
65.74972534179688,
1772.4139404296875
],
"score": 0.9999628067016602
},
{
"category_id": 1,
"poly": [
588.5555419921875,
2068.548828125,
1525.326416015625,
2068.548828125,
1525.326416015625,
2103.8896484375,
588.5555419921875,
2103.8896484375
],
"score": 0.9999607801437378
},
{
"category_id": 1,
"poly": [
586.5614013671875,
1963.109619140625,
1556.57763671875,
1963.109619140625,
1556.57763671875,
2034.810302734375,
586.5614013671875,
2034.810302734375
],
"score": 0.9999467730522156
},
{
"category_id": 5,
"poly": [
59.963104248046875,
1110.6282958984375,
529.9212646484375,
1110.6282958984375,
529.9212646484375,
1225.2918701171875,
59.963104248046875,
1225.2918701171875
],
"score": 0.9999458193778992
},
{
"category_id": 2,
"poly": [
70.253173828125,
103.42188262939453,
420.4876708984375,
103.42188262939453,
420.4876708984375,
223.3950653076172,
70.253173828125,
223.3950653076172
],
"score": 0.9999403953552246
},
{
"category_id": 2,
"poly": [
1081.0198974609375,
2244.876220703125,
1554.6702880859375,
2244.876220703125,
1554.6702880859375,
2275.28662109375,
1081.0198974609375,
2275.28662109375
],
"score": 0.9999216794967651
},
{
"category_id": 1,
"poly": [
68.85406494140625,
345.90887451171875,
307.9100646972656,
345.90887451171875,
307.9100646972656,
409.0101013183594,
68.85406494140625,
409.0101013183594
],
"score": 0.9999182224273682
},
{
"category_id": 0,
"poly": [
65.58615112304688,
1295.93701171875,
180.41529846191406,
1295.93701171875,
180.41529846191406,
1328.8675537109375,
65.58615112304688,
1328.8675537109375
],
"score": 0.9998924136161804
},
{
"category_id": 2,
"poly": [
1245.0789794921875,
108.83450317382812,
1576.3145751953125,
108.83450317382812,
1576.3145751953125,
219.29098510742188,
1245.0789794921875,
219.29098510742188
],
"score": 0.9995979070663452
},
{
"category_id": 1,
"poly": [
65.7517318725586,
483.5211181640625,
428.60296630859375,
483.5211181640625,
428.60296630859375,
586.8902587890625,
65.7517318725586,
586.8902587890625
],
"score": 0.9993292689323425
},
{
"category_id": 0,
"poly": [
65.02902221679688,
445.0223083496094,
208.32994079589844,
445.0223083496094,
208.32994079589844,
476.65191650390625,
65.02902221679688,
476.65191650390625
],
"score": 0.9992275238037109
},
{
"category_id": 0,
"poly": [
556.9666748046875,
453.0841369628906,
673.0485229492188,
453.0841369628906,
673.0485229492188,
490.6045227050781,
556.9666748046875,
490.6045227050781
],
"score": 0.9949869513511658
},
{
"category_id": 1,
"poly": [
66.26496124267578,
1524.239013671875,
530.25537109375,
1524.239013671875,
530.25537109375,
1627.5289306640625,
66.26496124267578,
1627.5289306640625
],
"score": 0.9919456839561462
},
{
"category_id": 7,
"poly": [
62.55642318725586,
1227.4195556640625,
380.1070556640625,
1227.4195556640625,
380.1070556640625,
1252.86181640625,
62.55642318725586,
1252.86181640625
],
"score": 0.9918301105499268
},
{
"category_id": 1,
"poly": [
66.80264282226562,
1451.476806640625,
527.379150390625,
1451.476806640625,
527.379150390625,
1519.5836181640625,
66.80264282226562,
1519.5836181640625
],
"score": 0.9883919954299927
},
{
"category_id": 0,
"poly": [
65.35992431640625,
605.3745727539062,
181.2437286376953,
605.3745727539062,
181.2437286376953,
637.0079956054688,
65.35992431640625,
637.0079956054688
],
"score": 0.9870822429656982
},
{
"category_id": 0,
"poly": [
178.8284149169922,
264.662109375,
396.5289611816406,
264.662109375,
396.5289611816406,
315.4195251464844,
178.8284149169922,
315.4195251464844
],
"score": 0.9779264330863953
},
{
"category_id": 4,
"poly": [
66.15017700195312,
767.2459106445312,
181.25796508789062,
767.2459106445312,
181.25796508789062,
799.7833251953125,
66.15017700195312,
799.7833251953125
],
"score": 0.8933500051498413
},
{
"category_id": 13,
"poly": [
590,
747,
688,
747,
688,
778,
590,
778
],
"score": 0.91,
"latex": "+24.4\\%"
},
{
"category_id": 13,
"poly": [
1433,
855,
1492,
855,
1492,
886,
1433,
886
],
"score": 0.86,
"latex": "30\\%"
},
{
"category_id": 13,
"poly": [
238,
689,
264,
689,
264,
717,
238,
717
],
"score": 0.34,
"latex": "@"
},
{
"category_id": 13,
"poly": [
702,
1002,
722,
1002,
722,
1026,
702,
1026
],
"score": 0.33,
"latex": "^+"
},
{
"category_id": 13,
"poly": [
177,
1154,
223,
1154,
223,
1185,
177,
1185
],
"score": 0.28,
"latex": "(\\%)"
}
],
"page_info": {
"page_no": 0,
"height": 2339,
"width": 1654
}
},
{
"layout_dets": [
{
"category_id": 2,
"poly": [
88.00835418701172,
31.891786575317383,
300.7422180175781,
31.891786575317383,
300.7422180175781,
113.60026550292969,
88.00835418701172,
113.60026550292969
],
"score": 0.9999986886978149
},
{
"category_id": 2,
"poly": [
771.0192260742188,
2213.478759765625,
827.4277954101562,
2213.478759765625,
827.4277954101562,
2239.4013671875,
771.0192260742188,
2239.4013671875
],
"score": 0.9999961853027344
},
{
"category_id": 7,
"poly": [
544.297119140625,
488.5483703613281,
988.39990234375,
488.5483703613281,
988.39990234375,
541.063232421875,
544.297119140625,
541.063232421875
],
"score": 0.9999918341636658
},
{
"category_id": 2,
"poly": [
1082.88330078125,
82.37212371826172,
1519.426513671875,
82.37212371826172,
1519.426513671875,
114.92091369628906,
1082.88330078125,
114.92091369628906
],
"score": 0.9999634623527527
},
{
"category_id": 2,
"poly": [
1009.1594848632812,
2210.946533203125,
1535.924560546875,
2210.946533203125,
1535.924560546875,
2241.8310546875,
1009.1594848632812,
2241.8310546875
],
"score": 0.9999324679374695
},
{
"category_id": 5,
"poly": [
537.3482666015625,
156.8837432861328,
1584.9873046875,
156.8837432861328,
1584.9873046875,
485.2989501953125,
537.3482666015625,
485.2989501953125
],
"score": 0.9985944628715515
},
{
"category_id": 7,
"poly": [
62.69691848754883,
443.4039611816406,
249.91006469726562,
443.4039611816406,
249.91006469726562,
467.46136474609375,
62.69691848754883,
467.46136474609375
],
"score": 0.9873790740966797
},
{
"category_id": 5,
"poly": [
61.37367248535156,
138.51014709472656,
528.3062744140625,
138.51014709472656,
528.3062744140625,
443.5386962890625,
61.37367248535156,
443.5386962890625
],
"score": 0.9232067465782166
},
{
"category_id": 6,
"poly": [
548.1131591796875,
148.73146057128906,
797.3046875,
148.73146057128906,
797.3046875,
180.74632263183594,
548.1131591796875,
180.74632263183594
],
"score": 0.6074692606925964
},
{
"category_id": 13,
"poly": [
864,
455,
922,
455,
922,
482,
864,
482
],
"score": 0.74,
"latex": "6.0\\%"
},
{
"category_id": 13,
"poly": [
850,
418,
922,
418,
922,
445,
850,
445
],
"score": 0.64,
"latex": "35.3\\%"
},
{
"category_id": 13,
"poly": [
1501,
270,
1571,
270,
1571,
298,
1501,
298
],
"score": 0.54,
"latex": "13.8\\%"
},
{
"category_id": 13,
"poly": [
1013,
454,
1083,
454,
1083,
482,
1013,
482
],
"score": 0.52,
"latex": "15.0\\%"
},
{
"category_id": 13,
"poly": [
1012,
417,
1083,
417,
1083,
444,
1012,
444
],
"score": 0.52,
"latex": "33.7\\%"
},
{
"category_id": 13,
"poly": [
689,
456,
725,
456,
725,
482,
689,
482
],
"score": 0.48,
"latex": "(\\%)"
},
{
"category_id": 13,
"poly": [
850,
344,
922,
344,
922,
372,
850,
372
],
"score": 0.4,
"latex": "83.8\\%"
},
{
"category_id": 13,
"poly": [
863,
270,
922,
270,
922,
298,
863,
298
],
"score": 0.4,
"latex": "4.5\\%"
},
{
"category_id": 13,
"poly": [
1334,
270,
1406,
270,
1406,
298,
1334,
298
],
"score": 0.35,
"latex": "37.2\\%"
},
{
"category_id": 13,
"poly": [
618,
419,
656,
419,
656,
446,
618,
446
],
"score": 0.35,
"latex": "(\\%)"
}
],
"page_info": {
"page_no": 1,
"height": 2339,
"width": 1654
}
},
{
"layout_dets": [
{
"category_id": 2,
"poly": [
87.90370178222656,
31.597869873046875,
300.9918518066406,
31.597869873046875,
300.9918518066406,
113.40574645996094,
87.90370178222656,
113.40574645996094
],
"score": 0.9999939799308777
},
{
"category_id": 2,
"poly": [
1008.9932250976562,
2209.250732421875,
1534.93310546875,
2209.250732421875,
1534.93310546875,
2242.773193359375,
1008.9932250976562,
2242.773193359375
],
"score": 0.9999377727508545
},
{
"category_id": 2,
"poly": [
770.6605224609375,
2212.857666015625,
827.4124145507812,
2212.857666015625,
827.4124145507812,
2239.771484375,
770.6605224609375,
2239.771484375
],
"score": 0.9998394250869751
},
{
"category_id": 2,
"poly": [
1082.0982666015625,
82.25032043457031,
1518.9271240234375,
82.25032043457031,
1518.9271240234375,
114.52558898925781,
1082.0982666015625,
114.52558898925781
],
"score": 0.9996459484100342
},
{
"category_id": 7,
"poly": [
95.3975601196289,
1846.637939453125,
564.4164428710938,
1846.637939453125,
564.4164428710938,
1899.2098388671875,
95.3975601196289,
1899.2098388671875
],
"score": 0.9908689260482788
},
{
"category_id": 6,
"poly": [
95.46688842773438,
173.42837524414062,
470.2196960449219,
173.42837524414062,
470.2196960449219,
217.74642944335938,
95.46688842773438,
217.74642944335938
],
"score": 0.9438199400901794
},
{
"category_id": 5,
"poly": [
854.114501953125,
1043.93505859375,
1592.0174560546875,
1043.93505859375,
1592.0174560546875,
1846.166015625,
854.114501953125,
1846.166015625
],
"score": 0.884392499923706
},
{
"category_id": 5,
"poly": [
92.02899169921875,
1331.891845703125,
814.2921752929688,
1331.891845703125,
814.2921752929688,
1842.61962890625,
92.02899169921875,
1842.61962890625
],
"score": 0.8743516206741333
},
{
"category_id": 5,
"poly": [
851.83984375,
224.9954833984375,
1592.4066162109375,
224.9954833984375,
1592.4066162109375,
1018.7108154296875,
851.83984375,
1018.7108154296875
],
"score": 0.8650234937667847
},
{
"category_id": 5,
"poly": [
91.79834747314453,
224.1070556640625,
816.58203125,
224.1070556640625,
816.58203125,
1248.4244384765625,
91.79834747314453,
1248.4244384765625
],
"score": 0.8604705333709717
},
{
"category_id": 5,
"poly": [
85.1959228515625,
220.71908569335938,
1602.307373046875,
220.71908569335938,
1602.307373046875,
1844.490234375,
85.1959228515625,
1844.490234375
],
"score": 0.6637970209121704
},
{
"category_id": 13,
"poly": [
737,
704,
804,
704,
804,
730,
737,
730
],
"score": 0.56,
"latex": "\\pmb{26.5\\%}"
},
{
"category_id": 13,
"poly": [
738,
673,
804,
673,
804,
699,
738,
699
],
"score": 0.48,
"latex": "\\pmb{16.2\\%}"
},
{
"category_id": 13,
"poly": [
736,
767,
805,
767,
805,
795,
736,
795
],
"score": 0.48,
"latex": "\\mathbf{\\lambda_{23.7\\%}}"
},
{
"category_id": 13,
"poly": [
231,
611,
268,
611,
268,
638,
231,
638
],
"score": 0.47,
"latex": "(\\%)"
},
{
"category_id": 13,
"poly": [
749,
736,
804,
736,
804,
763,
749,
763
],
"score": 0.41,
"latex": "\\pmb{9.2\\%}"
},
{
"category_id": 13,
"poly": [
737,
641,
804,
641,
804,
668,
737,
668
],
"score": 0.41,
"latex": "{\\bf38.0\\%}"
},
{
"category_id": 13,
"poly": [
748,
577,
805,
577,
805,
606,
748,
606
],
"score": 0.35,
"latex": "0.1\\%"
},
{
"category_id": 13,
"poly": [
187,
800,
222,
800,
222,
827,
187,
827
],
"score": 0.32,
"latex": "(\\%)"
},
{
"category_id": 13,
"poly": [
738,
830,
805,
830,
805,
857,
738,
857
],
"score": 0.28,
"latex": "\\mathbf{13.8\\%}"
},
{
"category_id": 13,
"poly": [
737,
862,
805,
862,
805,
889,
737,
889
],
"score": 0.27,
"latex": "\\mathbf{31.9\\%}"
},
{
"category_id": 13,
"poly": [
736,
955,
804,
955,
804,
983,
736,
983
],
"score": 0.26,
"latex": "\\pmb{65.3\\%}"
}
],
"page_info": {
"page_no": 2,
"height": 2339,
"width": 1654
}
},
{
"layout_dets": [
{
"category_id": 2,
"poly": [
86.30094909667969,
32.05949783325195,
303.6516418457031,
32.05949783325195,
303.6516418457031,
114.77470397949219,
86.30094909667969,
114.77470397949219
],
"score": 0.9999954104423523
},
{
"category_id": 1,
"poly": [
108.4946060180664,
590.2034912109375,
1536.75146484375,
590.2034912109375,
1536.75146484375,
688.491455078125,
108.4946060180664,
688.491455078125
],
"score": 0.9999933242797852
},
{
"category_id": 0,
"poly": [
95.94879913330078,
1205.413818359375,
252.92385864257812,
1205.413818359375,
252.92385864257812,
1246.00146484375,
95.94879913330078,
1246.00146484375
],
"score": 0.9999929666519165
},
{
"category_id": 1,
"poly": [
106.48407745361328,
338.2734680175781,
1568.8638916015625,
338.2734680175781,
1568.8638916015625,
437.8475341796875,
106.48407745361328,
437.8475341796875
],
"score": 0.9999896883964539
},
{
"category_id": 2,
"poly": [
767.6920776367188,
2212.26904296875,
830.787353515625,
2212.26904296875,
830.787353515625,
2239.28466796875,
767.6920776367188,
2239.28466796875
],
"score": 0.9999850988388062
},
{
"category_id": 0,
"poly": [
96.18524932861328,
508.3636474609375,
291.44244384765625,
508.3636474609375,
291.44244384765625,
549.4661254882812,
96.18524932861328,
549.4661254882812
],
"score": 0.9999837875366211
},
{
"category_id": 2,
"poly": [
1082.2711181640625,
81.18756103515625,
1520.2156982421875,
81.18756103515625,
1520.2156982421875,
116.55754089355469,
1082.2711181640625,
116.55754089355469
],
"score": 0.99994957447052
},
{
"category_id": 0,
"poly": [
96.45137786865234,
157.9286346435547,
319.2138671875,
157.9286346435547,
319.2138671875,
213.84323120117188,
96.45137786865234,
213.84323120117188
],
"score": 0.9999274611473083
},
{
"category_id": 0,
"poly": [
96.99203491210938,
257.65087890625,
483.64617919921875,
257.65087890625,
483.64617919921875,
301.5384216308594,
96.99203491210938,
301.5384216308594
],
"score": 0.999910295009613
},
{
"category_id": 2,
"poly": [
1008.87890625,
2208.611328125,
1536.04736328125,
2208.611328125,
1536.04736328125,
2243.415283203125,
1008.87890625,
2243.415283203125
],
"score": 0.999893069267273
},
{
"category_id": 1,
"poly": [
108.4665298461914,
1288.0936279296875,
1546.7523193359375,
1288.0936279296875,
1546.7523193359375,
1383.8436279296875,
108.4665298461914,
1383.8436279296875
],
"score": 0.9997895956039429
},
{
"category_id": 1,
"poly": [
107.81368255615234,
1678.247802734375,
1227.883056640625,
1678.247802734375,
1227.883056640625,
1711.3719482421875,
107.81368255615234,
1711.3719482421875
],
"score": 0.999572217464447
},
{
"category_id": 5,
"poly": [
109.7546157836914,
810.016357421875,
1579.9564208984375,
810.016357421875,
1579.9564208984375,
1171.63818359375,
109.7546157836914,
1171.63818359375
],
"score": 0.999454140663147
},
{
"category_id": 1,
"poly": [
106.4626235961914,
1548.298828125,
1540.339111328125,
1548.298828125,
1540.339111328125,
1676.6796875,
106.4626235961914,
1676.6796875
],
"score": 0.9886388778686523
},
{
"category_id": 1,
"poly": [
107.5276107788086,
1386.3994140625,
1540.8876953125,
1386.3994140625,
1540.8876953125,
1447.81298828125,
107.5276107788086,
1447.81298828125
],
"score": 0.9709202647209167
},
{
"category_id": 1,
"poly": [
107.66427612304688,
1451.8365478515625,
1537.991943359375,
1451.8365478515625,
1537.991943359375,
1546.6905517578125,
107.66427612304688,
1546.6905517578125
],
"score": 0.9589993953704834
},
{
"category_id": 6,
"poly": [
95.90386199951172,
728.28564453125,
328.19708251953125,
728.28564453125,
328.19708251953125,
768.121826171875,
95.90386199951172,
768.121826171875
],
"score": 0.6999472379684448
},
{
"category_id": 1,
"poly": [
106.67626953125,
1371.860595703125,
1544.8497314453125,
1371.860595703125,
1544.8497314453125,
1678.673095703125,
106.67626953125,
1678.673095703125
],
"score": 0.5646986961364746
},
{
"category_id": 0,
"poly": [
95.94149780273438,
728.2644653320312,
328.195068359375,
728.2644653320312,
328.195068359375,
768.1664428710938,
95.94149780273438,
768.1664428710938
],
"score": 0.30706164240837097
},
{
"category_id": 13,
"poly": [
1247,
887,
1353,
887,
1353,
914,
1247,
914
],
"score": 0.91,
"latex": "5\\%{\\sim}20\\%"
},
{
"category_id": 13,
"poly": [
1181,
923,
1290,
923,
1290,
950,
1181,
950
],
"score": 0.9,
"latex": "-5\\%{+}5\\%"
},
{
"category_id": 13,
"poly": [
1416,
1047,
1469,
1047,
1469,
1077,
1416,
1077
],
"score": 0.87,
"latex": "10\\%"
},
{
"category_id": 13,
"poly": [
1254,
963,
1296,
963,
1296,
991,
1254,
991
],
"score": 0.86,
"latex": "5\\%"
},
{
"category_id": 13,
"poly": [
1373,
1003,
1428,
1003,
1428,
1032,
1373,
1032
],
"score": 0.86,
"latex": "10\\%"
},
{
"category_id": 13,
"poly": [
1332,
1047,
1388,
1047,
1388,
1076,
1332,
1076
],
"score": 0.86,
"latex": "\\cdot10\\%"
},
{
"category_id": 13,
"poly": [
1373,
1112,
1428,
1112,
1428,
1141,
1373,
1141
],
"score": 0.85,
"latex": "10\\%"
},
{
"category_id": 13,
"poly": [
1248,
854,
1302,
854,
1302,
880,
1248,
880
],
"score": 0.85,
"latex": "z0\\%"
}
],
"page_info": {
"page_no": 3,
"height": 2339,
"width": 1654
}
}
]
\ No newline at end of file
......@@ -37,9 +37,9 @@ class TestBench():
now_simscore = now_score["average_sim_score"]
now_editdistance = now_score["average_edit_distance"]
now_bleu = now_score["average_bleu_score"]
#assert last_simscore <= now_simscore
#assert last_editdistance <= now_editdistance
#assert last_bleu <= now_bleu
assert last_simscore <= now_simscore
assert last_editdistance <= now_editdistance
assert last_bleu <= now_bleu
def get_score():
......
import pytest
import os
from conf import conf
import os
import json
from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
from lib import common
# Directory settings for the tests, looked up once from the conf mapping.
pdf_res_path, code_path, pdf_dev_path = (
    conf.conf[key] for key in ("pdf_res_path", "code_path", "pdf_dev_path")
)
class TestCli:
    """SDK parsing tests that feed pre-computed model output to UNIPipe."""

    def test_pdf_sdk(self):
        """Parse every PDF under ``<pdf_dev_path>/pdf`` through the SDK.

        For each ``<name>.pdf`` the matching ``<pdf_dev_path>/<name>_model.json``
        is loaded and handed to ``UNIPipe`` as ``model_list``. The generated
        markdown is written to ``<pdf_dev_path>/mineru/<name>.md`` and then
        passed to ``common.count_folders_and_check_contents``.
        """
        pdf_dir = os.path.join(pdf_dev_path, "pdf")
        # splitext only removes the final extension, so a base name that
        # itself contains dots (e.g. "a.b.pdf") still maps back to its file.
        demo_names = [
            os.path.splitext(pdf_file)[0]
            for pdf_file in os.listdir(pdf_dir)
            if pdf_file.endswith('.pdf')
        ]
        for demo_name in demo_names:
            model_path = os.path.join(pdf_dev_path, f"{demo_name}_model.json")
            pdf_path = os.path.join(pdf_dir, f"{demo_name}.pdf")
            # Context managers make sure both input files are closed again.
            with open(pdf_path, "rb") as pdf_file:
                pdf_bytes = pdf_file.read()
            with open(model_path, "r", encoding="utf-8") as model_file:
                model_json = json.load(model_file)
            image_writer = DiskReaderWriter(pdf_dev_path)
            image_dir = str(os.path.basename(pdf_dev_path))
            jso_useful_key = {"_pdf_type": "", "model_list": model_json}
            pipe = UNIPipe(pdf_bytes, jso_useful_key, image_writer)
            pipe.pipe_classify()
            pipe.pipe_parse()
            md_content = pipe.pipe_mk_markdown(image_dir, drop_mode="none")
            dir_path = os.path.join(pdf_dev_path, "mineru")
            # exist_ok=True already covers the "directory exists" case.
            os.makedirs(dir_path, exist_ok=True)
            res_path = os.path.join(dir_path, f"{demo_name}.md")
            with open(res_path, "w+", encoding="utf-8") as f:
                f.write(md_content)
            common.count_folders_and_check_contents(res_path)

    # def test_pdf_specify_jsonl(self):
    #     """
    #     Input jsonl, parse with the default method
    #     """
    #     cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972'" % (code_path)
    #     logging.info(cmd)
    #     common.check_shell(cmd)
    #     #common.count_folders_and_check_contents(pdf_res_path)

    # def test_pdf_specify_jsonl_txt(self):
    #     """
    #     Input jsonl, parse with the txt method
    #     """
    #     cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method txt" % (code_path)
    #     logging.info(cmd)
    #     common.check_shell(cmd)
    #     #common.count_folders_and_check_contents(pdf_res_path)
    #
    # def test_pdf_specify_jsonl_ocr(self):
    #     """
    #     Input jsonl, parse with the ocr method
    #     """
    #     cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method ocr" % (code_path)
    #     logging.info(cmd)
    #     common.check_shell(cmd)
    #     #common.count_folders_and_check_contents(pdf_res_path)
# Allow running this module directly; hands control to pytest.
if __name__ == "__main__":
    pytest.main()
"""test cli and sdk."""
import logging
import os
import pytest
from conf import conf
from lib import common
import magic_pdf.model as model_config
from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
# Flag checked by the SDK tests before they call pipe.pipe_analyze().
model_config.__use_inside_model__ = True

# Directory settings for the tests, looked up once from the conf mapping.
pdf_res_path, code_path, pdf_dev_path = (
    conf.conf[key] for key in ('pdf_res_path', 'code_path', 'pdf_dev_path')
)
class TestCli:
    """Tests for the magic_pdf SDK pipeline and the ``magic-pdf`` CLI.

    Every test iterates over the ``.pdf`` files in ``<pdf_dev_path>/pdf`` and
    writes its results below ``<pdf_dev_path>/mineru``. The shared work lives
    in the private helpers; the public test methods only select the mode.
    """

    @staticmethod
    def _list_demo_names(pdf_dir):
        """Return the base names (final extension removed) of the PDFs in pdf_dir."""
        # splitext only strips the last extension, so names that contain
        # extra dots still map back to an existing "<name>.pdf".
        return [
            os.path.splitext(pdf_file)[0]
            for pdf_file in os.listdir(pdf_dir)
            if pdf_file.endswith('.pdf')
        ]

    def _run_sdk(self, pdf_type):
        """Run the UNIPipe SDK flow on every demo PDF.

        Args:
            pdf_type: value stored under ``'_pdf_type'`` in the dict handed
                to ``UNIPipe`` (the tests use ``''``, ``'ocr'`` and ``'txt'``).
        """
        pdf_dir = os.path.join(pdf_dev_path, 'pdf')
        local_image_dir = os.path.join(pdf_dir, 'images')
        image_dir = str(os.path.basename(local_image_dir))
        out_dir = os.path.join(pdf_dev_path, 'mineru')
        for demo_name in self._list_demo_names(pdf_dir):
            pdf_path = os.path.join(pdf_dir, f'{demo_name}.pdf')
            print(pdf_path)
            # Context manager makes sure the PDF handle is closed again.
            with open(pdf_path, 'rb') as pdf_file:
                pdf_bytes = pdf_file.read()
            image_writer = DiskReaderWriter(local_image_dir)
            model_json = list()
            jso_useful_key = {'_pdf_type': pdf_type, 'model_list': model_json}
            pipe = UNIPipe(pdf_bytes, jso_useful_key, image_writer)
            pipe.pipe_classify()
            if len(model_json) == 0:
                if model_config.__use_inside_model__:
                    pipe.pipe_analyze()
                else:
                    # Report a regular test failure with a reason instead of
                    # raising a bare SystemExit from inside the test.
                    pytest.fail('model_list is empty and '
                                '__use_inside_model__ is disabled')
            pipe.pipe_parse()
            md_content = pipe.pipe_mk_markdown(image_dir, drop_mode='none')
            # exist_ok=True already covers the "directory exists" case.
            os.makedirs(out_dir, exist_ok=True)
            res_path = os.path.join(out_dir, f'{demo_name}.md')
            common.delete_file(res_path)
            with open(res_path, 'w+', encoding='utf-8') as f:
                f.write(md_content)
            common.sdk_count_folders_and_check_contents(res_path)

    def _run_cli(self, method):
        """Run ``magic-pdf -p <pdf> -o <out> -m <method>`` on every demo PDF.

        Args:
            method: value passed to ``-m``; also the name of the result
                sub-directory that is checked afterwards.
        """
        pdf_dir = os.path.join(pdf_dev_path, 'pdf')
        res_path = os.path.join(pdf_dev_path, 'mineru')
        for demo_name in self._list_demo_names(pdf_dir):
            # NOTE(review): presumably clears earlier output so the check
            # below only sees fresh results - confirm in lib.common.
            common.delete_file(res_path)
            cmd = 'magic-pdf -p %s -o %s -m %s' % (os.path.join(
                pdf_dir, f'{demo_name}.pdf'), res_path, method)
            logging.info(cmd)
            status = os.system(cmd)
            if status != 0:
                # Surface a failing CLI run in the log; the content check
                # below remains the actual pass/fail criterion.
                logging.warning('command exited with status %s: %s',
                                status, cmd)
            common.cli_count_folders_and_check_contents(
                os.path.join(res_path, demo_name, method))

    @pytest.mark.P0
    def test_pdf_auto_sdk(self):
        """pdf sdk auto test."""
        self._run_sdk('')

    @pytest.mark.P0
    def test_pdf_ocr_sdk(self):
        """pdf sdk ocr test."""
        self._run_sdk('ocr')

    @pytest.mark.P0
    def test_pdf_txt_sdk(self):
        """pdf sdk txt test."""
        self._run_sdk('txt')

    @pytest.mark.P0
    def test_pdf_cli_auto(self):
        """magic_pdf cli test auto."""
        self._run_cli('auto')

    @pytest.mark.P0
    def test_pdf_clit_txt(self):
        """magic_pdf cli test txt."""
        self._run_cli('txt')

    @pytest.mark.P0
    def test_pdf_clit_ocr(self):
        """magic_pdf cli test ocr."""
        self._run_cli('ocr')
if __name__ == '__main__':
    # pytest.main() returns the session exit code; re-raise it as the process
    # exit status so running this file directly fails when any test fails.
    raise SystemExit(pytest.main())
dependent on the service headway and the reliability of the departure time of the service to which passengers are incident.
After briefly introducing the random incidence model, which is often assumed to hold at short headways, the balance of this section reviews six studies of passenger incidence behavior that are motivated by understanding the relationships between service headway, service reliability, passenger incidence behavior, and passenger waiting time in a more nuanced fashion than is embedded in the random incidence assumption ( 2 ). Three of these studies depend on manually collected data, two studies use data from AFC systems, and one study analyzes the issue purely theoretically. These studies reveal much about passenger incidence behavior, but all are found to be limited in their general applicability by the methods with which they collect information about passengers and the services those passengers intend to use.
# Random Passenger Incidence Behavior
One characterization of passenger incidence behavior is that of random incidence ( 3 ). The key assumption underlying the random incidence model is that the process of passenger arrivals to the public transport service is independent from the vehicle departure process of the service. This implies that passengers become incident to the service at a random time, and thus the instantaneous rate of passenger arrivals to the service is uniform over a given period of time. Let $W$ and $H$ be random variables representing passenger waiting times and service headways, respectively. Under the random incidence assumption and the assumption that vehicle capacity is not a binding constraint, a classic result of transportation science is that
$$
E\left[W\right] = \frac{E\left[H^{2}\right]}{2E\left[H\right]} = \frac{E\left[H\right]}{2}\left(1 + \operatorname{CV}\left(H\right)^{2}\right) \tag{1}
$$
where $E[X]$ is the probabilistic expectation of some random variable $X$ and $\operatorname{CV}(H)$ is the coefficient of variation of $H$, a unitless measure of the variability of $H$ defined as
$$
\operatorname{CV}\left(H\right) = \frac{\sigma_{H}}{E\left[H\right]} \tag{2}
$$
where $\sigma_{H}$ is the standard deviation of $H$ ( 4 ). The second expression in Equation 1 is particularly useful because it expresses the mean passenger waiting time as the sum of two components: the waiting time caused by the mean headway (i.e., the reciprocal of service frequency) and the waiting time caused by the variability of the headways (which is one measure of service reliability). When the service is perfectly reliable with constant headways, the mean waiting time will be simply half the headway.
# More Behaviorally Realistic Incidence Models
Jolliffe and Hutchinson studied bus passenger incidence in South London suburbs ( 5 ). They observed 10 bus stops for 1 h per day over 8 days, recording the times of passenger incidence and actual and scheduled bus departures. They limited their stop selection to those served by only a single bus route with a single service pattern so as to avoid ambiguity about which service a passenger was waiting for. The authors found that the actual average passenger waiting time was 30% less than predicted by the random incidence model. They also found that the empirical distributions of passenger incidence times (by time of day) had peaks just before the respective average bus departure times. They hypothesized the existence of three classes of passengers: with proportion $q$, passengers whose time of incidence is causally coincident with that of a bus departure (e.g., because they saw the approaching bus from their home or a shop window); with proportion $p(1-q)$, passengers who time their arrivals to minimize expected waiting time; and with proportion $(1-p)(1-q)$, passengers who are randomly incident. The authors found that $p$ was positively correlated with the potential reduction in waiting time (compared with arriving randomly) that resulted from knowledge of the timetable and of service reliability. They also found $p$ to be higher in the peak commuting periods rather than in the off-peak periods, indicating more awareness of the timetable or historical reliability, or both, by commuters.
Bowman and Turnquist built on the concept of aware and unaware passengers of proportions $p$ and $(1-p)$, respectively. They proposed a utility-based model to estimate $p$ and the distribution of incidence times, and thus the mean waiting time, of aware passengers over a given headway as a function of the headway and reliability of bus departure times ( 1 ). They observed seven bus stops in Chicago, Illinois, each served by a single (different) bus route, between 6:00 and 8:00 a.m. for 5 to 10 days each. The bus routes had headways of 5 to 20 min and a range of reliabilities. The authors found that actual average waiting time was substantially less than predicted by the random incidence model. They estimated that $p$ was not statistically significantly different from 1.0, which they explain by the fact that all observations were taken during peak commuting times. Their model predicts that the longer the headway and the more reliable the departures, the more peaked the distribution of incidence times will be and the closer that peak will be to the next scheduled departure time. This prediction demonstrates what they refer to as a safety margin that passengers add to reduce the chance of missing their bus when the service is known to be somewhat unreliable. Such a safety margin can also result from unreliability in passengers’ journeys to the public transport stop or station. Bowman and Turnquist conclude from their model that the random incidence model underestimates the waiting time benefits of improving reliability and overestimates the waiting time benefits of increasing service frequency. This is because as reliability increases passengers can better predict departure times and so can time their incidence to decrease their waiting time.
Furth and Muller study the issue in a theoretical context and generally agree with the above findings ( 2 ). They are primarily concerned with the use of data from automatic vehicle-tracking systems to assess the impacts of reliability on passenger incidence behavior and waiting times. They propose that passengers will react to unreliability by departing earlier than they would with reliable services. Randomly incident unaware passengers will experience unreliability as a more dispersed distribution of headways and simply allocate additional time to their trip plan to improve the chance of arriving at their destination on time. Aware passengers, whose incidence is not entirely random, will react by timing their incidence somewhat earlier than the scheduled departure time to increase their chance of catching the desired service. The authors characterize these reactions as the costs of unreliability.
Luethi et al. continued with the analysis of manually collected data on actual passenger behavior ( 6 ). They use the language of probability to describe two classes of passengers. The first is timetable-dependent passengers (i.e., the aware passengers), whose incidence behavior is affected by awareness (possibly gained
This source diff could not be displayed because it is too large. You can view the blob instead.
[
{
"layout_dets": [
{
"category_id": 1,
"poly": [
882.4013061523438,
169.93817138671875,
1552.350341796875,
169.93817138671875,
1552.350341796875,
625.8263549804688,
882.4013061523438,
625.8263549804688
],
"score": 0.999992311000824
},
{
"category_id": 1,
"poly": [
882.474853515625,
1450.92822265625,
1551.4490966796875,
1450.92822265625,
1551.4490966796875,
1877.5712890625,
882.474853515625,
1877.5712890625
],
"score": 0.9999903440475464
},
{
"category_id": 1,
"poly": [
881.6513061523438,
626.2058715820312,
1552.1400146484375,
626.2058715820312,
1552.1400146484375,
1450.604736328125,
881.6513061523438,
1450.604736328125
],
"score": 0.9999856352806091
},
{
"category_id": 1,
"poly": [
149.41075134277344,
232.1595001220703,
819.0465087890625,
232.1595001220703,
819.0465087890625,
625.8865356445312,
149.41075134277344,
625.8865356445312
],
"score": 0.99998539686203
},
{
"category_id": 1,
"poly": [
149.3945770263672,
1215.5172119140625,
817.8850708007812,
1215.5172119140625,
817.8850708007812,
1304.873291015625,
149.3945770263672,
1304.873291015625
],
"score": 0.9999765157699585
},
{
"category_id": 1,
"poly": [
882.6979370117188,
1880.13916015625,
1552.15185546875,
1880.13916015625,
1552.15185546875,
2031.339599609375,
882.6979370117188,
2031.339599609375
],
"score": 0.9999744892120361
},
{
"category_id": 1,
"poly": [
148.96054077148438,
743.3055419921875,
818.6231689453125,
743.3055419921875,
818.6231689453125,
1074.2369384765625,
148.96054077148438,
1074.2369384765625
],
"score": 0.9999669790267944
},
{
"category_id": 1,
"poly": [
148.8435516357422,
1791.14306640625,
818.6885375976562,
1791.14306640625,
818.6885375976562,
2030.794189453125,
148.8435516357422,
2030.794189453125
],
"score": 0.9999618530273438
},
{
"category_id": 0,
"poly": [
150.7009735107422,
684.0087890625,
623.5106201171875,
684.0087890625,
623.5106201171875,
717.03662109375,
150.7009735107422,
717.03662109375
],
"score": 0.9999415278434753
},
{
"category_id": 8,
"poly": [
146.48068237304688,
1331.6737060546875,
317.2640075683594,
1331.6737060546875,
317.2640075683594,
1400.1722412109375,
146.48068237304688,
1400.1722412109375
],
"score": 0.9998958110809326
},
{
"category_id": 1,
"poly": [
149.42420959472656,
1430.8782958984375,
818.9042358398438,
1430.8782958984375,
818.9042358398438,
1672.7386474609375,
149.42420959472656,
1672.7386474609375
],
"score": 0.9998599290847778
},
{
"category_id": 1,
"poly": [
149.18746948242188,
172.10252380371094,
818.5662231445312,
172.10252380371094,
818.5662231445312,
230.4594268798828,
149.18746948242188,
230.4594268798828
],
"score": 0.9997718334197998
},
{
"category_id": 0,
"poly": [
149.0175018310547,
1732.1090087890625,
702.1005859375,
1732.1090087890625,
702.1005859375,
1763.6046142578125,
149.0175018310547,
1763.6046142578125
],
"score": 0.9997085928916931
},
{
"category_id": 2,
"poly": [
1519.802490234375,
98.59099578857422,
1551.985107421875,
98.59099578857422,
1551.985107421875,
119.48420715332031,
1519.802490234375,
119.48420715332031
],
"score": 0.9995552897453308
},
{
"category_id": 8,
"poly": [
146.9109649658203,
1100.156494140625,
544.2803344726562,
1100.156494140625,
544.2803344726562,
1184.929443359375,
146.9109649658203,
1184.929443359375
],
"score": 0.9995207786560059
},
{
"category_id": 2,
"poly": [
148.11611938476562,
99.87767791748047,
318.926025390625,
99.87767791748047,
318.926025390625,
120.70393371582031,
148.11611938476562,
120.70393371582031
],
"score": 0.999351441860199
},
{
"category_id": 9,
"poly": [
791.7642211914062,
1130.056396484375,
818.6940307617188,
1130.056396484375,
818.6940307617188,
1161.1080322265625,
791.7642211914062,
1161.1080322265625
],
"score": 0.9908884763717651
},
{
"category_id": 9,
"poly": [
788.37060546875,
1346.8450927734375,
818.5010986328125,
1346.8450927734375,
818.5010986328125,
1377.370361328125,
788.37060546875,
1377.370361328125
],
"score": 0.9873985052108765
},
{
"category_id": 14,
"poly": [
146,
1103,
543,
1103,
543,
1184,
146,
1184
],
"score": 0.94,
"latex": "E\\!\\left(W\\right)\\!=\\!\\frac{E\\!\\left[H^{2}\\right]}{2E\\!\\left[H\\right]}\\!=\\!\\frac{E\\!\\left[H\\right]}{2}\\!\\!\\left(1\\!+\\!\\operatorname{CV}\\!\\left(H\\right)^{2}\\right)"
},
{
"category_id": 13,
"poly": [
1196,
354,
1278,
354,
1278,
384,
1196,
384
],
"score": 0.91,
"latex": "p(1-q)"
},
{
"category_id": 13,
"poly": [
881,
415,
1020,
415,
1020,
444,
881,
444
],
"score": 0.91,
"latex": "(1-p)(1-q)"
},
{
"category_id": 14,
"poly": [
147,
1333,
318,
1333,
318,
1400,
147,
1400
],
"score": 0.91,
"latex": "\\mathbf{CV}\\big(H\\big)\\!=\\!\\frac{\\boldsymbol{\\upsigma}_{H}}{E\\big[H\\big]}"
},
{
"category_id": 13,
"poly": [
1197,
657,
1263,
657,
1263,
686,
1197,
686
],
"score": 0.9,
"latex": "(1-p)"
},
{
"category_id": 13,
"poly": [
213,
1217,
263,
1217,
263,
1244,
213,
1244
],
"score": 0.88,
"latex": "E[X]"
},
{
"category_id": 13,
"poly": [
214,
1434,
245,
1434,
245,
1459,
214,
1459
],
"score": 0.87,
"latex": "\\upsigma_{H}"
},
{
"category_id": 13,
"poly": [
324,
2002,
373,
2002,
373,
2028,
324,
2028
],
"score": 0.84,
"latex": "30\\%"
},
{
"category_id": 13,
"poly": [
1209,
693,
1225,
693,
1225,
717,
1209,
717
],
"score": 0.83,
"latex": "p"
},
{
"category_id": 13,
"poly": [
990,
449,
1007,
449,
1007,
474,
990,
474
],
"score": 0.81,
"latex": "p"
},
{
"category_id": 13,
"poly": [
346,
1277,
369,
1277,
369,
1301,
346,
1301
],
"score": 0.81,
"latex": "H"
},
{
"category_id": 13,
"poly": [
1137,
661,
1154,
661,
1154,
686,
1137,
686
],
"score": 0.81,
"latex": "p"
},
{
"category_id": 13,
"poly": [
522,
1432,
579,
1432,
579,
1459,
522,
1459
],
"score": 0.81,
"latex": "H\\left(4\\right)"
},
{
"category_id": 13,
"poly": [
944,
540,
962,
540,
962,
565,
944,
565
],
"score": 0.8,
"latex": "p"
},
{
"category_id": 13,
"poly": [
1444,
936,
1461,
936,
1461,
961,
1444,
961
],
"score": 0.79,
"latex": "p"
},
{
"category_id": 13,
"poly": [
602,
1247,
624,
1247,
624,
1270,
602,
1270
],
"score": 0.78,
"latex": "H"
},
{
"category_id": 13,
"poly": [
147,
1247,
167,
1247,
167,
1271,
147,
1271
],
"score": 0.77,
"latex": "X"
},
{
"category_id": 13,
"poly": [
210,
1246,
282,
1246,
282,
1274,
210,
1274
],
"score": 0.77,
"latex": "\\operatorname{CV}(H)"
},
{
"category_id": 13,
"poly": [
1346,
268,
1361,
268,
1361,
292,
1346,
292
],
"score": 0.76,
"latex": "q"
},
{
"category_id": 13,
"poly": [
215,
957,
238,
957,
238,
981,
215,
981
],
"score": 0.74,
"latex": "H"
},
{
"category_id": 13,
"poly": [
149,
956,
173,
956,
173,
981,
149,
981
],
"score": 0.63,
"latex": "W"
},
{
"category_id": 13,
"poly": [
924,
841,
1016,
841,
1016,
868,
924,
868
],
"score": 0.56,
"latex": "8{\\cdot}00\\;\\mathrm{a.m}"
},
{
"category_id": 13,
"poly": [
956,
871,
1032,
871,
1032,
898,
956,
898
],
"score": 0.43,
"latex": "20~\\mathrm{min}"
},
{
"category_id": 13,
"poly": [
1082,
781,
1112,
781,
1112,
808,
1082,
808
],
"score": 0.41,
"latex": "(l)"
},
{
"category_id": 13,
"poly": [
697,
1821,
734,
1821,
734,
1847,
697,
1847
],
"score": 0.3,
"latex": "^{1\\mathrm{~h~}}"
}
],
"page_info": {
"page_no": 0,
"height": 2200,
"width": 1700
}
}
]
import tempfile
import os
import shutil
import tempfile
from click.testing import CliRunner
from magic_pdf.tools.cli import cli
......@@ -9,20 +8,19 @@ from magic_pdf.tools.cli import cli
def test_cli_pdf():
# setup
unitest_dir = '/tmp/magic_pdf/unittest/tools'
filename = 'cli_test_01'
unitest_dir = "/tmp/magic_pdf/unittest/tools"
filename = "cli_test_01"
os.makedirs(unitest_dir, exist_ok=True)
temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
os.makedirs(temp_output_dir, exist_ok=True)
temp_output_dir = tempfile.mkdtemp(dir="/tmp/magic_pdf/unittest/tools")
# run
runner = CliRunner()
result = runner.invoke(
cli,
[
'-p',
'tests/test_tools/assets/cli/pdf/cli_test_01.pdf',
'-o',
"-p",
"tests/test_tools/assets/cli/pdf/cli_test_01.pdf",
"-o",
temp_output_dir,
],
)
......@@ -30,31 +28,29 @@ def test_cli_pdf():
# check
assert result.exit_code == 0
base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
base_output_dir = os.path.join(temp_output_dir, "cli_test_01/auto")
r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
assert r.st_size > 7000
r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
r = os.stat(os.path.join(base_output_dir, "middle.json"))
assert r.st_size > 200000
r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
r = os.stat(os.path.join(base_output_dir, "model.json"))
assert r.st_size > 15000
r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
assert r.st_size > 500000
assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
assert os.path.exists(
os.path.join(base_output_dir,
f'{filename}_content_list.json')) is False
assert os.path.exists(os.path.join(base_output_dir, "images")) is True
assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
assert os.path.exists(os.path.join(base_output_dir, "content_list.json")) is False
# teardown
shutil.rmtree(temp_output_dir)
......@@ -62,72 +58,68 @@ def test_cli_pdf():
def test_cli_path():
# setup
unitest_dir = '/tmp/magic_pdf/unittest/tools'
unitest_dir = "/tmp/magic_pdf/unittest/tools"
os.makedirs(unitest_dir, exist_ok=True)
temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
os.makedirs(temp_output_dir, exist_ok=True)
temp_output_dir = tempfile.mkdtemp(dir="/tmp/magic_pdf/unittest/tools")
# run
runner = CliRunner()
result = runner.invoke(
cli, ['-p', 'tests/test_tools/assets/cli/path', '-o', temp_output_dir])
cli, ["-p", "tests/test_tools/assets/cli/path", "-o", temp_output_dir]
)
# check
assert result.exit_code == 0
filename = 'cli_test_01'
base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
filename = "cli_test_01"
base_output_dir = os.path.join(temp_output_dir, "cli_test_01/auto")
r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
assert r.st_size > 7000
r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
r = os.stat(os.path.join(base_output_dir, "middle.json"))
assert r.st_size > 200000
r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
r = os.stat(os.path.join(base_output_dir, "model.json"))
assert r.st_size > 15000
r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
assert r.st_size > 500000
assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
assert os.path.exists(
os.path.join(base_output_dir,
f'{filename}_content_list.json')) is False
assert os.path.exists(os.path.join(base_output_dir, "images")) is True
assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
assert os.path.exists(os.path.join(base_output_dir, "content_list.json")) is False
base_output_dir = os.path.join(temp_output_dir, 'cli_test_02/auto')
filename = 'cli_test_02'
base_output_dir = os.path.join(temp_output_dir, "cli_test_02/auto")
filename = "cli_test_02"
r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
assert r.st_size > 5000
r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
r = os.stat(os.path.join(base_output_dir, "middle.json"))
assert r.st_size > 200000
r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
r = os.stat(os.path.join(base_output_dir, "model.json"))
assert r.st_size > 15000
r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
assert r.st_size > 500000
assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
assert os.path.exists(
os.path.join(base_output_dir,
f'{filename}_content_list.json')) is False
assert os.path.exists(os.path.join(base_output_dir, "images")) is True
assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
assert os.path.exists(os.path.join(base_output_dir, "content_list.json")) is False
# teardown
shutil.rmtree(temp_output_dir)
import tempfile
import os
import shutil
import tempfile
from click.testing import CliRunner
from magic_pdf.tools import cli_dev
......@@ -9,23 +8,22 @@ from magic_pdf.tools import cli_dev
def test_cli_pdf():
# setup
unitest_dir = '/tmp/magic_pdf/unittest/tools'
filename = 'cli_test_01'
unitest_dir = "/tmp/magic_pdf/unittest/tools"
filename = "cli_test_01"
os.makedirs(unitest_dir, exist_ok=True)
temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
os.makedirs(temp_output_dir, exist_ok=True)
temp_output_dir = tempfile.mkdtemp(dir="/tmp/magic_pdf/unittest/tools")
# run
runner = CliRunner()
result = runner.invoke(
cli_dev.cli,
[
'pdf',
'-p',
'tests/test_tools/assets/cli/pdf/cli_test_01.pdf',
'-j',
'tests/test_tools/assets/cli_dev/cli_test_01.model.json',
'-o',
"pdf",
"-p",
"tests/test_tools/assets/cli/pdf/cli_test_01.pdf",
"-j",
"tests/test_tools/assets/cli_dev/cli_test_01.model.json",
"-o",
temp_output_dir,
],
)
......@@ -33,31 +31,31 @@ def test_cli_pdf():
# check
assert result.exit_code == 0
base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
base_output_dir = os.path.join(temp_output_dir, "cli_test_01/auto")
r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json'))
r = os.stat(os.path.join(base_output_dir, "content_list.json"))
assert r.st_size > 5000
r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
assert r.st_size > 7000
r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
r = os.stat(os.path.join(base_output_dir, "middle.json"))
assert r.st_size > 200000
r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
r = os.stat(os.path.join(base_output_dir, "model.json"))
assert r.st_size > 15000
r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
assert r.st_size > 500000
assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
assert os.path.exists(os.path.join(base_output_dir, "images")) is True
assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
# teardown
shutil.rmtree(temp_output_dir)
......@@ -65,27 +63,26 @@ def test_cli_pdf():
def test_cli_jsonl():
# setup
unitest_dir = '/tmp/magic_pdf/unittest/tools'
filename = 'cli_test_01'
unitest_dir = "/tmp/magic_pdf/unittest/tools"
filename = "cli_test_01"
os.makedirs(unitest_dir, exist_ok=True)
temp_output_dir = tempfile.mkdtemp(dir='/tmp/magic_pdf/unittest/tools')
os.makedirs(temp_output_dir, exist_ok=True)
temp_output_dir = tempfile.mkdtemp(dir="/tmp/magic_pdf/unittest/tools")
def mock_read_s3_path(s3path):
with open(s3path, 'rb') as f:
with open(s3path, "rb") as f:
return f.read()
cli_dev.read_s3_path = mock_read_s3_path # mock
cli_dev.read_s3_path = mock_read_s3_path # mock
# run
runner = CliRunner()
result = runner.invoke(
cli_dev.cli,
[
'jsonl',
'-j',
'tests/test_tools/assets/cli_dev/cli_test_01.jsonl',
'-o',
"jsonl",
"-j",
"tests/test_tools/assets/cli_dev/cli_test_01.jsonl",
"-o",
temp_output_dir,
],
)
......@@ -93,31 +90,31 @@ def test_cli_jsonl():
# check
assert result.exit_code == 0
base_output_dir = os.path.join(temp_output_dir, 'cli_test_01/auto')
base_output_dir = os.path.join(temp_output_dir, "cli_test_01/auto")
r = os.stat(os.path.join(base_output_dir, f'{filename}_content_list.json'))
r = os.stat(os.path.join(base_output_dir, "content_list.json"))
assert r.st_size > 5000
r = os.stat(os.path.join(base_output_dir, f'{filename}.md'))
r = os.stat(os.path.join(base_output_dir, f"{filename}.md"))
assert r.st_size > 7000
r = os.stat(os.path.join(base_output_dir, f'{filename}_middle.json'))
r = os.stat(os.path.join(base_output_dir, "middle.json"))
assert r.st_size > 200000
r = os.stat(os.path.join(base_output_dir, f'{filename}_model.json'))
r = os.stat(os.path.join(base_output_dir, "model.json"))
assert r.st_size > 15000
r = os.stat(os.path.join(base_output_dir, f'{filename}_origin.pdf'))
r = os.stat(os.path.join(base_output_dir, "origin.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_layout.pdf'))
r = os.stat(os.path.join(base_output_dir, "layout.pdf"))
assert r.st_size > 500000
r = os.stat(os.path.join(base_output_dir, f'{filename}_spans.pdf'))
r = os.stat(os.path.join(base_output_dir, "spans.pdf"))
assert r.st_size > 500000
assert os.path.exists(os.path.join(base_output_dir, 'images')) is True
assert os.path.isdir(os.path.join(base_output_dir, 'images')) is True
assert os.path.exists(os.path.join(base_output_dir, "images")) is True
assert os.path.isdir(os.path.join(base_output_dir, "images")) is True
# teardown
shutil.rmtree(temp_output_dir)
import tempfile
import os
import shutil
import tempfile
import pytest
import magic_pdf.model as model_config
from magic_pdf.tools.common import do_parse
@pytest.mark.parametrize("method", ["auto", "txt", "ocr"])
def test_common_do_parse(method):
    """Run ``do_parse`` on the sample PDF and check the files it writes.

    Args:
        method: parse method handed to ``do_parse``; it is also the name of
            the sub-directory (``fake/<method>``) the outputs are read from.
    """
    # setup
    model_config.__use_inside_model__ = True
    unitest_dir = "/tmp/magic_pdf/unittest/tools"
    filename = "fake"
    os.makedirs(unitest_dir, exist_ok=True)
    # mkdtemp creates the directory itself, so no extra makedirs is needed.
    temp_output_dir = tempfile.mkdtemp(dir=unitest_dir)

    try:
        # run
        with open("tests/test_tools/assets/common/cli_test_01.pdf", "rb") as f:
            bits = f.read()
        do_parse(temp_output_dir, filename, bits, [], method, f_dump_content_list=True)

        # check: every artifact must exist and exceed its minimum size in bytes
        base_output_dir = os.path.join(temp_output_dir, f"fake/{method}")
        minimum_sizes = [
            ("content_list.json", 5000),
            (f"{filename}.md", 7000),
            ("middle.json", 200000),
            ("model.json", 15000),
            ("origin.pdf", 500000),
            ("layout.pdf", 500000),
            ("spans.pdf", 500000),
        ]
        for artifact, minimum in minimum_sizes:
            r = os.stat(os.path.join(base_output_dir, artifact))
            assert r.st_size > minimum, artifact

        # These two results used to be computed and discarded; assert them so
        # a missing images directory actually fails the test.
        images_dir = os.path.join(base_output_dir, "images")
        assert os.path.exists(images_dir)
        assert os.path.isdir(images_dir)
    finally:
        # teardown: runs even when a check above fails, so no temp dir leaks
        shutil.rmtree(temp_output_dir)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment