Commit 6f58eeab authored by drunkpig

merge: sync from master branch

parents 9067cd31 7f0fe200
This diff is collapsed.
# Download the PDF-Extract-Kit models using the ModelScope SDK.
from modelscope import snapshot_download
# Fetch the 'opendatalab/PDF-Extract-Kit' repository; the returned value is
# used below as a directory path (presumably the local download/cache
# location -- confirm against the ModelScope documentation).
model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
# Report the "models" subdirectory inside the downloaded snapshot.
print(f"model dir is: {model_dir}/models")
......@@ -9,7 +9,7 @@ git lfs install
To download the `PDF-Extract-Kit` model from Hugging Face, use the following command:
```bash
git lfs clone https://huggingface.co/wanderkid/PDF-Extract-Kit
git lfs clone https://huggingface.co/opendatalab/PDF-Extract-Kit
```
Ensure that Git LFS is enabled during the clone to properly download all large files.
......
......@@ -13,7 +13,7 @@
```bash
git lfs install # 安装 Git 大文件存储插件 (Git LFS)
git lfs clone https://huggingface.co/wanderkid/PDF-Extract-Kit # 从 Hugging Face 下载 PDF-Extract-Kit 模型
git lfs clone https://huggingface.co/opendatalab/PDF-Extract-Kit # 从 Hugging Face 下载 PDF-Extract-Kit 模型
```
......@@ -28,7 +28,7 @@ ModelScope 支持SDK或模型下载,任选一个即可。
```bash
git lfs install
git lfs clone https://www.modelscope.cn/wanderkid/PDF-Extract-Kit.git
git lfs clone https://www.modelscope.cn/opendatalab/PDF-Extract-Kit.git
```
### 2)利用SDK下载
......@@ -41,7 +41,7 @@ pip install modelscope
```python
# 使用modelscope sdk下载模型
from modelscope import snapshot_download
model_dir = snapshot_download('wanderkid/PDF-Extract-Kit')
model_dir = snapshot_download('opendatalab/PDF-Extract-Kit')
print(f"模型文件下载路径为:{model_dir}/models")
```
......
......@@ -100,7 +100,7 @@ def __detect_list_lines(lines, new_layout_bboxes, lang):
if lang != 'en':
return lines, None
else:
total_lines = len(lines)
line_fea_encode = []
"""
......@@ -114,6 +114,9 @@ def __detect_list_lines(lines, new_layout_bboxes, lang):
x_map_tag_dict, min_x_tag = cluster_line_x(lines)
for l in lines:
span_text = __get_span_text(l['spans'][0])
if not span_text:
line_fea_encode.append(0)
continue
first_char = span_text[0]
layout = __find_layout_bbox_by_line(l['bbox'], new_layout_bboxes)
if not layout:
......
......@@ -31,6 +31,22 @@
"created_at": "2024-08-13T12:23:16Z",
"repoId": 765083837,
"pullRequestNo": 418
},
{
"name": "Matthijz98",
"id": 17087153,
"comment_id": 2298912989,
"created_at": "2024-08-20T13:49:50Z",
"repoId": 765083837,
"pullRequestNo": 467
},
{
"name": "strongerfly",
"id": 11643869,
"comment_id": 2309481561,
"created_at": "2024-08-26T07:01:49Z",
"repoId": 765083837,
"pullRequestNo": 487
}
]
}
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment