Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
46bcddf4
Commit
46bcddf4
authored
Jul 12, 2024
by
myhloli
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
disable s3 test
parent
d458b705
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
25 deletions
+25
-25
test_cli.py
tests/test_cli/test_cli.py
+25
-25
No files found.
tests/test_cli/test_cli.py
View file @
46bcddf4
...
...
@@ -19,32 +19,32 @@ class TestCli:
#common.count_folders_and_check_contents(pdf_res_path)
def
test_pdf_specify_jsonl
(
self
):
"""
输入jsonl, 默认方式解析
"""
cmd
=
"cd
%
s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972'"
%
(
code_path
)
logging
.
info
(
cmd
)
common
.
check_shell
(
cmd
)
#common.count_folders_and_check_contents(pdf_res_path)
def
test_pdf_specify_jsonl_txt
(
self
):
"""
输入jsonl, txt方式解析
"""
cmd
=
"cd
%
s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method txt"
%
(
code_path
)
logging
.
info
(
cmd
)
common
.
check_shell
(
cmd
)
#common.count_folders_and_check_contents(pdf_res_path)
# def test_pdf_specify_jsonl(self):
# """
# 输入jsonl, 默认方式解析
# """
# cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972'" % (code_path)
# logging.info(cmd)
# common.check_shell(cmd)
# #common.count_folders_and_check_contents(pdf_res_path)
def
test_pdf_specify_jsonl_ocr
(
self
):
"""
输入jsonl, ocr方式解析
"""
cmd
=
"cd
%
s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method ocr"
%
(
code_path
)
logging
.
info
(
cmd
)
common
.
check_shell
(
cmd
)
#common.count_folders_and_check_contents(pdf_res_path)
# def test_pdf_specify_jsonl_txt(self):
# """
# 输入jsonl, txt方式解析
# """
# cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method txt" % (code_path)
# logging.info(cmd)
# common.check_shell(cmd)
# #common.count_folders_and_check_contents(pdf_res_path)
#
# def test_pdf_specify_jsonl_ocr(self):
# """
# 输入jsonl, ocr方式解析
# """
# cmd = "cd %s && export PYTHONPATH=. && python magic_pdf/cli/magicpdf.py json-command --json 's3://llm-process-pperf/ebook_index_textbook_40k/中高考&竞赛知识点/part-663f1ef5e7c1-009416.jsonl?bytes=0,1133972' --method ocr" % (code_path)
# logging.info(cmd)
# common.check_shell(cmd)
# #common.count_folders_and_check_contents(pdf_res_path)
if
__name__
==
"__main__"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment