Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
bd183428
Commit
bd183428
authored
Jun 03, 2024
by
赵小蒙
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add version_name to middle json
parent
496045f3
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
26 additions
and
15 deletions
+26
-15
commons.py
magic_pdf/libs/commons.py
+15
-0
user_api.py
magic_pdf/user_api.py
+7
-1
setup.py
setup.py
+4
-14
No files found.
magic_pdf/libs/commons.py
View file @
bd183428
import
datetime
import
datetime
import
json
import
json
import
os
,
re
,
configparser
import
os
,
re
,
configparser
import
subprocess
import
time
import
time
import
boto3
import
boto3
...
@@ -11,6 +12,20 @@ from botocore.config import Config
...
@@ -11,6 +12,20 @@ from botocore.config import Config
import
fitz
# 1.23.9中已经切换到rebase
import
fitz
# 1.23.9中已经切换到rebase
# import fitz_old as fitz # 使用1.23.9之前的pymupdf库
# import fitz_old as fitz # 使用1.23.9之前的pymupdf库
def
get_version
():
command
=
[
"git"
,
"describe"
,
"--tags"
]
try
:
version
=
subprocess
.
check_output
(
command
)
.
decode
()
.
strip
()
version_parts
=
version
.
split
(
"-"
)
if
len
(
version_parts
)
>
1
and
version_parts
[
0
]
.
startswith
(
"magic_pdf"
):
return
version_parts
[
1
]
else
:
raise
ValueError
(
f
"Invalid version tag {version}. Expected format is magic_pdf-<version>-released."
)
except
Exception
as
e
:
print
(
e
)
return
"0.0.0"
def
get_delta_time
(
input_time
):
def
get_delta_time
(
input_time
):
return
round
(
time
.
time
()
-
input_time
,
2
)
return
round
(
time
.
time
()
-
input_time
,
2
)
...
...
magic_pdf/user_api.py
View file @
bd183428
...
@@ -16,11 +16,11 @@ import re
...
@@ -16,11 +16,11 @@ import re
from
loguru
import
logger
from
loguru
import
logger
from
magic_pdf.libs.commons
import
get_version
from
magic_pdf.rw
import
AbsReaderWriter
from
magic_pdf.rw
import
AbsReaderWriter
from
magic_pdf.pdf_parse_by_ocr_v2
import
parse_pdf_by_ocr
from
magic_pdf.pdf_parse_by_ocr_v2
import
parse_pdf_by_ocr
from
magic_pdf.pdf_parse_by_txt_v2
import
parse_pdf_by_txt
from
magic_pdf.pdf_parse_by_txt_v2
import
parse_pdf_by_txt
PARSE_TYPE_TXT
=
"txt"
PARSE_TYPE_TXT
=
"txt"
PARSE_TYPE_OCR
=
"ocr"
PARSE_TYPE_OCR
=
"ocr"
...
@@ -39,6 +39,8 @@ def parse_txt_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit
...
@@ -39,6 +39,8 @@ def parse_txt_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit
pdf_info_dict
[
"_parse_type"
]
=
PARSE_TYPE_TXT
pdf_info_dict
[
"_parse_type"
]
=
PARSE_TYPE_TXT
pdf_info_dict
[
"_version_name"
]
=
get_version
()
return
pdf_info_dict
return
pdf_info_dict
...
@@ -57,6 +59,8 @@ def parse_ocr_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit
...
@@ -57,6 +59,8 @@ def parse_ocr_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit
pdf_info_dict
[
"_parse_type"
]
=
PARSE_TYPE_OCR
pdf_info_dict
[
"_parse_type"
]
=
PARSE_TYPE_OCR
pdf_info_dict
[
"_version_name"
]
=
get_version
()
return
pdf_info_dict
return
pdf_info_dict
...
@@ -118,4 +122,6 @@ def parse_union_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWr
...
@@ -118,4 +122,6 @@ def parse_union_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWr
else
:
else
:
pdf_info_dict
[
"_parse_type"
]
=
PARSE_TYPE_TXT
pdf_info_dict
[
"_parse_type"
]
=
PARSE_TYPE_TXT
pdf_info_dict
[
"_version_name"
]
=
get_version
()
return
pdf_info_dict
return
pdf_info_dict
setup.py
View file @
bd183428
from
setuptools
import
setup
,
find_packages
from
setuptools
import
setup
,
find_packages
import
subprocess
from
magic_pdf.libs.commons
import
get_version
def
parse_requirements
(
filename
):
def
parse_requirements
(
filename
):
with
open
(
filename
)
as
f
:
with
open
(
filename
)
as
f
:
lines
=
f
.
read
()
.
splitlines
()
lines
=
f
.
read
()
.
splitlines
()
...
@@ -15,23 +18,10 @@ def parse_requirements(filename):
...
@@ -15,23 +18,10 @@ def parse_requirements(filename):
return
requires
return
requires
def
get_version
():
command
=
[
"git"
,
"describe"
,
"--tags"
]
try
:
version
=
subprocess
.
check_output
(
command
)
.
decode
()
.
strip
()
version_parts
=
version
.
split
(
"-"
)
if
len
(
version_parts
)
>
1
and
version_parts
[
0
]
.
startswith
(
"magic_pdf"
):
return
version_parts
[
1
]
else
:
raise
ValueError
(
f
"Invalid version tag {version}. Expected format is magic_pdf-<version>-released."
)
except
Exception
as
e
:
print
(
e
)
return
"0.0.0"
setup
(
setup
(
name
=
"magic_pdf"
,
# 项目名
name
=
"magic_pdf"
,
# 项目名
# version="0.1.3", # 版本号
version
=
get_version
(),
# 自动从tag中获取版本号
version
=
get_version
(),
# 自动从tag中获取版本号
packages
=
find_packages
(),
# 包含所有的包
packages
=
find_packages
(),
# 包含所有的包
install_requires
=
parse_requirements
(
'requirements.txt'
),
# 项目依赖的第三方库
install_requires
=
parse_requirements
(
'requirements.txt'
),
# 项目依赖的第三方库
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment