Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
P
pdf-miner
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Qin Kaijie
pdf-miner
Commits
94d94e61
Commit
94d94e61
authored
Apr 09, 2024
by
liukaiwen
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
io modules
parent
8f65af9f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
8 deletions
+10
-8
DiskReaderWriter.py
magic_pdf/io/DiskReaderWriter.py
+7
-5
S3ReaderWriter.py
magic_pdf/io/S3ReaderWriter.py
+3
-3
No files found.
magic_pdf/io/DiskReaderWriter.py
View file @
94d94e61
import
os
import
os
from
magic_pdf.io.AbsReaderWriter
import
AbsReaderWriter
from
magic_pdf.io.AbsReaderWriter
import
AbsReaderWriter
from
loguru
import
logger
class
DiskReaderWriter
(
AbsReaderWriter
):
class
DiskReaderWriter
(
AbsReaderWriter
):
def
__init__
(
self
,
parent_path
,
encoding
=
'utf-8'
):
def
__init__
(
self
,
parent_path
,
encoding
=
'utf-8'
):
self
.
path
=
parent_path
self
.
path
=
parent_path
...
@@ -7,8 +8,8 @@ class DiskReaderWriter(AbsReaderWriter):
...
@@ -7,8 +8,8 @@ class DiskReaderWriter(AbsReaderWriter):
def
read
(
self
,
mode
=
"text"
):
def
read
(
self
,
mode
=
"text"
):
if
not
os
.
path
.
exists
(
self
.
path
):
if
not
os
.
path
.
exists
(
self
.
path
):
print
(
f
"文件 {self.path} 不存在"
)
logger
.
error
(
f
"文件 {self.path} 不存在"
)
r
eturn
None
r
aise
Exception
(
f
"文件 {self.path} 不存在"
)
if
mode
==
"text"
:
if
mode
==
"text"
:
with
open
(
self
.
path
,
'r'
,
encoding
=
self
.
encoding
)
as
f
:
with
open
(
self
.
path
,
'r'
,
encoding
=
self
.
encoding
)
as
f
:
return
f
.
read
()
return
f
.
read
()
...
@@ -22,11 +23,12 @@ class DiskReaderWriter(AbsReaderWriter):
...
@@ -22,11 +23,12 @@ class DiskReaderWriter(AbsReaderWriter):
if
mode
==
"text"
:
if
mode
==
"text"
:
with
open
(
self
.
path
,
'w'
,
encoding
=
self
.
encoding
)
as
f
:
with
open
(
self
.
path
,
'w'
,
encoding
=
self
.
encoding
)
as
f
:
f
.
write
(
data
)
f
.
write
(
data
)
print
(
f
"内容已成功写入 {self.path}"
)
logger
.
info
(
f
"内容已成功写入 {self.path}"
)
elif
mode
==
"binary"
:
elif
mode
==
"binary"
:
with
open
(
self
.
path
,
'wb'
)
as
f
:
with
open
(
self
.
path
,
'wb'
)
as
f
:
f
.
write
(
data
)
f
.
write
(
data
)
print
(
f
"内容已成功写入 {self.path}"
)
logger
.
info
(
f
"内容已成功写入 {self.path}"
)
else
:
else
:
raise
ValueError
(
"Invalid mode. Use 'text' or 'binary'."
)
raise
ValueError
(
"Invalid mode. Use 'text' or 'binary'."
)
...
@@ -42,6 +44,6 @@ if __name__ == "__main__":
...
@@ -42,6 +44,6 @@ if __name__ == "__main__":
# 从文件读取内容
# 从文件读取内容
content
=
drw
.
read
()
content
=
drw
.
read
()
if
content
:
if
content
:
print
(
f
"从 {file_path} 读取的内容: {content}"
)
logger
.
info
(
f
"从 {file_path} 读取的内容: {content}"
)
magic_pdf/io/S3ReaderWriter.py
View file @
94d94e61
...
@@ -46,6 +46,7 @@ class S3ReaderWriter(AbsReaderWriter):
...
@@ -46,6 +46,7 @@ class S3ReaderWriter(AbsReaderWriter):
raise
ValueError
(
"Invalid mode. Use 'text' or 'binary'."
)
raise
ValueError
(
"Invalid mode. Use 'text' or 'binary'."
)
bucket_name
,
bucket_key
=
parse_bucket_key
(
s3_path
)
bucket_name
,
bucket_key
=
parse_bucket_key
(
s3_path
)
self
.
client
.
put_object
(
Body
=
body
,
Bucket
=
bucket_name
,
Key
=
bucket_key
)
self
.
client
.
put_object
(
Body
=
body
,
Bucket
=
bucket_name
,
Key
=
bucket_key
)
logger
.
info
(
f
"内容已写入 {s3_path} "
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
@@ -64,12 +65,11 @@ if __name__ == "__main__":
...
@@ -64,12 +65,11 @@ if __name__ == "__main__":
# Read text data from S3
# Read text data from S3
text_data_read
=
s3_reader_writer
.
read
(
s3_path
=
"s3://bucket_name/ebook/test/test.json"
,
mode
=
'text'
)
text_data_read
=
s3_reader_writer
.
read
(
s3_path
=
"s3://bucket_name/ebook/test/test.json"
,
mode
=
'text'
)
print
(
f
"Read text data from S3: {text_data_read}"
)
logger
.
info
(
f
"Read text data from S3: {text_data_read}"
)
# Write binary data to S3
# Write binary data to S3
binary_data
=
b
"This is some binary data"
binary_data
=
b
"This is some binary data"
s3_reader_writer
.
write
(
data
=
text_data
,
s3_path
=
"s3://bucket_name/ebook/test/test2.json"
,
mode
=
'binary'
)
s3_reader_writer
.
write
(
data
=
text_data
,
s3_path
=
"s3://bucket_name/ebook/test/test2.json"
,
mode
=
'binary'
)
# Read binary data from S3
# Read binary data from S3
binary_data_read
=
s3_reader_writer
.
read
(
s3_path
=
"s3://bucket_name/ebook/test/test2.json"
,
mode
=
'binary'
)
binary_data_read
=
s3_reader_writer
.
read
(
s3_path
=
"s3://bucket_name/ebook/test/test2.json"
,
mode
=
'binary'
)
print
(
f
"Read binary data from S3: {binary_data_read}"
)
logger
.
info
(
f
"Read binary data from S3: {binary_data_read}"
)
\ No newline at end of file
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment