Commit 6c656af6 authored by 赵小蒙

update:cleanup requirements.txt

parent 53ccd5a6
from s3pathlib import S3Path
def remove_non_official_s3_args(s3path):
"""
example: s3://abc/xxxx.json?bytes=0,81350 ==> s3://abc/xxxx.json
......@@ -10,8 +8,19 @@ def remove_non_official_s3_args(s3path):
return arr[0]
def parse_s3path(s3path: str):
    """Split an S3 URI into its bucket name and object key.

    Non-official query arguments (for example ``?bytes=0,81350``) are removed
    with ``remove_non_official_s3_args`` and surrounding whitespace is
    stripped before the URI is parsed.

    Args:
        s3path: URI of the form ``s3://bucket-name/key`` or
            ``s3a://bucket-name/key``.

    Returns:
        A ``(bucket_name, key)`` tuple. ``key`` is an empty string when the
        URI names only a bucket (``s3://bucket-name``).

    Raises:
        ValueError: If the path starts with ``/``, does not use the ``s3://``
            or ``s3a://`` scheme, or has an empty bucket name.
    """
    s3path = remove_non_official_s3_args(s3path).strip()
    if s3path.startswith(('s3://', 's3a://')):
        _, path = s3path.split('://', 1)
        # partition() tolerates a URI without any '/', whereas unpacking
        # split('/', 1) fails with an opaque "not enough values" error.
        bucket_name, _, key = path.partition('/')
        if not bucket_name:
            raise ValueError("Invalid S3 path format. Expected 's3://bucket-name/key' or 's3a://bucket-name/key'.")
        return bucket_name, key
    if s3path.startswith('/'):
        raise ValueError("The provided path starts with '/'. This does not conform to a valid S3 path format.")
    raise ValueError("Invalid S3 path format. Expected 's3://bucket-name/key' or 's3a://bucket-name/key'.")
def parse_s3_range_params(s3path: str):
"""
......
Levenshtein
nltk
rapidfuzz
statistics
openxlab #安装opendatalab
pandas
numpy
matplotlib
seaborn
scipy
scikit-learn
tqdm
htmltabletomd
pypandoc
\ No newline at end of file
boto3>=1.28.43
Brotli>=1.1.0
click>=8.1.7
Distance>=0.1.3
PyMuPDF>=1.24.7
loguru>=0.6.0
matplotlib>=3.8.3
numpy>=1.21.6
pandas>=1.3.5
fast-langdetect>=0.1.1
regex>=2023.12.25
termcolor>=2.4.0
wordninja>=2.0.0
scikit-learn>=1.0.2
nltk==3.8.1
s3pathlib>=2.1.1
pdfminer.six>=20231228
Levenshtein
rapidfuzz
statistics
openxlab #安装opendatalab
seaborn
scipy
tqdm
htmltabletomd
pypandoc
\ No newline at end of file
# requirements.txt 须保证只引入必需的外部依赖,如有新依赖添加请联系项目管理员
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment