Commit 30ac6f22 authored by myhloli

fix(magic-pdf): add default values and improve warning logs for config...

fix(magic-pdf): add default values and improve warning logs for config options

Ensure that 'temp-output-dir', 'models-dir', and 'device-mode' have sensible default values in case they are not specified in the config file.
parent cad37d5d
...@@ -89,7 +89,6 @@ def do_parse( ...@@ -89,7 +89,6 @@ def do_parse(
orig_model_list = copy.deepcopy(model_list) orig_model_list = copy.deepcopy(model_list)
local_image_dir, local_md_dir = prepare_env(pdf_file_name, parse_method) local_image_dir, local_md_dir = prepare_env(pdf_file_name, parse_method)
logger.info(f"local output dir is {local_md_dir}")
image_writer, md_writer = DiskReaderWriter(local_image_dir), DiskReaderWriter(local_md_dir) image_writer, md_writer = DiskReaderWriter(local_image_dir), DiskReaderWriter(local_md_dir)
image_dir = str(os.path.basename(local_image_dir)) image_dir = str(os.path.basename(local_image_dir))
...@@ -163,6 +162,7 @@ def do_parse( ...@@ -163,6 +162,7 @@ def do_parse(
path=f"{pdf_file_name}_content_list.json", path=f"{pdf_file_name}_content_list.json",
mode=AbsReaderWriter.MODE_TXT, mode=AbsReaderWriter.MODE_TXT,
) )
logger.info(f"local output dir is '{local_md_dir}', you can found the result in it.")
@click.group() @click.group()
......
...@@ -56,17 +56,32 @@ def get_bucket_name(path): ...@@ -56,17 +56,32 @@ def get_bucket_name(path):
def get_local_dir(): def get_local_dir():
config = read_config() config = read_config()
return config.get("temp-output-dir", "/tmp") local_dir = config.get("temp-output-dir")
if local_dir is None:
logger.warning("'temp-output-dir' not found in magic-pdf.json, use '/tmp' as default")
return "/tmp"
else:
return local_dir
def get_local_models_dir(): def get_local_models_dir():
config = read_config() config = read_config()
return config.get("models-dir", "/tmp/models") models_dir = config.get("models-dir")
if models_dir is None:
logger.warning("'models-dir' not found in magic-pdf.json, use '/tmp/models' as default")
return "/tmp/models"
else:
return models_dir
def get_device(): def get_device():
config = read_config() config = read_config()
return config.get("device-mode", "cpu") device = config.get("device-mode")
if device is None:
logger.warning("'device-mode' not found in magic-pdf.json, use 'cpu' as default")
return "cpu"
else:
return device
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -105,6 +105,7 @@ class CustomPEKModel: ...@@ -105,6 +105,7 @@ class CustomPEKModel:
self.device = kwargs.get("device", self.configs["config"]["device"]) self.device = kwargs.get("device", self.configs["config"]["device"])
logger.info("using device: {}".format(self.device)) logger.info("using device: {}".format(self.device))
models_dir = kwargs.get("models_dir", os.path.join(root_dir, "resources", "models")) models_dir = kwargs.get("models_dir", os.path.join(root_dir, "resources", "models"))
logger.info("using models_dir: {}".format(models_dir))
# 初始化公式识别 # 初始化公式识别
if self.apply_formula: if self.apply_formula:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment