Commit 38086572 authored by houlinfeng's avatar houlinfeng

feat: mineru web and web_api

parent e92c896a
...@@ -39,3 +39,4 @@ source.dev.env ...@@ -39,3 +39,4 @@ source.dev.env
tmp tmp
projects/web/node_modules projects/web/node_modules
projects/web/dist
...@@ -8,5 +8,5 @@ npm install -g pnpm ...@@ -8,5 +8,5 @@ npm install -g pnpm
3. build 3. build
``` ```
1.pnpm run build 1.pnpm run build
2.npm run buil 2.npm run build
``` ```
\ No newline at end of file
...@@ -22,9 +22,11 @@ ...@@ -22,9 +22,11 @@
"ahooks": "^3.8.1", "ahooks": "^3.8.1",
"antd": "^5.20.3", "antd": "^5.20.3",
"axios": "^1.7.5", "axios": "^1.7.5",
"canvas": "^2.11.2",
"classnames": "^2.5.1", "classnames": "^2.5.1",
"js-cookie": "^3.0.5", "js-cookie": "^3.0.5",
"lodash": "^4.17.21", "lodash": "^4.17.21",
"path2d": "^0.2.1",
"qs": "^6.13.0", "qs": "^6.13.0",
"react": "^18.3.1", "react": "^18.3.1",
"react-copy-to-clipboard": "^5.1.0", "react-copy-to-clipboard": "^5.1.0",
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
import ErrorBoundary from "@/components/error-boundary"; import ErrorBoundary from "@/components/error-boundary";
import styles from "./home.module.scss"; import styles from "./home.module.scss";
import { SlotID, Path } from "@/constant/route"; import { SlotID, Path } from "@/constant/route";
import { BrowserRouter, Routes, Route, Outlet } from "react-router-dom"; import { HashRouter, Routes, Route, Outlet } from "react-router-dom";
import { ExtractorSide } from "./extract-side"; import { ExtractorSide } from "./extract-side";
import { LanguageProvider } from "@/context/language-provider"; import { LanguageProvider } from "@/context/language-provider";
import PDFUpload from "@/pages/extract/components/pdf-upload"; import PDFUpload from "@/pages/extract/components/pdf-upload";
...@@ -70,9 +70,9 @@ export function Home() { ...@@ -70,9 +70,9 @@ export function Home() {
return ( return (
<ErrorBoundary> <ErrorBoundary>
<LanguageProvider> <LanguageProvider>
<BrowserRouter> <HashRouter>
<Screen /> <Screen />
</BrowserRouter> </HashRouter>
</LanguageProvider> </LanguageProvider>
</ErrorBoundary> </ErrorBoundary>
); );
......
## 安装 ## Mineru 本地API服务
MinerU MinerU
```bash ```
# mineru已安装则跳过此步骤 # 服务依赖mineru,请先确保mineru已安装
```
git clone https://github.com/opendatalab/MinerU.git 1. 打包前端界面
cd MinerU
conda create -n MinerU python=3.10 ```bash
conda activate MinerU # 先进入前端目录
pip install .[full] --extra-index-url https://wheels.myhloli.com cd projects/web
# 打包前端项目
npm install -g yarn
yarn install
yarn build
``` ```
第三方软件 2. 安装服务依赖
```bash ```bash
# 先进入后端目录
cd projects/web_api cd projects/web_api
pip install poetry # 安装依赖
portey install pip3 install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
``` ```
启动服务 3. 启动服务
```bash ```bash
cd web_api # 进入程序目录
python app.py cd projects/web_api/web_api
# 启动服务
python3 app.py
# 在浏览器访问启动的地址即可访问界面
``` ```
接口文档 ps:接口文档
``` ```
在浏览器打开 mineru-web接口文档.html 在浏览器打开 mineru-web接口文档.html
``` ```
This source diff could not be displayed because it is too large. You can view the blob instead.
flask-cors
flask-jwt-extended
flask-marshmallow
flask-migrate
flask-restful
flask-sqlalchemy
flask
greenlet
loguru
marshmallow-sqlalchemy
marshmallow
pyjwt
pyyaml
...@@ -4,7 +4,7 @@ from common.web_hook import before_request ...@@ -4,7 +4,7 @@ from common.web_hook import before_request
from common.logger import setup_log from common.logger import setup_log
root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
print("root_dir", root_dir)
def _register_db(flask_app): def _register_db(flask_app):
from common import import_models from common import import_models
...@@ -30,6 +30,8 @@ def create_app(config): ...@@ -30,6 +30,8 @@ def create_app(config):
ma.init_app(app=app) ma.init_app(app=app)
from .analysis import analysis_blue from .analysis import analysis_blue
app.register_blueprint(analysis_blue) app.register_blueprint(analysis_blue)
from .react_app import react_app_blue
app.register_blueprint(react_app_blue)
app.before_request(before_request) app.before_request(before_request)
......
...@@ -4,6 +4,7 @@ from .upload_view import UploadPdfView ...@@ -4,6 +4,7 @@ from .upload_view import UploadPdfView
from .analysis_view import AnalysisTaskView, AnalysisTaskProgressView from .analysis_view import AnalysisTaskView, AnalysisTaskProgressView
from .img_md_view import ImgView, MdView from .img_md_view import ImgView, MdView
from .task_view import TaskView, HistoricalTasksView, DeleteTaskView from .task_view import TaskView, HistoricalTasksView, DeleteTaskView
from .markdown_view import MarkdownView
analysis_blue = Blueprint('analysis', __name__) analysis_blue = Blueprint('analysis', __name__)
...@@ -16,3 +17,4 @@ api_v2.add_resource(MdView, '/analysis/pdf_md') ...@@ -16,3 +17,4 @@ api_v2.add_resource(MdView, '/analysis/pdf_md')
api_v2.add_resource(TaskView, '/extract/taskQueue') api_v2.add_resource(TaskView, '/extract/taskQueue')
api_v2.add_resource(HistoricalTasksView, '/extract/list') api_v2.add_resource(HistoricalTasksView, '/extract/list')
api_v2.add_resource(DeleteTaskView, '/extract/task/<int:id>') api_v2.add_resource(DeleteTaskView, '/extract/task/<int:id>')
api_v2.add_resource(MarkdownView, '/extract/markdown')
\ No newline at end of file
import json import json
import threading import threading
from multiprocessing import Process
from pathlib import Path from pathlib import Path
from flask import request, current_app, url_for from flask import request, current_app, url_for
from flask_restful import Resource from flask_restful import Resource
...@@ -212,10 +213,10 @@ class AnalysisTaskView(Resource): ...@@ -212,10 +213,10 @@ class AnalysisTaskView(Resource):
pdf_analysis_folder = current_app.config['PDF_ANALYSIS_FOLDER'] pdf_analysis_folder = current_app.config['PDF_ANALYSIS_FOLDER']
pdf_dir = f"{current_app.static_folder}/{pdf_analysis_folder}/{file_stem}" pdf_dir = f"{current_app.static_folder}/{pdf_analysis_folder}/{file_stem}"
image_dir = f"{pdf_dir}/images" image_dir = f"{pdf_dir}/images"
t = threading.Thread(target=analysis_pdf_task, process = Process(target=analysis_pdf_task,
args=(pdf_dir, image_dir, file_path, analysis_task.is_ocr, args=(pdf_dir, image_dir, file_path, analysis_task.is_ocr,
analysis_task.analysis_pdf_id)) analysis_task.analysis_pdf_id))
t.start() process.start()
# 生成文件的URL路径 # 生成文件的URL路径
file_url = url_for('analysis.uploadpdfview', filename=analysis_task.file_name, as_attachment=False) file_url = url_for('analysis.uploadpdfview', filename=analysis_task.file_name, as_attachment=False)
......
import json
from pathlib import Path
from flask import request, current_app
from flask_restful import Resource
from common.custom_response import generate_response
class MarkdownView(Resource):
    """REST resource that edits the markdown files of one analysed PDF."""

    @staticmethod
    def _page_order(path_obj):
        """Sort key: purely numeric stems in numeric order first, then the rest by name."""
        stem = path_obj.stem
        if stem.isdecimal():
            return (0, int(stem), "")
        return (1, 0, stem)

    def put(self):
        """
        Edit markdown.

        Reads a JSON request body containing:

        * ``file_key`` - non-empty string; the first directory under the
          analysis folder whose name starts with it is the target directory.
        * ``data`` - non-empty mapping of markdown file stem -> new text.

        Each ``<stem>.md`` that already exists directly inside the target
        directory is overwritten, then ``full.md`` is rebuilt by concatenating
        every other ``.md`` file in that directory, each followed by a newline.

        Returns the ``generate_response`` result: code 400 for empty/invalid
        ``data`` or an unknown/invalid ``file_key``, the default response
        otherwise.
        """
        params = json.loads(request.data)
        if not isinstance(params, dict):
            # A JSON array/scalar carries neither key; treat it as empty input.
            params = {}
        file_key = params.get('file_key')
        data = params.get('data', {})
        if not isinstance(data, dict) or not data:
            return generate_response(code=400, msg="empty data", msgZH="数据为空,无法更新markdown")
        # startswith(None) raises and startswith("") matches every directory,
        # so a missing or empty key must be rejected before the lookup.
        if not isinstance(file_key, str) or not file_key:
            return generate_response(code=400, msg="Invalid file_key", msgZH="文件哈希错误")

        pdf_analysis_folder = current_app.config['PDF_ANALYSIS_FOLDER']
        pdf_dir = Path(f"{current_app.static_folder}/{pdf_analysis_folder}")
        markdown_file_dir = None
        if pdf_dir.is_dir():
            markdown_file_dir = next(
                (
                    path_obj
                    for path_obj in pdf_dir.iterdir()
                    if path_obj.is_dir() and path_obj.name.startswith(file_key)
                ),
                None,
            )
        if markdown_file_dir is None:
            return generate_response(code=400, msg="Invalid file_key", msgZH="文件哈希错误")

        for stem, content in data.items():
            # Opening with 'w' truncates first, so never open for a value that
            # cannot be written afterwards.
            if not isinstance(content, str):
                continue
            md_path = markdown_file_dir / f"{stem}.md"
            # Keys come from the client: anything that would land outside the
            # task directory ("../x", "a/b", absolute paths) is ignored.
            if md_path.parent != markdown_file_dir:
                continue
            if md_path.is_file():
                md_path.write_text(content, encoding="utf-8")

        # iterdir() order is arbitrary, so sort to make full.md reproducible.
        # NOTE(review): assumes per-page files use numeric stems - confirm.
        pages = sorted(
            (
                path_obj
                for path_obj in markdown_file_dir.iterdir()
                if path_obj.is_file() and path_obj.suffix == ".md" and path_obj.stem != "full"
            ),
            key=self._page_order,
        )
        full_content = "".join(page.read_text(encoding="utf-8") + "\n" for page in pages)
        (markdown_file_dir / "full.md").write_text(full_content, encoding="utf-8")
        return generate_response()
...@@ -59,3 +59,4 @@ db = SQLAlchemy() ...@@ -59,3 +59,4 @@ db = SQLAlchemy()
migrate = Migrate() migrate = Migrate()
jwt = JWTManager() jwt = JWTManager()
ma = Marshmallow() ma = Marshmallow()
folder = app.config.get("REACT_APP_DIST")
from pathlib import Path
from flask import Blueprint
from ..extentions import app, Api
from .react_app_view import ReactAppView
from loguru import logger
# Directory holding the built front-end bundle. The configured value (default
# "../../web/dist/") is a relative path, and Path.resolve() anchors relative
# paths to the current working directory, not to this file's location.
folder = Path(app.config.get("REACT_APP_DIST", "../../web/dist/")).resolve()
logger.info(f"react_app folder: {folder}")
# The same directory is used for both static assets and templates, so the
# bundle's index.html can be rendered and its assets served by this blueprint.
react_app_blue = Blueprint('react_app', __name__, static_folder=folder, static_url_path='', template_folder=folder)
# Expose ReactAppView at the blueprint root with no extra URL prefix.
react_app_api = Api(react_app_blue, prefix='')
react_app_api.add_resource(ReactAppView, '/')
\ No newline at end of file
from flask import render_template, Response
from flask_restful import Resource
class ReactAppView(Resource):
    """Resource that returns the front-end entry page."""

    def get(self):
        """Render ``index.html`` and return it wrapped in a ``text/html`` response."""
        # Build the response object explicitly so the MIME type is set to HTML.
        return Response(render_template('index.html'), mimetype='text/html')
...@@ -11,6 +11,8 @@ BaseConfig: &base ...@@ -11,6 +11,8 @@ BaseConfig: &base
JWT_ACCESS_TOKEN_EXPIRES: 3600 JWT_ACCESS_TOKEN_EXPIRES: 3600
PDF_UPLOAD_FOLDER: "upload_pdf" PDF_UPLOAD_FOLDER: "upload_pdf"
PDF_ANALYSIS_FOLDER: "analysis_pdf" PDF_ANALYSIS_FOLDER: "analysis_pdf"
# 前端项目打包的路径
REACT_APP_DIST: "../../web/dist/"
# 开发配置 # 开发配置
DevelopmentConfig: DevelopmentConfig:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment