mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-18 09:31:02 +00:00
feat: docling-parse v2 as default PDF backend (#549)
* Move to_docling_document from ds-glm to this repo
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Upgrade to ds-glm 1.0 and docling-parse 3.0
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Update lock
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Fix DP2 backend code, change CLI default backend
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -4,7 +4,6 @@ from pathlib import Path
|
||||
from typing import List, Union
|
||||
|
||||
from deepsearch_glm.nlp_utils import init_nlp_model
|
||||
from deepsearch_glm.utils.doc_utils import to_docling_document
|
||||
from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models
|
||||
from docling_core.types.doc import BoundingBox, CoordOrigin, DoclingDocument
|
||||
from docling_core.types.legacy_doc.base import BoundingBox as DsBoundingBox
|
||||
@@ -29,6 +28,7 @@ from pydantic import BaseModel, ConfigDict
|
||||
from docling.datamodel.base_models import Cluster, FigureElement, Table, TextElement
|
||||
from docling.datamodel.document import ConversionResult, layout_label_to_ds_type
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.utils.glm_utils import to_docling_document
|
||||
from docling.utils.profiling import ProfilingScope, TimeRecorder
|
||||
from docling.utils.utils import create_hash
|
||||
|
||||
@@ -232,7 +232,7 @@ class GlmModel:
|
||||
def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
|
||||
with TimeRecorder(conv_res, "glm", scope=ProfilingScope.DOCUMENT):
|
||||
ds_doc = self._to_legacy_document(conv_res)
|
||||
ds_doc_dict = ds_doc.model_dump(by_alias=True)
|
||||
ds_doc_dict = ds_doc.model_dump(by_alias=True, exclude_none=True)
|
||||
|
||||
glm_doc = self.model.apply_on_doc(ds_doc_dict)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user