mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-11 14:18:30 +00:00
feat: docling-parse v2 as default PDF backend (#549)
* Move to_docling_document from ds-glm to this repo

  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Upgrade to ds-glm 1.0 and docling-parse 3.0

  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Update lock

  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

* Fix DP2 backend code, change CLI default backend

  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -9,7 +9,7 @@ from pydantic import BaseModel, ConfigDict, model_validator, validate_call
|
||||
|
||||
from docling.backend.abstract_backend import AbstractDocumentBackend
|
||||
from docling.backend.asciidoc_backend import AsciiDocBackend
|
||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
||||
from docling.backend.html_backend import HTMLDocumentBackend
|
||||
from docling.backend.md_backend import MarkdownDocumentBackend
|
||||
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
||||
@@ -84,12 +84,12 @@ class HTMLFormatOption(FormatOption):
|
||||
|
||||
class PdfFormatOption(FormatOption):
|
||||
pipeline_cls: Type = StandardPdfPipeline
|
||||
backend: Type[AbstractDocumentBackend] = DoclingParseDocumentBackend
|
||||
backend: Type[AbstractDocumentBackend] = DoclingParseV2DocumentBackend
|
||||
|
||||
|
||||
class ImageFormatOption(FormatOption):
|
||||
pipeline_cls: Type = StandardPdfPipeline
|
||||
backend: Type[AbstractDocumentBackend] = DoclingParseDocumentBackend
|
||||
backend: Type[AbstractDocumentBackend] = DoclingParseV2DocumentBackend
|
||||
|
||||
|
||||
def _get_default_option(format: InputFormat) -> FormatOption:
|
||||
@@ -113,10 +113,10 @@ def _get_default_option(format: InputFormat) -> FormatOption:
|
||||
pipeline_cls=SimplePipeline, backend=HTMLDocumentBackend
|
||||
),
|
||||
InputFormat.IMAGE: FormatOption(
|
||||
pipeline_cls=StandardPdfPipeline, backend=DoclingParseDocumentBackend
|
||||
pipeline_cls=StandardPdfPipeline, backend=DoclingParseV2DocumentBackend
|
||||
),
|
||||
InputFormat.PDF: FormatOption(
|
||||
pipeline_cls=StandardPdfPipeline, backend=DoclingParseDocumentBackend
|
||||
pipeline_cls=StandardPdfPipeline, backend=DoclingParseV2DocumentBackend
|
||||
),
|
||||
}
|
||||
if (options := format_to_default_options.get(format)) is not None:
|
||||
|
||||
Reference in New Issue
Block a user