mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-11 14:18:30 +00:00
feat: add backend for METS with Google Books profile (#1989)
* add backend for METS with Google Books profile
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* Fixes for cell indexing
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* use HTMLParser and add options from CLI
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* fix typing and unloading
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* restore guess format
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* rename inputformat
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* use PdfDocumentBackend
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* use test file from test folder (still missing)
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* add test file
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
---------
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -20,6 +20,7 @@ from docling.backend.docling_parse_v4_backend import DoclingParseV4DocumentBacke
|
||||
from docling.backend.html_backend import HTMLDocumentBackend
|
||||
from docling.backend.json.docling_json_backend import DoclingJSONBackend
|
||||
from docling.backend.md_backend import MarkdownDocumentBackend
|
||||
from docling.backend.mets_gbs_backend import MetsGbsDocumentBackend
|
||||
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
||||
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
||||
from docling.backend.msword_backend import MsWordDocumentBackend
|
||||
@@ -159,6 +160,9 @@ def _get_default_option(format: InputFormat) -> FormatOption:
|
||||
InputFormat.XML_JATS: FormatOption(
|
||||
pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
|
||||
),
|
||||
InputFormat.METS_GBS: FormatOption(
|
||||
pipeline_cls=StandardPdfPipeline, backend=MetsGbsDocumentBackend
|
||||
),
|
||||
InputFormat.IMAGE: FormatOption(
|
||||
pipeline_cls=StandardPdfPipeline, backend=DoclingParseV4DocumentBackend
|
||||
),
|
||||
|
||||
Reference in New Issue
Block a user