From 46b904e059231105bf6cfdedc82b3a940d8eb186 Mon Sep 17 00:00:00 2001
From: Michele Dolfi
Date: Fri, 25 Jul 2025 15:16:41 +0200
Subject: [PATCH] rename inputformat

Signed-off-by: Michele Dolfi
---
 docling/backend/mets_gbs_backend.py | 2 +-
 docling/cli/main.py                 | 2 +-
 docling/datamodel/base_models.py    | 6 +++---
 docling/document_converter.py       | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/docling/backend/mets_gbs_backend.py b/docling/backend/mets_gbs_backend.py
index 29e216a2..b3ca2fb2 100644
--- a/docling/backend/mets_gbs_backend.py
+++ b/docling/backend/mets_gbs_backend.py
@@ -388,7 +388,7 @@ class MetsGbsDocumentBackend(PaginatedDocumentBackend):
 
     @classmethod
     def supported_formats(cls) -> Set[InputFormat]:
-        return {InputFormat.XML_METS_GBS}
+        return {InputFormat.METS_GBS}
 
     @classmethod
     def supports_pagination(cls) -> bool:
diff --git a/docling/cli/main.py b/docling/cli/main.py
index 8ed127a6..db9dfc00 100644
--- a/docling/cli/main.py
+++ b/docling/cli/main.py
@@ -613,7 +613,7 @@ def convert(  # noqa: C901
             format_options = {
                 InputFormat.PDF: pdf_format_option,
                 InputFormat.IMAGE: pdf_format_option,
-                InputFormat.XML_METS_GBS: mets_gbs_format_option,
+                InputFormat.METS_GBS: mets_gbs_format_option,
             }
 
         elif pipeline == ProcessingPipeline.VLM:
diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py
index 6825e125..8edefe38 100644
--- a/docling/datamodel/base_models.py
+++ b/docling/datamodel/base_models.py
@@ -56,7 +56,7 @@ class InputFormat(str, Enum):
     XLSX = "xlsx"
     XML_USPTO = "xml_uspto"
     XML_JATS = "xml_jats"
-    XML_METS_GBS = "xml_mets_gbs"
+    METS_GBS = "xml_mets_gbs"
     JSON_DOCLING = "json_docling"
     AUDIO = "audio"
 
@@ -82,7 +82,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
     InputFormat.CSV: ["csv"],
     InputFormat.XLSX: ["xlsx", "xlsm"],
     InputFormat.XML_USPTO: ["xml", "txt"],
-    InputFormat.XML_METS_GBS: ["tar.gz"],
+    InputFormat.METS_GBS: ["tar.gz"],
     InputFormat.JSON_DOCLING: ["json"],
     InputFormat.AUDIO: ["wav", "mp3"],
 }
@@ -115,7 +115,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
         "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
     ],
     InputFormat.XML_USPTO: ["application/xml", "text/plain"],
-    InputFormat.XML_METS_GBS: ["application/mets+xml"],
+    InputFormat.METS_GBS: ["application/mets+xml"],
     InputFormat.JSON_DOCLING: ["application/json"],
     InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
 }
diff --git a/docling/document_converter.py b/docling/document_converter.py
index fea14f38..855a5caa 100644
--- a/docling/document_converter.py
+++ b/docling/document_converter.py
@@ -157,7 +157,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
         InputFormat.XML_JATS: FormatOption(
             pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
         ),
-        InputFormat.XML_METS_GBS: FormatOption(
+        InputFormat.METS_GBS: FormatOption(
             pipeline_cls=StandardPdfPipeline, backend=MetsGbsDocumentBackend
         ),
         InputFormat.IMAGE: FormatOption(