Rename InputFormat.XML_METS_GBS to InputFormat.METS_GBS (enum value "xml_mets_gbs" unchanged)

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2025-07-25 15:16:41 +02:00
parent 79c59cb2b0
commit 46b904e059
4 changed files with 6 additions and 6 deletions

View File

@ -388,7 +388,7 @@ class MetsGbsDocumentBackend(PaginatedDocumentBackend):
@classmethod
def supported_formats(cls) -> Set[InputFormat]:
return {InputFormat.XML_METS_GBS}
return {InputFormat.METS_GBS}
@classmethod
def supports_pagination(cls) -> bool:

View File

@ -613,7 +613,7 @@ def convert( # noqa: C901
format_options = {
InputFormat.PDF: pdf_format_option,
InputFormat.IMAGE: pdf_format_option,
InputFormat.XML_METS_GBS: mets_gbs_format_option,
InputFormat.METS_GBS: mets_gbs_format_option,
}
elif pipeline == ProcessingPipeline.VLM:

View File

@ -56,7 +56,7 @@ class InputFormat(str, Enum):
XLSX = "xlsx"
XML_USPTO = "xml_uspto"
XML_JATS = "xml_jats"
XML_METS_GBS = "xml_mets_gbs"
METS_GBS = "xml_mets_gbs"
JSON_DOCLING = "json_docling"
AUDIO = "audio"
@ -82,7 +82,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
InputFormat.CSV: ["csv"],
InputFormat.XLSX: ["xlsx", "xlsm"],
InputFormat.XML_USPTO: ["xml", "txt"],
InputFormat.XML_METS_GBS: ["tar.gz"],
InputFormat.METS_GBS: ["tar.gz"],
InputFormat.JSON_DOCLING: ["json"],
InputFormat.AUDIO: ["wav", "mp3"],
}
@ -115,7 +115,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
],
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
InputFormat.XML_METS_GBS: ["application/mets+xml"],
InputFormat.METS_GBS: ["application/mets+xml"],
InputFormat.JSON_DOCLING: ["application/json"],
InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
}

View File

@ -157,7 +157,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
InputFormat.XML_JATS: FormatOption(
pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
),
InputFormat.XML_METS_GBS: FormatOption(
InputFormat.METS_GBS: FormatOption(
pipeline_cls=StandardPdfPipeline, backend=MetsGbsDocumentBackend
),
InputFormat.IMAGE: FormatOption(