mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
rename inputformat
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
79c59cb2b0
commit
46b904e059
@@ -388,7 +388,7 @@ class MetsGbsDocumentBackend(PaginatedDocumentBackend):
|
||||
|
||||
@classmethod
|
||||
def supported_formats(cls) -> Set[InputFormat]:
|
||||
- return {InputFormat.XML_METS_GBS}
+ return {InputFormat.METS_GBS}
|
||||
|
||||
@classmethod
|
||||
def supports_pagination(cls) -> bool:
|
||||
|
@@ -613,7 +613,7 @@ def convert( # noqa: C901
|
||||
format_options = {
|
||||
InputFormat.PDF: pdf_format_option,
|
||||
InputFormat.IMAGE: pdf_format_option,
|
||||
- InputFormat.XML_METS_GBS: mets_gbs_format_option,
+ InputFormat.METS_GBS: mets_gbs_format_option,
|
||||
}
|
||||
|
||||
elif pipeline == ProcessingPipeline.VLM:
|
||||
|
@@ -56,7 +56,7 @@ class InputFormat(str, Enum):
|
||||
XLSX = "xlsx"
|
||||
XML_USPTO = "xml_uspto"
|
||||
XML_JATS = "xml_jats"
|
||||
- XML_METS_GBS = "xml_mets_gbs"
+ METS_GBS = "xml_mets_gbs"
|
||||
JSON_DOCLING = "json_docling"
|
||||
AUDIO = "audio"
|
||||
|
||||
@@ -82,7 +82,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
|
||||
InputFormat.CSV: ["csv"],
|
||||
InputFormat.XLSX: ["xlsx", "xlsm"],
|
||||
InputFormat.XML_USPTO: ["xml", "txt"],
|
||||
- InputFormat.XML_METS_GBS: ["tar.gz"],
+ InputFormat.METS_GBS: ["tar.gz"],
|
||||
InputFormat.JSON_DOCLING: ["json"],
|
||||
InputFormat.AUDIO: ["wav", "mp3"],
|
||||
}
|
||||
@@ -115,7 +115,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
],
|
||||
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
||||
- InputFormat.XML_METS_GBS: ["application/mets+xml"],
+ InputFormat.METS_GBS: ["application/mets+xml"],
|
||||
InputFormat.JSON_DOCLING: ["application/json"],
|
||||
InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
|
||||
}
|
||||
|
@@ -157,7 +157,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
|
||||
InputFormat.XML_JATS: FormatOption(
|
||||
pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
|
||||
),
|
||||
- InputFormat.XML_METS_GBS: FormatOption(
+ InputFormat.METS_GBS: FormatOption(
|
||||
pipeline_cls=StandardPdfPipeline, backend=MetsGbsDocumentBackend
|
||||
),
|
||||
InputFormat.IMAGE: FormatOption(
|
||||
|
Loading…
Reference in New Issue
Block a user