mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
rename inputformat
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
79c59cb2b0
commit
46b904e059
@ -388,7 +388,7 @@ class MetsGbsDocumentBackend(PaginatedDocumentBackend):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def supported_formats(cls) -> Set[InputFormat]:
|
def supported_formats(cls) -> Set[InputFormat]:
|
||||||
return {InputFormat.XML_METS_GBS}
|
return {InputFormat.METS_GBS}
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def supports_pagination(cls) -> bool:
|
def supports_pagination(cls) -> bool:
|
||||||
|
@ -613,7 +613,7 @@ def convert( # noqa: C901
|
|||||||
format_options = {
|
format_options = {
|
||||||
InputFormat.PDF: pdf_format_option,
|
InputFormat.PDF: pdf_format_option,
|
||||||
InputFormat.IMAGE: pdf_format_option,
|
InputFormat.IMAGE: pdf_format_option,
|
||||||
InputFormat.XML_METS_GBS: mets_gbs_format_option,
|
InputFormat.METS_GBS: mets_gbs_format_option,
|
||||||
}
|
}
|
||||||
|
|
||||||
elif pipeline == ProcessingPipeline.VLM:
|
elif pipeline == ProcessingPipeline.VLM:
|
||||||
|
@ -56,7 +56,7 @@ class InputFormat(str, Enum):
|
|||||||
XLSX = "xlsx"
|
XLSX = "xlsx"
|
||||||
XML_USPTO = "xml_uspto"
|
XML_USPTO = "xml_uspto"
|
||||||
XML_JATS = "xml_jats"
|
XML_JATS = "xml_jats"
|
||||||
XML_METS_GBS = "xml_mets_gbs"
|
METS_GBS = "xml_mets_gbs"
|
||||||
JSON_DOCLING = "json_docling"
|
JSON_DOCLING = "json_docling"
|
||||||
AUDIO = "audio"
|
AUDIO = "audio"
|
||||||
|
|
||||||
@ -82,7 +82,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
|
|||||||
InputFormat.CSV: ["csv"],
|
InputFormat.CSV: ["csv"],
|
||||||
InputFormat.XLSX: ["xlsx", "xlsm"],
|
InputFormat.XLSX: ["xlsx", "xlsm"],
|
||||||
InputFormat.XML_USPTO: ["xml", "txt"],
|
InputFormat.XML_USPTO: ["xml", "txt"],
|
||||||
InputFormat.XML_METS_GBS: ["tar.gz"],
|
InputFormat.METS_GBS: ["tar.gz"],
|
||||||
InputFormat.JSON_DOCLING: ["json"],
|
InputFormat.JSON_DOCLING: ["json"],
|
||||||
InputFormat.AUDIO: ["wav", "mp3"],
|
InputFormat.AUDIO: ["wav", "mp3"],
|
||||||
}
|
}
|
||||||
@ -115,7 +115,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
|
|||||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
],
|
],
|
||||||
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
||||||
InputFormat.XML_METS_GBS: ["application/mets+xml"],
|
InputFormat.METS_GBS: ["application/mets+xml"],
|
||||||
InputFormat.JSON_DOCLING: ["application/json"],
|
InputFormat.JSON_DOCLING: ["application/json"],
|
||||||
InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
|
InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
|
||||||
}
|
}
|
||||||
|
@ -157,7 +157,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
|
|||||||
InputFormat.XML_JATS: FormatOption(
|
InputFormat.XML_JATS: FormatOption(
|
||||||
pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
|
pipeline_cls=SimplePipeline, backend=JatsDocumentBackend
|
||||||
),
|
),
|
||||||
InputFormat.XML_METS_GBS: FormatOption(
|
InputFormat.METS_GBS: FormatOption(
|
||||||
pipeline_cls=StandardPdfPipeline, backend=MetsGbsDocumentBackend
|
pipeline_cls=StandardPdfPipeline, backend=MetsGbsDocumentBackend
|
||||||
),
|
),
|
||||||
InputFormat.IMAGE: FormatOption(
|
InputFormat.IMAGE: FormatOption(
|
||||||
|
Loading…
Reference in New Issue
Block a user