mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: add backend for METS with Google Books profile (#1989)
* add backend for METS with Google Books profile
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* Fixes for cell indexing
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* use HTMLParser and add options from CLI
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* fix typing and unloading
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* restore guess format
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* rename inputformat
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* use PdfDocumentBackend
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* use test file from test folder (still missing)
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
* add test file
  Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
---------
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
Co-authored-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
@@ -56,6 +56,7 @@ class InputFormat(str, Enum):
|
||||
XLSX = "xlsx"
|
||||
XML_USPTO = "xml_uspto"
|
||||
XML_JATS = "xml_jats"
|
||||
METS_GBS = "mets_gbs"
|
||||
JSON_DOCLING = "json_docling"
|
||||
AUDIO = "audio"
|
||||
|
||||
@@ -81,6 +82,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
|
||||
InputFormat.CSV: ["csv"],
|
||||
InputFormat.XLSX: ["xlsx", "xlsm"],
|
||||
InputFormat.XML_USPTO: ["xml", "txt"],
|
||||
InputFormat.METS_GBS: ["tar.gz"],
|
||||
InputFormat.JSON_DOCLING: ["json"],
|
||||
InputFormat.AUDIO: ["wav", "mp3"],
|
||||
}
|
||||
@@ -113,6 +115,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
],
|
||||
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
||||
InputFormat.METS_GBS: ["application/mets+xml"],
|
||||
InputFormat.JSON_DOCLING: ["application/json"],
|
||||
InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user