mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat: add Docling JSON ingestion (#783)
* feat: add Docling JSON ingestion Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> * update conversion as per review comments, add tests, revert Docling JSON disambiguation, document intricacies Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> * Update docling/backend/json/docling_json_backend.py Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com> Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --------- Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Co-authored-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
@@ -41,6 +41,7 @@ class InputFormat(str, Enum):
|
||||
MD = "md"
|
||||
XLSX = "xlsx"
|
||||
XML_USPTO = "xml_uspto"
|
||||
JSON_DOCLING = "json_docling"
|
||||
|
||||
|
||||
class OutputFormat(str, Enum):
|
||||
@@ -62,6 +63,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
|
||||
InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
|
||||
InputFormat.XLSX: ["xlsx"],
|
||||
InputFormat.XML_USPTO: ["xml", "txt"],
|
||||
InputFormat.JSON_DOCLING: ["json"],
|
||||
}
|
||||
|
||||
FormatToMimeType: Dict[InputFormat, List[str]] = {
|
||||
@@ -90,6 +92,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
],
|
||||
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
||||
InputFormat.JSON_DOCLING: ["application/json"],
|
||||
}
|
||||
|
||||
MimeTypeToFormat: dict[str, list[InputFormat]] = {
|
||||
|
||||
Reference in New Issue
Block a user