From b04f14ec24bb68565a18aaff80e21d19fd37f73e Mon Sep 17 00:00:00 2001 From: Peter Staar Date: Tue, 22 Oct 2024 09:13:08 +0200 Subject: [PATCH] able to parse the captions and image URIs Signed-off-by: Peter Staar --- docling/backend/asciidoc_backend.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/docling/backend/asciidoc_backend.py b/docling/backend/asciidoc_backend.py index 06c4b6d3..2061f54f 100644 --- a/docling/backend/asciidoc_backend.py +++ b/docling/backend/asciidoc_backend.py @@ -4,7 +4,12 @@ from io import BytesIO from pathlib import Path from typing import Set, Union +from pydantic import ( + AnyUrl, +) + from docling_core.types.doc import ( + Size, DocItemLabel, DoclingDocument, DocumentOrigin, @@ -190,7 +195,19 @@ class AsciidocBackend(DeclarativeDocumentBackend): item = self.parse_picture(line) print(item) - image = ImageRef(mimetype="image/png", size=[100,100], dpi=70, uri=item["uri"]) + size = None + if "width" in item and "height" in item: + size = Size(width=int(item["width"]), height=int(item["height"])) + + uri = None + if "uri" in item and not item["uri"].startswith("http") and item["uri"].startswith("//"): + uri = "file:"+item["uri"] + elif "uri" in item and not item["uri"].startswith("http") and item["uri"].startswith("/"): + uri = "file:/"+item["uri"] + elif "uri" in item and not item["uri"].startswith("http"): + uri = "file://"+item["uri"] + + image = ImageRef(mimetype="image/png", size=size, dpi=70, uri=uri) doc.add_picture(image=image, caption=caption) # Caption