mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 07:22:14 +00:00
Able to parse the captions and image URIs
Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
parent
1c0a766cc5
commit
b04f14ec24
@ -4,7 +4,12 @@ from io import BytesIO
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Set, Union
|
from typing import Set, Union
|
||||||
|
|
||||||
|
from pydantic import (
|
||||||
|
AnyUrl,
|
||||||
|
)
|
||||||
|
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
|
Size,
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
DocumentOrigin,
|
DocumentOrigin,
|
||||||
@ -190,7 +195,19 @@ class AsciidocBackend(DeclarativeDocumentBackend):
|
|||||||
item = self.parse_picture(line)
|
item = self.parse_picture(line)
|
||||||
print(item)
|
print(item)
|
||||||
|
|
||||||
image = ImageRef(mimetype="image/png", size=[100,100], dpi=70, uri=item["uri"])
|
size = None
|
||||||
|
if "width" in item and "height" in item:
|
||||||
|
size = Size(width=int(item["width"]), height=int(item["height"]))
|
||||||
|
|
||||||
|
uri = None
|
||||||
|
if "uri" in item and not item["uri"].startswith("http") and item["uri"].startswith("//"):
|
||||||
|
uri = "file:"+item["uri"]
|
||||||
|
elif "uri" in item and not item["uri"].startswith("http") and item["uri"].startswith("/"):
|
||||||
|
uri = "file:/"+item["uri"]
|
||||||
|
elif "uri" in item and not item["uri"].startswith("http"):
|
||||||
|
uri = "file://"+item["uri"]
|
||||||
|
|
||||||
|
image = ImageRef(mimetype="image/png", size=size, dpi=70, uri=uri)
|
||||||
doc.add_picture(image=image, caption=caption)
|
doc.add_picture(image=image, caption=caption)
|
||||||
|
|
||||||
# Caption
|
# Caption
|
||||||
|
Loading…
Reference in New Issue
Block a user