mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 23:12:20 +00:00
Fixed example run_md, added origin info to md_backend
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
1456a36618
commit
8c60dfa0e6
@@ -10,6 +10,7 @@ import marko.inline
|
||||
from docling_core.types.doc import (
|
||||
DocItemLabel,
|
||||
DoclingDocument,
|
||||
DocumentOrigin,
|
||||
GroupLabel,
|
||||
TableCell,
|
||||
TableData,
|
||||
@@ -235,7 +236,22 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
||||
|
||||
def convert(self) -> DoclingDocument:
|
||||
_log.debug("converting Markdown...")
|
||||
doc = DoclingDocument(name="Test")
|
||||
|
||||
fname = ""
|
||||
if isinstance(self.path_or_stream, Path):
|
||||
fname = self.path_or_stream.name
|
||||
|
||||
origin = DocumentOrigin(
|
||||
filename=fname,
|
||||
mimetype="text/markdown",
|
||||
binary_hash=self.document_hash,
|
||||
)
|
||||
if len(fname) > 0:
|
||||
docname = Path(fname).stem
|
||||
else:
|
||||
docname = "stream"
|
||||
|
||||
doc = DoclingDocument(name=docname, origin=origin)
|
||||
|
||||
if self.is_valid():
|
||||
# Parse the markdown into an abstract syntax tree (AST)
|
||||
|
@@ -13,14 +13,7 @@ _log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def main():
|
||||
input_paths = [
|
||||
Path("README.md"),
|
||||
Path("scratch_a/2203.01017v2.md"),
|
||||
Path("scratch_a/2206.01062.md"),
|
||||
Path("scratch_a/2305.03393v1.md"),
|
||||
Path("scratch_a/redp5110.md"),
|
||||
Path("scratch_a/redp5695.md"),
|
||||
]
|
||||
input_paths = [Path("README.md")]
|
||||
|
||||
for path in input_paths:
|
||||
in_doc = InputDocument(
|
||||
|
Loading…
Reference in New Issue
Block a user