mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 07:22:14 +00:00
Fixed example run_md, added origin info to md_backend
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
1456a36618
commit
8c60dfa0e6
@ -10,6 +10,7 @@ import marko.inline
|
|||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
|
DocumentOrigin,
|
||||||
GroupLabel,
|
GroupLabel,
|
||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
@ -235,7 +236,22 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
def convert(self) -> DoclingDocument:
|
def convert(self) -> DoclingDocument:
|
||||||
_log.debug("converting Markdown...")
|
_log.debug("converting Markdown...")
|
||||||
doc = DoclingDocument(name="Test")
|
|
||||||
|
fname = ""
|
||||||
|
if isinstance(self.path_or_stream, Path):
|
||||||
|
fname = self.path_or_stream.name
|
||||||
|
|
||||||
|
origin = DocumentOrigin(
|
||||||
|
filename=fname,
|
||||||
|
mimetype="text/markdown",
|
||||||
|
binary_hash=self.document_hash,
|
||||||
|
)
|
||||||
|
if len(fname) > 0:
|
||||||
|
docname = Path(fname).stem
|
||||||
|
else:
|
||||||
|
docname = "stream"
|
||||||
|
|
||||||
|
doc = DoclingDocument(name=docname, origin=origin)
|
||||||
|
|
||||||
if self.is_valid():
|
if self.is_valid():
|
||||||
# Parse the markdown into an abstract syntax tree (AST)
|
# Parse the markdown into an abstract syntax tree (AST)
|
||||||
|
@ -13,14 +13,7 @@ _log = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
input_paths = [
|
input_paths = [Path("README.md")]
|
||||||
Path("README.md"),
|
|
||||||
Path("scratch_a/2203.01017v2.md"),
|
|
||||||
Path("scratch_a/2206.01062.md"),
|
|
||||||
Path("scratch_a/2305.03393v1.md"),
|
|
||||||
Path("scratch_a/redp5110.md"),
|
|
||||||
Path("scratch_a/redp5695.md"),
|
|
||||||
]
|
|
||||||
|
|
||||||
for path in input_paths:
|
for path in input_paths:
|
||||||
in_doc = InputDocument(
|
in_doc = InputDocument(
|
||||||
|
Loading…
Reference in New Issue
Block a user