chore: switch to docling-core-provided MD export

Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com>
This commit is contained in:
Panos Vagenas 2024-07-18 11:07:34 +02:00
parent f09ffcc8f4
commit cbf92a6c93
3 changed files with 7 additions and 10 deletions

View File

@@ -3,7 +3,6 @@ from io import BytesIO
from pathlib import Path, PurePath from pathlib import Path, PurePath
from typing import ClassVar, Dict, Iterable, List, Optional, Type, Union from typing import ClassVar, Dict, Iterable, List, Optional, Type, Union
from deepsearch.documents.core.export import export_to_markdown
from docling_core.types import BaseCell, BaseText from docling_core.types import BaseCell, BaseText
from docling_core.types import BoundingBox as DsBoundingBox from docling_core.types import BoundingBox as DsBoundingBox
from docling_core.types import Document as DsDocument from docling_core.types import Document as DsDocument
@@ -299,9 +298,7 @@ class ConvertedDocument(BaseModel):
def render_as_markdown(self): def render_as_markdown(self):
if self.output: if self.output:
return export_to_markdown( return self.output.export_to_markdown()
self.output.model_dump(by_alias=True, exclude_none=True)
)
else: else:
return "" return ""

9
poetry.lock generated
View File

@@ -707,13 +707,13 @@ files = [
[[package]] [[package]]
name = "docling-core" name = "docling-core"
version = "0.2.0" version = "1.1.0"
description = "A python library to define and validate data types in Docling." description = "A python library to define and validate data types in Docling."
optional = false optional = false
python-versions = "<4.0,>=3.9" python-versions = "<4.0,>=3.9"
files = [ files = [
{file = "docling_core-0.2.0-py3-none-any.whl", hash = "sha256:e8ff3af4f13a3a1709e2d0be8f1a258bfa71a820a70acab1a3b41f9c10e428b5"}, {file = "docling_core-1.1.0-py3-none-any.whl", hash = "sha256:80096ec6bbce9e616700ccd6bdd5a50e5d1a9a832d7968da3874d54b29962536"},
{file = "docling_core-0.2.0.tar.gz", hash = "sha256:3fa8920d12ce5cf687fc0c43b103a6c3a54a53f9eecdde9fad5dc5f0a0c76d6d"}, {file = "docling_core-1.1.0.tar.gz", hash = "sha256:69bc83d3b192d9e56bb91d77d8434d9fc109f8cb25ab5a285d2f3bccc10899cb"},
] ]
[package.dependencies] [package.dependencies]
@@ -723,6 +723,7 @@ jsonschema = ">=4.16.0,<5.0.0"
poetry = ">=1.8.3,<2.0.0" poetry = ">=1.8.3,<2.0.0"
pydantic = ">=2.6.0,<3.0.0" pydantic = ">=2.6.0,<3.0.0"
pyproject-toml = ">=0.0.10,<0.0.11" pyproject-toml = ">=0.0.10,<0.0.11"
tabulate = ">=0.9.0,<0.10.0"
[[package]] [[package]]
name = "docling-ibm-models" name = "docling-ibm-models"
@@ -4827,4 +4828,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.11" python-versions = "^3.11"
content-hash = "dc19329559f190dfe687b4ee272eb6dac66b3d9fe0398c95c2572e8c63fa23ac" content-hash = "a2de0e26ae072cae8ca01360b4242695427ae2493e860a0bc9e8c11d2d4cf58e"

View File

@@ -23,10 +23,9 @@ packages = [{include = "docling"}]
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.11" python = "^3.11"
pydantic = "^2.0.0" pydantic = "^2.0.0"
docling-core = "^0.2.0" docling-core = "^1.1.0"
docling-ibm-models = "^0.2.0" docling-ibm-models = "^0.2.0"
deepsearch-glm = ">=0.19.0,<1" deepsearch-glm = ">=0.19.0,<1"
deepsearch-toolkit = ">=0.47.0,<1"
filetype = "^1.2.0" filetype = "^1.2.0"
pypdfium2 = "^4.30.0" pypdfium2 = "^4.30.0"
pydantic-settings = "^2.3.0" pydantic-settings = "^2.3.0"