From fa6b7eeec474c4a7ad1223577b0dd7eec2c6c832 Mon Sep 17 00:00:00 2001 From: Christoph Auer Date: Thu, 20 Feb 2025 12:00:55 +0100 Subject: [PATCH] Push final lockfile Signed-off-by: Christoph Auer --- docling/models/readingorder_model.py | 60 ---------------------------- poetry.lock | 38 ++++++++---------- pyproject.toml | 2 +- 3 files changed, 18 insertions(+), 82 deletions(-) diff --git a/docling/models/readingorder_model.py b/docling/models/readingorder_model.py index 1c8d0b57..23bd26af 100644 --- a/docling/models/readingorder_model.py +++ b/docling/models/readingorder_model.py @@ -378,64 +378,4 @@ class ReadingOrderModel: el_merges_mapping, ) - # DEBUG code: - def draw_clusters_and_cells(ds_document, page_no, show: bool = False): - clusters_to_draw = [] - image = copy.deepcopy(conv_res.pages[page_no].image) - for ix, elem in enumerate(ds_document.main_text): - if isinstance(elem, BaseText): - prov = elem.prov[0] # type: ignore - elif isinstance(elem, Ref): - _, arr, index = elem.ref.split("/") - index = int(index) # type: ignore - if arr == "tables": - prov = ds_document.tables[index].prov[0] - elif arr == "figures": - prov = ds_document.pictures[index].prov[0] - else: - prov = None - - if prov and prov.page == page_no: - clusters_to_draw.append( - Cluster( - id=ix, - label=elem.name, - bbox=BoundingBox.from_tuple( - coord=prov.bbox, # type: ignore - origin=CoordOrigin.BOTTOMLEFT, - ).to_top_left_origin(conv_res.pages[page_no].size.height), - ) - ) - - draw = ImageDraw.Draw(image) - for c in clusters_to_draw: - x0, y0, x1, y1 = c.bbox.as_tuple() - draw.rectangle([(x0, y0), (x1, y1)], outline="red") - draw.text((x0 + 2, y0 + 2), f"{c.id}:{c.label}", fill=(255, 0, 0, 255)) - - cell_color = ( - random.randint(30, 140), - random.randint(30, 140), - random.randint(30, 140), - ) - for tc in c.cells: # [:1]: - x0, y0, x1, y1 = tc.bbox.as_tuple() - draw.rectangle([(x0, y0), (x1, y1)], outline=cell_color) - - if show: - image.show() - else: - out_path: Path = ( - Path(settings.debug.debug_output_path) - / f"debug_{conv_res.input.file.stem}" - ) - out_path.mkdir(parents=True, exist_ok=True) - - out_file = out_path / f"doc_page_{page_no:05}.png" - image.save(str(out_file), format="png") - - # for item in ds_doc.page_dimensions: - # page_no = item.page - # draw_clusters_and_cells(ds_doc, page_no) - return docling_doc diff --git a/poetry.lock b/poetry.lock index 1b72fc53..213d6d8f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -849,39 +849,35 @@ chunking = ["semchunk (>=2.2.0,<3.0.0)", "transformers (>=4.34.0,<5.0.0)"] [[package]] name = "docling-ibm-models" -version = "3.3.2" +version = "3.4.0" description = "This package contains the AI models used by the Docling PDF conversion package" optional = false -python-versions = "^3.9" -files = [] -develop = false +python-versions = "<4.0,>=3.9" +files = [ + {file = "docling_ibm_models-3.4.0-py3-none-any.whl", hash = "sha256:186517ff1f76e76113600fa1e5a699927325081a8013fdd5d0551121c2e34190"}, + {file = "docling_ibm_models-3.4.0.tar.gz", hash = "sha256:fb79beeb07d1bb9bc8acf9d0a44643cd7ce1910aa418cd685e2e477b13eeafee"}, +] [package.dependencies] -docling-core = "^2.19.0" +docling-core = ">=2.19.0,<3.0.0" huggingface_hub = ">=0.23,<1" -jsonlines = "^3.1.0" +jsonlines = ">=3.1.0,<4.0.0" numpy = [ {version = ">=1.24.4,<3.0.0", markers = "sys_platform != \"darwin\" or platform_machine != \"x86_64\""}, {version = ">=1.24.4,<2.0.0", markers = "sys_platform == \"darwin\" and platform_machine == \"x86_64\""}, ] -opencv-python-headless = "^4.6.0.66" +opencv-python-headless = ">=4.6.0.66,<5.0.0.0" Pillow = ">=10.0.0,<12.0.0" -pydantic = "^2.0.0" +pydantic = ">=2.0.0,<3.0.0" safetensors = {version = ">=0.4.3,<1", extras = ["torch"]} -torch = "^2.2.2" -torchvision = "^0" -tqdm = "^4.64.0" +torch = ">=2.2.2,<3.0.0" +torchvision = ">=0,<1" +tqdm = ">=4.64.0,<5.0.0" transformers = [ {version = ">=4.42.0,<5.0.0", markers = "sys_platform != \"darwin\" or platform_machine != \"x86_64\""}, {version = ">=4.42.0,<4.43.0", markers = "sys_platform == \"darwin\" and platform_machine == \"x86_64\""}, ] -[package.source] -type = "git" -url = "https://github.com/DS4SD/docling-ibm-models.git" -reference = "dev/add-reading-order" -resolved_reference = "fe9794d472459e17d36bfbb0b247e77b29b80349" - [[package]] name = "docling-parse" version = "3.4.0" @@ -1455,13 +1451,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "huggingface-hub" -version = "0.29.0" +version = "0.29.1" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.29.0-py3-none-any.whl", hash = "sha256:c02daa0b6bafbdacb1320fdfd1dc7151d0940825c88c4ef89837fdb1f6ea0afe"}, - {file = "huggingface_hub-0.29.0.tar.gz", hash = "sha256:64034c852be270cac16c5743fe1f659b14515a9de6342d6f42cbb2ede191fc80"}, + {file = "huggingface_hub-0.29.1-py3-none-any.whl", hash = "sha256:352f69caf16566c7b6de84b54a822f6238e17ddd8ae3da4f8f2272aea5b198d5"}, + {file = "huggingface_hub-0.29.1.tar.gz", hash = "sha256:9524eae42077b8ff4fc459ceb7a514eca1c1232b775276b009709fe2a084f250"}, ] [package.dependencies] @@ -7790,4 +7786,4 @@ vlm = ["transformers", "transformers"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "3f657e7af78058e75dfb9f32e373f7f70e5e68a42a5b3603189e2251be90f349" +content-hash = "21c25b86d88aa138f7faa68fcba95af1d9a5edaa22550bd325997aed6eef44fe" diff --git a/pyproject.toml b/pyproject.toml index 9118cdcc..58deac1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ packages = [{include = "docling"}] python = "^3.9" pydantic = "^2.0.0" docling-core = {extras = ["chunking"], version = "^2.19.0"} -docling-ibm-models = {git = "https://github.com/DS4SD/docling-ibm-models.git", rev = "dev/add-reading-order"} +docling-ibm-models = "^3.4.0" docling-parse = "^3.3.0" filetype = "^1.2.0" pypdfium2 = "^4.30.0"