mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Import statement updates from docling-core
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
8710506072
commit
dac82ca7f2
@ -3,7 +3,7 @@ from io import BytesIO
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Set, Union
|
from typing import TYPE_CHECKING, Set, Union
|
||||||
|
|
||||||
from docling_core.types.experimental import DoclingDocument
|
from docling_core.types.doc import DoclingDocument
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
|
@ -5,7 +5,7 @@ from pathlib import Path
|
|||||||
from typing import Iterable, List, Optional, Union
|
from typing import Iterable, List, Optional, Union
|
||||||
|
|
||||||
import pypdfium2 as pdfium
|
import pypdfium2 as pdfium
|
||||||
from docling_core.types.experimental import BoundingBox, CoordOrigin, Size
|
from docling_core.types.doc import BoundingBox, CoordOrigin, Size
|
||||||
from docling_parse.docling_parse import pdf_parser
|
from docling_parse.docling_parse import pdf_parser
|
||||||
from PIL import Image, ImageDraw
|
from PIL import Image, ImageDraw
|
||||||
from pypdfium2 import PdfPage
|
from pypdfium2 import PdfPage
|
||||||
|
@ -4,14 +4,14 @@ from pathlib import Path
|
|||||||
from typing import Set, Union
|
from typing import Set, Union
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from docling_core.types.experimental import (
|
from docling_core.types.doc import (
|
||||||
DescriptionItem,
|
DescriptionItem,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
PictureData,
|
PictureData,
|
||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
)
|
)
|
||||||
from docling_core.types.experimental.labels import DocItemLabel, GroupLabel
|
from docling_core.types.doc.labels import DocItemLabel, GroupLabel
|
||||||
|
|
||||||
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
|
@ -3,7 +3,7 @@ from io import BytesIO
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Set, Union
|
from typing import Set, Union
|
||||||
|
|
||||||
from docling_core.types.experimental import (
|
from docling_core.types.doc import (
|
||||||
DescriptionItem,
|
DescriptionItem,
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
@ -14,7 +14,7 @@ from docling_core.types.experimental import (
|
|||||||
TableCell,
|
TableCell,
|
||||||
TableData,
|
TableData,
|
||||||
)
|
)
|
||||||
from docling_core.types.experimental.base import BoundingBox, CoordOrigin, Size
|
from docling_core.types.doc.base import BoundingBox, CoordOrigin, Size
|
||||||
from pptx import Presentation
|
from pptx import Presentation
|
||||||
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
|
from pptx.enum.shapes import MSO_SHAPE_TYPE, PP_PLACEHOLDER
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ from pathlib import Path
|
|||||||
from typing import Set, Union
|
from typing import Set, Union
|
||||||
|
|
||||||
import docx
|
import docx
|
||||||
from docling_core.types.experimental import (
|
from docling_core.types.doc import (
|
||||||
DescriptionItem,
|
DescriptionItem,
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
DoclingDocument,
|
DoclingDocument,
|
||||||
|
@ -2,8 +2,8 @@ from abc import ABC, abstractmethod
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Iterable, Optional, Set, Union
|
from typing import Iterable, Optional, Set, Union
|
||||||
|
|
||||||
from docling_core.types.doc.doc_ocr import Path
|
from docling_core.types.doc import BoundingBox, Size
|
||||||
from docling_core.types.experimental import BoundingBox, Size
|
from docling_core.types.legacy_doc.doc_ocr import Path
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from docling.backend.abstract_backend import PaginatedDocumentBackend
|
from docling.backend.abstract_backend import PaginatedDocumentBackend
|
||||||
|
@ -6,7 +6,7 @@ from typing import Iterable, List, Optional, Union
|
|||||||
|
|
||||||
import pypdfium2 as pdfium
|
import pypdfium2 as pdfium
|
||||||
import pypdfium2.raw as pdfium_c
|
import pypdfium2.raw as pdfium_c
|
||||||
from docling_core.types.experimental import BoundingBox, CoordOrigin, Size
|
from docling_core.types.doc import BoundingBox, CoordOrigin, Size
|
||||||
from PIL import Image, ImageDraw
|
from PIL import Image, ImageDraw
|
||||||
from pypdfium2 import PdfTextPage
|
from pypdfium2 import PdfTextPage
|
||||||
from pypdfium2._helpers.misc import PdfiumError
|
from pypdfium2._helpers.misc import PdfiumError
|
||||||
|
@ -137,10 +137,10 @@ def convert(
|
|||||||
from_formats: List[InputFormat] = typer.Option(
|
from_formats: List[InputFormat] = typer.Option(
|
||||||
None,
|
None,
|
||||||
"--from",
|
"--from",
|
||||||
help="Specify input formats " "to convert from. Defaults to all formats.",
|
help="Specify input formats to convert from. Defaults to all formats.",
|
||||||
),
|
),
|
||||||
to_formats: List[OutputFormat] = typer.Option(
|
to_formats: List[OutputFormat] = typer.Option(
|
||||||
None, "--to", help="Specify output formats. " "Defaults to Markdown."
|
None, "--to", help="Specify output formats. Defaults to Markdown."
|
||||||
),
|
),
|
||||||
ocr: Annotated[
|
ocr: Annotated[
|
||||||
bool,
|
bool,
|
||||||
@ -148,9 +148,6 @@ def convert(
|
|||||||
..., help="If enabled, the bitmap content will be processed using OCR."
|
..., help="If enabled, the bitmap content will be processed using OCR."
|
||||||
),
|
),
|
||||||
] = True,
|
] = True,
|
||||||
# backend: Annotated[
|
|
||||||
# Backend, typer.Option(..., help="The PDF backend to use.")
|
|
||||||
# ] = Backend.DOCLING,
|
|
||||||
ocr_engine: Annotated[
|
ocr_engine: Annotated[
|
||||||
OcrEngine, typer.Option(..., help="The OCR engine to use.")
|
OcrEngine, typer.Option(..., help="The OCR engine to use.")
|
||||||
] = OcrEngine.EASYOCR,
|
] = OcrEngine.EASYOCR,
|
||||||
@ -196,16 +193,6 @@ def convert(
|
|||||||
export_txt = OutputFormat.TEXT in to_formats
|
export_txt = OutputFormat.TEXT in to_formats
|
||||||
export_doctags = OutputFormat.DOCTAGS in to_formats
|
export_doctags = OutputFormat.DOCTAGS in to_formats
|
||||||
|
|
||||||
# match backend:
|
|
||||||
# case Backend.PYPDFIUM2:
|
|
||||||
# do_cell_matching = ocr # only do cell matching when OCR enabled
|
|
||||||
# pdf_backend = PyPdfiumDocumentBackend
|
|
||||||
# case Backend.DOCLING:
|
|
||||||
# do_cell_matching = True
|
|
||||||
# pdf_backend = DoclingParseDocumentBackend
|
|
||||||
# case _:
|
|
||||||
# raise RuntimeError(f"Unexpected backend type {backend}")
|
|
||||||
|
|
||||||
match ocr_engine:
|
match ocr_engine:
|
||||||
case OcrEngine.EASYOCR:
|
case OcrEngine.EASYOCR:
|
||||||
ocr_options = EasyOcrOptions()
|
ocr_options = EasyOcrOptions()
|
||||||
|
@ -2,9 +2,9 @@ from enum import Enum, auto
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
|
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Union
|
||||||
|
|
||||||
from docling_core.types.experimental import BoundingBox, Size
|
from docling_core.types.doc import BoundingBox, Size
|
||||||
from docling_core.types.experimental.document import PictureData, TableCell
|
from docling_core.types.doc.document import PictureData, TableCell
|
||||||
from docling_core.types.experimental.labels import DocItemLabel
|
from docling_core.types.doc.labels import DocItemLabel
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
from pydantic import BaseModel, ConfigDict
|
from pydantic import BaseModel, ConfigDict
|
||||||
|
|
||||||
|
@ -12,9 +12,7 @@ from docling_core.types import DocumentDescription as DsDocumentDescription
|
|||||||
from docling_core.types import FileInfoObject as DsFileInfoObject
|
from docling_core.types import FileInfoObject as DsFileInfoObject
|
||||||
from docling_core.types import PageDimensions, PageReference, Prov, Ref
|
from docling_core.types import PageDimensions, PageReference, Prov, Ref
|
||||||
from docling_core.types import Table as DsSchemaTable
|
from docling_core.types import Table as DsSchemaTable
|
||||||
from docling_core.types.doc.base import BoundingBox as DsBoundingBox
|
from docling_core.types.doc import (
|
||||||
from docling_core.types.doc.base import Figure, GlmTableCell, TableCell
|
|
||||||
from docling_core.types.experimental import (
|
|
||||||
DescriptionItem,
|
DescriptionItem,
|
||||||
DocItem,
|
DocItem,
|
||||||
DocItemLabel,
|
DocItemLabel,
|
||||||
@ -24,7 +22,9 @@ from docling_core.types.experimental import (
|
|||||||
TableItem,
|
TableItem,
|
||||||
TextItem,
|
TextItem,
|
||||||
)
|
)
|
||||||
from docling_core.types.experimental.document import ListItem
|
from docling_core.types.doc.document import ListItem
|
||||||
|
from docling_core.types.legacy_doc.base import BoundingBox as DsBoundingBox
|
||||||
|
from docling_core.types.legacy_doc.base import Figure, GlmTableCell, TableCell
|
||||||
from docling_core.utils.file import resolve_file_source
|
from docling_core.utils.file import resolve_file_source
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from typing_extensions import deprecated
|
from typing_extensions import deprecated
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, Iterable
|
from typing import Any, Iterable
|
||||||
|
|
||||||
from docling_core.types.experimental import DoclingDocument, NodeItem
|
from docling_core.types.doc import DoclingDocument, NodeItem
|
||||||
|
|
||||||
from docling.datamodel.base_models import Page
|
from docling.datamodel.base_models import Page
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ from abc import abstractmethod
|
|||||||
from typing import Iterable, List, Tuple
|
from typing import Iterable, List, Tuple
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from docling_core.types.experimental import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from PIL import Image, ImageDraw
|
from PIL import Image, ImageDraw
|
||||||
from rtree import index
|
from rtree import index
|
||||||
from scipy.ndimage import find_objects, label
|
from scipy.ndimage import find_objects, label
|
||||||
|
@ -15,10 +15,10 @@ from docling_core.types import DocumentDescription as DsDocumentDescription
|
|||||||
from docling_core.types import FileInfoObject as DsFileInfoObject
|
from docling_core.types import FileInfoObject as DsFileInfoObject
|
||||||
from docling_core.types import PageDimensions, PageReference, Prov, Ref
|
from docling_core.types import PageDimensions, PageReference, Prov, Ref
|
||||||
from docling_core.types import Table as DsSchemaTable
|
from docling_core.types import Table as DsSchemaTable
|
||||||
from docling_core.types.doc.base import BoundingBox as DsBoundingBox
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.doc.base import Figure, TableCell
|
from docling_core.types.doc.document import DoclingDocument
|
||||||
from docling_core.types.experimental import BoundingBox, CoordOrigin
|
from docling_core.types.legacy_doc.base import BoundingBox as DsBoundingBox
|
||||||
from docling_core.types.experimental.document import DoclingDocument
|
from docling_core.types.legacy_doc.base import Figure, TableCell
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
from pydantic import BaseModel, ConfigDict
|
from pydantic import BaseModel, ConfigDict
|
||||||
|
|
||||||
|
@ -1,10 +1,7 @@
|
|||||||
from typing import Any, Iterable
|
from typing import Any, Iterable
|
||||||
|
|
||||||
from docling_core.types.experimental import DoclingDocument, NodeItem
|
from docling_core.types.doc import DoclingDocument, NodeItem
|
||||||
from docling_core.types.experimental.document import (
|
from docling_core.types.doc.document import PictureClassificationData, PictureItem
|
||||||
PictureClassificationData,
|
|
||||||
PictureItem,
|
|
||||||
)
|
|
||||||
|
|
||||||
from docling.models.base_model import BaseEnrichmentModel
|
from docling.models.base_model import BaseEnrichmentModel
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ import logging
|
|||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
from docling_core.types.experimental import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
|
|
||||||
from docling.datamodel.base_models import OcrCell, Page
|
from docling.datamodel.base_models import OcrCell, Page
|
||||||
from docling.datamodel.pipeline_options import EasyOcrOptions
|
from docling.datamodel.pipeline_options import EasyOcrOptions
|
||||||
|
@ -5,8 +5,8 @@ import time
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, List
|
from typing import Iterable, List
|
||||||
|
|
||||||
from docling_core.types.experimental import CoordOrigin
|
from docling_core.types.doc import CoordOrigin
|
||||||
from docling_core.types.experimental.labels import DocItemLabel
|
from docling_core.types.doc.labels import DocItemLabel
|
||||||
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
from docling_ibm_models.layoutmodel.layout_predictor import LayoutPredictor
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
|
|
||||||
|
@ -3,9 +3,9 @@ from pathlib import Path
|
|||||||
from typing import Iterable, List
|
from typing import Iterable, List
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
from docling_core.types.experimental import BoundingBox
|
from docling_core.types.doc import BoundingBox
|
||||||
from docling_core.types.experimental.document import TableCell
|
from docling_core.types.doc.document import TableCell
|
||||||
from docling_core.types.experimental.labels import DocItemLabel
|
from docling_core.types.doc.labels import DocItemLabel
|
||||||
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
|
from docling_ibm_models.tableformer.data_management.tf_predictor import TFPredictor
|
||||||
from PIL import ImageDraw
|
from PIL import ImageDraw
|
||||||
|
|
||||||
|
@ -5,7 +5,7 @@ from subprocess import DEVNULL, PIPE, Popen
|
|||||||
from typing import Iterable, Tuple
|
from typing import Iterable, Tuple
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from docling_core.types.experimental import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
|
|
||||||
from docling.datamodel.base_models import OcrCell, Page
|
from docling.datamodel.base_models import OcrCell, Page
|
||||||
from docling.datamodel.pipeline_options import TesseractCliOcrOptions
|
from docling.datamodel.pipeline_options import TesseractCliOcrOptions
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
from docling_core.types.experimental import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
|
|
||||||
from docling.datamodel.base_models import OcrCell, Page
|
from docling.datamodel.base_models import OcrCell, Page
|
||||||
from docling.datamodel.pipeline_options import TesseractCliOcrOptions
|
from docling.datamodel.pipeline_options import TesseractCliOcrOptions
|
||||||
|
@ -5,7 +5,7 @@ import traceback
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Callable, Iterable, List
|
from typing import Callable, Iterable, List
|
||||||
|
|
||||||
from docling_core.types.experimental import DoclingDocument, NodeItem
|
from docling_core.types.doc import DoclingDocument, NodeItem
|
||||||
|
|
||||||
from docling.backend.abstract_backend import AbstractDocumentBackend
|
from docling.backend.abstract_backend import AbstractDocumentBackend
|
||||||
from docling.backend.pdf_backend import PdfDocumentBackend
|
from docling.backend.pdf_backend import PdfDocumentBackend
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, Dict, Iterable, List, Tuple, Union
|
from typing import Any, Dict, Iterable, List, Tuple, Union
|
||||||
|
|
||||||
from docling_core.types.doc.base import BaseCell, BaseText, Ref, Table
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
from docling_core.types.experimental import BoundingBox, CoordOrigin
|
from docling_core.types.legacy_doc.base import BaseCell, BaseText, Ref, Table
|
||||||
|
|
||||||
from docling.datamodel.base_models import OcrCell
|
from docling.datamodel.base_models import OcrCell
|
||||||
from docling.datamodel.document import ConversionResult, Page
|
from docling.datamodel.document import ConversionResult, Page
|
||||||
|
@ -2,7 +2,7 @@ import copy
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
import networkx as nx
|
import networkx as nx
|
||||||
from docling_core.types.experimental.labels import DocItemLabel
|
from docling_core.types.doc.labels import DocItemLabel
|
||||||
|
|
||||||
logger = logging.getLogger("layout_utils")
|
logger = logging.getLogger("layout_utils")
|
||||||
|
|
||||||
|
@ -32,23 +32,23 @@ def export_documents(
|
|||||||
doc_filename = conv_res.input.file.stem
|
doc_filename = conv_res.input.file.stem
|
||||||
|
|
||||||
if USE_V2:
|
if USE_V2:
|
||||||
# Export Docling document format to JSON (experimental):
|
# Export Docling document format to JSON:
|
||||||
with (output_dir / f"{doc_filename}.json").open("w") as fp:
|
with (output_dir / f"{doc_filename}.json").open("w") as fp:
|
||||||
fp.write(json.dumps(conv_res.document.export_to_dict()))
|
fp.write(json.dumps(conv_res.document.export_to_dict()))
|
||||||
|
|
||||||
# Export Docling document format to YAML (experimental):
|
# Export Docling document format to YAML:
|
||||||
with (output_dir / f"{doc_filename}.yaml").open("w") as fp:
|
with (output_dir / f"{doc_filename}.yaml").open("w") as fp:
|
||||||
fp.write(yaml.safe_dump(conv_res.document.export_to_dict()))
|
fp.write(yaml.safe_dump(conv_res.document.export_to_dict()))
|
||||||
|
|
||||||
# Export Docling document format to doctags (experimental):
|
# Export Docling document format to doctags:
|
||||||
with (output_dir / f"{doc_filename}.doctags.txt").open("w") as fp:
|
with (output_dir / f"{doc_filename}.doctags.txt").open("w") as fp:
|
||||||
fp.write(conv_res.document.export_to_document_tokens())
|
fp.write(conv_res.document.export_to_document_tokens())
|
||||||
|
|
||||||
# Export Docling document format to markdown (experimental):
|
# Export Docling document format to markdown:
|
||||||
with (output_dir / f"{doc_filename}.md").open("w") as fp:
|
with (output_dir / f"{doc_filename}.md").open("w") as fp:
|
||||||
fp.write(conv_res.document.export_to_markdown())
|
fp.write(conv_res.document.export_to_markdown())
|
||||||
|
|
||||||
# Export Docling document format to text (experimental):
|
# Export Docling document format to text:
|
||||||
with (output_dir / f"{doc_filename}.txt").open("w") as fp:
|
with (output_dir / f"{doc_filename}.txt").open("w") as fp:
|
||||||
fp.write(conv_res.document.export_to_markdown(strict_text=True))
|
fp.write(conv_res.document.export_to_markdown(strict_text=True))
|
||||||
|
|
||||||
|
@ -58,8 +58,8 @@ for res in conv_results:
|
|||||||
f"Document {res.input.file.name} converted."
|
f"Document {res.input.file.name} converted."
|
||||||
f"\nSaved markdown output to: {str(out_path)}"
|
f"\nSaved markdown output to: {str(out_path)}"
|
||||||
)
|
)
|
||||||
# print(res.experimental.export_to_markdown())
|
# print(res.document.export_to_markdown())
|
||||||
# Export Docling document format to markdown (experimental):
|
# Export Docling document format to markdown:
|
||||||
with (out_path / f"{res.input.file.name}.md").open("w") as fp:
|
with (out_path / f"{res.input.file.name}.md").open("w") as fp:
|
||||||
fp.write(res.document.export_to_markdown())
|
fp.write(res.document.export_to_markdown())
|
||||||
|
|
||||||
|
32
poetry.lock
generated
32
poetry.lock
generated
@ -885,7 +885,7 @@ files = []
|
|||||||
develop = false
|
develop = false
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "4ddecf80cf5afb4b1488172ecafcf12cb2b8cb9b"}
|
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "6fee533a101ca08f62e88826218c33e0aab2f417"}
|
||||||
docutils = "!=0.21"
|
docutils = "!=0.21"
|
||||||
matplotlib = "^3.7.1"
|
matplotlib = "^3.7.1"
|
||||||
networkx = "^3.1"
|
networkx = "^3.1"
|
||||||
@ -909,8 +909,8 @@ toolkit = ["deepsearch-toolkit (>=0.31.0)"]
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/DS4SD/deepsearch-glm.git"
|
url = "https://github.com/DS4SD/deepsearch-glm.git"
|
||||||
reference = "58c589fc23d675e8098f24ec680a9bf93e2a796e"
|
reference = "c13a6cdda25206911d63a5a28e990217ad823068"
|
||||||
resolved_reference = "58c589fc23d675e8098f24ec680a9bf93e2a796e"
|
resolved_reference = "c13a6cdda25206911d63a5a28e990217ad823068"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dill"
|
name = "dill"
|
||||||
@ -958,8 +958,8 @@ tabulate = "^0.9.0"
|
|||||||
[package.source]
|
[package.source]
|
||||||
type = "git"
|
type = "git"
|
||||||
url = "https://github.com/DS4SD/docling-core.git"
|
url = "https://github.com/DS4SD/docling-core.git"
|
||||||
reference = "4ddecf80cf5afb4b1488172ecafcf12cb2b8cb9b"
|
reference = "6fee533a101ca08f62e88826218c33e0aab2f417"
|
||||||
resolved_reference = "4ddecf80cf5afb4b1488172ecafcf12cb2b8cb9b"
|
resolved_reference = "6fee533a101ca08f62e88826218c33e0aab2f417"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docling-ibm-models"
|
name = "docling-ibm-models"
|
||||||
@ -2296,22 +2296,18 @@ transformers = ">=4.39.0"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "langchain-milvus"
|
name = "langchain-milvus"
|
||||||
version = "0.1.5"
|
version = "0.1.6"
|
||||||
description = "An integration package connecting Milvus and LangChain"
|
description = "An integration package connecting Milvus and LangChain"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "<4.0,>=3.8.1"
|
python-versions = "<4.0,>=3.9"
|
||||||
files = [
|
files = [
|
||||||
{file = "langchain_milvus-0.1.5-py3-none-any.whl", hash = "sha256:74aa487738afde4c3e1346433ef26f9556e599826161562b308d3357d86529fd"},
|
{file = "langchain_milvus-0.1.6-py3-none-any.whl", hash = "sha256:efab3fcf613bd6151735e2c75f3264dba9daecb317b9bb22604c2aac579049a9"},
|
||||||
{file = "langchain_milvus-0.1.5.tar.gz", hash = "sha256:1cceab384783ba264055102e5831451482fd726a68feb64258f6dbbd8d702557"},
|
{file = "langchain_milvus-0.1.6.tar.gz", hash = "sha256:155979a6e5aeb94b0e141a12d2fdb4c34a4d7a0e5da2cec1ae7c9bccf6649205"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
langchain-core = {version = ">=0.2.38,<0.4", markers = "python_version >= \"3.9\""}
|
langchain-core = ">=0.2.38,<0.4"
|
||||||
pymilvus = ">=2.4.3,<3.0.0"
|
pymilvus = ">=2.4.3,<3.0.0"
|
||||||
scipy = [
|
|
||||||
{version = ">=1.9,<2.0", markers = "python_version >= \"3.12\""},
|
|
||||||
{version = ">=1.7,<2.0", markers = "python_version < \"3.12\""},
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "langchain-text-splitters"
|
name = "langchain-text-splitters"
|
||||||
@ -2329,13 +2325,13 @@ langchain-core = ">=0.2.38,<0.3.0"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "langsmith"
|
name = "langsmith"
|
||||||
version = "0.1.134"
|
version = "0.1.135"
|
||||||
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
|
description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform."
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "<4.0,>=3.8.1"
|
python-versions = "<4.0,>=3.8.1"
|
||||||
files = [
|
files = [
|
||||||
{file = "langsmith-0.1.134-py3-none-any.whl", hash = "sha256:ada98ad80ef38807725f32441a472da3dd28394010877751f48f458d3289da04"},
|
{file = "langsmith-0.1.135-py3-none-any.whl", hash = "sha256:b1d1ca3bad483a4239745c57e9b9157b4d099fbf3149be21e3d112c94ede06ac"},
|
||||||
{file = "langsmith-0.1.134.tar.gz", hash = "sha256:23abee3b508875a0e63c602afafffc02442a19cfd88f9daae05b3e9054fd6b61"},
|
{file = "langsmith-0.1.135.tar.gz", hash = "sha256:7abed7e141386af99a2177f0b3600b124ae3ad1b482879ba0724ce92ef998a11"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
@ -7118,4 +7114,4 @@ tesserocr = ["tesserocr"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "9678a9fb33ecbfbc6ec118fd3209aab5ab4e4c90d589e93c4dc7073dc9fb72ae"
|
content-hash = "46f6c1eb76034223f7d65760f6ebe0989ba9e8aff46fcdbce82c147030fcb8be"
|
||||||
|
@ -37,9 +37,9 @@ torchvision = [
|
|||||||
######################
|
######################
|
||||||
python = "^3.10"
|
python = "^3.10"
|
||||||
pydantic = "^2.0.0"
|
pydantic = "^2.0.0"
|
||||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "4ddecf80cf5afb4b1488172ecafcf12cb2b8cb9b"}
|
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "6fee533a101ca08f62e88826218c33e0aab2f417"}
|
||||||
docling-ibm-models = {git = "https://github.com/DS4SD/docling-ibm-models.git", rev = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"}
|
docling-ibm-models = {git = "https://github.com/DS4SD/docling-ibm-models.git", rev = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"}
|
||||||
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "58c589fc23d675e8098f24ec680a9bf93e2a796e"}
|
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "c13a6cdda25206911d63a5a28e990217ad823068"}
|
||||||
docling-parse = "^1.5.1"
|
docling-parse = "^1.5.1"
|
||||||
|
|
||||||
filetype = "^1.2.0"
|
filetype = "^1.2.0"
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from docling_core.types.experimental import BoundingBox
|
from docling_core.types.doc import BoundingBox
|
||||||
|
|
||||||
from docling.backend.docling_parse_backend import (
|
from docling.backend.docling_parse_backend import (
|
||||||
DoclingParseDocumentBackend,
|
DoclingParseDocumentBackend,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from docling_core.types.experimental.base import BoundingBox
|
from docling_core.types.doc.base import BoundingBox
|
||||||
|
|
||||||
from docling.backend.pypdfium2_backend import (
|
from docling.backend.pypdfium2_backend import (
|
||||||
PyPdfiumDocumentBackend,
|
PyPdfiumDocumentBackend,
|
||||||
|
@ -4,7 +4,7 @@ from pathlib import Path
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from docling_core.types import Document as DsDocument
|
from docling_core.types import Document as DsDocument
|
||||||
from docling_core.types.experimental import DoclingDocument
|
from docling_core.types.doc import DoclingDocument
|
||||||
from pydantic import TypeAdapter
|
from pydantic import TypeAdapter
|
||||||
from pydantic.json import pydantic_encoder
|
from pydantic.json import pydantic_encoder
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user