mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 12:34:22 +00:00
Merge branch 'cau/input-format-abstraction' of github.com:DS4SD/docling into cau/input-format-abstraction
This commit is contained in:
commit
75feef259d
2
.github/workflows/checks.yml
vendored
2
.github/workflows/checks.yml
vendored
@ -26,7 +26,7 @@ jobs:
|
|||||||
poetry run pytest -v tests
|
poetry run pytest -v tests
|
||||||
- name: Run examples
|
- name: Run examples
|
||||||
run: |
|
run: |
|
||||||
for file in examples/*.py; do
|
for file in docs/examples/*.py; do
|
||||||
# Skip batch_convert.py
|
# Skip batch_convert.py
|
||||||
if [[ "$(basename "$file")" == "batch_convert.py" ]]; then
|
if [[ "$(basename "$file")" == "batch_convert.py" ]]; then
|
||||||
echo "Skipping $file"
|
echo "Skipping $file"
|
||||||
|
@ -20,12 +20,12 @@ repos:
|
|||||||
# pass_filenames: false
|
# pass_filenames: false
|
||||||
# language: system
|
# language: system
|
||||||
# files: '\.py$'
|
# files: '\.py$'
|
||||||
# - id: mypy
|
- id: mypy
|
||||||
# name: MyPy
|
name: MyPy
|
||||||
# entry: poetry run mypy docling
|
entry: poetry run mypy docling
|
||||||
# pass_filenames: false
|
pass_filenames: false
|
||||||
# language: system
|
language: system
|
||||||
# files: '\.py$'
|
files: '\.py$'
|
||||||
- id: nbqa_black
|
- id: nbqa_black
|
||||||
name: nbQA Black
|
name: nbQA Black
|
||||||
entry: poetry run nbqa black examples
|
entry: poetry run nbqa black examples
|
||||||
|
@ -2,7 +2,7 @@ import logging
|
|||||||
import random
|
import random
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, List, Optional, Union
|
from typing import TYPE_CHECKING, Iterable, List, Optional, Union
|
||||||
|
|
||||||
import pypdfium2 as pdfium
|
import pypdfium2 as pdfium
|
||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
@ -13,6 +13,9 @@ from pypdfium2 import PdfPage
|
|||||||
from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
|
from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
|
||||||
from docling.datamodel.base_models import Cell, Size
|
from docling.datamodel.base_models import Cell, Size
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from docling.datamodel.document import InputDocument
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,10 +30,10 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
# Initialise the parents for the hierarchy
|
# Initialise the parents for the hierarchy
|
||||||
self.max_levels = 10
|
self.max_levels = 10
|
||||||
self.level = 0
|
self.level = 0
|
||||||
self.parents = {}
|
self.parents = {} # type: ignore
|
||||||
for i in range(0, self.max_levels):
|
for i in range(0, self.max_levels):
|
||||||
self.parents[i] = None
|
self.parents[i] = None
|
||||||
self.labels = {}
|
self.labels = {} # type: ignore
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if isinstance(self.path_or_stream, BytesIO):
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
@ -49,8 +49,9 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
) from e
|
) from e
|
||||||
|
|
||||||
def is_valid(self) -> bool:
|
def is_valid(self) -> bool:
|
||||||
return True
|
return self.soup is not None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
def supports_pagination(cls) -> bool:
|
def supports_pagination(cls) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -68,11 +69,17 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
# access self.path_or_stream to load stuff
|
# access self.path_or_stream to load stuff
|
||||||
doc = DoclingDocument(description=DescriptionItem(), name="dummy")
|
doc = DoclingDocument(description=DescriptionItem(), name="dummy")
|
||||||
_log.debug("Trying to convert HTML...")
|
_log.debug("Trying to convert HTML...")
|
||||||
|
|
||||||
|
if self.is_valid():
|
||||||
|
assert self.soup is not None
|
||||||
# Replace <br> tags with newline characters
|
# Replace <br> tags with newline characters
|
||||||
for br in self.soup.body.find_all("br"):
|
for br in self.soup.body.find_all("br"):
|
||||||
br.replace_with("\n")
|
br.replace_with("\n")
|
||||||
doc = self.walk(self.soup.body, doc)
|
doc = self.walk(self.soup.body, doc)
|
||||||
|
else:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Cannot convert doc with {self.document_hash} because the backend failed to init."
|
||||||
|
)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def walk(self, element, doc):
|
def walk(self, element, doc):
|
||||||
@ -386,7 +393,7 @@ class HTMLDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
if contains_lists is None:
|
if contains_lists is None:
|
||||||
return cell.text
|
return cell.text
|
||||||
else:
|
else:
|
||||||
_log.warn(
|
_log.debug(
|
||||||
"should extract the content correctly for table-cells with lists ..."
|
"should extract the content correctly for table-cells with lists ..."
|
||||||
)
|
)
|
||||||
return cell.text
|
return cell.text
|
||||||
|
@ -42,7 +42,11 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
self.pptx_obj = None
|
self.pptx_obj = None
|
||||||
self.valid = False
|
self.valid = False
|
||||||
try:
|
try:
|
||||||
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
self.pptx_obj = Presentation(self.path_or_stream)
|
self.pptx_obj = Presentation(self.path_or_stream)
|
||||||
|
elif isinstance(self.path_or_stream, Path):
|
||||||
|
self.pptx_obj = Presentation(str(self.path_or_stream))
|
||||||
|
|
||||||
self.valid = True
|
self.valid = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
@ -53,6 +57,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
|
|
||||||
def page_count(self) -> int:
|
def page_count(self) -> int:
|
||||||
if self.is_valid():
|
if self.is_valid():
|
||||||
|
assert self.pptx_obj is not None
|
||||||
return len(self.pptx_obj.slides)
|
return len(self.pptx_obj.slides)
|
||||||
else:
|
else:
|
||||||
return 0
|
return 0
|
||||||
@ -60,6 +65,7 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
def is_valid(self) -> bool:
|
def is_valid(self) -> bool:
|
||||||
return self.valid
|
return self.valid
|
||||||
|
|
||||||
|
@classmethod
|
||||||
def supports_pagination(cls) -> bool:
|
def supports_pagination(cls) -> bool:
|
||||||
return True # True? if so, how to handle pages...
|
return True # True? if so, how to handle pages...
|
||||||
|
|
||||||
@ -151,9 +157,9 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
new_list = None
|
new_list = None
|
||||||
|
|
||||||
if is_a_list:
|
if is_a_list:
|
||||||
_log.info("LIST DETECTED!")
|
_log.debug("LIST DETECTED!")
|
||||||
else:
|
else:
|
||||||
_log.info("No List")
|
_log.debug("No List")
|
||||||
|
|
||||||
# for e in p.iter():
|
# for e in p.iter():
|
||||||
for e in p.iterfind(".//a:r", namespaces={"a": self.namespaces["a"]}):
|
for e in p.iterfind(".//a:r", namespaces={"a": self.namespaces["a"]}):
|
||||||
@ -311,10 +317,10 @@ class MsPowerpointDocumentBackend(DeclarativeDocumentBackend, PaginatedDocumentB
|
|||||||
slide_width = pptx_obj.slide_width
|
slide_width = pptx_obj.slide_width
|
||||||
slide_height = pptx_obj.slide_height
|
slide_height = pptx_obj.slide_height
|
||||||
|
|
||||||
text_content = []
|
text_content = [] # type: ignore
|
||||||
|
|
||||||
max_levels = 10
|
max_levels = 10
|
||||||
parents = {}
|
parents = {} # type: ignore
|
||||||
for i in range(0, max_levels):
|
for i in range(0, max_levels):
|
||||||
parents[i] = None
|
parents[i] = None
|
||||||
|
|
||||||
|
@ -39,7 +39,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
# Initialise the parents for the hierarchy
|
# Initialise the parents for the hierarchy
|
||||||
self.max_levels = 10
|
self.max_levels = 10
|
||||||
self.level_at_new_list = None
|
self.level_at_new_list = None
|
||||||
self.parents = {}
|
self.parents = {} # type: ignore
|
||||||
for i in range(-1, self.max_levels):
|
for i in range(-1, self.max_levels):
|
||||||
self.parents[i] = None
|
self.parents[i] = None
|
||||||
|
|
||||||
@ -55,16 +55,21 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
self.docx_obj = None
|
self.docx_obj = None
|
||||||
try:
|
try:
|
||||||
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
self.docx_obj = docx.Document(self.path_or_stream)
|
self.docx_obj = docx.Document(self.path_or_stream)
|
||||||
|
elif isinstance(self.path_or_stream, Path):
|
||||||
|
self.docx_obj = docx.Document(str(self.path_or_stream))
|
||||||
|
|
||||||
self.valid = True
|
self.valid = True
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"MsPowerpointDocumentBackend could not load document with hash {document_hash}"
|
f"MsPowerpointDocumentBackend could not load document with hash {self.document_hash}"
|
||||||
) from e
|
) from e
|
||||||
|
|
||||||
def is_valid(self) -> bool:
|
def is_valid(self) -> bool:
|
||||||
return True
|
return self.valid
|
||||||
|
|
||||||
|
@classmethod
|
||||||
def supports_pagination(cls) -> bool:
|
def supports_pagination(cls) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@ -81,10 +86,14 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
def convert(self) -> DoclingDocument:
|
def convert(self) -> DoclingDocument:
|
||||||
# Parses the DOCX into a structured document model.
|
# Parses the DOCX into a structured document model.
|
||||||
doc = DoclingDocument(description=DescriptionItem(), name="dummy")
|
doc = DoclingDocument(description=DescriptionItem(), name="dummy")
|
||||||
|
if self.is_valid():
|
||||||
# self.initialise()
|
assert self.docx_obj is not None
|
||||||
doc = self.walk_linear(self.docx_obj.element.body, self.docx_obj, doc)
|
doc = self.walk_linear(self.docx_obj.element.body, self.docx_obj, doc)
|
||||||
return doc
|
return doc
|
||||||
|
else:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Cannot convert doc with {self.document_hash} because the backend failed to init."
|
||||||
|
)
|
||||||
|
|
||||||
def update_history(self, name, level, numid, ilevel):
|
def update_history(self, name, level, numid, ilevel):
|
||||||
self.history["names"].append(name)
|
self.history["names"].append(name)
|
||||||
@ -129,7 +138,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
try:
|
try:
|
||||||
self.handle_tables(element, docx_obj, doc)
|
self.handle_tables(element, docx_obj, doc)
|
||||||
except Exception:
|
except Exception:
|
||||||
_log.error("could not parse a table, broken docx table")
|
_log.debug("could not parse a table, broken docx table")
|
||||||
|
|
||||||
elif found_drawing or found_pict:
|
elif found_drawing or found_pict:
|
||||||
self.handle_pictures(element, docx_obj, doc)
|
self.handle_pictures(element, docx_obj, doc)
|
||||||
@ -137,7 +146,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
elif tag_name in ["p"]:
|
elif tag_name in ["p"]:
|
||||||
self.handle_text_elements(element, docx_obj, doc)
|
self.handle_text_elements(element, docx_obj, doc)
|
||||||
else:
|
else:
|
||||||
_log.warn(f"Ignoring element in DOCX with tag: {tag_name}")
|
_log.debug(f"Ignoring element in DOCX with tag: {tag_name}")
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def str_to_int(self, s, default=0):
|
def str_to_int(self, s, default=0):
|
||||||
@ -333,7 +342,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
level = self.get_level()
|
level = self.get_level()
|
||||||
if self.prev_numid() is None: # Open new list
|
if self.prev_numid() is None: # Open new list
|
||||||
self.level_at_new_list = level
|
self.level_at_new_list = level # type: ignore
|
||||||
|
|
||||||
self.parents[level] = doc.add_group(
|
self.parents[level] = doc.add_group(
|
||||||
label=GroupLabel.LIST, name="list", parent=self.parents[level - 1]
|
label=GroupLabel.LIST, name="list", parent=self.parents[level - 1]
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
from typing import Iterable, Optional, Set, Union
|
from typing import Iterable, Optional, Set, Union
|
||||||
|
|
||||||
from docling_core.types.doc import BoundingBox, Size
|
from docling_core.types.doc import BoundingBox, Size
|
||||||
from docling_core.types.legacy_doc.doc_ocr import Path
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
from docling.backend.abstract_backend import PaginatedDocumentBackend
|
from docling.backend.abstract_backend import PaginatedDocumentBackend
|
||||||
|
@ -2,7 +2,7 @@ import logging
|
|||||||
import random
|
import random
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, List, Optional, Union
|
from typing import TYPE_CHECKING, Iterable, List, Optional, Union
|
||||||
|
|
||||||
import pypdfium2 as pdfium
|
import pypdfium2 as pdfium
|
||||||
import pypdfium2.raw as pdfium_c
|
import pypdfium2.raw as pdfium_c
|
||||||
@ -14,6 +14,9 @@ from pypdfium2._helpers.misc import PdfiumError
|
|||||||
from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
|
from docling.backend.pdf_backend import PdfDocumentBackend, PdfPageBackend
|
||||||
from docling.datamodel.base_models import Cell
|
from docling.datamodel.base_models import Cell
|
||||||
|
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from docling.datamodel.document import InputDocument
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -5,13 +5,12 @@ import time
|
|||||||
import warnings
|
import warnings
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Annotated, Iterable, List, Optional
|
from typing import Annotated, Dict, Iterable, List, Optional
|
||||||
|
|
||||||
import typer
|
import typer
|
||||||
from docling_core.utils.file import resolve_file_source
|
from docling_core.utils.file import resolve_file_source
|
||||||
|
|
||||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
||||||
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
ConversionStatus,
|
ConversionStatus,
|
||||||
FormatToExtensions,
|
FormatToExtensions,
|
||||||
@ -21,11 +20,12 @@ from docling.datamodel.base_models import (
|
|||||||
from docling.datamodel.document import ConversionResult
|
from docling.datamodel.document import ConversionResult
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
EasyOcrOptions,
|
EasyOcrOptions,
|
||||||
|
OcrOptions,
|
||||||
PdfPipelineOptions,
|
PdfPipelineOptions,
|
||||||
TesseractCliOcrOptions,
|
TesseractCliOcrOptions,
|
||||||
TesseractOcrOptions,
|
TesseractOcrOptions,
|
||||||
)
|
)
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
|
||||||
|
|
||||||
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
|
warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch")
|
||||||
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
|
warnings.filterwarnings(action="ignore", category=FutureWarning, module="easyocr")
|
||||||
@ -151,6 +151,14 @@ def convert(
|
|||||||
ocr_engine: Annotated[
|
ocr_engine: Annotated[
|
||||||
OcrEngine, typer.Option(..., help="The OCR engine to use.")
|
OcrEngine, typer.Option(..., help="The OCR engine to use.")
|
||||||
] = OcrEngine.EASYOCR,
|
] = OcrEngine.EASYOCR,
|
||||||
|
abort_on_error: Annotated[
|
||||||
|
bool,
|
||||||
|
typer.Option(
|
||||||
|
...,
|
||||||
|
"--abort-on-error/--no-abort-on-error",
|
||||||
|
help="If enabled, the bitmap content will be processed using OCR.",
|
||||||
|
),
|
||||||
|
] = False,
|
||||||
output: Annotated[
|
output: Annotated[
|
||||||
Path, typer.Option(..., help="Output directory where results are saved.")
|
Path, typer.Option(..., help="Output directory where results are saved.")
|
||||||
] = Path("."),
|
] = Path("."),
|
||||||
@ -179,7 +187,7 @@ def convert(
|
|||||||
raise typer.Abort()
|
raise typer.Abort()
|
||||||
elif source.is_dir():
|
elif source.is_dir():
|
||||||
for fmt in from_formats:
|
for fmt in from_formats:
|
||||||
for ext in FormatToExtensions.get(fmt):
|
for ext in FormatToExtensions[fmt]:
|
||||||
input_doc_paths.extend(list(source.glob(f"**/*.{ext}")))
|
input_doc_paths.extend(list(source.glob(f"**/*.{ext}")))
|
||||||
input_doc_paths.extend(list(source.glob(f"**/*.{ext.upper()}")))
|
input_doc_paths.extend(list(source.glob(f"**/*.{ext.upper()}")))
|
||||||
else:
|
else:
|
||||||
@ -195,7 +203,7 @@ def convert(
|
|||||||
|
|
||||||
match ocr_engine:
|
match ocr_engine:
|
||||||
case OcrEngine.EASYOCR:
|
case OcrEngine.EASYOCR:
|
||||||
ocr_options = EasyOcrOptions()
|
ocr_options: OcrOptions = EasyOcrOptions()
|
||||||
case OcrEngine.TESSERACT_CLI:
|
case OcrEngine.TESSERACT_CLI:
|
||||||
ocr_options = TesseractCliOcrOptions()
|
ocr_options = TesseractCliOcrOptions()
|
||||||
case OcrEngine.TESSERACT:
|
case OcrEngine.TESSERACT:
|
||||||
@ -210,18 +218,22 @@ def convert(
|
|||||||
)
|
)
|
||||||
pipeline_options.table_structure_options.do_cell_matching = True # do_cell_matching
|
pipeline_options.table_structure_options.do_cell_matching = True # do_cell_matching
|
||||||
|
|
||||||
doc_converter = DocumentConverter(
|
format_options: Dict[InputFormat, FormatOption] = {
|
||||||
format_options={
|
|
||||||
InputFormat.PDF: PdfFormatOption(
|
InputFormat.PDF: PdfFormatOption(
|
||||||
pipeline_options=pipeline_options,
|
pipeline_options=pipeline_options,
|
||||||
backend=DoclingParseDocumentBackend, # pdf_backend
|
backend=DoclingParseDocumentBackend, # pdf_backend
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
doc_converter = DocumentConverter(
|
||||||
|
allowed_formats=from_formats,
|
||||||
|
format_options=format_options,
|
||||||
)
|
)
|
||||||
|
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
conv_results = doc_converter.convert_all(input_doc_paths)
|
conv_results = doc_converter.convert_all(
|
||||||
|
input_doc_paths, raises_on_error=abort_on_error
|
||||||
|
)
|
||||||
|
|
||||||
output.mkdir(parents=True, exist_ok=True)
|
output.mkdir(parents=True, exist_ok=True)
|
||||||
export_documents(
|
export_documents(
|
||||||
|
@ -126,7 +126,8 @@ class TableStructurePrediction(BaseModel):
|
|||||||
table_map: Dict[int, Table] = {}
|
table_map: Dict[int, Table] = {}
|
||||||
|
|
||||||
|
|
||||||
class TextElement(BasePageElement): ...
|
class TextElement(BasePageElement):
|
||||||
|
text: str
|
||||||
|
|
||||||
|
|
||||||
class FigureElement(BasePageElement):
|
class FigureElement(BasePageElement):
|
||||||
|
@ -3,7 +3,7 @@ import re
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path, PurePath
|
from pathlib import Path, PurePath
|
||||||
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Union
|
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Type, Union
|
||||||
|
|
||||||
import filetype
|
import filetype
|
||||||
from docling_core.types import BaseText
|
from docling_core.types import BaseText
|
||||||
@ -23,13 +23,15 @@ from docling_core.types.doc import (
|
|||||||
TextItem,
|
TextItem,
|
||||||
)
|
)
|
||||||
from docling_core.types.doc.document import ListItem
|
from docling_core.types.doc.document import ListItem
|
||||||
from docling_core.types.legacy_doc.base import BoundingBox as DsBoundingBox
|
|
||||||
from docling_core.types.legacy_doc.base import Figure, GlmTableCell, TableCell
|
from docling_core.types.legacy_doc.base import Figure, GlmTableCell, TableCell
|
||||||
from docling_core.utils.file import resolve_file_source
|
from docling_core.utils.file import resolve_file_source
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from typing_extensions import deprecated
|
from typing_extensions import deprecated
|
||||||
|
|
||||||
from docling.backend.abstract_backend import AbstractDocumentBackend
|
from docling.backend.abstract_backend import (
|
||||||
|
AbstractDocumentBackend,
|
||||||
|
PaginatedDocumentBackend,
|
||||||
|
)
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
AssembledUnit,
|
AssembledUnit,
|
||||||
ConversionStatus,
|
ConversionStatus,
|
||||||
@ -40,8 +42,6 @@ from docling.datamodel.base_models import (
|
|||||||
MimeTypeToFormat,
|
MimeTypeToFormat,
|
||||||
Page,
|
Page,
|
||||||
PageElement,
|
PageElement,
|
||||||
Table,
|
|
||||||
TextElement,
|
|
||||||
)
|
)
|
||||||
from docling.datamodel.settings import DocumentLimits
|
from docling.datamodel.settings import DocumentLimits
|
||||||
from docling.utils.utils import create_file_hash, create_hash
|
from docling.utils.utils import create_file_hash, create_hash
|
||||||
@ -70,41 +70,34 @@ layout_label_to_ds_type = {
|
|||||||
DocItemLabel.PARAGRAPH: "paragraph",
|
DocItemLabel.PARAGRAPH: "paragraph",
|
||||||
}
|
}
|
||||||
|
|
||||||
_EMPTY_LEGACY_DOC = DsDocument(
|
|
||||||
_name="",
|
|
||||||
description=DsDocumentDescription(logs=[]),
|
|
||||||
file_info=DsFileInfoObject(
|
|
||||||
filename="",
|
|
||||||
document_hash="",
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
_EMPTY_DOCLING_DOC = DoclingDocument(
|
_EMPTY_DOCLING_DOC = DoclingDocument(
|
||||||
description=DescriptionItem(), name="dummy"
|
description=DescriptionItem(), name="dummy"
|
||||||
) # TODO: Stub
|
) # TODO: Stub
|
||||||
|
|
||||||
|
|
||||||
class InputDocument(BaseModel):
|
class InputDocument(BaseModel):
|
||||||
file: PurePath = None
|
file: PurePath
|
||||||
document_hash: Optional[str] = None
|
document_hash: str # = None
|
||||||
valid: bool = True
|
valid: bool = True
|
||||||
limits: DocumentLimits = DocumentLimits()
|
limits: DocumentLimits = DocumentLimits()
|
||||||
format: Optional[InputFormat] = None
|
format: InputFormat # = None
|
||||||
|
|
||||||
filesize: Optional[int] = None
|
filesize: Optional[int] = None
|
||||||
page_count: int = 0
|
page_count: int = 0
|
||||||
|
|
||||||
_backend: AbstractDocumentBackend = None # Internal PDF backend used
|
_backend: AbstractDocumentBackend # Internal PDF backend used
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
path_or_stream: Union[BytesIO, Path],
|
path_or_stream: Union[BytesIO, Path],
|
||||||
format: InputFormat,
|
format: InputFormat,
|
||||||
backend: AbstractDocumentBackend,
|
backend: Type[AbstractDocumentBackend],
|
||||||
filename: Optional[str] = None,
|
filename: Optional[str] = None,
|
||||||
limits: Optional[DocumentLimits] = None,
|
limits: Optional[DocumentLimits] = None,
|
||||||
):
|
):
|
||||||
super().__init__()
|
super().__init__(
|
||||||
|
file="", document_hash="", format=InputFormat.PDF
|
||||||
|
) # initialize with dummy values
|
||||||
|
|
||||||
self.limits = limits or DocumentLimits()
|
self.limits = limits or DocumentLimits()
|
||||||
self.format = format
|
self.format = format
|
||||||
@ -120,6 +113,9 @@ class InputDocument(BaseModel):
|
|||||||
self._init_doc(backend, path_or_stream)
|
self._init_doc(backend, path_or_stream)
|
||||||
|
|
||||||
elif isinstance(path_or_stream, BytesIO):
|
elif isinstance(path_or_stream, BytesIO):
|
||||||
|
assert (
|
||||||
|
filename is not None
|
||||||
|
), "Can't construct InputDocument from stream without providing filename arg."
|
||||||
self.file = PurePath(filename)
|
self.file = PurePath(filename)
|
||||||
self.filesize = path_or_stream.getbuffer().nbytes
|
self.filesize = path_or_stream.getbuffer().nbytes
|
||||||
|
|
||||||
@ -128,10 +124,16 @@ class InputDocument(BaseModel):
|
|||||||
else:
|
else:
|
||||||
self.document_hash = create_file_hash(path_or_stream)
|
self.document_hash = create_file_hash(path_or_stream)
|
||||||
self._init_doc(backend, path_or_stream)
|
self._init_doc(backend, path_or_stream)
|
||||||
|
else:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"Unexpected type path_or_stream: {type(path_or_stream)}"
|
||||||
|
)
|
||||||
|
|
||||||
# For paginated backends, check if the maximum page count is exceeded.
|
# For paginated backends, check if the maximum page count is exceeded.
|
||||||
if self.valid and self._backend.is_valid():
|
if self.valid and self._backend.is_valid():
|
||||||
if self._backend.supports_pagination():
|
if self._backend.supports_pagination() and isinstance(
|
||||||
|
self._backend, PaginatedDocumentBackend
|
||||||
|
):
|
||||||
self.page_count = self._backend.page_count()
|
self.page_count = self._backend.page_count()
|
||||||
if not self.page_count <= self.limits.max_num_pages:
|
if not self.page_count <= self.limits.max_num_pages:
|
||||||
self.valid = False
|
self.valid = False
|
||||||
@ -150,12 +152,12 @@ class InputDocument(BaseModel):
|
|||||||
|
|
||||||
def _init_doc(
|
def _init_doc(
|
||||||
self,
|
self,
|
||||||
backend: AbstractDocumentBackend,
|
backend: Type[AbstractDocumentBackend],
|
||||||
path_or_stream: Union[BytesIO, Path],
|
path_or_stream: Union[BytesIO, Path],
|
||||||
) -> None:
|
) -> None:
|
||||||
if backend is None:
|
if backend is None:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"No backend configuration provided for file {self.file} with format {self.format}. "
|
f"No backend configuration provided for file {self.file.name} with format {self.format}. "
|
||||||
f"Please check your format configuration on DocumentConverter."
|
f"Please check your format configuration on DocumentConverter."
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -436,17 +438,22 @@ class ConversionResult(BaseModel):
|
|||||||
return ds_doc
|
return ds_doc
|
||||||
|
|
||||||
def render_element_images(
|
def render_element_images(
|
||||||
self, element_types: Tuple[PageElement] = (FigureElement,)
|
self, element_types: Tuple[Type[PageElement]] = (FigureElement,)
|
||||||
):
|
):
|
||||||
for element in self.assembled.elements:
|
for element in self.assembled.elements:
|
||||||
if isinstance(element, element_types):
|
if isinstance(element, element_types):
|
||||||
page_ix = element.page_no
|
page_ix = element.page_no
|
||||||
scale = self.pages[page_ix]._default_image_scale
|
page = self.pages[page_ix]
|
||||||
crop_bbox = element.cluster.bbox.scaled(scale=scale).to_top_left_origin(
|
|
||||||
page_height=self.pages[page_ix].size.height * scale
|
|
||||||
)
|
|
||||||
|
|
||||||
cropped_im = self.pages[page_ix].image.crop(crop_bbox.as_tuple())
|
assert page.size is not None
|
||||||
|
|
||||||
|
scale = page._default_image_scale
|
||||||
|
crop_bbox = element.cluster.bbox.scaled(scale=scale).to_top_left_origin(
|
||||||
|
page_height=page.size.height * scale
|
||||||
|
)
|
||||||
|
page_img = page.image
|
||||||
|
if page_img is not None:
|
||||||
|
cropped_im = page_img.crop(crop_bbox.as_tuple())
|
||||||
yield element, cropped_im
|
yield element, cropped_im
|
||||||
|
|
||||||
|
|
||||||
@ -462,12 +469,12 @@ class _DocumentConversionInput(BaseModel):
|
|||||||
obj = resolve_file_source(item) if isinstance(item, str) else item
|
obj = resolve_file_source(item) if isinstance(item, str) else item
|
||||||
format = self._guess_format(obj)
|
format = self._guess_format(obj)
|
||||||
if format not in format_options.keys():
|
if format not in format_options.keys():
|
||||||
_log.debug(
|
_log.info(
|
||||||
f"Skipping input document {obj.name} because its format is not in the whitelist."
|
f"Skipping input document {obj.name} because it isn't matching any of the allowed formats."
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
backend = format_options.get(format).backend
|
backend = format_options[format].backend
|
||||||
|
|
||||||
if isinstance(obj, Path):
|
if isinstance(obj, Path):
|
||||||
yield InputDocument(
|
yield InputDocument(
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
import warnings
|
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Annotated, List, Literal, Optional, Union
|
from typing import List, Literal, Optional, Union
|
||||||
|
|
||||||
from pydantic import BaseModel, ConfigDict, Field, model_validator
|
from pydantic import BaseModel, ConfigDict, Field
|
||||||
|
|
||||||
|
|
||||||
class TableFormerMode(str, Enum):
|
class TableFormerMode(str, Enum):
|
||||||
|
@ -111,6 +111,14 @@ class DocumentConverter:
|
|||||||
_log.debug(f"Requested format {f} will use default options.")
|
_log.debug(f"Requested format {f} will use default options.")
|
||||||
self.format_to_options[f] = _format_to_default_options[f]
|
self.format_to_options[f] = _format_to_default_options[f]
|
||||||
|
|
||||||
|
remove_keys = []
|
||||||
|
for f in self.format_to_options.keys():
|
||||||
|
if f not in self.allowed_formats:
|
||||||
|
remove_keys.append(f)
|
||||||
|
|
||||||
|
for f in remove_keys:
|
||||||
|
self.format_to_options.pop(f)
|
||||||
|
|
||||||
self.initialized_pipelines: Dict[Type[BasePipeline], BasePipeline] = {}
|
self.initialized_pipelines: Dict[Type[BasePipeline], BasePipeline] = {}
|
||||||
|
|
||||||
@validate_call(config=ConfigDict(strict=True))
|
@validate_call(config=ConfigDict(strict=True))
|
||||||
@ -161,6 +169,8 @@ class DocumentConverter:
|
|||||||
def _convert(
|
def _convert(
|
||||||
self, conv_input: _DocumentConversionInput, raises_on_error: bool
|
self, conv_input: _DocumentConversionInput, raises_on_error: bool
|
||||||
) -> Iterator[ConversionResult]:
|
) -> Iterator[ConversionResult]:
|
||||||
|
assert self.format_to_options is not None
|
||||||
|
|
||||||
for input_batch in chunkify(
|
for input_batch in chunkify(
|
||||||
conv_input.docs(self.format_to_options),
|
conv_input.docs(self.format_to_options),
|
||||||
settings.perf.doc_batch_size, # pass format_options
|
settings.perf.doc_batch_size, # pass format_options
|
||||||
@ -174,13 +184,15 @@ class DocumentConverter:
|
|||||||
|
|
||||||
# Note: PDF backends are not thread-safe, thread pool usage was disabled.
|
# Note: PDF backends are not thread-safe, thread pool usage was disabled.
|
||||||
for item in map(
|
for item in map(
|
||||||
partial(self.process_document, raises_on_error=raises_on_error),
|
partial(self._process_document, raises_on_error=raises_on_error),
|
||||||
input_batch,
|
input_batch,
|
||||||
):
|
):
|
||||||
if item is not None:
|
if item is not None:
|
||||||
yield item
|
yield item
|
||||||
|
|
||||||
def _get_pipeline(self, doc: InputDocument) -> Optional[BasePipeline]:
|
def _get_pipeline(self, doc: InputDocument) -> Optional[BasePipeline]:
|
||||||
|
assert self.format_to_options is not None
|
||||||
|
|
||||||
fopt = self.format_to_options.get(doc.format)
|
fopt = self.format_to_options.get(doc.format)
|
||||||
|
|
||||||
if fopt is None:
|
if fopt is None:
|
||||||
@ -189,6 +201,7 @@ class DocumentConverter:
|
|||||||
pipeline_class = fopt.pipeline_cls
|
pipeline_class = fopt.pipeline_cls
|
||||||
pipeline_options = fopt.pipeline_options
|
pipeline_options = fopt.pipeline_options
|
||||||
|
|
||||||
|
assert pipeline_options is not None
|
||||||
# TODO this will ignore if different options have been defined for the same pipeline class.
|
# TODO this will ignore if different options have been defined for the same pipeline class.
|
||||||
if (
|
if (
|
||||||
pipeline_class not in self.initialized_pipelines
|
pipeline_class not in self.initialized_pipelines
|
||||||
@ -200,33 +213,44 @@ class DocumentConverter:
|
|||||||
)
|
)
|
||||||
return self.initialized_pipelines[pipeline_class]
|
return self.initialized_pipelines[pipeline_class]
|
||||||
|
|
||||||
def process_document(
|
def _process_document(
|
||||||
self, in_doc: InputDocument, raises_on_error: bool
|
self, in_doc: InputDocument, raises_on_error: bool
|
||||||
) -> ConversionResult:
|
) -> Optional[ConversionResult]:
|
||||||
if in_doc.format not in self.allowed_formats:
|
assert self.allowed_formats is not None
|
||||||
return None
|
assert in_doc.format in self.allowed_formats
|
||||||
else:
|
|
||||||
start_doc_time = time.time()
|
start_doc_time = time.time()
|
||||||
|
|
||||||
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
|
conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)
|
||||||
|
|
||||||
end_doc_time = time.time() - start_doc_time
|
end_doc_time = time.time() - start_doc_time
|
||||||
_log.info(f"Finished converting document in {end_doc_time:.2f} seconds.")
|
_log.info(
|
||||||
|
f"Finished converting document {in_doc.file.name} in {end_doc_time:.2f} seconds."
|
||||||
|
)
|
||||||
|
|
||||||
return conv_res
|
return conv_res
|
||||||
|
|
||||||
def _execute_pipeline(
|
def _execute_pipeline(
|
||||||
self, in_doc: InputDocument, raises_on_error: bool
|
self, in_doc: InputDocument, raises_on_error: bool
|
||||||
) -> Optional[ConversionResult]:
|
) -> ConversionResult:
|
||||||
if in_doc.valid:
|
if in_doc.valid:
|
||||||
pipeline = self._get_pipeline(in_doc)
|
pipeline = self._get_pipeline(in_doc)
|
||||||
if pipeline is None: # Can't find a default pipeline. Should this raise?
|
if pipeline is None: # Can't find a default pipeline. Should this raise?
|
||||||
|
if raises_on_error:
|
||||||
|
raise RuntimeError(
|
||||||
|
f"No pipeline could be initialized for {in_doc.file}."
|
||||||
|
)
|
||||||
|
else:
|
||||||
conv_res = ConversionResult(input=in_doc)
|
conv_res = ConversionResult(input=in_doc)
|
||||||
conv_res.status = ConversionStatus.FAILURE
|
conv_res.status = ConversionStatus.FAILURE
|
||||||
return conv_res
|
return conv_res
|
||||||
|
|
||||||
conv_res = pipeline.execute(in_doc, raises_on_error=raises_on_error)
|
conv_res = pipeline.execute(in_doc, raises_on_error=raises_on_error)
|
||||||
|
|
||||||
|
else:
|
||||||
|
if raises_on_error:
|
||||||
|
raise RuntimeError(f"Input document {in_doc.file} is not valid.")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# invalid doc or not of desired format
|
# invalid doc or not of desired format
|
||||||
conv_res = ConversionResult(input=in_doc)
|
conv_res = ConversionResult(input=in_doc)
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
import copy
|
import copy
|
||||||
import logging
|
import logging
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from typing import Iterable, List, Tuple
|
from typing import Iterable, List
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
@ -21,8 +21,9 @@ class BaseOcrModel:
|
|||||||
self.options = options
|
self.options = options
|
||||||
|
|
||||||
# Computes the optimum amount and coordinates of rectangles to OCR on a given page
|
# Computes the optimum amount and coordinates of rectangles to OCR on a given page
|
||||||
def get_ocr_rects(self, page: Page) -> Tuple[bool, List[BoundingBox]]:
|
def get_ocr_rects(self, page: Page) -> List[BoundingBox]:
|
||||||
BITMAP_COVERAGE_TRESHOLD = 0.75
|
BITMAP_COVERAGE_TRESHOLD = 0.75
|
||||||
|
assert page.size is not None
|
||||||
|
|
||||||
def find_ocr_rects(size, bitmap_rects):
|
def find_ocr_rects(size, bitmap_rects):
|
||||||
image = Image.new(
|
image = Image.new(
|
||||||
@ -61,7 +62,10 @@ class BaseOcrModel:
|
|||||||
|
|
||||||
return (area_frac, bounding_boxes) # fraction covered # boxes
|
return (area_frac, bounding_boxes) # fraction covered # boxes
|
||||||
|
|
||||||
|
if page._backend is not None:
|
||||||
bitmap_rects = page._backend.get_bitmap_rects()
|
bitmap_rects = page._backend.get_bitmap_rects()
|
||||||
|
else:
|
||||||
|
bitmap_rects = []
|
||||||
coverage, ocr_rects = find_ocr_rects(page.size, bitmap_rects)
|
coverage, ocr_rects = find_ocr_rects(page.size, bitmap_rects)
|
||||||
|
|
||||||
# return full-page rectangle if sufficiently covered with bitmaps
|
# return full-page rectangle if sufficiently covered with bitmaps
|
||||||
@ -76,7 +80,7 @@ class BaseOcrModel:
|
|||||||
)
|
)
|
||||||
]
|
]
|
||||||
# return individual rectangles if the bitmap coverage is smaller
|
# return individual rectangles if the bitmap coverage is smaller
|
||||||
elif coverage < BITMAP_COVERAGE_TRESHOLD:
|
else: # coverage <= BITMAP_COVERAGE_TRESHOLD:
|
||||||
return ocr_rects
|
return ocr_rects
|
||||||
|
|
||||||
# Filters OCR cells by dropping any OCR cell that intersects with an existing programmatic cell.
|
# Filters OCR cells by dropping any OCR cell that intersects with an existing programmatic cell.
|
||||||
|
@ -1,16 +1,12 @@
|
|||||||
import copy
|
import copy
|
||||||
import random
|
import random
|
||||||
from typing import Tuple
|
from typing import List, Union
|
||||||
|
|
||||||
from deepsearch_glm.nlp_utils import init_nlp_model
|
from deepsearch_glm.nlp_utils import init_nlp_model
|
||||||
from deepsearch_glm.utils.doc_utils import (
|
from deepsearch_glm.utils.doc_utils import to_docling_document
|
||||||
to_docling_document,
|
|
||||||
to_legacy_document_format,
|
|
||||||
)
|
|
||||||
from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models
|
from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models
|
||||||
from docling_core.types import BaseText
|
from docling_core.types import BaseText
|
||||||
from docling_core.types import Document as DsDocument
|
from docling_core.types import Document as DsDocument
|
||||||
from docling_core.types import Document as DsLegacyDocument
|
|
||||||
from docling_core.types import DocumentDescription as DsDocumentDescription
|
from docling_core.types import DocumentDescription as DsDocumentDescription
|
||||||
from docling_core.types import FileInfoObject as DsFileInfoObject
|
from docling_core.types import FileInfoObject as DsFileInfoObject
|
||||||
from docling_core.types import PageDimensions, PageReference, Prov, Ref
|
from docling_core.types import PageDimensions, PageReference, Prov, Ref
|
||||||
@ -42,7 +38,7 @@ class GlmModel:
|
|||||||
|
|
||||||
def _to_legacy_document(self, conv_res) -> DsDocument:
|
def _to_legacy_document(self, conv_res) -> DsDocument:
|
||||||
title = ""
|
title = ""
|
||||||
desc = DsDocumentDescription(logs=[])
|
desc: DsDocumentDescription = DsDocumentDescription(logs=[])
|
||||||
|
|
||||||
page_hashes = [
|
page_hashes = [
|
||||||
PageReference(
|
PageReference(
|
||||||
@ -60,9 +56,9 @@ class GlmModel:
|
|||||||
page_hashes=page_hashes,
|
page_hashes=page_hashes,
|
||||||
)
|
)
|
||||||
|
|
||||||
main_text = []
|
main_text: List[Union[Ref, BaseText]] = []
|
||||||
tables = []
|
tables: List[DsSchemaTable] = []
|
||||||
figures = []
|
figures: List[Figure] = []
|
||||||
|
|
||||||
page_no_to_page = {p.page_no: p for p in conv_res.pages}
|
page_no_to_page = {p.page_no: p for p in conv_res.pages}
|
||||||
|
|
||||||
@ -146,11 +142,16 @@ class GlmModel:
|
|||||||
yield [rspan, cspan]
|
yield [rspan, cspan]
|
||||||
|
|
||||||
spans = list(make_spans(cell))
|
spans = list(make_spans(cell))
|
||||||
table_data[i][j] = TableCell(
|
if cell.bbox is not None:
|
||||||
text=cell.text,
|
|
||||||
bbox = cell.bbox.to_bottom_left_origin(
|
bbox = cell.bbox.to_bottom_left_origin(
|
||||||
page_no_to_page[element.page_no].size.height
|
page_no_to_page[element.page_no].size.height
|
||||||
).as_tuple(),
|
).as_tuple()
|
||||||
|
else:
|
||||||
|
bbox = None
|
||||||
|
|
||||||
|
table_data[i][j] = TableCell(
|
||||||
|
text=cell.text,
|
||||||
|
bbox=bbox,
|
||||||
# col=j,
|
# col=j,
|
||||||
# row=i,
|
# row=i,
|
||||||
spans=spans,
|
spans=spans,
|
||||||
@ -204,7 +205,7 @@ class GlmModel:
|
|||||||
for p in conv_res.pages
|
for p in conv_res.pages
|
||||||
]
|
]
|
||||||
|
|
||||||
ds_doc = DsDocument(
|
ds_doc: DsDocument = DsDocument(
|
||||||
name=title,
|
name=title,
|
||||||
description=desc,
|
description=desc,
|
||||||
file_info=file_info,
|
file_info=file_info,
|
||||||
@ -216,9 +217,7 @@ class GlmModel:
|
|||||||
|
|
||||||
return ds_doc
|
return ds_doc
|
||||||
|
|
||||||
def __call__(
|
def __call__(self, conv_res: ConversionResult) -> DoclingDocument:
|
||||||
self, conv_res: ConversionResult
|
|
||||||
) -> Tuple[DsLegacyDocument, DoclingDocument]:
|
|
||||||
ds_doc = self._to_legacy_document(conv_res)
|
ds_doc = self._to_legacy_document(conv_res)
|
||||||
ds_doc_dict = ds_doc.model_dump(by_alias=True)
|
ds_doc_dict = ds_doc.model_dump(by_alias=True)
|
||||||
|
|
||||||
|
@ -40,6 +40,8 @@ class EasyOcrModel(BaseOcrModel):
|
|||||||
return
|
return
|
||||||
|
|
||||||
for page in page_batch:
|
for page in page_batch:
|
||||||
|
assert page._backend is not None
|
||||||
|
|
||||||
ocr_rects = self.get_ocr_rects(page)
|
ocr_rects = self.get_ocr_rects(page)
|
||||||
|
|
||||||
all_ocr_cells = []
|
all_ocr_cells = []
|
||||||
|
@ -47,7 +47,7 @@ class LayoutModel(BasePageModel):
|
|||||||
def __init__(self, artifacts_path: Path):
|
def __init__(self, artifacts_path: Path):
|
||||||
self.layout_predictor = LayoutPredictor(artifacts_path) # TODO temporary
|
self.layout_predictor = LayoutPredictor(artifacts_path) # TODO temporary
|
||||||
|
|
||||||
def postprocess(self, clusters: List[Cluster], cells: List[Cell], page_height):
|
def postprocess(self, clusters_in: List[Cluster], cells: List[Cell], page_height):
|
||||||
MIN_INTERSECTION = 0.2
|
MIN_INTERSECTION = 0.2
|
||||||
CLASS_THRESHOLDS = {
|
CLASS_THRESHOLDS = {
|
||||||
DocItemLabel.CAPTION: 0.35,
|
DocItemLabel.CAPTION: 0.35,
|
||||||
@ -78,9 +78,9 @@ class LayoutModel(BasePageModel):
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
# Apply Confidence Threshold to cluster predictions
|
# Apply Confidence Threshold to cluster predictions
|
||||||
# confidence = self.conf_threshold
|
# confidence = self.conf_threshold
|
||||||
clusters_out = []
|
clusters_mod = []
|
||||||
|
|
||||||
for cluster in clusters:
|
for cluster in clusters_in:
|
||||||
confidence = CLASS_THRESHOLDS[cluster.label]
|
confidence = CLASS_THRESHOLDS[cluster.label]
|
||||||
if cluster.confidence >= confidence:
|
if cluster.confidence >= confidence:
|
||||||
# annotation["created_by"] = "high_conf_pred"
|
# annotation["created_by"] = "high_conf_pred"
|
||||||
@ -88,10 +88,10 @@ class LayoutModel(BasePageModel):
|
|||||||
# Remap class labels where needed.
|
# Remap class labels where needed.
|
||||||
if cluster.label in CLASS_REMAPPINGS.keys():
|
if cluster.label in CLASS_REMAPPINGS.keys():
|
||||||
cluster.label = CLASS_REMAPPINGS[cluster.label]
|
cluster.label = CLASS_REMAPPINGS[cluster.label]
|
||||||
clusters_out.append(cluster)
|
clusters_mod.append(cluster)
|
||||||
|
|
||||||
# map to dictionary clusters and cells, with bottom left origin
|
# map to dictionary clusters and cells, with bottom left origin
|
||||||
clusters = [
|
clusters_orig = [
|
||||||
{
|
{
|
||||||
"id": c.id,
|
"id": c.id,
|
||||||
"bbox": list(
|
"bbox": list(
|
||||||
@ -101,7 +101,7 @@ class LayoutModel(BasePageModel):
|
|||||||
"cell_ids": [],
|
"cell_ids": [],
|
||||||
"type": c.label,
|
"type": c.label,
|
||||||
}
|
}
|
||||||
for c in clusters
|
for c in clusters_in
|
||||||
]
|
]
|
||||||
|
|
||||||
clusters_out = [
|
clusters_out = [
|
||||||
@ -115,9 +115,11 @@ class LayoutModel(BasePageModel):
|
|||||||
"cell_ids": [],
|
"cell_ids": [],
|
||||||
"type": c.label,
|
"type": c.label,
|
||||||
}
|
}
|
||||||
for c in clusters_out
|
for c in clusters_mod
|
||||||
]
|
]
|
||||||
|
|
||||||
|
del clusters_mod
|
||||||
|
|
||||||
raw_cells = [
|
raw_cells = [
|
||||||
{
|
{
|
||||||
"id": c.id,
|
"id": c.id,
|
||||||
@ -151,7 +153,7 @@ class LayoutModel(BasePageModel):
|
|||||||
|
|
||||||
# Assign orphan cells with lower confidence predictions
|
# Assign orphan cells with lower confidence predictions
|
||||||
clusters_out, orphan_cell_indices = lu.assign_orphans_with_low_conf_pred(
|
clusters_out, orphan_cell_indices = lu.assign_orphans_with_low_conf_pred(
|
||||||
clusters_out, clusters, raw_cells, orphan_cell_indices
|
clusters_out, clusters_orig, raw_cells, orphan_cell_indices
|
||||||
)
|
)
|
||||||
|
|
||||||
# Refresh the cell_ids assignment, after creating new clusters using low conf predictions
|
# Refresh the cell_ids assignment, after creating new clusters using low conf predictions
|
||||||
@ -180,7 +182,7 @@ class LayoutModel(BasePageModel):
|
|||||||
) = lu.cell_id_state_map(clusters_out, cell_count)
|
) = lu.cell_id_state_map(clusters_out, cell_count)
|
||||||
|
|
||||||
clusters_out, orphan_cell_indices = lu.set_orphan_as_text(
|
clusters_out, orphan_cell_indices = lu.set_orphan_as_text(
|
||||||
clusters_out, clusters, raw_cells, orphan_cell_indices
|
clusters_out, clusters_orig, raw_cells, orphan_cell_indices
|
||||||
)
|
)
|
||||||
|
|
||||||
_log.debug("---- 5. Merge Cells & and adapt the bounding boxes")
|
_log.debug("---- 5. Merge Cells & and adapt the bounding boxes")
|
||||||
@ -239,34 +241,41 @@ class LayoutModel(BasePageModel):
|
|||||||
end_time = time.time() - start_time
|
end_time = time.time() - start_time
|
||||||
_log.debug(f"Finished post processing in seconds={end_time:.3f}")
|
_log.debug(f"Finished post processing in seconds={end_time:.3f}")
|
||||||
|
|
||||||
cells_out = [
|
cells_out_new = [
|
||||||
Cell(
|
Cell(
|
||||||
id=c["id"],
|
id=c["id"], # type: ignore
|
||||||
bbox=BoundingBox.from_tuple(
|
bbox=BoundingBox.from_tuple(
|
||||||
coord=c["bbox"], origin=CoordOrigin.BOTTOMLEFT
|
coord=c["bbox"], origin=CoordOrigin.BOTTOMLEFT # type: ignore
|
||||||
).to_top_left_origin(page_height),
|
).to_top_left_origin(page_height),
|
||||||
text=c["text"],
|
text=c["text"], # type: ignore
|
||||||
)
|
)
|
||||||
for c in cells_out
|
for c in cells_out
|
||||||
]
|
]
|
||||||
|
|
||||||
|
del cells_out
|
||||||
|
|
||||||
clusters_out_new = []
|
clusters_out_new = []
|
||||||
for c in clusters_out:
|
for c in clusters_out:
|
||||||
cluster_cells = [ccell for ccell in cells_out if ccell.id in c["cell_ids"]]
|
cluster_cells = [
|
||||||
|
ccell for ccell in cells_out_new if ccell.id in c["cell_ids"] # type: ignore
|
||||||
|
]
|
||||||
c_new = Cluster(
|
c_new = Cluster(
|
||||||
id=c["id"],
|
id=c["id"], # type: ignore
|
||||||
bbox=BoundingBox.from_tuple(
|
bbox=BoundingBox.from_tuple(
|
||||||
coord=c["bbox"], origin=CoordOrigin.BOTTOMLEFT
|
coord=c["bbox"], origin=CoordOrigin.BOTTOMLEFT # type: ignore
|
||||||
).to_top_left_origin(page_height),
|
).to_top_left_origin(page_height),
|
||||||
confidence=c["confidence"],
|
confidence=c["confidence"], # type: ignore
|
||||||
label=DocItemLabel(c["type"]),
|
label=DocItemLabel(c["type"]),
|
||||||
cells=cluster_cells,
|
cells=cluster_cells,
|
||||||
)
|
)
|
||||||
clusters_out_new.append(c_new)
|
clusters_out_new.append(c_new)
|
||||||
|
|
||||||
return clusters_out_new, cells_out
|
return clusters_out_new, cells_out_new
|
||||||
|
|
||||||
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
|
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
|
||||||
for page in page_batch:
|
for page in page_batch:
|
||||||
|
assert page.size is not None
|
||||||
|
|
||||||
clusters = []
|
clusters = []
|
||||||
for ix, pred_item in enumerate(
|
for ix, pred_item in enumerate(
|
||||||
self.layout_predictor.predict(page.get_image(scale=1.0))
|
self.layout_predictor.predict(page.get_image(scale=1.0))
|
||||||
|
@ -53,6 +53,8 @@ class PageAssembleModel(BasePageModel):
|
|||||||
|
|
||||||
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
|
def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]:
|
||||||
for page in page_batch:
|
for page in page_batch:
|
||||||
|
assert page._backend is not None
|
||||||
|
assert page.predictions.layout is not None
|
||||||
# assembles some JSON output page by page.
|
# assembles some JSON output page by page.
|
||||||
|
|
||||||
elements: List[PageElement] = []
|
elements: List[PageElement] = []
|
||||||
|
@ -40,7 +40,9 @@ class PagePreprocessingModel(BasePageModel):
|
|||||||
|
|
||||||
# Extract and populate the page cells and store it in the page object
|
# Extract and populate the page cells and store it in the page object
|
||||||
def _parse_page_cells(self, page: Page) -> Page:
|
def _parse_page_cells(self, page: Page) -> Page:
|
||||||
page.cells = page._backend.get_text_cells()
|
assert page._backend is not None
|
||||||
|
|
||||||
|
page.cells = list(page._backend.get_text_cells())
|
||||||
|
|
||||||
# DEBUG code:
|
# DEBUG code:
|
||||||
def draw_text_boxes(image, cells):
|
def draw_text_boxes(image, cells):
|
||||||
|
@ -24,8 +24,6 @@ class TableStructureModel(BasePageModel):
|
|||||||
|
|
||||||
self.enabled = enabled
|
self.enabled = enabled
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
artifacts_path: Path = artifacts_path
|
|
||||||
|
|
||||||
if self.mode == TableFormerMode.ACCURATE:
|
if self.mode == TableFormerMode.ACCURATE:
|
||||||
artifacts_path = artifacts_path / "fat"
|
artifacts_path = artifacts_path / "fat"
|
||||||
|
|
||||||
@ -40,6 +38,8 @@ class TableStructureModel(BasePageModel):
|
|||||||
self.scale = 2.0 # Scale up table input images to 144 dpi
|
self.scale = 2.0 # Scale up table input images to 144 dpi
|
||||||
|
|
||||||
def draw_table_and_cells(self, page: Page, tbl_list: List[Table]):
|
def draw_table_and_cells(self, page: Page, tbl_list: List[Table]):
|
||||||
|
assert page._backend is not None
|
||||||
|
|
||||||
image = (
|
image = (
|
||||||
page._backend.get_page_image()
|
page._backend.get_page_image()
|
||||||
) # make new image to avoid drawing on the saved ones
|
) # make new image to avoid drawing on the saved ones
|
||||||
@ -50,6 +50,7 @@ class TableStructureModel(BasePageModel):
|
|||||||
draw.rectangle([(x0, y0), (x1, y1)], outline="red")
|
draw.rectangle([(x0, y0), (x1, y1)], outline="red")
|
||||||
|
|
||||||
for tc in table_element.table_cells:
|
for tc in table_element.table_cells:
|
||||||
|
if tc.bbox is not None:
|
||||||
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
x0, y0, x1, y1 = tc.bbox.as_tuple()
|
||||||
if tc.column_header:
|
if tc.column_header:
|
||||||
width = 3
|
width = 3
|
||||||
@ -71,6 +72,9 @@ class TableStructureModel(BasePageModel):
|
|||||||
return
|
return
|
||||||
|
|
||||||
for page in page_batch:
|
for page in page_batch:
|
||||||
|
assert page._backend is not None
|
||||||
|
assert page.predictions.layout is not None
|
||||||
|
assert page.size is not None
|
||||||
|
|
||||||
page.predictions.tablestructure = TableStructurePrediction() # dummy
|
page.predictions.tablestructure = TableStructurePrediction() # dummy
|
||||||
|
|
||||||
@ -132,7 +136,7 @@ class TableStructureModel(BasePageModel):
|
|||||||
element["bbox"]["token"] = text_piece
|
element["bbox"]["token"] = text_piece
|
||||||
|
|
||||||
tc = TableCell.model_validate(element)
|
tc = TableCell.model_validate(element)
|
||||||
if self.do_cell_matching:
|
if self.do_cell_matching and tc.bbox is not None:
|
||||||
tc.bbox = tc.bbox.scaled(1 / self.scale)
|
tc.bbox = tc.bbox.scaled(1 / self.scale)
|
||||||
table_cells.append(tc)
|
table_cells.append(tc)
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ import io
|
|||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
from subprocess import DEVNULL, PIPE, Popen
|
from subprocess import DEVNULL, PIPE, Popen
|
||||||
from typing import Iterable, Tuple
|
from typing import Iterable, Optional, Tuple
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
@ -22,8 +22,8 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
|
|
||||||
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
self.scale = 3 # multiplier for 72 dpi == 216 dpi.
|
||||||
|
|
||||||
self._name = None
|
self._name: Optional[str] = None
|
||||||
self._version = None
|
self._version: Optional[str] = None
|
||||||
|
|
||||||
if self.enabled:
|
if self.enabled:
|
||||||
try:
|
try:
|
||||||
@ -40,7 +40,7 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
def _get_name_and_version(self) -> Tuple[str, str]:
|
def _get_name_and_version(self) -> Tuple[str, str]:
|
||||||
|
|
||||||
if self._name != None and self._version != None:
|
if self._name != None and self._version != None:
|
||||||
return self._name, self._version
|
return self._name, self._version # type: ignore
|
||||||
|
|
||||||
cmd = [self.options.tesseract_cmd, "--version"]
|
cmd = [self.options.tesseract_cmd, "--version"]
|
||||||
|
|
||||||
@ -109,6 +109,8 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
return
|
return
|
||||||
|
|
||||||
for page in page_batch:
|
for page in page_batch:
|
||||||
|
assert page._backend is not None
|
||||||
|
|
||||||
ocr_rects = self.get_ocr_rects(page)
|
ocr_rects = self.get_ocr_rects(page)
|
||||||
|
|
||||||
all_ocr_cells = []
|
all_ocr_cells = []
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
import numpy
|
|
||||||
from docling_core.types.doc import BoundingBox, CoordOrigin
|
from docling_core.types.doc import BoundingBox, CoordOrigin
|
||||||
|
|
||||||
from docling.datamodel.base_models import OcrCell, Page
|
from docling.datamodel.base_models import OcrCell, Page
|
||||||
@ -69,6 +68,9 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
return
|
return
|
||||||
|
|
||||||
for page in page_batch:
|
for page in page_batch:
|
||||||
|
assert page._backend is not None
|
||||||
|
assert self.reader is not None
|
||||||
|
|
||||||
ocr_rects = self.get_ocr_rects(page)
|
ocr_rects = self.get_ocr_rects(page)
|
||||||
|
|
||||||
all_ocr_cells = []
|
all_ocr_cells = []
|
||||||
|
@ -34,12 +34,6 @@ class BasePipeline(ABC):
|
|||||||
conv_res = ConversionResult(input=in_doc)
|
conv_res = ConversionResult(input=in_doc)
|
||||||
|
|
||||||
_log.info(f"Processing document {in_doc.file.name}")
|
_log.info(f"Processing document {in_doc.file.name}")
|
||||||
|
|
||||||
if not in_doc.valid:
|
|
||||||
conv_res.status = ConversionStatus.FAILURE
|
|
||||||
return conv_res
|
|
||||||
|
|
||||||
# TODO: propagate option for raises_on_error?
|
|
||||||
try:
|
try:
|
||||||
# These steps are building and assembling the structure of the
|
# These steps are building and assembling the structure of the
|
||||||
# output DoclingDocument
|
# output DoclingDocument
|
||||||
@ -155,7 +149,7 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
end_pb_time = time.time() - start_pb_time
|
end_pb_time = time.time() - start_pb_time
|
||||||
_log.info(f"Finished converting page batch time={end_pb_time:.3f}")
|
_log.debug(f"Finished converting page batch time={end_pb_time:.3f}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
conv_res.status = ConversionStatus.FAILURE
|
conv_res.status = ConversionStatus.FAILURE
|
||||||
@ -178,7 +172,7 @@ class PaginatedPipeline(BasePipeline): # TODO this is a bad name.
|
|||||||
) -> ConversionStatus:
|
) -> ConversionStatus:
|
||||||
status = ConversionStatus.SUCCESS
|
status = ConversionStatus.SUCCESS
|
||||||
for page in conv_res.pages:
|
for page in conv_res.pages:
|
||||||
if not page._backend.is_valid():
|
if page._backend is None or not page._backend.is_valid():
|
||||||
conv_res.errors.append(
|
conv_res.errors.append(
|
||||||
ErrorItem(
|
ErrorItem(
|
||||||
component_type=DoclingComponentType.DOCUMENT_BACKEND,
|
component_type=DoclingComponentType.DOCUMENT_BACKEND,
|
||||||
|
@ -120,7 +120,8 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def initialize_page(self, doc: InputDocument, page: Page) -> Page:
|
def initialize_page(self, doc: InputDocument, page: Page) -> Page:
|
||||||
page._backend = doc._backend.load_page(page.page_no)
|
page._backend = doc._backend.load_page(page.page_no) # type: ignore
|
||||||
|
if page._backend is not None and page._backend.is_valid():
|
||||||
page.size = page._backend.get_size()
|
page.size = page._backend.get_size()
|
||||||
|
|
||||||
return page
|
return page
|
||||||
@ -133,7 +134,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
all_body = []
|
all_body = []
|
||||||
|
|
||||||
for p in conv_res.pages:
|
for p in conv_res.pages:
|
||||||
|
assert p.assembled is not None
|
||||||
for el in p.assembled.body:
|
for el in p.assembled.body:
|
||||||
all_body.append(el)
|
all_body.append(el)
|
||||||
for el in p.assembled.headers:
|
for el in p.assembled.headers:
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import time
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
|
import time
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from docling.datamodel.base_models import ConversionStatus
|
from docling.datamodel.base_models import ConversionStatus
|
||||||
@ -122,7 +122,7 @@ def main():
|
|||||||
raises_on_error=False, # to let conversion run through all and examine results at the end
|
raises_on_error=False, # to let conversion run through all and examine results at the end
|
||||||
)
|
)
|
||||||
success_count, partial_success_count, failure_count = export_documents(
|
success_count, partial_success_count, failure_count = export_documents(
|
||||||
conv_results, output_dir=Path("../../examples/scratch")
|
conv_results, output_dir=Path("scratch")
|
||||||
)
|
)
|
||||||
|
|
||||||
end_time = time.time() - start_time
|
end_time = time.time() - start_time
|
||||||
|
@ -1,18 +1,14 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import time
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable
|
|
||||||
|
|
||||||
from docling.datamodel.base_models import ConversionStatus, InputFormat
|
import time
|
||||||
from docling.datamodel.document import ConversionResult
|
|
||||||
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import (
|
||||||
PdfPipelineOptions,
|
PdfPipelineOptions,
|
||||||
TesseractCliOcrOptions,
|
|
||||||
TesseractOcrOptions,
|
|
||||||
)
|
)
|
||||||
from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@ -112,7 +108,7 @@ def main():
|
|||||||
_log.info(f"Document converted in {end_time:.2f} seconds.")
|
_log.info(f"Document converted in {end_time:.2f} seconds.")
|
||||||
|
|
||||||
## Export results
|
## Export results
|
||||||
output_dir = Path("../../examples/scratch")
|
output_dir = Path("scratch")
|
||||||
output_dir.mkdir(parents=True, exist_ok=True)
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
doc_filename = conv_result.input.file.stem
|
doc_filename = conv_result.input.file.stem
|
||||||
|
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
import logging
|
import logging
|
||||||
import time
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
import time
|
||||||
|
|
||||||
from docling.datamodel.base_models import FigureElement, InputFormat, Table
|
from docling.datamodel.base_models import FigureElement, InputFormat, Table
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
@ -15,7 +16,7 @@ def main():
|
|||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
input_doc_path = Path("./tests/data/2206.01062.pdf")
|
input_doc_path = Path("./tests/data/2206.01062.pdf")
|
||||||
output_dir = Path("../../examples/scratch")
|
output_dir = Path("scratch")
|
||||||
|
|
||||||
# Important: For operating with page images, we must keep them, otherwise the DocumentConverter
|
# Important: For operating with page images, we must keep them, otherwise the DocumentConverter
|
||||||
# will destroy them for cleaning up memory.
|
# will destroy them for cleaning up memory.
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import time
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import time
|
||||||
|
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
@ -20,7 +20,7 @@ def main():
|
|||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
input_doc_path = Path("./tests/data/2206.01062.pdf")
|
input_doc_path = Path("./tests/data/2206.01062.pdf")
|
||||||
output_dir = Path("../../examples/scratch")
|
output_dir = Path("scratch")
|
||||||
|
|
||||||
# Important: For operating with page images, we must keep them, otherwise the DocumentConverter
|
# Important: For operating with page images, we must keep them, otherwise the DocumentConverter
|
||||||
# will destroy them for cleaning up memory.
|
# will destroy them for cleaning up memory.
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
import logging
|
import logging
|
||||||
import time
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
import time
|
||||||
|
|
||||||
from docling.document_converter import DocumentConverter
|
from docling.document_converter import DocumentConverter
|
||||||
|
|
||||||
@ -13,7 +13,7 @@ def main():
|
|||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
input_doc_path = Path("./tests/data/2206.01062.pdf")
|
input_doc_path = Path("./tests/data/2206.01062.pdf")
|
||||||
output_dir = Path("../../examples/scratch")
|
output_dir = Path("scratch")
|
||||||
|
|
||||||
doc_converter = DocumentConverter()
|
doc_converter = DocumentConverter()
|
||||||
|
|
||||||
|
@ -7,4 +7,4 @@ print(
|
|||||||
result.document.export_to_markdown()
|
result.document.export_to_markdown()
|
||||||
) # output: ## Docling Technical Report [...]"
|
) # output: ## Docling Technical Report [...]"
|
||||||
# if the legacy output is needed, use this version
|
# if the legacy output is needed, use this version
|
||||||
# print(result.legacy_output.export_to_markdown()) # output: ## Docling Technical Report [...]"
|
# print(result.legacy_document.export_to_markdown()) # output: ## Docling Technical Report [...]"
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"<a href=\"https://colab.research.google.com/github/DS4SD/docling/blob/main/examples/rag_llamaindex.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
"<a href=\"https://colab.research.google.com/github/DS4SD/docling/blob/main/docs/examples/rag_llamaindex.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
@ -53,7 +53,7 @@ doc_converter = (
|
|||||||
conv_results = doc_converter.convert_all(input_paths)
|
conv_results = doc_converter.convert_all(input_paths)
|
||||||
|
|
||||||
for res in conv_results:
|
for res in conv_results:
|
||||||
out_path = Path("../../examples/scratch")
|
out_path = Path("scratch")
|
||||||
print(
|
print(
|
||||||
f"Document {res.input.file.name} converted."
|
f"Document {res.input.file.name} converted."
|
||||||
f"\nSaved markdown output to: {str(out_path)}"
|
f"\nSaved markdown output to: {str(out_path)}"
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
{% extends "base.html" %}
|
{% extends "base.html" %}
|
||||||
|
|
||||||
{#
|
|
||||||
{% block announce %}
|
{% block announce %}
|
||||||
<p>🎉 Docling is now officially supported in LlamaIndex! <a href="{{ 'integrations/llamaindex/' | url }}">Check it out</a>!</p>
|
<p>🎉 Docling is going v2, <a href="{{ 'v2' | url }}">check out</a> what's new and how to get started!</p>
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
#}
|
|
||||||
|
107
docs/v2.md
Normal file
107
docs/v2.md
Normal file
@ -0,0 +1,107 @@
|
|||||||
|
## What's new
|
||||||
|
|
||||||
|
Docling v2 introduces several new features:
|
||||||
|
- Understands and converts PDF, MS Word, MS PowerPoint, HTML and several image formats
|
||||||
|
- Produces a new, universal document representation which can encapsulate document hierarchy
|
||||||
|
- Comes with a fresh new API and CLI
|
||||||
|
|
||||||
|
## Migration from v1
|
||||||
|
|
||||||
|
### Setting up a `DocumentConverter`
|
||||||
|
|
||||||
|
To accommodate many input formats, we changed the way you need to set up your `DocumentConverter` object.
|
||||||
|
You can now define a list of allowed formats on the `DocumentConverter` initialization, and specify custom options
|
||||||
|
per-format if desired. By default, all supported formats are allowed. If you don't provide `format_options`, defaults
|
||||||
|
will be used for all `allowed_formats`.
|
||||||
|
|
||||||
|
Format options can include the pipeline class to use, the options to provide to the pipeline, and the document backend.
|
||||||
|
They are provided as format-specific types, such as `PdfFormatOption` or `WordFormatOption`, as seen below.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
from docling.datamodel.base_models import InputFormat
|
||||||
|
from docling.document_converter import (
|
||||||
|
DocumentConverter,
|
||||||
|
PdfFormatOption,
|
||||||
|
WordFormatOption,
|
||||||
|
)
|
||||||
|
from docling.pipeline.simple_pipeline import SimplePipeline
|
||||||
|
from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
||||||
|
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
|
||||||
|
|
||||||
|
## Default initialization still works as before:
|
||||||
|
# doc_converter = DocumentConverter()
|
||||||
|
|
||||||
|
## Custom options are now defined per format.
|
||||||
|
doc_converter = (
|
||||||
|
DocumentConverter( # all of the below is optional, has internal defaults.
|
||||||
|
allowed_formats=[
|
||||||
|
InputFormat.PDF,
|
||||||
|
InputFormat.IMAGE,
|
||||||
|
InputFormat.DOCX,
|
||||||
|
InputFormat.HTML,
|
||||||
|
InputFormat.PPTX,
|
||||||
|
], # whitelist formats, non-matching files are ignored.
|
||||||
|
format_options={
|
||||||
|
InputFormat.PDF: PdfFormatOption(
|
||||||
|
pipeline_cls=StandardPdfPipeline, backend=PyPdfiumDocumentBackend
|
||||||
|
),
|
||||||
|
InputFormat.DOCX: WordFormatOption(
|
||||||
|
pipeline_cls=SimplePipeline # , backend=MsWordDocumentBackend
|
||||||
|
),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Note: If you work only with defaults, all remains the same as in Docling v1.
|
||||||
|
|
||||||
|
### Converting documents
|
||||||
|
|
||||||
|
We have simplified the way you can feed input to the `DocumentConverter` and renamed the conversion methods for
|
||||||
|
better semantics. You can now call the conversion directly with a single file, or a list of input files,
|
||||||
|
or `DocumentStream` objects, without constructing a `DocumentConversionInput` object first.
|
||||||
|
|
||||||
|
* `DocumentConverter.convert` now converts a single file input (previously `DocumentConverter.convert_single`).
|
||||||
|
* `DocumentConverter.convert_all` now converts many files at once (previously `DocumentConverter.convert`).
|
||||||
|
|
||||||
|
|
||||||
|
```python
|
||||||
|
...
|
||||||
|
## Convert a single file (from URL or local path)
|
||||||
|
conv_result = doc_converter.convert("https://arxiv.org/pdf/2408.09869") # previously `convert_single`
|
||||||
|
|
||||||
|
## Convert several files at once:
|
||||||
|
|
||||||
|
input_files = [
|
||||||
|
"tests/data/wiki_duck.html",
|
||||||
|
"tests/data/word_sample.docx",
|
||||||
|
"tests/data/lorem_ipsum.docx",
|
||||||
|
"tests/data/powerpoint_sample.pptx",
|
||||||
|
"tests/data/2305.03393v1-pg9-img.png",
|
||||||
|
"tests/data/2206.01062.pdf",
|
||||||
|
]
|
||||||
|
|
||||||
|
conv_results_iter = doc_converter.convert_all(input_files) # previously `convert`
|
||||||
|
|
||||||
|
```
|
||||||
|
Through the `raises_on_error` argument, you can also control if the conversion should raise exceptions when first
|
||||||
|
encountering a problem, or resiliently convert all files first and reflect errors in each file's conversion status.
|
||||||
|
By default, any error is immediately raised and the conversion aborts (previously, exceptions were swallowed).
|
||||||
|
|
||||||
|
```python
|
||||||
|
...
|
||||||
|
conv_results_iter = doc_converter.convert_all(input_files, raises_on_error=False) # previously `convert`
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
### Exporting documents into JSON, Markdown, Doctags
|
||||||
|
|
||||||
|
We have simplified how you can access and export the converted document data, too.
|
||||||
|
|
||||||
|
TBD.
|
||||||
|
|
||||||
|
|
||||||
|
### CLI
|
||||||
|
|
||||||
|
TBD.
|
@ -1,369 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# RAG with Docling and 🦜🔗 LangChain"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"# requirements for this example:\n",
|
|
||||||
"%pip install -qq docling docling-core python-dotenv langchain-text-splitters langchain-huggingface langchain-milvus"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"True"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"\n",
|
|
||||||
"from dotenv import load_dotenv\n",
|
|
||||||
"\n",
|
|
||||||
"load_dotenv()"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import warnings\n",
|
|
||||||
"\n",
|
|
||||||
"warnings.filterwarnings(action=\"ignore\", category=UserWarning, module=\"pydantic|torch\")\n",
|
|
||||||
"warnings.filterwarnings(action=\"ignore\", category=FutureWarning, module=\"easyocr\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Setup"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Loader and splitter"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Below we set up:\n",
|
|
||||||
"- a `Loader` which will be used to create LangChain documents, and\n",
|
|
||||||
"- a splitter, which will be used to split these documents"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from enum import Enum\n",
|
|
||||||
"from typing import Iterator\n",
|
|
||||||
"\n",
|
|
||||||
"from langchain_core.document_loaders import BaseLoader\n",
|
|
||||||
"from langchain_core.documents import Document as LCDocument\n",
|
|
||||||
"from pydantic import BaseModel\n",
|
|
||||||
"\n",
|
|
||||||
"from docling.document_converter import DocumentConverter\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"class DocumentMetadata(BaseModel):\n",
|
|
||||||
" dl_doc_hash: str\n",
|
|
||||||
" # source: str\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"class DoclingPDFLoader(BaseLoader):\n",
|
|
||||||
" class ParseType(str, Enum):\n",
|
|
||||||
" MARKDOWN = \"markdown\"\n",
|
|
||||||
" # JSON = \"json\"\n",
|
|
||||||
"\n",
|
|
||||||
" def __init__(self, file_path: str | list[str], parse_type: ParseType) -> None:\n",
|
|
||||||
" self._file_paths = file_path if isinstance(file_path, list) else [file_path]\n",
|
|
||||||
" self._parse_type = parse_type\n",
|
|
||||||
" self._converter = DocumentConverter()\n",
|
|
||||||
"\n",
|
|
||||||
" def lazy_load(self) -> Iterator[LCDocument]:\n",
|
|
||||||
" for source in self._file_paths:\n",
|
|
||||||
" dl_doc = self._converter.convert_single(source).output\n",
|
|
||||||
" match self._parse_type:\n",
|
|
||||||
" case self.ParseType.MARKDOWN:\n",
|
|
||||||
" text = dl_doc.export_to_markdown()\n",
|
|
||||||
" # case self.ParseType.JSON:\n",
|
|
||||||
" # text = dl_doc.model_dump_json()\n",
|
|
||||||
" case _:\n",
|
|
||||||
" raise RuntimeError(\n",
|
|
||||||
" f\"Unexpected parse type encountered: {self._parse_type}\"\n",
|
|
||||||
" )\n",
|
|
||||||
" lc_doc = LCDocument(\n",
|
|
||||||
" page_content=text,\n",
|
|
||||||
" metadata=DocumentMetadata(\n",
|
|
||||||
" dl_doc_hash=dl_doc.file_info.document_hash,\n",
|
|
||||||
" ).model_dump(),\n",
|
|
||||||
" )\n",
|
|
||||||
" yield lc_doc"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"FILE_PATH = \"https://arxiv.org/pdf/2206.01062\" # DocLayNet paper"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 6,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"application/vnd.jupyter.widget-view+json": {
|
|
||||||
"model_id": "1b38d07d5fed4618a44ecf261e1e5c44",
|
|
||||||
"version_major": 2,
|
|
||||||
"version_minor": 0
|
|
||||||
},
|
|
||||||
"text/plain": [
|
|
||||||
"Fetching 7 files: 0%| | 0/7 [00:00<?, ?it/s]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
|
|
||||||
"\n",
|
|
||||||
"loader = DoclingPDFLoader(\n",
|
|
||||||
" file_path=FILE_PATH,\n",
|
|
||||||
" parse_type=DoclingPDFLoader.ParseType.MARKDOWN,\n",
|
|
||||||
")\n",
|
|
||||||
"text_splitter = RecursiveCharacterTextSplitter(\n",
|
|
||||||
" chunk_size=1000,\n",
|
|
||||||
" chunk_overlap=200,\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"We now used the above-defined objects to get the document splits:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"docs = loader.load()\n",
|
|
||||||
"splits = text_splitter.split_documents(docs)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Embeddings"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 8,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from langchain_huggingface.embeddings import HuggingFaceEmbeddings\n",
|
|
||||||
"\n",
|
|
||||||
"HF_EMBED_MODEL_ID = \"BAAI/bge-small-en-v1.5\"\n",
|
|
||||||
"embeddings = HuggingFaceEmbeddings(model_name=HF_EMBED_MODEL_ID)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Vector store"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 9,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from tempfile import TemporaryDirectory\n",
|
|
||||||
"\n",
|
|
||||||
"from langchain_milvus import Milvus\n",
|
|
||||||
"\n",
|
|
||||||
"MILVUS_URI = os.environ.get(\n",
|
|
||||||
" \"MILVUS_URL\", f\"{(tmp_dir := TemporaryDirectory()).name}/milvus_demo.db\"\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"vectorstore = Milvus.from_documents(\n",
|
|
||||||
" splits,\n",
|
|
||||||
" embeddings,\n",
|
|
||||||
" connection_args={\"uri\": MILVUS_URI},\n",
|
|
||||||
" drop_old=True,\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### LLM"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 10,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
|
|
||||||
"Token is valid (permission: write).\n",
|
|
||||||
"Your token has been saved to /Users/pva/.cache/huggingface/token\n",
|
|
||||||
"Login successful\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"from langchain_huggingface import HuggingFaceEndpoint\n",
|
|
||||||
"\n",
|
|
||||||
"HF_API_KEY = os.environ.get(\"HF_API_KEY\")\n",
|
|
||||||
"HF_LLM_MODEL_ID = \"mistralai/Mistral-7B-Instruct-v0.3\"\n",
|
|
||||||
"\n",
|
|
||||||
"llm = HuggingFaceEndpoint(\n",
|
|
||||||
" repo_id=HF_LLM_MODEL_ID,\n",
|
|
||||||
" huggingfacehub_api_token=HF_API_KEY,\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## RAG"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 11,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from typing import Iterable\n",
|
|
||||||
"\n",
|
|
||||||
"from langchain_core.documents import Document as LCDocument\n",
|
|
||||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
|
||||||
"from langchain_core.prompts import PromptTemplate\n",
|
|
||||||
"from langchain_core.runnables import RunnablePassthrough\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"def format_docs(docs: Iterable[LCDocument]):\n",
|
|
||||||
" return \"\\n\\n\".join(doc.page_content for doc in docs)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"retriever = vectorstore.as_retriever()\n",
|
|
||||||
"\n",
|
|
||||||
"prompt = PromptTemplate.from_template(\n",
|
|
||||||
" \"Context information is below.\\n---------------------\\n{context}\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: {question}\\nAnswer:\\n\"\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"rag_chain = (\n",
|
|
||||||
" {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
|
|
||||||
" | prompt\n",
|
|
||||||
" | llm\n",
|
|
||||||
" | StrOutputParser()\n",
|
|
||||||
")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 12,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"'The human annotation of DocLayNet was performed on 80863 pages.\\n\\nExplanation:\\nThe information is found in the paragraph \"DocLayNet contains 80863 PDF pages\" in the context.'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 12,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"rag_chain.invoke(\"How many pages were human annotated for DocLayNet?\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": ".venv",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.4"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
@ -1,434 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"<a href=\"https://colab.research.google.com/github/DS4SD/docling/blob/main/examples/rag_llamaindex.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"# RAG with Docling and 🦙 LlamaIndex"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Overview"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"LlamaIndex extensions `DoclingReader` and `DoclingNodeParser` presented in this notebook seamlessly integrate Docling into LlamaIndex, enabling you to:\n",
|
|
||||||
"- use PDF documents in your LLM applications with ease and speed, and\n",
|
|
||||||
"- leverage Docling's rich format for advanced, document-native grounding."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Setup"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"- 👉 For best conversion speed, use GPU acceleration whenever available; e.g. if running on Colab, use GPU-enabled runtime.\n",
|
|
||||||
"- Notebook uses HuggingFace's Inference API; for increased LLM quota, token can be provided via env var `HF_TOKEN`.\n",
|
|
||||||
"- Requirements can be installed as shown below (`--no-warn-conflicts` meant for Colab's pre-populated Python env; feel free to remove for stricter usage):"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Note: you may need to restart the kernel to use updated packages.\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%pip install -q --progress-bar off --no-warn-conflicts llama-index-core llama-index-readers-docling llama-index-node-parser-docling llama-index-embeddings-huggingface llama-index-llms-huggingface-api llama-index-readers-file python-dotenv"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 2,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"import os\n",
|
|
||||||
"from pathlib import Path\n",
|
|
||||||
"from tempfile import mkdtemp\n",
|
|
||||||
"from warnings import filterwarnings\n",
|
|
||||||
"\n",
|
|
||||||
"from dotenv import load_dotenv\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"def _get_env_from_colab_or_os(key):\n",
|
|
||||||
" try:\n",
|
|
||||||
" from google.colab import userdata\n",
|
|
||||||
"\n",
|
|
||||||
" try:\n",
|
|
||||||
" return userdata.get(key)\n",
|
|
||||||
" except userdata.SecretNotFoundError:\n",
|
|
||||||
" pass\n",
|
|
||||||
" except ImportError:\n",
|
|
||||||
" pass\n",
|
|
||||||
" return os.getenv(key)\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"load_dotenv()\n",
|
|
||||||
"\n",
|
|
||||||
"filterwarnings(action=\"ignore\", category=UserWarning, module=\"pydantic\")\n",
|
|
||||||
"filterwarnings(action=\"ignore\", category=FutureWarning, module=\"easyocr\")\n",
|
|
||||||
"# https://github.com/huggingface/transformers/issues/5486:\n",
|
|
||||||
"os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\""
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"We can now define the main parameters:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 3,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n",
|
|
||||||
"from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI\n",
|
|
||||||
"\n",
|
|
||||||
"EMBED_MODEL = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n",
|
|
||||||
"MILVUS_URI = str(Path(mkdtemp()) / \"docling.db\")\n",
|
|
||||||
"GEN_MODEL = HuggingFaceInferenceAPI(\n",
|
|
||||||
" token=_get_env_from_colab_or_os(\"HF_TOKEN\"),\n",
|
|
||||||
" model_name=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n",
|
|
||||||
")\n",
|
|
||||||
"SOURCE = \"https://arxiv.org/pdf/2408.09869\" # Docling Technical Report\n",
|
|
||||||
"QUERY = \"Which are the main AI models in Docling?\"\n",
|
|
||||||
"\n",
|
|
||||||
"embed_dim = len(EMBED_MODEL.get_text_embedding(\"hi\"))"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Using Markdown export"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"To create a simple RAG pipeline, we can:\n",
|
|
||||||
"- define a `DoclingReader`, which by default exports to Markdown, and\n",
|
|
||||||
"- use a standard node parser for these Markdown-based docs, e.g. a `MarkdownNodeParser`"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 4,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Q: Which are the main AI models in Docling?\n",
|
|
||||||
"A: 1. A layout analysis model, an accurate object-detector for page elements. 2. TableFormer, a state-of-the-art table structure recognition model.\n",
|
|
||||||
"\n",
|
|
||||||
"Sources:\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('3.2 AI models\\n\\nAs part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks.',\n",
|
|
||||||
" {'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n",
|
|
||||||
" 'Header_2': '3.2 AI models'}),\n",
|
|
||||||
" (\"5 Applications\\n\\nThanks to the high-quality, richly structured document conversion achieved by Docling, its output qualifies for numerous downstream applications. For example, Docling can provide a base for detailed enterprise document search, passage retrieval or classification use-cases, or support knowledge extraction pipelines, allowing specific treatment of different structures in the document, such as tables, figures, section structure or references. For popular generative AI application patterns, such as retrieval-augmented generation (RAG), we provide quackling , an open-source package which capitalizes on Docling's feature-rich document output to enable document-native optimized vector embedding and chunking. It plugs in seamlessly with LLM frameworks such as LlamaIndex [8]. Since Docling is fast, stable and cheap to run, it also makes for an excellent choice to build document-derived datasets. With its powerful table structure recognition, it provides significant benefit to automated knowledge-base construction [11, 10]. Docling is also integrated within the open IBM data prep kit [6], which implements scalable data transforms to build large-scale multi-modal training datasets.\",\n",
|
|
||||||
" {'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n",
|
|
||||||
" 'Header_2': '5 Applications'})]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"from llama_index.core import StorageContext, VectorStoreIndex\n",
|
|
||||||
"from llama_index.core.node_parser import MarkdownNodeParser\n",
|
|
||||||
"from llama_index.readers.docling import DoclingReader\n",
|
|
||||||
"from llama_index.vector_stores.milvus import MilvusVectorStore\n",
|
|
||||||
"\n",
|
|
||||||
"reader = DoclingReader()\n",
|
|
||||||
"node_parser = MarkdownNodeParser()\n",
|
|
||||||
"\n",
|
|
||||||
"vector_store = MilvusVectorStore(\n",
|
|
||||||
" uri=str(Path(mkdtemp()) / \"docling.db\"), # or set as needed\n",
|
|
||||||
" dim=embed_dim,\n",
|
|
||||||
" overwrite=True,\n",
|
|
||||||
")\n",
|
|
||||||
"index = VectorStoreIndex.from_documents(\n",
|
|
||||||
" documents=reader.load_data(SOURCE),\n",
|
|
||||||
" transformations=[node_parser],\n",
|
|
||||||
" storage_context=StorageContext.from_defaults(vector_store=vector_store),\n",
|
|
||||||
" embed_model=EMBED_MODEL,\n",
|
|
||||||
")\n",
|
|
||||||
"result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)\n",
|
|
||||||
"print(f\"Q: {QUERY}\\nA: {result.response.strip()}\\n\\nSources:\")\n",
|
|
||||||
"display([(n.text, n.metadata) for n in result.source_nodes])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## Using Docling format"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"To leverage Docling's rich native format, we:\n",
|
|
||||||
"- create a `DoclingReader` with JSON export type, and\n",
|
|
||||||
"- employ a `DoclingNodeParser` in order to appropriately parse that Docling format.\n",
|
|
||||||
"\n",
|
|
||||||
"Notice how the sources now also contain document-level grounding (e.g. page number or bounding box information):"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Q: Which are the main AI models in Docling?\n",
|
|
||||||
"A: The main AI models in Docling are a layout analysis model and TableFormer. The layout analysis model is an accurate object-detector for page elements, and TableFormer is a state-of-the-art table structure recognition model.\n",
|
|
||||||
"\n",
|
|
||||||
"Sources:\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('As part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks.',\n",
|
|
||||||
" {'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n",
|
|
||||||
" 'path': '#/main-text/37',\n",
|
|
||||||
" 'heading': '3.2 AI models',\n",
|
|
||||||
" 'page': 3,\n",
|
|
||||||
" 'bbox': [107.36903381347656,\n",
|
|
||||||
" 330.07513427734375,\n",
|
|
||||||
" 506.29705810546875,\n",
|
|
||||||
" 407.3725280761719]}),\n",
|
|
||||||
" ('With Docling , we open-source a very capable and efficient document conversion tool which builds on the powerful, specialized AI models and datasets for layout analysis and table structure recognition we developed and presented in the recent past [12, 13, 9]. Docling is designed as a simple, self-contained python library with permissive license, running entirely locally on commodity hardware. Its code architecture allows for easy extensibility and addition of new features and models.',\n",
|
|
||||||
" {'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n",
|
|
||||||
" 'path': '#/main-text/10',\n",
|
|
||||||
" 'heading': '1 Introduction',\n",
|
|
||||||
" 'page': 1,\n",
|
|
||||||
" 'bbox': [107.33261108398438,\n",
|
|
||||||
" 83.3067626953125,\n",
|
|
||||||
" 504.0033874511719,\n",
|
|
||||||
" 136.45367431640625]})]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"from llama_index.node_parser.docling import DoclingNodeParser\n",
|
|
||||||
"\n",
|
|
||||||
"reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)\n",
|
|
||||||
"node_parser = DoclingNodeParser()\n",
|
|
||||||
"\n",
|
|
||||||
"vector_store = MilvusVectorStore(\n",
|
|
||||||
" uri=str(Path(mkdtemp()) / \"docling.db\"), # or set as needed\n",
|
|
||||||
" dim=embed_dim,\n",
|
|
||||||
" overwrite=True,\n",
|
|
||||||
")\n",
|
|
||||||
"index = VectorStoreIndex.from_documents(\n",
|
|
||||||
" documents=reader.load_data(SOURCE),\n",
|
|
||||||
" transformations=[node_parser],\n",
|
|
||||||
" storage_context=StorageContext.from_defaults(vector_store=vector_store),\n",
|
|
||||||
" embed_model=EMBED_MODEL,\n",
|
|
||||||
")\n",
|
|
||||||
"result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)\n",
|
|
||||||
"print(f\"Q: {QUERY}\\nA: {result.response.strip()}\\n\\nSources:\")\n",
|
|
||||||
"display([(n.text, n.metadata) for n in result.source_nodes])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"## With Simple Directory Reader"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"To demonstrate this usage pattern, we first set up a test document directory."
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 6,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from pathlib import Path\n",
|
|
||||||
"from tempfile import mkdtemp\n",
|
|
||||||
"\n",
|
|
||||||
"import requests\n",
|
|
||||||
"\n",
|
|
||||||
"tmp_dir_path = Path(mkdtemp())\n",
|
|
||||||
"r = requests.get(SOURCE)\n",
|
|
||||||
"with open(tmp_dir_path / f\"{Path(SOURCE).name}.pdf\", \"wb\") as out_file:\n",
|
|
||||||
" out_file.write(r.content)"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Using the `reader` and `node_parser` definitions from any of the above variants, usage with `SimpleDirectoryReader` then looks as follows:"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 7,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Loading files: 100%|██████████| 1/1 [00:11<00:00, 11.15s/file]\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Q: Which are the main AI models in Docling?\n",
|
|
||||||
"A: The main AI models in Docling are a layout analysis model and TableFormer. The layout analysis model is an accurate object-detector for page elements, and TableFormer is a state-of-the-art table structure recognition model.\n",
|
|
||||||
"\n",
|
|
||||||
"Sources:\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('As part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks.',\n",
|
|
||||||
" {'file_path': '/var/folders/76/4wwfs06x6835kcwj4186c0nc0000gn/T/tmp4vsev3_r/2408.09869.pdf',\n",
|
|
||||||
" 'file_name': '2408.09869.pdf',\n",
|
|
||||||
" 'file_type': 'application/pdf',\n",
|
|
||||||
" 'file_size': 5566574,\n",
|
|
||||||
" 'creation_date': '2024-10-09',\n",
|
|
||||||
" 'last_modified_date': '2024-10-09',\n",
|
|
||||||
" 'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n",
|
|
||||||
" 'path': '#/main-text/37',\n",
|
|
||||||
" 'heading': '3.2 AI models',\n",
|
|
||||||
" 'page': 3,\n",
|
|
||||||
" 'bbox': [107.36903381347656,\n",
|
|
||||||
" 330.07513427734375,\n",
|
|
||||||
" 506.29705810546875,\n",
|
|
||||||
" 407.3725280761719]}),\n",
|
|
||||||
" ('With Docling , we open-source a very capable and efficient document conversion tool which builds on the powerful, specialized AI models and datasets for layout analysis and table structure recognition we developed and presented in the recent past [12, 13, 9]. Docling is designed as a simple, self-contained python library with permissive license, running entirely locally on commodity hardware. Its code architecture allows for easy extensibility and addition of new features and models.',\n",
|
|
||||||
" {'file_path': '/var/folders/76/4wwfs06x6835kcwj4186c0nc0000gn/T/tmp4vsev3_r/2408.09869.pdf',\n",
|
|
||||||
" 'file_name': '2408.09869.pdf',\n",
|
|
||||||
" 'file_type': 'application/pdf',\n",
|
|
||||||
" 'file_size': 5566574,\n",
|
|
||||||
" 'creation_date': '2024-10-09',\n",
|
|
||||||
" 'last_modified_date': '2024-10-09',\n",
|
|
||||||
" 'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n",
|
|
||||||
" 'path': '#/main-text/10',\n",
|
|
||||||
" 'heading': '1 Introduction',\n",
|
|
||||||
" 'page': 1,\n",
|
|
||||||
" 'bbox': [107.33261108398438,\n",
|
|
||||||
" 83.3067626953125,\n",
|
|
||||||
" 504.0033874511719,\n",
|
|
||||||
" 136.45367431640625]})]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "display_data"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"from llama_index.core import SimpleDirectoryReader\n",
|
|
||||||
"\n",
|
|
||||||
"dir_reader = SimpleDirectoryReader(\n",
|
|
||||||
" input_dir=tmp_dir_path,\n",
|
|
||||||
" file_extractor={\".pdf\": reader},\n",
|
|
||||||
")\n",
|
|
||||||
"\n",
|
|
||||||
"vector_store = MilvusVectorStore(\n",
|
|
||||||
" uri=str(Path(mkdtemp()) / \"docling.db\"), # or set as needed\n",
|
|
||||||
" dim=embed_dim,\n",
|
|
||||||
" overwrite=True,\n",
|
|
||||||
")\n",
|
|
||||||
"index = VectorStoreIndex.from_documents(\n",
|
|
||||||
" documents=dir_reader.load_data(SOURCE),\n",
|
|
||||||
" transformations=[node_parser],\n",
|
|
||||||
" storage_context=StorageContext.from_defaults(vector_store=vector_store),\n",
|
|
||||||
" embed_model=EMBED_MODEL,\n",
|
|
||||||
")\n",
|
|
||||||
"result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)\n",
|
|
||||||
"print(f\"Q: {QUERY}\\nA: {result.response.strip()}\\n\\nSources:\")\n",
|
|
||||||
"display([(n.text, n.metadata) for n in result.source_nodes])"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": ".venv",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.12.4"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
116
logo.svg
116
logo.svg
@ -1,116 +0,0 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
|
||||||
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
|
||||||
<svg width="100%" height="100%" viewBox="0 0 1024 1024" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" xmlns:serif="http://www.serif.com/" style="fill-rule:evenodd;clip-rule:evenodd;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:1.5;">
|
|
||||||
<g id="Docling" transform="matrix(1.07666,0,0,1.07666,-35.9018,-84.1562)">
|
|
||||||
<g id="Outline" transform="matrix(1,0,0,1,-0.429741,55.0879)">
|
|
||||||
<path d="M394.709,69.09C417.34,35.077 467.97,30.178 478.031,55.609C486.35,55.043 494.726,54.701 503.158,54.589C533.157,45.238 560.496,47.419 584.65,60.732C800.941,96.66 966.069,284.814 966.069,511.232C966.069,763.284 761.435,967.918 509.383,967.918C433.692,967.918 362.277,949.464 299.385,916.808L242.3,931.993C203.092,943.242 187.715,928.369 208.575,891.871C208.935,891.24 216.518,879.37 223.997,867.677C119.604,783.975 52.698,655.355 52.698,511.232C52.698,298.778 198.086,120.013 394.709,69.09Z" style="fill:white;"/>
|
|
||||||
</g>
|
|
||||||
<g id="Color" transform="matrix(1.02317,0,0,1.02317,-11.55,-17.8333)">
|
|
||||||
<path d="M284.8,894.232L179.735,783.955L130.222,645.203L125.538,504.726L185.211,385.816C209.006,322.738 249.951,278.973 302.281,248.028L406.684,203.333L413.483,175.767L436.637,152.428L451.408,153.312L457.726,183.183L485.164,165.379L526.92,159.699L557.014,177.545L612.652,211.018C679.009,226.066 740.505,264.146 797.138,325.26L862.813,423.477L891.583,560.826L883.273,683.32L814.268,809.924L734.431,894.384L644.495,926.906L497.146,954.121L361.064,940.647L284.8,894.232Z" style="fill:url(#_Linear1);"/>
|
|
||||||
<path d="M699.932,887.255L634.427,825.291L597.884,782.352L594.906,738.956L610.14,709.396L643.207,699.954L685,710.111L730.425,736.425L765.204,778.79L775.166,849.531L719.381,894.082L699.932,887.255Z" style="fill:url(#_Linear2);"/>
|
|
||||||
<g transform="matrix(-0.765945,0,0,1,839.727,5.47434)">
|
|
||||||
<clipPath id="_clip3">
|
|
||||||
<path d="M699.932,887.255L634.427,825.291L597.884,782.352L594.906,738.956L610.14,709.396L643.207,699.954L685,710.111L730.425,736.425L765.204,778.79L775.166,849.531L719.381,894.082L699.932,887.255Z"/>
|
|
||||||
</clipPath>
|
|
||||||
<g clip-path="url(#_clip3)">
|
|
||||||
<g transform="matrix(-1.18516,0,0,0.907769,1039.04,88.3496)">
|
|
||||||
<use xlink:href="#_Image4" x="223.969" y="674.21" width="152.098px" height="213.852px" transform="matrix(0.994105,0,0,0.999308,0,0)"/>
|
|
||||||
</g>
|
|
||||||
</g>
|
|
||||||
</g>
|
|
||||||
<path d="M311.699,713.521C189.178,639.091 164.299,526.77 191.824,394.113L135.136,476.434L122.004,547.53C143.022,614.014 174.522,676.199 225.005,730.598C210.601,754.156 201.894,776.601 197.955,798.114L245.803,841.67C247.274,812.1 254.934,783.047 270.614,754.664L311.699,713.521Z" style="fill-opacity:0.22;"/>
|
|
||||||
<g transform="matrix(-1,0,0,1,1022.04,2.74442)">
|
|
||||||
<path d="M311.699,713.521C189.178,639.091 164.299,526.77 191.824,394.113L135.136,476.434L122.004,547.53C143.022,614.014 174.522,676.199 225.005,730.598C210.601,754.156 201.894,776.601 197.955,798.114L245.803,841.67C247.274,812.1 254.934,783.047 270.614,754.664L311.699,713.521Z" style="fill-opacity:0.22;"/>
|
|
||||||
</g>
|
|
||||||
<path d="M354.92,650.818L420.009,663.185L493.368,666.379L554.826,665.251L620.19,658.511L658.169,651.428L671.428,644.802L673.265,627.093L659.898,611.845L625.422,609.244L599.275,591.212L568.632,556.79L542.9,534.336L515.052,528.253L480.412,532.71L455.2,552.337L428.514,578.155L405.312,599.359L374.228,612.097L355.342,614.456L340.75,630.308L341.568,645.341L354.92,650.818Z" style="fill:url(#_Linear5);"/>
|
|
||||||
<path d="M257.168,949.32L317.434,876.747L364.928,810.6L384.1,743.934L378.759,714.719L376.844,685.849L374.836,659.954L448.734,664.2L511.462,667.602L571.339,665.091L632.796,658.836L648.232,656.882L649.937,697.808L608.105,717.702L598.45,738.594L592.286,761.642L604.743,796.309L639.595,825.803L649.872,840.757L558.219,895.152L502.124,907.569L425.781,923.496L333.29,931.298L286.269,936.907L257.168,949.32Z" style="fill:url(#_Linear6);"/>
|
|
||||||
<g transform="matrix(1,0,0,1.30081,-1.77636e-15,-196.488)">
|
|
||||||
<path d="M374.165,685.268C463.946,706.599 553.728,707.491 643.51,688.593L641.903,653.199C549.263,671.731 459.645,672.22 373.059,654.611L374.165,685.268Z" style="fill-opacity:0.18;"/>
|
|
||||||
</g>
|
|
||||||
<path d="M459.633,571.457C476.7,536.091 530.064,535.913 553.1,568.767C520.703,551.407 489.553,552.374 459.633,571.457Z" style="fill:white;"/>
|
|
||||||
<g transform="matrix(1,0,0,1,0.223468,-2.61949)">
|
|
||||||
<path d="M355.3,267.232C500.64,173.156 720.699,241.362 793.691,423.582C766.716,384.84 735.725,357.078 697.53,349.014L717.306,335.248C698.537,321.49 675.794,320.957 651.039,327.119C652.235,315.768 658.995,306.991 674.188,302.115C641.864,287.427 617.356,289.473 596.258,298.818C597.049,286.116 605.827,278.087 620.068,273.254C589.192,267.477 564.13,270.926 544.651,283.232C545.822,271.831 550.709,260.943 560.913,250.79C517.498,257.095 492.995,267.925 482.892,282.202C477.311,269.499 477.274,257.221 487.625,245.739C439.161,252.932 421.555,265.094 410.355,278.286C407.697,269.01 407.705,260.632 410.853,253.316C389.633,254.773 372.178,260.663 355.3,267.232Z" style="fill:rgb(255,213,95);"/>
|
|
||||||
</g>
|
|
||||||
<path d="M475.656,209.175C479.639,175.037 503.437,173.299 532.412,180.026C507.242,183.404 486.969,195.251 473.705,219.215L475.656,209.175Z" style="fill:rgb(255,215,101);"/>
|
|
||||||
<g transform="matrix(0.114323,-0.655229,0.82741,0.144365,224.632,497.317)">
|
|
||||||
<path d="M475.656,209.175C479.639,175.037 503.437,173.299 532.412,180.026C507.242,183.404 486.969,195.251 473.705,219.215L475.656,209.175Z" style="fill:rgb(255,215,101);"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(1.6739,1.15217e-16,-1.15217e-16,-0.733075,-341.46,1039.77)">
|
|
||||||
<path d="M447.449,560.911C468.179,536.963 546.237,539.305 565.638,560.831C533.166,555.541 477.296,553.494 447.449,560.911Z" style="fill:white;"/>
|
|
||||||
</g>
|
|
||||||
<path d="M348.201,622.341C395.549,653.534 622.351,660.854 661.936,616.729L677.568,633.834L667.044,650.308L557.802,667.518L498.074,670.562L446.718,666.416L391.404,658.406L348.154,652.501L340.161,637.119L348.201,622.341Z" style="fill:rgb(199,68,6);"/>
|
|
||||||
</g>
|
|
||||||
<g id="Black-outline" serif:id="Black outline" transform="matrix(1.02317,0,0,1.02317,-11.55,-17.8333)">
|
|
||||||
<path d="M373.389,657.919C376.285,676.334 377.04,695.016 375.326,714.008" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
|
||||||
<path d="M645.931,654.961C646.158,669.958 647.22,684.853 648.975,699.661" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
|
||||||
<path d="M290.084,534.662C276.554,533.535 264.892,530.024 254.279,525.175C276.732,555.341 305.316,569.76 338.631,572.029L290.084,534.662Z"/>
|
|
||||||
<g transform="matrix(0.94177,0,0,0.94909,28.8868,3.79501)">
|
|
||||||
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(0.112099,0.0552506,-0.0673118,0.136571,455.367,509.409)">
|
|
||||||
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(-0.112099,0.0552506,0.0673118,0.136571,560.529,509.492)">
|
|
||||||
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(-1,0,0,1,1013.33,-1.15187)">
|
|
||||||
<path d="M290.084,534.662C276.554,533.535 264.892,530.024 254.279,525.175C276.732,555.341 305.316,569.76 338.631,572.029L290.084,534.662Z"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(-0.94177,0,0,0.94909,984.44,2.64314)">
|
|
||||||
<ellipse cx="338.022" cy="510.34" rx="88.911" ry="89.412"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(1,0,0,1,1.9047,-5.57346)">
|
|
||||||
<path d="M277.021,489.604C279.828,554.545 355.855,583.508 405.306,537.851C354.458,599.537 263.881,560.914 277.021,489.604Z" style="fill:white;"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(-1,0,0,1,1011.43,-5.7284)">
|
|
||||||
<path d="M277.021,489.604C279.828,554.545 355.855,583.508 405.306,537.851C354.458,599.537 263.881,560.914 277.021,489.604Z" style="fill:white;"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(0.973815,0,0,1.00246,4.71761,-0.508759)">
|
|
||||||
<path d="M407.22,206.891C107.655,339.384 134.447,630.03 314.615,708.305" style="fill:none;stroke:black;stroke-width:29.39px;"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(-0.973815,0,0,1.00246,1006.67,-1.31695)">
|
|
||||||
<path d="M461.559,196.756C119.768,256.762 111.059,642.544 320.305,711.486" style="fill:none;stroke:black;stroke-width:29.39px;"/>
|
|
||||||
</g>
|
|
||||||
<g id="vector-duck" serif:id="vector duck">
|
|
||||||
<path d="M240.912,850.71C248.043,740.231 325.609,685.992 371.268,715.193C386.487,724.926 392.506,757.72 358.575,816.753C327.005,871.68 300.465,894.596 288.329,903.447" style="fill:none;stroke:black;stroke-width:21.79px;"/>
|
|
||||||
<path d="M638.382,843.426C427.991,964.695 389.022,902.942 251.512,947.641L307.759,889.573" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
|
||||||
<path d="M770.991,853.754C779.364,764.998 730.67,727.923 666.385,704.966C629.568,691.819 580.483,723.886 595.974,772.596C606.285,805.016 650.54,839.029 707.786,886.778" style="fill:none;stroke:black;stroke-width:21.79px;"/>
|
|
||||||
<g transform="matrix(1,0,0,1,-1.87208,0.908099)">
|
|
||||||
<path d="M603.287,772.415C614.237,757.963 627.553,750.285 642.878,748.352C628.356,760.968 617.23,775.676 620.632,799.336C635.815,785.15 650.367,779.457 664.396,780.801C651.715,790.7 639.329,803.279 641.039,818.089C641.247,819.891 647.043,823.996 647.595,825.837C659.897,816.37 672.867,811.065 689.234,809.472C676.577,822.659 668.021,834.011 674.478,848.729L664.333,847.825L625.643,812.604L603.629,786.218L603.287,772.415Z"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(-0.969851,0.2437,0.2437,0.969851,773.329,-138.212)">
|
|
||||||
<path d="M603.287,772.415C614.237,757.963 627.553,750.285 642.878,748.352C628.356,760.968 617.23,775.676 620.632,799.336C635.815,785.15 650.367,779.457 664.396,780.801C651.715,790.7 639.329,803.279 641.039,818.089C641.247,819.891 647.043,823.996 647.595,825.837C659.897,816.37 672.867,811.065 689.234,809.472C676.577,822.659 668.021,834.011 674.478,848.729L664.333,847.825L625.643,812.604L603.629,786.218L603.287,772.415Z"/>
|
|
||||||
</g>
|
|
||||||
<path d="M511.787,670.044C461.061,671.835 411.878,662.84 361.322,653.92C329.071,648.229 335.56,616.432 361.693,615.181C391.498,613.754 411.83,601.737 437.593,569.084C459.063,541.872 482.443,528.143 506.834,529.767" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
|
||||||
<g transform="matrix(-1,0,0,1,1014.44,-0.213451)">
|
|
||||||
<path d="M511.787,670.044C461.061,671.835 411.878,662.84 361.322,653.92C329.071,648.229 335.56,616.432 361.693,615.181C391.498,613.754 411.83,601.737 437.593,569.084C459.063,541.872 482.443,528.143 506.834,529.767" style="fill:none;stroke:black;stroke-width:15.73px;"/>
|
|
||||||
</g>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(2.4586,0,0,2.5497,-444.527,-690.434)">
|
|
||||||
<ellipse cx="312.566" cy="450.751" rx="10.63" ry="10.48" style="fill:white;"/>
|
|
||||||
</g>
|
|
||||||
<g transform="matrix(2.4586,0,0,2.5497,-127.75,-690.991)">
|
|
||||||
<ellipse cx="312.566" cy="450.751" rx="10.63" ry="10.48" style="fill:white;"/>
|
|
||||||
</g>
|
|
||||||
<path d="M505.738,698.061L578.879,713.989" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
|
||||||
<path d="M422.781,709.6L568.438,743.041" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
|
||||||
<path d="M419.941,738.409L565.688,772.989" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
|
||||||
<path d="M408.6,787.08L510.634,810.689" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
|
||||||
<path d="M397.571,815.956L500.93,840.219" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
|
||||||
<path d="M386.763,844.926L454.065,861.974" style="fill:none;stroke:black;stroke-width:12.1px;"/>
|
|
||||||
<path d="M459.169,919.169C512.194,898.262 539.171,867.298 535.241,824.402C568.052,818.31 598.499,817.058 625.84,822.165" style="fill:none;stroke:black;stroke-width:16.95px;"/>
|
|
||||||
<path d="M366.219,241.106C389.605,229.261 413.371,220.601 438.247,217.5C416.795,202.419 418.72,174.582 444.22,162.47C442.086,178.175 447.633,193.354 464.772,207.738C468.721,167.57 530.015,162.087 545.674,184.112C526.45,189.314 513.082,197.344 504.566,207.717C522.403,208.119 540.706,207.86 556.2,210.609L566.935,168.471C536.388,146.208 495.718,142.166 464.65,166.705C467.703,133.264 419.536,128.364 404.624,178.47L366.219,241.106Z"/>
|
|
||||||
<path d="M392.617,924.576C428.953,936.938 467.84,943.636 508.258,943.636C708.944,943.636 871.876,778.49 871.876,575.076C871.876,382.463 725.788,224.162 539.898,207.895L554.137,173.696L554.485,168.187C757.218,191.602 914.895,366.003 914.895,577.383C914.895,804.698 732.549,989.249 507.949,989.249C435.381,989.249 367.223,969.983 308.199,936.232L392.617,924.576ZM279.206,917.988C171.663,843.819 101.002,718.887 101.002,577.383C101.002,383.006 234.333,219.898 413.398,176.712L424.375,216.389C264.082,254.803 144.64,400.913 144.64,575.076C144.64,703.735 209.822,817.086 308.514,883.023L279.206,917.988Z"/>
|
|
||||||
<path d="M714.938,895.223L647.287,836.693L616.06,855.308L549.158,889.412L459.845,919.216L390.213,928.828L429.291,950.712L535.832,960.1L586.137,952.591L662.254,931.896L714.938,895.223Z"/>
|
|
||||||
<path d="M423.538,929.39C509.164,917.593 580.815,890.465 640.827,850.566C635.677,886.828 622.639,918.218 594.006,939.977C530.254,930.953 474.955,928.632 423.538,929.39Z" style="fill:url(#_Linear7);"/>
|
|
||||||
</g>
|
|
||||||
</g>
|
|
||||||
<defs>
|
|
||||||
<linearGradient id="_Linear1" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-52.3962,375.121,-375.121,-52.3962,471.134,384.463)"><stop offset="0" style="stop-color:rgb(255,176,44);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,73,2);stop-opacity:1"/></linearGradient>
|
|
||||||
<linearGradient id="_Linear2" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(28.6198,-84.8913,84.8913,28.6198,647.831,831.55)"><stop offset="0" style="stop-color:rgb(255,73,2);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,176,44);stop-opacity:1"/></linearGradient>
|
|
||||||
<image id="_Image4" width="153px" height="214px" xlink:href="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEAYABgAAD/2wBDAAUDBAQEAwUEBAQFBQUGBwwIBwcHBw8LCwkMEQ8SEhEPERETFhwXExQaFRERGCEYGh0dHx8fExciJCIeJBweHx7/2wBDAQUFBQcGBw4ICA4eFBEUHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh7/wAARCADWAJkDAREAAhEBAxEB/8QAGQABAQEBAQEAAAAAAAAAAAAAAwACBwYF/8QAGBABAQEBAQAAAAAAAAAAAAAAAgABEhH/xAAbAQADAQADAQAAAAAAAAAAAAAAAQMCBAUGB//EABYRAQEBAAAAAAAAAAAAAAAAAAABEf/aAAwDAQACEQMRAD8A63fAX1BQFAUBQFAUBQFAUBQFAZShqQSUNyBSmpIJK0pIJqakgUptyCampIx1DWPS0XSqAoCgKAoCgKAoCgKAwlDUgkobkE1aVkClNuQbU1JAtTUkElNSQTU25GOptY9Vcd0KgKAoCgKAoCgKAoDDUNSCSmpIJqakgUptyCampIJKakgWpqSCSm3IJKakjHU2sewuM86oCgKAoCgKAoCgMJQ1IJqakgWpqSCSmpIJqbcgUpqSCSmpIJqbcgmpqSCSmpIx1PGse1uK8yoCgKAoCgKAoA2obkGlNuQLU1JBJTUkElPG5AtTUkElNSQSU1JBNTbkClNSQSU1JGOptY93cR5VQFAUBQFAUAbUNyCam3IJKakgmpqSBampIJKbcgmpqSBampIJKakgmptyBampINKakjHU8ax0C4byKgKAoCgLd8gDShuQTU25AtTVkElNuQTU1JBNTUkElNuQLU1JBNTUkElNSQLU25BJWlJBJTUkY6hrHRrhPGqAoCgLd8gDahuQSU1JAtTUkE1NuQSU1JBNTUkClNSQSU25BNTUkC1NSQSVpSQSUNyCatKSBSmpIx1DWOmXBeJUBQFu+QBtQ3IFqakgkpqSCam3IFqakgkpqSCampIJqbcgUpqSCampIJq0pIJKakgWptyCampIJKakjHU2sdRuveFUBbvkASUNyCSmpIJqakgkpqSBam3IJqakgkpqSCam3IFqakgmpqSCampIFq0pIJKbcgkpqSBSmpIJKakjHUNY6vde8Ct3yAJKG5BNTUkE1NSQLU1JBJTUkE1NuQLU1JBJWlJBJQpIJq03IFKakgkp4pIJqakgmptyBSmpIJqakgkpqSMdQeOt7vl1z5/INKG5BNTbkClPFJBJTUkE1NSQKU1JBJTbkE1NSQLU1JBtTbkC1aUkE1NSQSU1JAtTUkElNuQSU1JBJTUkC1NSRjqbWOupXWPnsgmpqSBSmpIJqbcgkpqSBampIJK0pIJKbcgWoUkE1aUkElNSQTU25ApTUkElNSQSU25AtTUkElNSQTU1JApTUkZ6g8dcautfPpBJTUkE1NSQLU25BJTUkE1aUkC1NuQSU1JBNTUkClNSQSU25BJTUkE1NSQSU1JAtTbkE1NSQSU8UkClNSQe77NtQHWErrXgJBJTUkE1NuQSU8UkClNSQTVpuQSU1JBJTUkC1NSQTU1JBJTbkE1NSQKU1JBJTUkElNuQLU1JBJTUkHu+zbUBQHU2rrnhJBJWlJAtTbkElNSQTU1JBJTbkC1NSQSU1JBNTUkElNuQKU1JBJTUkE1NSQSU1JAtTbkElNSQe77NtQFAUB01q694iQSU1JBNWm5BNTUkClNSQTU25BJTUkElNSQKU25BNTVkElNuQTU1JApTUkElNSQSU25B7vs21AUBQFAdIauC8ZIJKeNyCampIFKakgmp4pIJKbcgWpqSCSnikgmpqSCSm3IFKakgkpqSCampIFKakjG77NpQFAUBQFAdCauE8fIJKakgkpqSCampIFKa
kgkptyCSmpIFqakg0ptyBampIJqakgWpqSCSmpIxNpQFAUBQFAUB71q4bycgkpqSBampIJKakgmpqSCSm3IFqakgkpqSCSmpIJqbcgWrSkgkoUkYm0oCgKAoCgKAoD3CVxHl5AtTUkElNSQTU1JApTbkElNSQSU1JApTUkElNuQTU1JBJWlJGIaUBQFAUBQFAUBQHsmrivNyBSmpIJKakgkptyCatKyCSm3IFqFJBNWlJBJTUkElNuRiGlAUBQFAUBQFAUBQHrErjPPyCampIJKakgmpqSBatNyCShSQTU1JAtWlJBJQ3IzNpQFAUBQFAUBQFAUBQHp2rjujkElaUkClNSQTU25BJTUkElCkgWrSkgkpqSMwagKAoCgKAoCgKAoCgKA9ElQdPIFq0pIJKakgmobkC1aUkElNSQSU1JGYNQFAUBQFAUBQFAUBQFAUB9xqk6uQTU1JApTxSQTUNyBatKSDSmpIzBqAoCgKAoCgKAoCgKAoCgKA+u1TdfIFKcUkE1NuQTU1JBLZqSMwagKAoCgKAoCgKAoCgKAoCgKA/9k="/>
|
|
||||||
<linearGradient id="_Linear5" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-39.3403,137.423,-137.423,-39.3403,545.523,573.246)"><stop offset="0" style="stop-color:rgb(255,200,41);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(255,73,2);stop-opacity:1"/></linearGradient>
|
|
||||||
<linearGradient id="_Linear6" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(1.01113,-68.2054,68.2054,1.01113,482.996,741.463)"><stop offset="0" style="stop-color:white;stop-opacity:1"/><stop offset="1" style="stop-color:rgb(179,179,179);stop-opacity:1"/></linearGradient>
|
|
||||||
<linearGradient id="_Linear7" x1="0" y1="0" x2="1" y2="0" gradientUnits="userSpaceOnUse" gradientTransform="matrix(-7.13599,-34.117,34.117,-7.13599,578.793,922.144)"><stop offset="0" style="stop-color:rgb(164,164,164);stop-opacity:1"/><stop offset="1" style="stop-color:rgb(106,106,106);stop-opacity:1"/></linearGradient>
|
|
||||||
</defs>
|
|
||||||
</svg>
|
|
Before Width: | Height: | Size: 18 KiB |
@ -54,7 +54,7 @@ nav:
|
|||||||
- Get started:
|
- Get started:
|
||||||
- Home: index.md
|
- Home: index.md
|
||||||
- Installation: installation.md
|
- Installation: installation.md
|
||||||
# - Docling v2: v2.md
|
- Docling v2: v2.md
|
||||||
# - Concepts:
|
# - Concepts:
|
||||||
# - Docling Document: concepts/document.md
|
# - Docling Document: concepts/document.md
|
||||||
# - Chunking: concepts/chunking.md
|
# - Chunking: concepts/chunking.md
|
||||||
|
726
poetry.lock
generated
726
poetry.lock
generated
@ -263,6 +263,20 @@ files = [
|
|||||||
pycodestyle = ">=2.11.0"
|
pycodestyle = ">=2.11.0"
|
||||||
tomli = {version = "*", markers = "python_version < \"3.11\""}
|
tomli = {version = "*", markers = "python_version < \"3.11\""}
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "babel"
|
||||||
|
version = "2.16.0"
|
||||||
|
description = "Internationalization utilities"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"},
|
||||||
|
{file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["freezegun (>=1.0,<2.0)", "pytest (>=6.0)", "pytest-cov"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "backports-tarfile"
|
name = "backports-tarfile"
|
||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
@ -347,6 +361,24 @@ d = ["aiohttp (>=3.10)"]
|
|||||||
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
|
jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"]
|
||||||
uvloop = ["uvloop (>=0.15.2)"]
|
uvloop = ["uvloop (>=0.15.2)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bleach"
|
||||||
|
version = "6.1.0"
|
||||||
|
description = "An easy safelist-based HTML-sanitizing tool."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "bleach-6.1.0-py3-none-any.whl", hash = "sha256:3225f354cfc436b9789c66c4ee030194bee0568fbf9cbdad3bc8b5c26c5f12b6"},
|
||||||
|
{file = "bleach-6.1.0.tar.gz", hash = "sha256:0a31f1837963c41d46bbf1331b8778e1308ea0791db03cc4e7357b97cf42a8fe"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
six = ">=1.9.0"
|
||||||
|
webencodings = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
css = ["tinycss2 (>=1.1.0,<1.3)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "certifi"
|
name = "certifi"
|
||||||
version = "2024.8.30"
|
version = "2024.8.30"
|
||||||
@ -912,6 +944,17 @@ url = "https://github.com/DS4SD/deepsearch-glm.git"
|
|||||||
reference = "c185c4f985ccd29a470a1cddd3bec43880b739ee"
|
reference = "c185c4f985ccd29a470a1cddd3bec43880b739ee"
|
||||||
resolved_reference = "c185c4f985ccd29a470a1cddd3bec43880b739ee"
|
resolved_reference = "c185c4f985ccd29a470a1cddd3bec43880b739ee"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "defusedxml"
|
||||||
|
version = "0.7.1"
|
||||||
|
description = "XML bomb protection for Python stdlib modules"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||||
|
files = [
|
||||||
|
{file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"},
|
||||||
|
{file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dill"
|
name = "dill"
|
||||||
version = "0.3.8"
|
version = "0.3.8"
|
||||||
@ -1150,6 +1193,20 @@ files = [
|
|||||||
[package.extras]
|
[package.extras]
|
||||||
tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"]
|
tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "fastjsonschema"
|
||||||
|
version = "2.20.0"
|
||||||
|
description = "Fastest Python implementation of JSON schema"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "fastjsonschema-2.20.0-py3-none-any.whl", hash = "sha256:5875f0b0fa7a0043a91e93a9b8f793bcbbba9691e7fd83dca95c28ba26d21f0a"},
|
||||||
|
{file = "fastjsonschema-2.20.0.tar.gz", hash = "sha256:3d48fc5300ee96f5d116f10fe6f28d938e6008f59a6a025c2649475b87f76a23"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "filelock"
|
name = "filelock"
|
||||||
version = "3.16.1"
|
version = "3.16.1"
|
||||||
@ -1409,6 +1466,23 @@ test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe,
|
|||||||
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
|
test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
|
||||||
tqdm = ["tqdm"]
|
tqdm = ["tqdm"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ghp-import"
|
||||||
|
version = "2.1.0"
|
||||||
|
description = "Copy your docs directly to the gh-pages branch."
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "ghp-import-2.1.0.tar.gz", hash = "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"},
|
||||||
|
{file = "ghp_import-2.1.0-py3-none-any.whl", hash = "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
python-dateutil = ">=2.8.1"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["flake8", "markdown", "twine", "wheel"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "gitdb"
|
name = "gitdb"
|
||||||
version = "4.0.11"
|
version = "4.0.11"
|
||||||
@ -2091,6 +2165,17 @@ traitlets = ">=5.3"
|
|||||||
docs = ["myst-parser", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"]
|
docs = ["myst-parser", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"]
|
||||||
test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"]
|
test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jupyterlab-pygments"
|
||||||
|
version = "0.3.0"
|
||||||
|
description = "Pygments theme using JupyterLab CSS variables"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780"},
|
||||||
|
{file = "jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jupyterlab-widgets"
|
name = "jupyterlab-widgets"
|
||||||
version = "3.0.13"
|
version = "3.0.13"
|
||||||
@ -2102,6 +2187,35 @@ files = [
|
|||||||
{file = "jupyterlab_widgets-3.0.13.tar.gz", hash = "sha256:a2966d385328c1942b683a8cd96b89b8dd82c8b8f81dda902bb2bc06d46f5bed"},
|
{file = "jupyterlab_widgets-3.0.13.tar.gz", hash = "sha256:a2966d385328c1942b683a8cd96b89b8dd82c8b8f81dda902bb2bc06d46f5bed"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "jupytext"
|
||||||
|
version = "1.16.4"
|
||||||
|
description = "Jupyter notebooks as Markdown documents, Julia, Python or R scripts"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "jupytext-1.16.4-py3-none-any.whl", hash = "sha256:76989d2690e65667ea6fb411d8056abe7cd0437c07bd774660b83d62acf9490a"},
|
||||||
|
{file = "jupytext-1.16.4.tar.gz", hash = "sha256:28e33f46f2ce7a41fb9d677a4a2c95327285579b64ca104437c4b9eb1e4174e9"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
markdown-it-py = ">=1.0"
|
||||||
|
mdit-py-plugins = "*"
|
||||||
|
nbformat = "*"
|
||||||
|
packaging = "*"
|
||||||
|
pyyaml = "*"
|
||||||
|
tomli = {version = "*", markers = "python_version < \"3.11\""}
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["autopep8", "black", "flake8", "gitpython", "ipykernel", "isort", "jupyter-fs (>=1.0)", "jupyter-server (!=2.11)", "nbconvert", "pre-commit", "pytest", "pytest-cov (>=2.6.1)", "pytest-randomly", "pytest-xdist", "sphinx-gallery (<0.8)"]
|
||||||
|
docs = ["myst-parser", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"]
|
||||||
|
test = ["pytest", "pytest-randomly", "pytest-xdist"]
|
||||||
|
test-cov = ["ipykernel", "jupyter-server (!=2.11)", "nbconvert", "pytest", "pytest-cov (>=2.6.1)", "pytest-randomly", "pytest-xdist"]
|
||||||
|
test-external = ["autopep8", "black", "flake8", "gitpython", "ipykernel", "isort", "jupyter-fs (>=1.0)", "jupyter-server (!=2.11)", "nbconvert", "pre-commit", "pytest", "pytest-randomly", "pytest-xdist", "sphinx-gallery (<0.8)"]
|
||||||
|
test-functional = ["pytest", "pytest-randomly", "pytest-xdist"]
|
||||||
|
test-integration = ["ipykernel", "jupyter-server (!=2.11)", "nbconvert", "pytest", "pytest-randomly", "pytest-xdist"]
|
||||||
|
test-ui = ["calysto-bash"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "keyring"
|
name = "keyring"
|
||||||
version = "25.4.1"
|
version = "25.4.1"
|
||||||
@ -2518,6 +2632,21 @@ html5 = ["html5lib"]
|
|||||||
htmlsoup = ["BeautifulSoup4"]
|
htmlsoup = ["BeautifulSoup4"]
|
||||||
source = ["Cython (==0.29.37)"]
|
source = ["Cython (==0.29.37)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "markdown"
|
||||||
|
version = "3.7"
|
||||||
|
description = "Python implementation of John Gruber's Markdown."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "Markdown-3.7-py3-none-any.whl", hash = "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"},
|
||||||
|
{file = "markdown-3.7.tar.gz", hash = "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"]
|
||||||
|
testing = ["coverage", "pyyaml"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "markdown-it-py"
|
name = "markdown-it-py"
|
||||||
version = "3.0.0"
|
version = "3.0.0"
|
||||||
@ -2749,6 +2878,25 @@ files = [
|
|||||||
{file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
|
{file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mdit-py-plugins"
|
||||||
|
version = "0.4.2"
|
||||||
|
description = "Collection of plugins for markdown-it-py"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "mdit_py_plugins-0.4.2-py3-none-any.whl", hash = "sha256:0c673c3f889399a33b95e88d2f0d111b4447bdfea7f237dab2d488f459835636"},
|
||||||
|
{file = "mdit_py_plugins-0.4.2.tar.gz", hash = "sha256:5f2cd1fdb606ddf152d37ec30e46101a60512bc0e5fa1a7002c36647b09e26b5"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
markdown-it-py = ">=1.0.0,<4.0.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
code-style = ["pre-commit"]
|
||||||
|
rtd = ["myst-parser", "sphinx-book-theme"]
|
||||||
|
testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mdurl"
|
name = "mdurl"
|
||||||
version = "0.1.2"
|
version = "0.1.2"
|
||||||
@ -2775,6 +2923,17 @@ files = [
|
|||||||
numpy = "*"
|
numpy = "*"
|
||||||
pandas = "*"
|
pandas = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mergedeep"
|
||||||
|
version = "1.3.4"
|
||||||
|
description = "A deep merge function for 🐍."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
files = [
|
||||||
|
{file = "mergedeep-1.3.4-py3-none-any.whl", hash = "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"},
|
||||||
|
{file = "mergedeep-1.3.4.tar.gz", hash = "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "milvus-lite"
|
name = "milvus-lite"
|
||||||
version = "2.4.10"
|
version = "2.4.10"
|
||||||
@ -2791,6 +2950,122 @@ files = [
|
|||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
tqdm = "*"
|
tqdm = "*"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mistune"
|
||||||
|
version = "3.0.2"
|
||||||
|
description = "A sane and fast Markdown parser with useful plugins and renderers"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.7"
|
||||||
|
files = [
|
||||||
|
{file = "mistune-3.0.2-py3-none-any.whl", hash = "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205"},
|
||||||
|
{file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mkdocs"
|
||||||
|
version = "1.6.1"
|
||||||
|
description = "Project documentation with Markdown."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "mkdocs-1.6.1-py3-none-any.whl", hash = "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"},
|
||||||
|
{file = "mkdocs-1.6.1.tar.gz", hash = "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
click = ">=7.0"
|
||||||
|
colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""}
|
||||||
|
ghp-import = ">=1.0"
|
||||||
|
jinja2 = ">=2.11.1"
|
||||||
|
markdown = ">=3.3.6"
|
||||||
|
markupsafe = ">=2.0.1"
|
||||||
|
mergedeep = ">=1.3.4"
|
||||||
|
mkdocs-get-deps = ">=0.2.0"
|
||||||
|
packaging = ">=20.5"
|
||||||
|
pathspec = ">=0.11.1"
|
||||||
|
pyyaml = ">=5.1"
|
||||||
|
pyyaml-env-tag = ">=0.1"
|
||||||
|
watchdog = ">=2.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
i18n = ["babel (>=2.9.0)"]
|
||||||
|
min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.4)", "jinja2 (==2.11.1)", "markdown (==3.3.6)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "mkdocs-get-deps (==0.2.0)", "packaging (==20.5)", "pathspec (==0.11.1)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "watchdog (==2.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mkdocs-get-deps"
|
||||||
|
version = "0.2.0"
|
||||||
|
description = "MkDocs extension that lists all dependencies according to a mkdocs.yml file"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "mkdocs_get_deps-0.2.0-py3-none-any.whl", hash = "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"},
|
||||||
|
{file = "mkdocs_get_deps-0.2.0.tar.gz", hash = "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
mergedeep = ">=1.3.4"
|
||||||
|
platformdirs = ">=2.2.0"
|
||||||
|
pyyaml = ">=5.1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mkdocs-jupyter"
|
||||||
|
version = "0.25.0"
|
||||||
|
description = "Use Jupyter in mkdocs websites"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
files = [
|
||||||
|
{file = "mkdocs_jupyter-0.25.0-py3-none-any.whl", hash = "sha256:d83d71deef19f0401505945bf92ec3bd5b40615af89308e72d5112929f8ee00b"},
|
||||||
|
{file = "mkdocs_jupyter-0.25.0.tar.gz", hash = "sha256:e26c1d341916bc57f96ea3f93d8d0a88fc77c87d4cee222f66d2007798d924f5"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
ipykernel = ">6.0.0,<7.0.0"
|
||||||
|
jupytext = ">1.13.8,<2"
|
||||||
|
mkdocs = ">=1.4.0,<2"
|
||||||
|
mkdocs-material = ">9.0.0"
|
||||||
|
nbconvert = ">=7.2.9,<8"
|
||||||
|
pygments = ">2.12.0"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mkdocs-material"
|
||||||
|
version = "9.5.40"
|
||||||
|
description = "Documentation that simply works"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "mkdocs_material-9.5.40-py3-none-any.whl", hash = "sha256:8e7a16ada34e79a7b6459ff2602584222f522c738b6a023d1bea853d5049da6f"},
|
||||||
|
{file = "mkdocs_material-9.5.40.tar.gz", hash = "sha256:b69d70e667ec51fc41f65e006a3184dd00d95b2439d982cb1586e4c018943156"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
babel = ">=2.10,<3.0"
|
||||||
|
colorama = ">=0.4,<1.0"
|
||||||
|
jinja2 = ">=3.0,<4.0"
|
||||||
|
markdown = ">=3.2,<4.0"
|
||||||
|
mkdocs = ">=1.6,<2.0"
|
||||||
|
mkdocs-material-extensions = ">=1.3,<2.0"
|
||||||
|
paginate = ">=0.5,<1.0"
|
||||||
|
pygments = ">=2.16,<3.0"
|
||||||
|
pymdown-extensions = ">=10.2,<11.0"
|
||||||
|
regex = ">=2022.4"
|
||||||
|
requests = ">=2.26,<3.0"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
git = ["mkdocs-git-committers-plugin-2 (>=1.1,<2.0)", "mkdocs-git-revision-date-localized-plugin (>=1.2.4,<2.0)"]
|
||||||
|
imaging = ["cairosvg (>=2.6,<3.0)", "pillow (>=10.2,<11.0)"]
|
||||||
|
recommended = ["mkdocs-minify-plugin (>=0.7,<1.0)", "mkdocs-redirects (>=1.2,<2.0)", "mkdocs-rss-plugin (>=1.6,<2.0)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mkdocs-material-extensions"
|
||||||
|
version = "1.3.1"
|
||||||
|
description = "Extension pack for Python Markdown and MkDocs Material."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "mkdocs_material_extensions-1.3.1-py3-none-any.whl", hash = "sha256:adff8b62700b25cb77b53358dad940f3ef973dd6db797907c49e3c2ef3ab4e31"},
|
||||||
|
{file = "mkdocs_material_extensions-1.3.1.tar.gz", hash = "sha256:10c9511cea88f568257f960358a467d12b970e1f7b2c0e5fb2bb48cab1928443"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "more-itertools"
|
name = "more-itertools"
|
||||||
version = "10.5.0"
|
version = "10.5.0"
|
||||||
@ -3010,6 +3285,86 @@ files = [
|
|||||||
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
|
{file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nbclient"
|
||||||
|
version = "0.10.0"
|
||||||
|
description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8.0"
|
||||||
|
files = [
|
||||||
|
{file = "nbclient-0.10.0-py3-none-any.whl", hash = "sha256:f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f"},
|
||||||
|
{file = "nbclient-0.10.0.tar.gz", hash = "sha256:4b3f1b7dba531e498449c4db4f53da339c91d449dc11e9af3a43b4eb5c5abb09"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
jupyter-client = ">=6.1.12"
|
||||||
|
jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
|
||||||
|
nbformat = ">=5.1"
|
||||||
|
traitlets = ">=5.4"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["pre-commit"]
|
||||||
|
docs = ["autodoc-traits", "mock", "moto", "myst-parser", "nbclient[test]", "sphinx (>=1.7)", "sphinx-book-theme", "sphinxcontrib-spelling"]
|
||||||
|
test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>=7.0.0)", "pytest (>=7.0,<8)", "pytest-asyncio", "pytest-cov (>=4.0)", "testpath", "xmltodict"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nbconvert"
|
||||||
|
version = "7.16.4"
|
||||||
|
description = "Converting Jupyter Notebooks (.ipynb files) to other formats. Output formats include asciidoc, html, latex, markdown, pdf, py, rst, script. nbconvert can be used both as a Python library (`import nbconvert`) or as a command line tool (invoked as `jupyter nbconvert ...`)."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "nbconvert-7.16.4-py3-none-any.whl", hash = "sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3"},
|
||||||
|
{file = "nbconvert-7.16.4.tar.gz", hash = "sha256:86ca91ba266b0a448dc96fa6c5b9d98affabde2867b363258703536807f9f7f4"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
beautifulsoup4 = "*"
|
||||||
|
bleach = "!=5.0.0"
|
||||||
|
defusedxml = "*"
|
||||||
|
jinja2 = ">=3.0"
|
||||||
|
jupyter-core = ">=4.7"
|
||||||
|
jupyterlab-pygments = "*"
|
||||||
|
markupsafe = ">=2.0"
|
||||||
|
mistune = ">=2.0.3,<4"
|
||||||
|
nbclient = ">=0.5.0"
|
||||||
|
nbformat = ">=5.7"
|
||||||
|
packaging = "*"
|
||||||
|
pandocfilters = ">=1.4.1"
|
||||||
|
pygments = ">=2.4.1"
|
||||||
|
tinycss2 = "*"
|
||||||
|
traitlets = ">=5.1"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
all = ["flaky", "ipykernel", "ipython", "ipywidgets (>=7.5)", "myst-parser", "nbsphinx (>=0.2.12)", "playwright", "pydata-sphinx-theme", "pyqtwebengine (>=5.15)", "pytest (>=7)", "sphinx (==5.0.2)", "sphinxcontrib-spelling", "tornado (>=6.1)"]
|
||||||
|
docs = ["ipykernel", "ipython", "myst-parser", "nbsphinx (>=0.2.12)", "pydata-sphinx-theme", "sphinx (==5.0.2)", "sphinxcontrib-spelling"]
|
||||||
|
qtpdf = ["pyqtwebengine (>=5.15)"]
|
||||||
|
qtpng = ["pyqtwebengine (>=5.15)"]
|
||||||
|
serve = ["tornado (>=6.1)"]
|
||||||
|
test = ["flaky", "ipykernel", "ipywidgets (>=7.5)", "pytest (>=7)"]
|
||||||
|
webpdf = ["playwright"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "nbformat"
|
||||||
|
version = "5.10.4"
|
||||||
|
description = "The Jupyter Notebook format"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b"},
|
||||||
|
{file = "nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
fastjsonschema = ">=2.15"
|
||||||
|
jsonschema = ">=2.6"
|
||||||
|
jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
|
||||||
|
traitlets = ">=5.1"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
docs = ["myst-parser", "pydata-sphinx-theme", "sphinx", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"]
|
||||||
|
test = ["pep440", "pre-commit", "pytest", "testpath"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nbqa"
|
name = "nbqa"
|
||||||
version = "1.9.0"
|
version = "1.9.0"
|
||||||
@ -3442,9 +3797,9 @@ files = [
|
|||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
numpy = [
|
numpy = [
|
||||||
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
|
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
|
||||||
|
{version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
|
||||||
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
|
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
|
||||||
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
|
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
|
||||||
{version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -3524,6 +3879,21 @@ files = [
|
|||||||
{file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
|
{file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "paginate"
|
||||||
|
version = "0.5.7"
|
||||||
|
description = "Divides large result sets into pages for easier browsing"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "paginate-0.5.7-py2.py3-none-any.whl", hash = "sha256:b885e2af73abcf01d9559fd5216b57ef722f8c42affbb63942377668e35c7591"},
|
||||||
|
{file = "paginate-0.5.7.tar.gz", hash = "sha256:22bd083ab41e1a8b4f3690544afb2c60c25e5c9a63a30fa2f483f6c60c8e5945"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
dev = ["pytest", "tox"]
|
||||||
|
lint = ["black"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pandas"
|
name = "pandas"
|
||||||
version = "2.2.3"
|
version = "2.2.3"
|
||||||
@ -3578,8 +3948,8 @@ files = [
|
|||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
numpy = [
|
numpy = [
|
||||||
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
|
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
|
||||||
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
|
|
||||||
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
|
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
|
||||||
|
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
|
||||||
]
|
]
|
||||||
python-dateutil = ">=2.8.2"
|
python-dateutil = ">=2.8.2"
|
||||||
pytz = ">=2020.1"
|
pytz = ">=2020.1"
|
||||||
@ -3625,6 +3995,17 @@ files = [
|
|||||||
numpy = ">=1.23.5"
|
numpy = ">=1.23.5"
|
||||||
types-pytz = ">=2022.1.1"
|
types-pytz = ">=2022.1.1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pandocfilters"
|
||||||
|
version = "1.5.1"
|
||||||
|
description = "Utilities for writing pandoc filters in python"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||||
|
files = [
|
||||||
|
{file = "pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc"},
|
||||||
|
{file = "pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "parso"
|
name = "parso"
|
||||||
version = "0.8.4"
|
version = "0.8.4"
|
||||||
@ -4340,6 +4721,24 @@ tomlkit = ">=0.10.1"
|
|||||||
spelling = ["pyenchant (>=3.2,<4.0)"]
|
spelling = ["pyenchant (>=3.2,<4.0)"]
|
||||||
testutils = ["gitpython (>3)"]
|
testutils = ["gitpython (>3)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pymdown-extensions"
|
||||||
|
version = "10.11.2"
|
||||||
|
description = "Extension pack for Python Markdown."
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "pymdown_extensions-10.11.2-py3-none-any.whl", hash = "sha256:41cdde0a77290e480cf53892f5c5e50921a7ee3e5cd60ba91bf19837b33badcf"},
|
||||||
|
{file = "pymdown_extensions-10.11.2.tar.gz", hash = "sha256:bc8847ecc9e784a098efd35e20cba772bc5a1b529dfcef9dc1972db9021a1049"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
markdown = ">=3.6"
|
||||||
|
pyyaml = "*"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
extra = ["pygments (>=2.12)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pymilvus"
|
name = "pymilvus"
|
||||||
version = "2.4.8"
|
version = "2.4.8"
|
||||||
@ -4460,128 +4859,128 @@ testing = ["filelock"]
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "python-bidi"
|
name = "python-bidi"
|
||||||
version = "0.6.0"
|
version = "0.6.1"
|
||||||
description = "Python Bidi layout wrapping the Rust crate unicode-bidi"
|
description = "Python Bidi layout wrapping the Rust crate unicode-bidi"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = "*"
|
python-versions = "*"
|
||||||
files = [
|
files = [
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:032b16f70c5d4f48c8dc5a4ade071826a0fb64172e0435d49deba6ea66fc5d42"},
|
{file = "python_bidi-0.6.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:e4d5f46b52a6057540a1d09cc2efcc5ddc99319f4fd9ea1de0007878e08e1f3c"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:53b50f6ad3e633dcc74fc96bb959bf375a84db36db380d76f9c189ce33099ede"},
|
{file = "python_bidi-0.6.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5ca2c05e5a041ff8698e638b196fd1d7629f47e55a5412657abdf5cb09e72b79"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d2928ae4aedff4f49ac2e334d176b9488762276bae8b32045c3b91f41c447e4"},
|
{file = "python_bidi-0.6.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ea67606546af9eb40c6cfe5d1551fc1a9c96d7f82125c90a776d253fde8d64"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8f3e5ef9093699300868f9c92975e4d3472131e9da1125501b1950faa0eec62a"},
|
{file = "python_bidi-0.6.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f1702455861f5304e73d72bda9a08d5175815780f0cac83743f237610100946"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0eb05fc7115f296e09e06d47648b032a2dff4322b363b8b7f88d4695be452951"},
|
{file = "python_bidi-0.6.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:43bc35c668ec003309e3831c3ae5577e7b0ca564701ddd647b8112c3d38f8dee"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6fdf72111aed1e30bb89989f55e167411d5fb7a94ee412a3116b9a9b257516f4"},
|
{file = "python_bidi-0.6.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7d72cbdf452504e5c18582843859e9a27aa710009db87f56052fcf39a19a7aa9"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9daa84f8f8138521a5971d38c92d918bdb0a899268d83d9daa5eba7dce641ce"},
|
{file = "python_bidi-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a429c12570a7f4ca016a708bb3aa94962ef583863227779caee60b55a68b435"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d2e5bafacba56696712ea2284c27f8a3d3b4ee94684b7dcd06af8775cf650dea"},
|
{file = "python_bidi-0.6.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:24ce7ace8eb7af96b60aaad9bd48283a40d9f13b7b0f3fac48a4d5e4eacbc207"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7214a175dd09a4da5f755dbf19d767261d2087686dfff321b4a3967d09096081"},
|
{file = "python_bidi-0.6.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:c4e31fb4fe01bcf4b91bf78695d65c858c684551cf5fb2d107a4625da32be445"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:9f6fe4d9b86c123a960c7506ffb31ebba0c7c465a364b344f96858679bf54401"},
|
{file = "python_bidi-0.6.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:21dc7da0f89876d23c0ef99aebba061059357e0c13fe0281afa9234ab97d2515"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ddf12de3ce92bbfcc3f0cdcc4591f9dbbdf3f83388da22646dbf0ba56d66844d"},
|
{file = "python_bidi-0.6.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4b2f143f651c010b57f0ccec0c3faa4b794a9ae1afefea5f5e12d7c20132355c"},
|
||||||
{file = "python_bidi-0.6.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:550565296af0e3e938cedc87c78a6ba02e38dab7c4bb2fbbf3717f2412e2a6e1"},
|
{file = "python_bidi-0.6.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:531ce6a560e4142a919389e5bbb36b3aeb2923845c71fcde1514ba9f58ca4ab6"},
|
||||||
{file = "python_bidi-0.6.0-cp310-none-win32.whl", hash = "sha256:b6958bc4a27e2854c1e9a3d6a2dac0cfd09451834c64f96738c3365d8a053358"},
|
{file = "python_bidi-0.6.1-cp310-none-win32.whl", hash = "sha256:449d8b7fd54faa7fbcf56e02273620bc08a8ed8c099883eaa68b0f109845b853"},
|
||||||
{file = "python_bidi-0.6.0-cp310-none-win_amd64.whl", hash = "sha256:712d666331e813f498ad6f16e23b6c9795f21e7a231b7047f32f2843e303ec92"},
|
{file = "python_bidi-0.6.1-cp310-none-win_amd64.whl", hash = "sha256:6295a2f0102782353cc657bf57c8254205c5d292c167cfb06315548263c4bd6c"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:b3984f4d91b75f19c1e1c2e5a52f4263f4c4a11de2c1f5bfb7b8fceb7960d8d8"},
|
{file = "python_bidi-0.6.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:dce1c4edf396d7b93f8b9718d129675a5aa1bb617245ad4d9c99dd567037cdf1"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:603b4485b7dc588bc58f80f1271f103b859a45b19024b90686c639a451e50b0a"},
|
{file = "python_bidi-0.6.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0c6437aec52c0a953fd476c82f8ec511b0c611d15ccbe44fccd628887a6adb7a"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d4f2a41ba306fd2827a1e5f153e856f5e79176abf4f0ae41def5255113548cc"},
|
{file = "python_bidi-0.6.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bd97cc94216d3edf5a6f6b6fbe3d49741faa7fbd18395490eda96f0fe27d543"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb50d50809508f0f9907973e8c99fd663d7d3b2bb124218c7f9d9abe374527c5"},
|
{file = "python_bidi-0.6.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4677aa514898df2031404f3ba77a003f114f8adf4683f46229862317d0962156"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fd5d2c89acc022cfd3b2d87b09f3ac8503beb6ca45af2ee31df9bd0fbbbe85ce"},
|
{file = "python_bidi-0.6.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e86bf0a1d92e3e63ba6bbc0a9b8b4e402a87297efc0aa780e68e0a3fa16d2394"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08c7ebd084312706868df172fb46f635ee437344181c0c55302f0da221f3bf75"},
|
{file = "python_bidi-0.6.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7373a284ef3fd86b1a7a5869a20d138f5ffef011d64f5d0affdaec412e528bb7"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f8689da1893a5a70e5bb6b47fff4571b8ab6a3653b8f9e3d3555ddaaabb607f"},
|
{file = "python_bidi-0.6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86c5a89577c46ba754cbc61ce5e061828bbb680f884a416bc2dbc8dee9958146"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7cac60293dddbe6307bfb15f8a227f614afa882999ff669b5af795dca7db97dc"},
|
{file = "python_bidi-0.6.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dae26c1d02124d7ee6fd3e46cf2af4c7d453ddb63324fa445802ad90f867337"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4019165cb8e9e73bebec839156ff25e02b499da5f3e849e44c5c76fd487be967"},
|
{file = "python_bidi-0.6.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3978e8218a683c9994764e379921536b3f02c9a42b9734542108bd8a90c1d454"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:6684e0f1cb2d769281b6b8bbb51c69a700b63b18a6cb6088d3f34a9eb544620a"},
|
{file = "python_bidi-0.6.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:91b54175bd63f3914f4ef91e434416e2ad214cb278bb2c52c386b5e211dcdf72"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7203782c888fcffc0e20e1e6b2d8bfe947ce356796a6709c09c1d751943b6ed7"},
|
{file = "python_bidi-0.6.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2372c47e112614b0936380020c9b07201bc42d20a14eae82bd2b5df2820c1c1a"},
|
||||||
{file = "python_bidi-0.6.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f7d98736704548a001fb072becdfbda9d67dde8fd993320494c2370243c92f87"},
|
{file = "python_bidi-0.6.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3d999ab2f3b884e0e0252fc34d5bb618439ec7852bc68ae1a7cc4bcb7a173079"},
|
||||||
{file = "python_bidi-0.6.0-cp311-none-win32.whl", hash = "sha256:205d885944f929e93283b88a45fe57ae0102c39ba0576ab856f9e5dd50d7a049"},
|
{file = "python_bidi-0.6.1-cp311-none-win32.whl", hash = "sha256:79f002b542020b59a0797fe18a3a810a81c36f97a449e44b86d666f4adca206d"},
|
||||||
{file = "python_bidi-0.6.0-cp311-none-win_amd64.whl", hash = "sha256:b7847f882442179fd67608958c1ce8af9ee4b051a921342c7a3bc071e2ba0fc4"},
|
{file = "python_bidi-0.6.1-cp311-none-win_amd64.whl", hash = "sha256:20ca913be99845c36f1d0a7ffe5278720d29aee432b6a55a17aecafb82aefb42"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:694373c087f2d5067289832070a21e84fc648ac087163723ccd0759dac3a7161"},
|
{file = "python_bidi-0.6.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:c039d49cee2ef509290c1e1afdbf1b6158e74f74100afc3127f1e089d8175121"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:dd2ae8876412974b8959520688a271c1b3dbb65ef57306e3bf745115147d05b8"},
|
{file = "python_bidi-0.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cf2deb32d6295d18b320e74a37eb08f24512f154eccb8f8015d2914d577f6f18"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2d748eccc2c40ce5b56bde1b7eef72f7b6037e289fb34a38335cd05e3b5f7cd6"},
|
{file = "python_bidi-0.6.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e6b986a80cc95ccb0826aaddbb8c386d0f7f6a32c92690592188d4c814bc2c00"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:22b6866cf18e2e8189cdbc5ede22b843c15c8aaef5eb8438fb02f8197fb29bf9"},
|
{file = "python_bidi-0.6.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:16e39952287e2644605bb5f40fd327199e449171863af63b674ef4b91a81530b"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e056f602e634b8cd3c8c5497f52d43674f5de088df4f1a8d73e99cd97735fb3f"},
|
{file = "python_bidi-0.6.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:859ab2a628d483b8db3273802fca128414abff6ace98780984436baada9ae297"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3adf383d7e1bc50c8357f78ec3591c483066f9b7744a0c2c89d1ef501c75f693"},
|
{file = "python_bidi-0.6.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5893348452498797cd3dc0f966c0524ddb187b2702e662984de7e242c266c023"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db9981b2d05ef108a540424dabfa157dff20ec4adb909e5a6d2938cac6cf3987"},
|
{file = "python_bidi-0.6.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f306c17f41d7b90cb738c115c6f5c153604672811b195da9b1415c291f732bf0"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7dcf1d8834e1db2f4d3372c607fe2a12acbeeb4a9aba1bf0014cc37474ef08d1"},
|
{file = "python_bidi-0.6.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05fedcd7d8b1c3997b2a3a8e6786947481fc60a423b5a93e673b9f37920bf8ba"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58b9628fa1d15b30686fb6196cc2b3d6c1546bfe7e5fbdd9b758d69a76411cd4"},
|
{file = "python_bidi-0.6.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:57cdf7d407aca3f293fbd0ac744207bdf277928828bd312ac52976986b04d90f"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:7fa5babd7d89a541d6507bddd4839271db1504a54f46a5ee64c959dde41c0596"},
|
{file = "python_bidi-0.6.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b779e802542768cd89ed934313dd9fa428b931028532b0ba794c6d12765679ab"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0b5c33ad97ad7bb2fa335a0ce63d0a887e99dbc86ce2684f7622c0fb1b25873e"},
|
{file = "python_bidi-0.6.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b7639a03e2bdb80f5bd7e8ee3d81e794d5cc32a4070a8a64c29020fa97591d30"},
|
||||||
{file = "python_bidi-0.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:48034cc54c792dfeeb017505293600bc4ece89fca1fc28d6fb24f932d7ef1bcf"},
|
{file = "python_bidi-0.6.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2a6291f66d79636815e6360a9de6ef6fc924c05e38fac0343bd3d4cc04d87833"},
|
||||||
{file = "python_bidi-0.6.0-cp312-none-win32.whl", hash = "sha256:8483de08f3b41063f1819a397aa6686ae88ac908192e448b72e4bf7caa91a655"},
|
{file = "python_bidi-0.6.1-cp312-none-win32.whl", hash = "sha256:0f46cb78804dd3b04e6da1c9806dc5736565a65fcfed907b2ca158e9b2f0904f"},
|
||||||
{file = "python_bidi-0.6.0-cp312-none-win_amd64.whl", hash = "sha256:a82ee4b48e9b192d4ff3873f2fd063efae063b904b6283119b8cef7165a54084"},
|
{file = "python_bidi-0.6.1-cp312-none-win_amd64.whl", hash = "sha256:9f72887a61f87dc284b729b8f825687e81c1b01179e1b8d1c9c11897c58323fc"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e080e4bf367c3761fc9a430a6a0375dcb10a541721a6b688142a9bbee883e576"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c2209dae25b0d34b53062534a30018e712664ff8841a86516af928355373a78"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1002a9b65deae763b236d7d4ea6f046acdb778c85932053ce0d4607f691a1a93"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2a7da8a53296272d39575262b61c813c4397a2f159d21dc8f2145beb78054bee"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9418a69c7189907cd27ebe783708572dff979be77e1a7d2b646ff0a456f4f59b"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:49565ba1c151fe34be4707c9afe48c25c7651e8f1a8aa59275c0ba966d12afc3"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:61c05401d8ff120221b53938e7576a330fde22b22e22bf9243e8fffa225cd35f"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:721875c76e8f314a9f90c6d91a50042d64622274457b7f117a3710a4a88219d3"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5e8382835ae85a4bacd188e563fcd5c90d73be7406f7e4ee1148f7a3fa61ff1"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7be39d50f5b2a7c522a7c7ed282f7518cfa163c321cf097e37d91e52138638ae"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19d07a95ebab603849a639d649a07474dc54485ad822aa045309a12ec0f7d388"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:357150587cbe5bfb799b690b6d056457d899fa5e3e0389d850a86533598730cc"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:e8695ad198e816b42df61ea2a315f7d84189f69a3aa608c0fa71bb5d9105790e"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:eff6d500a4afdd141b3792596a26af5f23b25e9bb5782cda22b6dbd5f173c539"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-musllinux_1_2_armv7l.whl", hash = "sha256:13956919a2bcf0d5f240d66acd99256996abec10ae235d328d93433480dac62a"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-musllinux_1_2_armv7l.whl", hash = "sha256:e83ce823494fbcaa62521b07590e2df5ab896674b18ee6d2e00cd4a642ee1868"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:88ee27d0ce129b51e0aedcf88a7961e300f0e3b5d0e707ca813e2af33b46e8a1"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-musllinux_1_2_i686.whl", hash = "sha256:af335a47ba80df69c4b4c860d0cb2e6b7519a9435d1ddf2ff5d07c12c2457d72"},
|
||||||
{file = "python_bidi-0.6.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:50ec1353643a03c8c324968e1216624e0bba57b77af465675932ce6cc5505015"},
|
{file = "python_bidi-0.6.1-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:10b0ee6db22ea9c0208143e68bed3fc044737c674dbdf04d660dad49fc6a9d1f"},
|
||||||
{file = "python_bidi-0.6.0-cp37-none-win32.whl", hash = "sha256:9ec06a91c64fb6832351dc8ab1dc9f970b505e09ccb83b8ff2c91bd04ce31417"},
|
{file = "python_bidi-0.6.1-cp37-none-win32.whl", hash = "sha256:0c6d806955ba5dbdb25390e0a902a4b230ec061dc46202aaaa90ab00a09246dd"},
|
||||||
{file = "python_bidi-0.6.0-cp37-none-win_amd64.whl", hash = "sha256:4425879da7b1ca6257759ace9277506d9d6cf0fc13820bfa1e779931a6bb9795"},
|
{file = "python_bidi-0.6.1-cp37-none-win_amd64.whl", hash = "sha256:41eed1e3037a44a3de7535307afa4c476a804de97a53f3bba492ad303277054f"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5985ed1f85b8a2853c73976d259e3095122559646956c5bf8f1c6c4eb2bd7ebd"},
|
{file = "python_bidi-0.6.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71f15e1023a47fd151f95d323724f6af035738e61eb454f11844a533473e6de1"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:afb77402c6d79daa1715a8a5d6d0eff13387e0db1a34c0f91a09b40ca6a60972"},
|
{file = "python_bidi-0.6.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:10ff6884eceba3a7592160e0677371bc77c25f1d3bf7e205466d8fe9601b13e9"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2c6d339b94efde502286a8f80f130db6014762c8218e6f3af23bfe446217b10e"},
|
{file = "python_bidi-0.6.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:67c18c82b95fcbab3d4ce340dfcacc6bdb700a67c46e5d378d5ad5fbc7c8e480"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afec9e08733d6754ef309eef617ba324eacf4bdf0081c3ec34758cbfb964e889"},
|
{file = "python_bidi-0.6.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ca23a0b2dda59665170ae7152ba090eea77da39f984b3cd8f79b4039563e946"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2901c29525e8c6071781bd796658f7934143fe6bcfb4f998a11cc80372f756fc"},
|
{file = "python_bidi-0.6.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:290786e0bca55a65e460eb573ab7430033b731ae07e80f39acfa500342a482c3"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c71bb7e5fbd67fd64dc7b07c0a69a1b1daffdae0839d543e6e48dbfa82509208"},
|
{file = "python_bidi-0.6.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9765498ff033615a5655620ade87ab9fb7936938f9d5bbc967bffa4eea76b6f4"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:782b166b22cdd1738dc557acf7311a1d85565b9f58c48eb4004e4f770854c9d2"},
|
{file = "python_bidi-0.6.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:07ebf09fb3712027dfb3e052f71978ded8475fdaaa8ee08cc333c372b366acda"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:1000815b42e9eba8d4e28e8d6f9558f055d54b9ec746875117d8b8150c86511c"},
|
{file = "python_bidi-0.6.1-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:1445b9f509f0d48ac09c94ee26c9ce4f94257220fe7cb0710d37b54574c43af1"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:dfae0ea2444833acab3f7c62fd38b965f7332617993ef09098672ca9279bb27d"},
|
{file = "python_bidi-0.6.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:e31e418ebbf8b4c9a59af13ae43cb23304b7da4e75555a8d68525d8f4183e022"},
|
||||||
{file = "python_bidi-0.6.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:65fcf6afe02d64b3ec9a6b97513fad50b858f88b83f785e4c0416a9acac5bc63"},
|
{file = "python_bidi-0.6.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:85d985e38e11b353d37f8be4a0a0b87d8e6bac1db16ced87ca7940c3323ec10b"},
|
||||||
{file = "python_bidi-0.6.0-cp38-none-win32.whl", hash = "sha256:0fc9ad821600a0bd4c9bd6327f5cac3c1494f0d291173bf41e655f2ec80f1cee"},
|
{file = "python_bidi-0.6.1-cp38-none-win32.whl", hash = "sha256:32f82666e2468104011257722cebcf153236638706f030ebbd738cbe74cde07d"},
|
||||||
{file = "python_bidi-0.6.0-cp38-none-win_amd64.whl", hash = "sha256:09cd618b42b6e042140c3c15792942c4a2fd259ed68cd68f224dfe00ff312f1d"},
|
{file = "python_bidi-0.6.1-cp38-none-win_amd64.whl", hash = "sha256:34bb781fb3f2d7102c0a0a3748503f5af2583575ad104ab8987f416d98302fb9"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:332506db671bc2ba8ae2591ba48c617a25dd2924a0ae185bc970f9f4e386a55f"},
|
{file = "python_bidi-0.6.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:cc794ca257335c56d161b585427ce07122e79f78671ad1ed57ceb7b4d0d1460b"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c066bffffcb66e13fff3e4cc4a9570d744f4b48e54caa0308faf98a0a8dc4570"},
|
{file = "python_bidi-0.6.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4a5029f8a2b1eb61aee34ba3c971767105929ef1af0723ba07a866479a861ee6"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:36fe41cc095045b23bfddc5c48a6aef1674dc32b1d1a52ece3b302a5fb28f33a"},
|
{file = "python_bidi-0.6.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d9f101fe2d14f95beef4d0700073e1026e401228bd8af69acb1744e556915a7d"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:018747edf332240e6400335a10d34c66d7d27e096d05ea0761afb61dc4e750b0"},
|
{file = "python_bidi-0.6.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:83a01910edf6a83f025e57cd3d8e05f4933c939c17283be4c8acecd968836a04"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6aae20bb48ff0ccc4bd3ede085bfb781918c938f2cc09867c879d23252d18775"},
|
{file = "python_bidi-0.6.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bf84d89893c0a1cb286a120f1a9de7ee3093b174bdb2be6b3c90f9f241677e0e"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:600236fbce5e43348c71de7327dd88f1484358cdad04ee742752a289569f1d82"},
|
{file = "python_bidi-0.6.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:52fb816fb80150a20ea64967f33ec89f509a229b9a880eb097f01d92da616e91"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9dc0ac10d7728b17ce9ac22d09918bd6f8b64e9b92a5199cb9a9cf29016dd41"},
|
{file = "python_bidi-0.6.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d56b82fce448b047d7186942e8ce604ab727e8b9e47bdb6af4122aa7ac852c6"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6cbccc3484f6c30a682960fdde2ede944d9bc6b9ed8b8ead683e97af066ebe07"},
|
{file = "python_bidi-0.6.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:63c7e61cb7446ad2c8ab679a6ad03037d7f2ec5ce2ba334a1651d66adf47cf3b"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:de1b836e0d8134b8331423a5c76c886b5c25eed8d6f9e5dcac7767feba3d052c"},
|
{file = "python_bidi-0.6.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:98c3eb1b1d15140ea52f69194403f08ac76b057fb42a50cb74bd3b2cce155730"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:cfb45fecb08bb070dc0e62e6ee8249bbb6b6622181756fae2cff60c8eb5850e9"},
|
{file = "python_bidi-0.6.1-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:c89b2f708018955ea52cc36c6d65923a0ddd56db2f9ec2672c8a2ddbacc9a3f5"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fb6b3545296ff4c1a6cef465359f6ed9b32cfc4bd3d8a6633f0234476414e387"},
|
{file = "python_bidi-0.6.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e420621ad4f57809e3f3817a609ce5f80538f1f75cf9cd6579fbb9bcf590352d"},
|
||||||
{file = "python_bidi-0.6.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e511838eeff5b76f053afe0b936920e5aca91ea597c43caa196e0c6b5cd0d623"},
|
{file = "python_bidi-0.6.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:121775718e39fd884d5399e46a94375a594f641abe3ab2f1f4857cd0dbc23325"},
|
||||||
{file = "python_bidi-0.6.0-cp39-none-win32.whl", hash = "sha256:de171a2e7671dc5d19f957054e9f8fba997a98caebfcd3d386c4189d23e0d73f"},
|
{file = "python_bidi-0.6.1-cp39-none-win32.whl", hash = "sha256:cca50435936c229eb2bd00ff0adb53feaacfe840ed3735d72ebc72702418c32c"},
|
||||||
{file = "python_bidi-0.6.0-cp39-none-win_amd64.whl", hash = "sha256:75243e17201831d8f626be57a1ba52fe4f62594eb8bc777e2a81785a93745466"},
|
{file = "python_bidi-0.6.1-cp39-none-win_amd64.whl", hash = "sha256:0e82b650ffdf30d8a4a809affb96bdf4a7dabe3b9f2c68b2e19616d4f54955db"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a75ca41875c8829295931eb2f0f380da50c1448d64e3c28c3db4966afdfbc53f"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee41b97746846b53565fbd7606eebac73658ef5a28b5f900ce91c85a5b407e13"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4700f71dc553cb65dc8b132de1ee542ae6c518fa8e942b5e0d3ba07bca054a42"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b751f4a05657acaca2008afeb11586e088e55a310718d31c20d0e63c4c7b6a22"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:13b7a10fa19b949132581dfd621ca800030cf21dc06a13366371a6e71309c6f6"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9ce2a05082269b7ff0bf231b29ec7ecb68b096e30d758eb9c969c3cec9e7168f"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef186911e27c968be69cdbaccaa378f0fa129b224a2854ec491963632ea37ff1"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de7bbb1997c7e6dedf5285482ee8b4d26917b73e0a4f0b1899e16043fac5900f"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1af301fe73f76c46e3ddd21b0a3c4467b01e0c3a94f69cd185a78db6810300e8"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:076dab730e851820621f296174c0150f0b58f4b41a187c498cf1c9f84c6956a5"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bf6e718dbbf15f4fb24fd199ebe089c8a28ed712aedc7757d7730741e28cff27"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:01991cc5a538a4d3a58185dc0a9f01acd8e443b17ca9c78688de982aa46aa4fb"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:30214494b84c49247624c162d9141e7fc8dbc3957d21959feb92703cb87b474d"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:7ed862af0c9f05ea6df6c480eb207bba5d380fab9ecc3ca7c9741b59d5481000"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:5753d3204d13189a12a298c82c8c23eba94c252ee9aab3dddb7014b0cd4f37b1"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:2687cf56b00fe86a7f8e96d24c8045bf8fb247950fb4d84605655776c2dbb5c9"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:959fcb0554ca4136044bbb308654aa88c3ffa9031a6c6b074b29221dbb6d553f"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:4b176c726e74e089bea131e6542f151452d35d7d265590f64a2eb738c1a41795"},
|
||||||
{file = "python_bidi-0.6.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:6ca12af1e09355d6296730bd44adf5023a8b696ce77a9a04f35f56b10cd60428"},
|
{file = "python_bidi-0.6.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:b723da2743e73a866f7a14172edd9577bced2e3fba6286b1e5f54a7e1c404b8f"},
|
||||||
{file = "python_bidi-0.6.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f13deaa892d5dbc742b4ca4f96e9f6255d5f33b4bbfb04c4c77afc4c1b36378a"},
|
{file = "python_bidi-0.6.1-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b84d75948959d76dccefe0e656a107550dbc5907ffd47459eda9bb150b57be76"},
|
||||||
{file = "python_bidi-0.6.0-pp37-pypy37_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:96b47ad6492fc3f17a8f9335ae76bafec6ae4769138da34c58f493618f653e78"},
|
{file = "python_bidi-0.6.1-pp37-pypy37_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2bb9b99e9b9c0479f20880251161c52a15848373852281a60365e030fe39af85"},
|
||||||
{file = "python_bidi-0.6.0-pp37-pypy37_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:56cb6986f7fe97a425c6914d465f7098223263a498a3e48c49dbffc9ebe46ee3"},
|
{file = "python_bidi-0.6.1-pp37-pypy37_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2f60f19b6e90dac559cd9e800c09c911acd8dad0cd0d5a3fe8409ce9e5bed723"},
|
||||||
{file = "python_bidi-0.6.0-pp37-pypy37_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b90235b0c665483821fd5ab4a0d4db59025f12769dbd4fa1e2d6b0616e1178d3"},
|
{file = "python_bidi-0.6.1-pp37-pypy37_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c996e1516cb4daf3ff697525a87ce9ce0362d836fc395fa7b3637b09e11782a2"},
|
||||||
{file = "python_bidi-0.6.0-pp37-pypy37_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:9ca5506abe6b3a03f139703deec86852e88c13ad32d6b66109b5630539f9f386"},
|
{file = "python_bidi-0.6.1-pp37-pypy37_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:c6e5a40bcf57c017f3bc59455711c4e52970907d5fbdcde984ab76119cbbd6aa"},
|
||||||
{file = "python_bidi-0.6.0-pp37-pypy37_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:5f6f04dbd30a667a3dd61356ca9e97d33cbdd8fbbe953c5ad3ab86b6901c73e7"},
|
{file = "python_bidi-0.6.1-pp37-pypy37_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:91308a5ade4ba96c5fc107f68f48e415ddd123f45d1b4a504572743d2547b46b"},
|
||||||
{file = "python_bidi-0.6.0-pp37-pypy37_pp73-musllinux_1_2_i686.whl", hash = "sha256:d63cf8bd056c4ec14ff9d8ee7181543cd758c1f4ce0eea0710fa854e1fede644"},
|
{file = "python_bidi-0.6.1-pp37-pypy37_pp73-musllinux_1_2_i686.whl", hash = "sha256:b489db2e7037bf4802747900463343c5b356a8437948b6032b07cc859b855956"},
|
||||||
{file = "python_bidi-0.6.0-pp37-pypy37_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:17a9db493051792e71a813dd09a4c555e475cd874bf7594429be9c0cf16e270f"},
|
{file = "python_bidi-0.6.1-pp37-pypy37_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:dead77866bd69d948ccc836a772b711a74d4a18f692c1a8d8a45e8367d7a1a0c"},
|
||||||
{file = "python_bidi-0.6.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:618a70c0183372e23756766db135930507093e95f386c429187f9ae29c4d965f"},
|
{file = "python_bidi-0.6.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52047983079d9c8acea72d777a6fe15dda476ff96e47a784e8aec1aa4a3ed160"},
|
||||||
{file = "python_bidi-0.6.0-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4b880e8e75bea3136c5a44960365665d32abdee204024fd77e9a9975809c72ae"},
|
{file = "python_bidi-0.6.1-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:22ed0569e668063b63b5db786ed898718f954ad9271304d65a4be0906c478297"},
|
||||||
{file = "python_bidi-0.6.0-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f20e5592534f3b06b2beb0a38f1df8ce1fb2c8f628573381637ca53083dd4648"},
|
{file = "python_bidi-0.6.1-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:39958c9749c9e57c520c10c777b3688ec766be25a7b94abc717b6dbd4b755c88"},
|
||||||
{file = "python_bidi-0.6.0-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16c8a3284bf874b6c38f8cb10f0f48fd1d7c198cf0a4937d39e73e460096c652"},
|
{file = "python_bidi-0.6.1-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1647059eb21181e6ac03894834d96fc282b482e28f4117a748aafc0320c58019"},
|
||||||
{file = "python_bidi-0.6.0-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:e079ac4ece790fcc4f1a4fcd0b4bfaa290482f2f04bd69936a93aff6a0ce9719"},
|
{file = "python_bidi-0.6.1-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:28df2ed9ca3a971a530b31240839e8d9593d5eaf4de159227aa6e8125d021623"},
|
||||||
{file = "python_bidi-0.6.0-pp38-pypy38_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:f48486bc16d37361cd21b32a27b2109cb45372cf8e1b4cc59809f2ae4634ad22"},
|
{file = "python_bidi-0.6.1-pp38-pypy38_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:0f460a333b5620cef14bd709b2f2ab95eb882b116cd8777bc006a35ded8be6fd"},
|
||||||
{file = "python_bidi-0.6.0-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:895fe7e1d67acff2d313184148a1414b50fbbf2148df272a5e9a84f8196f2d3e"},
|
{file = "python_bidi-0.6.1-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:6240ccbc0e1e582b17c994667af61a498844b50e2bdb51e76c29d3c2f4f225b8"},
|
||||||
{file = "python_bidi-0.6.0-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:18c2d0bffafa590629a5e95ee079c491954ee2249350d62db4497164f7d3f4cf"},
|
{file = "python_bidi-0.6.1-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:3d007a8708b490e8f5c89f828395f79e9ab8e19ff09ad8fcbb50d1b7fdea6442"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3dbde6d205610653d17cc5bb785c5d5da5af6ae634e5daf92a7a6e75a50f94a"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:646faaa87b1cd693c054124a0c4080449b1c125b6d5394cef4d77b56e92b7da1"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:132954f67e3af38ca7c7cd85bde6a49c89bd470ba01603acbd0baf8048acbab5"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:38b1cbb292eaf91cc5ea2282b98cccd3d79ae0ed3f1ce5a2f5d83938264227a7"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c2c8c79faafbc5852db896f8d488090530cb1421765528305a6678694a1961f0"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:af1c6103847397bb020057737f6c670f62b87dfab0395f3c7e72cfb915e37b43"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a5c0191270c2438953329af2116fdee021c20da3a33f418303f1bf9859984eb"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35840cf91858522cea9887068f007835b9867518ac863379a840bcc58f468be3"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c652bab5b2d978f9abf324e9c1de50cb175599402b5ec14b7553780f68af597d"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc93b876f7d030c01639aab2d91cebb975a2676a7449347966017324683309e1"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:657ac6ddf02d40de633d616d8d052e616169787d535902e3a4240738ab902a0c"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:44cee53c32709c8ecb310d90c1fce083065a4f67a91639d749ec562ff14c9cac"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:8096c8a8d7498750cf54a55de44eb689a236ae8d3b47b642e25e55cfbcff6e4e"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d3c34a0d3d038a4e2f064f9c5e723e5aed146cbbd0cad9705f61b45d51dccf92"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:0458e2f2d24c115f5f103aa54d9fe8b98c5197b85b616b0db68aaba32908c28c"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:eedb5fee92fafc2cc118d826b464a31e4ca5875e97f8c6041071db68940a8dba"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:ce855e6be84e0b6e00286c62e2dd1ccb505beaeb78f969e270aec5998e53e4fb"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:b72dc51d43d1ae50f5abafc834d84260d73f011c1811bb0263aed45c98a88c6a"},
|
||||||
{file = "python_bidi-0.6.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:bb3c0dfc5131e706d46df50547ce51ff92722431b6d8d81142ea208374550b3e"},
|
{file = "python_bidi-0.6.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:21aba9a66f49a47a82069f1fc6507af94cbee68e985ffe2282e60f8eb1d73d4b"},
|
||||||
{file = "python_bidi-0.6.0.tar.gz", hash = "sha256:0665a0826074a9ff8d29640c0c405a2710b671db14fcc8b1c3ee6615ff10b837"},
|
{file = "python_bidi-0.6.1.tar.gz", hash = "sha256:671c5d331187455a73342c655c0717b1e36969b7fdb8c787c8f2234d0eab47f4"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
@ -4808,6 +5207,20 @@ files = [
|
|||||||
{file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
|
{file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pyyaml-env-tag"
|
||||||
|
version = "0.1"
|
||||||
|
description = "A custom YAML tag for referencing environment variables in YAML files. "
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.6"
|
||||||
|
files = [
|
||||||
|
{file = "pyyaml_env_tag-0.1-py3-none-any.whl", hash = "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"},
|
||||||
|
{file = "pyyaml_env_tag-0.1.tar.gz", hash = "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
pyyaml = "*"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyzmq"
|
name = "pyzmq"
|
||||||
version = "26.2.0"
|
version = "26.2.0"
|
||||||
@ -5978,6 +6391,24 @@ test = ["cmapfile", "czifile", "dask", "defusedxml", "fsspec", "imagecodecs", "l
|
|||||||
xml = ["defusedxml", "lxml"]
|
xml = ["defusedxml", "lxml"]
|
||||||
zarr = ["fsspec", "zarr"]
|
zarr = ["fsspec", "zarr"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tinycss2"
|
||||||
|
version = "1.3.0"
|
||||||
|
description = "A tiny CSS parser"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.8"
|
||||||
|
files = [
|
||||||
|
{file = "tinycss2-1.3.0-py3-none-any.whl", hash = "sha256:54a8dbdffb334d536851be0226030e9505965bb2f30f21a4a82c55fb2a80fae7"},
|
||||||
|
{file = "tinycss2-1.3.0.tar.gz", hash = "sha256:152f9acabd296a8375fbca5b84c961ff95971fcfc32e79550c8df8e29118c54d"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.dependencies]
|
||||||
|
webencodings = ">=0.4"
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
doc = ["sphinx", "sphinx_rtd_theme"]
|
||||||
|
test = ["pytest", "ruff"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokenize-rt"
|
name = "tokenize-rt"
|
||||||
version = "6.0.0"
|
version = "6.0.0"
|
||||||
@ -6720,6 +7151,48 @@ platformdirs = ">=3.9.1,<5"
|
|||||||
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
|
docs = ["furo (>=2023.7.26)", "proselint (>=0.13)", "sphinx (>=7.1.2,!=7.3)", "sphinx-argparse (>=0.4)", "sphinxcontrib-towncrier (>=0.2.1a0)", "towncrier (>=23.6)"]
|
||||||
test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
|
test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess (>=1)", "flaky (>=3.7)", "packaging (>=23.1)", "pytest (>=7.4)", "pytest-env (>=0.8.2)", "pytest-freezer (>=0.4.8)", "pytest-mock (>=3.11.1)", "pytest-randomly (>=3.12)", "pytest-timeout (>=2.1)", "setuptools (>=68)", "time-machine (>=2.10)"]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "watchdog"
|
||||||
|
version = "5.0.3"
|
||||||
|
description = "Filesystem events monitoring"
|
||||||
|
optional = false
|
||||||
|
python-versions = ">=3.9"
|
||||||
|
files = [
|
||||||
|
{file = "watchdog-5.0.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:85527b882f3facda0579bce9d743ff7f10c3e1e0db0a0d0e28170a7d0e5ce2ea"},
|
||||||
|
{file = "watchdog-5.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:53adf73dcdc0ef04f7735066b4a57a4cd3e49ef135daae41d77395f0b5b692cb"},
|
||||||
|
{file = "watchdog-5.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e25adddab85f674acac303cf1f5835951345a56c5f7f582987d266679979c75b"},
|
||||||
|
{file = "watchdog-5.0.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f01f4a3565a387080dc49bdd1fefe4ecc77f894991b88ef927edbfa45eb10818"},
|
||||||
|
{file = "watchdog-5.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:91b522adc25614cdeaf91f7897800b82c13b4b8ac68a42ca959f992f6990c490"},
|
||||||
|
{file = "watchdog-5.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d52db5beb5e476e6853da2e2d24dbbbed6797b449c8bf7ea118a4ee0d2c9040e"},
|
||||||
|
{file = "watchdog-5.0.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:94d11b07c64f63f49876e0ab8042ae034674c8653bfcdaa8c4b32e71cfff87e8"},
|
||||||
|
{file = "watchdog-5.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:349c9488e1d85d0a58e8cb14222d2c51cbc801ce11ac3936ab4c3af986536926"},
|
||||||
|
{file = "watchdog-5.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:53a3f10b62c2d569e260f96e8d966463dec1a50fa4f1b22aec69e3f91025060e"},
|
||||||
|
{file = "watchdog-5.0.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:950f531ec6e03696a2414b6308f5c6ff9dab7821a768c9d5788b1314e9a46ca7"},
|
||||||
|
{file = "watchdog-5.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae6deb336cba5d71476caa029ceb6e88047fc1dc74b62b7c4012639c0b563906"},
|
||||||
|
{file = "watchdog-5.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1021223c08ba8d2d38d71ec1704496471ffd7be42cfb26b87cd5059323a389a1"},
|
||||||
|
{file = "watchdog-5.0.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:752fb40efc7cc8d88ebc332b8f4bcbe2b5cc7e881bccfeb8e25054c00c994ee3"},
|
||||||
|
{file = "watchdog-5.0.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a2e8f3f955d68471fa37b0e3add18500790d129cc7efe89971b8a4cc6fdeb0b2"},
|
||||||
|
{file = "watchdog-5.0.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b8ca4d854adcf480bdfd80f46fdd6fb49f91dd020ae11c89b3a79e19454ec627"},
|
||||||
|
{file = "watchdog-5.0.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:90a67d7857adb1d985aca232cc9905dd5bc4803ed85cfcdcfcf707e52049eda7"},
|
||||||
|
{file = "watchdog-5.0.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:720ef9d3a4f9ca575a780af283c8fd3a0674b307651c1976714745090da5a9e8"},
|
||||||
|
{file = "watchdog-5.0.3-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:223160bb359281bb8e31c8f1068bf71a6b16a8ad3d9524ca6f523ac666bb6a1e"},
|
||||||
|
{file = "watchdog-5.0.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:560135542c91eaa74247a2e8430cf83c4342b29e8ad4f520ae14f0c8a19cfb5b"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:dd021efa85970bd4824acacbb922066159d0f9e546389a4743d56919b6758b91"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-manylinux2014_armv7l.whl", hash = "sha256:78864cc8f23dbee55be34cc1494632a7ba30263951b5b2e8fc8286b95845f82c"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-manylinux2014_i686.whl", hash = "sha256:1e9679245e3ea6498494b3028b90c7b25dbb2abe65c7d07423ecfc2d6218ff7c"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-manylinux2014_ppc64.whl", hash = "sha256:9413384f26b5d050b6978e6fcd0c1e7f0539be7a4f1a885061473c5deaa57221"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:294b7a598974b8e2c6123d19ef15de9abcd282b0fbbdbc4d23dfa812959a9e05"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-manylinux2014_s390x.whl", hash = "sha256:26dd201857d702bdf9d78c273cafcab5871dd29343748524695cecffa44a8d97"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:0f9332243355643d567697c3e3fa07330a1d1abf981611654a1f2bf2175612b7"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-win32.whl", hash = "sha256:c66f80ee5b602a9c7ab66e3c9f36026590a0902db3aea414d59a2f55188c1f49"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-win_amd64.whl", hash = "sha256:f00b4cf737f568be9665563347a910f8bdc76f88c2970121c86243c8cfdf90e9"},
|
||||||
|
{file = "watchdog-5.0.3-py3-none-win_ia64.whl", hash = "sha256:49f4d36cb315c25ea0d946e018c01bb028048023b9e103d3d3943f58e109dd45"},
|
||||||
|
{file = "watchdog-5.0.3.tar.gz", hash = "sha256:108f42a7f0345042a854d4d0ad0834b741d421330d5f575b81cb27b883500176"},
|
||||||
|
]
|
||||||
|
|
||||||
|
[package.extras]
|
||||||
|
watchmedo = ["PyYAML (>=3.10)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wcwidth"
|
name = "wcwidth"
|
||||||
version = "0.2.13"
|
version = "0.2.13"
|
||||||
@ -6731,6 +7204,17 @@ files = [
|
|||||||
{file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"},
|
{file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "webencodings"
|
||||||
|
version = "0.5.1"
|
||||||
|
description = "Character encoding aliases for legacy web content"
|
||||||
|
optional = false
|
||||||
|
python-versions = "*"
|
||||||
|
files = [
|
||||||
|
{file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"},
|
||||||
|
{file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"},
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "wheel"
|
name = "wheel"
|
||||||
version = "0.44.0"
|
version = "0.44.0"
|
||||||
@ -7115,4 +7599,4 @@ tesserocr = ["tesserocr"]
|
|||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = "^3.10"
|
python-versions = "^3.10"
|
||||||
content-hash = "14143d6cc79f4c2c8a4d021711198697e91ca01ecf290dd270b483984461c3d1"
|
content-hash = "8a545ce70eb2001e47c79c102a494aa42d8f5efee5dfbf3dfd0acfb3fb0f8ec9"
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "docling"
|
name = "docling"
|
||||||
version = "2.0.0-dev1" # DO NOT EDIT, updated automatically
|
version = "1.20.0" # DO NOT EDIT, updated automatically
|
||||||
description = "Docling PDF conversion package"
|
description = "Docling PDF conversion package"
|
||||||
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
authors = ["Christoph Auer <cau@zurich.ibm.com>", "Michele Dolfi <dol@zurich.ibm.com>", "Maxim Lysak <mly@zurich.ibm.com>", "Nikos Livathinos <nli@zurich.ibm.com>", "Ahmed Nassar <ahn@zurich.ibm.com>", "Peter Staar <taa@zurich.ibm.com>"]
|
||||||
license = "MIT"
|
license = "MIT"
|
||||||
@ -40,8 +40,6 @@ pydantic = "^2.0.0"
|
|||||||
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "7c104d61aa5d003dd8d9711c37e23ce04799f4c9"}
|
docling-core = {git = "https://github.com/DS4SD/docling-core.git", rev = "7c104d61aa5d003dd8d9711c37e23ce04799f4c9"}
|
||||||
docling-ibm-models = {git = "https://github.com/DS4SD/docling-ibm-models.git", rev = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"}
|
docling-ibm-models = {git = "https://github.com/DS4SD/docling-ibm-models.git", rev = "1d2e2a2e6eb152c237f1383cdba20cf85db80b97"}
|
||||||
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "c185c4f985ccd29a470a1cddd3bec43880b739ee"}
|
deepsearch-glm = {git = "https://github.com/DS4SD/deepsearch-glm.git", rev = "c185c4f985ccd29a470a1cddd3bec43880b739ee"}
|
||||||
docling-parse = "^1.5.1"
|
|
||||||
|
|
||||||
filetype = "^1.2.0"
|
filetype = "^1.2.0"
|
||||||
pypdfium2 = "^4.30.0"
|
pypdfium2 = "^4.30.0"
|
||||||
pydantic-settings = "^2.3.0"
|
pydantic-settings = "^2.3.0"
|
||||||
@ -49,6 +47,7 @@ huggingface_hub = ">=0.23,<1"
|
|||||||
requests = "^2.32.3"
|
requests = "^2.32.3"
|
||||||
easyocr = "^1.7"
|
easyocr = "^1.7"
|
||||||
tesserocr = { version = "^2.7.1", optional = true }
|
tesserocr = { version = "^2.7.1", optional = true }
|
||||||
|
docling-parse = "^1.6.0"
|
||||||
certifi = ">=2024.7.4"
|
certifi = ">=2024.7.4"
|
||||||
rtree = "^1.3.0"
|
rtree = "^1.3.0"
|
||||||
scipy = "^1.14.1"
|
scipy = "^1.14.1"
|
||||||
@ -76,6 +75,8 @@ pandas-stubs = "^2.1.4.231227"
|
|||||||
ipykernel = "^6.29.5"
|
ipykernel = "^6.29.5"
|
||||||
ipywidgets = "^8.1.5"
|
ipywidgets = "^8.1.5"
|
||||||
nbqa = "^1.9.0"
|
nbqa = "^1.9.0"
|
||||||
|
mkdocs-material = "^9.5.40"
|
||||||
|
mkdocs-jupyter = "^0.25.0"
|
||||||
|
|
||||||
[tool.poetry.group.examples.dependencies]
|
[tool.poetry.group.examples.dependencies]
|
||||||
datasets = "^2.21.0"
|
datasets = "^2.21.0"
|
||||||
@ -114,6 +115,7 @@ py_version=311
|
|||||||
pretty = true
|
pretty = true
|
||||||
# strict = true
|
# strict = true
|
||||||
no_implicit_optional = true
|
no_implicit_optional = true
|
||||||
|
plugins = "pydantic.mypy"
|
||||||
python_version = "3.10"
|
python_version = "3.10"
|
||||||
|
|
||||||
[[tool.mypy.overrides]]
|
[[tool.mypy.overrides]]
|
||||||
@ -121,6 +123,15 @@ module = [
|
|||||||
"docling_parse.*",
|
"docling_parse.*",
|
||||||
"pypdfium2.*",
|
"pypdfium2.*",
|
||||||
"networkx.*",
|
"networkx.*",
|
||||||
|
"scipy.*",
|
||||||
|
"filetype.*",
|
||||||
|
"tesserocr.*",
|
||||||
|
"docling_ibm_models.*",
|
||||||
|
"easyocr.*",
|
||||||
|
"deepsearch_glm.*",
|
||||||
|
"lxml.*",
|
||||||
|
"bs4.*",
|
||||||
|
"huggingface_hub.*"
|
||||||
]
|
]
|
||||||
ignore_missing_imports = true
|
ignore_missing_imports = true
|
||||||
|
|
||||||
|
@ -26,7 +26,6 @@ def _get_backend(pdf_doc):
|
|||||||
return doc_backend
|
return doc_backend
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.skip
|
|
||||||
def test_text_cell_counts():
|
def test_text_cell_counts():
|
||||||
pdf_doc = Path("./tests/data/redp5695.pdf")
|
pdf_doc = Path("./tests/data/redp5695.pdf")
|
||||||
|
|
||||||
|
@ -3,7 +3,6 @@ from pathlib import Path
|
|||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from docling.backend.docling_parse_backend import DoclingParseDocumentBackend
|
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
|
Loading…
Reference in New Issue
Block a user