From d412c363d73ac5944956bd5d2c1bb48e8f46ad7a Mon Sep 17 00:00:00 2001 From: Fasal Shah Date: Tue, 8 Oct 2024 14:16:43 +0530 Subject: [PATCH 1/6] fixed unload pdf backend resources (#129) Signed-off-by: faisal shah Co-authored-by: faisal shah --- docling/document_converter.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docling/document_converter.py b/docling/document_converter.py index a4c55ab7..3eac36dd 100644 --- a/docling/document_converter.py +++ b/docling/document_converter.py @@ -199,9 +199,6 @@ class DocumentConverter: end_pb_time = time.time() - start_pb_time _log.info(f"Finished converting page batch time={end_pb_time:.3f}") - # Free up mem resources of PDF backend - in_doc._backend.unload() - conv_res.pages = all_assembled_pages self._assemble_doc(conv_res) @@ -227,6 +224,11 @@ class DocumentConverter: f"{trace}" ) + finally: + # Always unload the PDF backend, even in case of failure + if in_doc._backend: + in_doc._backend.unload() + end_doc_time = time.time() - start_doc_time _log.info( f"Finished converting document time-pages={end_doc_time:.2f}/{in_doc.page_count}" From f96ea86a00fd1aafaa57025e46b5288b43958725 Mon Sep 17 00:00:00 2001 From: Michele Dolfi <97102151+dolfim-ibm@users.noreply.github.com> Date: Tue, 8 Oct 2024 19:07:08 +0200 Subject: [PATCH 2/6] feat: add options for choosing OCR engines (#118) --------- Signed-off-by: Michele Dolfi Signed-off-by: Nikos Livathinos Signed-off-by: Peter Staar Co-authored-by: Nikos Livathinos Co-authored-by: Peter Staar --- .github/workflows/checks.yml | 7 +- README.md | 73 +++++++++ docling/cli/main.py | 28 +++- docling/datamodel/base_models.py | 5 +- docling/datamodel/pipeline_options.py | 44 +++++- docling/models/base_ocr_model.py | 8 +- docling/models/easyocr_model.py | 23 ++- docling/models/tesseract_ocr_cli_model.py | 167 ++++++++++++++++++++ docling/models/tesseract_ocr_model.py | 122 ++++++++++++++ docling/pipeline/standard_model_pipeline.py | 41 ++++- 
examples/custom_convert.py | 38 ++++- poetry.lock | 45 +++++- pyproject.toml | 4 + tests/data_scanned/ocr_test.doctags.txt | 3 + tests/data_scanned/ocr_test.json | 1 + tests/data_scanned/ocr_test.md | 1 + tests/data_scanned/ocr_test.pages.json | 1 + tests/data_scanned/ocr_test.pdf | Bin 0 -> 93549 bytes tests/test_e2e_ocr_conversion.py | 98 ++++++++++++ tests/verify_utils.py | 22 ++- 20 files changed, 699 insertions(+), 32 deletions(-) create mode 100644 docling/models/tesseract_ocr_cli_model.py create mode 100644 docling/models/tesseract_ocr_model.py create mode 100644 tests/data_scanned/ocr_test.doctags.txt create mode 100644 tests/data_scanned/ocr_test.json create mode 100644 tests/data_scanned/ocr_test.md create mode 100644 tests/data_scanned/ocr_test.pages.json create mode 100644 tests/data_scanned/ocr_test.pdf create mode 100644 tests/test_e2e_ocr_conversion.py diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 8e92e76e..8c88acc5 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -9,6 +9,11 @@ jobs: python-version: ['3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v3 + - name: Install tesseract + run: sudo apt-get install -y tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa libleptonica-dev libtesseract-dev pkg-config + - name: Set TESSDATA_PREFIX + run: | + echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV" - uses: ./.github/actions/setup-poetry with: python-version: ${{ matrix.python-version }} @@ -32,4 +37,4 @@ jobs: poetry run python "$file" || exit 1 done - name: Build with poetry - run: poetry build \ No newline at end of file + run: poetry build diff --git a/README.md b/README.md index f3902454..882f3a8a 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,79 @@ Works on macOS, Linux and Windows environments. Both x86_64 and arm64 architectu ``` +
+ Alternative OCR engines + + Docling supports multiple OCR engines for processing scanned documents. The current version provides + the following engines. + + | Engine | Installation | Usage | + | ------ | ------------ | ----- | + | [EasyOCR](https://github.com/JaidedAI/EasyOCR) | Default in Docling or via `pip install easyocr`. | `EasyOcrOptions` | + | Tesseract | System dependency. See description for Tesseract and Tesserocr below. | `TesseractOcrOptions` | + | Tesseract CLI | System dependency. See description below. | `TesseractCliOcrOptions` | + + The Docling `DocumentConverter` lets you choose the OCR engine with the `ocr_options` settings. For example: + + ```python + from docling.datamodel.base_models import ConversionStatus, PipelineOptions + from docling.datamodel.pipeline_options import PipelineOptions, EasyOcrOptions, TesseractOcrOptions + from docling.document_converter import DocumentConverter + + pipeline_options = PipelineOptions() + pipeline_options.do_ocr = True + pipeline_options.ocr_options = TesseractOcrOptions() # Use Tesseract + + doc_converter = DocumentConverter( + pipeline_options=pipeline_options, + ) + ``` + + #### Tesseract installation + + [Tesseract](https://github.com/tesseract-ocr/tesseract) is a popular OCR engine which is available + on most operating systems. To use this engine with Docling, Tesseract must be installed on your + system, using the packaging tool of your choice. Below we provide example commands. + After installing Tesseract you are expected to provide the path to its language files using the + `TESSDATA_PREFIX` environment variable (note that it must terminate with a slash `/`). + + For macOS, we recommend using [Homebrew](https://brew.sh/). + + ```console + brew install tesseract leptonica pkg-config + TESSDATA_PREFIX=/opt/homebrew/share/tessdata/ + echo "Set TESSDATA_PREFIX=${TESSDATA_PREFIX}" + ``` + + For Debian-based systems. 
+ + ```console + apt-get install tesseract-ocr tesseract-ocr-eng libtesseract-dev libleptonica-dev pkg-config + TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$) + echo "Set TESSDATA_PREFIX=${TESSDATA_PREFIX}" + ``` + + For RHEL systems. + + ```console + dnf install tesseract tesseract-devel tesseract-langpack-eng leptonica-devel + TESSDATA_PREFIX=/usr/share/tesseract/tessdata/ + echo "Set TESSDATA_PREFIX=${TESSDATA_PREFIX}" + ``` + + #### Linking to Tesseract + The most efficient usage of the Tesseract library is via linking. Docling uses + the [Tesserocr](https://github.com/sirfz/tesserocr) package for this. + + If you run into issues installing Tesserocr, we suggest using the following + installation options: + + ```console + pip uninstall tesserocr + pip install --no-binary :all: tesserocr + ```
+
Docling development setup diff --git a/docling/cli/main.py b/docling/cli/main.py index b942d519..e27026d9 100644 --- a/docling/cli/main.py +++ b/docling/cli/main.py @@ -14,7 +14,12 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.datamodel.base_models import ConversionStatus from docling.datamodel.document import ConversionResult, DocumentConversionInput -from docling.datamodel.pipeline_options import PipelineOptions +from docling.datamodel.pipeline_options import ( + EasyOcrOptions, + PipelineOptions, + TesseractCliOcrOptions, + TesseractOcrOptions, +) from docling.document_converter import DocumentConverter warnings.filterwarnings(action="ignore", category=UserWarning, module="pydantic|torch") @@ -53,6 +58,13 @@ class Backend(str, Enum): DOCLING = "docling" +# Define an enum for the ocr engines +class OcrEngine(str, Enum): + EASYOCR = "easyocr" + TESSERACT_CLI = "tesseract_cli" + TESSERACT = "tesseract" + + def export_documents( conv_results: Iterable[ConversionResult], output_dir: Path, @@ -152,6 +164,9 @@ def convert( backend: Annotated[ Backend, typer.Option(..., help="The PDF backend to use.") ] = Backend.DOCLING, + ocr_engine: Annotated[ + OcrEngine, typer.Option(..., help="The OCR engine to use.") + ] = OcrEngine.EASYOCR, output: Annotated[ Path, typer.Option(..., help="Output directory where results are saved.") ] = Path("."), @@ -191,8 +206,19 @@ def convert( case _: raise RuntimeError(f"Unexpected backend type {backend}") + match ocr_engine: + case OcrEngine.EASYOCR: + ocr_options = EasyOcrOptions() + case OcrEngine.TESSERACT_CLI: + ocr_options = TesseractCliOcrOptions() + case OcrEngine.TESSERACT: + ocr_options = TesseractOcrOptions() + case _: + raise RuntimeError(f"Unexpected backend type {backend}") + pipeline_options = PipelineOptions( do_ocr=ocr, + ocr_options=ocr_options, do_table_structure=True, ) 
pipeline_options.table_structure_options.do_cell_matching = do_cell_matching diff --git a/docling/datamodel/base_models.py b/docling/datamodel/base_models.py index f18dbd7a..752e264a 100644 --- a/docling/datamodel/base_models.py +++ b/docling/datamodel/base_models.py @@ -110,7 +110,10 @@ class BoundingBox(BaseModel): return BoundingBox(l=l, t=t, r=r, b=b, coord_origin=origin) def area(self) -> float: - return (self.r - self.l) * (self.b - self.t) + area = (self.r - self.l) * (self.b - self.t) + if self.coord_origin == CoordOrigin.BOTTOMLEFT: + area = -area + return area def intersection_area_with(self, other: "BoundingBox") -> float: # Calculate intersection coordinates diff --git a/docling/datamodel/pipeline_options.py b/docling/datamodel/pipeline_options.py index 9ea7a77f..2ebff48d 100644 --- a/docling/datamodel/pipeline_options.py +++ b/docling/datamodel/pipeline_options.py @@ -1,6 +1,7 @@ from enum import Enum, auto +from typing import List, Literal, Optional, Union -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict, Field class TableFormerMode(str, Enum): @@ -18,8 +19,49 @@ class TableStructureOptions(BaseModel): mode: TableFormerMode = TableFormerMode.FAST +class OcrOptions(BaseModel): + kind: str + + +class EasyOcrOptions(OcrOptions): + kind: Literal["easyocr"] = "easyocr" + lang: List[str] = ["fr", "de", "es", "en"] + use_gpu: bool = True # same default as easyocr.Reader + model_storage_directory: Optional[str] = None + download_enabled: bool = True # same default as easyocr.Reader + + model_config = ConfigDict( + extra="forbid", + protected_namespaces=(), + ) + + +class TesseractCliOcrOptions(OcrOptions): + kind: Literal["tesseract"] = "tesseract" + lang: List[str] = ["fra", "deu", "spa", "eng"] + tesseract_cmd: str = "tesseract" + path: Optional[str] = None + + model_config = ConfigDict( + extra="forbid", + ) + + +class TesseractOcrOptions(OcrOptions): + kind: Literal["tesserocr"] = "tesserocr" + lang: List[str] = ["fra", "deu", 
"spa", "eng"] + path: Optional[str] = None + + model_config = ConfigDict( + extra="forbid", + ) + + class PipelineOptions(BaseModel): do_table_structure: bool = True # True: perform table structure extraction do_ocr: bool = True # True: perform OCR, replace programmatic PDF text table_structure_options: TableStructureOptions = TableStructureOptions() + ocr_options: Union[EasyOcrOptions, TesseractCliOcrOptions, TesseractOcrOptions] = ( + Field(EasyOcrOptions(), discriminator="kind") + ) diff --git a/docling/models/base_ocr_model.py b/docling/models/base_ocr_model.py index 3b3c261e..4139d689 100644 --- a/docling/models/base_ocr_model.py +++ b/docling/models/base_ocr_model.py @@ -3,21 +3,21 @@ import logging from abc import abstractmethod from typing import Iterable, List, Tuple -import numpy import numpy as np from PIL import Image, ImageDraw from rtree import index from scipy.ndimage import find_objects, label from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page +from docling.datamodel.pipeline_options import OcrOptions _log = logging.getLogger(__name__) class BaseOcrModel: - def __init__(self, config): - self.config = config - self.enabled = config["enabled"] + def __init__(self, enabled: bool, options: OcrOptions): + self.enabled = enabled + self.options = options # Computes the optimum amount and coordinates of rectangles to OCR on a given page def get_ocr_rects(self, page: Page) -> Tuple[bool, List[BoundingBox]]: diff --git a/docling/models/easyocr_model.py b/docling/models/easyocr_model.py index 5fb4066b..a4c64a78 100644 --- a/docling/models/easyocr_model.py +++ b/docling/models/easyocr_model.py @@ -4,21 +4,33 @@ from typing import Iterable import numpy from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page +from docling.datamodel.pipeline_options import EasyOcrOptions from docling.models.base_ocr_model import BaseOcrModel _log = logging.getLogger(__name__) class EasyOcrModel(BaseOcrModel): - def 
__init__(self, config): - super().__init__(config) + def __init__(self, enabled: bool, options: EasyOcrOptions): + super().__init__(enabled=enabled, options=options) + self.options: EasyOcrOptions self.scale = 3 # multiplier for 72 dpi == 216 dpi. if self.enabled: - import easyocr + try: + import easyocr + except ImportError: + raise ImportError( + "EasyOCR is not installed. Please install it via `pip install easyocr` to use this OCR engine. " + "Alternatively, Docling has support for other OCR engines. See the documentation." + ) - self.reader = easyocr.Reader(config["lang"]) + self.reader = easyocr.Reader( + lang_list=self.options.lang, + model_storage_directory=self.options.model_storage_directory, + download_enabled=self.options.download_enabled, + ) def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: @@ -31,6 +43,9 @@ class EasyOcrModel(BaseOcrModel): all_ocr_cells = [] for ocr_rect in ocr_rects: + # Skip zero area boxes + if ocr_rect.area() == 0: + continue high_res_image = page._backend.get_page_image( scale=self.scale, cropbox=ocr_rect ) diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py new file mode 100644 index 00000000..c3c19991 --- /dev/null +++ b/docling/models/tesseract_ocr_cli_model.py @@ -0,0 +1,167 @@ +import io +import logging +import tempfile +from subprocess import PIPE, Popen +from typing import Iterable, Tuple + +import pandas as pd + +from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page +from docling.datamodel.pipeline_options import TesseractCliOcrOptions +from docling.models.base_ocr_model import BaseOcrModel + +_log = logging.getLogger(__name__) + + +class TesseractOcrCliModel(BaseOcrModel): + + def __init__(self, enabled: bool, options: TesseractCliOcrOptions): + super().__init__(enabled=enabled, options=options) + self.options: TesseractCliOcrOptions + + self.scale = 3 # multiplier for 72 dpi == 216 dpi. 
+ + self._name = None + self._version = None + + if self.enabled: + try: + self._get_name_and_version() + + except Exception as exc: + raise RuntimeError( + f"Tesseract is not available, aborting: {exc} " + "Install tesseract on your system and the tesseract binary is discoverable. " + "The actual command for Tesseract can be specified in `pipeline_options.ocr_options.tesseract_cmd='tesseract'`. " + "Alternatively, Docling has support for other OCR engines. See the documentation." + ) + + def _get_name_and_version(self) -> Tuple[str, str]: + + if self._name != None and self._version != None: + return self._name, self._version + + cmd = [self.options.tesseract_cmd, "--version"] + + proc = Popen(cmd, stdout=PIPE, stderr=PIPE) + stdout, stderr = proc.communicate() + + proc.wait() + + # HACK: Windows versions of Tesseract output the version to stdout, Linux versions + # to stderr, so check both. + version_line = ( + (stdout.decode("utf8").strip() or stderr.decode("utf8").strip()) + .split("\n")[0] + .strip() + ) + + # If everything else fails... 
+ if not version_line: + version_line = "tesseract XXX" + + name, version = version_line.split(" ") + + self._name = name + self._version = version + + return name, version + + def _run_tesseract(self, ifilename: str): + + cmd = [self.options.tesseract_cmd] + + if self.options.lang is not None and len(self.options.lang) > 0: + cmd.append("-l") + cmd.append("+".join(self.options.lang)) + if self.options.path is not None: + cmd.append("--tessdata-dir") + cmd.append(self.options.path) + + cmd += [ifilename, "stdout", "tsv"] + _log.info("command: {}".format(" ".join(cmd))) + + proc = Popen(cmd, stdout=PIPE) + output, _ = proc.communicate() + + # _log.info(output) + + # Decode the byte string to a regular string + decoded_data = output.decode("utf-8") + # _log.info(decoded_data) + + # Read the TSV file generated by Tesseract + df = pd.read_csv(io.StringIO(decoded_data), sep="\t") + + # Display the dataframe (optional) + # _log.info("df: ", df.head()) + + # Filter rows that contain actual text (ignore header or empty rows) + df_filtered = df[df["text"].notnull() & (df["text"].str.strip() != "")] + + return df_filtered + + def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: + + if not self.enabled: + yield from page_batch + return + + for page in page_batch: + ocr_rects = self.get_ocr_rects(page) + + all_ocr_cells = [] + for ocr_rect in ocr_rects: + # Skip zero area boxes + if ocr_rect.area() == 0: + continue + high_res_image = page._backend.get_page_image( + scale=self.scale, cropbox=ocr_rect + ) + + with tempfile.NamedTemporaryFile(suffix=".png", mode="w") as image_file: + fname = image_file.name + high_res_image.save(fname) + + df = self._run_tesseract(fname) + + # _log.info(df) + + # Print relevant columns (bounding box and text) + for ix, row in df.iterrows(): + text = row["text"] + conf = row["conf"] + + l = float(row["left"]) + b = float(row["top"]) + w = float(row["width"]) + h = float(row["height"]) + + t = b + h + r = l + w + + cell = OcrCell( + 
id=ix, + text=text, + confidence=conf / 100.0, + bbox=BoundingBox.from_tuple( + coord=( + (l / self.scale) + ocr_rect.l, + (b / self.scale) + ocr_rect.t, + (r / self.scale) + ocr_rect.l, + (t / self.scale) + ocr_rect.t, + ), + origin=CoordOrigin.TOPLEFT, + ), + ) + all_ocr_cells.append(cell) + + ## Remove OCR cells which overlap with programmatic cells. + filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells) + + page.cells.extend(filtered_ocr_cells) + + # DEBUG code: + # self.draw_ocr_rects_and_cells(page, ocr_rects) + + yield page diff --git a/docling/models/tesseract_ocr_model.py b/docling/models/tesseract_ocr_model.py new file mode 100644 index 00000000..1b4f6f7f --- /dev/null +++ b/docling/models/tesseract_ocr_model.py @@ -0,0 +1,122 @@ +import logging +from typing import Iterable + +import numpy + +from docling.datamodel.base_models import BoundingBox, CoordOrigin, OcrCell, Page +from docling.datamodel.pipeline_options import TesseractCliOcrOptions +from docling.models.base_ocr_model import BaseOcrModel + +_log = logging.getLogger(__name__) + + +class TesseractOcrModel(BaseOcrModel): + def __init__(self, enabled: bool, options: TesseractCliOcrOptions): + super().__init__(enabled=enabled, options=options) + self.options: TesseractCliOcrOptions + + self.scale = 3 # multiplier for 72 dpi == 216 dpi. + self.reader = None + + if self.enabled: + setup_errmsg = ( + "tesserocr is not correctly installed. " + "Please install it via `pip install tesserocr` to use this OCR engine. " + "Note that tesserocr might have to be manually compiled for working with" + "your Tesseract installation. The Docling documentation provides examples for it. " + "Alternatively, Docling has support for other OCR engines. See the documentation." 
+ ) + try: + import tesserocr + except ImportError: + raise ImportError(setup_errmsg) + + try: + tesseract_version = tesserocr.tesseract_version() + _log.debug("Initializing TesserOCR: %s", tesseract_version) + except: + raise ImportError(setup_errmsg) + + # Initialize the tesseractAPI + lang = "+".join(self.options.lang) + if self.options.path is not None: + self.reader = tesserocr.PyTessBaseAPI( + path=self.options.path, + lang=lang, + psm=tesserocr.PSM.AUTO, + init=True, + oem=tesserocr.OEM.DEFAULT, + ) + else: + self.reader = tesserocr.PyTessBaseAPI( + lang=lang, + psm=tesserocr.PSM.AUTO, + init=True, + oem=tesserocr.OEM.DEFAULT, + ) + self.reader_RIL = tesserocr.RIL + + def __del__(self): + if self.reader is not None: + # Finalize the tesseractAPI + self.reader.End() + + def __call__(self, page_batch: Iterable[Page]) -> Iterable[Page]: + + if not self.enabled: + yield from page_batch + return + + for page in page_batch: + ocr_rects = self.get_ocr_rects(page) + + all_ocr_cells = [] + for ocr_rect in ocr_rects: + # Skip zero area boxes + if ocr_rect.area() == 0: + continue + high_res_image = page._backend.get_page_image( + scale=self.scale, cropbox=ocr_rect + ) + + # Retrieve text snippets with their bounding boxes + self.reader.SetImage(high_res_image) + boxes = self.reader.GetComponentImages(self.reader_RIL.TEXTLINE, True) + + cells = [] + for ix, (im, box, _, _) in enumerate(boxes): + # Set the area of interest. 
Tesseract uses Bottom-Left for the origin + self.reader.SetRectangle(box["x"], box["y"], box["w"], box["h"]) + + # Extract text within the bounding box + text = self.reader.GetUTF8Text().strip() + confidence = self.reader.MeanTextConf() + left = box["x"] / self.scale + bottom = box["y"] / self.scale + right = (box["x"] + box["w"]) / self.scale + top = (box["y"] + box["h"]) / self.scale + + cells.append( + OcrCell( + id=ix, + text=text, + confidence=confidence, + bbox=BoundingBox.from_tuple( + coord=(left, top, right, bottom), + origin=CoordOrigin.TOPLEFT, + ), + ) + ) + + # del high_res_image + all_ocr_cells.extend(cells) + + ## Remove OCR cells which overlap with programmatic cells. + filtered_ocr_cells = self.filter_ocr_cells(all_ocr_cells, page.cells) + + page.cells.extend(filtered_ocr_cells) + + # DEBUG code: + # self.draw_ocr_rects_and_cells(page, ocr_rects) + + yield page diff --git a/docling/pipeline/standard_model_pipeline.py b/docling/pipeline/standard_model_pipeline.py index a68318b3..3cbd87d9 100644 --- a/docling/pipeline/standard_model_pipeline.py +++ b/docling/pipeline/standard_model_pipeline.py @@ -1,9 +1,17 @@ from pathlib import Path -from docling.datamodel.pipeline_options import PipelineOptions +from docling.datamodel.pipeline_options import ( + EasyOcrOptions, + PipelineOptions, + TesseractCliOcrOptions, + TesseractOcrOptions, +) +from docling.models.base_ocr_model import BaseOcrModel from docling.models.easyocr_model import EasyOcrModel from docling.models.layout_model import LayoutModel from docling.models.table_structure_model import TableStructureModel +from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel +from docling.models.tesseract_ocr_model import TesseractOcrModel from docling.pipeline.base_model_pipeline import BaseModelPipeline @@ -14,19 +22,38 @@ class StandardModelPipeline(BaseModelPipeline): def __init__(self, artifacts_path: Path, pipeline_options: PipelineOptions): super().__init__(artifacts_path, 
pipeline_options) + ocr_model: BaseOcrModel + if isinstance(pipeline_options.ocr_options, EasyOcrOptions): + ocr_model = EasyOcrModel( + enabled=pipeline_options.do_ocr, + options=pipeline_options.ocr_options, + ) + elif isinstance(pipeline_options.ocr_options, TesseractCliOcrOptions): + ocr_model = TesseractOcrCliModel( + enabled=pipeline_options.do_ocr, + options=pipeline_options.ocr_options, + ) + elif isinstance(pipeline_options.ocr_options, TesseractOcrOptions): + ocr_model = TesseractOcrModel( + enabled=pipeline_options.do_ocr, + options=pipeline_options.ocr_options, + ) + else: + raise RuntimeError( + f"The specified OCR kind is not supported: {pipeline_options.ocr_options.kind}." + ) + self.model_pipe = [ - EasyOcrModel( - config={ - "lang": ["fr", "de", "es", "en"], - "enabled": pipeline_options.do_ocr, - } - ), + # OCR + ocr_model, + # Layout LayoutModel( config={ "artifacts_path": artifacts_path / StandardModelPipeline._layout_model_path } ), + # Table structure TableStructureModel( config={ "artifacts_path": artifacts_path diff --git a/examples/custom_convert.py b/examples/custom_convert.py index 63c8bebc..e386bb3a 100644 --- a/examples/custom_convert.py +++ b/examples/custom_convert.py @@ -8,6 +8,10 @@ from docling.backend.docling_parse_backend import DoclingParseDocumentBackend from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend from docling.datamodel.base_models import ConversionStatus, PipelineOptions from docling.datamodel.document import ConversionResult, DocumentConversionInput +from docling.datamodel.pipeline_options import ( + TesseractCliOcrOptions, + TesseractOcrOptions, +) from docling.document_converter import DocumentConverter _log = logging.getLogger(__name__) @@ -71,7 +75,7 @@ def main(): # and PDF Backends for various configurations. # Uncomment one section at the time to see the differences in the output. 
- # PyPdfium without OCR + # PyPdfium without EasyOCR # -------------------- # pipeline_options = PipelineOptions() # pipeline_options.do_ocr=False @@ -83,7 +87,7 @@ def main(): # pdf_backend=PyPdfiumDocumentBackend, # ) - # PyPdfium with OCR + # PyPdfium with EasyOCR # ----------------- # pipeline_options = PipelineOptions() # pipeline_options.do_ocr=True @@ -95,7 +99,7 @@ def main(): # pdf_backend=PyPdfiumDocumentBackend, # ) - # Docling Parse without OCR + # Docling Parse without EasyOCR # ------------------------- pipeline_options = PipelineOptions() pipeline_options.do_ocr = False @@ -107,7 +111,7 @@ def main(): pdf_backend=DoclingParseDocumentBackend, ) - # Docling Parse with OCR + # Docling Parse with EasyOCR # ---------------------- # pipeline_options = PipelineOptions() # pipeline_options.do_ocr=True @@ -119,6 +123,32 @@ def main(): # pdf_backend=DoclingParseDocumentBackend, # ) + # Docling Parse with Tesseract + # ---------------------- + # pipeline_options = PipelineOptions() + # pipeline_options.do_ocr = True + # pipeline_options.do_table_structure = True + # pipeline_options.table_structure_options.do_cell_matching = True + # pipeline_options.ocr_options = TesseractOcrOptions() + + # doc_converter = DocumentConverter( + # pipeline_options=pipeline_options, + # pdf_backend=DoclingParseDocumentBackend, + # ) + + # Docling Parse with Tesseract CLI + # ---------------------- + # pipeline_options = PipelineOptions() + # pipeline_options.do_ocr = True + # pipeline_options.do_table_structure = True + # pipeline_options.table_structure_options.do_cell_matching = True + # pipeline_options.ocr_options = TesseractCliOcrOptions() + + # doc_converter = DocumentConverter( + # pipeline_options=pipeline_options, + # pdf_backend=DoclingParseDocumentBackend, + # ) + ########################################################################### # Define input files diff --git a/poetry.lock b/poetry.lock index 7733ecb7..27fac6b9 100644 --- a/poetry.lock +++ b/poetry.lock @@ 
-5929,6 +5929,41 @@ files = [ doc = ["reno", "sphinx"] test = ["pytest", "tornado (>=4.5)", "typeguard"] +[[package]] +name = "tesserocr" +version = "2.7.1" +description = "A simple, Pillow-friendly, Python wrapper around tesseract-ocr API using Cython" +optional = true +python-versions = "*" +files = [ + {file = "tesserocr-2.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1b8c4828f970af7bcfca83a1fb228aa68a2587299387bc875d0dfad8b6baf8ed"}, + {file = "tesserocr-2.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3bb5d336ebf2cc47cd0d117cadc8b25b2e558f54fb9a2dedaa28a14cb5a6b437"}, + {file = "tesserocr-2.7.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:3ff7f6d6b5c12dd31b80842eb0892b661a41ca3edf0e6cc1e54ec2c14552ceef"}, + {file = "tesserocr-2.7.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:ae794c5434373f4afa4c7f8b59f19fde810f8caf096d8bb701a4b2f3a6739460"}, + {file = "tesserocr-2.7.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0a0895a4d9ff6a34f5a6f203fe0c9899f31d6f2378ae99be80605637b622687b"}, + {file = "tesserocr-2.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4c3187d14b95c866aa1d34cc374a53d583e2168742eefe33347e4790af70338e"}, + {file = "tesserocr-2.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ec52be3d82136430081427062ad0211a52fc38fa28fe58e216b89f840354f216"}, + {file = "tesserocr-2.7.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:44e71b3e8da36b2567760309398689ea9785ee62db3ff21140a9ea6941a233c4"}, + {file = "tesserocr-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:e31a49d7784e7e52fe656719145c3a872856d67daa9bfb340c2990db00e023e9"}, + {file = "tesserocr-2.7.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:37abde15c1c940d691305fd87836e4cad25a1434799729c324bbcd2277bcae44"}, + {file = "tesserocr-2.7.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:1b6349d35d333d420d24acf1953ad6f1d5613ffcde462c62126b68bdfca12753"}, + {file = 
"tesserocr-2.7.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:42f009cde8479f3b339da12a8e419fd9559b64b13bc08a248bd0833c6ae94331"}, + {file = "tesserocr-2.7.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:6e13204b3b92fac76ece6e33f55eba6335b30e379f4a7b75e285c2ad05762027"}, + {file = "tesserocr-2.7.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:65afdec0c5dc09a4a23a62e65524989cd940af41be1603e251a64ac10de9babf"}, + {file = "tesserocr-2.7.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4c5f59fb072c90bff8aa6a365fc82b747c2668b7b48233901728b155860d1ff9"}, + {file = "tesserocr-2.7.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:f62d662e3002868384e14e8cd620bdedf34ab9f9fc3ebbce527cfe032a7485ee"}, + {file = "tesserocr-2.7.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e80051812685bd521bc17cb70cf1480ffbb3e54ccc2883e90d5bcda15f8278ea"}, + {file = "tesserocr-2.7.1-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:2690cb2330fc9349d68ff027cbdac09693fdda36470836b196c04f16dcc99e9d"}, + {file = "tesserocr-2.7.1-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d01ebd094103451ecb77b6510ade2f6bb064c51413ff35b135f649f3d6067a67"}, + {file = "tesserocr-2.7.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:f8069ae6cd9ea3c056b6a596bc99f501ee9f95d6fd2928fcaffb9777071c210d"}, + {file = "tesserocr-2.7.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:b2d3d23223d0a448877fb91af83c46ce95ff0a497a82fa93e93068148c9712e5"}, + {file = "tesserocr-2.7.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ef8a09a44c2e96bab0f40dbf0633767d063680d86b79365b43fc4e1234219694"}, + {file = "tesserocr-2.7.1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:6e613213ea5b64db06f2cba0b93c3656b7e6aec2d9b2d2e929edf49da7143225"}, + {file = "tesserocr-2.7.1-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:4a8888b765e26680a6e34b8ec09b7bb85a17e08cea76f0661eafe2a84254562a"}, + {file = "tesserocr-2.7.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = 
"sha256:64f25763e56c4c29b808e59b485c930cac46b6a1ac8eadd994086dc40a29d3a1"}, + {file = "tesserocr-2.7.1.tar.gz", hash = "sha256:3744c5c8bbabf18172849c7731be00dc2e5e44f8c556d37c850e788794ae0af4"}, +] + [[package]] name = "threadpoolctl" version = "3.5.0" @@ -6514,6 +6549,11 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, + {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, + {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, + {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, + {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, + {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -7121,7 +7161,10 @@ enabler = ["pytest-enabler (>=2.2)"] test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] type = ["pytest-mypy"] +[extras] +tesserocr = ["tesserocr"] + [metadata] lock-version = "2.0" python-versions = "^3.10" 
-content-hash = "7c5fb235944009b74193d045f36c1be2a8e168393012bf952541e6e7dea08072" +content-hash = "a9bfb36209f3a9140b6923c51bae8c1e23af5be34e52d9622119a5683f125b2c" diff --git a/pyproject.toml b/pyproject.toml index f512a196..41d21cfa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ pydantic-settings = "^2.3.0" huggingface_hub = ">=0.23,<1" requests = "^2.32.3" easyocr = "^1.7" +tesserocr = { version = "^2.7.1", optional = true } docling-parse = "^1.4.1" certifi = ">=2024.7.4" rtree = "^1.3.0" @@ -81,6 +82,9 @@ langchain-huggingface = "^0.0.3" langchain-milvus = "^0.1.4" langchain-text-splitters = "^0.2.4" +[tool.poetry.extras] +tesserocr = ["tesserocr"] + [tool.poetry.scripts] docling = "docling.cli.main:app" diff --git a/tests/data_scanned/ocr_test.doctags.txt b/tests/data_scanned/ocr_test.doctags.txt new file mode 100644 index 00000000..7cd53510 --- /dev/null +++ b/tests/data_scanned/ocr_test.doctags.txt @@ -0,0 +1,3 @@ + +Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package + \ No newline at end of file diff --git a/tests/data_scanned/ocr_test.json b/tests/data_scanned/ocr_test.json new file mode 100644 index 00000000..bf0fb86d --- /dev/null +++ b/tests/data_scanned/ocr_test.json @@ -0,0 +1 @@ +{"_name": "", "type": "pdf-document", "description": {"logs": []}, "file-info": {"filename": "ocr_test_8.pdf", "document-hash": "73f23122e9edbdb0a115b448e03c8064a0ea8bdc21d02917ce220cf032454f31", "#-pages": 1, "page-hashes": [{"hash": "8c5c5b766c1bdb92242142ca37260089b02380f9c57729703350f646cdf4771e", "model": "default", "page": 1}]}, "main-text": [{"prov": [{"bbox": [69.0, 688.58837890625, 509.4446716308594, 767.422119140625], "page": 1, "span": [0, 94]}], "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package", "type": "paragraph", "name": "Text"}], "figures": [], "tables": [], "equations": [], "footnotes": [], "page-dimensions": [{"height": 841.9216918945312, 
"page": 1, "width": 595.201171875}], "page-footers": [], "page-headers": []} \ No newline at end of file diff --git a/tests/data_scanned/ocr_test.md b/tests/data_scanned/ocr_test.md new file mode 100644 index 00000000..42896546 --- /dev/null +++ b/tests/data_scanned/ocr_test.md @@ -0,0 +1 @@ +Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package \ No newline at end of file diff --git a/tests/data_scanned/ocr_test.pages.json b/tests/data_scanned/ocr_test.pages.json new file mode 100644 index 00000000..de3f5f5e --- /dev/null +++ b/tests/data_scanned/ocr_test.pages.json @@ -0,0 +1 @@ +[{"page_no": 0, "page_hash": "8c5c5b766c1bdb92242142ca37260089b02380f9c57729703350f646cdf4771e", "size": {"width": 595.201171875, "height": 841.9216918945312}, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 71.33333333333333, "t": 74.66666666666663, "r": 506.6666666666667, "b": 99.33333333333337, "coord_origin": "1"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.0, "t": 100.66666666666663, "r": 506.6666666666667, "b": 126.66666666666663, "coord_origin": "1"}}, {"id": 2, "text": "package", "bbox": {"l": 70.66666666666667, "t": 128.66666666666663, "r": 154.0, "b": 153.33333333333337, "coord_origin": "1"}}], "predictions": {"layout": {"clusters": [{"id": 0, "label": "Text", "bbox": {"l": 69.0, "t": 74.49958801269531, "r": 509.4446716308594, "b": 153.33333333333337, "coord_origin": "1"}, "confidence": 0.923837423324585, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 71.33333333333333, "t": 74.66666666666663, "r": 506.6666666666667, "b": 99.33333333333337, "coord_origin": "1"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.0, "t": 100.66666666666663, "r": 506.6666666666667, "b": 126.66666666666663, "coord_origin": "1"}}, {"id": 2, "text": "package", "bbox": {"l": 70.66666666666667, "t": 
128.66666666666663, "r": 154.0, "b": 153.33333333333337, "coord_origin": "1"}}]}]}, "tablestructure": {"table_map": {}}, "figures_classification": null, "equations_prediction": null}, "assembled": {"elements": [{"label": "Text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "Text", "bbox": {"l": 69.0, "t": 74.49958801269531, "r": 509.4446716308594, "b": 153.33333333333337, "coord_origin": "1"}, "confidence": 0.923837423324585, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 71.33333333333333, "t": 74.66666666666663, "r": 506.6666666666667, "b": 99.33333333333337, "coord_origin": "1"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.0, "t": 100.66666666666663, "r": 506.6666666666667, "b": 126.66666666666663, "coord_origin": "1"}}, {"id": 2, "text": "package", "bbox": {"l": 70.66666666666667, "t": 128.66666666666663, "r": 154.0, "b": 153.33333333333337, "coord_origin": "1"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "body": [{"label": "Text", "id": 0, "page_no": 0, "cluster": {"id": 0, "label": "Text", "bbox": {"l": 69.0, "t": 74.49958801269531, "r": 509.4446716308594, "b": 153.33333333333337, "coord_origin": "1"}, "confidence": 0.923837423324585, "cells": [{"id": 0, "text": "Docling bundles PDF document conversion to", "bbox": {"l": 71.33333333333333, "t": 74.66666666666663, "r": 506.6666666666667, "b": 99.33333333333337, "coord_origin": "1"}}, {"id": 1, "text": "JSON and Markdown in an easy self contained", "bbox": {"l": 69.0, "t": 100.66666666666663, "r": 506.6666666666667, "b": 126.66666666666663, "coord_origin": "1"}}, {"id": 2, "text": "package", "bbox": {"l": 70.66666666666667, "t": 128.66666666666663, "r": 154.0, "b": 153.33333333333337, "coord_origin": "1"}}]}, "text": "Docling bundles PDF document conversion to JSON and Markdown in an easy self contained package"}], "headers": []}}] \ No newline at end of 
file diff --git a/tests/data_scanned/ocr_test.pdf b/tests/data_scanned/ocr_test.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b79f3c2824285bb0926637ad3523a37b1e2d50f0 GIT binary patch literal 93549 zcmeFYWmr_}_XcbnEL2oN0SS?AiJ?KHTe=zP7zCta02LMK25ISLB!*^2=|*y3DCzE+ zA>R#n&j0-CeLuWk50@7>dwAwq>t6S|*Lv38(7Y6vWO>HQeTxRZT6lPii;|tv*2wCX zpdcliys3@3qXi{5a7pDBB_$=Bq@}f^sXg%1+R)Kd+|=0C#PpW1@GS>NdsD;Lw_FoA z)z~k+&=;dLG&IUKw=}UZGfWLlXjg9`%zoK|6V3(a!f9n7$YA`yG;^6|2mcO1Zvf#A zQwM?GR^^p<_b%;~-`%}sYGd;IMavhv(630FrcRY$-k zNBuQKQ*XLJW{2dDrfC9&F_y5lpo|CYijKaki6Owx>#)*qhiRTxQ(4LWO@io*S|0~!q=T~?) z;f)WMX3OuI|0j3O9Ql(w9Dm03Y|{&e&&~Y*%N;&SzB4cWl{;sb{teV;XQcSwY4rbr zs?2MEd*}9QTABbk$;Ag^KYuB0YH4oaNXf(g40Qfl%+}i0Ue(Uf7~ty#5v1%5&&k3j z1F+8C&h{*K|KbeW3rk0bm!|e&wy*7MZO#bI`!5;2GUoP{Og)xjuzbt?OJ_TUXI@t3Z*iy8diJ~H`3T=@_2`#(Of2unV?XLA3KC;1$o zsSb%+lGbEg$`I;K>jk;GSxe3n6YFdX3H(mEOZ_fO(DLXc@=O1G{e!?i2>gSgS^KJ`0e#@>md*x><`BIzlko#eR z+la1udF`D9wQ4Y{C8B|B9jpaSl&UZPR_l58@o8rtf2v;%E4O;df!UIp@7kV-fK%|nrecj1Hm_JYyit6y;n?JqCb6?A7yu6RIP26UBU^Zh>8r+TcX zbLb2sW>9TUkZ91xuO)alGZ;2|}aK6?Pa-``jALYY{Ntiiksm66Vs+Kjr z=*tR>L>=k4&7kZdLb7WKTQpn)O~zR6OIr?6xX-hgEF|Hh#1VPp;j$+3;zz8Ad2)t~ z#8p|Z0ztqu!h1BnWHC!)7QdGq>Ohl=GpztT8u$;ZlrS=6BfSJ0-M#4kJQ__ zZd#C|6zs~3y!PDd3ZY_MQ+DDcT*v3zX)Bk!*_(jHsmov5h@dtNxx<)ox*W z_BM-S0N_a*NTRSi%+%4)A^lqsPT{7R5J95>n zvh8(C&Ruqlgt=M(7vt#hffy>$b0IT;JLxCOsc$g}xEA1R49^1CE57w_e3e%&?%s}B z7h&WRWOa8jgfI${n@@C=Q6Zb2J4wEt5P~i3J+~>eCw-H9^|qIrnHneg!J0uk=*!Qb z8Yb705s5wV2Hoo{^g=5YmXo?e^S0;NqB(GL=BhEhn$b^?SBW$kWX;$9h z9k_fAS;LvD?ecj0-2r};hfi5$(2R`u5q2o(;qnp*yixO%&T$!?d_mcEQX1O{W(GlW zqE`xy4FGMkKgjU-_~ngOB`jU?0poJ!4wrkmhwmnz=!L+Ci#W>36;y!kPOpSYOcXMLAmT9eTu!no8#>QiB`|AJzFBt`rg^ z)%<36Vqfnp@;iqP656MN~2W2A@j{IC5YiL{I z+~Gtav#75RXvcouKi_MaGgqoiI^ieT{jWuaC<=pJ)cn0t3$gl+`E=E z0Fgy$z6@XGCK( z?7c(wvP1nP#62$-B?w!`28X8;9- zn@bu`hp%4)7Q6l1Vhc%br~MB%^{)#)LI3dkOcx9VpDM(FAueZXM?#a8PZAV_yj_~< zW`CwcXCZa`Ba8q0FU|fyt2y#b?5Nc`A-dnWzsT>i%(QR6P1TW3bp(LbEIoGB!dj5J 
zKW?t|;lIKQ$kC!eBb{T`!{Oet{uIB@+F2|lLW5h7RYgFTqtj+|dw9MT5Lqc~&-(x7 z!NLtir?)>~6~qJa%Z+mmPr(e|GG6C!;@`Jjdz4)Lr=KC=hvMBaNMD^ksN2-PVq_<| z+n`c(b8NGbyHv=@gla0PsOTySa`5CrYYm8nz_g!q`mXT(+d6wmCpT;1@&?Gqvvo#P zQ;9*w&6RRq1;?$h*+KV6=reaj!hI_RT+@($HT5U4`cWb}i*TR|H1N*v^O{m)ust9; z&U8RQxU6h1pYDJ4eSu|4$G_2mKkeYKWdUllQ+fpNbj9ZQu2s*UNF{sLR!2#4b9)@@ zj4~~J7jMumlJQ9Xg@q6InSEHj?(-usr)23wHvvMRuL~eG; zVO~)4RZ67BOFY}ExXbLA(7NZtKM0koeAY6H6*DjAC&oso zF{9jk#xw)-C^JF#PQGR@oBhPHF?V16z0)Z^{A3m@yTr|{Y44i-!b5Vexu7_bQ$a!J zz{5YO@{%*x_mw*AOyQ`~hOg3C6 z70H6cuyX$+^v-o}XdOZq20z-Uq#H($kZ%kyQRF34w!T99#3XN3k_ne?J*efzBdc7Z zP3E7f(KTR9o%;hvyp>HN_YuCe!m&wE^J+Ypsmd3{V|OjNY-?YaRdDl{kO=xADp>ds zO=q5^S9n-Ck>Gvwu6n(q6C|*-Lu)O1X5|;EVsZ>lw68tKZ|us>EF?+nejNk%cky;x z_+mk`o$u1HtEyu){vsq{w!fz5?CMUE)@ zX*R1j!CQkt=4<8H1ust{`08DO)2eJA5dBKCH z^$$EIa}PaFVJzy2`ETjn3>@kH&{Y-Do_-TQhq|(}VR99+!N5?+jc460KVT#4T^sOft345gA>Td+$JMVt6dGR^7s2zIKxPa}%&e%S0s zW4NiN-_dfup9hPEIqoN~sZTW?Ifw7?o9`&(J2vdickxA|8cq$dcU;B~L+iV{;8ji4 z(|ff`qs%<>{_A;cnFcb{@FLsjYc>YeD^5A(842~=S!g91@PqE6DL?MR?9n5%r!QT) zIFZh@NK&h^C2YSCV=`vYV?N3Uo5`;#ypn3Nay6CnL4Z1G$jCD!QKm25BRd}@nKuP8 zuRK;q8`j5m&}$Ea!U?;R)NGY*PaLi>kErAgoq;EU%8qg}8GCq#UyBCzm;`*hILVHHO$?MNr3-Urm8JKrgf0@iox?Q89b!CUvB>-G=lDROohWiK?;m4u#rjAY88Mrt%oF2#CHE(PuP zC?6O4$#uXw5eHB2j0`|pSdr~TjooAsuI&;95+W${@tbErHcp6>V$HI-cVnKBmxH}n z)bK)x-})$VW8vuUdq%*_5({NnmEAgmq;R#N@caOrtjSdaX?Cy%96o(;qsG>y5JZAPS6x9mLK#9`Qr{ce<8Wq8O~awg)zHY8woA}|^Hb@3XM*6qr~ z_kd>RF#Gu(Vh6=#wikIwfux}#%d12pLSraS##Y!z5j`Ox5!(ch1$YMyt{0M-4GDTS zD^oT`!J%<9g&eGnVwmbvGS9C{xsp0#o5muKApMGV9d*XSUkM2sJ-;gq$jQO=G8ZL_ zFZ#>eC$s5{bgO9V$`o)?lBvfWqwvND+@*n8l>eHqa*AT+^Jb$&wEa`gtf%C81+TPx z4r^Yx{M^Jnoy=J^h7Y#G2!-?Kx?wm=0<+-w-G?)3Y^E(&D6>+{)w~0Q^e`feL`Ulj zA+Lk-Bev->4o?~CW%rcAgc+Y92jLr%>*jIuNpp|3v*uRU&C#)PrbB%CPrKTlom!Up z;W{^W^pT3>vv<4WWopaNVBtS~AzkVtQhjP8@V+C>&yi7J53d-t)g=e%=>4M(el{^Y zGB4Tt#|}w=+p9g-Xpvl1fZJwR_*fMpK>u1(zT znADRiN)=xXw7m*8ydK?OF}dZ)j38v#-_9pJI<<^Vff&7iyNnAZ7O0>u%Skb7)R8HBvi2DFbGqV;YNv 
zyUZU~9*U@rn}N3W)T&JjJ$g?vg;3XZtEaL{{I*@1OX4%En>M&=^Wu(lhUy3Uw)?^u z_Inm4y7a%MAEA|c%ksKqc`^zyFkRD}iabb~jCS1E%gf4Ja~9j-BJ1j}1#MtQIxXbm z6VJ(@jBT(^JZseu>2ex5aXJ?ySYYhEaZ+cogyFfb%3dtbMe^@&ocVfEB>lQ*tmL(w z(!LeE9VHYayYO|S<8XOLEIe1f1NQNehQl1HZZ|O;)=A|ra=~lZ2JygbPYXiV|6@|i zmT`w(p1GIfWYG!}*1E(Tr>z54%fAnUd2%>y$5?Fr$o^Q>t(WfRbetc%H)gIty&g9ylU`Z)veL;p)jUkHmnR=0(p6#^0aJ^P1;-!0LQ!aR z{yZW~QYyC{C!MG%B$<%cv{m)1o_;Tn{N`P?*iHiw5JwQmT~+7y;(#i|c(pt+7Ou&8 z@@O1VJ|nN%prA0ZL%_3>+JDFPtPCob`p>MppNTj*&B)ErHtF!mr7GH!~=>w-%p;G(0F}W9nZm0Z0*fo z2Oa5Y>q9_Zo_YX>n>|jQ8$Zi;P84Iu^c`vtZrgEIclbmUYCCCGgH<`pOef4lwlyNa zxGoj$?wRTC5NVNG?9T^V5VrjIB7M&?*F3jQ7n6~&?PJQ7_r|zyWv4~^8y_5I)iI=~B zyxb7kX_ym!b+A=&xYZWq$6ZvyP0>&L)ZGr2KroRRSOe($&T&hPZ7_%6v(X7Eo-Av3 zW1#+cQmOZ2gU#Wa6|aF+9vJ9Vw{zaoed`n!oc^T8Tb{Y5%E8K0x)|c%S%;HkCtCad zNaw3p#q(bg3`0e{h_a4Q`Eo`H@*|2Et?c}}Ra*&2`H?Dv*F}I58FB-&ChG}?bu_a~ z(S;rrFnCEid2{7b{H!?2aPsvr9UO)pl9iK{A51yPJ&q|;#N!zuS`0%h%kqm(@wmqT-o#WTAj+H8@C%H zoG0Jj+^2cy#;QMZMu0-XoYXGOIp8$@8u--l`g!7?K1vc;xPXTlMS_z+^x|P$bH7D+ zWo2it{LOltq_?8AM}W^>yx@g1WWEqEL@usAJk_gSafxV)92AX7+K#!pYy;&&5?O0? 
zCV_EBUD~Qaaw-cs25mrd65yY#-06?_VmkJz(N?i$80&fHwLbn4T;sO5SJF&3A?@T` z1v;j}R(^i6Oabk3+84&69wvLQ1h*>@BaMtw3TsL9reb2j)s^!z4SOs?b7ICQT~k;4 z>FT>y=yZ&pbIwzWTo#V)IUyqtyG3iTiyYieE)}vj0JVDoeg_xw8dB!krJ?!jHEL>ZFuGG@6md&&3 z6uTWmv6{HkT3An_nYf|59r_x%ZPDpBe2gh?qUo=B-PTRV)eoE+Y0!n!Vg(G0a)LH7 zM>-gCVbpi>v%)Yo8sWN+E{5cbI42S=*Ya-<@$J^YJ*T|1ygKhR6$(|2zeidiST0{x z`u^3x-HSy_B!LdoO0cG7t*vWjz2u4R+csPE|3v3WR8@1AKQ(Lq&#==!ClUR1qwgkhgv1_#`=rok5joB|`2gCoYpJtiH^s&; zaP%t!Z8V0Ln1E^OyN0J01t1IpEkA}Y6C=Is$H!N_PO8$@LCAT z8W6u8G));Kh>X1I-m300&2p!`E=j zDTb(ZoGS^yN*0;B^p)nN+95z0m;dBC3Jljc++AVsgfVK7V1aLDiYQOWAz!h>H}ffgVXrevfsa}>bwl`WS^>;lQY5pMzO~*X>!YjnO5px zEGC}YG^GXhkcWnVp~MG4>WxLCJrW%^8w4m0w$o))tsdS1j(I>^vo4U4*koC1mm9-j za`|V~CC*$53F8B$<(Qz(<0=hAhUkFHak3UxA?>NLUfqZh2zH1&8(1=xOo+~~88Dqg zSp`aP35 zna z{241aL~m|~C!7chI$_qCA`?CSd2P#Aa$zLVZ_HeosXWXF`q>Cw5umvmk;$YJ z6@i6H_-S>WA&Mr+y!Oi7JmPK08lz{D*lturqj73{Jk=KxLYSQO>j-^6 zY{~cdo8jiE={n+SI_)0s+xg&eoFDH~3GNf#zmLnPp{6Fj^MsL61xnO#mofVW(ax~M z${k#yZ!{f04GFHYcyo0(LDsuWmq1}ul;wW5Rh3Ds9 zOCM5P4oyZ6e%oF%o8s(WJ!p*zkYU2^|3u#V~7Qxb4^V zIp6u^TvYdTvj>jqZN)tIO>SJzX0%~G6(+^TwC&2Jw>KVDq#pPNFVvbc1nBgnlg%nA znG%4-OVswfyvFtWKFnnw$qPanEey47fsHJ4Z_Z5yo4>F$*| z-&w3xRxeh_pFh_4nH*I4m|XMMn`RW%sD9x{A`@bjs{3{t)=nbFVj7~_Vnm@16=9PT z;Y22Gu-faefhJG9mQTjyWxd*vSmC3fJ((%@C z>VlR`W@(~uv}RfO2uruS&F9v4&2aAQ>da|~?g!>B~n_jwf4+mdU&GBYen#oOyOyko@uRaOC z_6mkxfn)YKgg**@AcN=)BuZLC&YuRMVfw-RUVb*2!up%EO*+tGp;ySkXlEv}ovxCt zl@_$GOJj7cT)tIit6{SVl?r%&c-7^)!@8+({6-}vEw85uC7n=am+u{|E93L2^#RDN z&-0h2-R2MLPie;^Q%_gu-T03;nc9*j{G0?8#i5;wc-_9ObaNQAO%iI|TTWP9J2h_} z-?DE}lc2V|edhbL4oQDsM-wV=PYl)j!^9pODy;01=)K*CFfJjZmwupheOJrCIHJk= z6MdF?0VXa8VZ6$&aUb)t+&=R1rRN0lddfZ3%++$e1>?cg)~wY_9U7xSijUUaXux$s z{9CEsWFmbIMEaD%k|#8 zjVsvKm0Ht~ur*fHEMf*;FF5fUQ&yO57}kVZ5cphvI7k16BtGli3`ofBB)91FIu6pr zLG9M@N1xENnUc|viN==-FLI~fa7;m*-Cc#{g#jE-ri(tSq5JCeXgrr#ldobV!&4BN z?h#j&;-W-_IxV1`)*_{j?8IXaafb-k$v)C+p$V;7e~1$+T`zDgS?AED%~DS+(PO&c z{X&Y(JpI*6Z|T2Oq^1Utq9W3t;)(G^dB0@zB6Opj7@lC_-haJwnbGN|)DEdkm+(q6 
ziXYRRvxCEg<*-~H!mD)?yjs>8VBIruIKM$X+5lpNCGKTavWC+JaUzHo*X+l%cRs-C z%x{rCtIVtq;6g}7V|m9@_jIoyDn8Q3t2Xcd$Wh-}P7`sX2JvmA4?pUcBMySauKGsb zr7by}`U4a+p~=nRUuMpsuG>ZDm*E3?sU^}iQ=01#4avBmyHNk?Z@Qx#gcihU4TcTH z#KJq)ImP1TbHq8^6N*pe)MlXRR9M1y6UoLS*K$V z5NAT%u2GW3I{>lH^&q_E#n=)u^Q51iF9)_eYlTPKFY!5pJP9vC3HY%0 z_fOOVf0ghl)-#+CFBU|EZKX?IPvRo=ac79wfTig2Aj!BZ6 zW$(>+%qmWztki2Hw#IV$A53q;Ph`tQ;!9$!CXC`=2NCzC2DD}Gr-#p?{mo~Yht`Y zz7!qIap}_M_kPQjDUOWz6N(Te6}O~mPL1A=H@w0!JgUETdr#y0F$V#9^EYc^pJq(sS_2d z;>I*&CZ%aLQlRm=wNT7W{4jOTC{V1Bdwfor6fK6n@(4MKx6b=|c75=7c;%s;8s~zB zz@VSXIz4Wf9l(x9;QV3v~t(7wWvmHg?zUzi}Pfv zy)TmA+#@=Yo!@5a{J-24Sk&7~qaSb#x3CS;726N1>5Xy`d3uwHp*zInfi;z)DG!1c zY!oQLra~v(%VG{Tc&WY6qTeYT#V%7M^Wey>o2<=EDl9?Wln(Z$5tTZwU+6(3!{FT= zdf-8dt^ypkEooJs(53GQZ>@-6@5HxW+|P;hxD}CMHB{XtD%Cn=?wl^-;ddp(hZeD^ zrRKqII0XT+*+5>ns1aqOlINqloA++QJf%Ifdv%Rj!NU2MObL6o`J+Hs?3%B}U0T!* z0`pNpm3IQESNv%8m9WHZvQod(<`mi31dV}idzM`?h;NbqE8nVuwA+_1rM{YSU%Sj7 z>&YO?9ZBu3llDnnx5%wLqJZAv{1v`KB=+M#m1 zY0~;S!|34Ej@m4Vif##?H&JFWF?hrw!+yFI7sMkSUL;`U0xz!Sh%HpJiMS`Uo^#?W zX@MPW9$`cVYhjBCZ=jMP(ub%fN9V$4l*&Yr^P{{Q z&e=l-l8p>PogsAr?~l`Ct_V#NB!uzJ(yW>k6Xt1tt27YaQ>9-+E5_OvPRGD zcKr4WnPSo}YR!33o_rB98?DS_T!_pQXA3`D>A70!#VJ3S1hRVciHXmlrS)e4pIVa7 z>G63uLj4jeFSsm|7!t5~N`N z{92G`kX-&VCJqp7iU=k7P43xu1r?0(o0kUyw_Dlv3#bz(e}`J{EYzj;>>0%458EN% zax$?|vcjPP;b%_osk2T1m8vGo`hwoieXHPq(OX+3bD>&xR8G!zaiHgYCyO@mhYpjj zV-jw?HZxDfFZ3y!aNMDUF`Cw)hf?l&Q9^;O1Y4@&Ciw9MsJ2@C`08KaH^mLjC<2#< z$Jt+w_^4A5)^iI}kQHSOnd9+qav{a;soWC&t=D-WjN-tRr((3=h6U>}u8nEeKY$Hn0*Cx9DXt`%0FxmaWT7 zRexTS(c|{OJx3(9%gczqT>G^D;#e>gJMu=DfZ7Pm>6i=g`W&AB$7IL7%%$(Nj4!oH z%0|ho$9+HeJs<%a8^7v}PZs*3wq>jKY|Wg5EPI^(ez70V(k$(~aPjiDbLUXk>?KE^ zdlpo2ZLR4$ZgTI@3y9|_n-D&lyt3dMHdeFkSinei&k(vQQyZ5b)O_E@d;=3#a2~ZO zK*b+c$nqI;f~NCiXQf0oDu}M_Ds(iWzt$lTJTR_J6UOMFc>GXE5DeAnBYBr zA~P5yli`Zpz7dfVtRKWZKZ6$EGL??j@eG zOigi18|snOe)Ur>PcAATGciJf#ilT%S)T8F^GpN=qvHZfJ7`B-FD6bV%;FHf@F1`e 
z6}T@(dR>*F?~n?P8{aWMvM{5+r-UVFUHf`1OMO;yc$EM8^M@U`&_)YcQKoAd#h`Tkrq=h?pRTA1TKxh95=wpb$oMU(ekL5E0*KxPdRZ(pgU2;orrOcuY%yPQFmM0)0vRqkxMdovAXnkHeR|aa*Do z&z$IpK{|Zpu9cmR`s+yeMU9$yS?==tbk(gdnYGdgB`Ma+I@z~QqH_97?!+6c&@1h& zdPEJJ4G_3&0t~MxtSw<0&J?t&E%O~>K-W)|_{xu6{X7_XH1XCViK}*LlX&Vn$mLtA3SLL*mouUJ_N#q6#0L+)Tsfa zU|3@dWceoFy6w9Ko(%$$K^I75^csaeSR6%dLs1og@ zwKVC*fu$BkNU~B7Ohe|s5z|bEe0B;El>bJ_O`289Q%gHrl+Kd_jXycd5$0+d-Ww!MpArrt~m^$&7lVPr) zAB^QGserh-fS&Otw|(TQh)VaQwK`K{RA^L|_CRpuG^CT~Cill+7}=5N_c-gI<~b|W zD?pN3XG%Ucqw;SH@=<^}$MIf2mM13K!*Pq%c*Nb#8zrM<9z-cqCy))Xx!_F1_9pjV zV5>8!Wd8br@s{LbEfi%6u2>mpmE_%UI%QO;Tq#5GY$1UJ80}lNcN?y7vvU z-yMNLa}^gobtNdZVjxNa5NFIA~aIk8@_XElZ4zEEc^kmBc|0H zHw30l>}oIkJ3Bd;R|0aZ>!#Ch#%LNpxFr%4zj1ddE?$F>)D*Tp;|}edQoeTSk}~@n z33cB{H|pdW^Y9tMV;%H2nuLz5uN0I#8`6$8&8SiQFD9}NEmq?AM{VvJRTsPOhT7Jq@Z zQ4UQ=g2si&ge&dlEw)5cR(y$j&WM#qJ`mv}2YY;$(Rdx}-OAvsaDR+2JR6XWn3^N| zD0XGQ(59lC1PaG<9Mhz#Yv;KA3Sd8DV4=z7d%+3tZ@q%MHEu2-!(l%~Z6jL;JW7Y(mur-^vYV&}5Jm93|GZY1os`L0 z@=70;B^{1E7|`p8{w9<>C8N^%;#l?wFf?eiKlc;{{=u+Ul|N{;3%dDHNMJOm)4S{( z+9BUNWb+h&k_9!QlI&7vc3m4o4zr_+C(;ZBXaj=7Kf*j*A$2q0=s!F^vu17%>8y<@^0pBro;Cio_IaXs`8OhGknLcTqm$Vd+iKe zA&EAge?yn#dDbip!-W@09Uk)3jkqED<%l$m7Z89w#`q0E`g9J^D zkC~slj=FVZEh1uhlRMyC{GS{!3XOy1fxHkl&W4oVd zPnF$p#twg-BAGk{qmn=^1GY7T!b6Q{Z)JlWgv3bPX+X&m`@_P_KBj+5_q3`b9yr2f zgm-txA;ZL3n^BQX#>Q|+QGflm>DF-FG(_WCQbNe@YJXecaEfiO4<`mUoPftDd?z9@ za^tS7I9~xHsLf!abr%!Ycz#&9cT70o=?FrohSIY~)0Q5EU87am` zrJ~{Xk3D_Amb?3yoW-(ZZk_6!Y9A03vhjq$=qqjQQqK9=YI;HQPF_!P^nzgwLCsB8 zD5D(T`S!opl=GI5IC`(KNFKe4Gx7H13yY$2{If~2$cMwFxkL0Go@e|a3OO4Bz1Y!I zOI$QzKKRC4$z^8(9NBq_Y1AfFe@ft9QJbRMpN80oR3UYgh-l)yi4;(;!Y-@}Z*M3( zmzcCXBn)+~Ft2&z%M_68g=*t*8A_*FnQ6ahXCDa7*h3WnE!3zc`X91g%4T4MkL=FE zutmIL(-s9}`$doZe?-(v0-njwP*k-eVjcn zA`?U&pQ|78Ula9aUo0B<=||Ey7uieFGjsJp`q z5aw_^)&!>WFq^mB{gPEeR#=2VTYy$^;tFnAE<%n+igD-c$Zvq`Wh2`pT#+e zg<=Y_Mt$&EE{EjMW!a`Wf)tRX@lZE%r5Jl&i;4L_t1KQ#;Tsp?B zOZbiuwoljnjDrzMRQ2g`Ff1piInxSd{NH(5w%14i+I@7e9}z1OfPyN>xnAGJH~J54 
zSz^ka9p(A#g-;VN{y^a;Y*D)G!&59la;U6uFRVeypaC_C*Z72sEzuDr`2g3=xgtzZ zlg@yU8W2DHR#uGGd0PfwA1WmS3=G3Qon3BPsbREUtRw}iJ`TAvP}ut4Tb=yXfHHY2 zlIOZQvDlKMpE9o3)$aq{`@bn4VBPga<{Dr2Vfk>z@8&1Y=5qH&$r2<^+bu) zz!`CK=;dnM|4=_P;+1A~ZcXIza(xH5JSzDnpv#Q6IQ~OwHVM_O@c3`y-OtpOQ&~(V zO3y8(xq*O{CaBrRJJTSxs1b8r)(ZRaF?I1OORqNxwpK!|v$k{D)qx&|C@g58_76@1 zFjHYedO_ZBu5GBzty}*Atcsq}Gvdo%svu?+rrBie=O%6UpJrB{GVggS=8rd`__Y&0VfxkDK zdO9;axt(qbzHtU%P(qo0%?1!#tBmr z*3wCfk+<=`sD1zjVmpH2<-^}mvFqa4{e-#c=FIz{^!2-Q@J17dO%l%<#E0PJ2HK?b z;rtc_baP7F2k1D5`81?$x+n|{*FHbISOe1{(yK~qXHCo59EHI{H2kLg)_akXQ)kZY z@-l-)>S`aW-dGKFQqzLaKYNcORhDdeBu_(3>M3NgjjQmlM8B z&orkYOVdSu=-vNqni@Xs@oDjC8R>S`{d3cndcXXKX*>Q_`kefiZQ(lMj=VCwy5+2U z_q{ehI0)sK1ls_-1H?MvDBR#yup7AcKI4ldGLUIWIZ+lBDkW>7KQtYKt~4>v?;pPB zunAYTo;qEEw|yRYOf{1$#zz7M?t0(pCx3f>01wG{!&3Bb@CArIaJDt^j{T)O#};+E z$4E0wrk1>QF+e!$Y%b;>FVZ#DcGSAwTx$zgq`nF6FMvD*ohqkt?QGtuwKZ2bO;VY{`3T2EzxlLdUSqb_HFEq=D}k;kH!USNUUehEck)wsd0Rx?CT7u8Llqvb9K1 z4@ki!&o*Yf{u~b!6H_m?m{e39od%Q{pKO$wKLO?gm$QEloX(Do7o!?+R|$CTM=^b7 zrLky*N?$Y6r!opt`$XmD_}l}z@AZAsQte)|J^1V@9{DNk8wRa1SEd2`XnI;}h{oJU zC3qaj&^ZcY0}-h4eRK$9V0rMt+RDb248>o<4VhLniZsyxW2lNNr z8B?YpIg-|E?`NkRl}cKdmixofF(k(|M!QZVq^X49zU4uelALNQxPSiT<7>x<1|7-W zDlB_0Wm@J*8fn>jz!X2Y`96v~6vzdNh_hU%+~iKXs`LqtyRADk^Ws*ZP{w z;DE8c{OHc9(3evLI`0NgD_Q?aY3?Y!t|y)HCj75a`n@s2fqT9VK~h`$(r6DjqprSB zr)MDVuyK0$6B6|f!xDc~7cxx2G0Qfm)gKnrDXZq<&m1KRcoNYv9t9o$?irM>o1rX~ zIt9s!xZ@^gELK4ySThB2A3JymavC7yFQAgEf-0UO*tX4gnIg%C?dzS^{$D*A8J-av zVA?W19}o8CUz1@qvrnM2S>;N85k4Z?shHOmR6OAFfDnx9_-8!A^p(ff?aH?K6SXus zI~sPGb7Z#`9qio~xLAm?@QJ(4R;lF0Gi6WJ?*Pl>;9as@inW-9PzAx~81hJYkzyCN znDoOB=@Fjl2Csy$FDz>|;n!O~?P$PK8!WBOV3;_wa$1pI=|l~(*16-vlj9q6-AfSp zBHRUD@KUK?gwH1T*d{L;?NbaK6pXiuxC&`_5N3|QTE96PDiZZYbedlNa&{LllPeF< zD*AlQJ<3&wj#KglU0EZ6>|smwO69TxMF7FS2F_1%nHYb}CT!=M-#y5FKT-RW_tOyC>q)mE59dtYrIRlaVmZ#m3~GG)y37y`tjX+Tn|Ds+Sj0SP z=&qHsj=F7R1|lLGqw`H{Z1AFX&G|D*5Lq)0^aynGhL4JX;KMw*>S5R4B-~wcT4|kw zN`6^z1BI=4%mNQ^BBE;pYbR8w%z$|=$UASoHfE-PMK|B_j!gE+Dvgdr2XMC~%xT_m 
zm;Z!%f@2%V3g=~8nI4iK%v*MiU3%~3Vv(A9Fr5uI>mT489vMp16hyRXdcg|sYyuk4 z+T2?a{REBPp~v0>)XTMIGS=6*1qyO` z+EgZF)SxUwDK^U&XH9zvcsN*g(HwYHmf>-2*z5aD6GLp)_Rp%+|;xrD+5Co*i+JBLgzOEHk4+yG+|%h%2TK7=!l zWtUAG>>z(E#KvC}s!*<5Zqf(IUlU!Mb)I`LT2K2jIHCww8N#yE*Di|RKb>Zb-Hx7PI{D}Cl7y-yGt#)XFBDz|*sEQwL6RupK`|==pPXu)U>gNPTyE z-GT8NF?QdaDEkB?j!&z5rvT5~&Yl55OxjG)TggXVu4}lkngnB-LO#rklbbvk;yO2jg)Lkl?$QSN{mdvD5&N+q z=z|A#vAG%>P>?2Rzo;SFf~~ff$Sbj^(CBz#ZpaZhFh9Mx0|HT*1`$+aeT&l2(JRl7NPPu(cxYy$Ym6%&l;#KA9fl(j*K~ zqB@qqpIFsBso8rU-ADn})r#fHeqT53#qgs{jY`#yO5n2~6MP{PiE(9q zv7A@?Pe7U!a6OL72Vh(taylx++5N=&8Dn!jIY(&cukWgD;~)7d4f_phX}v!krX>2b zx`&$Fh*-4Y8HmK0qO1mRHS3*4(S|*8k#}6L2_uMJBDt~Z9UKXo`2wc zIrD|pk#e!cc*W5HP#SL}!?-`UG^t@Qv>A}>hq zmYhy`#!k*CDurq4asR5fRizULLzY(RW^6{w9XAV4R@nn0xr}mt=X{<-(@c4W>M4J? z4+S*x(Of>8h~6UC+y9_0-QR91)dPRo=Zn{9iJ1>_tdf2MaAvGeRsG&XO${LIs0fbj91 z8NKmA^W^loi;j!tN&_%d-N{6D^~KGK7M9cWp|8Dm_>aJeZEPvS%ix`+>j`ijQ_6r} zj;uk0)fW3q;9~AE3`~To|Fk$RQJ&P@W;rgY6njRzDcZlx^y6eSM)qL~D*X?<7I38R z_kQs~2x)Bfn-36H)1jqxjZ^L^+^wgIquz8fT-BaNl&7S5Z)L@QD{;Xa-ZSM=AkN@A zviwovAA}Rsc0gg5x7;AeXu6P{GtHJvVSoQ^xO z2<;5((FtiKL+HY0Z3oL`;vs$FJjpLtR3!8z27FGN`ZTZLV@!CH!$GaKU*Vp4kPuMa zIu;AVrvL|82rG;K1~B+_w)a)VY@Q@E0+@^$pHo$Qt)fk##dXffzvOP7`QC~LXZnI6 zrMdvU0Q3g_hUhoFp8O zY>pP`OoeK`9yl+!XYTgn0b7eXe}9%v-40mPQoHzWC)XgOcWNd8!ln2Prv4-C=SB2D zR>=wgUAeqhrev5E?ZQYL{{^forJ&R9mK+`<9Ed`;>oTX24+DS{)H7@kMnF*C{c56_ zurV-bUcz&+*a@pMXRAe&Mcq=RFQRd+-CGf!@|SHQ^_vz#rLqxKF6y+%nt`k6I+m&{ zVa?|mS#5(*?bs))Z%%w(*tUA^{df%o&mgD8O3mJ)TMb&)&~*mmJV^D2@hsGdxqLgx zl3G9c7vZ}NUG+OPyLs%HL6O}NC%_Rhm8SypL;P9A>toGR0v5oj^;+o}2zotp-s6u= zR8$!yUr~}9?Ecu~rs6(>NLKXr#zDF9O>05WJhcn9G`~@30Gd{_4e|ANge@Auws#V^ z#E-tq{uk(u9T8)t-aDNQf0(k<>Or*ewZ0=7i3oMErf0NFIaqcl~Cj3`N=e< zp$Nyb^j3*~HcUZ5o#!J9zq~#%c=BfWpeNP)nzhuYBvzL^+QZ4-2f?e=j5xHI#?HA~alf)y8zgUfsjnR}IU_62;w#c)9F7WBD6N(P%VAgcEAHHy znb%0qIb6e?)mO3D&PhQWM%T%oPA1HbSd2*~r@N-x6qUAh66NflB(@5jeH(`jr7YR2 z*MjUOPP$1hmS*xwD_Ex!6eg9C;1;ookC%w5gi$7u+t_97H7&b-4rX* zu-TRPwM5h@==^Y(AU`ldnCAyuku0!BO7x0YN99O(zBu#ymqWL(~{H4*iCxm 
zX(xXg&~`>iAu8n}ux7DSEvm*eFd`M-ls{s`(wwTTNE-L)B;mYd*YSH-GUY|uMRl~s ztms4}yUO$x=^4$)fmRQAeM&*bwPk|AIA1UGj;$_S;jX^>h?EJ%xO#<)^CY2J zbm61va5fDiD6R6^Ku&w-mU9gIqwRgkLqm!N8NgjnW$B6Ven+M3MTe`0DJ2Oez|)#S z&nXV@>b@HFmB-pKV_mQwJ=;JI)PKHArE5X0u1uGC`WtUSH|#FvV3l|mma(lt zW7oR@qN|P@s#3{5c?D11mbfFY-X=2(j9w?B2WT%Ru%P8#sqQb9)!&2$$~-vdcOS1G z?inE7WIfY(j6^Nd0*7cd3%26Ffaz6S1?R)wv+LG=dP(D@xBgb;Rk=~ zfmB)Rvt?OGub!(KIb+pK>7fQ;n3C=m5%x}=8)>$>+WM1T1MI8VKHj9uen~*$UI?N6 ziiW5U1#K6x;Dc8MI~f7q|nf1oS&u363=!3cRi>5NYd#yc{8KyqC zXFSHfKh@sKD+d^~hCdC4W0FD%02KI8^N^j#9+z0r5)iA$ew!Lo>auSzoMg+-)EGCL z{|nvDeKK8{>t^N7jbFHj5vq{q6Tb^2NW_deEx%{=c{vIx6^ALgv8>$x=8>4EPNTg80bcV{3tJ$Lc3VGv*^0QS|W!6Q!`BBR7NH$;(2qZuPa}9PvW}L z{zQwO!ZrM@Ax=e&wW<11#h$iYBjHBZUR}!-HK5ndjrZu+PZsNUdW` zC24LYYsLtiS!^)D{z4(0c{)S2wBczG91~b7JpNU8#ck)l$0b^{Bn|2P@!jye2@+@G zEzu4X*)*%(!OedAg-$}!GNtg%cjNmT-$*p^`9;JZWvbdm+53=#XC5KR zH+CJv%=NeZ7>n5v@Q$ZRG)&+I5QLq;9bavGmFU)T}{e{|;fjXFj=R zer2yA0WRzLY^<59M$FRd`hFLs!U9c;GH7>MUE>u!7eLv9Mqp(#S&SbQ)bI*SfA(z( zR>mf*4SR3ItXtMsR@FQ6LyyfEz6*ZX+mUqwibM0up?QQv>eNG3M19*4x&d$!5|=`7 zT*JC!Ez1|+n8fpK*Hq(Ug|H7X?;e?@g^THw3jP9AjC>1uKI$CtKWHls18DP`;uA}c z>3I#V*r&_7t1rp04cC>cBoV|^5t3Aw29RrIPV{UuD_1`AQF&Qv`jnoHpCe@Gi?poC zyr)|hcrTAr~%xo+}XcGiXNyia3oBw^hqK4&5m6vp13rak+Js=Q1zBWfvEpi%1j6Y!K>r}T!=eg-FVzyB@d^H6I31nOCN(zSbUGV@CaPAG^x%A9@ zI6hs)vCr7L>J6%7;1!-4gBy{^cxG0?jEHnKbYSmtX2OiFTJyR>PCzGU{`bEmC;ZE@ zA^{HvlqFEQ%vW(VI6{fY&(c|KK8=?T;2Yt$Jb6w0vJ8#Ys z5CX!Z_pW0vz!voA%A{zy&=5HLHV6|SYkxX-?gZ|&Z%~^Vv}2)Abv;X;UMM&SiUC#@ zZ0Trz)89DQUliIefY@76GysLwoT+>lt7hrGGLOObH~@_d0F6hjplvp!a>AA|eYW@W z^kmIup01a(a%K>=#lK+)V;H`I#WG^?BeOR=N6)Q2X!gRqP^6Vo51Ckqi3z)_9J)x$ zU(oIVXM|<;ezX1(TX)ngbLMP`D)dD3BxikSM`Ch`?ia zYm~-o#xb`&-9yLOi)rCpH{drwmRE~t73G} zJbV+M87r{&ab`tbmzeJITEAwi-IpN-bGzb4;axU99zw3l=)m_yG}1meyk>4P<*~@1 z*%9gtfmhEx{IgG6^r<#1+8pz*Z`aB@4e)N~2q+Gn-`UwHVbr5>STTf?6C2UrZ`W_# zgxiNBeT#;5xK@cD3d4Tol@^oW>sPU%dUzF6JUh5+Q!matFGykiRqm=Ex*f+6Cu{m_ z9pq?+oS9UAdcK$j$7x$3n-!1fW8rn?G{tbk^_2YuVdx!h3dC0&U7T*H?4PeM2K50KXw7I;g 
z>KK1AK|R2S_iu=*M4D@$Wb$T|%_FrY&+y7e{~4$*dY5jvbNg8uwwictht7ZBqMIDQ zI;L_%{@sE(IgK^-zaPA`E3tN7_nP_ci2Ff)mBJPK#=nsWb1??Nqh!w;d9XPAq5$N7 z=M~`ijDXx-x#5#_{LLidbZeS_KY3~3Xq0YPe}G*T90KnRt@HUefYIqlNI+gtqRw~S zM5=hBSE>BJq~xU-Qi^TB6R#b3uU`^a{+D6-0v3?MCPue$N|Q37p`LR5e@~-IJ2JTS zVe+CV(p6WA_qZe*bpPK_n4)tS;}53`DUlsRy1He^`=aE3Ll|=9<$?j>BP2O-_EMA) zLjv6&fuf?nAN-FPArS)g#7rM1PliKZfW2GW$bjAdpHCR)wM8%Hn3=BVG~-ax`;cl~ zOlxgZj(@{1(}U+}jOZUeIcmKlca~)h9@30j(ky@h=-EFXS%0hakMRrY({NO827_XJ znT2JzPTl*Z2<=&q-6`@*L0?teePFetIJmg`WIX;7a^{LdU8L0j1*{7_cG z?qvV{$FhSTHUo$WcuPIt>;HbZCTyhQc0uQ}3sz$PzI$26u!;YoMso$3UPXrc@7qU} zj+K^#bTyiXrS!IyPyc;Qbw6ghF2u?K)`$q2x8nKt^-I-4FQi18n+7 zC&~EhmpKW{jfiA-s6L*v&VS~N|8YU9*}XiE0T;Gqa}dpOLE=pNpTUU26L>)X%xHtj z3hmKknd?w88Q&!A@Ux7w(Dm6}{pJf6Rd~1H%4uA4^1Wo*`PP#K& zLednzc5bRnaDAbv76GsL%Tl)M7K^Z5Z$AsFw2*D$ZGGwYX_d9XcBSWDefzGXZ6zkg zgX#cszto4SZJQ`7k%l49Ub4|yGc_)~bSs8M{2q!f8;aW)k1~$s-sb1HjV?hcwHp~b zG|s3dJ$y58tm~T5Tycr1N;Pjj=h1bQc~|B&UBgN;Bh9l4 zUEHD*oeb8Q(gK1CraOYSC5;V6e@+{iO~Gzpa2FZ1&$T=3!`!l!rq4g7lqa0e?DOK0YhZ*u1FE{5-KLOI5X&H+aS7}`Zww5BOdzv1uIIV8 zV7P*Nv(nymw(0vww&ms5qk`n7XWflbpZOZ4AicMN1?yA9}ub`V^kp7xpGi1J&9zv7d^-4nU?`7_yI zcyAGXipT#o+hF+R;P?GSnOC(9h)I4!cyV;hTBUvtNG{5#-*JNM^}}CE2b3u)Vu252 zZ3jy3W{JOq;5;5c!&@f*%=QUbNLg6I>dVOl5q6m$QPSNT7%j3KNM3mOaVIozv?i6* zMWvhy!g-ftFjouT$hK(E!i(saPH+HuPjWMKeYsa9UhGr%*UYzDP%j4_BUk!EYLzhF zy#!EJcO$Bu2>@+-8z)l!N83PXgyXq;(#h~xy+oxbvA*3v^)%E}V{+rc+NWp`8>H|{ zKZ(|1_Br`T)<&M53V&&@ax`R+_f)CgKszdHmjHHTA_3?+(SmuOe=$9nnTh}CoBBO} z_~-Ot_w*UA2-CtKkL)Es%5@(a!Gj&h4 zmD<%KGo6%2PdgepwVCMiDV>(M!CcVhtYsBQm9-AkU)Osl+B`;DJ7E>`UKtgVNF))B z5X5dK6xJB!HpG?p`E){>OR6k%wg1P;9)r&VQps?uE`z=^>V~Osy03nBeT4Bc*a1Kb z`=p8%jyZ0UzTe|9RDrQ}{p9x^nRJF#eZ{ZEJyWkpn%G0H$jvm{C&7>!Fc7JcZJ&m< z81^q22EzqJng}6Pg;ReuE}u7|-#sZTuvQA=o%pvC(s;$EIU_l~iT9I^&$+)e!11_x zC1n-WcCVQ>y6-k z>dIj4Bg@}nd;|_k897NHShoNdzqF43``Rv|t@wKDhmGD$7^h6HA(mRtxhPnW&e#mV ziG#QMyp@VT4+ZIeUQ7F>>U)4aqzfz{d64iia>NYg{xYnxh1a?8b%*e|IC{Z;vRAcW 
zingm1b|v##ff`_LEgB&rv017iBp(za@{ADh5#P3FQW*L{5D-xDBRMbM+!4PH;KDfFt9u4%y)Ie=S!h-bYuQ8%s82*YiF}W zk*QK9s#QR$1oQU1`9K*N*naz={w9@c-e1#jxNvBl(IC$``i#29b5_9k9z}<aQ|! z_^(NSd1<#`9w#z9_uDD2qY+%XufJ7Hblc_WoN{U-PO(V5iaM8A20UvfVxrw@({lX{ zqeY8<5*pb`o4zpM{$(@*NKJ*T6)dILLg_f7zczyc3AiXrdQS zK%~aFg#5TSG5o{bAX^_v33AWk&FBpaY;O2mT~)^nA@~QeteI6|h~e2}OrKm4(G?PK z!NRdY;iF%4Jq6n<Y07!+FB`WmAl;k6E2pi=eYwtoM7(dGXyq#>eJx7B@ds+G;i+|ARN+W0RImf-3 z?#Var@AK|F3*r`*Rg)zFU;!o~$cl4cDx)IHc@5*gJ`m%R#f^FM4eT-D1`=z!dsN66 z+&}8IbTVN|_Z}1WS}UJ6Ntqk34GeZ=#95ps{h-80af%)v{czOci>duzaFBb#Y{B?wvd0Wg- zO(}iSOZnhGrhtJF#`Y4j#lzbj&^CFG=D$f+VHMlA09QV~3G zA~@(0x|}eugu@oMIkHTJG=A8}asy+YkP6BBrPmID=KXSS-ux8fkLiZMaj4hW2Z~HN znckB@%QwgbqMGjBuVc0DiRN!%#11KC{i$#3$Ub6$`RD2OJ2cn!W%{3;i>s~YkvU}l zOQc(A#@hb>>*0E`gVQj*A)(CMCRxH;v@c*~`&e68+62|~W9t_i$=*S5*n|(!8_rj7 zJb$=UzdXd!P~pfczju_$+EarayeT$+&vjh3pj&v>~7NsW3i0 z$!kNog*lvAoA)u&^Eb#w67i&&CRZXhGCqF&VQYo%``(J0>c=w06FRN`&;^)>n%%MTYxohmfReyL;fNrHly?XL7sB8n9NM}+@}>7q#A@5;i( zWdD_C#_U}eHSMK+Gfec7BRxR~ka|hHLz<}bNX>3UchKb%hE|bvv1}3w5?#_FcP!uP&|M~Gosb>}cvI(Rg;#BVKe(293J!84 zZU^$>!_>U230Id|kqs$=_NGOYuij}=JSfc%M8Wy$^6q%`47VXp9n`v4m%mO82mn0_ zCSdmFG+dQr72n=JZ{B@pdfM^?14A7U&{>$nt%Cx$WBSpj+D%uP>^bw4XNdd4^4*Tv z?)1cffUa$g97aEWZatZ3NEPNPKF4#n0M{SXKGYz6A-0o z+I&2x4c+iOOnO7`%EMXGMiFWokAZ z27C~tTlV4&?lU9R+%c@ue2n?z$z*86{A}CQkNsUDocs?J6{oLq9=dFDLshKTN=3sZ zIb|j5$yjkCNfkqKeeZTxDF*|+ZKBN}mKbu*nbi!IQ!&F!2maYV4<`q{zz^lwgW-gT z93y^cLFIQ<$QHD{&#H}wJEN^p3t0jxD0YVTGVr57Jllg6Q-&UgHNwIiQ_`#+rDT-` zDGL^}=QyQiDX`x%HyptditUYdWfX9Lunp=d9-&>%R+bF=%r&k|p`8ct#(DFyqAXE# z=X;n$q1#eYTg0r>u_v6Rju^sS`@a7f>f+g{T#*_Dm2okNnBg@kimS&^#|{R;>61i& zS{Cu*$}QQ>KKyv4yOgCf@qP^VjryZ4XeWqkd(eXgE6B>>EMqVcVp>^}e0YXI+!54) zx3%AEhuO#{^D0URM2|eeJ>PiRW-M?9n(?5n4C7j=FkJVqs3=~-9E|>_cH4Ao`GwC%Go7oe7rOoBDde%O)}ujBfI_{|Z$85b zuqg~cE~#*HIUQMbkyFAQ4k6C5-OkWOxYR<;;Etw+s>^2L=bbx5AwsVpIByIlU+tGg zf`Z_-sr)V)S%q-yIAK2{&j=edBG;K~yxk+NA!}x>D~U~-0G!xeUpQ@AujOV=o_?}r zH6rXakzi84gMtZ_36SQ_1S@Hd}raVZ~vQC?duQ81k>AJbSXu_>f%>a1s 
z(bSXh;)b8bLw9%&^K<71psOBFQ~V8RsDp9v&uSZO)k!UmbYFy*`OP&xpCK^Bz<8ZS z3SJOhJ1s9^NR}utBiHEkA{T5kv;n32kjKv2In&i@ER_wfR2CxaA0oAfS zE^Nru&o=1@2a3-S-+D#dU@~^*OPiu$ zpkIfA&VVZJexSz0`Xe&R#TlyDv29U!?Y06-abCT2yaln{;bwlEJ@t9iV(Vi65%hih zPc2#73cT27|P^_m4It;xaWFCvCk%k8Y(*A0U*BK~T0vy3Jgqsm5GMrEER*tu1* zm7iq!+7%9j1RK$a;g78_$Iw$j+0!>lbYDweHNAxNHdTL|H^y!DHo9PJ!@&3^&x5S^ z?9MhPC&88=$-*4BlPWjBUA|c_jNd(Pj=eshny>h#aMl8{<*}VcrBZ7jj!$i;TqaO> zb{@`*cyaZ{p2e6lYxwM8VoYc86L3ZREkH1k4A;w*Amzf;SN7(UkhN65EO_FG^;yzXJNc`Vaj< z(X^b@lj~QmNoO%f_lw^}r%{%v4~X!LC=>}xy7;K1YM`#n&U!7b*slZ>twNIm$dji| zCQw@!8%!vV&t9J;9SH7sk5S=y0L>bzKb9Kf4tX2h{O5CfMf^h-5E4Z-H1;B})v(nr z{XVJtA_mT-0HDJ+ap*`>IGKk8tkWxYIiJy_#N1C6u^*+hv7JTns6}7W%p(^;(vRBX zC{_2yeNMvKhMEdk#FaUXcHl}xUZ_0*9BQ@`;42azRvjK>G-fpcc}7abZi|JI%}UtA zl4S$i64m-8I70YkL_S-5j|k8TRE!j8ESD#$=Rdv2wMpGVs2U7oH(9%<*iuom&7JS3 zpSLwL$~rR!jpy|v`%s5SVR-g|KEM<)G9?~+8x*Cv`Jz%lRoclxb-~=6VwMT}cl}5P5(b6Tr^}q$f1GfF36m>ISIQ6qar$D%eWVJ?s@LE)jYa z79g=WT|Fy4r_}hu8;uYG9Fm57iRLl_wwZ< zTi&+^Di?Pu@stssf@i72Vf#^H^4I&_(xlNA`#I~Hg=?E&P|W)^VW8n2wpp3ixNVln zt4+UcY+jC9JURe)N*w8tRQ*+Z3Z$zKwDIs&E^i^i&I*7z`tu>SMPu`5Li!>TZ=3D- z*h25c=6*Y%>6Ed^=3AS!q-dTI*QIV9TYo+p`u(;=L9OI|Kwfzt78o+#@)NpNW8E$K za5ZiBJu=B!HlcgClEd;*;yHf()PQwpSRQ$D&&kBw5R^stR*ol&zERCr5bmSWIo2CSyW37iKk&;nQC%);?2kxRohh#cfO0gRLUlPa}@rtF4NBj-lBI z_ku@`cP!s~GC2?s3=es>3m5-3P8ql)BNEOD1Vf+=?PhWigYglk$XkY`-|n9YXk0@k zwcvtkNVoc-RE`B@?72A>PsHKH(@Tq#MvX8JXk|I6p%L+o+fTBm7qxf+qT80z?_ODH zO$3IJVz6A^DmhM59k-0Ke1H?d<8Mnm z5#XA)8;@|!P-UI;^G1dp&bN}C2}#j~#b1UV+ts*pYAdf?S?xSIBKS>qY^lrbHY99& z%DAz@!3&BT{ek<;un)_{2Ruh!^z6*X8|C0BxtEtcD(Kp4@vwg?RM`Ns<<-bLYD-$c zmPT?a*#s<=BuRk&IP3{g%x*G$a^AVKff=#wSit~ zoe->3&vLAgo76)Z2!};Li0eF8Hd5ku7YYT+JkMN)!y;Q0svdhTtMLQ8kSu?knrddR z&by$yWtS|U`W6s=DnGxjr@o?R^_;dBaJWk{-I|%@sM~0($_~_4If}Abos;YhR|vL5 z4$fDY*?ymsp_r(U<1r1qF;AmYlBYiNKuGDWC%KPGPFQ!wfbLEA_Ttn>TC!Xfg1yUl zPmU{Ux{n@fy*dA+#z6tuFQ$%l6nl+-ARxbXpCdzTx$V~1({&2ZzFufB$?>vz#&%7i zp@(h6czGe=qN6yB<5NVf&GG6YPnp0|Zy~|YXvEyaNc5xvJ(0mXnoeWDG&qsQ`?@cv 
zf18HDAJY&)O6!X=Yi)F`DsL#*VACl=OC7d7%k6_+qNNc3Pkm#tsDX z1}_F!I{G_*-8mQ*+iK+C^%}FB^K*8D22= z7(Gs{S3$^NS92!z&e-68GM1#w58e3uFJozuN6<|&O~R}F(9uepI@Syy>#V(LE63S^ zy`>$nYa8lznWOXdL@&^r3G1jcd?(1WG9)8mU(UloXi<1c!>R&4y_St`fc4TTU!G26 zO|?_XKXwz&QeVD1C(!3~i=o8B{A_yDN0M<8s3r{X-ilF>U+c&|e|BzNgXSr#joV(B zU)!79Oq(~KB44Z#>mrz3f5d(JGYo5bW&x$IEq3*x5#JOO9)xuno|&+W!a582Vi$Y1 z>@dzD?XXNRCg^+VQzaf)GE%ff%Q<7|cz4gJ5~8@v6dR3;&3h3iA`H^wiCMq)LB+DtB@@v@y9yGFl zBNIuc)u!nJ{FOGtw}WLp(%=u6?$| zZjmnJ0iyO0=HEx$S1}*sJ5B{qv4XyYUw?CR6#D&b<%mbBd!I@Bn+TzgQcx?Uq0wEB z`Ke8)E1}YiTusvg-xQrbRL8y!C-LRNY2J*5GAwH{*z#fX>aPtE-UsybCyrtH!pDUX z8fiJZZ88HvUobZcPFg?@l<2fAaPS+Qy{GL5Bzq{b?{?EAKLI*|GXsY>?dgj2pofQV z*Y(B;%E!LXmdAcel`3TRIOr1saQQvSPlqGD?v(@sES#Pys&awJ>`^BESmbjd^5W6D zFb<(4@-&2yU!VGczL2>Er$N@Dt)gHeJWS>s5kq7^Z!kauv6@VZM~lVm9gpbEfI|E^ zJ;h_R-lkZkz%jyWn7fDs2S9Ne6@Nj`;{KZw} zebDY?;+#++vX2f<<)Sh@$W=)7dwEphdAqF!)?MB1eCEO`iukI-g{YsUU6JZGthZH~U;uEX*6GDJ6(JNNg`q6WVF&|tWGdDx!{_MkNb!X7$ z1MFN;G*4r-q)bEgpd`dnLPGUp)|(lU<$?1m9Fat7F_k zY-c3AAwogF4FW7TS{fVErXg8C~L8ZA-2cY|uOdb$H zR;IRAE{WZY-wwnVh#yRI?`?gTpTUi&+J2yYE~4qrYEK=h`?#~r{y~BNGFGQk0gUpq ztyRG!^fBnouH=W9VVrWpnKhj2IF9gg@49Z!h^NtGJ3BPZn_CaH31RZNB*U(L#C7k6Yn^5@M6g!e3HV;yxL5rV|M!+6{m+&{6VjVJ zt1f67o#%WuG-E<2TD2RC;&B1`$jqpGV7dfaGx*u+Tz^wpVgL@TZrAM+Hau;JV7UXe z&aI(XVQP01DQZ{RZ+EW@nA}m;*@@1S!4@+yr~e%qb^|8IH$fb=VLtAMz|O+Cq`i7I z?`p?`CFLMA9Yq-}(okidC-E4Oz9wYH@g2XFcz`t;lfyN$Bdbk8H0!xYy1|jy!@bV6 z(w#+ND%tldPc-p52_p_Q+g)x?%#JdBu*_wiVdFqknYKDLblqOcNjP^aG=jOI89lV*x-K7`gYNi{c0vXah?b)T@)=zBx}VDSGyd_)tPRn(owt1Ew+w z&54`K8X4lLI}fi~H0f&GitQe-1pLHe^3`m2y4_X+YcG=`3O($%V=-;x>>oDv>S?p+ z!o3J&7me$+d7-pRm^aCxs&9bs*J%Dw&@Vyl!R{H*XH!}KrH!>Iuuiif*dN?!1>a@5ecIRxDF=ST5f0-aL(L*uO zt(G;*8q(_#F}Z!K-kO8V9}c6VwBUa|r&dJPIY>F_y2Xrk4McITXRU^a?2tuH<@7tn zt<)0EjuP_pNJB!fp?b0eb=_B&w;OH>_dgEVG3XR6aw${2Ru^3JD8px}YkJZz&gSSp zG8sPHi&+qB8(2FaKeIrf^iIuUp?*m7!?o!7WzUC0m)rCq9ywXb&jO+i-^!h^Wb%W^ zQFu?R0?jJur|Mr{!{1bkX)(HTQ6^7uh}hU=4rNuSA&J3_}mRsVfX*jM~$~=sguk4K3X<6O_@r-OjPcaj>b$x(NvSoG3 
z*H@3ukHxQ9rpg!M6bU^j#G|>21rpJYOUK)jkM)=@-G>sOH z)#iExCi@=a60DkSv?#qG3oTFIkJ87Q70;+VdZ;#guf1B@lzZlz31w&&P*-+eZXmQ? z&AoPZ^y6=)%!$r8W~++GGuB70dSmou%ou^zA5FRSh|b!w@%e~`GP;cWL*RY;3Z9$K z6=ut3(+k$B=ZBsjwNB1Tnp!+)bz zHRiw!4H0QIp7TvuTFg!zq(bwR`KH@P$Wz?7m8jNs5~>WGe}m6XK;~=s$K!#M5f-Io zLKy^d0{>yl(RlO8f7n6^Ak$6n-p^)_?_vzzyQDZV-v8(XU6Qz&FKxEUs*w8#oH2Wk zr6dQ{TKxHBBF!RzqhLueq1UlmMzzsZ-YaX;p?6#nu@uic%t-j+4P-a`dMXm&%cM8m z`+2GTv2#D6Jq4FPtQios+OGuhFf-QA4rgaUci9Iu{aivbr}gG8guFdh-f}937^;;Y zG8efd7Dk1rE-tC|o`7`2Ek z%koa|V@YusmpRPiU&Jc?5YMV#OslkIcD=1=q3nw$PX_ws=vg!>rwwLk6rB@wI_J+_pTU zF8#_3cwJYHcJq*jv_RWlwsNz;{hjMECx-*suGE|d>IDl}5tgAZmRvy1*k(LcOf}z% z5WkOrM-@u1To}$P2Ov(sg)?Y9*0FX=^%y9g@iL#0nFy$6m%fd)LQv8@;)ti!OxKxS z&w7QJy_0k=rVRp}k3)SwtJ#(|4l%HDZLkoDm%oEO^p@Y3(5F^BuwcaY>1VXXMw?~) zV1=6g+}y1SlGskQgiEeY!F~B#?L~?)##6Iv#19!*qB=cuO((x`*9Y`68F|VuK10SB zc;UATFurRmig1u-jIaYD_P5%`qw0@Sz;49wnd$Fs9`kDmY_aes!yIXXYKZSumil(= zi;S)>v~Es}Je#x%xsSm*NN!8pM#jCG3@DuKJ+`na7nwF>dLvZWImIsAfc#+b-Io_j z+H^(=OMaYJS7PqRM2p;2=J3}{t=}d3G}23$~bpfI^p@bfF;AbwBI3G!=KlcH1jAm=*Mhk)u~L8 zd$wFbP-lU4K;BkU)AtFIXfN=IACil{U>L^}OcvG5Ws#R@ExWYtC=mMnx#dPA(-G86 zFYDVW+lC9L^_zvFl02Pc)e5}>wU>o9HoCyQVg)Lpt(U7r-JQ$Bb`2RKUGVz!QL^%i zKJg$5SRvF7LSq_*@{!)6aHI6O= zt_E*nIpw^P3Z@^-4Md`p+ArO+z`>R#sbF^nn2ofF->4M?5DBX zPx0z+C(pWx#CvbmxGtd<@$VACd625rpZCPOae1^Btnw*~zWlMa5H5_gOmh zqWJH+>s$K=Nv&=j^?q$|(&%O<(-SzEHItlrtmsql+&PY{>u%_QcA20x^XGz9?1Tk@1~2d&Fk9S zH+Rk@6t&i|B1BB_EqA;?y~x)aiD=W#9De-jQPPK5$tR&&DAR6^1|seGcONcObMhKw zIQGkmED|mf&h=Q2(f2EqRVb;Rk zRuiQiv+l*AC7R94v(FGyB+A@T{N2sI?Yr$VBC&es8Td>AlS~U^{!KzEQUDdeNKQ)0&5gjvJ3zy!_@& z<;SiK+HcZSl+S&0X*|Wy8fo|?vA5Gm<~$NiFmeZ# z{j9*s!X}oz?KB|sl|CiD)b^c*BB^QIn0>>JT5Lm%PcYSea~=jTdh)w|keXLWoOyv_d z1_>GOAan*GZ2EVOYD)H4sbS;gSJvy~oO}s^p{h-ad=!FWzObcz9Hs?*caPh~HL!K4 zN?R5J(W3r7`;DDQT=@Brm>n zq#J6(ASqRrbn%e-&7#%r259*5UA3t1MJ?wYjIl1N2F-;6mP%8+G0-Ksy_8DlyGc3K zElXMu=aufqWUx+^+y^oof+^!v@VRM{!Dq{A@{!OwlTg+xL8(HIY#o`h7>Fp%68RI3 zf-L(UcqjeCFIr8-X5330gx0G;*`4s}+18>x^RgvfzeauD`u+_Z>X@+!9zE`GsB*7x 
z$7BIsw!2DHfH~<3NGD?#dP_0+b;Cq_%1cMj2Rhv=WU6iTM`!nX>~vU0)+qW^G~Nmm zoPMJ2Iee#2dT*LS9zo(ZPn0z>Pig0+6XIF;)py!LnL=nH@giYt%%N<~=irN~%uL22ECU3@zkux#JLZnR5@R z5(AF1wRE;M6u23CDPyd0d*6b&XAOtir1w;z#<;24lHeF|B6ZCyq0|-mGz82MWeQiS zlw^IK@@CukOxX4>)uZ+7s_ZiFVkXa!Ae46=7Ki$ZoNP@i3)^JlV;0fT3HX6K(l}6V zOr~8r71naZy!2wn*nV$pU}h2ZA!WYKB43Zr?+?+sYzt;TmqEj)?*zpDiJY}TGh=Tg zs4}=mD%(gG@vbmf82R;T6~+QuHNh(Hhj;D_@Hh?6bk~1aLZ!S>NH8Xhfy#_=Y-{8M zpCUT&?`rk0=3O)tn9?BWsT(T3HH>w7CF*hX~TEln@9_dhaz5;BLh8e&6Sg`v=@H z?#&MjlI*dv=bE3{=Uy|hEH$h)qDI8l*Sz(8*|~qvDP`+{bpwx#cf)r(!rpy_AG0~B z{{nm6fQlvVDOZm*#mq`k+D<`kzlMz$4`r7JSDTXx5==)bv03QXY#Ack)}~$jwUwIk z5V`RFH=QG9E53iD*TyIv1S3Xz)+LSZ3?OvxU5Pg8muR!^!;q%u$4&{~hJ?^>@ZOKy zO+}HZbi+u)1Grd+oiLb5G9Yl7@ zPC1?n)ZQdX@>s(^tco2F$SU%LWa)=~W3Sd~+%ACB;oz;kgcC7C4~W;B8`yq~@2v4w z*hn|#Xk)l-_#L_T)w;LaK1TI*8OwS`K}9saKz0k2R~GFV@}n>Tt)!{YipT#4!n(^C zOK!SAnuR5q?qiouSnMc2NI8Xg*7RK)IjyZUZm|s1z&i{ z&8B&-GkbySx!Oe$Rr|(cVDXvo@-62G2cbVJCP&3$3cm0#KinvcHpq_-HGfNDs)c%4 z*jA;Z%AS5sJc5;J`&2-9KPPdEt-I?=M>TdKHq%G3%t_#+h5u*TIQC$COA_9%xBrfX z->XV^!M=}>*?&*iF0wELj9eFaN>dYI3db% zcOfj`@T}G79+G9v{>8MjX0PFf(V;ps2?12+QQKXDiG-(AM{gKH1i1Aa%7olE%x(8z zYwfq{%`{+9qAJgv_fEs#w(P@VeHZZ$_qJOe{>QLK4PGpR?#Vs4+l%%73PQ_#DvsCn zp_bYT=MgJNSxHQK5qp!O19_c_u$#%W$(lb z@l5A;YJIu#4@2KrqI$+OB+3OWJm*vq8r_Gtd+qcJCs?$sg;`TJ(W$LsuM%EA@D*bJ zJP60IO)KGm)4S3DlA+X3V;2AM~46|)uAcY*2pwepSynDADojSCLfq!{Mrq}e~4yz=* z*kkQXb;ceYK7VVgybIgo$_Oo`4N-9w2-ne+?pfC0$#iTN%n>-dsW~4 zh~CI0>|Q0_jWW;qZ2Mr%o3&X!@jkaen2mvhN$P?gp}|<$jvuiz?N}pM2GV_+p0iva z|BCLY=>03YbE$<5lHL6S z6Nj|;5DXyKks%*hvy5?Qq;wX0+aLCXgcE4Vt4apuG2`9Zx^pzTXPkDEy7Nwov^`$5 zWqW06W%VHZ!le1_`o`r$>U$C1mvec9_oh7BMk8!iqq$K`95+7Z4qKbn?!z1cZ`4P0 zGofP99Q~pwrW{(rk1iof4GUz@q+Gr6t10# zHt!nmR$bxN(DI0Al%FE=@vvTKeh@!fC8JHn8ff$9(AxxJQ$ZiZ!d+pKhuq>?$E|k> zyp_lj2h^C;_=@mJGD}HOJ)uX8sG|=nC9A7FMvGTujxAxj?LliEr9f0hXK)pj)dJ(y z-z_$x2Gi7q*O}g9p)!q;_O1-j^E5u`9AKt(hg8)L`J+fxxb+3>Y1lY>`OnuyFP1QL z44PlhB>HSJ}qADo)2@n!q?)@B;Kw^cuGO*4SGIsRo@Q#_E3O@4-bbmQ;^U2meV 
zWvLb;EQN=^?R?j$M0wcHSJ>7F!QQ{CYpcQ`f{nu`PL35Jp34TG(x%9@w7o!SH^I~} zzoc~Mmy~(~4s+qQfxM?mtsI$WED?2#y;*29@k5_OH4@0Xrym$|tJ`tbR z#m8z4S1uvqW|_JbCx$Ww&;9JGd7Zt00o;K{%NUzz<>OO!WpAOzCv^Awyb{Y3xc01e zuyqfe8R~*o0y_8o5QQX#Y*Ez~HmV}&JBO4SeYA%Ro>6to7YE>-wJU&btD=;-4Ju$G z&4#1YPs|+nRvb$#knNvg=*R%-mXVl02Fi=Y27Y5=UfDN+s>9CFVf zxQ_11a0LAFecS;RYS(`g#J3!sy)69{5%X%KqhO{V2N+;m0XWdAi0U8Df_V z)jL`En$0(Fl@8edb!i>ALUo_X0OC=s;KTcRIVcU2S{Yt3qin)A?G3s6aU#$7)GKXg*C$$?MuIn#*HTZ0*Izf$et2ZF z4r;TG`_Sk8+U8b8{~67=HnJ-uBJgn~+y+4olhB~gHf<5FDiGGq`3+B(!j-M#E@|~z z__B9Hblxm*gOzuHT%x;pJ6;U zYd;SCr7Q)2=fZwWViv5Zd>K=z__#8}hoQU^pO^_rwt!nF$+3FBFIe02ko-d4M_x8L zdmriFUl-5nJQa1$@CS^%0juM^N|C)HS-ud^-E;Y6HIHgMX+&%{7o&_9`IBc-+gS*! z-k3O+TG(7vy_2|A72|xKsinT~961t#{rk|<)y1}vU4S``c2f}!@+I-W98M$y$Yen+ zH6!!Ze}!^xSD{>t4jh!sWc!%V3CjQ7_I+>pxQir5H`#^>k#ZU|soLN@8M@1cb_pXc zF6A2hwHP3(Rdc(KEqL0YwG^ck~+%fKNGuR?L?9_*3VkNT3Cz36JU1&nCzf7ygRK=BXjgd}Sje3sdK zZoJi3N!HP|)>nw6ihu@}s8AnpdW{2afn}?Dgv;;~l=gFL!1mvA5am-9XH#FsByY`G zEd$}0PS$2ql*R~P1zz(Gzp&=voF-~P*T{$rXghkd@KB@3yB_oZeLOf8~M{miAd;2dE99E z_-_+OB)$T{*g7-litfRt^@EqZroaq++zYe6(kr1jq@C4*YcWd7_u{#+N_Qp6diPpi zm=;A8v~co;!%{=~PZI$ga@!@8w_wnt7Fj*!{ZoxO#c{B-=hA+2Z!K7q9t_Z^eDq|D zJO3pXpEv^4Qr;9;_poLjMP`|pzEtI*BO^Ide_%`% z!J&TZw+3O7SSjSmN9+4M&>BmTqYS_&N?oSIYHi)rS^BxF=RLSGb7q>Lxi$PW=R)ap zyBnUo-u!Crkhp((Ug!Ty>ar{w;Gb#R08;liYH9Glr49zSj!Zt%tZ-Yd?9_Ev@E5Cr zXg8sRs`Bt)H%Qi{o)^qwc%@#OW1zrW<7c1b+r?7lb7i=S-q)*??RM8&IAp431_w#%dq3 zds)aOhDJgad*-hwYg8*{#Tgd~_7iBpfP)>aHQ!mfJ=k|Dat8dA8&CgOOOdN1kmFGw z`t5j{?7Z5pgV&PNa&9ByjCbqRZ6L$w&=zRBL6bKx4OhnTIVLQyrEB8&mg@!g z@nAuBKRMUqa_@>|z{qLvYDiIEN+JEl{Y$-L@=^$)>XGjg4{xK?gA6oIhG5-_yNZ?n zx?r38SMV%QeN501_eP88*w=#hJiUTnCEm;Xy%+J?T>@)(Gy;KWDAD`R6G`cRJruk$ zU(XB-w&=>RD~4J^|2NZY;hX&KYv^82c;Xiv#AS0JSXAqQ<4t{iK!q}u|7n>-LJjQ^ zNAa6H8V84HpNR@GrzcC`kOb1pWXuVa_)Y``hdmdokf<`e@0_Dw97P$bCp@&6$NJdOJp*G}G)kr1MO-qfTx%jU!Cm2Hb3yQa|AWApHe z!k}y$5{`7%ipRHjMmfV$D0D+}UiRo1DF##ATNrSpH>u*ZqWD)`k_11I_Y{|poC$2* z$0tHD86^ZgK>qex!8GBJh{f^aL-Ss;7dgsUn#{h1Z0%AbCA$c~3fPRx{Xp2Sed+~b 
zyog0@->>tvl&G(#QBY4cC!b318_ocA+0hKB*zX)J;ZVYvFC0%zakr%b6TqF3u&mBc zCNnC`Uu?;pi6lKTY*y@c{c+4^GQdgnM%}Udv1@L->u=X&2!^`SnuoN=f+>Z(?&o36 zN(YpSqcTRTR{i7OqNA50q5FP*1XdkwR>7-ufYlw7*TWePL171(!|*~%p5qv4xo zHovnxL`j|p5e1Q7oqA93?r#Osbb#Hy60>vd;%+AzW%<2_N;Hmi;#tO(Y{)rgWiqzM zc>-)&2=x>2#2g|@U9tLiOgrED51u&+2zmf8ilCOp{|k(V(I@vM6v&=(zHw4@24le^ zPGL-@662FD(gMtQa$NVNl9JOP4!Ig39S!Cc&5m$}TW{@^XQ!%8#2hfA__eHFVT0F9 z0n&`AM7j`uBt2*)cH5QCRbp<82UWagwFZou!lVXRzvz=8F6KQ)UDj4tF__?$y! z1HzvT-B~BU#-XamChiJ{+W7)t7q)cPDE(W*Lq&Q5aVh!ecVPV|Wi8FR-N9?gp9x~7 z+j!NjRPKpj)9_d&?!UiB1!rABaUGv%`(IETHJ+2ilO)CmGBBM~t}NZV&o`A5SS!m< z<)!W9Sx{)wCN@v1FTIRmknj2XZSe(5NYOq_pxb3M@xAU)8a}Z`<&H{)?N=3^0mNci z@YDh(qXKk`!c}i14nva>NA0TAZOVW$B)}y6iPE4t^>iF0>?VDuYSD9#gNUUwb2BYx z`#Q>rGVp}|Wm;QYSm>y5XyRk3PpT&cnf;3S*n%0UHBRfz3buG_8+t~& zsR{>?^H4mFQ!Z9^5Q%V0f$nc!r2>KM?{!pOrI_%bPxz})cM6!(s@skS*JMzS&NO`x?h{4D zc5t{v1LsGZ+^?*{^(PF|7_sr$tVj2Kn!RjoPq6g|DjJP$gEMDTc;v9C^ga6J*!CTdX?LS*O-efOymrnNl0`-_43KrXqa6_E=PNM}4u}4{@MU zlRt8&r_cUZ2{F*M|K9ir_E|MqAO^=bQzD)6%K%)b0rgsSca@ZTUq8s~{gKkoospyG z{7Sa#NZ53LB3wV5`H9-w*YHgYnn77i?dm^O<>wP*aZrCLA_7Jlse z`D_-c_VMjL^^B@wh96*$uQHutvc)}alBR=uk9svJQ)%SNoB_Rf#Nj627nN*Q)WTTb zXWx`58MX(TOAp-oP1OL7Z~He@=AOtuNI0%i4F18xb7@YmrljBOV#ET16Sqcg29imJ z1P9razC83V#~vEZD*`IQ7~EMEd&b=>isDz&bdB*=1D0l326Oi``GNaHP_aE6F0oKT zRidntI;|I4fFfBKo+BIPBl8a^STt_Z$yXiI;k_tFh$bV>^V8eu{9eHXE)I)YP}Ee~ z<_oe%OxCLS73B6+s@fwL9A{Jhd1!$ro}~-+_0yxe5sG_Y3MJbkg2fp7@k1_(q?yI-dk9I&Y+CK5rrJ+b*-NIy*oc@wE zPLIv=2l0z^1#3ClrrkJ7w%lf=^<_HCsXMAh#f5*_mac4vJ@>gp5sAcywk_364lC3690>WGQJ)_HuQ4<@7km(`xR3JXSW&GG2hXH^Mm-7Hq0&To*5(XV^y z!`YLZ%D`^Es~rL2uYEiED47=eH4CV(yrLP`$FKm&kp<^*9hPoj3!~P5wct_a@yVW2 zvop7zxuC-~l(DI9gRt$eZ%W7E@fCX24b|hhVJy7Mip##ZzSh?gW24?>VAr0LBwN6z z4CYY2$BKuYJ*o=nu&Wjdb#($oKieLy(d3^N3kNz*c-cSU0u0ZQ} zhMq+e*~pjHx}Sl4vcMj%xqU9tA! 
z%!Bw<}mjGh&vJ>dwThG?HaZSBa3HAj9Xz+`Ow;{30A`;f?o zuZ=v`4PS(_I+t2GQSAiWrY=#STU6xZ?|pHD&g-fw&()2905?f47-O0QJ*x(f8(4Io zMnlgUAI2~bS$$4E&jR`m0KTgLe0MGUYskjG!NV7T_P}S;i-dxN@i~J@h;e4X^RmN2 zUR#5mxTd8HTUO~y2*zbLt|_*wkj*o)QGT548riFKr&7g55IoV{kMYNf73!t?&G=$c zKU9!GpPgLu3`D6=7@sle<} z$syi1i~vUi6{Qo&`-m@z)sC2w)YPfl*fBTcq20dz4%0lR3Oh6D6p#vfhW?sXvv z)IZ7NHF4GKv0Q@NiN}jP=N5UZ)070pt?v#Z@&=F5ms;-nMXDPF$)JFwc^6p>DaMW; z_D!T45%^fQR3Rv3KM(9=3sC~l4irvXt3UFvQ}1kpc>|gqynlBSJj3iy0y7aP?~{C# zph9Dp%c!Fbon&w?25uAS>~SZ=oy6jWuV3+wTJeCc$%;iVo?%(;c@F;yM!^>F)EFX?;Mev0^@Dvu18>!4J{C z1uri2Vxo3+$+;YIPlvPxqn5G0EPYz0H9MowFxB+od!K!MCMLnz9CrD`?Z{F z)RzFLhdDr^{(eZRV*>aXvQl|{I^Se$1Iz0fThlV6FygPAUN&Efv?&*7Ouyds>Vw|( z@y?5noYaMu3ru@UDl7r|e3Q;Y??8xUSo7s1V`T~dBxY&@VHRtfzF}T-6SF>U~Ws8jI4)S;jw%PTFK7vR!jBahaglM}!9az*D zyy2T?*_oTH3O$IxXMKk2AGOZ9L!$*yE;%cG;L!ow-YF>?`Tg!I0RU8biy1>H~jz$_pRD;QrId#@#I% z^%sE3|8o4J&eD0e;x20|gtJb|I)x<1{BIW3s?J%<+N?V}KI%S*!U?_ZNvcQQ3C^`b zDy~k&X6VvGkflouG>HKr3M~iW(x0CIr|jpF$pd{Ob2zi4L+$f=%0A;#py}#YM_Q3) zF`L9E2_C6#nxDQuQ%8#XAkfhVu9S2$Pln!?_Vtd0jC#mMEtSXrgFFBX-OI{%(iS+r zLn_C!Row#B&rVqlIKBsCT!Hm{v5!f|BbQ>JJ!VwEG$y~}9$Udzc7?Uw4b%6R1ILU( zNq4?4OLtAnuhZzT(!aJg^&6nL@X@py|HCi6FgWR^mA30Xljr=DK$g@r{f9@}PYy?m zc*BM^^C|JcSL_!rzBT@s#<}&A$#CQx`Y-KGr=S_~{+x%C?oq5b!@LYLy_T+{y?# z;XGK2i&GtW*d(1dFX(nJbTjFF1T<+-cGxXyXb)!l8OzRL2-Bp^U6peO#GY%MzJ4{G z^M}=MDcA=*_wP@|EYFh5`T-mS1page!3z-11oU4s$h-CA$gwC+pzl(8H`|e}roF5V+ zCFAco1UeQ&ONKRw8<=V>fvo!4%;%8JuRcFCBCG$Hg5B+1l!}}PXzk9Y*#j&<+%F5@ za#e(QBgVD&$O^u)pT0uiqB=3NVyTr%S^U%ggh9g-)=x?+lWktv-mr4M%%@V$k|+)2 zq|!=o(tr-=qW?UAWa0Jnfyj0zL(dGBUh0L)mo$O*pF*y!pd;XDqN^;OUZ^~e#?Z`Z z;KtSlyZSyQqILWk;}UZ5qm7ff7D2Z94;w;;PTwS5oJReUeNsU7i;jO}jb===Cx0|b ze(n;@(RpqdG<^O<+nWTTqo~_Umks~NH6fED|;vN)WyV( z;_<4Vxe5a}3eWQWWEiOuO5lIt^@LuZy;YS=ZCzcenH)s6Dhq>9@5cDf$4WzKzbjj`_I ztI~E_`NBi=>FU7VqzJ!=s_3N?8E?w}0?8mZ!D^+=NAp!+4tz5IDGt$a)~_EI5}B{cBj&e@1>H$(dMCCTO@F(d~&hH)2YNs!ceOYuICYf-5nv|cc zl)63UAcDSdE^1%qrx>%Z6S@bO#9f9;>$xm_AO;(x>HQeO)JEynRL{=E#UTo`mf6sg 
z4ohq2D(^X2P6KWM@7vI@C#WmQC~Kl_D%c`(%N%>Xf8grqDz;rQW~4Ph^cnw`hAVZV z`S5U*vox&8Zgt^Pq%G~tD`;h-MI#$aCSW6(zw`gaxQxl^8i8yF+Rq+$&lo2~i=mE8 z4s?IUO!LZIo)FB(@>X1BV+0XECZ6qdqF{CT3KcOC1OXq7w#hq+h1QT&N z@2<9HV0E9t8ZFV~65})^s3(j&=eK)-PCjJqhI`>j?dq~rY?r~XovEWlyvD+EyDds( zY;t?_0iz5fd0w@)VdSEKZf?@Noh)XR?PHvLVj0E+ZY|>^JY&OMVbX`<+WRK6KE^in zSmhbcTdos!zDP=Db-f#aoJxM z5eK03?kAa%Ic)j);!bGVWdVx2Oyycy0PX1~AtT{g#nN4osS*L zWm^RMPUEkSCyckBQnOXOY+P>l<0Nf_<_{`sFM@=o9tVijjRwA`L(A`0AJqaaAF43; z^3+tgKmpt>#C*qn_pn|fV&V7+x28*RpSyeqZavSgy>9MdN zWH(7w(Ch{t0Dv2LaTSf=!A{OY_bz`J3~Gs2MUhq=E?SoDEn`d(8IRITE~#-hYdkjY zq{PH;jxh9ru5yJm`I9KlRXWlX#Py2(OX)?Qcxac`ZH^c(J< z`70CKaY<(uuQAHq^*}@S)edrC>tjU>yk^E!nU$=bVQvGdT;A$t2TvmRD<$;CJgzww z%m^}~@^|gXUbZ~Y_v!E^q4%2n`TdqvViYuvsW^5yWV4o4@`ED!raDT?On4W$M5aY-TX4UyX= z0X1mIL_{{h%&ziOCdTZ)=%V{hRBf5(NrEPLz1o41xDo@o`&;L`HdY03) zmL~4@Gg7R+^B<6@|8b$u?%yaTDnHvH4|MVInCHOe87R};8fl?{mf)?&gYVS~4gi|Y zCJWJQaoP!JE-ivbSc{5&I@uJL>6^=`UMr!6@!~lCi#B+cj3q2D-c(Ifg&u-PS|s=k zfKCGSq;3D~wo@duaAbgD%w@uH>)}3nV=d8mX?d4((r&07o0*|Jne~ql(b9)H6(n$! 
z+Dc14T_LJr*==f)q8K!KS(sZdQaD;sMe)|5PH4-;sW^#9Qj_fxxT)Z+s>;(%h*`jm zd-Pr>O?n9Ph8AS}&2NW+&pJo~nU%hm%00U(`k2Fg7mpt9tpXiVUrT`=POC(g*Y>JT zRecPP6F3kDOc?J01LjAS#~K#Q(HOg4;`6ooa)ZlD;XH~2 z$M=1pr!1C+zF+5Z2)J5kWA3V8+hf!e^DTN}V3U{|TK}yqhBylD&A}^urg%WfFO9-b zicOZk^C=gA!zC4d9zIxkhNA2rZ?5d)W5vds1>KGm=a?c{Dq10zCHAm&|Hf^W>^aIJ zd$3KF!k0fCGb*WB44wUon2P>T}vYc;(E=I;~A!C`N!)&bcVww2p$5dRaX9JP##v zl#V6rVPeKD5gUPy@z9dPNZf}22jF5`jWjiCB4pvrX5g~{lE8t;RO-Mb48D%C&#^R_ zPLl2_-%bvsC$yrpy`k^RkAe;ELg-nI2)AAoLo98L=N&R{=>iEUZ=^S?7@%Ndco&m@ zES=UFZ5R3+G5{aY0H?RLvqDE=)tOVx;a5*P2rdKt0y+z(MR(8dFCY2mi84YYiEmMl zit{}xDSUP+Eo~N`^PXU0>AY+D+Ok&$)a3Ro9L1LrWqDo^`Ar*g3TI6YvnBE|gD8 zFoMEUcXX>;nqBfOOBY-mlQ&nebzxjuZ&HC`+Ruf93G8kYIkBK<#KPGVdwZ{3X9+@_ z6DV=puvXOnwe6Kp5tN0;rblGk76opWU8b z1I>xPe6&4)+`}n(f=i7KbF%wP3lS#(I;c`%I1$5bX6oRLbbKQK-pAW>_4qx46l!X~+;+x0f51B-Yd&2z+dV!49N%NMn80z%Pw;-TrZL#9MHAiY%Iou4$ znMypP`@(TJB_&b(0q7^H^~tCc^%t$ZGB9&{FzbPn{Lj=>(-!(`lX?5PGqE+aF|O8; zb?C2)-vokSpm{vDmm<@_iBWwPUmWi`nwS`l$nbnCaHUGtZRN_m z#QwahFzmcb@|@)|Oj-zRlD|%& z8;FU%QwboaM?cI5=gkZTK077}WZ|B87tcSE1RH$e^^1p`S3JDy2rups(g-R(Xp>Zi zwZIPRm*N(LqZ(bDa(Rnyd0%KwdfC1M;xjv^1A@FSK$AbT38O^)*(Fz1LxDZJVNDHa zZgjMmC#iWmy+*G zim8I~+ao9X?2(x;u@d*X;$$4M^t23Q&5vAc=C|dcWd=$Qmp#s#^nYqVFSF64=cyU+8v4Au-33@RP`z=IPotB6o{FO zaJ()Yq%X*NG&^p>naFxKFPIP)4K2iBUu400t27UMbOY%!8n9OTuzRldLV)$0Tv4!D z$D4od+a>0HC%0XrRx)_pBEMR|?i5x$uG*aNW;@Ys|!^ntP z>+@Vn?jwsO+pId42(E1tuR<^TTP5(eNymwC_*7R0PxS&%TRsi`kI5B?3YhP473rfl zOD|<;)Hy%k0ZloJsNOO>Id?5iCzIN+TRdD!f;v_QU#WXD8}jil${`h00L$}$u^uZ8 z3dF3ZDFRhlsxdm!ek_sis{=D8#(weq65zS0=$}9e*;1*_VIsc5E6jI0Nkgf)|CYw? 
zVvI;z@yeLX85`<(&ivl76>XNnSb*wLaRQBqrNa= zfx3L(@v#h{Agfrl6Ln3}=;;eR(nQx|9yg^v?xt`>{fsCQUx^zd>mohn)*V>oZUxLK zySynJzd@@5byy1QxJT`^(X~uOgG;08$OY5q3Ob5=&(1vMJfH9CAQInOF;*4XE~nY@ zbe@Z}zbk+{47t>YZ|(Yq2``NVmo#nnsgJ@AzZF$))jD(r0-qj56JEM+Cyn+n@;mlf zQsqXi(->V)aY{IJ9Afasts$;r7pKun$BU$GVD+$Xy4&I&^QDfe!l!x79cdneF|G|< z;=zvAD!#gWxVCx(oyU^BZ`fn6$poLe)0xbQnl}%>!Uv{m52&q$#*?L!fhJuo6|ddY z2%kpt&Tt6`|6Yli1V~Jp1(eSm$#`g7p4wyl8R#4YUN3oG7);7KxYmmNo-4*KqoOkm zn{uo%roSG|W6l9zEn32;u(<|YfACLRC!f)u`!$RdzXw%4FVNN+h9$+;OI}~_&1eBN zcZ8(rnR;^lfgyK!VWPphV8tZupVyyI(x++X;gGKuw|-A%D}DIr86Amt_W!=3N^=;t z9`k=z%|g|9TfhH5cPf7IbO@H>!{QqD?~S#8Dq%pmnAUp!OAJ3;UoQ&-R%LOG`}dkQ z%(;{YNU&}a^6$M+k$|2K!-|=s>s7BW`eu~A*G2o@1$0}5!xeG;*?)YzaSn~kTbE}~ zB0MpcPAS@3biWt45LS-TU~_V=75P0)&W#;-V$j~?f4vt+jUHt>9RHpxwxjC>-^4n& z{`(ee1-G`V^(46dr4tYbYSib||9a1aa`}dLaJ_f^{Z2ABO56UOR_z&ZLh04(q(2&} z=j~MM`R{wxH0Zj>M>EN5{`~Vk5f9G&KvGuvn_q^KT?ph`m;I+Dk0l+5uh%F&*bE>R zKXqz3=yo{%z0UPF|Nr0rZ|#5sZ)NaYfy41{G`>y7bHBkOC(nO_hmv^#bG;$|%tb(N zT>l#+3;&Iy=XAK=x&f1pDki?is_XR|L*2Xme{X&EGKF(3)6|X z-u18KiYoBpQ`fo!>fgWj&yZH1-kZ&WErkGQOktt~>Jw#mEYwkgtHsXR1LO=ZuAIG= zVsq^RiE~V?r)`aX{ol92?8t8I0Lbae$QR2CT((}nh+BYj?D`wEq!x<_$3xJO&?0?U zD55*>6s(mt<=GW6ff%o_wkEXpC}~>!U^a8mT{MDTNCsMS4Jnn*7g8_=?(>bDaP>Vy zdL#=AivMzRqF=;(9Sux0q~GGJ5<5_J_0~^#SA9)}*Z#rHAUW3e{Wk)RgG}{Cg{__P zYf}+6!bV!&tjsGlXrap#h|!_5_`@a`732nJRP3T@HQ$nz1I0i4=W6!C>wbczWoC}E zb5pS-OY7P>mk#~?p~*T-K{Yq8bp*7~%c3(DyP@Ba;@_y_@O$e-8A)a5`b)w>q21xO zcTS1Ut4_2fn(QDNjl|f^vu_`Vs;nm63tRbqSjN$qVs*4N;nv;V<%1XdWI;7_@oUxo zR+F#wY$rAm$@l$_WuYr&%^Wuf(%hug`)reIob;=8YIlO?Jg?FJlRHB2G<^q2ava)s zP$_1kpr+hUgOvBWoBwLNNB!BiPXz(h$~IrmA8e0}>L|gj*AmN{AxrI)cH&$T`b{2I zZF*BVPPG{`fz=JW!OCq~#~1u%7z)oX93{Uc$$%^{6Om+)VBc^dll$i;XG&aM^UbH) za!PHyYc3{3=LR0=<09+vmJ(Ym`mGe)`t-ECY||)h(@C6bCUv^G?gKU3`MJnMB7NE8 ztxW^fncqMi2y7iZk>(Wrn&1Mm+^p89I<{KKDLFaqpe$3iwMotJ^DQoyTiu88Jau)l zrt@TrU;mKcs+-GuSWRn_(wtNo;EnVhR{m|jeo6qFP&zw+qO(rmvT(Fc95qcdPYcS& z>@_Mcx&QJoV}rG2dCzPLxG;4@AM4StBf@FMTlvCN$@u# 
znP;J?`R5Z+o5PGs_sZ*^uBO)DjK#(uO`@cL0s85=MZ@#@8{Ej722 z%H@TGoPO%+>mok4j5qXI{ex>>N)LRaYMCsh4Z_A zDtw31Uu55<$~y}fb^^SgS0V%pxggo{u= zFYqMzd$gDY%ksC!WGo}A(XQxlKh;gLM|B)fCN2Hi;2cTXThqc#rORY8#3PL+q4FF2 z*LP&}`dO$=eD-wI$a&UfpU&K}XlVckedV;5dl7oq~2O(~R>B%|6W+D&!A z*3jBevJq;cJk{tSKhZ4yEF~gTok8_&O!N?v-o|pNLLMi!1bAiAmkCx2$k={HqwaBz z?qH4XW$x;6vHbZph~FxGZXHB-Piewta_Ys_rc(G=($jbQdUd(gjY4vjnh7r`6aFaa z`O*qX$W9*^*rf&T2*cncEw3 z$H&Ly5nO&QmMDn#ft6{4qgC_H3d23mR!y4}Xt2^B7<(lv1VG?B2K=$(mvs7)ul_FQ zPfF$zB9i_?(4qA1_`AUikM*%-`)lP+N#hgk9wvQ+5tA7+)PL4T_V_-g`{Oi2-C@e^ z)Tzw$mr>7mE&ig9R@K@V?NtSvs@1c;5iXfe;ISd){VoF?N7%z}4WJCwFFlsZr#W-_y)jq#o*%^rl*FyxJ8AX)K-|!R#_St`LAm5+|5S z1Uyn!j>{{e6n;VEKkWC2aLAI~YKY&gPER+(S*uh}?hQ`GtuelgPPN_H@9$l);wd97 zBb}eD>1m8N_h2NIvYm};m1{R^@f%TLm%&vGu#{Bol;Oo^- z0RLQX_Iv9`&K`YrKlk=iUF(q_Afo-BRN$IKu<7`i9s>SW!KP)ZOWAAkRkTpgL(3Sb zX9F@1vDkP=%ubAJ?n!?f>AHbzAhhS9ptjZOV%Pb-@4h21#A-iW8zNo;EFPS8(sp-^ zI3`<#-(Q@G!B}U+idUU{sN^r~is*Pkg>_|o52Bv* zp=-c9-+FYx3||}ZoB5+kaFL-Y6ghIp|9f1uDDeE-VXC2q!v#3wE!BKWoLS+Adhnr@ ze@!(^Q)y#h?+6Lsw@%E%z9r{biele`881xPHHQS!+KFuM zqO|*q7dCg5`Voz2-FqFM7mjF2zMQ(Pobo%T6_Ag&+;35c(_Mmf<&&47 zx9{r6*q)&H-`y1Y;!|lcwfn81;1e>@Oslr;V;=l&ZL@;X1U={sGS7-!-=2uO4vu%C zdlAm3tkUmlKdt1Sa&fEa%@rhuw0``fLfKxcCkXCk-gpx&HWEosGdH&n)6k}*y{%GV z39LPtAysUxb9|wY?&$O+Mv;yChIKt;9)?`j$$AdV;{{$nnG>M(OvIg6*t>K!VC3 zkGLo7fXC?VJqWk^w8&ExN+9EHz$9uRgr z#%_nQept+Fy{na{+p93nTO1Bizx~~qN-ooe4k=TM8veCh`=Fau$n8Asi_|AY<(bsg z_KBdAEfc?0HN}0dzG;OouS(+%Zwb@Ie^3~yb#b2gN{)R)_N4vHYr+77Q`$Vek$?9Z zYi@Z!ldV$(ct{1K#cL16*87(pOELHNRFWNj`Od53^y0QlwO(!odF&0xa8l=7o^7SVEcVyPC)YYF}quY3mG zO6MOF^brCJWsy361QvR8*E&0}24XF?_%wA5OB9gHMs&^aG}3+8S$tn_ zo)7MFfnaROR&mVhy2JPA-Dwt+8FYa5L^k_xZ$1SeiZk%9VPo9`i~l*FhQCbVsaCbd zca@$D!bq3+eqdBl5l^4=gz^ya7yZKjtG)AnYhvs6xK|Hip(qF{93+Ye0)Yeq(!2DC zQUU}K=>$kns&qvuf`}kZdNXt*B?v)4I-(IFKg)C$xO*hN`D)2BF+zuNtG7>1vtJ_`X zF=3X$jfXD7#-2km6a+&#Y%Uf;_yLh>V=iH`afo zp(nYVEp2&H>9^<&L3q8CNv>{ap773gWR9q`$25nz`EzR~OfIW>($|EdZcOM8@qzmI zV%)Lvr!8+DOw7l@iWpc#jUz;g9^M%%)URyiP*A5^7zl`QdKtXSa5l&7Wiu((Ei`&` 
zHflNbkRSi-h5m3BN^hCpyP#*WeWJaF@6S7&qM{yM( z!h)k`QJ_Drz~A$`6qb@lvx!l?-vM)}LBlo)7gswSuTnF>qIW8gqWZABs(JxK{OTup zb}euI<|C@kDN(vjym85v=DN@zb?bW}`YdQt70n=zqC=aCfI&~1JRCwCJ%6z3V$Utz zD=1AFY|D7MakciD!Oen!D+ryYnbWRqnnXaAl8Psb?@-R(+`%F-)TY!g9kEZ zF$PQ&DX85yjfDQY_wN<_%;dV`VjzaQ`Z>C<<&hvMLza*Lv!`35C@5K9`5YaX(LWx| z`tZwh16Xd%ixXDHfKQ>P@RR4#%nNfLHm5S_MRgeWw)?S>x~?(rDxX|lWAo)&vA@_v zzZo3A2PEYi%AaIxeh?_9mBu0hRpY``9n!5~KsCcGyliv56(Tc&$X#!GNET5pYyQ`j z@WsU%^I1``pDmK3BhLJuV^IRo456T7W2a0dtqa%;;h~6b%ijD1%&RvMf8s;@E|aL< zZnur)xRcQF(?wacXAo_HbrJa{LAvRu=Gl-!xQ}$FUS9*i2otwP@m2F)m2WF6Zs2C2 zUc7gDw7*?P%q#|7uX(+Z4M4BX&yl5_1x%D-o^xrI(_z+5UYB5=H;!`{Gnw5C_s>l^ z#FHCc(}=I{^AHxKYl~a~^4?&x6UN4lt!m6yo!3nvGN$^sjEF^@oHk!I%n|u!DKQzX z!^VSuW^EV7#oeiAS=w~3z&p0Me9Y9M|5e!As&vB@jvFPRSXv$cZ6#~EZs0|rBht}3 zR|9&=XOX9K_ZI9g=>%M?L3FQVx1Ni={ua@Mz>?crXJD7GwW5sp(cu+74dCZj>L~K%{Qc|oJyUX`b%;J$4 z=%BL|$tjIR@BU$3P44e&RPU3}a`Z}r(knuEC=2 zM8ZKvqi2PFXl8k7@AieXe)^d?T{>S}}w(Z`a=jJD(p@^SuA&p1XJL^MUA8>Nzp@+6(qZhyaQ2ekmQZ z-CCd3M|QQV52Kixmah-BG3@bLqzKWGLM=(%+Jdi-+pD6_JI+v z-MihzJ^RR4z;+#F(v)ZJMW0E_^Z`Ux1!7XxMY+zorlHAG$F-2!hFsbYW}mpR7bk`N z;EmZbniiOy0%k*^I7dYctZxfFPLH{25k!1*YUsvqw7P&tELS7FBQ*R)7BBJ4P}eE| zLliw|?pTNZ$kvJ|+%A$bCh;a6j3ylaku}igdcHF7G5VA2H14+q4GIbs>3SmS)CD$a zlK~UlGmY4_kH{<4%u=61e>7{w7EW(+(j;UhSfe~T^R1~4P;|W^beVB8@VAB~166)`XZS>vCZ}{2)ObRl*U*u(d9Rs$ zrp0lqkgjKEU=TRV9Ah|$;97K*v?d^Whww2twh3tJ}%7~&CMYUMJ=j+gREPeq(eA#PIL>D^)%*&CPIyI-# zE*R*jWuN8i4L5Sn8NkP=KNc~4-cc8&*_-Xh-N;6H!tTr1eXn$+-RfrH(a&eJDa!j} z^!|*;&pdWAiwuF#)m~p<(`-n)*zw_q(2?lJty>mR(gK%PEFW_0dmpW`gIfN4aZ{st zHDx2wKV>Y;x^I`Ot(9Nc{Zta10iKeZuM~5q)qM$J*ZX!+=&Rj;*q-KH@h9sz4R)z>CFu4m_!dLkbAn2r@yxj$P4x5u3inpo}pI{ z#%v{TyP&BZv7OGr%?dcVe$}AvK(-)Zi=hWd-;dM^Z^s29N+gka}@PzWqW_F z0@RAVxG$s43zr!BqKHr+J#b<4V~?6PfsLzc2bM@oGZoZ$u`9Z`*4B&JQ!F;nBhV*) z7!Rp5I%+{aY#{V|dfcy5!iI(_Z3P?c_b(4KJhbptzcQ2XM59pR0$ZJ~L8H^KTef=0 zdX~Y^7TUDhGM7)-|9RAVtD6-+s;)S@zM|f{|5#~#S*cHEJ~%Se0=`(unC|D$5!`$R z@vP6-XlQ%(i)ve>__XO2S^8V5aX65~&T-{ce@~kY4Ukp|sYK(vjQ@L|`vJ zKJlGX;$r{SQSEh 
zZijVG^79u^WI9_fQkMME$L81M^OmN)1EZBon#jRE+B8nAtD(<7*mu6}6w9{wgJ8 zoIxt-L#t!~_mo504?li^<=ydH%ku8DD8D^3SnZu&8Q^k)Qu0x*CXkJ%o(jAyQ+zqM z{?2WVI920s{Y$yjA3FjsC3>Fn>NH;&V?itU4gbV`3dJs=BcwP!P=nWN)Dx6)6lGm> z5%xFM`tj$>r+FAM#$QA1b;xft&*>itlKmN;6yqlf9i}}^-R*e7{^y8-4A+B-sjbg^ z(FBE!OwnpFrcKn4e6*ERGSAYuy>ZRpSm!TG8{cx5xwz@a*bKSMv2P7c6MQX;#G99g zv6!)b(zm;Y2U)rO4R%FiUcJB~WRNKlUz{6+V<-irmwHiVzHW6ASF))y`*jILQEBBX zR*7RvpEirQf-?ixhxB*tn(vh=5PY(kvjrr@cY&`cwTjxxQ89mmMjU{V7nJ2v?|>jwGZu88WHk*|mkz5@Qz+I5`8+sNcJE~1|xHiZqK$`ccS zRzf{)6_J;l_gi8kWxttXrZ(5AzX-?jYJXIs3qB<I4Pqi*`omtYDlHz|$4W2$HM|@U2$t_{!p3Xcn{!^7(PuGWabR)D=w_A}e zmq=h}Fid09;BSNKRduE)u)pZH*?8rS3JKCqA}|~cdUmVGwdZ=xg@6>cNoRP5m?N)3 zShtk@x|myH!CZi~)@-Tf>ikHK&9dk1lztn#EXwse28|Mq6}|D6{8MLuHk9qcfllDR zAFIyKBXD8NadJQ#H9Ut$HYSFmvc_V+I;+6ST^oz@3Z2G>kQl=<|B|yCnb^En=W=ug zlDyYC%E}reN^Ch%f*8EdGi$q9LX;5jd zZA#2LL+nZqYeY%^*X)+8(?cPF$aDajA**)mii@P+k7Q&A&?)Ta%)QnHWs7a2eB9R- z#KRQLi$6(95WaJk2WQTBkuPCH9>JQD&aJ!k6o7-d@**$0M z?H#QuJSQvb-7=zDpVv<9OOf=bk>7Ob+1_AfC-hbLLrr2tEK=SmnjGd+|9;z6 zW%?XzIBsFRVr<(#TB`y@nKpUVS>QUTO>3sYz_6zvQ3?-NE3z4o#3(Z^h zhji+aJa?gdo!nAiQjcmx`#G*Uq}1P3nI27%@!2BhXhxAX@IRm8JDuNpUz2pONk3xI z{~|K7v%ficN9Odv@+WsG^m@mbz@yVLpNh{meWmQ;73#4 zDbs^fA#s$kw?5Zb5@Ju#;wrALe{nu7DdS-g_B=f(@E7mrVYWD0|qUby_N*)X8l+~uQN!dua|gO(Ea zgn(hnMOMbcEG*Y@JbOW@)$eciU~IpaRhlPz?KFC2G5E%1YXAn_6?sCiqvS4l)*@!N zD2+=&<`G;pzsi&Wzjz-Ircs^vlt7a^zP;Sf3)p}inTv0!;!a}=A+rc2@)u3>Fb7`4 zLLu5ALh1g{92~j&K*`ed+i#=K*a^n9MkLw2U8%sX1l-f7F~lIm#Cwh=H+xjtwQilg zeaZgNBGGrqw}qjiI&Fb zZiRNW7+qLL+mD>o?&yvqD`S*wE}2M!bG7(g-#TQ_NX;- z(q@B05tU)Z*+kT5Z8AuYoxl}XKLc+-m4t6^g22Ls^GHJO`Oe(+uWOkM$4|F@g=pk?w^P|OYbabgSB#` zfaSZY(_^kqq`sY1&q-0*aF{s>v@v30ie3_Kly?tnaKv6G4tiza?g=e*&D?C zVPl#-MhDjiy&ID2dAfvrs!;BY_nkW|fd9qbr~Ld0Awbh#ey0li^ntTQ=#?AS=FodI z+Z#>P65?yl=;Y!0su~z8$h;-9w|gen#00%#fCKk^jO`7QUK%Y;OgEFu)F+%7A$iR1 zG;MG2NG=Q)wdrrKb+{$cXO3vK!3`t;5&#K+1V92H0gwPl03-ks011Eu{<-*N zKzo$j2ic`+Tr~zakN`*kBmfcs34jDZ0w4j907w8N01^NRfCT>S1guzDXz^A1?rvFf 
ztNh!Y1^ojFfCNASAOVm7NB|@N5&#K+1V92H0gwPlz&J!fnw~6oF$nty$O9w*5&#K+ z1V92H0gwPl03-ks011EuKms6ve?0+?)p2E7T)@8eUE*=0f4#e)gCGHr07w8N01^NR zfCNASAOVm7NB|@N5&#MGg9!t`lK>!_CeLOZ((%?Q{eVgTNrP4-g1M1}=R`{I4Cq%jM-EVn#l<>>y%StsLy!Y4yZ( zF}CgybBH)l#QhsB>?}M!c{j{TTdH1H;AB(6~@gK0vCl#iNYX) zjvgMj+-1eY_DUVxtZq4CY}`d%-5f4nq{{-fdSF~}>Q)|h5J7cWahNy)4ueU<;RqN4 zCJYl7fWa>Qy-}J?G>=oo1Og;7z8eH1oqbkk&={@ zkc8Mn=w&kEGE$cydwU_c-(@hExHKR|+x}Jt|3AuLaLIqxlLE;9*Rhgt!2JIzyCf;` z&oYUB)|386J-}wff7g?dfc@iG83{N-=09zCxLILb?A-PU8e)9yXk2KWZs6()bTEyt i!QOXmoITKwf3bA;uyXU*BPk;R^wJT2{>%CrNB$QagzYB) literal 0 HcmV?d00001 diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py new file mode 100644 index 00000000..96bc0871 --- /dev/null +++ b/tests/test_e2e_ocr_conversion.py @@ -0,0 +1,98 @@ +from pathlib import Path +from typing import List + +from docling.backend.docling_parse_backend import DoclingParseDocumentBackend +from docling.datamodel.document import ConversionResult +from docling.datamodel.pipeline_options import ( + EasyOcrOptions, + OcrOptions, + PipelineOptions, + TesseractCliOcrOptions, + TesseractOcrOptions, +) +from docling.document_converter import DocumentConverter + +from .verify_utils import verify_conversion_result + +GENERATE = False + + +# Debug +def save_output(pdf_path: Path, doc_result: ConversionResult, engine: str): + r""" """ + import json + import os + + parent = pdf_path.parent + eng = "" if engine is None else f".{engine}" + + dict_fn = os.path.join(parent, f"{pdf_path.stem}{eng}.json") + with open(dict_fn, "w") as fd: + json.dump(doc_result.render_as_dict(), fd) + + pages_fn = os.path.join(parent, f"{pdf_path.stem}{eng}.pages.json") + pages = [p.model_dump() for p in doc_result.pages] + with open(pages_fn, "w") as fd: + json.dump(pages, fd) + + doctags_fn = os.path.join(parent, f"{pdf_path.stem}{eng}.doctags.txt") + with open(doctags_fn, "w") as fd: + fd.write(doc_result.render_as_doctags()) + + md_fn = os.path.join(parent, f"{pdf_path.stem}{eng}.md") 
+ with open(md_fn, "w") as fd: + fd.write(doc_result.render_as_markdown()) + + +def get_pdf_paths(): + # Define the directory you want to search + directory = Path("./tests/data_scanned") + + # List all PDF files in the directory and its subdirectories + pdf_files = sorted(directory.rglob("*.pdf")) + return pdf_files + + +def get_converter(ocr_options: OcrOptions): + pipeline_options = PipelineOptions() + pipeline_options.do_ocr = True + pipeline_options.do_table_structure = True + pipeline_options.table_structure_options.do_cell_matching = True + pipeline_options.ocr_options = ocr_options + + converter = DocumentConverter( + pipeline_options=pipeline_options, + pdf_backend=DoclingParseDocumentBackend, + ) + + return converter + + +def test_e2e_conversions(): + + pdf_paths = get_pdf_paths() + + engines: List[OcrOptions] = [ + EasyOcrOptions(), + TesseractOcrOptions(), + TesseractCliOcrOptions(), + ] + + for ocr_options in engines: + print(f"Converting with ocr_engine: {ocr_options.kind}") + converter = get_converter(ocr_options=ocr_options) + for pdf_path in pdf_paths: + print(f"converting {pdf_path}") + + doc_result: ConversionResult = converter.convert_single(pdf_path) + + # Save conversions + # save_output(pdf_path, doc_result, None) + + # Debug + verify_conversion_result( + input_path=pdf_path, + doc_result=doc_result, + generate=GENERATE, + skip_cells=True, + ) diff --git a/tests/verify_utils.py b/tests/verify_utils.py index a0b0f0e6..082b7c78 100644 --- a/tests/verify_utils.py +++ b/tests/verify_utils.py @@ -130,7 +130,11 @@ def verify_dt(doc_pred_dt, doc_true_dt): def verify_conversion_result( - input_path: Path, doc_result: ConversionResult, generate=False + input_path: Path, + doc_result: ConversionResult, + generate: bool = False, + ocr_engine: str = None, + skip_cells: bool = False, ): PageList = TypeAdapter(List[Page]) @@ -143,10 +147,11 @@ def verify_conversion_result( doc_pred_md = doc_result.render_as_markdown() doc_pred_dt = 
doc_result.render_as_doctags() - pages_path = input_path.with_suffix(".pages.json") - json_path = input_path.with_suffix(".json") - md_path = input_path.with_suffix(".md") - dt_path = input_path.with_suffix(".doctags.txt") + engine_suffix = "" if ocr_engine is None else f".{ocr_engine}" + pages_path = input_path.with_suffix(f"{engine_suffix}.pages.json") + json_path = input_path.with_suffix(f"{engine_suffix}.json") + md_path = input_path.with_suffix(f"{engine_suffix}.md") + dt_path = input_path.with_suffix(f"{engine_suffix}.doctags.txt") if generate: # only used when re-generating truth with open(pages_path, "w") as fw: @@ -173,9 +178,10 @@ def verify_conversion_result( with open(dt_path, "r") as fr: doc_true_dt = fr.read() - assert verify_cells( - doc_pred_pages, doc_true_pages - ), f"Mismatch in PDF cell prediction for {input_path}" + if not skip_cells: + assert verify_cells( + doc_pred_pages, doc_true_pages + ), f"Mismatch in PDF cell prediction for {input_path}" # assert verify_output( # doc_pred, doc_true From 0ffc1708d2532158d82ae1d4cd7c30881297a0a5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 8 Oct 2024 17:42:29 +0000 Subject: [PATCH 3/6] chore: bump version to 1.19.0 [skip ci] --- CHANGELOG.md | 6 ++++++ pyproject.toml | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94e773c2..1a8bc4fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## [v1.19.0](https://github.com/DS4SD/docling/releases/tag/v1.19.0) - 2024-10-08 + +### Feature + +* Add options for choosing OCR engines ([#118](https://github.com/DS4SD/docling/issues/118)) ([`f96ea86`](https://github.com/DS4SD/docling/commit/f96ea86a00fd1aafaa57025e46b5288b43958725)) + ## [v1.18.0](https://github.com/DS4SD/docling/releases/tag/v1.18.0) - 2024-10-03 ### Feature diff --git a/pyproject.toml b/pyproject.toml index 41d21cfa..e290c1a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "docling" 
-version = "1.18.0" # DO NOT EDIT, updated automatically +version = "1.19.0" # DO NOT EDIT, updated automatically description = "Docling PDF conversion package" authors = ["Christoph Auer ", "Michele Dolfi ", "Maxim Lysak ", "Nikos Livathinos ", "Ahmed Nassar ", "Peter Staar "] license = "MIT" From 6924999f1f8351c5d1479aa85eb0de8e15e1c8ac Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Wed, 9 Oct 2024 14:50:39 +0200 Subject: [PATCH 4/6] chore: explicitly manage pandas dependency (#134) Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- poetry.lock | 7 +------ pyproject.toml | 3 ++- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 27fac6b9..06720be9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -6549,11 +6549,6 @@ files = [ {file = "triton-3.0.0-1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:34e509deb77f1c067d8640725ef00c5cbfcb2052a1a3cb6a6d343841f92624eb"}, {file = "triton-3.0.0-1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bcbf3b1c48af6a28011a5c40a5b3b9b5330530c3827716b5fbf6d7adcc1e53e9"}, {file = "triton-3.0.0-1-cp39-cp39-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6e5727202f7078c56f91ff13ad0c1abab14a0e7f2c87e91b12b6f64f3e8ae609"}, - {file = "triton-3.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39b052da883351fdf6be3d93cedae6db3b8e3988d3b09ed221bccecfa9612230"}, - {file = "triton-3.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cd34f19a8582af96e6291d4afce25dac08cb2a5d218c599163761e8e0827208e"}, - {file = "triton-3.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d5e10de8c011adeb7c878c6ce0dd6073b14367749e34467f1cff2bde1b78253"}, - {file = "triton-3.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e8903767951bf86ec960b4fe4e21bc970055afc65e9d57e916d79ae3c93665e3"}, - {file = "triton-3.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41004fb1ae9a53fcb3e970745feb87f0e3c94c6ce1ba86e95fa3b8537894bef7"}, ] [package.dependencies] @@ -7167,4 +7162,4 @@ tesserocr = ["tesserocr"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "a9bfb36209f3a9140b6923c51bae8c1e23af5be34e52d9622119a5683f125b2c" +content-hash = "167f6f29b025cdc166dc08b302aabada069786ecc6a68a187702b11a69da3d3e" diff --git a/pyproject.toml b/pyproject.toml index e290c1a4..c4f56bf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,6 +53,7 @@ rtree = "^1.3.0" scipy = "^1.14.1" pyarrow = "^16.1.0" typer = "^0.12.5" +pandas = "^2.1.4" [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "^24.4.2"} @@ -67,7 +68,7 @@ pytest-xdist = "^3.3.1" types-requests = "^2.31.0.2" flake8-pyproject = "^1.2.3" pylint = "^2.17.5" -pandas-stubs = "^2.2.2.240909" +pandas-stubs = "^2.1.4.231227" ipykernel = "^6.29.5" ipywidgets = "^8.1.5" nbqa = "^1.9.0" From 5f1bd9e9c8a19c667d1d587a557c3c36df494762 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Wed, 9 Oct 2024 22:17:56 +0200 Subject: [PATCH 5/6] docs: simplify LlamaIndex example using Docling extension (#135) Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- README.md | 17 +- examples/rag_llamaindex.ipynb | 635 ++++++++++++++++------------------ poetry.lock | 126 ++++++- pyproject.toml | 5 +- 4 files changed, 428 insertions(+), 355 deletions(-) diff --git a/README.md b/README.md index 882f3a8a..df5c9a76 100644 --- a/README.md +++ b/README.md @@ -289,15 +289,14 @@ from docling_core.transforms.chunker import HierarchicalChunker doc = DocumentConverter().convert_single("https://arxiv.org/pdf/2206.01062").output chunks = list(HierarchicalChunker().chunk(doc)) -# > [ -# > ChunkWithMetadata( -# > path='$.main-text[0]', 
-# > text='DocLayNet: A Large Human-Annotated Dataset [...]', -# > page=1, -# > bbox=[107.30, 672.38, 505.19, 709.08] -# > ), -# > [...] -# > ] +print(chunks[0]) +# ChunkWithMetadata( +# path='#/main-text/1', +# text='DocLayNet: A Large Human-Annotated Dataset [...]', +# page=1, +# bbox=[107.30, 672.38, 505.19, 709.08], +# [...] +# ) ``` diff --git a/examples/rag_llamaindex.ipynb b/examples/rag_llamaindex.ipynb index 6dd9e0f4..f5c0e91a 100644 --- a/examples/rag_llamaindex.ipynb +++ b/examples/rag_llamaindex.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -7,6 +14,38 @@ "# RAG with Docling and 🦙 LlamaIndex" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Overview" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "LlamaIndex extensions `DoclingReader` and `DoclingNodeParser` presented in this notebook seamlessly integrate Docling into LlamaIndex, enabling you to:\n", + "- use PDF documents in your LLM applications with ease and speed, and\n", + "- leverage Docling's rich format for advanced, document-native grounding." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- 👉 For best conversion speed, use GPU acceleration whenever available; e.g. 
if running on Colab, use GPU-enabled runtime.\n", + "- Notebook uses HuggingFace's Inference API; for increased LLM quota, token can be provided via env var `HF_TOKEN`.\n", + "- Requirements can be installed as shown below (`--no-warn-conflicts` meant for Colab's pre-populated Python env; feel free to remove for stricter usage):" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -21,35 +60,49 @@ } ], "source": [ - "# requirements for this example:\n", - "%pip install -qq docling docling-core python-dotenv llama-index-embeddings-huggingface llama-index-llms-huggingface-api llama-index-vector-stores-milvus" + "%pip install -q --progress-bar off --no-warn-conflicts llama-index-core llama-index-readers-docling llama-index-node-parser-docling llama-index-embeddings-huggingface llama-index-llms-huggingface-api llama-index-readers-file python-dotenv" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import os\n", - "from tempfile import TemporaryDirectory\n", + "from pathlib import Path\n", + "from tempfile import mkdtemp\n", + "from warnings import filterwarnings\n", "\n", "from dotenv import load_dotenv\n", - "from pydantic import TypeAdapter\n", - "from rich.pretty import pprint\n", "\n", - "load_dotenv()" + "\n", + "def _get_env_from_colab_or_os(key):\n", + " try:\n", + " from google.colab import userdata\n", + "\n", + " try:\n", + " return userdata.get(key)\n", + " except userdata.SecretNotFoundError:\n", + " pass\n", + " except ImportError:\n", + " pass\n", + " return os.getenv(key)\n", + "\n", + "\n", + "load_dotenv()\n", + "\n", + "filterwarnings(action=\"ignore\", category=UserWarning, module=\"pydantic\")\n", + "filterwarnings(action=\"ignore\", category=FutureWarning, module=\"easyocr\")\n", + "# https://github.com/huggingface/transformers/issues/5486:\n", + 
"os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can now define the main parameters:" ] }, { @@ -58,250 +111,61 @@ "metadata": {}, "outputs": [], "source": [ - "import warnings\n", + "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n", + "from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI\n", "\n", - "warnings.filterwarnings(action=\"ignore\", category=UserWarning, module=\"pydantic|torch\")\n", - "warnings.filterwarnings(action=\"ignore\", category=FutureWarning, module=\"easyocr\")" + "EMBED_MODEL = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")\n", + "MILVUS_URI = str(Path(mkdtemp()) / \"docling.db\")\n", + "GEN_MODEL = HuggingFaceInferenceAPI(\n", + " token=_get_env_from_colab_or_os(\"HF_TOKEN\"),\n", + " model_name=\"mistralai/Mixtral-8x7B-Instruct-v0.1\",\n", + ")\n", + "SOURCE = \"https://arxiv.org/pdf/2408.09869\" # Docling Technical Report\n", + "QUERY = \"Which are the main AI models in Docling?\"\n", + "\n", + "embed_dim = len(EMBED_MODEL.get_text_embedding(\"hi\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Setup" + "## Using Markdown export" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Reader and node parser" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Below we set up:\n", - "- a `Reader` which will be used to create LlamaIndex documents, and\n", - "- a `NodeParser`, which will be used to create LlamaIndex nodes out of the documents" + "To create a simple RAG pipeline, we can:\n", + "- define a `DoclingReader`, which by default exports to Markdown, and\n", + "- use a standard node parser for these Markdown-based docs, e.g. 
a `MarkdownNodeParser`" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [], - "source": [ - "from enum import Enum\n", - "from typing import Iterable\n", - "\n", - "from llama_index.core.readers.base import BasePydanticReader\n", - "from llama_index.core.schema import Document as LIDocument\n", - "from pydantic import BaseModel\n", - "\n", - "from docling.document_converter import DocumentConverter\n", - "\n", - "\n", - "class DocumentMetadata(BaseModel):\n", - " dl_doc_hash: str\n", - "\n", - "\n", - "class DoclingPDFReader(BasePydanticReader):\n", - " class ParseType(str, Enum):\n", - " MARKDOWN = \"markdown\"\n", - " # JSON = \"json\"\n", - "\n", - " parse_type: ParseType = ParseType.MARKDOWN\n", - "\n", - " def lazy_load_data(self, file_path: str | list[str]) -> Iterable[LIDocument]:\n", - " file_paths = file_path if isinstance(file_path, list) else [file_path]\n", - " converter = DocumentConverter()\n", - " for source in file_paths:\n", - " dl_doc = converter.convert_single(source).output\n", - " match self.parse_type:\n", - " case self.ParseType.MARKDOWN:\n", - " text = dl_doc.export_to_markdown()\n", - " # case self.ParseType.JSON:\n", - " # text = dl_doc.model_dump_json()\n", - " case _:\n", - " raise RuntimeError(\n", - " f\"Unexpected parse type encountered: {self.parse_type}\"\n", - " )\n", - " excl_metadata_keys = [\"dl_doc_hash\"]\n", - " li_doc = LIDocument(\n", - " doc_id=dl_doc.file_info.document_hash,\n", - " text=text,\n", - " excluded_embed_metadata_keys=excl_metadata_keys,\n", - " excluded_llm_metadata_keys=excl_metadata_keys,\n", - " )\n", - " li_doc.metadata = DocumentMetadata(\n", - " dl_doc_hash=dl_doc.file_info.document_hash,\n", - " ).model_dump()\n", - " yield li_doc" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.core.node_parser import MarkdownNodeParser\n", - "\n", - "reader = 
DoclingPDFReader(parse_type=DoclingPDFReader.ParseType.MARKDOWN)\n", - "node_parser = MarkdownNodeParser()\n", - "transformations = [node_parser]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "One can include add more transformations, e.g. further chunking based on text size / overlap, as shown below:" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# from llama_index.core.node_parser import TokenTextSplitter\n", - "\n", - "# splitter = TokenTextSplitter(\n", - "# chunk_size=1024,\n", - "# chunk_overlap=20,\n", - "# )\n", - "# transformations.append(splitter)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Embed model" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.embeddings.huggingface import HuggingFaceEmbedding\n", - "\n", - "embed_model = HuggingFaceEmbedding(model_name=\"BAAI/bge-small-en-v1.5\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Vector store" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "INGEST = True # whether to ingest from scratch or reuse an existing vector store" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, "outputs": [ { - "name": "stderr", + "name": "stdout", "output_type": "stream", "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" + "Q: Which are the main AI models in Docling?\n", + "A: 1. A layout analysis model, an accurate object-detector for page elements. 2. 
TableFormer, a state-of-the-art table structure recognition model.\n", + "\n", + "Sources:\n" ] - } - ], - "source": [ - "from llama_index.vector_stores.milvus import MilvusVectorStore\n", - "\n", - "MILVUS_URL = os.environ.get(\n", - " \"MILVUS_URL\", f\"{(tmp_dir := TemporaryDirectory()).name}/milvus_demo.db\"\n", - ")\n", - "MILVUS_COLL_NAME = os.environ.get(\"MILVUS_COLL_NAME\", \"basic_llamaindex_pipeline\")\n", - "MILVUS_KWARGS = TypeAdapter(dict).validate_json(os.environ.get(\"MILVUS_KWARGS\", \"{}\"))\n", - "vector_store = MilvusVectorStore(\n", - " uri=MILVUS_URL,\n", - " collection_name=MILVUS_COLL_NAME,\n", - " dim=len(embed_model.get_text_embedding(\"hi\")),\n", - " overwrite=INGEST,\n", - " **MILVUS_KWARGS,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "536daee038de4d52a793445c6d853c72", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Fetching 7 files: 0%| | 0/7 [00:00[\n", - "│ Document(\n", - "│ │ id_='5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c84663'+14,\n", - "│ │ embedding=None,\n", - "│ │ metadata={'dl_doc_hash': '5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c84663'+14},\n", - "│ │ excluded_embed_metadata_keys=['dl_doc_hash'],\n", - "│ │ excluded_llm_metadata_keys=['dl_doc_hash'],\n", - "│ │ relationships={},\n", - "│ │ text='## DocLayNet: A Large Human-Annotated Dataset for '+50593,\n", - "│ │ mimetype='text/plain',\n", - "│ │ start_char_idx=None,\n", - "│ │ end_char_idx=None,\n", - "│ │ text_template='{metadata_str}\\n\\n{content}',\n", - "│ │ metadata_template='{key}: {value}',\n", - "│ │ metadata_seperator='\\n'\n", - "│ )\n", - "]\n", - "\n" - ], "text/plain": [ - "\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1;35mDocument\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ 
\u001b[0m\u001b[33mid_\u001b[0m=\u001b[32m'5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c84663'\u001b[0m+\u001b[1;36m14\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33membedding\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[32m'dl_doc_hash'\u001b[0m: \u001b[32m'5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c84663'\u001b[0m+\u001b[1;36m14\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mexcluded_embed_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'dl_doc_hash'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mexcluded_llm_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[32m'dl_doc_hash'\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mrelationships\u001b[0m=\u001b[1m{\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtext\u001b[0m=\u001b[32m'## DocLayNet: A Large Human-Annotated Dataset for '\u001b[0m+\u001b[1;36m50593\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmimetype\u001b[0m=\u001b[32m'text/plain'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mstart_char_idx\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mend_char_idx\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mtext_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mmetadata_str\u001b[0m\u001b[32m}\u001b[0m\u001b[32m\\n\\n\u001b[0m\u001b[32m{\u001b[0m\u001b[32mcontent\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mkey\u001b[0m\u001b[32m}\u001b[0m\u001b[32m: \u001b[0m\u001b[32m{\u001b[0m\u001b[32mvalue\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33mmetadata_seperator\u001b[0m=\u001b[32m'\\n'\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m)\u001b[0m\n", - "\u001b[1m]\u001b[0m\n" + "[('3.2 
AI models\\n\\nAs part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks.',\n", + " {'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n", + " 'Header_2': '3.2 AI models'}),\n", + " (\"5 Applications\\n\\nThanks to the high-quality, richly structured document conversion achieved by Docling, its output qualifies for numerous downstream applications. For example, Docling can provide a base for detailed enterprise document search, passage retrieval or classification use-cases, or support knowledge extraction pipelines, allowing specific treatment of different structures in the document, such as tables, figures, section structure or references. For popular generative AI application patterns, such as retrieval-augmented generation (RAG), we provide quackling , an open-source package which capitalizes on Docling's feature-rich document output to enable document-native optimized vector embedding and chunking. It plugs in seamlessly with LLM frameworks such as LlamaIndex [8]. Since Docling is fast, stable and cheap to run, it also makes for an excellent choice to build document-derived datasets. With its powerful table structure recognition, it provides significant benefit to automated knowledge-base construction [11, 10]. 
Docling is also integrated within the open IBM data prep kit [6], which implements scalable data transforms to build large-scale multi-modal training datasets.\",\n", + " {'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n", + " 'Header_2': '5 Applications'})]" ] }, "metadata": {}, @@ -310,131 +174,83 @@ ], "source": [ "from llama_index.core import StorageContext, VectorStoreIndex\n", + "from llama_index.core.node_parser import MarkdownNodeParser\n", + "from llama_index.readers.docling import DoclingReader\n", + "from llama_index.vector_stores.milvus import MilvusVectorStore\n", "\n", - "if INGEST:\n", - " # in this case we ingest the data into the vector store\n", - " docs = reader.load_data(\n", - " file_path=\"https://arxiv.org/pdf/2206.01062\", # DocLayNet paper\n", - " )\n", - " pprint(docs, max_length=1, max_string=50, max_depth=4)\n", - " storage_context = StorageContext.from_defaults(vector_store=vector_store)\n", - " index = VectorStoreIndex.from_documents(\n", - " documents=docs,\n", - " embed_model=embed_model,\n", - " storage_context=storage_context,\n", - " transformations=transformations,\n", - " )\n", - "else:\n", - " # in this case we just load the vector store index\n", - " index = VectorStoreIndex.from_vector_store(\n", - " vector_store=vector_store,\n", - " embed_model=embed_model,\n", - " )" + "reader = DoclingReader()\n", + "node_parser = MarkdownNodeParser()\n", + "\n", + "vector_store = MilvusVectorStore(\n", + " uri=str(Path(mkdtemp()) / \"docling.db\"), # or set as needed\n", + " dim=embed_dim,\n", + " overwrite=True,\n", + ")\n", + "index = VectorStoreIndex.from_documents(\n", + " documents=reader.load_data(SOURCE),\n", + " transformations=[node_parser],\n", + " storage_context=StorageContext.from_defaults(vector_store=vector_store),\n", + " embed_model=EMBED_MODEL,\n", + ")\n", + "result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)\n", + "print(f\"Q: {QUERY}\\nA: 
{result.response.strip()}\\n\\nSources:\")\n", + "display([(n.text, n.metadata) for n in result.source_nodes])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### LLM" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI\n", - "\n", - "HF_API_KEY = os.environ.get(\"HF_API_KEY\")\n", - "\n", - "llm = HuggingFaceInferenceAPI(\n", - " token=HF_API_KEY,\n", - " model_name=\"mistralai/Mistral-7B-Instruct-v0.3\",\n", - ")" + "## Using Docling format" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## RAG" + "To leverage Docling's rich native format, we:\n", + "- create a `DoclingReader` with JSON export type, and\n", + "- employ a `DoclingNodeParser` in order to appropriately parse that Docling format.\n", + "\n", + "Notice how the sources now also contain document-level grounding (e.g. page number or bounding box information):" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Q: Which are the main AI models in Docling?\n", + "A: The main AI models in Docling are a layout analysis model and TableFormer. The layout analysis model is an accurate object-detector for page elements, and TableFormer is a state-of-the-art table structure recognition model.\n", + "\n", + "Sources:\n" + ] + }, { "data": { - "text/html": [ - "
Response(\n",
-       "│   response='80863 pages were human annotated.',\n",
-       "│   source_nodes=[\n",
-       "│   │   NodeWithScore(\n",
-       "│   │   │   node=TextNode(\n",
-       "│   │   │   │   id_='8874a117-d181-4f4f-a30b-0b5604370d77',\n",
-       "│   │   │   │   embedding=None,\n",
-       "│   │   │   │   metadata={...},\n",
-       "│   │   │   │   excluded_embed_metadata_keys=[...],\n",
-       "│   │   │   │   excluded_llm_metadata_keys=[...],\n",
-       "│   │   │   │   relationships={...},\n",
-       "│   │   │   │   text='3 THE DOCLAYNET DATASET\\n\\nDocLayNet contains 80863 PDF pages. Among these, 7059 carry two instances of human annotations, and 1591 carry three. This amounts to 91104 total annotation instances. The annotations provide layout information in the shape o'+5775,\n",
-       "│   │   │   │   mimetype='text/plain',\n",
-       "│   │   │   │   start_char_idx=9089,\n",
-       "│   │   │   │   end_char_idx=15114,\n",
-       "│   │   │   │   text_template='{metadata_str}\\n\\n{content}',\n",
-       "│   │   │   │   metadata_template='{key}: {value}',\n",
-       "│   │   │   │   metadata_seperator='\\n'\n",
-       "│   │   │   ),\n",
-       "│   │   │   score=0.7367570400238037\n",
-       "│   │   ),\n",
-       "│   │   ... +1\n",
-       "│   ],\n",
-       "│   metadata={\n",
-       "│   │   '8874a117-d181-4f4f-a30b-0b5604370d77': {\n",
-       "│   │   │   'dl_doc_hash': '5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c846634ff924e635e0dc',\n",
-       "│   │   │   ... +1\n",
-       "│   │   },\n",
-       "│   │   ... +1\n",
-       "│   }\n",
-       ")\n",
-       "
\n" - ], "text/plain": [ - "\u001b[1;35mResponse\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mresponse\u001b[0m=\u001b[32m'80863 pages were human annotated.'\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33msource_nodes\u001b[0m=\u001b[1m[\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1;35mNodeWithScore\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mnode\u001b[0m=\u001b[1;35mTextNode\u001b[0m\u001b[1m(\u001b[0m\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mid_\u001b[0m=\u001b[32m'8874a117-d181-4f4f-a30b-0b5604370d77'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33membedding\u001b[0m=\u001b[3;35mNone\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\u001b[33m...\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mexcluded_embed_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[33m...\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mexcluded_llm_metadata_keys\u001b[0m=\u001b[1m[\u001b[0m\u001b[33m...\u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mrelationships\u001b[0m=\u001b[1m{\u001b[0m\u001b[33m...\u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mtext\u001b[0m=\u001b[32m'3 THE DOCLAYNET DATASET\\n\\nDocLayNet contains 80863 PDF pages. Among these, 7059 carry two instances of human annotations, and 1591 carry three. This amounts to 91104 total annotation instances. 
The annotations provide layout information in the shape o'\u001b[0m+\u001b[1;36m5775\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mmimetype\u001b[0m=\u001b[32m'text/plain'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mstart_char_idx\u001b[0m=\u001b[1;36m9089\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mend_char_idx\u001b[0m=\u001b[1;36m15114\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mtext_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mmetadata_str\u001b[0m\u001b[32m}\u001b[0m\u001b[32m\\n\\n\u001b[0m\u001b[32m{\u001b[0m\u001b[32mcontent\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mmetadata_template\u001b[0m=\u001b[32m'\u001b[0m\u001b[32m{\u001b[0m\u001b[32mkey\u001b[0m\u001b[32m}\u001b[0m\u001b[32m: \u001b[0m\u001b[32m{\u001b[0m\u001b[32mvalue\u001b[0m\u001b[32m}\u001b[0m\u001b[32m'\u001b[0m,\n", - "\u001b[2;32m│ │ │ │ \u001b[0m\u001b[33mmetadata_seperator\u001b[0m=\u001b[32m'\\n'\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33mscore\u001b[0m=\u001b[1;36m0\u001b[0m\u001b[1;36m.7367570400238037\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m)\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33m...\u001b[0m +\u001b[1;36m1\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m]\u001b[0m,\n", - "\u001b[2;32m│ \u001b[0m\u001b[33mmetadata\u001b[0m=\u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[32m'8874a117-d181-4f4f-a30b-0b5604370d77'\u001b[0m: \u001b[1m{\u001b[0m\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[32m'dl_doc_hash'\u001b[0m: \u001b[32m'5dfbd8c115a15fd3396b68409124cfee29fc8efac7b5c846634ff924e635e0dc'\u001b[0m,\n", - "\u001b[2;32m│ │ │ \u001b[0m\u001b[33m...\u001b[0m +\u001b[1;36m1\u001b[0m\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[1m}\u001b[0m,\n", - "\u001b[2;32m│ │ \u001b[0m\u001b[33m...\u001b[0m +\u001b[1;36m1\u001b[0m\n", - "\u001b[2;32m│ \u001b[0m\u001b[1m}\u001b[0m\n", - 
"\u001b[1m)\u001b[0m\n" + "[('As part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks.',\n", + " {'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n", + " 'path': '#/main-text/37',\n", + " 'heading': '3.2 AI models',\n", + " 'page': 3,\n", + " 'bbox': [107.36903381347656,\n", + " 330.07513427734375,\n", + " 506.29705810546875,\n", + " 407.3725280761719]}),\n", + " ('With Docling , we open-source a very capable and efficient document conversion tool which builds on the powerful, specialized AI models and datasets for layout analysis and table structure recognition we developed and presented in the recent past [12, 13, 9]. Docling is designed as a simple, self-contained python library with permissive license, running entirely locally on commodity hardware. 
Its code architecture allows for easy extensibility and addition of new features and models.',\n", + " {'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n", + " 'path': '#/main-text/10',\n", + " 'heading': '1 Introduction',\n", + " 'page': 1,\n", + " 'bbox': [107.33261108398438,\n", + " 83.3067626953125,\n", + " 504.0033874511719,\n", + " 136.45367431640625]})]" ] }, "metadata": {}, @@ -442,9 +258,148 @@ } ], "source": [ - "query_engine = index.as_query_engine(llm=llm)\n", - "query_res = query_engine.query(\"How many pages were human annotated?\")\n", - "pprint(query_res, max_length=1, max_string=250, max_depth=4)" + "from llama_index.node_parser.docling import DoclingNodeParser\n", + "\n", + "reader = DoclingReader(export_type=DoclingReader.ExportType.JSON)\n", + "node_parser = DoclingNodeParser()\n", + "\n", + "vector_store = MilvusVectorStore(\n", + " uri=str(Path(mkdtemp()) / \"docling.db\"), # or set as needed\n", + " dim=embed_dim,\n", + " overwrite=True,\n", + ")\n", + "index = VectorStoreIndex.from_documents(\n", + " documents=reader.load_data(SOURCE),\n", + " transformations=[node_parser],\n", + " storage_context=StorageContext.from_defaults(vector_store=vector_store),\n", + " embed_model=EMBED_MODEL,\n", + ")\n", + "result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)\n", + "print(f\"Q: {QUERY}\\nA: {result.response.strip()}\\n\\nSources:\")\n", + "display([(n.text, n.metadata) for n in result.source_nodes])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## With Simple Directory Reader" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To demonstrate this usage pattern, we first set up a test document directory." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from tempfile import mkdtemp\n", + "\n", + "import requests\n", + "\n", + "tmp_dir_path = Path(mkdtemp())\n", + "r = requests.get(SOURCE)\n", + "with open(tmp_dir_path / f\"{Path(SOURCE).name}.pdf\", \"wb\") as out_file:\n", + " out_file.write(r.content)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the `reader` and `node_parser` definitions from any of the above variants, usage with `SimpleDirectoryReader` then looks as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Loading files: 100%|██████████| 1/1 [00:11<00:00, 11.15s/file]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Q: Which are the main AI models in Docling?\n", + "A: The main AI models in Docling are a layout analysis model and TableFormer. The layout analysis model is an accurate object-detector for page elements, and TableFormer is a state-of-the-art table structure recognition model.\n", + "\n", + "Sources:\n" + ] + }, + { + "data": { + "text/plain": [ + "[('As part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . 
Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks.',\n", + " {'file_path': '/var/folders/76/4wwfs06x6835kcwj4186c0nc0000gn/T/tmp4vsev3_r/2408.09869.pdf',\n", + " 'file_name': '2408.09869.pdf',\n", + " 'file_type': 'application/pdf',\n", + " 'file_size': 5566574,\n", + " 'creation_date': '2024-10-09',\n", + " 'last_modified_date': '2024-10-09',\n", + " 'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n", + " 'path': '#/main-text/37',\n", + " 'heading': '3.2 AI models',\n", + " 'page': 3,\n", + " 'bbox': [107.36903381347656,\n", + " 330.07513427734375,\n", + " 506.29705810546875,\n", + " 407.3725280761719]}),\n", + " ('With Docling , we open-source a very capable and efficient document conversion tool which builds on the powerful, specialized AI models and datasets for layout analysis and table structure recognition we developed and presented in the recent past [12, 13, 9]. Docling is designed as a simple, self-contained python library with permissive license, running entirely locally on commodity hardware. 
Its code architecture allows for easy extensibility and addition of new features and models.',\n", + " {'file_path': '/var/folders/76/4wwfs06x6835kcwj4186c0nc0000gn/T/tmp4vsev3_r/2408.09869.pdf',\n", + " 'file_name': '2408.09869.pdf',\n", + " 'file_type': 'application/pdf',\n", + " 'file_size': 5566574,\n", + " 'creation_date': '2024-10-09',\n", + " 'last_modified_date': '2024-10-09',\n", + " 'dl_doc_hash': '556ad9e23b6d2245e36b3208758cf0c8a709382bb4c859eacfe8e73b14e635aa',\n", + " 'path': '#/main-text/10',\n", + " 'heading': '1 Introduction',\n", + " 'page': 1,\n", + " 'bbox': [107.33261108398438,\n", + " 83.3067626953125,\n", + " 504.0033874511719,\n", + " 136.45367431640625]})]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from llama_index.core import SimpleDirectoryReader\n", + "\n", + "dir_reader = SimpleDirectoryReader(\n", + " input_dir=tmp_dir_path,\n", + " file_extractor={\".pdf\": reader},\n", + ")\n", + "\n", + "vector_store = MilvusVectorStore(\n", + " uri=str(Path(mkdtemp()) / \"docling.db\"), # or set as needed\n", + " dim=embed_dim,\n", + " overwrite=True,\n", + ")\n", + "index = VectorStoreIndex.from_documents(\n", + " documents=dir_reader.load_data(SOURCE),\n", + " transformations=[node_parser],\n", + " storage_context=StorageContext.from_defaults(vector_store=vector_store),\n", + " embed_model=EMBED_MODEL,\n", + ")\n", + "result = index.as_query_engine(llm=GEN_MODEL).query(QUERY)\n", + "print(f\"Q: {QUERY}\\nA: {result.response.strip()}\\n\\nSources:\")\n", + "display([(n.text, n.metadata) for n in result.source_nodes])" ] }, { diff --git a/poetry.lock b/poetry.lock index 06720be9..0e3f4319 100644 --- a/poetry.lock +++ b/poetry.lock @@ -278,6 +278,27 @@ files = [ docs = ["furo", "jaraco.packaging (>=9.3)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["jaraco.test", "pytest (!=8.0.*)", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)"] +[[package]] 
+name = "beautifulsoup4" +version = "4.12.3" +description = "Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, + {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +cchardet = ["cchardet"] +chardet = ["chardet"] +charset-normalizer = ["charset-normalizer"] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "black" version = "24.8.0" @@ -947,20 +968,20 @@ files = [ [[package]] name = "docling-core" -version = "1.6.2" +version = "1.7.2" description = "A python library to define and validate data types in Docling." optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "docling_core-1.6.2-py3-none-any.whl", hash = "sha256:1473ab13910d76552015c10fe351b90079a00c225f76ada3cd4fc7442183ffd0"}, - {file = "docling_core-1.6.2.tar.gz", hash = "sha256:63f2b8a683dec56568ee1cd7d25cea419c0291211a88a11f74079ff2d62ccd5e"}, + {file = "docling_core-1.7.2-py3-none-any.whl", hash = "sha256:29be527a30440e060a444ee8806788f045d1e899b2cf51d8c99a2cdebb6536fa"}, + {file = "docling_core-1.7.2.tar.gz", hash = "sha256:3af1bb04a47c0cdf448e02c4390d2fad3793e8f75731d68059f137332638ac39"}, ] [package.dependencies] json-schema-for-humans = ">=1.0.0,<2.0.0" jsonref = ">=1.1.0,<2.0.0" jsonschema = ">=4.16.0,<5.0.0" -pandas = ">=2.2.2,<3.0.0" +pandas = ">=2.1.4,<3.0.0" pydantic = ">=2.6.0,<3.0.0" tabulate = ">=0.9.0,<0.10.0" @@ -2528,6 +2549,58 @@ files = [ huggingface-hub = ">=0.23.0,<0.24.0" llama-index-core = ">=0.11.0,<0.12.0" +[[package]] +name = "llama-index-node-parser-docling" +version = "0.1.0" +description = "llama-index node_parser docling integration" +optional = false +python-versions = "<4.0,>=3.10" +files = [ + {file = 
"llama_index_node_parser_docling-0.1.0-py3-none-any.whl", hash = "sha256:b24bec1737b7fdb60ae81b5c92e07c65a0427c265e74f31be1f4716f9cc8bc57"}, + {file = "llama_index_node_parser_docling-0.1.0.tar.gz", hash = "sha256:52b1b18e2980e2626e12629fd06b3c282193d2d53d69df5678e243fa978c8bcd"}, +] + +[package.dependencies] +docling-core = ">=1.7.1,<2.0.0" +llama-index-core = ">=0.11.0,<0.12.0" + +[[package]] +name = "llama-index-readers-docling" +version = "0.1.0" +description = "llama-index readers docling integration" +optional = false +python-versions = "<4.0,>=3.10" +files = [ + {file = "llama_index_readers_docling-0.1.0-py3-none-any.whl", hash = "sha256:a33bc29b5ab5c8d2087268e13243c632f16b61c362133fcb5c32347d3b0ee88a"}, + {file = "llama_index_readers_docling-0.1.0.tar.gz", hash = "sha256:164750d56bda3a251efd35f03126e53b5e387b6fdfbb2fd1b4a4e29917b52b9b"}, +] + +[package.dependencies] +docling = ">=1.16.1,<2.0.0" +docling-core = ">=1.7.1,<2.0.0" +llama-index-core = ">=0.11.0,<0.12.0" + +[[package]] +name = "llama-index-readers-file" +version = "0.2.2" +description = "llama-index readers file integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_readers_file-0.2.2-py3-none-any.whl", hash = "sha256:ffec878771c1e7575afb742887561059bcca77b97a81c1c1be310ebb73f10f46"}, + {file = "llama_index_readers_file-0.2.2.tar.gz", hash = "sha256:48459f90960b863737147b66ed83afec9ce8984f8eda2561b6d2500214365db2"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.12.3,<5.0.0" +llama-index-core = ">=0.11.0,<0.12.0" +pandas = "*" +pypdf = ">=4.0.1,<5.0.0" +striprtf = ">=0.0.26,<0.0.27" + +[package.extras] +pymupdf = ["pymupdf (>=1.23.21,<2.0.0)"] + [[package]] name = "llama-index-vector-stores-milvus" version = "0.2.3" @@ -4361,6 +4434,27 @@ files = [ [package.extras] diagrams = ["jinja2", "railroad-diagrams"] +[[package]] +name = "pypdf" +version = "4.3.1" +description = "A pure-python PDF library capable of splitting, merging, cropping, and 
transforming PDF files" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pypdf-4.3.1-py3-none-any.whl", hash = "sha256:64b31da97eda0771ef22edb1bfecd5deee4b72c3d1736b7df2689805076d6418"}, + {file = "pypdf-4.3.1.tar.gz", hash = "sha256:b2f37fe9a3030aa97ca86067a56ba3f9d3565f9a791b305c7355d8392c30d91b"}, +] + +[package.dependencies] +typing_extensions = {version = ">=4.0", markers = "python_version < \"3.11\""} + +[package.extras] +crypto = ["PyCryptodome", "cryptography"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "pytest-socket", "pytest-timeout", "pytest-xdist", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow (>=8.0.0)", "PyCryptodome", "cryptography"] +image = ["Pillow (>=8.0.0)"] + [[package]] name = "pypdfium2" version = "4.30.0" @@ -5777,6 +5871,17 @@ files = [ {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, ] +[[package]] +name = "soupsieve" +version = "2.6" +description = "A modern CSS selector implementation for Beautiful Soup." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, + {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, +] + [[package]] name = "sqlalchemy" version = "2.0.35" @@ -5883,6 +5988,17 @@ pure-eval = "*" [package.extras] tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] +[[package]] +name = "striprtf" +version = "0.0.26" +description = "A simple library to convert rtf to text" +optional = false +python-versions = "*" +files = [ + {file = "striprtf-0.0.26-py3-none-any.whl", hash = "sha256:8c8f9d32083cdc2e8bfb149455aa1cc5a4e0a035893bedc75db8b73becb3a1bb"}, + {file = "striprtf-0.0.26.tar.gz", hash = "sha256:fdb2bba7ac440072d1c41eab50d8d74ae88f60a8b6575c6e2c7805dc462093aa"}, +] + [[package]] name = "sympy" version = "1.13.2" @@ -7162,4 +7278,4 @@ tesserocr = ["tesserocr"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "167f6f29b025cdc166dc08b302aabada069786ecc6a68a187702b11a69da3d3e" +content-hash = "e1a03e2789938c446af9648eec9bf3f8544d9c367ee3787644fa7d92179082f0" diff --git a/pyproject.toml b/pyproject.toml index c4f56bf4..6487ae95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ torchvision = [ ###################### python = "^3.10" pydantic = "^2.0.0" -docling-core = "^1.6.2" +docling-core = "^1.7.1" docling-ibm-models = "^2.0.0" deepsearch-glm = "^0.22.0" filetype = "^1.2.0" @@ -76,6 +76,9 @@ nbqa = "^1.9.0" [tool.poetry.group.examples.dependencies] datasets = "^2.21.0" python-dotenv = "^1.0.1" +llama-index-readers-docling = "^0.1.0" +llama-index-node-parser-docling = "^0.1.0" +llama-index-readers-file = "^0.2.2" llama-index-embeddings-huggingface = "^0.3.1" llama-index-llms-huggingface-api = "^0.2.0" llama-index-vector-stores-milvus = "^0.2.1" From dae2a3b66732e1e135b00cce24226c7d9f2eb2e4 Mon Sep 17 00:00:00 
2001 From: Nikos Livathinos <100353117+nikos-livathinos@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:21:19 +0200 Subject: [PATCH 6/6] fix: remove stderr from tesseract cli and introduce fuzziness in the text validation of OCR tests (#138) * feat(OCR tests): Introduce fuzziness in the text validation of OCR tests Signed-off-by: Nikos Livathinos * fix(TesseractOcrCliModel): Send the stderr to devnull to avoid polluting the console with messages from tesseract cmd Signed-off-by: Nikos Livathinos --------- Signed-off-by: Nikos Livathinos --- docling/models/tesseract_ocr_cli_model.py | 4 +- tests/test_e2e_ocr_conversion.py | 2 +- tests/verify_utils.py | 61 +++++++++++++++++------ 3 files changed, 50 insertions(+), 17 deletions(-) diff --git a/docling/models/tesseract_ocr_cli_model.py b/docling/models/tesseract_ocr_cli_model.py index c3c19991..052d878e 100644 --- a/docling/models/tesseract_ocr_cli_model.py +++ b/docling/models/tesseract_ocr_cli_model.py @@ -1,7 +1,7 @@ import io import logging import tempfile -from subprocess import PIPE, Popen +from subprocess import DEVNULL, PIPE, Popen from typing import Iterable, Tuple import pandas as pd @@ -81,7 +81,7 @@ class TesseractOcrCliModel(BaseOcrModel): cmd += [ifilename, "stdout", "tsv"] _log.info("command: {}".format(" ".join(cmd))) - proc = Popen(cmd, stdout=PIPE) + proc = Popen(cmd, stdout=PIPE, stderr=DEVNULL) output, _ = proc.communicate() # _log.info(output) diff --git a/tests/test_e2e_ocr_conversion.py b/tests/test_e2e_ocr_conversion.py index 96bc0871..d3a61284 100644 --- a/tests/test_e2e_ocr_conversion.py +++ b/tests/test_e2e_ocr_conversion.py @@ -94,5 +94,5 @@ def test_e2e_conversions(): input_path=pdf_path, doc_result=doc_result, generate=GENERATE, - skip_cells=True, + fuzzy=True, ) diff --git a/tests/verify_utils.py b/tests/verify_utils.py index 082b7c78..fc587de2 100644 --- a/tests/verify_utils.py +++ b/tests/verify_utils.py @@ -11,6 +11,42 @@ from docling.datamodel.base_models import ConversionStatus, 
Page from docling.datamodel.document import ConversionResult +def levenshtein(str1: str, str2: str) -> int: + + # Ensure str1 is the shorter string to optimize memory usage + if len(str1) > len(str2): + str1, str2 = str2, str1 + + # Previous and current row buffers + previous_row = list(range(len(str2) + 1)) + current_row = [0] * (len(str2) + 1) + + # Compute the Levenshtein distance row by row + for i, c1 in enumerate(str1, start=1): + current_row[0] = i + for j, c2 in enumerate(str2, start=1): + insertions = previous_row[j] + 1 + deletions = current_row[j - 1] + 1 + substitutions = previous_row[j - 1] + (c1 != c2) + current_row[j] = min(insertions, deletions, substitutions) + # Swap rows for the next iteration + previous_row, current_row = current_row, previous_row + + # The result is in the last element of the previous row + return previous_row[-1] + + +def verify_text(gt: str, pred: str, fuzzy: bool, fuzzy_threshold: float = 0.4): + + if len(gt) == 0 or not fuzzy: + assert gt == pred, f"{gt}!={pred}" + else: + dist = levenshtein(gt, pred) + diff = dist / len(gt) + assert diff < fuzzy_threshold, f"{gt}!~{pred}" + return True + + def verify_cells(doc_pred_pages: List[Page], doc_true_pages: List[Page]): assert len(doc_pred_pages) == len( @@ -32,7 +68,6 @@ def verify_cells(doc_pred_pages: List[Page], doc_true_pages: List[Page]): true_text = cell_true_item.text pred_text = cell_pred_item.text - assert true_text == pred_text, f"{true_text}!={pred_text}" true_bbox = cell_true_item.bbox.as_tuple() @@ -69,7 +104,7 @@ def verify_maintext(doc_pred: DsDocument, doc_true: DsDocument): return True -def verify_tables(doc_pred: DsDocument, doc_true: DsDocument): +def verify_tables(doc_pred: DsDocument, doc_true: DsDocument, fuzzy: bool): if doc_true.tables is None: # No tables to check assert doc_pred.tables is None, "not expecting any table on this document" @@ -102,9 +137,7 @@ def verify_tables(doc_pred: DsDocument, doc_true: DsDocument): # print("pred: ", 
pred_item.data[i][j].text) # print("") - assert ( - true_item.data[i][j].text == pred_item.data[i][j].text - ), "table-cell does not have the same text" + verify_text(true_item.data[i][j].text, pred_item.data[i][j].text, fuzzy) assert ( true_item.data[i][j].obj_type == pred_item.data[i][j].obj_type @@ -121,12 +154,12 @@ def verify_output(doc_pred: DsDocument, doc_true: DsDocument): return True -def verify_md(doc_pred_md, doc_true_md): - return doc_pred_md == doc_true_md +def verify_md(doc_pred_md: str, doc_true_md: str, fuzzy: bool): + return verify_text(doc_true_md, doc_pred_md, fuzzy) -def verify_dt(doc_pred_dt, doc_true_dt): - return doc_pred_dt == doc_true_dt +def verify_dt(doc_pred_dt: str, doc_true_dt: str, fuzzy: bool): + return verify_text(doc_true_dt, doc_pred_dt, fuzzy) def verify_conversion_result( @@ -134,7 +167,7 @@ def verify_conversion_result( doc_result: ConversionResult, generate: bool = False, ocr_engine: str = None, - skip_cells: bool = False, + fuzzy: bool = False, ): PageList = TypeAdapter(List[Page]) @@ -178,7 +211,7 @@ def verify_conversion_result( with open(dt_path, "r") as fr: doc_true_dt = fr.read() - if not skip_cells: + if not fuzzy: assert verify_cells( doc_pred_pages, doc_true_pages ), f"Mismatch in PDF cell prediction for {input_path}" @@ -188,13 +221,13 @@ def verify_conversion_result( # ), f"Mismatch in JSON prediction for {input_path}" assert verify_tables( - doc_pred, doc_true + doc_pred, doc_true, fuzzy ), f"verify_tables(doc_pred, doc_true) mismatch for {input_path}" assert verify_md( - doc_pred_md, doc_true_md + doc_pred_md, doc_true_md, fuzzy ), f"Mismatch in Markdown prediction for {input_path}" assert verify_dt( - doc_pred_dt, doc_true_dt + doc_pred_dt, doc_true_dt, fuzzy ), f"Mismatch in DocTags prediction for {input_path}"