Fix generate_multimodal_pages

Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
Christoph Auer 2024-10-14 16:52:58 +02:00
parent 3f0b01702b
commit 57de8ad63a
2 changed files with 6 additions and 3 deletions

View File

@@ -1,8 +1,8 @@
import logging import logging
from typing import Any, Dict, Iterable, List, Tuple, Union from typing import Any, Dict, Iterable, List, Tuple, Union
from docling_core.types.doc.base import BaseCell, BaseText, BoundingBox, Ref, Table from docling_core.types.doc.base import BaseCell, BaseText, Ref, Table
from docling_core.types.experimental import CoordOrigin from docling_core.types.experimental import BoundingBox, CoordOrigin
from docling.datamodel.base_models import OcrCell from docling.datamodel.base_models import OcrCell
from docling.datamodel.document import ConversionResult, Page from docling.datamodel.document import ConversionResult, Page

View File

@@ -9,6 +9,7 @@ from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.utils.export import generate_multimodal_pages from docling.utils.export import generate_multimodal_pages
from docling.utils.utils import create_hash
_log = logging.getLogger(__name__) _log = logging.getLogger(__name__)
@@ -56,7 +57,9 @@ def main():
{ {
"document": conv_res.input.file.name, "document": conv_res.input.file.name,
"hash": conv_res.input.document_hash, "hash": conv_res.input.document_hash,
"page_hash": page.page_hash, "page_hash": create_hash(
conv_res.input.document_hash + ":" + str(page.page_no - 1)
),
"image": { "image": {
"width": page.image.width, "width": page.image.width,
"height": page.image.height, "height": page.image.height,