mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Fix generate_multimodal_pages
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
3f0b01702b
commit
57de8ad63a
@ -1,8 +1,8 @@
|
||||
import logging
|
||||
from typing import Any, Dict, Iterable, List, Tuple, Union
|
||||
|
||||
from docling_core.types.doc.base import BaseCell, BaseText, BoundingBox, Ref, Table
|
||||
from docling_core.types.experimental import CoordOrigin
|
||||
from docling_core.types.doc.base import BaseCell, BaseText, Ref, Table
|
||||
from docling_core.types.experimental import BoundingBox, CoordOrigin
|
||||
|
||||
from docling.datamodel.base_models import OcrCell
|
||||
from docling.datamodel.document import ConversionResult, Page
|
||||
|
@ -9,6 +9,7 @@ from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
from docling.utils.export import generate_multimodal_pages
|
||||
from docling.utils.utils import create_hash
|
||||
|
||||
_log = logging.getLogger(__name__)
|
||||
|
||||
@ -56,7 +57,9 @@ def main():
|
||||
{
|
||||
"document": conv_res.input.file.name,
|
||||
"hash": conv_res.input.document_hash,
|
||||
"page_hash": page.page_hash,
|
||||
"page_hash": create_hash(
|
||||
conv_res.input.document_hash + ":" + str(page.page_no - 1)
|
||||
),
|
||||
"image": {
|
||||
"width": page.image.width,
|
||||
"height": page.image.height,
|
||||
|
Loading…
Reference in New Issue
Block a user