Mirror of https://github.com/DS4SD/docling.git (synced 2025-07-30 14:04:27 +00:00)
satisfying mypy and other checks
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent: c97acfc8e0
commit: 5c62f88175
@ -1,42 +1,28 @@
|
|||||||
import itertools
|
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
import warnings
|
import warnings
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
# from io import BytesIO
|
# from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import List, Optional, Union, cast
|
||||||
|
|
||||||
from docling_core.types import DoclingDocument
|
# from docling_core.types import DoclingDocument
|
||||||
from docling_core.types.doc.document import DocTagsDocument
|
from docling_core.types.doc import ( # DocItemLabel,; DoclingDocument,; GroupLabel,; ImageRefMode,; ProvenanceItem,; Size,; TableCell,; TableData,; TableItem,
|
||||||
from docling_core.types.doc import (
|
|
||||||
BoundingBox,
|
BoundingBox,
|
||||||
DocItem,
|
DocItem,
|
||||||
DocItemLabel,
|
|
||||||
DoclingDocument,
|
|
||||||
GroupLabel,
|
|
||||||
ImageRef,
|
ImageRef,
|
||||||
ImageRefMode,
|
|
||||||
PictureItem,
|
PictureItem,
|
||||||
ProvenanceItem,
|
|
||||||
Size,
|
|
||||||
TableCell,
|
|
||||||
TableData,
|
|
||||||
TableItem,
|
|
||||||
)
|
)
|
||||||
|
from docling_core.types.doc.document import DocTagsDocument
|
||||||
from docling_core.types.doc.tokens import DocumentToken, TableToken
|
from docling_core.types.doc.tokens import DocumentToken, TableToken
|
||||||
|
from PIL import Image as PILImage
|
||||||
|
|
||||||
from docling.backend.abstract_backend import AbstractDocumentBackend
|
from docling.backend.abstract_backend import AbstractDocumentBackend
|
||||||
from docling.backend.md_backend import MarkdownDocumentBackend
|
from docling.backend.md_backend import MarkdownDocumentBackend
|
||||||
from docling.backend.pdf_backend import PdfDocumentBackend
|
from docling.backend.pdf_backend import PdfDocumentBackend
|
||||||
from docling.datamodel.base_models import InputFormat, Page
|
from docling.datamodel.base_models import InputFormat, Page
|
||||||
from docling.datamodel.document import ConversionResult, InputDocument
|
from docling.datamodel.document import ConversionResult, InputDocument
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import ResponseFormat, VlmPipelineOptions
|
||||||
PdfPipelineOptions,
|
|
||||||
ResponseFormat,
|
|
||||||
VlmPipelineOptions,
|
|
||||||
)
|
|
||||||
from docling.datamodel.settings import settings
|
from docling.datamodel.settings import settings
|
||||||
from docling.models.hf_vlm_model import HuggingFaceVlmModel
|
from docling.models.hf_vlm_model import HuggingFaceVlmModel
|
||||||
from docling.pipeline.base_pipeline import PaginatedPipeline
|
from docling.pipeline.base_pipeline import PaginatedPipeline
|
||||||
@ -112,12 +98,19 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
image_list = []
|
image_list = []
|
||||||
for page in conv_res.pages:
|
for page in conv_res.pages:
|
||||||
predicted_doctags = ""
|
predicted_doctags = ""
|
||||||
|
img = PILImage.new("RGB", (1, 1), "rgb(255,255,255)")
|
||||||
if page.predictions.vlm_response:
|
if page.predictions.vlm_response:
|
||||||
predicted_doctags = page.predictions.vlm_response.text
|
predicted_doctags = page.predictions.vlm_response.text
|
||||||
image_list.append(page.image)
|
if page.image:
|
||||||
|
img = page.image
|
||||||
|
image_list.append(img)
|
||||||
doctags_list.append(predicted_doctags)
|
doctags_list.append(predicted_doctags)
|
||||||
|
|
||||||
doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(doctags_list, image_list)
|
doctags_list_c = cast(List[Union[Path, str]], doctags_list)
|
||||||
|
image_list_c = cast(List[Union[Path, PILImage.Image]], image_list)
|
||||||
|
doctags_doc = DocTagsDocument.from_doctags_and_image_pairs(
|
||||||
|
doctags_list_c, image_list_c
|
||||||
|
)
|
||||||
conv_res.document.load_from_doctags(doctags_doc)
|
conv_res.document.load_from_doctags(doctags_doc)
|
||||||
# USE THIS TO FORCE BACKEND TEXT
|
# USE THIS TO FORCE BACKEND TEXT
|
||||||
# if self.force_backend_text:
|
# if self.force_backend_text:
|
||||||
|
Loading…
Reference in New Issue
Block a user