Mirror of https://github.com/DS4SD/docling.git
fix: Linting, formatting, and bug fixes

The one bug fix was in the timeout arg to openai_image_request. Otherwise, this is all style changes to get MyPy and black passing cleanly.

Branch: OllamaVlmModel
Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>

This commit is contained in:
parent 7b7a3a2004
commit f14c1b4f05
@@ -537,7 +537,9 @@ def convert(
         if vlm_model == VlmModelType.GRANITE_VISION:
             pipeline_options.vlm_options = granite_vision_vlm_conversion_options
         elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
-            pipeline_options.vlm_options = granite_vision_vlm_ollama_conversion_options
+            pipeline_options.vlm_options = (
+                granite_vision_vlm_ollama_conversion_options
+            )
         elif vlm_model == VlmModelType.SMOLDOCLING:
             pipeline_options.vlm_options = smoldocling_vlm_conversion_options
             if sys.platform == "darwin":
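For context, the same selection is available from the Python API; the following is a hedged sketch based on docling's documented VLM-pipeline usage, not code from this commit:

# Sketch, not part of this commit: pick the Granite Vision + Ollama options
# programmatically instead of via the CLI flag handled above.
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import (
    VlmPipelineOptions,
    granite_vision_vlm_ollama_conversion_options,
)
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

pipeline_options = VlmPipelineOptions()
pipeline_options.vlm_options = granite_vision_vlm_ollama_conversion_options

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline, pipeline_options=pipeline_options
        )
    }
)
result = converter.convert("page.pdf")  # needs a running Ollama server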
@@ -266,6 +266,7 @@ class Page(BaseModel):
 
 ## OpenAI API Request / Response Models ##
 
+
 class OpenAiChatMessage(BaseModel):
     role: str
     content: str
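These are plain pydantic models; nothing here is commit-specific, but a minimal sketch of how the message model above behaves under pydantic v2 may help readers skimming the diff:

from pydantic import BaseModel

class OpenAiChatMessage(BaseModel):
    role: str
    content: str

# Fields are validated on construction.
msg = OpenAiChatMessage(role="assistant", content="# Page 1\n...")
print(msg.model_dump())  # {'role': 'assistant', 'content': '# Page 1\n...'}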
@@ -290,7 +290,7 @@ class OpenAiVlmOptions(BaseVlmOptions):
 
     model_id: str
     base_url: str = "http://localhost:11434/v1"  # Default to ollama
-    apikey: str | None = None,
+    apikey: Optional[str] = None
     scale: float = 2.0
     timeout: float = 60
     response_format: ResponseFormat
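The removed line was more than a style problem: since Python 3.8 the right-hand side of an annotated assignment may be an unparenthesized tuple, so the trailing comma silently made the default (None,) rather than None, and `str | None` is not valid at runtime before Python 3.10 anyway. A minimal illustration, not docling code:

from typing import Optional

class Example:
    apikey: Optional[str] = None,  # trailing comma: the default is a tuple

print(Example.apikey)  # prints (None,), not None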
@@ -322,8 +322,8 @@ granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
 granite_vision_vlm_ollama_conversion_options = OpenAiVlmOptions(
     model_id="granite3.2-vision:2b",
     prompt="OCR the full page to markdown.",
-    scale = 1.0,
-    timeout = 120,
+    scale=1.0,
+    timeout=120,
     response_format=ResponseFormat.MARKDOWN,
 )
 
@@ -383,7 +383,9 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
         False  # (To be used with vlms, or other generative models)
     )
     # If True, text from backend will be used instead of generated text
-    vlm_options: Union[HuggingFaceVlmOptions] = smoldocling_vlm_conversion_options
+    vlm_options: Union[HuggingFaceVlmOptions, OpenAiVlmOptions] = (
+        smoldocling_vlm_conversion_options
+    )
 
 
 class PdfPipelineOptions(PaginatedPipelineOptions):
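Worth noting: a one-member Union is not a union at all, so the old annotation was equivalent to the bare HuggingFaceVlmOptions; widening it is what actually admits the new options type. A quick demonstration:

from typing import Union

# typing collapses a single-member Union to the member itself.
print(Union[int] is int)  # True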
@@ -18,11 +18,15 @@ class OpenAiVlmModel(BasePageModel):
         self.enabled = enabled
         self.vlm_options = vlm_options
         if self.enabled:
-            self.url = "/".join([self.vlm_options.base_url.rstrip("/"), "chat/completions"])
+            self.url = "/".join(
+                [self.vlm_options.base_url.rstrip("/"), "chat/completions"]
+            )
             self.apikey = self.vlm_options.apikey
             self.model_id = self.vlm_options.model_id
             self.timeout = self.vlm_options.timeout
-            self.prompt_content = f"This is a page from a document.\n{self.vlm_options.prompt}"
+            self.prompt_content = (
+                f"This is a page from a document.\n{self.vlm_options.prompt}"
+            )
 
     def __call__(
         self, conv_res: ConversionResult, page_batch: Iterable[Page]
@@ -36,6 +40,7 @@ class OpenAiVlmModel(BasePageModel):
                     assert page.size is not None
 
                     hi_res_image = page.get_image(scale=self.vlm_options.scale)
+                    assert hi_res_image is not None
                     if hi_res_image:
                         if hi_res_image.mode != "RGB":
                             hi_res_image = hi_res_image.convert("RGB")
@@ -52,7 +52,7 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel):
                 image=image,
                 prompt=self.options.prompt,
                 url=self.options.url,
-                timeout=self.options.headers,
+                timeout=self.options.timeout,
                 headers=self.options.headers,
                 **self.options.params,
             )
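This hunk is the one real bug fix named in the commit message: the headers dict was passed as timeout, and headers was dropped from the call. Python accepts such keyword mix-ups silently; a hypothetical stand-in (not the docling function) shows why the mistake only surfaces at request time or under a static checker:

from typing import Dict, Optional

def request_stub(*, timeout: float = 20, headers: Optional[Dict[str, str]] = None) -> None:
    # requests would reject a dict while building its urllib3 Timeout;
    # mypy flags the same mismatch statically, before anything runs.
    if not isinstance(timeout, (int, float)):
        raise TypeError(f"timeout must be a number, got {timeout!r}")

try:
    request_stub(timeout={"Authorization": "Bearer ..."})  # the pre-fix call shape
except TypeError as err:
    print(err)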
@@ -64,28 +64,29 @@ class VlmPipeline(PaginatedPipeline):
             self.build_pipe = [
                 OpenAiVlmModel(
                     enabled=True,  # must be always enabled for this pipeline to make sense.
-                    vlm_options=self.pipeline_options.vlm_options,
+                    vlm_options=cast(
+                        OpenAiVlmOptions, self.pipeline_options.vlm_options
+                    ),
                 ),
             ]
-        elif (
-            self.pipeline_options.vlm_options.inference_framework
-            == InferenceFramework.MLX
-        ):
-            self.build_pipe = [
-                HuggingFaceMlxModel(
-                    enabled=True,  # must be always enabled for this pipeline to make sense.
-                    artifacts_path=artifacts_path,
-                    accelerator_options=pipeline_options.accelerator_options,
-                    vlm_options=self.pipeline_options.vlm_options,
-                ),
-            ]
-        elif isinstance(pipeline_options.vlm_options, HuggingFaceVlmOptions):
-            self.build_pipe = [
-                HuggingFaceVlmModel(
-                    enabled=True,  # must be always enabled for this pipeline to make sense.
-                    artifacts_path=artifacts_path,
-                    accelerator_options=pipeline_options.accelerator_options,
-                    vlm_options=self.pipeline_options.vlm_options,
-                ),
-            ]
+        elif isinstance(self.pipeline_options.vlm_options, HuggingFaceVlmOptions):
+            vlm_options = cast(HuggingFaceVlmOptions, self.pipeline_options.vlm_options)
+            if vlm_options.inference_framework == InferenceFramework.MLX:
+                self.build_pipe = [
+                    HuggingFaceMlxModel(
+                        enabled=True,  # must be always enabled for this pipeline to make sense.
+                        artifacts_path=artifacts_path,
+                        accelerator_options=pipeline_options.accelerator_options,
+                        vlm_options=vlm_options,
+                    ),
+                ]
+            else:
+                self.build_pipe = [
+                    HuggingFaceVlmModel(
+                        enabled=True,  # must be always enabled for this pipeline to make sense.
+                        artifacts_path=artifacts_path,
+                        accelerator_options=pipeline_options.accelerator_options,
+                        vlm_options=vlm_options,
+                    ),
+                ]
 
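The restructuring above swaps an attribute probe on the union-typed options for isinstance checks, which mypy can narrow on, with cast filling in where the check has already been made. A minimal, self-contained version of the pattern:

from typing import Union, cast

class HfOpts:
    inference_framework = "mlx"

class ApiOpts:
    base_url = "http://localhost:11434/v1"

def build(opts: Union[HfOpts, ApiOpts]) -> str:
    if isinstance(opts, HfOpts):
        # mypy narrows opts to HfOpts here, so this attribute access type-checks.
        return opts.inference_framework
    # Outside the isinstance branch, cast() records what is already known.
    return cast(ApiOpts, opts).base_url

print(build(HfOpts()), build(ApiOpts()))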
@@ -4,10 +4,11 @@ import logging
 from io import BytesIO
 from itertools import islice
 from pathlib import Path
-from typing import List, Union
+from typing import Dict, List, Optional, Union
 
 import requests
 from PIL import Image
+from pydantic import AnyUrl
 from tqdm import tqdm
 
 from docling.datamodel.base_models import OpenAiApiResponse
@@ -75,10 +76,12 @@ def download_url_with_progress(url: str, progress: bool = False) -> BytesIO:
 def openai_image_request(
     image: Image.Image,
     prompt: str,
-    url: str = "http://localhost:11434/v1/chat/completions",  # Default to ollama
-    apikey: str | None = None,
+    url: Union[
+        AnyUrl, str
+    ] = "http://localhost:11434/v1/chat/completions",  # Default to ollama
+    apikey: Optional[str] = None,
     timeout: float = 20,
-    headers: dict[str, str] | None = None,
+    headers: Optional[Dict[str, str]] = None,
     **params,
 ) -> str:
     img_io = BytesIO()
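A hedged usage sketch of the signature above; the import path and the model keyword (presumably forwarded into the request body via **params) are assumptions not shown in this diff, and it expects a local Ollama server with the vision model pulled:

from PIL import Image

from docling.utils.utils import openai_image_request  # module path assumed

img = Image.open("page.png")
markdown = openai_image_request(
    image=img,
    prompt="OCR the full page to markdown.",
    url="http://localhost:11434/v1/chat/completions",
    timeout=120,
    model="granite3.2-vision:2b",  # assumed to be merged into the payload
)
print(markdown)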
@@ -90,9 +93,7 @@ def openai_image_request(
             "content": [
                 {
                     "type": "image_url",
-                    "image_url": {
-                        "url": f"data:image/png;base64,{image_base64}"
-                    },
+                    "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                 },
                 {
                     "type": "text",
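For reference, the complete message this hunk edits presumably assembles to the standard chat-completions vision payload; the text part is cut off in the hunk, so its exact key is an assumption:

import base64
from io import BytesIO

from PIL import Image

# Stand-in inputs so the snippet runs on its own.
img = Image.new("RGB", (8, 8))
buf = BytesIO()
img.save(buf, "PNG")
image_base64 = base64.b64encode(buf.getvalue()).decode("utf-8")
prompt = "OCR the full page to markdown."

messages = [
    {
        "role": "user",
        "content": [
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/png;base64,{image_base64}"},
            },
            {"type": "text", "text": prompt},  # "text" key assumed
        ],
    }
]
print(messages[0]["content"][1])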