Mirror of https://github.com/DS4SD/docling.git (synced 2025-07-27 04:24:45 +00:00)
fix: Linting, formatting, and bug fixes
The one bug fix was in the timeout arg to openai_image_request. Otherwise, this is all style changes to get MyPy and black passing cleanly.

Branch: OllamaVlmModel

Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
parent 7b7a3a2004
commit f14c1b4f05
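For context, a minimal, self-contained sketch (hypothetical names, not the docling code) of the keyword-argument mix-up fixed below and of why annotated signatures let mypy flag it:

```python
from typing import Dict, Optional


def api_request(timeout: float = 20, headers: Optional[Dict[str, str]] = None) -> None:
    """Stand-in for an HTTP helper that takes a float timeout and a header dict."""


class Options:
    timeout: float = 60
    headers: Dict[str, str] = {}


opts = Options()

# Before the fix, the headers dict was passed as the timeout value; with the
# annotations above, mypy reports: Argument "timeout" has incompatible type
# "Dict[str, str]"; expected "float".
# api_request(timeout=opts.headers, headers=opts.headers)

# After the fix, each option is forwarded to the matching keyword argument.
api_request(timeout=opts.timeout, headers=opts.headers)
```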
```diff
@@ -537,7 +537,9 @@ def convert(
         if vlm_model == VlmModelType.GRANITE_VISION:
             pipeline_options.vlm_options = granite_vision_vlm_conversion_options
         elif vlm_model == VlmModelType.GRANITE_VISION_OLLAMA:
-            pipeline_options.vlm_options = granite_vision_vlm_ollama_conversion_options
+            pipeline_options.vlm_options = (
+                granite_vision_vlm_ollama_conversion_options
+            )
         elif vlm_model == VlmModelType.SMOLDOCLING:
             pipeline_options.vlm_options = smoldocling_vlm_conversion_options
             if sys.platform == "darwin":
```
```diff
@@ -266,6 +266,7 @@ class Page(BaseModel):
 
 ## OpenAI API Request / Response Models ##
 
+
 class OpenAiChatMessage(BaseModel):
     role: str
     content: str
```
```diff
@@ -289,8 +289,8 @@ class OpenAiVlmOptions(BaseVlmOptions):
     kind: Literal["openai_model_options"] = "openai_model_options"
 
     model_id: str
     base_url: str = "http://localhost:11434/v1"  # Default to ollama
-    apikey: str | None = None,
+    apikey: Optional[str] = None
     scale: float = 2.0
     timeout: float = 60
     response_format: ResponseFormat
```
```diff
@@ -322,8 +322,8 @@ granite_vision_vlm_conversion_options = HuggingFaceVlmOptions(
 granite_vision_vlm_ollama_conversion_options = OpenAiVlmOptions(
     model_id="granite3.2-vision:2b",
     prompt="OCR the full page to markdown.",
-    scale = 1.0,
-    timeout = 120,
+    scale=1.0,
+    timeout=120,
     response_format=ResponseFormat.MARKDOWN,
 )
 
```
```diff
@@ -383,7 +383,9 @@ class VlmPipelineOptions(PaginatedPipelineOptions):
         False  # (To be used with vlms, or other generative models)
     )
     # If True, text from backend will be used instead of generated text
-    vlm_options: Union[HuggingFaceVlmOptions] = smoldocling_vlm_conversion_options
+    vlm_options: Union[HuggingFaceVlmOptions, OpenAiVlmOptions] = (
+        smoldocling_vlm_conversion_options
+    )
 
 
 class PdfPipelineOptions(PaginatedPipelineOptions):
```
```diff
@@ -18,11 +18,15 @@ class OpenAiVlmModel(BasePageModel):
         self.enabled = enabled
         self.vlm_options = vlm_options
         if self.enabled:
-            self.url = "/".join([self.vlm_options.base_url.rstrip("/"), "chat/completions"])
+            self.url = "/".join(
+                [self.vlm_options.base_url.rstrip("/"), "chat/completions"]
+            )
             self.apikey = self.vlm_options.apikey
             self.model_id = self.vlm_options.model_id
             self.timeout = self.vlm_options.timeout
-            self.prompt_content = f"This is a page from a document.\n{self.vlm_options.prompt}"
+            self.prompt_content = (
+                f"This is a page from a document.\n{self.vlm_options.prompt}"
+            )
 
     def __call__(
         self, conv_res: ConversionResult, page_batch: Iterable[Page]
```
```diff
@@ -36,6 +40,7 @@ class OpenAiVlmModel(BasePageModel):
                 assert page.size is not None
 
                 hi_res_image = page.get_image(scale=self.vlm_options.scale)
+                assert hi_res_image is not None
                 if hi_res_image:
                     if hi_res_image.mode != "RGB":
                         hi_res_image = hi_res_image.convert("RGB")
```
```diff
@@ -52,7 +52,7 @@ class PictureDescriptionApiModel(PictureDescriptionBaseModel):
                 image=image,
                 prompt=self.options.prompt,
                 url=self.options.url,
-                timeout=self.options.headers,
+                timeout=self.options.timeout,
                 headers=self.options.headers,
                 **self.options.params,
             )
```
```diff
@@ -64,30 +64,31 @@ class VlmPipeline(PaginatedPipeline):
             self.build_pipe = [
                 OpenAiVlmModel(
                     enabled=True,  # must be always enabled for this pipeline to make sense.
-                    vlm_options=self.pipeline_options.vlm_options,
-                ),
-            ]
-        elif (
-            self.pipeline_options.vlm_options.inference_framework
-            == InferenceFramework.MLX
-        ):
-            self.build_pipe = [
-                HuggingFaceMlxModel(
-                    enabled=True,  # must be always enabled for this pipeline to make sense.
-                    artifacts_path=artifacts_path,
-                    accelerator_options=pipeline_options.accelerator_options,
-                    vlm_options=self.pipeline_options.vlm_options,
-                ),
-            ]
-        elif isinstance(pipeline_options.vlm_options, HuggingFaceVlmOptions):
-            self.build_pipe = [
-                HuggingFaceVlmModel(
-                    enabled=True,  # must be always enabled for this pipeline to make sense.
-                    artifacts_path=artifacts_path,
-                    accelerator_options=pipeline_options.accelerator_options,
-                    vlm_options=self.pipeline_options.vlm_options,
+                    vlm_options=cast(
+                        OpenAiVlmOptions, self.pipeline_options.vlm_options
+                    ),
                 ),
             ]
+        elif isinstance(self.pipeline_options.vlm_options, HuggingFaceVlmOptions):
+            vlm_options = cast(HuggingFaceVlmOptions, self.pipeline_options.vlm_options)
+            if vlm_options.inference_framework == InferenceFramework.MLX:
+                self.build_pipe = [
+                    HuggingFaceMlxModel(
+                        enabled=True,  # must be always enabled for this pipeline to make sense.
+                        artifacts_path=artifacts_path,
+                        accelerator_options=pipeline_options.accelerator_options,
+                        vlm_options=vlm_options,
+                    ),
+                ]
+            else:
+                self.build_pipe = [
+                    HuggingFaceVlmModel(
+                        enabled=True,  # must be always enabled for this pipeline to make sense.
+                        artifacts_path=artifacts_path,
+                        accelerator_options=pipeline_options.accelerator_options,
+                        vlm_options=vlm_options,
+                    ),
+                ]
 
         self.enrichment_pipe = [
             # Other models working on `NodeItem` elements in the DoclingDocument
```
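The refactor above narrows the `Union`-typed `vlm_options` field with `typing.cast` so mypy accepts each branch-specific constructor. A minimal sketch of that pattern (hypothetical classes, not the docling ones); `cast` only informs the type checker and has no runtime effect:

```python
from typing import Union, cast


class OllamaOpts:
    kind = "ollama"
    base_url = "http://localhost:11434/v1"


class HfOpts:
    kind = "huggingface"
    repo_id = "some/model"


def build(options: Union[OllamaOpts, HfOpts]) -> str:
    if options.kind == "ollama":
        # The string comparison does not narrow the Union for mypy,
        # so cast() tells the checker which member this branch handles;
        # at runtime it just returns its argument unchanged.
        ollama_opts = cast(OllamaOpts, options)
        return ollama_opts.base_url
    hf_opts = cast(HfOpts, options)
    return hf_opts.repo_id


print(build(OllamaOpts()))  # -> http://localhost:11434/v1
```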
```diff
@@ -4,10 +4,11 @@ import logging
 from io import BytesIO
 from itertools import islice
 from pathlib import Path
-from typing import List, Union
+from typing import Dict, List, Optional, Union
 
 import requests
 from PIL import Image
+from pydantic import AnyUrl
 from tqdm import tqdm
 
 from docling.datamodel.base_models import OpenAiApiResponse
```
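Context not stated in the diff: `Optional[X]` is the same type as `X | None`, but the PEP 604 `|` spelling only evaluates at runtime on Python 3.10+, and subscripting builtin `dict` needs 3.9+, so the `typing` aliases keep these annotations importable on older interpreters. A tiny illustration:

```python
from typing import Dict, Optional

# Equivalent annotations; the typing spellings also parse on Python < 3.10.
apikey: Optional[str] = None              # same type as: str | None
headers: Optional[Dict[str, str]] = None  # same type as: dict[str, str] | None
```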
```diff
@@ -75,10 +76,12 @@ def download_url_with_progress(url: str, progress: bool = False) -> BytesIO:
 def openai_image_request(
     image: Image.Image,
     prompt: str,
-    url: str = "http://localhost:11434/v1/chat/completions",  # Default to ollama
-    apikey: str | None = None,
+    url: Union[
+        AnyUrl, str
+    ] = "http://localhost:11434/v1/chat/completions",  # Default to ollama
+    apikey: Optional[str] = None,
     timeout: float = 20,
-    headers: dict[str, str] | None = None,
+    headers: Optional[Dict[str, str]] = None,
     **params,
 ) -> str:
     img_io = BytesIO()
```
|
|||||||
"content": [
|
"content": [
|
||||||
{
|
{
|
||||||
"type": "image_url",
|
"type": "image_url",
|
||||||
"image_url": {
|
"image_url": {"url": f"data:image/png;base64,{image_base64}"},
|
||||||
"url": f"data:image/png;base64,{image_base64}"
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"type": "text",
|
"type": "text",
|
||||||
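Putting the pieces together, a hedged usage sketch of `openai_image_request` against a local Ollama server, reusing the prompt, timeout, and model id from the `granite_vision_vlm_ollama_conversion_options` preset above. The import path and the forwarding of `model` through `**params` into the request body are assumptions, not confirmed by this diff:

```python
# Sketch only: assumes Ollama is serving granite3.2-vision:2b locally and that
# openai_image_request lives in docling's utils module (exact path assumed).
from PIL import Image

from docling.utils.utils import openai_image_request  # import path is an assumption

page_image = Image.open("page.png").convert("RGB")

markdown = openai_image_request(
    image=page_image,
    prompt="OCR the full page to markdown.",
    url="http://localhost:11434/v1/chat/completions",
    timeout=120,  # matches the granite-vision Ollama preset in this commit
    model="granite3.2-vision:2b",  # assumed to reach the request body via **params
)
print(markdown)
```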