mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-02 07:22:14 +00:00
more renaming
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
b6ed0b34cd
commit
90f0428a62
@ -218,7 +218,7 @@ class PictureDescriptionVlmOptions(PictureDescriptionBaseOptions):
|
|||||||
smolvlm_picture_description = PictureDescriptionVlmOptions(
|
smolvlm_picture_description = PictureDescriptionVlmOptions(
|
||||||
repo_id="HuggingFaceTB/SmolVLM-256M-Instruct"
|
repo_id="HuggingFaceTB/SmolVLM-256M-Instruct"
|
||||||
)
|
)
|
||||||
# phi_pic_desc = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct")
|
# phi_picture_description = PictureDescriptionVlmOptions(repo_id="microsoft/Phi-3-vision-128k-instruct")
|
||||||
granite_picture_description = PictureDescriptionVlmOptions(
|
granite_picture_description = PictureDescriptionVlmOptions(
|
||||||
repo_id="ibm-granite/granite-vision-3.1-2b-preview",
|
repo_id="ibm-granite/granite-vision-3.1-2b-preview",
|
||||||
prompt="What is shown in this image?",
|
prompt="What is shown in this image?",
|
||||||
|
@ -12,7 +12,7 @@ from PIL import Image
|
|||||||
from pydantic import BaseModel, ConfigDict
|
from pydantic import BaseModel, ConfigDict
|
||||||
|
|
||||||
from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
|
from docling.datamodel.pipeline_options import PictureDescriptionApiOptions
|
||||||
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
|
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
@ -1,6 +1,5 @@
|
|||||||
import json
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable, List, Optional, Union
|
from typing import Iterable, Optional, Union
|
||||||
|
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
|
|
||||||
@ -8,7 +7,7 @@ from docling.datamodel.pipeline_options import (
|
|||||||
AcceleratorOptions,
|
AcceleratorOptions,
|
||||||
PictureDescriptionVlmOptions,
|
PictureDescriptionVlmOptions,
|
||||||
)
|
)
|
||||||
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
|
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
||||||
from docling.utils.accelerator_utils import decide_device
|
from docling.utils.accelerator_utils import decide_device
|
||||||
|
|
||||||
|
|
@ -36,9 +36,9 @@ from docling.models.page_preprocessing_model import (
|
|||||||
PagePreprocessingModel,
|
PagePreprocessingModel,
|
||||||
PagePreprocessingOptions,
|
PagePreprocessingOptions,
|
||||||
)
|
)
|
||||||
from docling.models.pic_description_api_model import PictureDescriptionApiModel
|
from docling.models.picture_description_api_model import PictureDescriptionApiModel
|
||||||
from docling.models.pic_description_base_model import PictureDescriptionBaseModel
|
from docling.models.picture_description_base_model import PictureDescriptionBaseModel
|
||||||
from docling.models.pic_description_vlm_model import PictureDescriptionVlmModel
|
from docling.models.picture_description_vlm_model import PictureDescriptionVlmModel
|
||||||
from docling.models.rapid_ocr_model import RapidOcrModel
|
from docling.models.rapid_ocr_model import RapidOcrModel
|
||||||
from docling.models.table_structure_model import TableStructureModel
|
from docling.models.table_structure_model import TableStructureModel
|
||||||
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
|
from docling.models.tesseract_ocr_cli_model import TesseractOcrCliModel
|
||||||
@ -101,7 +101,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
]
|
]
|
||||||
|
|
||||||
# Picture description model
|
# Picture description model
|
||||||
if (pic_desc_model := self.get_pic_description_model()) is None:
|
if (picture_description_model := self.get_picture_description_model()) is None:
|
||||||
raise RuntimeError(
|
raise RuntimeError(
|
||||||
f"The specified picture description kind is not supported: {pipeline_options.picture_description_options.kind}."
|
f"The specified picture description kind is not supported: {pipeline_options.picture_description_options.kind}."
|
||||||
)
|
)
|
||||||
@ -126,7 +126,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
accelerator_options=pipeline_options.accelerator_options,
|
accelerator_options=pipeline_options.accelerator_options,
|
||||||
),
|
),
|
||||||
# Document Picture description
|
# Document Picture description
|
||||||
pic_desc_model,
|
picture_description_model,
|
||||||
]
|
]
|
||||||
|
|
||||||
if (
|
if (
|
||||||
@ -188,7 +188,7 @@ class StandardPdfPipeline(PaginatedPipeline):
|
|||||||
)
|
)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_pic_description_model(
|
def get_picture_description_model(
|
||||||
self, artifacts_path: Optional[Path] = None
|
self, artifacts_path: Optional[Path] = None
|
||||||
) -> Optional[PictureDescriptionBaseModel]:
|
) -> Optional[PictureDescriptionBaseModel]:
|
||||||
if isinstance(
|
if isinstance(
|
||||||
|
@ -20,7 +20,7 @@ def main():
|
|||||||
pipeline_options = PdfPipelineOptions()
|
pipeline_options = PdfPipelineOptions()
|
||||||
pipeline_options.do_picture_description = True
|
pipeline_options.do_picture_description = True
|
||||||
pipeline_options.picture_description_options = smolvlm_picture_description
|
pipeline_options.picture_description_options = smolvlm_picture_description
|
||||||
# pipeline_options.picture_description_options = granite_pic_desc
|
# pipeline_options.picture_description_options = granite_picture_description
|
||||||
|
|
||||||
pipeline_options.picture_description_options.prompt = (
|
pipeline_options.picture_description_options.prompt = (
|
||||||
"Describe the image in three sentences. Be consise and accurate."
|
"Describe the image in three sentences. Be consise and accurate."
|
||||||
|
Loading…
Reference in New Issue
Block a user