diff --git a/docs/examples/pictures_description.ipynb b/docs/examples/pictures_description.ipynb
index 33e94e01..f50860db 100644
--- a/docs/examples/pictures_description.ipynb
+++ b/docs/examples/pictures_description.ipynb
@@ -175,7 +175,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -265,6 +265,52 @@
"display.HTML(\"<hr/>\".join(html_buffer))"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "---"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Use other vision models\n",
+ "\n",
+    "The examples above can also be reproduced using other vision models.\n",
+    "The Docling option `PictureDescriptionVlmOptions` allows you to specify your favorite vision model from the Hugging Face Hub."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from docling.datamodel.pipeline_options import PictureDescriptionVlmOptions\n",
+ "\n",
+ "pipeline_options = PdfPipelineOptions()\n",
+ "pipeline_options.do_picture_description = True\n",
+ "pipeline_options.picture_description_options = PictureDescriptionVlmOptions(\n",
+ " repo_id=\"\", # <-- add here the Hugging Face repo_id of your favorite VLM\n",
+    "    prompt=\"Describe the image in three sentences. Be concise and accurate.\",\n",
+ ")\n",
+ "pipeline_options.images_scale = 2.0\n",
+ "pipeline_options.generate_picture_images = True\n",
+ "\n",
+ "converter = DocumentConverter(\n",
+ " format_options={\n",
+ " InputFormat.PDF: PdfFormatOption(\n",
+ " pipeline_options=pipeline_options,\n",
+ " )\n",
+ " }\n",
+ ")\n",
+ "\n",
+ "# Uncomment to run:\n",
+ "# doc = converter.convert(DOC_SOURCE).document"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,