show other vlm

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
2025-07-31 14:34:40 +00:00 · 2025-02-19 08:25:57 +01:00 · 2025-02-19 08:25:57 +01:00 · 287e621c7a
commit 287e621c7a
parent 753c12b29e
1 changed files with 47 additions and 1 deletions
--- a/docs/examples/pictures_description.ipynb
+++ b/docs/examples/pictures_description.ipynb
@ -175,7 +175,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@ -265,6 +265,52 @@
    "display.HTML(\"<hr />\".join(html_buffer))"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Use other vision models\n",
+    "\n",
+    "The examples above can also be reproduced using other vision models.\n",
+    "The Docling option class `PictureDescriptionVlmOptions` allows you to specify your favorite vision model from the Hugging Face Hub."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from docling.datamodel.pipeline_options import PictureDescriptionVlmOptions\n",
+    "\n",
+    "pipeline_options = PdfPipelineOptions()\n",
+    "pipeline_options.do_picture_description = True\n",
+    "pipeline_options.picture_description_options = PictureDescriptionVlmOptions(\n",
+    "    repo_id=\"\",  # <-- add here the Hugging Face repo_id of your favorite VLM\n",
+    "    prompt=\"Describe the image in three sentences. Be consise and accurate.\",\n",
+    ")\n",
+    "pipeline_options.images_scale = 2.0\n",
+    "pipeline_options.generate_picture_images = True\n",
+    "\n",
+    "converter = DocumentConverter(\n",
+    "    format_options={\n",
+    "        InputFormat.PDF: PdfFormatOption(\n",
+    "            pipeline_options=pipeline_options,\n",
+    "        )\n",
+    "    }\n",
+    ")\n",
+    "\n",
+    "# Uncomment to run:\n",
+    "# doc = converter.convert(DOC_SOURCE).document"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": null,