{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install -q docling[vlm] ipython"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from docling.datamodel.base_models import InputFormat\n",
"from docling.datamodel.pipeline_options import (\n",
" PdfPipelineOptions,\n",
" granite_picture_description,\n",
" smolvlm_picture_description,\n",
")\n",
"from docling.document_converter import DocumentConverter, PdfFormatOption"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "9d3bb7b3b4fd4640af40289dd7bf50d7",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Loading checkpoint shards: 0%| | 0/2 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"DOC_SOURCE = \"https://arxiv.org/pdf/2501.17887\"\n",
"\n",
"pipeline_options = PdfPipelineOptions()\n",
"pipeline_options.do_picture_description = True\n",
"# pipeline_options.picture_description_options = smolvlm_picture_description\n",
"pipeline_options.picture_description_options = granite_picture_description\n",
"pipeline_options.picture_description_options.prompt = (\n",
" \"Describe the image in three sentences. Be consise and accurate.\"\n",
")\n",
"pipeline_options.images_scale = 2.0\n",
"pipeline_options.generate_picture_images = True\n",
"\n",
"converter = DocumentConverter(\n",
" format_options={\n",
" InputFormat.PDF: PdfFormatOption(\n",
" pipeline_options=pipeline_options,\n",
" )\n",
" }\n",
")\n",
"doc = converter.convert(DOC_SOURCE).document"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
#/pictures/0
#/pictures/1
#/pictures/2
#/pictures/3
#/pictures/4
{pic.self_ref}