{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Note: you may need to restart the kernel to use updated packages.\n"
]
}
],
"source": [
"%pip install -q docling ipython"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from docling.datamodel.base_models import InputFormat\n",
"from docling.datamodel.pipeline_options import ( # granite_picture_description,\n",
" PdfPipelineOptions,\n",
" smolvlm_picture_description,\n",
")\n",
"from docling.document_converter import DocumentConverter, PdfFormatOption"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"DOC_SOURCE = \"https://arxiv.org/pdf/2206.01062\"\n",
"\n",
"pipeline_options = PdfPipelineOptions()\n",
"pipeline_options.do_picture_description = True\n",
"pipeline_options.picture_description_options = smolvlm_picture_description\n",
"# pipeline_options.picture_description_options = granite_picture_description\n",
"pipeline_options.picture_description_options.prompt = (\n",
" \"Describe the image in three sentences. Be consise and accurate.\"\n",
")\n",
"pipeline_options.images_scale = 2.0\n",
"pipeline_options.generate_picture_images = True\n",
"\n",
"converter = DocumentConverter(\n",
" format_options={\n",
" InputFormat.PDF: PdfFormatOption(\n",
" pipeline_options=pipeline_options,\n",
" )\n",
" }\n",
")\n",
"doc = converter.convert(DOC_SOURCE).document"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
#/pictures/0
#/pictures/1
#/pictures/2
#/pictures/3
#/pictures/4
{pic.self_ref}