docling/index.html

3449 lines
62 KiB
HTML

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<!-- Canonical URL of this page, plus a relative link to the next page in sequence. -->
<link rel="canonical" href="https://docling-project.github.io/docling/">
<link rel="next" href="installation/">
<link rel="icon" href="assets/logo.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.6.15">
<title>Docling - Docling</title>
<!-- Theme stylesheets (fingerprinted file names): main styles, then the colour palette. -->
<link rel="stylesheet" href="assets/stylesheets/main.342714a4.min.css">
<link rel="stylesheet" href="assets/stylesheets/palette.06af60db.min.css">
<!-- Web fonts: preconnect to fonts.gstatic.com, then load the Roboto and Roboto Mono stylesheet
     from Google Fonts. The inline style publishes both family names as CSS custom properties. -->
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<!-- Additional stylesheets, loaded after the theme styles. -->
<link rel="stylesheet" href="assets/_mkdocstrings.css">
<link rel="stylesheet" href="stylesheets/extra.css">
<!-- Inline global helpers:
     __md_scope: URL of the directory containing this page (new URL(".", location)).
     __md_hash:  folds the character codes of a string into an integer.
     __md_get:   parses the JSON stored under key (scope pathname + "." + name);
                 the storage object defaults to localStorage.
     __md_set:   stores a value as JSON under the same key scheme; storage errors are ignored. -->
<script>__md_scope=new URL(".",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
</head>
<body dir="ltr" data-md-color-scheme="default" data-md-color-primary="black" data-md-color-accent="indigo">
<input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
<input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
<label class="md-overlay" for="__drawer"></label>
<div data-md-component="skip">
<a href="#features" class="md-skip">
Skip to content
</a>
</div>
<div data-md-component="announce">
</div>
<header class="md-header" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<a href="." title="Docling" class="md-header__button md-logo" aria-label="Docling" data-md-component="logo">
<img src="assets/logo.png" alt="logo">
</a>
<label class="md-header__button md-icon" for="__drawer">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
</label>
<div class="md-header__title" data-md-component="header-title">
<div class="md-header__ellipsis">
<div class="md-header__topic">
<span class="md-ellipsis">
Docling
</span>
</div>
<div class="md-header__topic" data-md-component="header-topic">
<span class="md-ellipsis">
Docling
</span>
</div>
</div>
</div>
<form class="md-header__option" data-md-component="palette">
<input class="md-option" data-md-color-media="(prefers-color-scheme)" data-md-color-scheme="default" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_0">
<label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="m14.3 16-.7-2h-3.2l-.7 2H7.8L11 7h2l3.2 9zM20 8.69V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12zm-9.15 3.96h2.3L12 9z"/></svg>
</label>
<input class="md-option" data-md-color-media="(prefers-color-scheme: light)" data-md-color-scheme="default" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_1">
<label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
<input class="md-option" data-md-color-media="(prefers-color-scheme: dark)" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo" aria-label="Switch to system preference" type="radio" name="__palette" id="__palette_2">
<label class="md-header__button md-icon" title="Switch to system preference" for="__palette_0" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg>
</label>
</form>
<script>
/* Restore the stored colour palette and copy it onto <body> before first paint. */
var palette = __md_get("__palette");
if (palette && palette.color) {
  /* A stored media value of "(prefers-color-scheme)" is resolved to the
     light or dark palette input, chosen by the current system preference. */
  if (palette.color.media === "(prefers-color-scheme)") {
    var media = matchMedia("(prefers-color-scheme: light)");
    var input;
    if (media.matches) {
      input = document.querySelector("[data-md-color-media='(prefers-color-scheme: light)']");
    } else {
      input = document.querySelector("[data-md-color-media='(prefers-color-scheme: dark)']");
    }
    palette.color.media = input.getAttribute("data-md-color-media");
    palette.color.scheme = input.getAttribute("data-md-color-scheme");
    palette.color.primary = input.getAttribute("data-md-color-primary");
    palette.color.accent = input.getAttribute("data-md-color-accent");
  }
  /* Each palette entry becomes a data-md-color-<key> attribute on <body>. */
  for (var [key, value] of Object.entries(palette.color)) {
    document.body.setAttribute("data-md-color-" + key, value);
  }
}
</script>
<label class="md-header__button md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<!-- Search dialog. The "__search" checkbox referenced by the labels below toggles it.
     role="dialog" requires an accessible name, so one is supplied with aria-label. -->
<div class="md-search" data-md-component="search" role="dialog" aria-label="Search">
<label class="md-search__overlay" for="__search"></label>
<div class="md-search__inner" role="search">
<form class="md-search__form" name="search">
<input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
<label class="md-search__icon md-icon" for="__search">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</label>
<nav class="md-search__options" aria-label="Search">
<button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
</button>
</nav>
<div class="md-search__suggest" data-md-component="search-suggest"></div>
</form>
<div class="md-search__output">
<div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
<div class="md-search-result" data-md-component="search-result">
<div class="md-search-result__meta">
Initializing search
</div>
<ol class="md-search-result__list" role="presentation"></ol>
</div>
</div>
</div>
</div>
</div>
<div class="md-header__source">
<a href="https://github.com/docling-project/docling" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
docling-project/docling
</div>
</a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<!-- Top-level section tabs. The active tab links to this page, so it carries
     aria-current="page" in addition to the visual "active" class. -->
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
<div class="md-grid">
<ul class="md-tabs__list">
<li class="md-tabs__item md-tabs__item--active">
<a href="." class="md-tabs__link" aria-current="page">
Home
</a>
</li>
<li class="md-tabs__item">
<a href="concepts/" class="md-tabs__link">
Concepts
</a>
</li>
<li class="md-tabs__item">
<a href="examples/" class="md-tabs__link">
Examples
</a>
</li>
<li class="md-tabs__item">
<a href="integrations/" class="md-tabs__link">
Integrations
</a>
</li>
<li class="md-tabs__item">
<a href="reference/document_converter/" class="md-tabs__link">
Reference
</a>
</li>
</ul>
</div>
</nav>
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<label class="md-nav__title" for="__drawer">
<a href="." title="Docling" class="md-nav__button md-logo" aria-label="Docling" data-md-component="logo">
<img src="assets/logo.png" alt="logo">
</a>
Docling
</label>
<div class="md-nav__source">
<a href="https://github.com/docling-project/docling" title="Go to repository" class="md-source" data-md-component="source">
<div class="md-source__icon md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.7.2 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
</div>
<div class="md-source__repository">
docling-project/docling
</div>
</a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" checked>
<!-- Link to the Home section plus the label that toggles the "__nav_1" checkbox.
     The label carries no tabindex: the previous empty value (tabindex="") is not a
     valid integer and had no effect on focus order. -->
<div class="md-nav__link md-nav__container">
<a href="." class="md-nav__link md-nav__link--active">
<span class="md-ellipsis">
Home
</span>
</a>
<label class="md-nav__link md-nav__link--active" for="__nav_1" id="__nav_1_label">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_1">
<span class="md-nav__icon md-icon"></span>
Home
</label>
<ul class="md-nav__list" data-md-scrollfix>
<!-- Installation entry. Its nested nav has no toggle label, so the nav is named
     directly with aria-label instead of pointing aria-labelledby at an id
     ("__nav_1_2_label") that is not present in this item. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_2" >
<div class="md-nav__link md-nav__container">
<a href="installation/" class="md-nav__link ">
<span class="md-ellipsis">
Installation
</span>
</a>
</div>
<nav class="md-nav" data-md-level="2" aria-label="Installation" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_2">
<span class="md-nav__icon md-icon"></span>
Installation
</label>
<ul class="md-nav__list" data-md-scrollfix>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_3" >
<!-- Link to the Usage section plus the label that toggles the "__nav_1_3" checkbox.
     The label carries no tabindex: the previous empty value (tabindex="") is not a
     valid integer and had no effect on focus order. -->
<div class="md-nav__link md-nav__container">
<a href="usage/" class="md-nav__link ">
<span class="md-ellipsis">
Usage
</span>
</a>
<label class="md-nav__link " for="__nav_1_3" id="__nav_1_3_label">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_3">
<span class="md-nav__icon md-icon"></span>
Usage
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="usage/supported_formats/" class="md-nav__link">
<span class="md-ellipsis">
Supported formats
</span>
</a>
</li>
<li class="md-nav__item">
<a href="usage/enrichments/" class="md-nav__link">
<span class="md-ellipsis">
Enrichment features
</span>
</a>
</li>
<li class="md-nav__item">
<a href="usage/vision_models/" class="md-nav__link">
<span class="md-ellipsis">
Vision models
</span>
</a>
</li>
</ul>
</nav>
</li>
<!-- FAQ entry. Its nested nav has no toggle label, so the nav is named directly
     with aria-label instead of pointing aria-labelledby at an id
     ("__nav_1_4_label") that is not present in this item. -->
<li class="md-nav__item md-nav__item--section md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_4" >
<div class="md-nav__link md-nav__container">
<a href="faq/" class="md-nav__link ">
<span class="md-ellipsis">
FAQ
</span>
</a>
</div>
<nav class="md-nav" data-md-level="2" aria-label="FAQ" aria-expanded="false">
<label class="md-nav__title" for="__nav_1_4">
<span class="md-nav__icon md-icon"></span>
FAQ
</label>
<ul class="md-nav__list" data-md-scrollfix>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2" >
<div class="md-nav__link md-nav__container">
<a href="concepts/" class="md-nav__link ">
<span class="md-ellipsis">
Concepts
</span>
</a>
<label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_2">
<span class="md-nav__icon md-icon"></span>
Concepts
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="concepts/architecture/" class="md-nav__link">
<span class="md-ellipsis">
Architecture
</span>
</a>
</li>
<li class="md-nav__item">
<a href="concepts/docling_document/" class="md-nav__link">
<span class="md-ellipsis">
Docling Document
</span>
</a>
</li>
<li class="md-nav__item">
<a href="concepts/serialization/" class="md-nav__link">
<span class="md-ellipsis">
Serialization
</span>
</a>
</li>
<li class="md-nav__item">
<a href="concepts/confidence_scores/" class="md-nav__link">
<span class="md-ellipsis">
Confidence Scores
</span>
</a>
</li>
<li class="md-nav__item">
<a href="concepts/chunking/" class="md-nav__link">
<span class="md-ellipsis">
Chunking
</span>
</a>
</li>
<li class="md-nav__item">
<a href="concepts/plugins/" class="md-nav__link">
<span class="md-ellipsis">
Plugins
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3" >
<div class="md-nav__link md-nav__container">
<a href="examples/" class="md-nav__link ">
<span class="md-ellipsis">
Examples
</span>
</a>
<label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
Examples
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_2" >
<label class="md-nav__link" for="__nav_3_2" id="__nav_3_2_label" tabindex="0">
<span class="md-ellipsis">
🔀 Conversion
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_2">
<span class="md-nav__icon md-icon"></span>
🔀 Conversion
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="examples/minimal/" class="md-nav__link">
<span class="md-ellipsis">
Simple conversion
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/custom_convert/" class="md-nav__link">
<span class="md-ellipsis">
Custom conversion
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/batch_convert/" class="md-nav__link">
<span class="md-ellipsis">
Batch conversion
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/run_with_formats/" class="md-nav__link">
<span class="md-ellipsis">
Multi-format conversion
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/minimal_vlm_pipeline/" class="md-nav__link">
<span class="md-ellipsis">
VLM pipeline with SmolDocling
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/vlm_pipeline_api_model/" class="md-nav__link">
<span class="md-ellipsis">
VLM pipeline with remote model
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/compare_vlm_models/" class="md-nav__link">
<span class="md-ellipsis">
Compare VLM models
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/minimal_asr_pipeline/" class="md-nav__link">
<span class="md-ellipsis">
ASR pipeline with Whisper
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/export_figures/" class="md-nav__link">
<span class="md-ellipsis">
Figure export
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/export_tables/" class="md-nav__link">
<span class="md-ellipsis">
Table export
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/export_multimodal/" class="md-nav__link">
<span class="md-ellipsis">
Multimodal export
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/full_page_ocr/" class="md-nav__link">
<span class="md-ellipsis">
Force full page OCR
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/tesseract_lang_detection/" class="md-nav__link">
<span class="md-ellipsis">
Automatic OCR language detection with tesseract
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/rapidocr_with_custom_models/" class="md-nav__link">
<span class="md-ellipsis">
RapidOCR with custom OCR models
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/run_with_accelerator/" class="md-nav__link">
<span class="md-ellipsis">
Accelerator options
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/translate/" class="md-nav__link">
<span class="md-ellipsis">
Simple translation
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/backend_csv/" class="md-nav__link">
<span class="md-ellipsis">
Conversion of CSV files
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/backend_xml_rag/" class="md-nav__link">
<span class="md-ellipsis">
Conversion of custom XML
</span>
</a>
</li>
</ul>
</nav>
</li>
<!-- "Serialization & chunking" example group. Ampersands in the text are written
     as &amp; so every label in this group is escaped the same way. -->
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_3" >
<label class="md-nav__link" for="__nav_3_3" id="__nav_3_3_label" tabindex="0">
<span class="md-ellipsis">
✂️ Serialization &amp; chunking
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_3">
<span class="md-nav__icon md-icon"></span>
✂️ Serialization &amp; chunking
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="examples/serialization/" class="md-nav__link">
<span class="md-ellipsis">
Serialization
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/hybrid_chunking/" class="md-nav__link">
<span class="md-ellipsis">
Hybrid chunking
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/advanced_chunking_and_serialization/" class="md-nav__link">
<span class="md-ellipsis">
Advanced chunking &amp; serialization
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_4" >
<label class="md-nav__link" for="__nav_3_4" id="__nav_3_4_label" tabindex="0">
<span class="md-ellipsis">
🤖 RAG with AI dev frameworks
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_4">
<span class="md-nav__icon md-icon"></span>
🤖 RAG with AI dev frameworks
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="examples/rag_haystack/" class="md-nav__link">
<span class="md-ellipsis">
RAG with Haystack
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/rag_langchain/" class="md-nav__link">
<span class="md-ellipsis">
RAG with LangChain
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/rag_llamaindex/" class="md-nav__link">
<span class="md-ellipsis">
RAG with LlamaIndex
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/visual_grounding/" class="md-nav__link">
<span class="md-ellipsis">
Visual grounding
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_5" >
<label class="md-nav__link" for="__nav_3_5" id="__nav_3_5_label" tabindex="0">
<span class="md-ellipsis">
🖼️ Picture annotation
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_5">
<span class="md-nav__icon md-icon"></span>
🖼️ Picture annotation
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="examples/pictures_description/" class="md-nav__link">
<span class="md-ellipsis">
Annotate picture with local VLM
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/pictures_description_api/" class="md-nav__link">
<span class="md-ellipsis">
Annotate picture with remote VLM
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_6" >
<label class="md-nav__link" for="__nav_3_6" id="__nav_3_6_label" tabindex="0">
<span class="md-ellipsis">
✨ Enrichment development
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_6_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_6">
<span class="md-nav__icon md-icon"></span>
✨ Enrichment development
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="examples/develop_picture_enrichment/" class="md-nav__link">
<span class="md-ellipsis">
Figure enrichment
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/develop_formula_understanding/" class="md-nav__link">
<span class="md-ellipsis">
Formula enrichment
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/enrich_doclingdocument/" class="md-nav__link">
<span class="md-ellipsis">
Enrich DoclingDocument
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_7" >
<label class="md-nav__link" for="__nav_3_7" id="__nav_3_7_label" tabindex="0">
<span class="md-ellipsis">
🗂️ More examples
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_7_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3_7">
<span class="md-nav__icon md-icon"></span>
🗂️ More examples
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="examples/rag_milvus/" class="md-nav__link">
<span class="md-ellipsis">
RAG with Milvus
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/rag_weaviate/" class="md-nav__link">
<span class="md-ellipsis">
RAG with Weaviate
</span>
</a>
</li>
<li class="md-nav__item">
<a href="https://github.com/ibm-granite-community/granite-snack-cookbook/blob/main/recipes/RAG/Granite_Docling_RAG.ipynb" class="md-nav__link">
<span class="md-ellipsis">
RAG with Granite [↗]
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/rag_azuresearch/" class="md-nav__link">
<span class="md-ellipsis">
RAG with Azure AI Search
</span>
</a>
</li>
<li class="md-nav__item">
<a href="examples/retrieval_qdrant/" class="md-nav__link">
<span class="md-ellipsis">
Retrieval with Qdrant
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
<div class="md-nav__link md-nav__container">
<a href="integrations/" class="md-nav__link ">
<span class="md-ellipsis">
Integrations
</span>
</a>
<label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="0">
<span class="md-nav__icon md-icon"></span>
</label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
Integrations
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4_2" >
<label class="md-nav__link" for="__nav_4_2" id="__nav_4_2_label" tabindex="0">
<span class="md-ellipsis">
🤖 Agentic / AI dev frameworks
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_4_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4_2">
<span class="md-nav__icon md-icon"></span>
🤖 Agentic / AI dev frameworks
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="integrations/bee/" class="md-nav__link">
<span class="md-ellipsis">
Bee Agent Framework
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/crewai/" class="md-nav__link">
<span class="md-ellipsis">
Crew AI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/haystack/" class="md-nav__link">
<span class="md-ellipsis">
Haystack
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/langchain/" class="md-nav__link">
<span class="md-ellipsis">
LangChain
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/llamaindex/" class="md-nav__link">
<span class="md-ellipsis">
LlamaIndex
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/txtai/" class="md-nav__link">
<span class="md-ellipsis">
txtai
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4_3" >
<label class="md-nav__link" for="__nav_4_3" id="__nav_4_3_label" tabindex="0">
<span class="md-ellipsis">
⭐️ Featured
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_4_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4_3">
<span class="md-nav__icon md-icon"></span>
⭐️ Featured
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="integrations/apify/" class="md-nav__link">
<span class="md-ellipsis">
Apify
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/data_prep_kit/" class="md-nav__link">
<span class="md-ellipsis">
Data Prep Kit
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/instructlab/" class="md-nav__link">
<span class="md-ellipsis">
InstructLab
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/nvidia/" class="md-nav__link">
<span class="md-ellipsis">
NVIDIA
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/prodigy/" class="md-nav__link">
<span class="md-ellipsis">
Prodigy
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/rhel_ai/" class="md-nav__link">
<span class="md-ellipsis">
RHEL AI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/spacy/" class="md-nav__link">
<span class="md-ellipsis">
spaCy
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4_4" >
<label class="md-nav__link" for="__nav_4_4" id="__nav_4_4_label" tabindex="0">
<span class="md-ellipsis">
🗂️ More integrations
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_4_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4_4">
<span class="md-nav__icon md-icon"></span>
🗂️ More integrations
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="integrations/cloudera/" class="md-nav__link">
<span class="md-ellipsis">
Cloudera
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/docetl/" class="md-nav__link">
<span class="md-ellipsis">
DocETL
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/kotaemon/" class="md-nav__link">
<span class="md-ellipsis">
Kotaemon
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/opencontracts/" class="md-nav__link">
<span class="md-ellipsis">
OpenContracts
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/openwebui/" class="md-nav__link">
<span class="md-ellipsis">
Open WebUI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="integrations/vectara/" class="md-nav__link">
<span class="md-ellipsis">
Vectara
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
<span class="md-ellipsis">
Reference
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
Reference
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_1" >
<label class="md-nav__link" for="__nav_5_1" id="__nav_5_1_label" tabindex="0">
<span class="md-ellipsis">
Python API
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5_1">
<span class="md-nav__icon md-icon"></span>
Python API
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="reference/document_converter/" class="md-nav__link">
<span class="md-ellipsis">
Document Converter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="reference/pipeline_options/" class="md-nav__link">
<span class="md-ellipsis">
Pipeline options
</span>
</a>
</li>
<li class="md-nav__item">
<a href="reference/docling_document/" class="md-nav__link">
<span class="md-ellipsis">
Docling Document
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_2" >
<label class="md-nav__link" for="__nav_5_2" id="__nav_5_2_label" tabindex="0">
<span class="md-ellipsis">
CLI
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5_2">
<span class="md-nav__icon md-icon"></span>
CLI
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="reference/cli/" class="md-nav__link">
<span class="md-ellipsis">
CLI reference
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<!-- Secondary sidebar: in-page table of contents. Every link targets a heading id inside the article. -->
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc">
  <div class="md-sidebar__scrollwrap">
    <div class="md-sidebar__inner">
      <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
        <label class="md-nav__title" for="__toc">
          <span class="md-nav__icon md-icon"></span>
          Table of contents
        </label>
        <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
          <!-- h2 "Features" with its h3 child "Coming soon" -->
          <li class="md-nav__item">
            <a href="#features" class="md-nav__link">
              <span class="md-ellipsis">
                Features
              </span>
            </a>
            <nav class="md-nav" aria-label="Features">
              <ul class="md-nav__list">
                <li class="md-nav__item">
                  <a href="#coming-soon" class="md-nav__link">
                    <span class="md-ellipsis">
                      Coming soon
                    </span>
                  </a>
                </li>
              </ul>
            </nav>
          </li>
          <li class="md-nav__item">
            <a href="#get-started" class="md-nav__link">
              <span class="md-ellipsis">
                Get started
              </span>
            </a>
          </li>
          <li class="md-nav__item">
            <a href="#live-assistant" class="md-nav__link">
              <span class="md-ellipsis">
                Live assistant
              </span>
            </a>
          </li>
          <!-- h2 "LF AI & Data" with its h3 child "IBM ❤️ Open Source AI" -->
          <li class="md-nav__item">
            <a href="#lf-ai-data" class="md-nav__link">
              <span class="md-ellipsis">
                LF AI &amp; Data
              </span>
            </a>
            <nav class="md-nav" aria-label="LF AI &amp; Data">
              <ul class="md-nav__list">
                <li class="md-nav__item">
                  <a href="#ibm-open-source-ai" class="md-nav__link">
                    <span class="md-ellipsis">
                      IBM ❤️ Open Source AI
                    </span>
                  </a>
                </li>
              </ul>
            </nav>
          </li>
        </ul>
      </nav>
    </div>
  </div>
</div>
<div class="md-content" data-md-component="content">
<article class="md-content__inner md-typeset">
<h1>Docling</h1>
<!--
  Hero banner followed by the Trendshift badge.
  - The banner is the first image on the page, so it is loaded eagerly (no loading="lazy").
  - The badge link opens a new tab; rel="noopener" keeps the opened page from reaching window.opener.
-->
<p align="center">
<img alt="Docling" src="assets/docling_processing.png" width="100%" />
<a href="https://trendshift.io/repositories/12132" target="_blank" rel="noopener"><img src="https://trendshift.io/api/badge/repositories/12132" alt="DS4SD/docling | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</p>
<!-- Badge row: one linked status image per line; each alt text names the badge. -->
<p><a href="https://arxiv.org/abs/2408.09869"><img src="https://img.shields.io/badge/arXiv-2408.09869-b31b1b.svg" alt="arXiv"></a>
<a href="https://pypi.org/project/docling/"><img src="https://img.shields.io/pypi/v/docling" alt="PyPI version"></a>
<a href="https://pypi.org/project/docling/"><img src="https://img.shields.io/pypi/pyversions/docling" alt="PyPI - Python Version"></a>
<a href="https://github.com/astral-sh/uv"><img src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/uv/main/assets/badge/v0.json" alt="uv"></a>
<a href="https://github.com/astral-sh/ruff"><img src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json" alt="Ruff"></a>
<a href="https://pydantic.dev"><img src="https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/pydantic/pydantic/main/docs/badge/v2.json" alt="Pydantic v2"></a>
<a href="https://github.com/pre-commit/pre-commit"><img src="https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&amp;logoColor=white" alt="pre-commit"></a>
<a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/github/license/docling-project/docling" alt="License MIT"></a>
<a href="https://pepy.tech/projects/docling"><img src="https://static.pepy.tech/badge/docling/month" alt="PyPI Downloads"></a>
<a href="https://apify.com/vancura/docling"><img src="https://apify.com/actor-badge?actor=vancura/docling?fpr=docling" alt="Docling Actor"></a>
<a href="https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github"><img src="https://dosu.dev/dosu-chat-badge.svg" alt="Chat with Dosu"></a>
<a href="https://www.bestpractices.dev/projects/10101"><img src="https://www.bestpractices.dev/projects/10101/badge" alt="OpenSSF Best Practices"></a>
<a href="https://lfaidata.foundation/projects/"><img src="https://img.shields.io/badge/LF%20AI%20%26%20Data-003778?logo=linuxfoundation&amp;logoColor=fff&amp;color=0094ff&amp;labelColor=003778" alt="LF AI &amp; Data"></a></p>
<p>Docling simplifies document processing, parsing diverse formats — including advanced PDF understanding — and providing seamless integrations with the gen AI ecosystem.</p>
<h2 id="features">Features</h2>
<ul>
<li>🗂️ Parsing of <a href="usage/supported_formats/">multiple document formats</a> incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, images (PNG, TIFF, JPEG, ...), and more</li>
<li>📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more</li>
<li>🧬 Unified, expressive <a href="concepts/docling_document/">DoclingDocument</a> representation format</li>
<li>↪️ Various <a href="usage/supported_formats/">export formats</a> and options, including Markdown, HTML, <a href="https://arxiv.org/abs/2503.11576">DocTags</a> and lossless JSON</li>
<li>🔒 Local execution capabilities for sensitive data and air-gapped environments</li>
<li>🤖 Plug-and-play <a href="integrations/">integrations</a> incl. LangChain, LlamaIndex, Crew AI &amp; Haystack for agentic AI</li>
<li>🔍 Extensive OCR support for scanned PDFs and images</li>
<li>👓 Support of several Visual Language Models (<a href="https://huggingface.co/ds4sd/SmolDocling-256M-preview">SmolDocling</a>)</li>
<li>🎙️ Support for Audio with Automatic Speech Recognition (ASR) models</li>
<li>💻 Simple and convenient CLI</li>
</ul>
<h3 id="coming-soon">Coming soon</h3>
<ul>
<li>📝 Metadata extraction, including title, authors, references &amp; language</li>
<li>📝 Chart understanding (Barchart, Piechart, LinePlot, etc.)</li>
<li>📝 Complex chemistry understanding (Molecular structures)</li>
</ul>
<h2 id="get-started">Get started</h2>
<!-- "Get started" cards: each card is a single link with a bold title and a one-line description. -->
<div class="grid">
  <a href="concepts/" class="card"><b>Concepts</b><br>Learn Docling fundamentals</a>
  <a href="examples/" class="card"><b>Examples</b><br>Try out recipes for various use cases, including conversion, RAG, and more</a>
  <a href="integrations/" class="card"><b>Integrations</b><br>Check out integrations with popular frameworks and tools</a>
  <a href="reference/document_converter/" class="card"><b>Reference</b><br>See more API details</a>
</div>
<h2 id="live-assistant">Live assistant</h2>
<p>Do you want to leverage the power of AI and get live support on Docling?
Try out the <a href="https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github">Chat with Dosu</a> functionalities provided by our friends at <a href="https://dosu.dev/">Dosu</a>.</p>
<p><a href="https://app.dosu.dev/097760a8-135e-4789-8234-90c8837d7f1c/ask?utm_source=github"><img alt="Chat with Dosu" src="https://dosu.dev/dosu-chat-badge.svg" /></a></p>
<h2 id="lf-ai-data">LF AI &amp; Data</h2>
<p>Docling is hosted as a project in the <a href="https://lfaidata.foundation/projects/">LF AI &amp; Data Foundation</a>.</p>
<h3 id="ibm-open-source-ai">IBM ❤️ Open Source AI</h3>
<p>The project was started by the AI for knowledge team at IBM Research Zurich.</p>
</article>
</div>
<script>
  // Re-check content tabs from the stored list of tab labels.
  // `__md_get("__tabs")` (defined in <head>) returns the JSON-parsed storage
  // entry; anything that is not an array is ignored.
  // For every `.tabbed-set`, the stored labels are tried in order and the first
  // <label> whose trimmed text matches has its associated input checked.
  var tabs = __md_get("__tabs");
  if (Array.isArray(tabs)) {
    nextSet: for (var set of document.querySelectorAll(".tabbed-set")) {
      var labels = set.querySelector(".tabbed-labels");
      // A tab set without a label container has nothing to match; skip it
      // instead of throwing and aborting the remaining sets.
      if (!labels) continue;
      for (var tab of tabs) {
        for (var label of labels.getElementsByTagName("label")) {
          if (label.innerText.trim() === tab) {
            var input = document.getElementById(label.htmlFor);
            // The label's `for` target may be absent; only check it when found.
            if (input) input.checked = true;
            continue nextSet;
          }
        }
      }
    }
  }
</script>
<script>
  // Look up the element named by the URL fragment. When it exists and has a
  // non-empty `name`, set `checked` to whether that name starts with "__tabbed_".
  var target = document.getElementById(location.hash.slice(1));
  if (target && target.name) {
    target.checked = target.name.startsWith("__tabbed_");
  }
</script>
</div>
<!-- "Back to top" button; rendered with the hidden attribute in the static markup. -->
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
  <!-- Decorative arrow: the visible text already names the button, so the icon is hidden from assistive technology. -->
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
  Back to top
</button>
</main>
<!-- Page footer: "next page" link followed by the generator credit. -->
<footer class="md-footer">
  <nav class="md-footer__inner md-grid" aria-label="Footer">
    <a href="installation/" class="md-footer__link md-footer__link--next" aria-label="Next: Installation">
      <div class="md-footer__title">
        <span class="md-footer__direction">
          Next
        </span>
        <div class="md-ellipsis">
          Installation
        </div>
      </div>
      <div class="md-footer__button md-icon">
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
      </div>
    </a>
  </nav>
  <div class="md-footer-meta md-typeset">
    <div class="md-footer-meta__inner md-grid">
      <div class="md-copyright">
        Made with
        <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
          Material for MkDocs
        </a>
      </div>
    </div>
  </div>
</footer>
</div>
<!-- Dialog component: the inner container is empty in the static markup; presumably filled by the script bundle at runtime (confirm against the bundle). -->
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<!-- Progress component: exposed with role="progressbar"; no aria-value* attributes or accessible name are set in the static markup. -->
<div class="md-progress" data-md-component="progress" role="progressbar"></div>
<!--
  Site configuration as a JSON data block (type="application/json" is not executed by the browser).
  Keys: "base" (relative site root), "features" (enabled feature flags), "search" (search worker script path),
  "translations" (UI strings), plus "tags" and "version", both null here.
  Presumably read by the bundle loaded below via the #__config id (confirm against the bundle).
-->
<script id="__config" type="application/json">{"base": ".", "features": ["content.tabs.link", "content.code.annotate", "content.code.copy", "announce.dismiss", "navigation.footer", "navigation.tabs", "navigation.indexes", "navigation.instant", "navigation.instant.prefetch", "navigation.instant.progress", "navigation.path", "navigation.sections", "navigation.top", "navigation.tracking", "search.suggest", "toc.follow"], "search": "assets/javascripts/workers/search.d50fe291.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<!-- Main script bundle; placed at the end of <body>, after the configuration block above. -->
<script src="assets/javascripts/bundle.56ea9cef.min.js"></script>
</body>
</html>