Files
docling/getting_started/installation/index.html

4905 lines
83 KiB
HTML

<!doctype html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<!-- Document head. The generator meta tag below names MkDocs with the Material theme;
     NOTE(review): hand edits here are presumably overwritten on rebuild, confirm before editing. -->
<meta name="viewport" content="width=device-width,initial-scale=1">
<!-- Canonical URL, previous/next page relations and the favicon. -->
<link rel="canonical" href="https://docling-project.github.io/docling/getting_started/installation/">
<link rel="prev" href="../..">
<link rel="next" href="../quickstart/">
<link rel="icon" href="../../assets/logo.png">
<meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.0">
<title>Installation - Docling</title>
<!-- Theme stylesheets: main bundle and colour palette. -->
<link rel="stylesheet" href="../../assets/stylesheets/main.618322db.min.css">
<link rel="stylesheet" href="../../assets/stylesheets/palette.ab4e12ef.min.css">
<!-- Web fonts: preconnect to the font file host, load Roboto and Roboto Mono,
     then set the text and code font custom properties on :root. -->
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
<style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
<!-- Additional stylesheets loaded after the theme. -->
<link rel="stylesheet" href="../../assets/_mkdocstrings.css">
<link rel="stylesheet" href="../../stylesheets/extra.css">
<!-- Storage helpers shared by later inline scripts:
     __md_scope  URL of "../.." resolved against the current location;
     __md_hash   integer hash folded over a string's character codes;
     __md_get    JSON.parse of the storage item keyed by scope pathname + "." + key (localStorage by default);
     __md_set    JSON.stringify + setItem under the same key, with storage errors silently ignored. -->
<script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
<!-- Defines __md_analytics(): initialises window.dataLayer, pushes the "js" and "config" entries for
     G-MP75NXFDH4 and, on DOMContentLoaded, (1) reports a "search" event when the search query field
     blurs with a value, (2) subscribes to document$ to wire the feedback form submit buttons to a
     "feedback" event, and (3) subscribes to location$ to push a config entry with the new page_path.
     It finally inserts the async gtag.js script element right after this script element.
     document$ and location$ are not defined in this block. -->
<script id="__analytics">function __md_analytics(){function e(){dataLayer.push(arguments)}window.dataLayer=window.dataLayer||[],e("js",new Date),e("config","G-MP75NXFDH4"),document.addEventListener("DOMContentLoaded",(function(){document.forms.search&&document.forms.search.query.addEventListener("blur",(function(){this.value&&e("event","search",{search_term:this.value})}));document$.subscribe((function(){var t=document.forms.feedback;if(void 0!==t)for(var a of t.querySelectorAll("[type=submit]"))a.addEventListener("click",(function(a){a.preventDefault();var n=document.location.pathname,d=this.getAttribute("data-md-value");e("event","feedback",{page:n,data:d}),t.firstElementChild.disabled=!0;var r=t.querySelector(".md-feedback__note [data-md-value='"+d+"']");r&&(r.hidden=!1)})),t.hidden=!1})),location$.subscribe((function(t){e("config","G-MP75NXFDH4",{page_path:t.pathname})}))}));var t=document.createElement("script");t.async=!0,t.src="https://www.googletagmanager.com/gtag/js?id=G-MP75NXFDH4",document.getElementById("__analytics").insertAdjacentElement("afterEnd",t)}</script>
<!-- Runs the analytics bootstrap only when __md_analytics is defined. -->
<script>"undefined"!=typeof __md_analytics&&__md_analytics()</script>
</head>
<body dir="ltr" data-md-color-scheme="slate" data-md-color-primary="black" data-md-color-accent="indigo">
<!-- Checkbox toggles targeted by the labels with for="__drawer" and for="__search". -->
<input
  class="md-toggle"
  data-md-toggle="drawer"
  type="checkbox"
  id="__drawer"
  autocomplete="off"
>
<input
  class="md-toggle"
  data-md-toggle="search"
  type="checkbox"
  id="__search"
  autocomplete="off"
>
<!-- Empty overlay label bound to the drawer toggle. -->
<label class="md-overlay" for="__drawer"></label>
<!-- Skip link. NOTE(review): it targets #available-extras, the first table-of-contents anchor;
     confirm that no page content precedes that heading. -->
<div data-md-component="skip">
  <a href="#available-extras" class="md-skip">
    Skip to content
  </a>
</div>
<!-- Announcement slot; it carries no content in this page. -->
<div data-md-component="announce">
</div>
<header class="md-header" data-md-component="header">
<nav class="md-header__inner md-grid" aria-label="Header">
<!-- Home link. Its accessible name comes from aria-label; the image alt now carries the site name
     rather than the generic word "logo", so a failed image load still identifies the link. -->
<a href="../.." title="Docling" class="md-header__button md-logo" aria-label="Docling" data-md-component="logo">
  <img src="../../assets/logo.png" alt="Docling">
</a>
<!-- Menu button: a label bound to the #__drawer checkbox. -->
<label class="md-header__button md-icon" for="__drawer">
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
    <path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/>
  </svg>
</label>
<!-- Header title with two topics: the site name and the current page name. -->
<div class="md-header__title" data-md-component="header-title">
  <div class="md-header__ellipsis">
    <div class="md-header__topic">
      <span class="md-ellipsis"> Docling </span>
    </div>
    <div class="md-header__topic" data-md-component="header-topic">
      <span class="md-ellipsis"> Installation </span>
    </div>
  </div>
</div>
<!-- Colour palette switcher: three radio options, each followed by a label that targets the next option
     in the cycle (0 to 1, 1 to 2, 2 to 0). All labels start with the hidden attribute. -->
<form class="md-header__option" data-md-component="palette">
  <input
    class="md-option"
    data-md-color-media=""
    data-md-color-scheme="slate"
    data-md-color-primary="black"
    data-md-color-accent="indigo"
    aria-label="Switch to light mode"
    type="radio"
    name="__palette"
    id="__palette_0"
  >
  <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_1" hidden>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
      <path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/>
    </svg>
  </label>
  <input
    class="md-option"
    data-md-color-media="(prefers-color-scheme: light)"
    data-md-color-scheme="default"
    data-md-color-primary="black"
    data-md-color-accent="indigo"
    aria-label="Switch to dark mode"
    type="radio"
    name="__palette"
    id="__palette_1"
  >
  <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_2" hidden>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
      <path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/>
    </svg>
  </label>
  <input
    class="md-option"
    data-md-color-media="(prefers-color-scheme: dark)"
    data-md-color-scheme="slate"
    data-md-color-primary="black"
    data-md-color-accent="indigo"
    aria-label="Switch to system preference"
    type="radio"
    name="__palette"
    id="__palette_2"
  >
  <label class="md-header__button md-icon" title="Switch to system preference" for="__palette_0" hidden>
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24">
      <path d="m14.3 16-.7-2h-3.2l-.7 2H7.8L11 7h2l3.2 9zM20 8.69V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12zm-9.15 3.96h2.3L12 9z"/>
    </svg>
  </label>
</form>
<!-- Restores the stored colour palette. Reads "__palette" through __md_get (defined outside this block);
     when the stored media is "(prefers-color-scheme)" it picks the light or dark palette input via
     matchMedia and copies that input's media, scheme, primary and accent values into palette.color.
     Every entry of palette.color is then written to body as a data-md-color-<key> attribute. -->
<script>var palette=__md_get("__palette");if(palette&&palette.color){if("(prefers-color-scheme)"===palette.color.media){var media=matchMedia("(prefers-color-scheme: light)"),input=document.querySelector(media.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");palette.color.media=input.getAttribute("data-md-color-media"),palette.color.scheme=input.getAttribute("data-md-color-scheme"),palette.color.primary=input.getAttribute("data-md-color-primary"),palette.color.accent=input.getAttribute("data-md-color-accent")}for(var[key,value]of Object.entries(palette.color))document.body.setAttribute("data-md-color-"+key,value)}</script>
<!-- Search button: a label bound to the #__search checkbox. -->
<label class="md-header__button md-icon" for="__search">
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
</label>
<!-- Search dialog. An element with role="dialog" needs an accessible name, so aria-label is set here;
     without it assistive technology announces an unnamed dialog. -->
<div class="md-search" data-md-component="search" role="dialog" aria-label="Search">
  <label class="md-search__overlay" for="__search"></label>
  <div class="md-search__inner" role="search">
    <form class="md-search__form" name="search">
      <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
      <!-- Icon label bound to the search toggle; it holds two icons (magnifier and back arrow). -->
      <label class="md-search__icon md-icon" for="__search">
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
      </label>
      <nav class="md-search__options" aria-label="Search">
        <!-- Reset button that clears the form; tabindex="-1" keeps it out of the tab order. -->
        <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
        </button>
      </nav>
      <div class="md-search__suggest" data-md-component="search-suggest"></div>
    </form>
    <!-- Result area: a status line followed by an initially empty result list. -->
    <div class="md-search__output">
      <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
        <div class="md-search-result" data-md-component="search-result">
          <div class="md-search-result__meta">
            Initializing search
          </div>
          <ol class="md-search-result__list" role="presentation"></ol>
        </div>
      </div>
    </div>
  </div>
</div>
<!-- Repository link shown in the header: icon plus the repository name. -->
<div class="md-header__source">
  <a
    href="https://github.com/docling-project/docling"
    title="Go to repository"
    class="md-source"
    data-md-component="source"
  >
    <div class="md-source__icon md-icon">
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
    </div>
    <div class="md-source__repository">
      docling-project/docling
    </div>
  </a>
</div>
</nav>
</header>
<div class="md-container" data-md-component="container">
<!-- Top-level section tabs; the Documentation tab carries the active modifier class. -->
<nav class="md-tabs" aria-label="Tabs" data-md-component="tabs">
  <div class="md-grid">
    <ul class="md-tabs__list">
      <li class="md-tabs__item md-tabs__item--active">
        <a href="../.." class="md-tabs__link">
          Documentation
        </a>
      </li>
      <li class="md-tabs__item">
        <a href="../../concepts/" class="md-tabs__link">
          Concepts
        </a>
      </li>
      <li class="md-tabs__item">
        <a href="../../examples/" class="md-tabs__link">
          Examples
        </a>
      </li>
      <li class="md-tabs__item">
        <a href="../../integrations/" class="md-tabs__link">
          Integrations
        </a>
      </li>
      <li class="md-tabs__item">
        <a href="../../reference/document_converter/" class="md-tabs__link">
          Reference
        </a>
      </li>
    </ul>
  </div>
</nav>
<main class="md-main" data-md-component="main">
<div class="md-main__inner md-grid">
<div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0">
<!-- Drawer title: label bound to the #__drawer checkbox, holding the home link and the site name.
     The image alt carries the site name rather than the generic word "logo". -->
<label class="md-nav__title" for="__drawer">
  <a href="../.." title="Docling" class="md-nav__button md-logo" aria-label="Docling" data-md-component="logo">
    <img src="../../assets/logo.png" alt="Docling">
  </a>
  Docling
</label>
<!-- Repository link repeated inside the primary navigation. -->
<div class="md-nav__source">
  <a
    href="https://github.com/docling-project/docling"
    title="Go to repository"
    class="md-source"
    data-md-component="source"
  >
    <div class="md-source__icon md-icon">
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg>
    </div>
    <div class="md-source__repository">
      docling-project/docling
    </div>
  </a>
</div>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested">
<!-- Documentation section: checked toggle plus a container holding the section link and the toggle label.
     The label previously carried tabindex="", which is not a valid integer; the attribute is dropped,
     which leaves the label out of the tab order exactly as the unparsable empty value did. -->
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1" checked>
<div class="md-nav__link md-nav__container">
  <a href="../.." class="md-nav__link ">
    <span class="md-ellipsis">
      Documentation
    </span>
  </a>
  <label class="md-nav__link " for="__nav_1" id="__nav_1_label">
    <span class="md-nav__icon md-icon"></span>
  </label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_1_label" aria-expanded="true">
<label class="md-nav__title" for="__nav_1">
<span class="md-nav__icon md-icon"></span>
Documentation
</label>
<ul class="md-nav__list" data-md-scrollfix>
<!-- "Getting started" group (expanded). It holds the active Installation page, whose entry embeds the
     table of contents, followed by the Quickstart page. -->
<li class="md-nav__item md-nav__item--active md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_2" checked>
  <label class="md-nav__link" for="__nav_1_2" id="__nav_1_2_label" tabindex="0">
    <span class="md-ellipsis"> Getting started </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_2_label" aria-expanded="true">
    <label class="md-nav__title" for="__nav_1_2">
      <span class="md-nav__icon md-icon"></span>
      Getting started
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <!-- Active page: a label bound to the #__toc toggle plus a plain link to the page itself. -->
      <li class="md-nav__item md-nav__item--active">
        <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
        <label class="md-nav__link md-nav__link--active" for="__toc">
          <span class="md-ellipsis"> Installation </span>
          <span class="md-nav__icon md-icon"></span>
        </label>
        <a href="./" class="md-nav__link md-nav__link--active">
          <span class="md-ellipsis"> Installation </span>
        </a>
        <!-- In-page table of contents. -->
        <nav class="md-nav md-nav--secondary" aria-label="Table of contents">
          <label class="md-nav__title" for="__toc">
            <span class="md-nav__icon md-icon"></span>
            Table of contents
          </label>
          <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
            <li class="md-nav__item">
              <a href="#available-extras" class="md-nav__link">
                <span class="md-ellipsis"> Available extras </span>
              </a>
              <nav class="md-nav" aria-label="Available extras">
                <ul class="md-nav__list">
                  <li class="md-nav__item">
                    <a href="#ocr-engines" class="md-nav__link">
                      <span class="md-ellipsis"> OCR engines </span>
                    </a>
                  </li>
                </ul>
              </nav>
            </li>
            <li class="md-nav__item">
              <a href="#development-setup" class="md-nav__link">
                <span class="md-ellipsis"> Development setup </span>
              </a>
            </li>
          </ul>
        </nav>
      </li>
      <li class="md-nav__item">
        <a href="../quickstart/" class="md-nav__link">
          <span class="md-ellipsis"> Quickstart </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- "Usage" group (collapsed): seven page links. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_3">
  <label class="md-nav__link" for="__nav_1_3" id="__nav_1_3_label" tabindex="0">
    <span class="md-ellipsis"> Usage </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_3_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_1_3">
      <span class="md-nav__icon md-icon"></span>
      Usage
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../usage/advanced_options/" class="md-nav__link">
          <span class="md-ellipsis"> Advanced options </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../usage/supported_formats/" class="md-nav__link">
          <span class="md-ellipsis"> Supported formats </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../usage/enrichments/" class="md-nav__link">
          <span class="md-ellipsis"> Enrichment features </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../usage/vision_models/" class="md-nav__link">
          <span class="md-ellipsis"> Vision models </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../usage/gpu/" class="md-nav__link">
          <span class="md-ellipsis"> GPU support </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../usage/mcp/" class="md-nav__link">
          <span class="md-ellipsis"> MCP server </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../usage/jobkit/" class="md-nav__link">
          <span class="md-ellipsis"> Jobkit </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- "FAQ" entry: a section link with an empty child list. The nested nav is named through
     aria-labelledby="__nav_1_4_label", but no element carried that id, leaving the reference dangling.
     The id is now placed on the FAQ link so the nav takes its name from the visible link text. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_1_4">
  <div class="md-nav__link md-nav__container">
    <a href="../../faq/" class="md-nav__link " id="__nav_1_4_label">
      <span class="md-ellipsis">
        FAQ
      </span>
    </a>
  </div>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_1_4_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_1_4">
      <span class="md-nav__icon md-icon"></span>
      FAQ
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
    </ul>
  </nav>
</li>
</ul>
</nav>
</li>
<!-- "Concepts" section (collapsed): section link, toggle label and six page links. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_2">
  <div class="md-nav__link md-nav__container">
    <a href="../../concepts/" class="md-nav__link ">
      <span class="md-ellipsis"> Concepts </span>
    </a>
    <label class="md-nav__link " for="__nav_2" id="__nav_2_label" tabindex="0">
      <span class="md-nav__icon md-icon"></span>
    </label>
  </div>
  <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_2_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_2">
      <span class="md-nav__icon md-icon"></span>
      Concepts
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../concepts/architecture/" class="md-nav__link">
          <span class="md-ellipsis"> Architecture </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../concepts/docling_document/" class="md-nav__link">
          <span class="md-ellipsis"> Docling Document </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../concepts/serialization/" class="md-nav__link">
          <span class="md-ellipsis"> Serialization </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../concepts/confidence_scores/" class="md-nav__link">
          <span class="md-ellipsis"> Confidence Scores </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../concepts/chunking/" class="md-nav__link">
          <span class="md-ellipsis"> Chunking </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../concepts/plugins/" class="md-nav__link">
          <span class="md-ellipsis"> Plugins </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<!-- Examples section: unchecked toggle plus a container with the section link and the toggle label. -->
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3">
<div class="md-nav__link md-nav__container">
  <a href="../../examples/" class="md-nav__link ">
    <span class="md-ellipsis"> Examples </span>
  </a>
  <label class="md-nav__link " for="__nav_3" id="__nav_3_label" tabindex="0">
    <span class="md-nav__icon md-icon"></span>
  </label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_3">
<span class="md-nav__icon md-icon"></span>
Examples
</label>
<ul class="md-nav__list" data-md-scrollfix>
<!-- Examples group "Conversion" (collapsed): twenty example links. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_2">
  <label class="md-nav__link" for="__nav_3_2" id="__nav_3_2_label" tabindex="0">
    <span class="md-ellipsis"> 🔀 Conversion </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_2_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_3_2">
      <span class="md-nav__icon md-icon"></span>
      🔀 Conversion
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../examples/minimal/" class="md-nav__link">
          <span class="md-ellipsis"> Simple conversion </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/custom_convert/" class="md-nav__link">
          <span class="md-ellipsis"> Custom conversion </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/batch_convert/" class="md-nav__link">
          <span class="md-ellipsis"> Batch conversion </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/run_with_formats/" class="md-nav__link">
          <span class="md-ellipsis"> Multi-format conversion </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/minimal_vlm_pipeline/" class="md-nav__link">
          <span class="md-ellipsis"> VLM pipeline with GraniteDocling </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/vlm_pipeline_api_model/" class="md-nav__link">
          <span class="md-ellipsis"> VLM pipeline with remote model </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/compare_vlm_models/" class="md-nav__link">
          <span class="md-ellipsis"> VLM comparison </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/minimal_asr_pipeline/" class="md-nav__link">
          <span class="md-ellipsis"> ASR pipeline with Whisper </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/export_figures/" class="md-nav__link">
          <span class="md-ellipsis"> Figure export </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/export_tables/" class="md-nav__link">
          <span class="md-ellipsis"> Table export </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/export_multimodal/" class="md-nav__link">
          <span class="md-ellipsis"> Multimodal export </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/full_page_ocr/" class="md-nav__link">
          <span class="md-ellipsis"> Force full page OCR </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/tesseract_lang_detection/" class="md-nav__link">
          <span class="md-ellipsis"> Automatic OCR language detection with tesseract </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/rapidocr_with_custom_models/" class="md-nav__link">
          <span class="md-ellipsis"> RapidOCR with custom OCR models </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/suryaocr_with_custom_models/" class="md-nav__link">
          <span class="md-ellipsis"> SuryaOCR with custom OCR models </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/run_with_accelerator/" class="md-nav__link">
          <span class="md-ellipsis"> Accelerator options </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/pii_obfuscate/" class="md-nav__link">
          <span class="md-ellipsis"> Detect and obfuscate PII </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/translate/" class="md-nav__link">
          <span class="md-ellipsis"> Simple translation </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/backend_csv/" class="md-nav__link">
          <span class="md-ellipsis"> Conversion of CSV files </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/backend_xml_rag/" class="md-nav__link">
          <span class="md-ellipsis"> Conversion of custom XML </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Examples group "Serialization & chunking" (collapsed): three example links.
     The ampersand in both group titles is now written as &amp;, matching the escaped form already used
     by the link text further down in this same list. Rendered text is unchanged. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_3">
  <label class="md-nav__link" for="__nav_3_3" id="__nav_3_3_label" tabindex="0">
    <span class="md-ellipsis">
      ✂️ Serialization &amp; chunking
    </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_3_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_3_3">
      <span class="md-nav__icon md-icon"></span>
      ✂️ Serialization &amp; chunking
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../examples/serialization/" class="md-nav__link">
          <span class="md-ellipsis">
            Serialization
          </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/hybrid_chunking/" class="md-nav__link">
          <span class="md-ellipsis">
            Hybrid chunking
          </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/advanced_chunking_and_serialization/" class="md-nav__link">
          <span class="md-ellipsis">
            Advanced chunking &amp; serialization
          </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Examples group "Information extraction" (collapsed): one example link. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_4">
  <label class="md-nav__link" for="__nav_3_4" id="__nav_3_4_label" tabindex="0">
    <span class="md-ellipsis"> 📤 Information extraction </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_4_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_3_4">
      <span class="md-nav__icon md-icon"></span>
      📤 Information extraction
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../examples/extraction/" class="md-nav__link">
          <span class="md-ellipsis"> Information extraction </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Examples group "RAG with AI dev frameworks" (collapsed): four example links. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_5">
  <label class="md-nav__link" for="__nav_3_5" id="__nav_3_5_label" tabindex="0">
    <span class="md-ellipsis"> 🤖 RAG with AI dev frameworks </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_5_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_3_5">
      <span class="md-nav__icon md-icon"></span>
      🤖 RAG with AI dev frameworks
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../examples/rag_haystack/" class="md-nav__link">
          <span class="md-ellipsis"> RAG with Haystack </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/rag_langchain/" class="md-nav__link">
          <span class="md-ellipsis"> RAG with LangChain </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/rag_llamaindex/" class="md-nav__link">
          <span class="md-ellipsis"> RAG with LlamaIndex </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/visual_grounding/" class="md-nav__link">
          <span class="md-ellipsis"> Visual grounding </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Examples group "Picture annotation" (collapsed): two example links. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_6">
  <label class="md-nav__link" for="__nav_3_6" id="__nav_3_6_label" tabindex="0">
    <span class="md-ellipsis"> 🖼️ Picture annotation </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_6_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_3_6">
      <span class="md-nav__icon md-icon"></span>
      🖼️ Picture annotation
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../examples/pictures_description/" class="md-nav__link">
          <span class="md-ellipsis"> Annotate picture with local VLM </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/pictures_description_api/" class="md-nav__link">
          <span class="md-ellipsis"> Annotate picture with remote VLM </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Examples group "Enrichment development" (collapsed): three example links. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_7">
  <label class="md-nav__link" for="__nav_3_7" id="__nav_3_7_label" tabindex="0">
    <span class="md-ellipsis"> ✨ Enrichment development </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_7_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_3_7">
      <span class="md-nav__icon md-icon"></span>
      ✨ Enrichment development
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../examples/develop_picture_enrichment/" class="md-nav__link">
          <span class="md-ellipsis"> Figure enrichment </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/develop_formula_understanding/" class="md-nav__link">
          <span class="md-ellipsis"> Formula enrichment </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/enrich_doclingdocument/" class="md-nav__link">
          <span class="md-ellipsis"> Enrich a DoclingDocument </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Examples group "GPU optimization" (collapsed): three example links. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_8">
  <label class="md-nav__link" for="__nav_3_8" id="__nav_3_8_label" tabindex="0">
    <span class="md-ellipsis"> ⚡️ GPU optimization </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_8_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_3_8">
      <span class="md-nav__icon md-icon"></span>
      ⚡️ GPU optimization
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../examples/gpu_standard_pipeline/" class="md-nav__link">
          <span class="md-ellipsis"> Standard pipeline </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/gpu_vlm_pipeline/" class="md-nav__link">
          <span class="md-ellipsis"> VLM pipeline </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/parquet_images/" class="md-nav__link">
          <span class="md-ellipsis"> Parquet benchmark </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<!-- Examples group "More examples" (collapsed): nine links, two of which point to external
     GitHub URLs and are marked with [↗] in their text. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_3_9">
  <label class="md-nav__link" for="__nav_3_9" id="__nav_3_9_label" tabindex="0">
    <span class="md-ellipsis"> 🗂️ More examples </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_3_9_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_3_9">
      <span class="md-nav__icon md-icon"></span>
      🗂️ More examples
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../examples/dpk-ingest-chunk-tokenize/" class="md-nav__link">
          <span class="md-ellipsis"> Chunking &amp; tokenization with Data Prep Kit </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/rag_azuresearch/" class="md-nav__link">
          <span class="md-ellipsis"> RAG with Azure AI Search </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="https://github.com/ibm-granite-community/granite-snack-cookbook/blob/main/recipes/RAG/Granite_Docling_RAG.ipynb" class="md-nav__link">
          <span class="md-ellipsis"> RAG with Granite [↗] </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/rag_milvus/" class="md-nav__link">
          <span class="md-ellipsis"> RAG with Milvus </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/rag_opensearch/" class="md-nav__link">
          <span class="md-ellipsis"> RAG with OpenSearch </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/rag_weaviate/" class="md-nav__link">
          <span class="md-ellipsis"> RAG with Weaviate </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/retrieval_qdrant/" class="md-nav__link">
          <span class="md-ellipsis"> Retrieval with Qdrant </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../examples/rag_mongodb/" class="md-nav__link">
          <span class="md-ellipsis"> RAG with MongoDB + VoyageAI </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="https://github.com/workloads/pathfinder-prism" class="md-nav__link">
          <span class="md-ellipsis"> RAG with Vault PII transform [↗] </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<!-- Integrations section: unchecked toggle plus a container with the section link and the toggle label. -->
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4">
<div class="md-nav__link md-nav__container">
  <a href="../../integrations/" class="md-nav__link ">
    <span class="md-ellipsis"> Integrations </span>
  </a>
  <label class="md-nav__link " for="__nav_4" id="__nav_4_label" tabindex="0">
    <span class="md-nav__icon md-icon"></span>
  </label>
</div>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4">
<span class="md-nav__icon md-icon"></span>
Integrations
</label>
<ul class="md-nav__list" data-md-scrollfix>
<!-- Integrations group "Agentic / AI dev frameworks" (collapsed): eight integration links. -->
<li class="md-nav__item md-nav__item--nested">
  <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4_2">
  <label class="md-nav__link" for="__nav_4_2" id="__nav_4_2_label" tabindex="0">
    <span class="md-ellipsis"> 🤖 Agentic / AI dev frameworks </span>
    <span class="md-nav__icon md-icon"></span>
  </label>
  <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_4_2_label" aria-expanded="false">
    <label class="md-nav__title" for="__nav_4_2">
      <span class="md-nav__icon md-icon"></span>
      🤖 Agentic / AI dev frameworks
    </label>
    <ul class="md-nav__list" data-md-scrollfix>
      <li class="md-nav__item">
        <a href="../../integrations/bee/" class="md-nav__link">
          <span class="md-ellipsis"> Bee Agent Framework </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../integrations/crewai/" class="md-nav__link">
          <span class="md-ellipsis"> Crew AI </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../integrations/haystack/" class="md-nav__link">
          <span class="md-ellipsis"> Haystack </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../integrations/hector/" class="md-nav__link">
          <span class="md-ellipsis"> Hector </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../integrations/langchain/" class="md-nav__link">
          <span class="md-ellipsis"> LangChain </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../integrations/langflow/" class="md-nav__link">
          <span class="md-ellipsis"> Langflow </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../integrations/llamaindex/" class="md-nav__link">
          <span class="md-ellipsis"> LlamaIndex </span>
        </a>
      </li>
      <li class="md-nav__item">
        <a href="../../integrations/txtai/" class="md-nav__link">
          <span class="md-ellipsis"> txtai </span>
        </a>
      </li>
    </ul>
  </nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4_3" >
<label class="md-nav__link" for="__nav_4_3" id="__nav_4_3_label" tabindex="0">
<span class="md-ellipsis">
⭐️ Featured
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_4_3_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4_3">
<span class="md-nav__icon md-icon"></span>
⭐️ Featured
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../integrations/apify/" class="md-nav__link">
<span class="md-ellipsis">
Apify
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/data_prep_kit/" class="md-nav__link">
<span class="md-ellipsis">
Data Prep Kit
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/instructlab/" class="md-nav__link">
<span class="md-ellipsis">
InstructLab
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/nvidia/" class="md-nav__link">
<span class="md-ellipsis">
NVIDIA
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/prodigy/" class="md-nav__link">
<span class="md-ellipsis">
Prodigy
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/rhel_ai/" class="md-nav__link">
<span class="md-ellipsis">
RHEL AI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/spacy/" class="md-nav__link">
<span class="md-ellipsis">
spaCy
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4_4" >
<label class="md-nav__link" for="__nav_4_4" id="__nav_4_4_label" tabindex="0">
<span class="md-ellipsis">
🗂️ More integrations
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_4_4_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_4_4">
<span class="md-nav__icon md-icon"></span>
🗂️ More integrations
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../integrations/arconia/" class="md-nav__link">
<span class="md-ellipsis">
Arconia
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/cloudera/" class="md-nav__link">
<span class="md-ellipsis">
Cloudera
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/docetl/" class="md-nav__link">
<span class="md-ellipsis">
DocETL
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/kotaemon/" class="md-nav__link">
<span class="md-ellipsis">
Kotaemon
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/opencontracts/" class="md-nav__link">
<span class="md-ellipsis">
OpenContracts
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/openwebui/" class="md-nav__link">
<span class="md-ellipsis">
Open WebUI
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/quarkus/" class="md-nav__link">
<span class="md-ellipsis">
Quarkus
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../integrations/vectara/" class="md-nav__link">
<span class="md-ellipsis">
Vectara
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
<label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
<span class="md-ellipsis">
Reference
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5">
<span class="md-nav__icon md-icon"></span>
Reference
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_1" >
<label class="md-nav__link" for="__nav_5_1" id="__nav_5_1_label" tabindex="0">
<span class="md-ellipsis">
Python API
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_1_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5_1">
<span class="md-nav__icon md-icon"></span>
Python API
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../reference/document_converter/" class="md-nav__link">
<span class="md-ellipsis">
Document Converter
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../reference/pipeline_options/" class="md-nav__link">
<span class="md-ellipsis">
Pipeline options
</span>
</a>
</li>
<li class="md-nav__item">
<a href="../../reference/docling_document/" class="md-nav__link">
<span class="md-ellipsis">
Docling Document
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item md-nav__item--nested">
<input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_2" >
<label class="md-nav__link" for="__nav_5_2" id="__nav_5_2_label" tabindex="0">
<span class="md-ellipsis">
CLI
</span>
<span class="md-nav__icon md-icon"></span>
</label>
<nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_2_label" aria-expanded="false">
<label class="md-nav__title" for="__nav_5_2">
<span class="md-nav__icon md-icon"></span>
CLI
</label>
<ul class="md-nav__list" data-md-scrollfix>
<li class="md-nav__item">
<a href="../../reference/cli/" class="md-nav__link">
<span class="md-ellipsis">
CLI reference
</span>
</a>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</li>
</ul>
</nav>
</div>
</div>
</div>
<!-- Secondary sidebar: table of contents for this page. Its links target the in-page heading anchors #available-extras, #ocr-engines (nested under "Available extras") and #development-setup. -->
<div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
<div class="md-sidebar__scrollwrap">
<div class="md-sidebar__inner">
<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
<label class="md-nav__title" for="__toc">
<span class="md-nav__icon md-icon"></span>
Table of contents
</label>
<ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
<li class="md-nav__item">
<a href="#available-extras" class="md-nav__link">
<span class="md-ellipsis">
Available extras
</span>
</a>
<nav class="md-nav" aria-label="Available extras">
<ul class="md-nav__list">
<li class="md-nav__item">
<a href="#ocr-engines" class="md-nav__link">
<span class="md-ellipsis">
OCR engines
</span>
</a>
</li>
</ul>
</nav>
</li>
<li class="md-nav__item">
<a href="#development-setup" class="md-nav__link">
<span class="md-ellipsis">
Development setup
</span>
</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
<div class="md-content" data-md-component="content">
<nav class="md-path" aria-label="Navigation" >
<ol class="md-path__list">
<li class="md-path__item">
<a href="../.." class="md-path__link">
<span class="md-ellipsis">
Documentation
</span>
</a>
</li>
<li class="md-path__item">
<a href="./" class="md-path__link">
<span class="md-ellipsis">
Getting started
</span>
</a>
</li>
</ol>
</nav>
<article class="md-content__inner md-typeset">
<h1>Installation</h1>
<p>To use Docling, simply install <code>docling</code> from your Python package manager, e.g. pip:</p>
<div class="highlight"><pre><span></span><code>pip<span class="w"> </span>install<span class="w"> </span>docling
</code></pre></div>
<p>Works on macOS, Linux, and Windows, with support for both x86_64 and arm64 architectures.</p>
<details>
<summary>Alternative PyTorch distributions</summary>
<p>The Docling models depend on the <a href="https://pytorch.org/">PyTorch</a> library.
Depending on your architecture, you might want to use a different distribution of <code>torch</code>.
For example, you might want support for a different accelerator or a CPU-only version.
All the different ways for installing <code>torch</code> are listed on their website <a href="https://pytorch.org/">https://pytorch.org/</a>.</p>
<p>One common situation is the installation on Linux systems with cpu-only support.
In this case, we suggest installing Docling with the following options:</p>
<div class="highlight"><pre><span></span><code><span class="c1"># Example for installing on the Linux cpu-only version</span>
pip<span class="w"> </span>install<span class="w"> </span>docling<span class="w"> </span>--extra-index-url<span class="w"> </span>https://download.pytorch.org/whl/cpu
</code></pre></div>
</details>
<details>
<summary>Installation on macOS Intel (x86_64)</summary>
<p>When installing Docling on macOS with Intel processors, you might encounter errors with PyTorch compatibility.
This happens because newer PyTorch versions (2.6.0+) no longer provide wheels for Intel-based Macs.</p>
<p>If you're using an Intel Mac, install Docling with a compatible PyTorch version.
<strong>Note:</strong> PyTorch 2.2.2 requires Python 3.12 or lower. Make sure you're not using Python 3.13+.</p>
<div class="highlight"><pre><span></span><code><span class="c1"># For uv users</span>
uv<span class="w"> </span>add<span class="w"> </span><span class="nv">torch</span><span class="o">==</span><span class="m">2</span>.2.2<span class="w"> </span><span class="nv">torchvision</span><span class="o">==</span><span class="m">0</span>.17.2<span class="w"> </span>docling
<span class="c1"># For pip users</span>
pip<span class="w"> </span>install<span class="w"> </span><span class="s2">&quot;docling[mac_intel]&quot;</span>
<span class="c1"># For Poetry users</span>
poetry<span class="w"> </span>add<span class="w"> </span>docling
</code></pre></div>
</details>
<h2 id="available-extras">Available extras</h2>
<p>The <code>docling</code> package is designed to offer a working solution for the Docling default options.
Some Docling functionalities require additional third-party packages and are therefore installed only if selected as extras (or installed independently).</p>
<p>The following table summarizes the extras available in the <code>docling</code> package. They can be activated with:
<code>pip install "docling[NAME1,NAME2]"</code></p>
<table>
<thead>
<tr>
<th>Extra</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td><code>asr</code></td>
<td>Installs dependencies for running the ASR pipeline.</td>
</tr>
<tr>
<td><code>vlm</code></td>
<td>Installs dependencies for running the VLM pipeline.</td>
</tr>
<tr>
<td><code>easyocr</code></td>
<td>Installs the <a href="https://github.com/JaidedAI/EasyOCR">EasyOCR</a> OCR engine.</td>
</tr>
<tr>
<td><code>tesserocr</code></td>
<td>Installs the Tesseract binding for using it as OCR engine.</td>
</tr>
<tr>
<td><code>ocrmac</code></td>
<td>Installs the OcrMac OCR engine.</td>
</tr>
<tr>
<td><code>rapidocr</code></td>
<td>Installs the <a href="https://github.com/RapidAI/RapidOCR">RapidOCR</a> OCR engine with <a href="https://github.com/microsoft/onnxruntime/">onnxruntime</a> backend.</td>
</tr>
</tbody>
</table>
<h3 id="ocr-engines">OCR engines</h3>
<p>Docling supports multiple OCR engines for processing scanned documents. The current version provides
the following engines.</p>
<table>
<thead>
<tr>
<th>Engine</th>
<th>Installation</th>
<th>Usage</th>
</tr>
</thead>
<tbody>
<tr>
<td><a href="https://github.com/JaidedAI/EasyOCR">EasyOCR</a></td>
<td><code>easyocr</code> extra or via <code>pip install easyocr</code>.</td>
<td><code>EasyOcrOptions</code></td>
</tr>
<tr>
<td>Tesseract</td>
<td>System dependency. See description for Tesseract and Tesserocr below.</td>
<td><code>TesseractOcrOptions</code></td>
</tr>
<tr>
<td>Tesseract CLI</td>
<td>System dependency. See description below.</td>
<td><code>TesseractCliOcrOptions</code></td>
</tr>
<tr>
<td>OcrMac</td>
<td>System dependency. See description below.</td>
<td><code>OcrMacOptions</code></td>
</tr>
<tr>
<td><a href="https://github.com/RapidAI/RapidOCR">RapidOCR</a></td>
<td><code>rapidocr</code> extra or via <code>pip install rapidocr onnxruntime</code>.</td>
<td><code>RapidOcrOptions</code></td>
</tr>
<tr>
<td><a href="https://github.com/felixdittrich92/OnnxTR">OnnxTR</a></td>
<td>Can be installed via the plugin system <code>pip install "docling-ocr-onnxtr[cpu]"</code>. Please take a look at <a href="https://github.com/felixdittrich92/docling-OCR-OnnxTR">docling-OCR-OnnxTR</a>.</td>
<td><code>OnnxtrOcrOptions</code></td>
</tr>
</tbody>
</table>
<p>The Docling <code>DocumentConverter</code> lets you choose the OCR engine with the <code>ocr_options</code> setting. For example:</p>
<div class="highlight"><pre><span></span><code><span class="kn">from</span><span class="w"> </span><span class="nn">docling.datamodel.base_models</span><span class="w"> </span><span class="kn">import</span> <span class="n">ConversionStatus</span><span class="p">,</span> <span class="n">PipelineOptions</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">docling.datamodel.pipeline_options</span><span class="w"> </span><span class="kn">import</span> <span class="n">PipelineOptions</span><span class="p">,</span> <span class="n">EasyOcrOptions</span><span class="p">,</span> <span class="n">TesseractOcrOptions</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">docling.document_converter</span><span class="w"> </span><span class="kn">import</span> <span class="n">DocumentConverter</span>
<span class="n">pipeline_options</span> <span class="o">=</span> <span class="n">PipelineOptions</span><span class="p">()</span>
<span class="n">pipeline_options</span><span class="o">.</span><span class="n">do_ocr</span> <span class="o">=</span> <span class="kc">True</span>
<span class="n">pipeline_options</span><span class="o">.</span><span class="n">ocr_options</span> <span class="o">=</span> <span class="n">TesseractOcrOptions</span><span class="p">()</span> <span class="c1"># Use Tesseract</span>
<span class="n">doc_converter</span> <span class="o">=</span> <span class="n">DocumentConverter</span><span class="p">(</span>
<span class="n">pipeline_options</span><span class="o">=</span><span class="n">pipeline_options</span><span class="p">,</span>
<span class="p">)</span>
</code></pre></div>
<details>
<summary>Tesseract installation</summary>
<p><a href="https://github.com/tesseract-ocr/tesseract">Tesseract</a> is a popular OCR engine which is available
on most operating systems. For using this engine with Docling, Tesseract must be installed on your
system, using the packaging tool of your choice. Below we provide example commands.
After installing Tesseract you are expected to provide the path to its language files using the
<code>TESSDATA_PREFIX</code> environment variable (note that it must terminate with a slash <code>/</code>).</p>
<div class="tabbed-set tabbed-alternate" data-tabs="1:3"><input checked="checked" id="macos-via-homebrew" name="__tabbed_1" type="radio" /><input id="debian-based" name="__tabbed_1" type="radio" /><input id="rhel" name="__tabbed_1" type="radio" /><div class="tabbed-labels"><label for="macos-via-homebrew">macOS (via <a href="https://brew.sh/">Homebrew</a>)</label><label for="debian-based">Debian-based</label><label for="rhel">RHEL</label></div>
<div class="tabbed-content">
<div class="tabbed-block">
<div class="highlight"><pre><span></span><code><span class="go">brew install tesseract leptonica pkg-config</span>
<span class="go">TESSDATA_PREFIX=/opt/homebrew/share/tessdata/</span>
<span class="go">echo &quot;Set TESSDATA_PREFIX=${TESSDATA_PREFIX}&quot;</span>
</code></pre></div>
</div>
<div class="tabbed-block">
<div class="highlight"><pre><span></span><code><span class="go">apt-get install tesseract-ocr tesseract-ocr-eng libtesseract-dev libleptonica-dev pkg-config</span>
<span class="go">TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)</span>
<span class="go">echo &quot;Set TESSDATA_PREFIX=${TESSDATA_PREFIX}&quot;</span>
</code></pre></div>
</div>
<div class="tabbed-block">
<div class="highlight"><pre><span></span><code><span class="go">dnf install tesseract tesseract-devel tesseract-langpack-eng tesseract-osd leptonica-devel</span>
<span class="go">TESSDATA_PREFIX=/usr/share/tesseract/tessdata/</span>
<span class="go">echo &quot;Set TESSDATA_PREFIX=${TESSDATA_PREFIX}&quot;</span>
</code></pre></div>
</div>
</div>
</div>
<h4>Linking to Tesseract</h4>
<p>The most efficient usage of the Tesseract library is via linking. Docling uses
the <a href="https://github.com/sirfz/tesserocr">Tesserocr</a> package for this.</p>
<p>If you run into issues installing Tesserocr, we suggest using the following
installation options:</p>
<div class="highlight"><pre><span></span><code><span class="go">pip uninstall tesserocr</span>
<span class="go">pip install --no-binary :all: tesserocr</span>
</code></pre></div>
</details>
<h2 id="development-setup">Development setup</h2>
<p>To develop Docling features, bugfixes etc., install as follows from your local clone's root dir:</p>
<div class="highlight"><pre><span></span><code>uv<span class="w"> </span>sync<span class="w"> </span>--all-extras
</code></pre></div>
</article>
</div>
<!-- Restores remembered content-tab choices. Reads the "__tabs" value through __md_get; when it is an array, each .tabbed-set is scanned and the input belonging to the first label whose trimmed text equals a stored tab name (tried in stored order) is checked, after which the scan moves on to the next set. -->
<script>var tabs=__md_get("__tabs");if(Array.isArray(tabs))e:for(var set of document.querySelectorAll(".tabbed-set")){var labels=set.querySelector(".tabbed-labels");for(var tab of tabs)for(var label of labels.getElementsByTagName("label"))if(label.innerText.trim()===tab){var input=document.getElementById(label.htmlFor);input.checked=!0;continue e}}</script>
<!-- Looks up the element whose id equals the URL fragment (without the leading "#"). If it exists and has a non-empty name, its checked state is set to whether that name starts with "__tabbed_", which is the naming used by the content-tab radio inputs on this page. -->
<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
</div>
<!-- Back-to-top button. It is rendered with the hidden attribute (presumably revealed by the theme script; confirm against the bundle). The arrow icon is decorative, so it is hidden from assistive technology and the visible text supplies the accessible name. -->
<button type="button" class="md-top md-icon" data-md-component="top" hidden>
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" aria-hidden="true"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg>
Back to top
</button>
</main>
<footer class="md-footer">
<nav class="md-footer__inner md-grid" aria-label="Footer" >
<a href="../.." class="md-footer__link md-footer__link--prev" aria-label="Previous: Documentation">
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
</div>
<div class="md-footer__title">
<span class="md-footer__direction">
Previous
</span>
<div class="md-ellipsis">
Documentation
</div>
</div>
</a>
<a href="../quickstart/" class="md-footer__link md-footer__link--next" aria-label="Next: Quickstart">
<div class="md-footer__title">
<span class="md-footer__direction">
Next
</span>
<div class="md-ellipsis">
Quickstart
</div>
</div>
<div class="md-footer__button md-icon">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M4 11v2h12l-5.5 5.5 1.42 1.42L19.84 12l-7.92-7.92L10.5 5.5 16 11z"/></svg>
</div>
</a>
</nav>
<div class="md-footer-meta md-typeset">
<div class="md-footer-meta__inner md-grid">
<div class="md-copyright">
Made with
<a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
Material for MkDocs
</a>
</div>
<div class="md-social">
<a href="https://github.com/docling-project/docling" target="_blank" rel="noopener" title="github.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M173.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6m-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3m44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9M252.8 8C114.1 8 8 113.3 8 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C436.2 457.8 504 362.9 504 252 504 113.3 391.5 8 252.8 8M105.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1m-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7m32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1m-11.4-14.7c-1.6 1-1.6 3.6 0 5.9s4.3 3.3 5.6 2.3c1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2"/></svg>
</a>
<a href="https://docling.ai/discord" target="_blank" rel="noopener" title="docling.ai" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 576 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M492.5 69.8c-.2-.3-.4-.6-.8-.7-38.1-17.5-78.4-30-119.7-37.1-.4-.1-.8 0-1.1.1s-.6.4-.8.8c-5.5 9.9-10.5 20.2-14.9 30.6-44.6-6.8-89.9-6.8-134.4 0-4.5-10.5-9.5-20.7-15.1-30.6-.2-.3-.5-.6-.8-.8s-.7-.2-1.1-.2C162.5 39 122.2 51.5 84.1 69c-.3.1-.6.4-.8.7C7.1 183.5-13.8 294.6-3.6 404.2c0 .3.1.5.2.8s.3.4.5.6c44.4 32.9 94 58 146.8 74.2.4.1.8.1 1.1 0s.7-.4.9-.7c11.3-15.4 21.4-31.8 30-48.8.1-.2.2-.5.2-.8s0-.5-.1-.8-.2-.5-.4-.6-.4-.3-.7-.4c-15.8-6.1-31.2-13.4-45.9-21.9-.3-.2-.5-.4-.7-.6s-.3-.6-.3-.9 0-.6.2-.9.3-.5.6-.7c3.1-2.3 6.2-4.7 9.1-7.1.3-.2.6-.4.9-.4s.7 0 1 .1c96.2 43.9 200.4 43.9 295.5 0 .3-.1.7-.2 1-.2s.7.2.9.4c2.9 2.4 6 4.9 9.1 7.2.2.2.4.4.6.7s.2.6.2.9-.1.6-.3.9-.4.5-.6.6c-14.7 8.6-30 15.9-45.9 21.8-.2.1-.5.2-.7.4s-.3.4-.4.7-.1.5-.1.8.1.5.2.8c8.8 17 18.8 33.3 30 48.8.2.3.6.6.9.7s.8.1 1.1 0c52.9-16.2 102.6-41.3 147.1-74.2.2-.2.4-.4.5-.6s.2-.5.2-.8c12.3-126.8-20.5-236.9-86.9-334.5zm-302 267.7c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.4 59.2-52.8 59.2m195.4 0c-29 0-52.8-26.6-52.8-59.2s23.4-59.2 52.8-59.2c29.7 0 53.3 26.8 52.8 59.2 0 32.7-23.2 59.2-52.8 59.2"/></svg>
</a>
<a href="https://linkedin.com/company/docling/" target="_blank" rel="noopener" title="linkedin.com" class="md-social__link">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M416 32H31.9C14.3 32 0 46.5 0 64.3v383.4C0 465.5 14.3 480 31.9 480H416c17.6 0 32-14.5 32-32.3V64.3c0-17.8-14.4-32.3-32-32.3M135.4 416H69V202.2h66.5V416zM102.2 96a38.5 38.5 0 1 1 0 77 38.5 38.5 0 1 1 0-77m282.1 320h-66.4V312c0-24.8-.5-56.7-34.5-56.7-34.6 0-39.9 27-39.9 54.9V416h-66.4V202.2h63.7v29.2h.9c8.9-16.8 30.6-34.5 62.9-34.5 67.2 0 79.7 44.3 79.7 101.9z"/></svg>
</a>
</div>
</div>
</div>
</footer>
</div>
<div class="md-dialog" data-md-component="dialog">
<div class="md-dialog__inner md-typeset"></div>
</div>
<div class="md-progress" data-md-component="progress" role="progressbar"></div>
<script id="__config" type="application/json">{"annotate": null, "base": "../..", "features": ["content.tabs.link", "content.code.annotate", "content.code.copy", "content.tooltips", "announce.dismiss", "navigation.footer", "navigation.tabs", "navigation.indexes", "navigation.instant", "navigation.instant.prefetch", "navigation.instant.progress", "navigation.path", "navigation.top", "navigation.tracking", "search.suggest", "toc.follow"], "search": "../../assets/javascripts/workers/search.7a47a382.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script>
<script src="../../assets/javascripts/bundle.e71a0d61.min.js"></script>
</body>
</html>