diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml
index 31a6e95c..622338be 100644
--- a/.github/workflows/cd.yml
+++ b/.github/workflows/cd.yml
@@ -10,19 +10,14 @@ env:
PYTHON_KEYRING_BACKEND: keyring.backends.null.Keyring
jobs:
- # To be enabled when we add docs
- # docs:
- # permissions:
- # contents: write
- # runs-on: ubuntu-latest
- # steps:
- # - uses: actions/checkout@v3
- # - uses: ./.github/actions/setup-poetry
- # - name: Build and push docs
- # run: poetry run mkdocs gh-deploy --force
-
code-checks:
uses: ./.github/workflows/checks.yml
+ build-deploy-docs:
+ uses: ./.github/workflows/docs.yml
+ with:
+ deploy: true
+ permissions:
+ contents: write
pre-release-check:
runs-on: ubuntu-latest
outputs:
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 6a124361..05f23fa0 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -10,7 +10,7 @@ jobs:
steps:
- uses: actions/checkout@v3
- name: Install tesseract
- run: sudo apt-get install -y tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa libleptonica-dev libtesseract-dev pkg-config
+ run: sudo apt-get update && sudo apt-get install -y tesseract-ocr tesseract-ocr-eng tesseract-ocr-fra tesseract-ocr-deu tesseract-ocr-spa libleptonica-dev libtesseract-dev pkg-config
- name: Set TESSDATA_PREFIX
run: |
echo "TESSDATA_PREFIX=$(dpkg -L tesseract-ocr-eng | grep tessdata$)" >> "$GITHUB_ENV"
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 81a31744..a1ceca81 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -16,13 +16,7 @@ env:
jobs:
code-checks:
uses: ./.github/workflows/checks.yml
-
- # To enable when we add the ./docs
- # build-docs:
- # runs-on: ubuntu-latest
- # steps:
- # - uses: actions/checkout@v3
- # - uses: ./.github/actions/setup-poetry
- # - name: Build docs
- # run: poetry run mkdocs build --verbose --clean
-
+ build-docs:
+ uses: ./.github/workflows/docs.yml
+ with:
+ deploy: false
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 00000000..d2b9bdd6
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,28 @@
+on:
+ workflow_call:
+ inputs:
+ deploy:
+ type: boolean
+ description: "If true, the docs will be deployed."
+ default: false
+
+jobs:
+ run-docs:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Install poetry
+ run: pipx install poetry==1.8.3
+ shell: bash
+ - uses: actions/setup-python@v5
+ with:
+ cache: 'poetry'
+ - name: Install dependencies
+ run: poetry install --only docs
+ shell: bash
+ - name: Build docs
+ run: poetry run mkdocs build --verbose --clean
+ - name: Build and push docs
+ if: inputs.deploy
+ run: poetry run mkdocs gh-deploy --force
+
\ No newline at end of file
diff --git a/README.md b/README.md
index 099a09e6..67c9f1bb 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-
+
@@ -19,6 +19,7 @@
Docling parses documents and exports them to the desired format with ease and speed.
+
## Features
* 🗂️ Multi-format support for input (PDF, DOCX etc.) & output (Markdown, JSON etc.)
@@ -53,7 +54,6 @@ source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL
converter = DocumentConverter()
result = converter.convert(source)
print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
-print(result.document.export_to_document_tokens()) # output: "..."
```
diff --git a/docs/assets/docling_processing.png b/docs/assets/docling_processing.png
new file mode 100644
index 00000000..5fc165e1
Binary files /dev/null and b/docs/assets/docling_processing.png differ
diff --git a/docs/concepts/docling_format.md b/docs/concepts/docling_document.md
similarity index 75%
rename from docs/concepts/docling_format.md
rename to docs/concepts/docling_document.md
index 0e84e44f..00b5452f 100644
--- a/docs/concepts/docling_format.md
+++ b/docs/concepts/docling_document.md
@@ -1,4 +1,4 @@
-With Docling v2, we introduce a unified document representation format called `DoclingDocument`. It is defined as a
+With Docling v2, we introduce a unified document representation format called `DoclingDocument`. It is defined as a
pydantic datatype, which can express several features common to documents, such as:
* Text, Tables, Pictures, and more
@@ -9,15 +9,16 @@ pydantic datatype, which can express several features common to documents, such
It also brings a set of document construction APIs to build up a `DoclingDocument` from scratch.
-# Example document structures
+## Example document structures
-To illustrate the features of the `DoclingDocument` format, consider the following side-by-side comparison of a
-`DoclingDocument` converted from `test/data/word_sample.docx`. Left side shows snippets from the converted document
-serialized as YAML, right side shows the corresponding visual parts in MS Word.
+To illustrate the features of the `DoclingDocument` format, in the subsections below we consider the
+`DoclingDocument` converted from `tests/data/word_sample.docx` and we present some side-by-side comparisons,
+where the left side shows snippets from the converted document
+serialized as YAML and the right one shows the corresponding parts of the original MS Word document.
-## Basic structure
+### Basic structure
-A `DoclingDocument` exposes top-level fields for the document content, organized in two categories.
+A `DoclingDocument` exposes top-level fields for the document content, organized in two categories.
The first category is the _content items_, which are stored in these fields:
- `texts`: All items that have a text representation (paragraph, section heading, equation, ...). Base class is `TextItem`.
@@ -34,32 +35,34 @@ The second category is _content structure_, which is encapsualted in:
- `furniture`: The root node of a tree-structure for all items that don't belong into the body (headers, footers, ...)
- `groups`: A set of items that don't represent content, but act as containers for other content items (e.g. a list, a chapter)
-All of the above fields are only storing `NodeItem` instances, which reference children and parents
-through JSON pointers.
+All of the above fields are only storing `NodeItem` instances, which reference children and parents
+through JSON pointers.
The reading order of the document is encapsulated through the `body` tree and the order of _children_ in each item
in the tree.
-Below example shows how all items in the first page are nested below the `title` item (`#/texts/1`).
+Below example shows how all items in the first page are nested below the `title` item (`#/texts/1`).

-## Grouping
+### Grouping
Below example shows how all items under the heading "Let's swim" (`#/texts/5`) are nested as chilrden. The children of
-"Let's swim" are both text items and groups, which contain the list elements. The group items are stored in the
+"Let's swim" are both text items and groups, which contain the list elements. The group items are stored in the
top-level `groups` field.

-## Tables
+
diff --git a/docs/usage.md b/docs/usage.md
new file mode 100644
index 00000000..3f9b679e
--- /dev/null
+++ b/docs/usage.md
@@ -0,0 +1,171 @@
+## Conversion
+
+### Convert a single document
+
+To convert individual PDF documents, use `convert()`, for example:
+
+```python
+from docling.document_converter import DocumentConverter
+
+source = "https://arxiv.org/pdf/2408.09869" # PDF path or URL
+converter = DocumentConverter()
+result = converter.convert(source)
+print(result.document.export_to_markdown()) # output: "## Docling Technical Report[...]"
+```
+
+### CLI
+
+You can also use Docling directly from your command line to convert individual files —be it local or by URL— or whole directories.
+
+A simple example would look like this:
+```console
+docling https://arxiv.org/pdf/2206.01062
+```
+
+To see all available options (export formats etc.) run `docling --help`.
+
+
+ CLI reference
+
+ Here are the available options as of this writing (for an up-to-date listing, run `docling --help`):
+
+ ```console
+ $ docling --help
+
+ Usage: docling [OPTIONS] source
+
+╭─ Arguments ───────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ * input_sources source PDF files to convert. Can be local file / directory paths or URL. [default: None] │
+│ [required] │
+╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
+│ --from [docx|pptx|html|image|pdf] Specify input formats to convert from. │
+│ Defaults to all formats. │
+│ [default: None] │
+│ --to [md|json|text|doctags] Specify output formats. Defaults to │
+│ Markdown. │
+│ [default: None] │
+│ --ocr --no-ocr If enabled, the bitmap content will be │
+│ processed using OCR. │
+│ [default: ocr] │
+│ --ocr-engine [easyocr|tesseract_cli|tesseract] The OCR engine to use. [default: easyocr] │
+│ --abort-on-error --no-abort-on-error If enabled, the bitmap content will be │
+│ processed using OCR. │
+│ [default: no-abort-on-error] │
+│ --output PATH Output directory where results are saved. │
+│ [default: .] │
+│ --version Show version information. │
+│ --help Show this message and exit. │
+╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
+ ```
+
+
+
+
+### Advanced options
+
+#### Adjust pipeline features
+
+The example file [custom_convert.py](./examples/custom_convert.py) contains multiple ways
+one can adjust the conversion pipeline and features.
+
+
+##### Control PDF table extraction options
+
+You can control if table structure recognition should map the recognized structure back to PDF cells (default) or use text cells from the structure prediction itself.
+This can improve output quality if you find that multiple columns in extracted tables are erroneously merged into one.
+
+
+```python
+from docling.datamodel.base_models import InputFormat
+from docling.document_converter import DocumentConverter, PdfFormatOption
+from docling.datamodel.pipeline_options import PdfPipelineOptions
+
+pipeline_options = PdfPipelineOptions(do_table_structure=True)
+pipeline_options.table_structure_options.do_cell_matching = False # uses text cells predicted from table structure model
+
+doc_converter = DocumentConverter(
+ format_options={
+ InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
+ }
+)
+```
+
+Since docling 1.16.0: You can control which TableFormer mode you want to use. Choose between `TableFormerMode.FAST` (default) and `TableFormerMode.ACCURATE` (better, but slower) to receive better quality with difficult table structures.
+
+```python
+from docling.datamodel.base_models import InputFormat
+from docling.document_converter import DocumentConverter, PdfFormatOption
+from docling.datamodel.pipeline_options import PdfPipelineOptions, TableFormerMode
+
+pipeline_options = PdfPipelineOptions(do_table_structure=True)
+pipeline_options.table_structure_options.mode = TableFormerMode.ACCURATE # use more accurate TableFormer model
+
+doc_converter = DocumentConverter(
+ format_options={
+ InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
+ }
+)
+```
+
+#### Impose limits on the document size
+
+You can limit the file size and the number of pages allowed to be processed per document:
+
+```python
+from pathlib import Path
+from docling.document_converter import DocumentConverter
+
+source = "https://arxiv.org/pdf/2408.09869"
+converter = DocumentConverter()
+result = converter.convert(source, max_num_pages=100, max_file_size=20971520)
+```
+
+#### Convert from binary PDF streams
+
+You can convert PDFs from a binary stream instead of from the filesystem as follows:
+
+```python
+from io import BytesIO
+from docling.datamodel.base_models import DocumentStream
+from docling.document_converter import DocumentConverter
+
+buf = BytesIO(your_binary_stream)
+source = DocumentStream(filename="my_doc.pdf", stream=buf)
+converter = DocumentConverter()
+result = converter.convert(source)
+```
+
+#### Limit resource usage
+
+You can limit the CPU threads used by Docling by setting the environment variable `OMP_NUM_THREADS` accordingly. The default setting uses 4 CPU threads.
+
+
+## Chunking
+
+You can perform a hierarchy-aware chunking of a Docling document as follows:
+
+```python
+from docling.document_converter import DocumentConverter
+from docling_core.transforms.chunker import HierarchicalChunker
+
+conv_res = DocumentConverter().convert("https://arxiv.org/pdf/2206.01062")
+doc = conv_res.document
+chunks = list(HierarchicalChunker().chunk(doc))
+
+print(chunks[30])
+# {
+# "text": "Lately, new types of ML models for document-layout analysis have emerged [...]",
+# "meta": {
+# "doc_items": [{
+# "self_ref": "#/texts/40",
+# "label": "text",
+# "prov": [{
+# "page_no": 2,
+# "bbox": {"l": 317.06, "t": 325.81, "r": 559.18, "b": 239.97, ...},
+# }]
+# }],
+# "headings": ["2 RELATED WORK"],
+# }
+# }
+```
diff --git a/docs/v2.md b/docs/v2.md
index 32d4a4d8..1c6ee6d9 100644
--- a/docs/v2.md
+++ b/docs/v2.md
@@ -2,7 +2,7 @@
Docling v2 introduces several new features:
-- Understands and converts PDF, MS Word, MS Powerpoint, HTML and several image formats
+- Understands and converts PDF, MS Word, MS Powerpoint, HTML and several image formats
- Produces a new, universal document representation which can encapsulate document hierarchy
- Comes with a fresh new API and CLI
@@ -22,7 +22,7 @@ docling myfile.pdf --to json --to md --no-ocr
docling ./input/dir --from pdf
# Convert PDF and Word files in input directory to Markdown and JSON
-docling ./input/dir --from pdf --from docx --to md --to json --output ./scratch
+docling ./input/dir --from pdf --from docx --to md --to json --output ./scratch
# Convert all supported files in input directory to Markdown, but abort on first error
docling ./input/dir --output ./scratch --abort-on-error
@@ -38,8 +38,8 @@ docling ./input/dir --output ./scratch --abort-on-error
### Setting up a `DocumentConverter`
To accomodate many input formats, we changed the way you need to set up your `DocumentConverter` object.
-You can now define a list of allowed formats on the `DocumentConverter` initialization, and specify custom options
-per-format if desired. By default, all supported formats are allowed. If you don't provide `format_options`, defaults
+You can now define a list of allowed formats on the `DocumentConverter` initialization, and specify custom options
+per-format if desired. By default, all supported formats are allowed. If you don't provide `format_options`, defaults
will be used for all `allowed_formats`.
Format options can include the pipeline class to use, the options to provide to the pipeline, and the document backend.
@@ -59,7 +59,7 @@ from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.backend.pypdfium2_backend import PyPdfiumDocumentBackend
## Default initialization still works as before:
-# doc_converter = DocumentConverter()
+# doc_converter = DocumentConverter()
# previous `PipelineOptions` is now `PdfPipelineOptions`
@@ -68,7 +68,7 @@ pipeline_options.do_ocr = False
pipeline_options.do_table_structure = True
#...
-## Custom options are now defined per format.
+## Custom options are now defined per format.
doc_converter = (
DocumentConverter( # all of the below is optional, has internal defaults.
allowed_formats=[
@@ -100,8 +100,8 @@ More options are shown in the following example units:
### Converting documents
-We have simplified the way you can feed input to the `DocumentConverter` and renamed the conversion methods for
-better semantics. You can now call the conversion directly with a single file, or a list of input files,
+We have simplified the way you can feed input to the `DocumentConverter` and renamed the conversion methods for
+better semantics. You can now call the conversion directly with a single file, or a list of input files,
or `DocumentStream` objects, without constructing a `DocumentConversionInput` object first.
* `DocumentConverter.convert` now converts a single file input (previously `DocumentConverter.convert_single`).
@@ -129,7 +129,7 @@ input_files = [
conv_results_iter = doc_converter.convert_all(input_files) # previously `convert`
```
-Through the `raises_on_error` argument, you can also control if the conversion should raise exceptions when first
+Through the `raises_on_error` argument, you can also control if the conversion should raise exceptions when first
encountering a problem, or resiliently convert all files first and reflect errors in each file's conversion status.
By default, any error is immediately raised and the conversion aborts (previously, exceptions were swallowed).
@@ -139,7 +139,7 @@ conv_results_iter = doc_converter.convert_all(input_files, raises_on_error=False
```
-### Access document structures
+### Access document structures
We have simplified how you can access and export the converted document data, too. Our universal document representation
is now available in conversion results as a `DoclingDocument` object.
@@ -167,7 +167,7 @@ for item, level in conv_result.document.iterate_items:
conv_result.legacy_document # provides the representation in previous ExportedCCSDocument type
```
-## Export into JSON, Markdown, Doctags
+### Export into JSON, Markdown, Doctags
**Note**: All `render_...` methods in `ConversionResult` have been removed in Docling v2,
and are now available on `DoclingDocument` as:
@@ -184,7 +184,7 @@ print(conv_res.document.export_to_markdown())
print(conv_res.document.export_to_document_tokens())
```
-**Note**: While it is deprecated, you can _still_ export Docling v1 JSON format. This is available through the same
+**Note**: While it is deprecated, you can _still_ export Docling v1 JSON format. This is available through the same
methods as on the `DoclingDocument` type:
```shell
## Export legacy document representation to desired format, for v1 compatibility:
@@ -193,7 +193,7 @@ print(conv_res.legacy_document.export_to_markdown())
print(conv_res.legacy_document.export_to_document_tokens())
```
-## Reload a `DoclingDocument` stored as JSON
+### Reload a `DoclingDocument` stored as JSON
You can save and reload a `DoclingDocument` to disk in JSON format using the following codes:
@@ -211,3 +211,19 @@ with Path("./doc.json").open("r") as fp:
```
+### Chunking
+
+Docling v2 defines new base classes for chunking:
+
+- `BaseMeta` for chunk metadata
+- `BaseChunk` containing the chunk text and metadata, and
+- `BaseChunker` for chunkers, producing chunks out of a `DoclingDocument`.
+
+Additionally, it provides an updated `HierarchicalChunker` implementation, which
+leverages the new `DoclingDocument` and provides a new, richer chunk output format, including:
+
+- the respective doc items for grounding
+- any applicable headings for context
+- any applicable captions for context
+
+For an example, check out [Chunking usage](../usage/#chunking).
diff --git a/mkdocs.yml b/mkdocs.yml
index 5fd180a4..1fef4428 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -54,10 +54,10 @@ nav:
- Get started:
- Home: index.md
- Installation: installation.md
- - Use Docling: use_docling.md
+ - Usage: usage.md
- Docling v2: v2.md
- Concepts:
- - The Docling Document format: concepts/docling_format.md
+ - Docling Document: concepts/docling_document.md
# - Chunking: concepts/chunking.md
- Examples:
- Conversion:
diff --git a/poetry.lock b/poetry.lock
index 656cd50b..d5dee06b 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -196,8 +196,8 @@ files = [
lazy-object-proxy = ">=1.4.0"
typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
wrapt = [
- {version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
{version = ">=1.11,<2", markers = "python_version < \"3.11\""},
+ {version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
]
[[package]]
@@ -943,8 +943,8 @@ networkx = ">=3.1,<4.0"
netwulf = ">=0.1.5,<0.2.0"
numerize = ">=0.12,<0.13"
numpy = [
- {version = ">=2.0.2,<3.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.26.4,<2.0.0", markers = "python_version >= \"3.9\" and python_version < \"3.13\""},
+ {version = ">=2.0.2,<3.0.0", markers = "python_version >= \"3.13\""},
]
pandas = {version = ">=2.1.4,<3.0.0", markers = "python_version >= \"3.9\""}
python-dotenv = ">=1.0.0,<2.0.0"
@@ -1030,8 +1030,8 @@ jsonlines = ">=3.1.0,<4.0.0"
lxml = ">=4.9.1,<5.0.0"
mean_average_precision = ">=2021.4.26.0,<2022.0.0.0"
numpy = [
- {version = ">=2.1.0,<3.0.0", markers = "python_version >= \"3.13\""},
{version = ">=1.24.4,<2.0.0", markers = "python_version < \"3.13\""},
+ {version = ">=2.1.0,<3.0.0", markers = "python_version >= \"3.13\""},
]
opencv-python-headless = ">=4.6.0.66,<5.0.0.0"
Pillow = ">=10.0.0,<11.0.0"
@@ -2355,8 +2355,8 @@ jsonpatch = ">=1.33,<2.0"
langsmith = ">=0.1.112,<0.2.0"
packaging = ">=23.2,<25"
pydantic = [
- {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
{version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
+ {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
]
PyYAML = ">=5.3"
tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"
@@ -2424,8 +2424,8 @@ files = [
httpx = ">=0.23.0,<1"
orjson = ">=3.9.14,<4.0.0"
pydantic = [
- {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
{version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
+ {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
]
requests = ">=2,<3"
requests-toolbelt = ">=1.0.0,<2.0.0"
@@ -3737,10 +3737,10 @@ files = [
[package.dependencies]
numpy = [
- {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""},
{version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""},
{version = ">=1.23.5", markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
[[package]]
@@ -3888,9 +3888,9 @@ files = [
[package.dependencies]
numpy = [
- {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
+ {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@@ -4480,8 +4480,8 @@ files = [
annotated-types = ">=0.6.0"
pydantic-core = "2.23.4"
typing-extensions = [
- {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
{version = ">=4.6.1", markers = "python_version < \"3.13\""},
+ {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
]
[package.extras]
@@ -4649,8 +4649,8 @@ files = [
astroid = ">=2.15.8,<=2.17.0-dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [
- {version = ">=0.3.6", markers = "python_version >= \"3.11\""},
{version = ">=0.2", markers = "python_version < \"3.11\""},
+ {version = ">=0.3.6", markers = "python_version >= \"3.11\""},
]
isort = ">=4.2.5,<6"
mccabe = ">=0.6,<0.8"
@@ -7513,4 +7513,4 @@ tesserocr = ["tesserocr"]
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
-content-hash = "8eb8024c32b37b2367c8d83e2833c3d118b3cfdd2f53966712c95dec8d830199"
+content-hash = "ba5b52f1a318810bd363d2aa4f60fdfc2e5899e1729b0f0c51026082c93d23e0"
diff --git a/pyproject.toml b/pyproject.toml
index 684d9a83..71281a46 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,6 +75,8 @@ pandas-stubs = "^2.1.4.231227"
ipykernel = "^6.29.5"
ipywidgets = "^8.1.5"
nbqa = "^1.9.0"
+
+[tool.poetry.group.docs.dependencies]
mkdocs-material = "^9.5.40"
mkdocs-jupyter = "^0.25.0"