Skip to content

Document converter

This is an automatically generated API reference for the main components of Docling.

document_converter

Classes:

DocumentConverter

DocumentConverter(allowed_formats: Optional[list[InputFormat]] = None, format_options: Optional[dict[InputFormat, FormatOption]] = None)

Methods:

Attributes:

allowed_formats instance-attribute

allowed_formats = allowed_formats if allowed_formats is not None else list(InputFormat)

format_to_options instance-attribute

format_to_options: dict[InputFormat, FormatOption] = {format: (_get_default_option(format=format) if (custom_option := (format_options or {}).get(format)) is None else custom_option) for format in allowed_formats}

initialized_pipelines instance-attribute

initialized_pipelines: dict[tuple[Type[BasePipeline], str], BasePipeline] = {}

convert

convert(source: Union[Path, str, DocumentStream], headers: Optional[dict[str, str]] = None, raises_on_error: bool = True, max_num_pages: int = maxsize, max_file_size: int = maxsize, page_range: PageRange = DEFAULT_PAGE_RANGE) -> ConversionResult

convert_all

convert_all(source: Iterable[Union[Path, str, DocumentStream]], headers: Optional[dict[str, str]] = None, raises_on_error: bool = True, max_num_pages: int = maxsize, max_file_size: int = maxsize, page_range: PageRange = DEFAULT_PAGE_RANGE) -> Iterator[ConversionResult]

convert_string

convert_string(content: str, format: InputFormat, name: Optional[str] = None) -> ConversionResult

initialize_pipeline

initialize_pipeline(format: InputFormat)

Initialize the conversion pipeline for the selected format.

ConversionResult pydantic-model

Bases: ConversionAssets

Show JSON schema:
{
  "$defs": {
    "AssembledUnit": {
      "properties": {
        "elements": {
          "default": [],
          "items": {
            "anyOf": [
              {
                "$ref": "#/$defs/TextElement"
              },
              {
                "$ref": "#/$defs/Table"
              },
              {
                "$ref": "#/$defs/FigureElement"
              },
              {
                "$ref": "#/$defs/ContainerElement"
              }
            ]
          },
          "title": "Elements",
          "type": "array"
        },
        "body": {
          "default": [],
          "items": {
            "anyOf": [
              {
                "$ref": "#/$defs/TextElement"
              },
              {
                "$ref": "#/$defs/Table"
              },
              {
                "$ref": "#/$defs/FigureElement"
              },
              {
                "$ref": "#/$defs/ContainerElement"
              }
            ]
          },
          "title": "Body",
          "type": "array"
        },
        "headers": {
          "default": [],
          "items": {
            "anyOf": [
              {
                "$ref": "#/$defs/TextElement"
              },
              {
                "$ref": "#/$defs/Table"
              },
              {
                "$ref": "#/$defs/FigureElement"
              },
              {
                "$ref": "#/$defs/ContainerElement"
              }
            ]
          },
          "title": "Headers",
          "type": "array"
        }
      },
      "title": "AssembledUnit",
      "type": "object"
    },
    "BaseMeta": {
      "additionalProperties": true,
      "description": "Base class for metadata.",
      "properties": {
        "summary": {
          "anyOf": [
            {
              "$ref": "#/$defs/SummaryMetaField"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        }
      },
      "title": "BaseMeta",
      "type": "object"
    },
    "BitmapResource": {
      "description": "Model representing a bitmap resource with positioning and URI information.",
      "properties": {
        "index": {
          "default": -1,
          "title": "Index",
          "type": "integer"
        },
        "rect": {
          "$ref": "#/$defs/BoundingRectangle"
        },
        "uri": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Uri"
        }
      },
      "required": [
        "rect"
      ],
      "title": "BitmapResource",
      "type": "object"
    },
    "BoundingBox": {
      "description": "BoundingBox.",
      "properties": {
        "l": {
          "title": "L",
          "type": "number"
        },
        "t": {
          "title": "T",
          "type": "number"
        },
        "r": {
          "title": "R",
          "type": "number"
        },
        "b": {
          "title": "B",
          "type": "number"
        },
        "coord_origin": {
          "$ref": "#/$defs/CoordOrigin",
          "default": "TOPLEFT"
        }
      },
      "required": [
        "l",
        "t",
        "r",
        "b"
      ],
      "title": "BoundingBox",
      "type": "object"
    },
    "BoundingRectangle": {
      "description": "Model representing a rectangular boundary with four corner points.",
      "properties": {
        "r_x0": {
          "title": "R X0",
          "type": "number"
        },
        "r_y0": {
          "title": "R Y0",
          "type": "number"
        },
        "r_x1": {
          "title": "R X1",
          "type": "number"
        },
        "r_y1": {
          "title": "R Y1",
          "type": "number"
        },
        "r_x2": {
          "title": "R X2",
          "type": "number"
        },
        "r_y2": {
          "title": "R Y2",
          "type": "number"
        },
        "r_x3": {
          "title": "R X3",
          "type": "number"
        },
        "r_y3": {
          "title": "R Y3",
          "type": "number"
        },
        "coord_origin": {
          "$ref": "#/$defs/CoordOrigin",
          "default": "BOTTOMLEFT"
        }
      },
      "required": [
        "r_x0",
        "r_y0",
        "r_x1",
        "r_y1",
        "r_x2",
        "r_y2",
        "r_x3",
        "r_y3"
      ],
      "title": "BoundingRectangle",
      "type": "object"
    },
    "ChartBar": {
      "description": "Represents a bar in a bar chart.\n\nAttributes:\n    label (str): The label for the bar.\n    values (float): The value associated with the bar.",
      "properties": {
        "label": {
          "title": "Label",
          "type": "string"
        },
        "values": {
          "title": "Values",
          "type": "number"
        }
      },
      "required": [
        "label",
        "values"
      ],
      "title": "ChartBar",
      "type": "object"
    },
    "ChartLine": {
      "description": "Represents a line in a line chart.\n\nAttributes:\n    label (str): The label for the line.\n    values (List[Tuple[float, float]]): A list of (x, y) coordinate pairs\n        representing the line's data points.",
      "properties": {
        "label": {
          "title": "Label",
          "type": "string"
        },
        "values": {
          "items": {
            "maxItems": 2,
            "minItems": 2,
            "prefixItems": [
              {
                "type": "number"
              },
              {
                "type": "number"
              }
            ],
            "type": "array"
          },
          "title": "Values",
          "type": "array"
        }
      },
      "required": [
        "label",
        "values"
      ],
      "title": "ChartLine",
      "type": "object"
    },
    "ChartPoint": {
      "description": "Represents a point in a scatter chart.\n\nAttributes:\n    value (Tuple[float, float]): A (x, y) coordinate pair representing a point in a\n        chart.",
      "properties": {
        "value": {
          "maxItems": 2,
          "minItems": 2,
          "prefixItems": [
            {
              "type": "number"
            },
            {
              "type": "number"
            }
          ],
          "title": "Value",
          "type": "array"
        }
      },
      "required": [
        "value"
      ],
      "title": "ChartPoint",
      "type": "object"
    },
    "ChartSlice": {
      "description": "Represents a slice in a pie chart.\n\nAttributes:\n    label (str): The label for the slice.\n    value (float): The value represented by the slice.",
      "properties": {
        "label": {
          "title": "Label",
          "type": "string"
        },
        "value": {
          "title": "Value",
          "type": "number"
        }
      },
      "required": [
        "label",
        "value"
      ],
      "title": "ChartSlice",
      "type": "object"
    },
    "ChartStackedBar": {
      "description": "Represents a stacked bar in a stacked bar chart.\n\nAttributes:\n    label (List[str]): The labels for the stacked bars. Multiple values are stored\n        in cases where the chart is \"double stacked,\" meaning bars are stacked both\n        horizontally and vertically.\n    values (List[Tuple[str, int]]): A list of values representing different segments\n        of the stacked bar along with their label.",
      "properties": {
        "label": {
          "items": {
            "type": "string"
          },
          "title": "Label",
          "type": "array"
        },
        "values": {
          "items": {
            "maxItems": 2,
            "minItems": 2,
            "prefixItems": [
              {
                "type": "string"
              },
              {
                "type": "integer"
              }
            ],
            "type": "array"
          },
          "title": "Values",
          "type": "array"
        }
      },
      "required": [
        "label",
        "values"
      ],
      "title": "ChartStackedBar",
      "type": "object"
    },
    "Cluster": {
      "properties": {
        "id": {
          "title": "Id",
          "type": "integer"
        },
        "label": {
          "$ref": "#/$defs/DocItemLabel"
        },
        "bbox": {
          "$ref": "#/$defs/BoundingBox"
        },
        "confidence": {
          "default": 1.0,
          "title": "Confidence",
          "type": "number"
        },
        "cells": {
          "default": [],
          "items": {
            "$ref": "#/$defs/TextCell"
          },
          "title": "Cells",
          "type": "array"
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/Cluster"
          },
          "title": "Children",
          "type": "array"
        }
      },
      "required": [
        "id",
        "label",
        "bbox"
      ],
      "title": "Cluster",
      "type": "object"
    },
    "CodeItem": {
      "additionalProperties": false,
      "description": "CodeItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/FloatingMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "const": "code",
          "default": "code",
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "orig": {
          "title": "Orig",
          "type": "string"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "formatting": {
          "anyOf": [
            {
              "$ref": "#/$defs/Formatting"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "hyperlink": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Hyperlink"
        },
        "captions": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Captions",
          "type": "array"
        },
        "references": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "References",
          "type": "array"
        },
        "footnotes": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Footnotes",
          "type": "array"
        },
        "image": {
          "anyOf": [
            {
              "$ref": "#/$defs/ImageRef"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "code_language": {
          "$ref": "#/$defs/CodeLanguageLabel",
          "default": "unknown"
        }
      },
      "required": [
        "self_ref",
        "orig",
        "text"
      ],
      "title": "CodeItem",
      "type": "object"
    },
    "CodeLanguageLabel": {
      "description": "CodeLanguageLabel.",
      "enum": [
        "Ada",
        "Awk",
        "Bash",
        "bc",
        "C",
        "C#",
        "C++",
        "CMake",
        "COBOL",
        "CSS",
        "Ceylon",
        "Clojure",
        "Crystal",
        "Cuda",
        "Cython",
        "D",
        "Dart",
        "dc",
        "Dockerfile",
        "Elixir",
        "Erlang",
        "FORTRAN",
        "Forth",
        "Go",
        "HTML",
        "Haskell",
        "Haxe",
        "Java",
        "JavaScript",
        "JSON",
        "Julia",
        "Kotlin",
        "Lisp",
        "Lua",
        "Matlab",
        "MoonScript",
        "Nim",
        "OCaml",
        "ObjectiveC",
        "Octave",
        "PHP",
        "Pascal",
        "Perl",
        "Prolog",
        "Python",
        "Racket",
        "Ruby",
        "Rust",
        "SML",
        "SQL",
        "Scala",
        "Scheme",
        "Swift",
        "TypeScript",
        "unknown",
        "VisualBasic",
        "XML",
        "YAML"
      ],
      "title": "CodeLanguageLabel",
      "type": "string"
    },
    "ColorRGBA": {
      "description": "Model representing an RGBA color value.",
      "properties": {
        "r": {
          "maximum": 255,
          "minimum": 0,
          "title": "R",
          "type": "integer"
        },
        "g": {
          "maximum": 255,
          "minimum": 0,
          "title": "G",
          "type": "integer"
        },
        "b": {
          "maximum": 255,
          "minimum": 0,
          "title": "B",
          "type": "integer"
        },
        "a": {
          "default": 255,
          "maximum": 255,
          "minimum": 0,
          "title": "A",
          "type": "integer"
        }
      },
      "required": [
        "r",
        "g",
        "b"
      ],
      "title": "ColorRGBA",
      "type": "object"
    },
    "ConfidenceReport": {
      "properties": {
        "parse_score": {
          "default": NaN,
          "title": "Parse Score",
          "type": "number"
        },
        "layout_score": {
          "default": NaN,
          "title": "Layout Score",
          "type": "number"
        },
        "table_score": {
          "default": NaN,
          "title": "Table Score",
          "type": "number"
        },
        "ocr_score": {
          "default": NaN,
          "title": "Ocr Score",
          "type": "number"
        },
        "pages": {
          "additionalProperties": {
            "$ref": "#/$defs/PageConfidenceScores"
          },
          "title": "Pages",
          "type": "object"
        }
      },
      "title": "ConfidenceReport",
      "type": "object"
    },
    "ContainerElement": {
      "properties": {
        "label": {
          "$ref": "#/$defs/DocItemLabel"
        },
        "id": {
          "title": "Id",
          "type": "integer"
        },
        "page_no": {
          "title": "Page No",
          "type": "integer"
        },
        "cluster": {
          "$ref": "#/$defs/Cluster"
        },
        "text": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Text"
        }
      },
      "required": [
        "label",
        "id",
        "page_no",
        "cluster"
      ],
      "title": "ContainerElement",
      "type": "object"
    },
    "ContentLayer": {
      "description": "ContentLayer.",
      "enum": [
        "body",
        "furniture",
        "background",
        "invisible",
        "notes"
      ],
      "title": "ContentLayer",
      "type": "string"
    },
    "ConversionStatus": {
      "enum": [
        "pending",
        "started",
        "failure",
        "success",
        "partial_success",
        "skipped"
      ],
      "title": "ConversionStatus",
      "type": "string"
    },
    "Coord2D": {
      "maxItems": 2,
      "minItems": 2,
      "prefixItems": [
        {
          "title": "X",
          "type": "number"
        },
        {
          "title": "Y",
          "type": "number"
        }
      ],
      "type": "array"
    },
    "CoordOrigin": {
      "description": "CoordOrigin.",
      "enum": [
        "TOPLEFT",
        "BOTTOMLEFT"
      ],
      "title": "CoordOrigin",
      "type": "string"
    },
    "DeclarativeBackendOptions": {
      "description": "Default backend options for a declarative document backend.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "declarative",
          "default": "declarative",
          "title": "Kind",
          "type": "string"
        }
      },
      "title": "DeclarativeBackendOptions",
      "type": "object"
    },
    "DescriptionAnnotation": {
      "description": "DescriptionAnnotation.",
      "properties": {
        "kind": {
          "const": "description",
          "default": "description",
          "title": "Kind",
          "type": "string"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "provenance": {
          "title": "Provenance",
          "type": "string"
        }
      },
      "required": [
        "text",
        "provenance"
      ],
      "title": "DescriptionAnnotation",
      "type": "object"
    },
    "DescriptionMetaField": {
      "additionalProperties": true,
      "description": "Description metadata field.",
      "properties": {
        "confidence": {
          "anyOf": [
            {
              "maximum": 1,
              "minimum": 0,
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The confidence of the prediction.",
          "examples": [
            0.9,
            0.42
          ],
          "title": "Confidence"
        },
        "created_by": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The origin of the prediction.",
          "examples": [
            "ibm-granite/granite-docling-258M"
          ],
          "title": "Created By"
        },
        "text": {
          "title": "Text",
          "type": "string"
        }
      },
      "required": [
        "text"
      ],
      "title": "DescriptionMetaField",
      "type": "object"
    },
    "DocItemLabel": {
      "description": "DocItemLabel.",
      "enum": [
        "caption",
        "chart",
        "footnote",
        "formula",
        "list_item",
        "page_footer",
        "page_header",
        "picture",
        "section_header",
        "table",
        "text",
        "title",
        "document_index",
        "code",
        "checkbox_selected",
        "checkbox_unselected",
        "form",
        "key_value_region",
        "grading_scale",
        "handwritten_text",
        "empty_value",
        "paragraph",
        "reference"
      ],
      "title": "DocItemLabel",
      "type": "string"
    },
    "DoclingComponentType": {
      "enum": [
        "document_backend",
        "model",
        "doc_assembler",
        "user_input",
        "pipeline"
      ],
      "title": "DoclingComponentType",
      "type": "string"
    },
    "DoclingDocument": {
      "description": "DoclingDocument.",
      "properties": {
        "schema_name": {
          "const": "DoclingDocument",
          "default": "DoclingDocument",
          "title": "Schema Name",
          "type": "string"
        },
        "version": {
          "default": "1.8.0",
          "pattern": "^(?P<major>0|[1-9]\\d*)\\.(?P<minor>0|[1-9]\\d*)\\.(?P<patch>0|[1-9]\\d*)(?:-(?P<prerelease>(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+(?P<buildmetadata>[0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$",
          "title": "Version",
          "type": "string"
        },
        "name": {
          "title": "Name",
          "type": "string"
        },
        "origin": {
          "anyOf": [
            {
              "$ref": "#/$defs/DocumentOrigin"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "furniture": {
          "$ref": "#/$defs/GroupItem",
          "default": {
            "self_ref": "#/furniture",
            "parent": null,
            "children": [],
            "content_layer": "furniture",
            "meta": null,
            "name": "_root_",
            "label": "unspecified"
          },
          "deprecated": true
        },
        "body": {
          "$ref": "#/$defs/GroupItem",
          "default": {
            "self_ref": "#/body",
            "parent": null,
            "children": [],
            "content_layer": "body",
            "meta": null,
            "name": "_root_",
            "label": "unspecified"
          }
        },
        "groups": {
          "default": [],
          "items": {
            "anyOf": [
              {
                "$ref": "#/$defs/ListGroup"
              },
              {
                "$ref": "#/$defs/InlineGroup"
              },
              {
                "$ref": "#/$defs/GroupItem"
              }
            ]
          },
          "title": "Groups",
          "type": "array"
        },
        "texts": {
          "default": [],
          "items": {
            "anyOf": [
              {
                "$ref": "#/$defs/TitleItem"
              },
              {
                "$ref": "#/$defs/SectionHeaderItem"
              },
              {
                "$ref": "#/$defs/ListItem"
              },
              {
                "$ref": "#/$defs/CodeItem"
              },
              {
                "$ref": "#/$defs/FormulaItem"
              },
              {
                "$ref": "#/$defs/TextItem"
              }
            ]
          },
          "title": "Texts",
          "type": "array"
        },
        "pictures": {
          "default": [],
          "items": {
            "$ref": "#/$defs/PictureItem"
          },
          "title": "Pictures",
          "type": "array"
        },
        "tables": {
          "default": [],
          "items": {
            "$ref": "#/$defs/TableItem"
          },
          "title": "Tables",
          "type": "array"
        },
        "key_value_items": {
          "default": [],
          "items": {
            "$ref": "#/$defs/KeyValueItem"
          },
          "title": "Key Value Items",
          "type": "array"
        },
        "form_items": {
          "default": [],
          "items": {
            "$ref": "#/$defs/FormItem"
          },
          "title": "Form Items",
          "type": "array"
        },
        "pages": {
          "additionalProperties": {
            "$ref": "#/$defs/PageItem"
          },
          "default": {},
          "title": "Pages",
          "type": "object"
        }
      },
      "required": [
        "name"
      ],
      "title": "DoclingDocument",
      "type": "object"
    },
    "DoclingVersion": {
      "properties": {
        "docling_version": {
          "default": "2.64.1",
          "title": "Docling Version",
          "type": "string"
        },
        "docling_core_version": {
          "default": "2.51.1",
          "title": "Docling Core Version",
          "type": "string"
        },
        "docling_ibm_models_version": {
          "default": "3.10.2",
          "title": "Docling Ibm Models Version",
          "type": "string"
        },
        "docling_parse_version": {
          "default": "4.7.1",
          "title": "Docling Parse Version",
          "type": "string"
        },
        "platform_str": {
          "default": "Linux-6.11.0-1018-azure-x86_64-with-glibc2.39",
          "title": "Platform Str",
          "type": "string"
        },
        "py_impl_version": {
          "default": "cpython-312",
          "title": "Py Impl Version",
          "type": "string"
        },
        "py_lang_version": {
          "default": "3.12.3",
          "title": "Py Lang Version",
          "type": "string"
        }
      },
      "title": "DoclingVersion",
      "type": "object"
    },
    "DocumentLimits": {
      "properties": {
        "max_num_pages": {
          "default": 9223372036854775807,
          "title": "Max Num Pages",
          "type": "integer"
        },
        "max_file_size": {
          "default": 9223372036854775807,
          "title": "Max File Size",
          "type": "integer"
        },
        "page_range": {
          "default": [
            1,
            9223372036854775807
          ],
          "title": "Page Range"
        }
      },
      "title": "DocumentLimits",
      "type": "object"
    },
    "DocumentOrigin": {
      "description": "FileSource.",
      "properties": {
        "mimetype": {
          "title": "Mimetype",
          "type": "string"
        },
        "binary_hash": {
          "maximum": 18446744073709551615,
          "minimum": 0,
          "title": "Binary Hash",
          "type": "integer"
        },
        "filename": {
          "title": "Filename",
          "type": "string"
        },
        "uri": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Uri"
        }
      },
      "required": [
        "mimetype",
        "binary_hash",
        "filename"
      ],
      "title": "DocumentOrigin",
      "type": "object"
    },
    "EquationPrediction": {
      "properties": {
        "equation_count": {
          "default": 0,
          "title": "Equation Count",
          "type": "integer"
        },
        "equation_map": {
          "additionalProperties": {
            "$ref": "#/$defs/TextElement"
          },
          "default": {},
          "title": "Equation Map",
          "type": "object"
        }
      },
      "title": "EquationPrediction",
      "type": "object"
    },
    "ErrorItem": {
      "properties": {
        "component_type": {
          "$ref": "#/$defs/DoclingComponentType"
        },
        "module_name": {
          "title": "Module Name",
          "type": "string"
        },
        "error_message": {
          "title": "Error Message",
          "type": "string"
        }
      },
      "required": [
        "component_type",
        "module_name",
        "error_message"
      ],
      "title": "ErrorItem",
      "type": "object"
    },
    "FigureClassificationPrediction": {
      "properties": {
        "figure_count": {
          "default": 0,
          "title": "Figure Count",
          "type": "integer"
        },
        "figure_map": {
          "additionalProperties": {
            "$ref": "#/$defs/FigureElement"
          },
          "default": {},
          "title": "Figure Map",
          "type": "object"
        }
      },
      "title": "FigureClassificationPrediction",
      "type": "object"
    },
    "FigureElement": {
      "properties": {
        "label": {
          "$ref": "#/$defs/DocItemLabel"
        },
        "id": {
          "title": "Id",
          "type": "integer"
        },
        "page_no": {
          "title": "Page No",
          "type": "integer"
        },
        "cluster": {
          "$ref": "#/$defs/Cluster"
        },
        "text": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Text"
        },
        "annotations": {
          "default": [],
          "items": {
            "discriminator": {
              "mapping": {
                "bar_chart_data": "#/$defs/PictureBarChartData",
                "classification": "#/$defs/PictureClassificationData",
                "description": "#/$defs/DescriptionAnnotation",
                "line_chart_data": "#/$defs/PictureLineChartData",
                "misc": "#/$defs/MiscAnnotation",
                "molecule_data": "#/$defs/PictureMoleculeData",
                "pie_chart_data": "#/$defs/PicturePieChartData",
                "scatter_chart_data": "#/$defs/PictureScatterChartData",
                "stacked_bar_chart_data": "#/$defs/PictureStackedBarChartData",
                "tabular_chart_data": "#/$defs/PictureTabularChartData"
              },
              "propertyName": "kind"
            },
            "oneOf": [
              {
                "$ref": "#/$defs/DescriptionAnnotation"
              },
              {
                "$ref": "#/$defs/MiscAnnotation"
              },
              {
                "$ref": "#/$defs/PictureClassificationData"
              },
              {
                "$ref": "#/$defs/PictureMoleculeData"
              },
              {
                "$ref": "#/$defs/PictureTabularChartData"
              },
              {
                "$ref": "#/$defs/PictureLineChartData"
              },
              {
                "$ref": "#/$defs/PictureBarChartData"
              },
              {
                "$ref": "#/$defs/PictureStackedBarChartData"
              },
              {
                "$ref": "#/$defs/PicturePieChartData"
              },
              {
                "$ref": "#/$defs/PictureScatterChartData"
              }
            ]
          },
          "title": "Annotations",
          "type": "array"
        },
        "provenance": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Provenance"
        },
        "predicted_class": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Predicted Class"
        },
        "confidence": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Confidence"
        }
      },
      "required": [
        "label",
        "id",
        "page_no",
        "cluster"
      ],
      "title": "FigureElement",
      "type": "object"
    },
    "FloatingMeta": {
      "additionalProperties": true,
      "description": "Metadata model for floating.",
      "properties": {
        "summary": {
          "anyOf": [
            {
              "$ref": "#/$defs/SummaryMetaField"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "description": {
          "anyOf": [
            {
              "$ref": "#/$defs/DescriptionMetaField"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        }
      },
      "title": "FloatingMeta",
      "type": "object"
    },
    "FormItem": {
      "additionalProperties": false,
      "description": "FormItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/FloatingMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "const": "form",
          "default": "form",
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "captions": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Captions",
          "type": "array"
        },
        "references": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "References",
          "type": "array"
        },
        "footnotes": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Footnotes",
          "type": "array"
        },
        "image": {
          "anyOf": [
            {
              "$ref": "#/$defs/ImageRef"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "graph": {
          "$ref": "#/$defs/GraphData"
        }
      },
      "required": [
        "self_ref",
        "graph"
      ],
      "title": "FormItem",
      "type": "object"
    },
    "Formatting": {
      "description": "Formatting.",
      "properties": {
        "bold": {
          "default": false,
          "title": "Bold",
          "type": "boolean"
        },
        "italic": {
          "default": false,
          "title": "Italic",
          "type": "boolean"
        },
        "underline": {
          "default": false,
          "title": "Underline",
          "type": "boolean"
        },
        "strikethrough": {
          "default": false,
          "title": "Strikethrough",
          "type": "boolean"
        },
        "script": {
          "$ref": "#/$defs/Script",
          "default": "baseline"
        }
      },
      "title": "Formatting",
      "type": "object"
    },
    "FormulaItem": {
      "additionalProperties": false,
      "description": "FormulaItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/BaseMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "const": "formula",
          "default": "formula",
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "orig": {
          "title": "Orig",
          "type": "string"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "formatting": {
          "anyOf": [
            {
              "$ref": "#/$defs/Formatting"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "hyperlink": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Hyperlink"
        }
      },
      "required": [
        "self_ref",
        "orig",
        "text"
      ],
      "title": "FormulaItem",
      "type": "object"
    },
    "GraphCell": {
      "description": "GraphCell.",
      "properties": {
        "label": {
          "$ref": "#/$defs/GraphCellLabel"
        },
        "cell_id": {
          "title": "Cell Id",
          "type": "integer"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "orig": {
          "title": "Orig",
          "type": "string"
        },
        "prov": {
          "anyOf": [
            {
              "$ref": "#/$defs/ProvenanceItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "item_ref": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        }
      },
      "required": [
        "label",
        "cell_id",
        "text",
        "orig"
      ],
      "title": "GraphCell",
      "type": "object"
    },
    "GraphCellLabel": {
      "description": "GraphCellLabel.",
      "enum": [
        "unspecified",
        "key",
        "value",
        "checkbox"
      ],
      "title": "GraphCellLabel",
      "type": "string"
    },
    "GraphData": {
      "description": "GraphData.",
      "properties": {
        "cells": {
          "items": {
            "$ref": "#/$defs/GraphCell"
          },
          "title": "Cells",
          "type": "array"
        },
        "links": {
          "items": {
            "$ref": "#/$defs/GraphLink"
          },
          "title": "Links",
          "type": "array"
        }
      },
      "title": "GraphData",
      "type": "object"
    },
    "GraphLink": {
      "description": "GraphLink.",
      "properties": {
        "label": {
          "$ref": "#/$defs/GraphLinkLabel"
        },
        "source_cell_id": {
          "title": "Source Cell Id",
          "type": "integer"
        },
        "target_cell_id": {
          "title": "Target Cell Id",
          "type": "integer"
        }
      },
      "required": [
        "label",
        "source_cell_id",
        "target_cell_id"
      ],
      "title": "GraphLink",
      "type": "object"
    },
    "GraphLinkLabel": {
      "description": "GraphLinkLabel.",
      "enum": [
        "unspecified",
        "to_value",
        "to_key",
        "to_parent",
        "to_child"
      ],
      "title": "GraphLinkLabel",
      "type": "string"
    },
    "GroupItem": {
      "additionalProperties": false,
      "description": "GroupItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/BaseMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "name": {
          "default": "group",
          "title": "Name",
          "type": "string"
        },
        "label": {
          "$ref": "#/$defs/GroupLabel",
          "default": "unspecified"
        }
      },
      "required": [
        "self_ref"
      ],
      "title": "GroupItem",
      "type": "object"
    },
    "GroupLabel": {
      "description": "GroupLabel.",
      "enum": [
        "unspecified",
        "list",
        "ordered_list",
        "chapter",
        "section",
        "sheet",
        "slide",
        "form_area",
        "key_value_area",
        "comment_section",
        "inline",
        "picture_area"
      ],
      "title": "GroupLabel",
      "type": "string"
    },
    "HTMLBackendOptions": {
      "description": "Options specific to the HTML backend.\n\nThis class can be extended to include options specific to HTML processing.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "html",
          "default": "html",
          "title": "Kind",
          "type": "string"
        },
        "fetch_images": {
          "default": false,
          "description": "Whether the backend should access remote or local resources to parse images in an HTML document.",
          "title": "Fetch Images",
          "type": "boolean"
        },
        "source_uri": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The URI that originates the HTML document. If provided, the backend will use it to resolve relative paths in the HTML document.",
          "title": "Source Uri"
        }
      },
      "title": "HTMLBackendOptions",
      "type": "object"
    },
    "ImageRef": {
      "description": "ImageRef.",
      "properties": {
        "mimetype": {
          "title": "Mimetype",
          "type": "string"
        },
        "dpi": {
          "title": "Dpi",
          "type": "integer"
        },
        "size": {
          "$ref": "#/$defs/Size"
        },
        "uri": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            }
          ],
          "title": "Uri"
        }
      },
      "required": [
        "mimetype",
        "dpi",
        "size",
        "uri"
      ],
      "title": "ImageRef",
      "type": "object"
    },
    "InlineGroup": {
      "additionalProperties": false,
      "description": "InlineGroup.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/BaseMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "name": {
          "default": "group",
          "title": "Name",
          "type": "string"
        },
        "label": {
          "const": "inline",
          "default": "inline",
          "title": "Label",
          "type": "string"
        }
      },
      "required": [
        "self_ref"
      ],
      "title": "InlineGroup",
      "type": "object"
    },
    "InputDocument": {
      "description": "A document as an input of a Docling conversion.",
      "properties": {
        "file": {
          "description": "A path representation of the input document.",
          "format": "path",
          "title": "File",
          "type": "string"
        },
        "document_hash": {
          "description": "A stable hash of the path or stream of the input document.",
          "title": "Document Hash",
          "type": "string"
        },
        "valid": {
          "default": true,
          "description": "Whether this is a valid input document.",
          "title": "Valid",
          "type": "boolean"
        },
        "backend_options": {
          "anyOf": [
            {
              "discriminator": {
                "mapping": {
                  "declarative": "#/$defs/DeclarativeBackendOptions",
                  "html": "#/$defs/HTMLBackendOptions",
                  "md": "#/$defs/MarkdownBackendOptions",
                  "pdf": "#/$defs/PdfBackendOptions",
                  "xlsx": "#/$defs/MsExcelBackendOptions"
                },
                "propertyName": "kind"
              },
              "oneOf": [
                {
                  "$ref": "#/$defs/DeclarativeBackendOptions"
                },
                {
                  "$ref": "#/$defs/HTMLBackendOptions"
                },
                {
                  "$ref": "#/$defs/MarkdownBackendOptions"
                },
                {
                  "$ref": "#/$defs/PdfBackendOptions"
                },
                {
                  "$ref": "#/$defs/MsExcelBackendOptions"
                }
              ]
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Custom options for backends.",
          "title": "Backend Options"
        },
        "limits": {
          "$ref": "#/$defs/DocumentLimits",
          "default": {
            "max_num_pages": 9223372036854775807,
            "max_file_size": 9223372036854775807,
            "page_range": [
              1,
              9223372036854775807
            ]
          },
          "description": "Limits in the input document for the conversion."
        },
        "format": {
          "$ref": "#/$defs/InputFormat",
          "description": "The document format."
        },
        "filesize": {
          "anyOf": [
            {
              "type": "integer"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Size of the input file, in bytes.",
          "title": "Filesize"
        },
        "page_count": {
          "default": 0,
          "description": "Number of pages in the input document.",
          "title": "Page Count",
          "type": "integer"
        }
      },
      "required": [
        "file",
        "document_hash",
        "format"
      ],
      "title": "InputDocument",
      "type": "object"
    },
    "InputFormat": {
      "description": "A document format supported by document backend parsers.",
      "enum": [
        "docx",
        "pptx",
        "html",
        "image",
        "pdf",
        "asciidoc",
        "md",
        "csv",
        "xlsx",
        "xml_uspto",
        "xml_jats",
        "mets_gbs",
        "json_docling",
        "audio",
        "vtt"
      ],
      "title": "InputFormat",
      "type": "string"
    },
    "KeyValueItem": {
      "additionalProperties": false,
      "description": "KeyValueItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/FloatingMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "const": "key_value_region",
          "default": "key_value_region",
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "captions": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Captions",
          "type": "array"
        },
        "references": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "References",
          "type": "array"
        },
        "footnotes": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Footnotes",
          "type": "array"
        },
        "image": {
          "anyOf": [
            {
              "$ref": "#/$defs/ImageRef"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "graph": {
          "$ref": "#/$defs/GraphData"
        }
      },
      "required": [
        "self_ref",
        "graph"
      ],
      "title": "KeyValueItem",
      "type": "object"
    },
    "LayoutPrediction": {
      "properties": {
        "clusters": {
          "default": [],
          "items": {
            "$ref": "#/$defs/Cluster"
          },
          "title": "Clusters",
          "type": "array"
        }
      },
      "title": "LayoutPrediction",
      "type": "object"
    },
    "ListGroup": {
      "additionalProperties": false,
      "description": "ListGroup.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/BaseMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "name": {
          "default": "group",
          "title": "Name",
          "type": "string"
        },
        "label": {
          "const": "list",
          "default": "list",
          "title": "Label",
          "type": "string"
        }
      },
      "required": [
        "self_ref"
      ],
      "title": "ListGroup",
      "type": "object"
    },
    "ListItem": {
      "additionalProperties": false,
      "description": "ListItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/BaseMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "const": "list_item",
          "default": "list_item",
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "orig": {
          "title": "Orig",
          "type": "string"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "formatting": {
          "anyOf": [
            {
              "$ref": "#/$defs/Formatting"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "hyperlink": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Hyperlink"
        },
        "enumerated": {
          "default": false,
          "title": "Enumerated",
          "type": "boolean"
        },
        "marker": {
          "default": "-",
          "title": "Marker",
          "type": "string"
        }
      },
      "required": [
        "self_ref",
        "orig",
        "text"
      ],
      "title": "ListItem",
      "type": "object"
    },
    "MarkdownBackendOptions": {
      "description": "Options specific to the Markdown backend.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "md",
          "default": "md",
          "title": "Kind",
          "type": "string"
        },
        "fetch_images": {
          "default": false,
          "description": "Whether the backend should access remote or local resources to parse images in the markdown document.",
          "title": "Fetch Images",
          "type": "boolean"
        },
        "source_uri": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The URI that originates the markdown document. If provided, the backend will use it to resolve relative paths in the markdown document.",
          "title": "Source Uri"
        }
      },
      "title": "MarkdownBackendOptions",
      "type": "object"
    },
    "MiscAnnotation": {
      "description": "MiscAnnotation.",
      "properties": {
        "kind": {
          "const": "misc",
          "default": "misc",
          "title": "Kind",
          "type": "string"
        },
        "content": {
          "additionalProperties": true,
          "title": "Content",
          "type": "object"
        }
      },
      "required": [
        "content"
      ],
      "title": "MiscAnnotation",
      "type": "object"
    },
    "MoleculeMetaField": {
      "additionalProperties": true,
      "description": "Molecule metadata field.",
      "properties": {
        "confidence": {
          "anyOf": [
            {
              "maximum": 1,
              "minimum": 0,
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The confidence of the prediction.",
          "examples": [
            0.9,
            0.42
          ],
          "title": "Confidence"
        },
        "created_by": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The origin of the prediction.",
          "examples": [
            "ibm-granite/granite-docling-258M"
          ],
          "title": "Created By"
        },
        "smi": {
          "description": "The SMILES representation of the molecule.",
          "title": "Smi",
          "type": "string"
        }
      },
      "required": [
        "smi"
      ],
      "title": "MoleculeMetaField",
      "type": "object"
    },
    "MsExcelBackendOptions": {
      "description": "Options specific to the MS Excel backend.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "xlsx",
          "default": "xlsx",
          "title": "Kind",
          "type": "string"
        },
        "treat_singleton_as_text": {
          "default": false,
          "description": "Whether to treat singleton cells (1x1 tables with empty neighboring cells) as TextItem instead of TableItem.",
          "title": "Treat Singleton As Text",
          "type": "boolean"
        }
      },
      "title": "MsExcelBackendOptions",
      "type": "object"
    },
    "Page": {
      "properties": {
        "page_no": {
          "title": "Page No",
          "type": "integer"
        },
        "size": {
          "anyOf": [
            {
              "$ref": "#/$defs/Size"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "parsed_page": {
          "anyOf": [
            {
              "$ref": "#/$defs/SegmentedPdfPage"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "predictions": {
          "$ref": "#/$defs/PagePredictions",
          "default": {
            "layout": null,
            "tablestructure": null,
            "figures_classification": null,
            "equations_prediction": null,
            "vlm_response": null
          }
        },
        "assembled": {
          "anyOf": [
            {
              "$ref": "#/$defs/AssembledUnit"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        }
      },
      "required": [
        "page_no"
      ],
      "title": "Page",
      "type": "object"
    },
    "PageConfidenceScores": {
      "properties": {
        "parse_score": {
          "default": NaN,
          "title": "Parse Score",
          "type": "number"
        },
        "layout_score": {
          "default": NaN,
          "title": "Layout Score",
          "type": "number"
        },
        "table_score": {
          "default": NaN,
          "title": "Table Score",
          "type": "number"
        },
        "ocr_score": {
          "default": NaN,
          "title": "Ocr Score",
          "type": "number"
        }
      },
      "title": "PageConfidenceScores",
      "type": "object"
    },
    "PageItem": {
      "description": "PageItem.",
      "properties": {
        "size": {
          "$ref": "#/$defs/Size"
        },
        "image": {
          "anyOf": [
            {
              "$ref": "#/$defs/ImageRef"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "page_no": {
          "title": "Page No",
          "type": "integer"
        }
      },
      "required": [
        "size",
        "page_no"
      ],
      "title": "PageItem",
      "type": "object"
    },
    "PagePredictions": {
      "properties": {
        "layout": {
          "anyOf": [
            {
              "$ref": "#/$defs/LayoutPrediction"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "tablestructure": {
          "anyOf": [
            {
              "$ref": "#/$defs/TableStructurePrediction"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "figures_classification": {
          "anyOf": [
            {
              "$ref": "#/$defs/FigureClassificationPrediction"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "equations_prediction": {
          "anyOf": [
            {
              "$ref": "#/$defs/EquationPrediction"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "vlm_response": {
          "anyOf": [
            {
              "$ref": "#/$defs/VlmPrediction"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        }
      },
      "title": "PagePredictions",
      "type": "object"
    },
    "PdfBackendOptions": {
      "description": "Backend options for pdf document backends.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "pdf",
          "default": "pdf",
          "title": "Kind",
          "type": "string"
        },
        "password": {
          "anyOf": [
            {
              "format": "password",
              "type": "string",
              "writeOnly": true
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Password"
        }
      },
      "title": "PdfBackendOptions",
      "type": "object"
    },
    "PdfCellRenderingMode": {
      "description": "Text Rendering Mode, according to PDF32000.",
      "enum": [
        0,
        1,
        2,
        3,
        4,
        5,
        6,
        7,
        -1
      ],
      "title": "PdfCellRenderingMode",
      "type": "integer"
    },
    "PdfLine": {
      "description": "Model representing a line in a PDF document.",
      "properties": {
        "index": {
          "default": -1,
          "title": "Index",
          "type": "integer"
        },
        "rgba": {
          "$ref": "#/$defs/ColorRGBA",
          "default": {
            "r": 0,
            "g": 0,
            "b": 0,
            "a": 255
          }
        },
        "parent_id": {
          "title": "Parent Id",
          "type": "integer"
        },
        "points": {
          "items": {
            "$ref": "#/$defs/Coord2D"
          },
          "title": "Points",
          "type": "array"
        },
        "width": {
          "default": 1.0,
          "title": "Width",
          "type": "number"
        },
        "coord_origin": {
          "$ref": "#/$defs/CoordOrigin",
          "default": "BOTTOMLEFT"
        }
      },
      "required": [
        "parent_id",
        "points"
      ],
      "title": "PdfLine",
      "type": "object"
    },
    "PdfPageBoundaryType": {
      "description": "Enumeration of PDF page boundary types.",
      "enum": [
        "art_box",
        "bleed_box",
        "crop_box",
        "media_box",
        "trim_box"
      ],
      "title": "PdfPageBoundaryType",
      "type": "string"
    },
    "PdfPageGeometry": {
      "description": "Extended dimensions model specific to PDF pages with boundary types.",
      "properties": {
        "angle": {
          "title": "Angle",
          "type": "number"
        },
        "rect": {
          "$ref": "#/$defs/BoundingRectangle"
        },
        "boundary_type": {
          "$ref": "#/$defs/PdfPageBoundaryType"
        },
        "art_bbox": {
          "$ref": "#/$defs/BoundingBox"
        },
        "bleed_bbox": {
          "$ref": "#/$defs/BoundingBox"
        },
        "crop_bbox": {
          "$ref": "#/$defs/BoundingBox"
        },
        "media_bbox": {
          "$ref": "#/$defs/BoundingBox"
        },
        "trim_bbox": {
          "$ref": "#/$defs/BoundingBox"
        }
      },
      "required": [
        "angle",
        "rect",
        "boundary_type",
        "art_bbox",
        "bleed_bbox",
        "crop_bbox",
        "media_bbox",
        "trim_bbox"
      ],
      "title": "PdfPageGeometry",
      "type": "object"
    },
    "PdfTextCell": {
      "description": "Specialized text cell for PDF documents with font information.",
      "properties": {
        "index": {
          "default": -1,
          "title": "Index",
          "type": "integer"
        },
        "rgba": {
          "$ref": "#/$defs/ColorRGBA",
          "default": {
            "r": 0,
            "g": 0,
            "b": 0,
            "a": 255
          }
        },
        "rect": {
          "$ref": "#/$defs/BoundingRectangle"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "orig": {
          "title": "Orig",
          "type": "string"
        },
        "text_direction": {
          "$ref": "#/$defs/TextDirection",
          "default": "left_to_right"
        },
        "confidence": {
          "default": 1.0,
          "title": "Confidence",
          "type": "number"
        },
        "from_ocr": {
          "const": false,
          "default": false,
          "title": "From Ocr",
          "type": "boolean"
        },
        "rendering_mode": {
          "$ref": "#/$defs/PdfCellRenderingMode"
        },
        "widget": {
          "title": "Widget",
          "type": "boolean"
        },
        "font_key": {
          "title": "Font Key",
          "type": "string"
        },
        "font_name": {
          "title": "Font Name",
          "type": "string"
        }
      },
      "required": [
        "rect",
        "text",
        "orig",
        "rendering_mode",
        "widget",
        "font_key",
        "font_name"
      ],
      "title": "PdfTextCell",
      "type": "object"
    },
    "PictureBarChartData": {
      "description": "Represents data of a bar chart.\n\nAttributes:\n    kind (Literal[\"bar_chart_data\"]): The type of the chart.\n    x_axis_label (str): The label for the x-axis.\n    y_axis_label (str): The label for the y-axis.\n    bars (List[ChartBar]): A list of bars in the chart.",
      "properties": {
        "kind": {
          "const": "bar_chart_data",
          "default": "bar_chart_data",
          "title": "Kind",
          "type": "string"
        },
        "title": {
          "title": "Title",
          "type": "string"
        },
        "x_axis_label": {
          "title": "X Axis Label",
          "type": "string"
        },
        "y_axis_label": {
          "title": "Y Axis Label",
          "type": "string"
        },
        "bars": {
          "items": {
            "$ref": "#/$defs/ChartBar"
          },
          "title": "Bars",
          "type": "array"
        }
      },
      "required": [
        "title",
        "x_axis_label",
        "y_axis_label",
        "bars"
      ],
      "title": "PictureBarChartData",
      "type": "object"
    },
    "PictureClassificationClass": {
      "description": "PictureClassificationData.",
      "properties": {
        "class_name": {
          "title": "Class Name",
          "type": "string"
        },
        "confidence": {
          "title": "Confidence",
          "type": "number"
        }
      },
      "required": [
        "class_name",
        "confidence"
      ],
      "title": "PictureClassificationClass",
      "type": "object"
    },
    "PictureClassificationData": {
      "description": "PictureClassificationData.",
      "properties": {
        "kind": {
          "const": "classification",
          "default": "classification",
          "title": "Kind",
          "type": "string"
        },
        "provenance": {
          "title": "Provenance",
          "type": "string"
        },
        "predicted_classes": {
          "items": {
            "$ref": "#/$defs/PictureClassificationClass"
          },
          "title": "Predicted Classes",
          "type": "array"
        }
      },
      "required": [
        "provenance",
        "predicted_classes"
      ],
      "title": "PictureClassificationData",
      "type": "object"
    },
    "PictureClassificationMetaField": {
      "additionalProperties": true,
      "description": "Picture classification metadata field.",
      "properties": {
        "predictions": {
          "items": {
            "$ref": "#/$defs/PictureClassificationPrediction"
          },
          "minItems": 1,
          "title": "Predictions",
          "type": "array"
        }
      },
      "title": "PictureClassificationMetaField",
      "type": "object"
    },
    "PictureClassificationPrediction": {
      "additionalProperties": true,
      "description": "Picture classification instance.",
      "properties": {
        "confidence": {
          "anyOf": [
            {
              "maximum": 1,
              "minimum": 0,
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The confidence of the prediction.",
          "examples": [
            0.9,
            0.42
          ],
          "title": "Confidence"
        },
        "created_by": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The origin of the prediction.",
          "examples": [
            "ibm-granite/granite-docling-258M"
          ],
          "title": "Created By"
        },
        "class_name": {
          "title": "Class Name",
          "type": "string"
        }
      },
      "required": [
        "class_name"
      ],
      "title": "PictureClassificationPrediction",
      "type": "object"
    },
    "PictureItem": {
      "additionalProperties": false,
      "description": "PictureItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/PictureMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "default": "picture",
          "enum": [
            "picture",
            "chart"
          ],
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "captions": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Captions",
          "type": "array"
        },
        "references": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "References",
          "type": "array"
        },
        "footnotes": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Footnotes",
          "type": "array"
        },
        "image": {
          "anyOf": [
            {
              "$ref": "#/$defs/ImageRef"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "annotations": {
          "default": [],
          "deprecated": true,
          "items": {
            "discriminator": {
              "mapping": {
                "bar_chart_data": "#/$defs/PictureBarChartData",
                "classification": "#/$defs/PictureClassificationData",
                "description": "#/$defs/DescriptionAnnotation",
                "line_chart_data": "#/$defs/PictureLineChartData",
                "misc": "#/$defs/MiscAnnotation",
                "molecule_data": "#/$defs/PictureMoleculeData",
                "pie_chart_data": "#/$defs/PicturePieChartData",
                "scatter_chart_data": "#/$defs/PictureScatterChartData",
                "stacked_bar_chart_data": "#/$defs/PictureStackedBarChartData",
                "tabular_chart_data": "#/$defs/PictureTabularChartData"
              },
              "propertyName": "kind"
            },
            "oneOf": [
              {
                "$ref": "#/$defs/DescriptionAnnotation"
              },
              {
                "$ref": "#/$defs/MiscAnnotation"
              },
              {
                "$ref": "#/$defs/PictureClassificationData"
              },
              {
                "$ref": "#/$defs/PictureMoleculeData"
              },
              {
                "$ref": "#/$defs/PictureTabularChartData"
              },
              {
                "$ref": "#/$defs/PictureLineChartData"
              },
              {
                "$ref": "#/$defs/PictureBarChartData"
              },
              {
                "$ref": "#/$defs/PictureStackedBarChartData"
              },
              {
                "$ref": "#/$defs/PicturePieChartData"
              },
              {
                "$ref": "#/$defs/PictureScatterChartData"
              }
            ]
          },
          "title": "Annotations",
          "type": "array"
        }
      },
      "required": [
        "self_ref"
      ],
      "title": "PictureItem",
      "type": "object"
    },
    "PictureLineChartData": {
      "description": "Represents data of a line chart.\n\nAttributes:\n    kind (Literal[\"line_chart_data\"]): The type of the chart.\n    x_axis_label (str): The label for the x-axis.\n    y_axis_label (str): The label for the y-axis.\n    lines (List[ChartLine]): A list of lines in the chart.",
      "properties": {
        "kind": {
          "const": "line_chart_data",
          "default": "line_chart_data",
          "title": "Kind",
          "type": "string"
        },
        "title": {
          "title": "Title",
          "type": "string"
        },
        "x_axis_label": {
          "title": "X Axis Label",
          "type": "string"
        },
        "y_axis_label": {
          "title": "Y Axis Label",
          "type": "string"
        },
        "lines": {
          "items": {
            "$ref": "#/$defs/ChartLine"
          },
          "title": "Lines",
          "type": "array"
        }
      },
      "required": [
        "title",
        "x_axis_label",
        "y_axis_label",
        "lines"
      ],
      "title": "PictureLineChartData",
      "type": "object"
    },
    "PictureMeta": {
      "additionalProperties": true,
      "description": "Metadata model for pictures.",
      "properties": {
        "summary": {
          "anyOf": [
            {
              "$ref": "#/$defs/SummaryMetaField"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "description": {
          "anyOf": [
            {
              "$ref": "#/$defs/DescriptionMetaField"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "classification": {
          "anyOf": [
            {
              "$ref": "#/$defs/PictureClassificationMetaField"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "molecule": {
          "anyOf": [
            {
              "$ref": "#/$defs/MoleculeMetaField"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "tabular_chart": {
          "anyOf": [
            {
              "$ref": "#/$defs/TabularChartMetaField"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        }
      },
      "title": "PictureMeta",
      "type": "object"
    },
    "PictureMoleculeData": {
      "description": "PictureMoleculeData.",
      "properties": {
        "kind": {
          "const": "molecule_data",
          "default": "molecule_data",
          "title": "Kind",
          "type": "string"
        },
        "smi": {
          "title": "Smi",
          "type": "string"
        },
        "confidence": {
          "title": "Confidence",
          "type": "number"
        },
        "class_name": {
          "title": "Class Name",
          "type": "string"
        },
        "segmentation": {
          "items": {
            "maxItems": 2,
            "minItems": 2,
            "prefixItems": [
              {
                "type": "number"
              },
              {
                "type": "number"
              }
            ],
            "type": "array"
          },
          "title": "Segmentation",
          "type": "array"
        },
        "provenance": {
          "title": "Provenance",
          "type": "string"
        }
      },
      "required": [
        "smi",
        "confidence",
        "class_name",
        "segmentation",
        "provenance"
      ],
      "title": "PictureMoleculeData",
      "type": "object"
    },
    "PicturePieChartData": {
      "description": "Represents data of a pie chart.\n\nAttributes:\n    kind (Literal[\"pie_chart_data\"]): The type of the chart.\n    slices (List[ChartSlice]): A list of slices in the pie chart.",
      "properties": {
        "kind": {
          "const": "pie_chart_data",
          "default": "pie_chart_data",
          "title": "Kind",
          "type": "string"
        },
        "title": {
          "title": "Title",
          "type": "string"
        },
        "slices": {
          "items": {
            "$ref": "#/$defs/ChartSlice"
          },
          "title": "Slices",
          "type": "array"
        }
      },
      "required": [
        "title",
        "slices"
      ],
      "title": "PicturePieChartData",
      "type": "object"
    },
    "PictureScatterChartData": {
      "description": "Represents data of a scatter chart.\n\nAttributes:\n    kind (Literal[\"scatter_chart_data\"]): The type of the chart.\n    x_axis_label (str): The label for the x-axis.\n    y_axis_label (str): The label for the y-axis.\n    points (List[ChartPoint]): A list of points in the scatter chart.",
      "properties": {
        "kind": {
          "const": "scatter_chart_data",
          "default": "scatter_chart_data",
          "title": "Kind",
          "type": "string"
        },
        "title": {
          "title": "Title",
          "type": "string"
        },
        "x_axis_label": {
          "title": "X Axis Label",
          "type": "string"
        },
        "y_axis_label": {
          "title": "Y Axis Label",
          "type": "string"
        },
        "points": {
          "items": {
            "$ref": "#/$defs/ChartPoint"
          },
          "title": "Points",
          "type": "array"
        }
      },
      "required": [
        "title",
        "x_axis_label",
        "y_axis_label",
        "points"
      ],
      "title": "PictureScatterChartData",
      "type": "object"
    },
    "PictureStackedBarChartData": {
      "description": "Represents data of a stacked bar chart.\n\nAttributes:\n    kind (Literal[\"stacked_bar_chart_data\"]): The type of the chart.\n    x_axis_label (str): The label for the x-axis.\n    y_axis_label (str): The label for the y-axis.\n    stacked_bars (List[ChartStackedBar]): A list of stacked bars in the chart.",
      "properties": {
        "kind": {
          "const": "stacked_bar_chart_data",
          "default": "stacked_bar_chart_data",
          "title": "Kind",
          "type": "string"
        },
        "title": {
          "title": "Title",
          "type": "string"
        },
        "x_axis_label": {
          "title": "X Axis Label",
          "type": "string"
        },
        "y_axis_label": {
          "title": "Y Axis Label",
          "type": "string"
        },
        "stacked_bars": {
          "items": {
            "$ref": "#/$defs/ChartStackedBar"
          },
          "title": "Stacked Bars",
          "type": "array"
        }
      },
      "required": [
        "title",
        "x_axis_label",
        "y_axis_label",
        "stacked_bars"
      ],
      "title": "PictureStackedBarChartData",
      "type": "object"
    },
    "PictureTabularChartData": {
      "description": "Base class for picture chart data.\n\nAttributes:\n    title (str): The title of the chart.\n    chart_data (TableData): Chart data in the table format.",
      "properties": {
        "kind": {
          "const": "tabular_chart_data",
          "default": "tabular_chart_data",
          "title": "Kind",
          "type": "string"
        },
        "title": {
          "title": "Title",
          "type": "string"
        },
        "chart_data": {
          "$ref": "#/$defs/TableData"
        }
      },
      "required": [
        "title",
        "chart_data"
      ],
      "title": "PictureTabularChartData",
      "type": "object"
    },
    "ProfilingItem": {
      "properties": {
        "scope": {
          "$ref": "#/$defs/ProfilingScope"
        },
        "count": {
          "default": 0,
          "title": "Count",
          "type": "integer"
        },
        "times": {
          "default": [],
          "items": {
            "type": "number"
          },
          "title": "Times",
          "type": "array"
        },
        "start_timestamps": {
          "default": [],
          "items": {
            "format": "date-time",
            "type": "string"
          },
          "title": "Start Timestamps",
          "type": "array"
        }
      },
      "required": [
        "scope"
      ],
      "title": "ProfilingItem",
      "type": "object"
    },
    "ProfilingScope": {
      "enum": [
        "page",
        "document"
      ],
      "title": "ProfilingScope",
      "type": "string"
    },
    "ProvenanceItem": {
      "description": "ProvenanceItem.",
      "properties": {
        "page_no": {
          "title": "Page No",
          "type": "integer"
        },
        "bbox": {
          "$ref": "#/$defs/BoundingBox"
        },
        "charspan": {
          "maxItems": 2,
          "minItems": 2,
          "prefixItems": [
            {
              "type": "integer"
            },
            {
              "type": "integer"
            }
          ],
          "title": "Charspan",
          "type": "array"
        }
      },
      "required": [
        "page_no",
        "bbox",
        "charspan"
      ],
      "title": "ProvenanceItem",
      "type": "object"
    },
    "RefItem": {
      "description": "RefItem.",
      "properties": {
        "$ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "$Ref",
          "type": "string"
        }
      },
      "required": [
        "$ref"
      ],
      "title": "RefItem",
      "type": "object"
    },
    "RichTableCell": {
      "description": "RichTableCell.",
      "properties": {
        "bbox": {
          "anyOf": [
            {
              "$ref": "#/$defs/BoundingBox"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "row_span": {
          "default": 1,
          "title": "Row Span",
          "type": "integer"
        },
        "col_span": {
          "default": 1,
          "title": "Col Span",
          "type": "integer"
        },
        "start_row_offset_idx": {
          "title": "Start Row Offset Idx",
          "type": "integer"
        },
        "end_row_offset_idx": {
          "title": "End Row Offset Idx",
          "type": "integer"
        },
        "start_col_offset_idx": {
          "title": "Start Col Offset Idx",
          "type": "integer"
        },
        "end_col_offset_idx": {
          "title": "End Col Offset Idx",
          "type": "integer"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "column_header": {
          "default": false,
          "title": "Column Header",
          "type": "boolean"
        },
        "row_header": {
          "default": false,
          "title": "Row Header",
          "type": "boolean"
        },
        "row_section": {
          "default": false,
          "title": "Row Section",
          "type": "boolean"
        },
        "fillable": {
          "default": false,
          "title": "Fillable",
          "type": "boolean"
        },
        "ref": {
          "$ref": "#/$defs/RefItem"
        }
      },
      "required": [
        "start_row_offset_idx",
        "end_row_offset_idx",
        "start_col_offset_idx",
        "end_col_offset_idx",
        "text",
        "ref"
      ],
      "title": "RichTableCell",
      "type": "object"
    },
    "Script": {
      "description": "Text script position.",
      "enum": [
        "baseline",
        "sub",
        "super"
      ],
      "title": "Script",
      "type": "string"
    },
    "SectionHeaderItem": {
      "additionalProperties": false,
      "description": "SectionItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/BaseMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "const": "section_header",
          "default": "section_header",
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "orig": {
          "title": "Orig",
          "type": "string"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "formatting": {
          "anyOf": [
            {
              "$ref": "#/$defs/Formatting"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "hyperlink": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Hyperlink"
        },
        "level": {
          "default": 1,
          "maximum": 100,
          "minimum": 1,
          "title": "Level",
          "type": "integer"
        }
      },
      "required": [
        "self_ref",
        "orig",
        "text"
      ],
      "title": "SectionHeaderItem",
      "type": "object"
    },
    "SegmentedPdfPage": {
      "description": "Extended segmented page model specific to PDF documents.",
      "properties": {
        "dimension": {
          "$ref": "#/$defs/PdfPageGeometry"
        },
        "bitmap_resources": {
          "default": [],
          "items": {
            "$ref": "#/$defs/BitmapResource"
          },
          "title": "Bitmap Resources",
          "type": "array"
        },
        "char_cells": {
          "items": {
            "anyOf": [
              {
                "$ref": "#/$defs/PdfTextCell"
              },
              {
                "$ref": "#/$defs/TextCell"
              }
            ]
          },
          "title": "Char Cells",
          "type": "array"
        },
        "word_cells": {
          "items": {
            "anyOf": [
              {
                "$ref": "#/$defs/PdfTextCell"
              },
              {
                "$ref": "#/$defs/TextCell"
              }
            ]
          },
          "title": "Word Cells",
          "type": "array"
        },
        "textline_cells": {
          "items": {
            "anyOf": [
              {
                "$ref": "#/$defs/PdfTextCell"
              },
              {
                "$ref": "#/$defs/TextCell"
              }
            ]
          },
          "title": "Textline Cells",
          "type": "array"
        },
        "has_chars": {
          "default": false,
          "title": "Has Chars",
          "type": "boolean"
        },
        "has_words": {
          "default": false,
          "title": "Has Words",
          "type": "boolean"
        },
        "has_lines": {
          "default": false,
          "title": "Has Lines",
          "type": "boolean"
        },
        "image": {
          "anyOf": [
            {
              "$ref": "#/$defs/ImageRef"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "lines": {
          "default": [],
          "items": {
            "$ref": "#/$defs/PdfLine"
          },
          "title": "Lines",
          "type": "array"
        }
      },
      "required": [
        "dimension",
        "char_cells",
        "word_cells",
        "textline_cells"
      ],
      "title": "SegmentedPdfPage",
      "type": "object"
    },
    "Size": {
      "description": "Size.",
      "properties": {
        "width": {
          "default": 0.0,
          "title": "Width",
          "type": "number"
        },
        "height": {
          "default": 0.0,
          "title": "Height",
          "type": "number"
        }
      },
      "title": "Size",
      "type": "object"
    },
    "SummaryMetaField": {
      "additionalProperties": true,
      "description": "Summary data.",
      "properties": {
        "confidence": {
          "anyOf": [
            {
              "maximum": 1,
              "minimum": 0,
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The confidence of the prediction.",
          "examples": [
            0.9,
            0.42
          ],
          "title": "Confidence"
        },
        "created_by": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The origin of the prediction.",
          "examples": [
            "ibm-granite/granite-docling-258M"
          ],
          "title": "Created By"
        },
        "text": {
          "title": "Text",
          "type": "string"
        }
      },
      "required": [
        "text"
      ],
      "title": "SummaryMetaField",
      "type": "object"
    },
    "Table": {
      "properties": {
        "label": {
          "$ref": "#/$defs/DocItemLabel"
        },
        "id": {
          "title": "Id",
          "type": "integer"
        },
        "page_no": {
          "title": "Page No",
          "type": "integer"
        },
        "cluster": {
          "$ref": "#/$defs/Cluster"
        },
        "text": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Text"
        },
        "otsl_seq": {
          "items": {
            "type": "string"
          },
          "title": "Otsl Seq",
          "type": "array"
        },
        "num_rows": {
          "default": 0,
          "title": "Num Rows",
          "type": "integer"
        },
        "num_cols": {
          "default": 0,
          "title": "Num Cols",
          "type": "integer"
        },
        "table_cells": {
          "items": {
            "$ref": "#/$defs/TableCell"
          },
          "title": "Table Cells",
          "type": "array"
        }
      },
      "required": [
        "label",
        "id",
        "page_no",
        "cluster",
        "otsl_seq",
        "table_cells"
      ],
      "title": "Table",
      "type": "object"
    },
    "TableCell": {
      "description": "TableCell.",
      "properties": {
        "bbox": {
          "anyOf": [
            {
              "$ref": "#/$defs/BoundingBox"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "row_span": {
          "default": 1,
          "title": "Row Span",
          "type": "integer"
        },
        "col_span": {
          "default": 1,
          "title": "Col Span",
          "type": "integer"
        },
        "start_row_offset_idx": {
          "title": "Start Row Offset Idx",
          "type": "integer"
        },
        "end_row_offset_idx": {
          "title": "End Row Offset Idx",
          "type": "integer"
        },
        "start_col_offset_idx": {
          "title": "Start Col Offset Idx",
          "type": "integer"
        },
        "end_col_offset_idx": {
          "title": "End Col Offset Idx",
          "type": "integer"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "column_header": {
          "default": false,
          "title": "Column Header",
          "type": "boolean"
        },
        "row_header": {
          "default": false,
          "title": "Row Header",
          "type": "boolean"
        },
        "row_section": {
          "default": false,
          "title": "Row Section",
          "type": "boolean"
        },
        "fillable": {
          "default": false,
          "title": "Fillable",
          "type": "boolean"
        }
      },
      "required": [
        "start_row_offset_idx",
        "end_row_offset_idx",
        "start_col_offset_idx",
        "end_col_offset_idx",
        "text"
      ],
      "title": "TableCell",
      "type": "object"
    },
    "TableData": {
      "description": "BaseTableData.",
      "properties": {
        "table_cells": {
          "default": [],
          "items": {
            "anyOf": [
              {
                "$ref": "#/$defs/RichTableCell"
              },
              {
                "$ref": "#/$defs/TableCell"
              }
            ]
          },
          "title": "Table Cells",
          "type": "array"
        },
        "num_rows": {
          "default": 0,
          "title": "Num Rows",
          "type": "integer"
        },
        "num_cols": {
          "default": 0,
          "title": "Num Cols",
          "type": "integer"
        }
      },
      "title": "TableData",
      "type": "object"
    },
    "TableItem": {
      "additionalProperties": false,
      "description": "TableItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/FloatingMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "default": "table",
          "enum": [
            "document_index",
            "table"
          ],
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "captions": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Captions",
          "type": "array"
        },
        "references": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "References",
          "type": "array"
        },
        "footnotes": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Footnotes",
          "type": "array"
        },
        "image": {
          "anyOf": [
            {
              "$ref": "#/$defs/ImageRef"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "data": {
          "$ref": "#/$defs/TableData"
        },
        "annotations": {
          "default": [],
          "deprecated": true,
          "items": {
            "discriminator": {
              "mapping": {
                "description": "#/$defs/DescriptionAnnotation",
                "misc": "#/$defs/MiscAnnotation"
              },
              "propertyName": "kind"
            },
            "oneOf": [
              {
                "$ref": "#/$defs/DescriptionAnnotation"
              },
              {
                "$ref": "#/$defs/MiscAnnotation"
              }
            ]
          },
          "title": "Annotations",
          "type": "array"
        }
      },
      "required": [
        "self_ref",
        "data"
      ],
      "title": "TableItem",
      "type": "object"
    },
    "TableStructurePrediction": {
      "properties": {
        "table_map": {
          "additionalProperties": {
            "$ref": "#/$defs/Table"
          },
          "default": {},
          "title": "Table Map",
          "type": "object"
        }
      },
      "title": "TableStructurePrediction",
      "type": "object"
    },
    "TabularChartMetaField": {
      "additionalProperties": true,
      "description": "Tabular chart metadata field.",
      "properties": {
        "confidence": {
          "anyOf": [
            {
              "maximum": 1,
              "minimum": 0,
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The confidence of the prediction.",
          "examples": [
            0.9,
            0.42
          ],
          "title": "Confidence"
        },
        "created_by": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The origin of the prediction.",
          "examples": [
            "ibm-granite/granite-docling-258M"
          ],
          "title": "Created By"
        },
        "title": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Title"
        },
        "chart_data": {
          "$ref": "#/$defs/TableData"
        }
      },
      "required": [
        "chart_data"
      ],
      "title": "TabularChartMetaField",
      "type": "object"
    },
    "TextCell": {
      "description": "Model representing a text cell with positioning and content information.",
      "properties": {
        "index": {
          "default": -1,
          "title": "Index",
          "type": "integer"
        },
        "rgba": {
          "$ref": "#/$defs/ColorRGBA",
          "default": {
            "r": 0,
            "g": 0,
            "b": 0,
            "a": 255
          }
        },
        "rect": {
          "$ref": "#/$defs/BoundingRectangle"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "orig": {
          "title": "Orig",
          "type": "string"
        },
        "text_direction": {
          "$ref": "#/$defs/TextDirection",
          "default": "left_to_right"
        },
        "confidence": {
          "default": 1.0,
          "title": "Confidence",
          "type": "number"
        },
        "from_ocr": {
          "title": "From Ocr",
          "type": "boolean"
        }
      },
      "required": [
        "rect",
        "text",
        "orig",
        "from_ocr"
      ],
      "title": "TextCell",
      "type": "object"
    },
    "TextDirection": {
      "description": "Enumeration for text direction options.",
      "enum": [
        "left_to_right",
        "right_to_left",
        "unspecified"
      ],
      "title": "TextDirection",
      "type": "string"
    },
    "TextElement": {
      "properties": {
        "label": {
          "$ref": "#/$defs/DocItemLabel"
        },
        "id": {
          "title": "Id",
          "type": "integer"
        },
        "page_no": {
          "title": "Page No",
          "type": "integer"
        },
        "cluster": {
          "$ref": "#/$defs/Cluster"
        },
        "text": {
          "title": "Text",
          "type": "string"
        }
      },
      "required": [
        "label",
        "id",
        "page_no",
        "cluster",
        "text"
      ],
      "title": "TextElement",
      "type": "object"
    },
    "TextItem": {
      "additionalProperties": false,
      "description": "TextItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/BaseMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "enum": [
            "caption",
            "checkbox_selected",
            "checkbox_unselected",
            "footnote",
            "page_footer",
            "page_header",
            "paragraph",
            "reference",
            "text",
            "empty_value"
          ],
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "orig": {
          "title": "Orig",
          "type": "string"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "formatting": {
          "anyOf": [
            {
              "$ref": "#/$defs/Formatting"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "hyperlink": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Hyperlink"
        }
      },
      "required": [
        "self_ref",
        "label",
        "orig",
        "text"
      ],
      "title": "TextItem",
      "type": "object"
    },
    "TitleItem": {
      "additionalProperties": false,
      "description": "TitleItem.",
      "properties": {
        "self_ref": {
          "pattern": "^#(?:/([\\w-]+)(?:/(\\d+))?)?$",
          "title": "Self Ref",
          "type": "string"
        },
        "parent": {
          "anyOf": [
            {
              "$ref": "#/$defs/RefItem"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "children": {
          "default": [],
          "items": {
            "$ref": "#/$defs/RefItem"
          },
          "title": "Children",
          "type": "array"
        },
        "content_layer": {
          "$ref": "#/$defs/ContentLayer",
          "default": "body"
        },
        "meta": {
          "anyOf": [
            {
              "$ref": "#/$defs/BaseMeta"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "label": {
          "const": "title",
          "default": "title",
          "title": "Label",
          "type": "string"
        },
        "prov": {
          "default": [],
          "items": {
            "$ref": "#/$defs/ProvenanceItem"
          },
          "title": "Prov",
          "type": "array"
        },
        "orig": {
          "title": "Orig",
          "type": "string"
        },
        "text": {
          "title": "Text",
          "type": "string"
        },
        "formatting": {
          "anyOf": [
            {
              "$ref": "#/$defs/Formatting"
            },
            {
              "type": "null"
            }
          ],
          "default": null
        },
        "hyperlink": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Hyperlink"
        }
      },
      "required": [
        "self_ref",
        "orig",
        "text"
      ],
      "title": "TitleItem",
      "type": "object"
    },
    "VlmPrediction": {
      "properties": {
        "text": {
          "default": "",
          "title": "Text",
          "type": "string"
        },
        "generated_tokens": {
          "default": [],
          "items": {
            "$ref": "#/$defs/VlmPredictionToken"
          },
          "title": "Generated Tokens",
          "type": "array"
        },
        "generation_time": {
          "default": -1,
          "title": "Generation Time",
          "type": "number"
        },
        "num_tokens": {
          "anyOf": [
            {
              "type": "integer"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Num Tokens"
        },
        "stop_reason": {
          "$ref": "#/$defs/VlmStopReason",
          "default": "unspecified"
        },
        "input_prompt": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Input Prompt"
        }
      },
      "title": "VlmPrediction",
      "type": "object"
    },
    "VlmPredictionToken": {
      "properties": {
        "text": {
          "default": "",
          "title": "Text",
          "type": "string"
        },
        "token": {
          "default": -1,
          "title": "Token",
          "type": "integer"
        },
        "logprob": {
          "default": -1,
          "title": "Logprob",
          "type": "number"
        }
      },
      "title": "VlmPredictionToken",
      "type": "object"
    },
    "VlmStopReason": {
      "enum": [
        "length",
        "stop_sequence",
        "end_of_sequence",
        "unspecified"
      ],
      "title": "VlmStopReason",
      "type": "string"
    }
  },
  "properties": {
    "version": {
      "$ref": "#/$defs/DoclingVersion",
      "default": {
        "docling_version": "2.64.1",
        "docling_core_version": "2.51.1",
        "docling_ibm_models_version": "3.10.2",
        "docling_parse_version": "4.7.1",
        "platform_str": "Linux-6.11.0-1018-azure-x86_64-with-glibc2.39",
        "py_impl_version": "cpython-312",
        "py_lang_version": "3.12.3"
      }
    },
    "timestamp": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "title": "Timestamp"
    },
    "status": {
      "$ref": "#/$defs/ConversionStatus",
      "default": "pending"
    },
    "errors": {
      "default": [],
      "items": {
        "$ref": "#/$defs/ErrorItem"
      },
      "title": "Errors",
      "type": "array"
    },
    "pages": {
      "default": [],
      "items": {
        "$ref": "#/$defs/Page"
      },
      "title": "Pages",
      "type": "array"
    },
    "timings": {
      "additionalProperties": {
        "$ref": "#/$defs/ProfilingItem"
      },
      "default": {},
      "title": "Timings",
      "type": "object"
    },
    "confidence": {
      "$ref": "#/$defs/ConfidenceReport"
    },
    "document": {
      "$ref": "#/$defs/DoclingDocument",
      "default": {
        "schema_name": "DoclingDocument",
        "version": "1.8.0",
        "name": "dummy",
        "origin": null,
        "furniture": {
          "children": [],
          "content_layer": "furniture",
          "label": "unspecified",
          "meta": null,
          "name": "_root_",
          "parent": null,
          "self_ref": "#/furniture"
        },
        "body": {
          "children": [],
          "content_layer": "body",
          "label": "unspecified",
          "meta": null,
          "name": "_root_",
          "parent": null,
          "self_ref": "#/body"
        },
        "groups": [],
        "texts": [],
        "pictures": [],
        "tables": [],
        "key_value_items": [],
        "form_items": [],
        "pages": {}
      }
    },
    "input": {
      "$ref": "#/$defs/InputDocument"
    },
    "assembled": {
      "$ref": "#/$defs/AssembledUnit",
      "default": {
        "elements": [],
        "body": [],
        "headers": []
      }
    }
  },
  "required": [
    "input"
  ],
  "title": "ConversionResult",
  "type": "object"
}

Fields:

assembled pydantic-field

assembled: AssembledUnit

confidence pydantic-field

confidence: ConfidenceReport

document pydantic-field

document: DoclingDocument = _EMPTY_DOCLING_DOC

errors pydantic-field

errors: list[ErrorItem] = []

input pydantic-field

input: InputDocument

legacy_document property

legacy_document

pages pydantic-field

pages: list[Page] = []

status pydantic-field

status: ConversionStatus = PENDING

timestamp pydantic-field

timestamp: Optional[str] = None

timings pydantic-field

timings: dict[str, ProfilingItem] = {}

version pydantic-field

version: DoclingVersion

load classmethod

load(filename: Union[str, Path]) -> ConversionAssets

Load a ConversionAssets instance from the file at `filename`.

save

save(*, filename: Union[str, Path], indent: Optional[int] = 2)

Serialize the full ConversionAssets to JSON.

ConversionStatus

Bases: str, Enum

Attributes:

FAILURE class-attribute instance-attribute

FAILURE = 'failure'

PARTIAL_SUCCESS class-attribute instance-attribute

PARTIAL_SUCCESS = 'partial_success'

PENDING class-attribute instance-attribute

PENDING = 'pending'

SKIPPED class-attribute instance-attribute

SKIPPED = 'skipped'

STARTED class-attribute instance-attribute

STARTED = 'started'

SUCCESS class-attribute instance-attribute

SUCCESS = 'success'

FormatOption pydantic-model

Bases: BaseFormatOption

Show JSON schema:
{
  "$defs": {
    "AcceleratorDevice": {
      "description": "Devices to run model inference",
      "enum": [
        "auto",
        "cpu",
        "cuda",
        "mps"
      ],
      "title": "AcceleratorDevice",
      "type": "string"
    },
    "AcceleratorOptions": {
      "additionalProperties": false,
      "properties": {
        "num_threads": {
          "default": 4,
          "title": "Num Threads",
          "type": "integer"
        },
        "device": {
          "anyOf": [
            {
              "type": "string"
            },
            {
              "$ref": "#/$defs/AcceleratorDevice"
            }
          ],
          "default": "auto",
          "title": "Device"
        },
        "cuda_use_flash_attention2": {
          "default": false,
          "title": "Cuda Use Flash Attention2",
          "type": "boolean"
        }
      },
      "title": "AcceleratorOptions",
      "type": "object"
    },
    "DeclarativeBackendOptions": {
      "description": "Default backend options for a declarative document backend.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "declarative",
          "default": "declarative",
          "title": "Kind",
          "type": "string"
        }
      },
      "title": "DeclarativeBackendOptions",
      "type": "object"
    },
    "HTMLBackendOptions": {
      "description": "Options specific to the HTML backend.\n\nThis class can be extended to include options specific to HTML processing.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "html",
          "default": "html",
          "title": "Kind",
          "type": "string"
        },
        "fetch_images": {
          "default": false,
          "description": "Whether the backend should access remote or local resources to parse images in an HTML document.",
          "title": "Fetch Images",
          "type": "boolean"
        },
        "source_uri": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The URI that originates the HTML document. If provided, the backend will use it to resolve relative paths in the HTML document.",
          "title": "Source Uri"
        }
      },
      "title": "HTMLBackendOptions",
      "type": "object"
    },
    "MarkdownBackendOptions": {
      "description": "Options specific to the Markdown backend.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "md",
          "default": "md",
          "title": "Kind",
          "type": "string"
        },
        "fetch_images": {
          "default": false,
          "description": "Whether the backend should access remote or local resources to parse images in the markdown document.",
          "title": "Fetch Images",
          "type": "boolean"
        },
        "source_uri": {
          "anyOf": [
            {
              "format": "uri",
              "minLength": 1,
              "type": "string"
            },
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "The URI that originates the markdown document. If provided, the backend will use it to resolve relative paths in the markdown document.",
          "title": "Source Uri"
        }
      },
      "title": "MarkdownBackendOptions",
      "type": "object"
    },
    "MsExcelBackendOptions": {
      "description": "Options specific to the MS Excel backend.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "xlsx",
          "default": "xlsx",
          "title": "Kind",
          "type": "string"
        },
        "treat_singleton_as_text": {
          "default": false,
          "description": "Whether to treat singleton cells (1x1 tables with empty neighboring cells) as TextItem instead of TableItem.",
          "title": "Treat Singleton As Text",
          "type": "boolean"
        }
      },
      "title": "MsExcelBackendOptions",
      "type": "object"
    },
    "PdfBackendOptions": {
      "description": "Backend options for pdf document backends.",
      "properties": {
        "enable_remote_fetch": {
          "default": false,
          "description": "Enable remote resource fetching.",
          "title": "Enable Remote Fetch",
          "type": "boolean"
        },
        "enable_local_fetch": {
          "default": false,
          "description": "Enable local resource fetching.",
          "title": "Enable Local Fetch",
          "type": "boolean"
        },
        "kind": {
          "const": "pdf",
          "default": "pdf",
          "title": "Kind",
          "type": "string"
        },
        "password": {
          "anyOf": [
            {
              "format": "password",
              "type": "string",
              "writeOnly": true
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "title": "Password"
        }
      },
      "title": "PdfBackendOptions",
      "type": "object"
    },
    "PipelineOptions": {
      "description": "Base pipeline options.",
      "properties": {
        "document_timeout": {
          "anyOf": [
            {
              "type": "number"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Maximum allowed processing time for a document before timing out. If None, no timeout is enforced.",
          "examples": [
            10.0,
            20.0
          ],
          "title": "Document Timeout"
        },
        "accelerator_options": {
          "$ref": "#/$defs/AcceleratorOptions",
          "default": {
            "num_threads": 4,
            "device": "auto",
            "cuda_use_flash_attention2": false
          },
          "description": "Configuration options for hardware acceleration (e.g., GPU or optimized execution settings)."
        },
        "enable_remote_services": {
          "default": false,
          "description": "Enable calling external APIs or cloud services during pipeline execution.",
          "examples": [
            false
          ],
          "title": "Enable Remote Services",
          "type": "boolean"
        },
        "allow_external_plugins": {
          "default": false,
          "description": "Allow loading external third-party plugins or modules. Disabled by default for safety.",
          "examples": [
            false
          ],
          "title": "Allow External Plugins",
          "type": "boolean"
        },
        "artifacts_path": {
          "anyOf": [
            {
              "format": "path",
              "type": "string"
            },
            {
              "type": "string"
            },
            {
              "type": "null"
            }
          ],
          "default": null,
          "description": "Filesystem path where pipeline artifacts should be stored. If None, artifacts will be fetched. You can use the utility `docling-tools models download` to pre-fetch the model artifacts.",
          "examples": [
            "./artifacts",
            "/tmp/docling_outputs"
          ],
          "title": "Artifacts Path"
        }
      },
      "title": "PipelineOptions",
      "type": "object"
    }
  },
  "properties": {
    "pipeline_options": {
      "anyOf": [
        {
          "$ref": "#/$defs/PipelineOptions"
        },
        {
          "type": "null"
        }
      ],
      "default": null
    },
    "backend": {
      "title": "Backend"
    },
    "pipeline_cls": {
      "title": "Pipeline Cls"
    },
    "backend_options": {
      "anyOf": [
        {
          "discriminator": {
            "mapping": {
              "declarative": "#/$defs/DeclarativeBackendOptions",
              "html": "#/$defs/HTMLBackendOptions",
              "md": "#/$defs/MarkdownBackendOptions",
              "pdf": "#/$defs/PdfBackendOptions",
              "xlsx": "#/$defs/MsExcelBackendOptions"
            },
            "propertyName": "kind"
          },
          "oneOf": [
            {
              "$ref": "#/$defs/DeclarativeBackendOptions"
            },
            {
              "$ref": "#/$defs/HTMLBackendOptions"
            },
            {
              "$ref": "#/$defs/MarkdownBackendOptions"
            },
            {
              "$ref": "#/$defs/PdfBackendOptions"
            },
            {
              "$ref": "#/$defs/MsExcelBackendOptions"
            }
          ]
        },
        {
          "type": "null"
        }
      ],
      "default": null,
      "title": "Backend Options"
    }
  },
  "required": [
    "backend",
    "pipeline_cls"
  ],
  "title": "FormatOption",
  "type": "object"
}

Config:

  • arbitrary_types_allowed: True

Fields:

  • backend
  • backend_options
  • pipeline_cls
  • pipeline_options

Validators:

  • set_optional_field_default

backend pydantic-field

backend: Type[AbstractDocumentBackend]

backend_options pydantic-field

backend_options: Optional[BackendOptions] = None

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

pipeline_cls pydantic-field

pipeline_cls: Type[BasePipeline]

pipeline_options pydantic-field

pipeline_options: Optional[PipelineOptions] = None

set_optional_field_default pydantic-validator

set_optional_field_default() -> Self

InputFormat

Bases: str, Enum

A document format supported by document backend parsers.

Attributes:

ASCIIDOC class-attribute instance-attribute

ASCIIDOC = 'asciidoc'

AUDIO class-attribute instance-attribute

AUDIO = 'audio'

CSV class-attribute instance-attribute

CSV = 'csv'

DOCX class-attribute instance-attribute

DOCX = 'docx'

HTML class-attribute instance-attribute

HTML = 'html'

IMAGE class-attribute instance-attribute

IMAGE = 'image'

JSON_DOCLING class-attribute instance-attribute

JSON_DOCLING = 'json_docling'

MD class-attribute instance-attribute

MD = 'md'

METS_GBS class-attribute instance-attribute

METS_GBS = 'mets_gbs'

PDF class-attribute instance-attribute

PDF = 'pdf'

PPTX class-attribute instance-attribute

PPTX = 'pptx'

VTT class-attribute instance-attribute

VTT = 'vtt'

XLSX class-attribute instance-attribute

XLSX = 'xlsx'

XML_JATS class-attribute instance-attribute

XML_JATS = 'xml_jats'

XML_USPTO class-attribute instance-attribute

XML_USPTO = 'xml_uspto'

PdfFormatOption pydantic-model

Bases: FormatOption

Fields:

Validators:

backend pydantic-field

backend: Type[AbstractDocumentBackend] = DoclingParseV4DocumentBackend

backend_options pydantic-field

backend_options: Optional[PdfBackendOptions] = None

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

pipeline_cls pydantic-field

pipeline_cls: Type = StandardPdfPipeline

pipeline_options pydantic-field

pipeline_options: Optional[PipelineOptions] = None

set_optional_field_default pydantic-validator

set_optional_field_default() -> Self

ImageFormatOption pydantic-model

Bases: FormatOption

Fields:

Validators:

backend pydantic-field

backend: Type[AbstractDocumentBackend] = ImageDocumentBackend

backend_options pydantic-field

backend_options: Optional[BackendOptions] = None

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

pipeline_cls pydantic-field

pipeline_cls: Type = StandardPdfPipeline

pipeline_options pydantic-field

pipeline_options: Optional[PipelineOptions] = None

set_optional_field_default pydantic-validator

set_optional_field_default() -> Self

StandardPdfPipeline

StandardPdfPipeline(pipeline_options: ThreadedPdfPipelineOptions)

Bases: ConvertPipeline

High-performance PDF pipeline with multi-threaded stages.

Methods:

Attributes:

artifacts_path instance-attribute

artifacts_path: Optional[Path] = None

build_pipe instance-attribute

build_pipe: List[Callable] = []

enrichment_pipe instance-attribute

enrichment_pipe = [DocumentPictureClassifier(enabled=do_picture_classification, artifacts_path=artifacts_path, options=DocumentPictureClassifierOptions(), accelerator_options=accelerator_options), picture_description_model]

keep_images instance-attribute

keep_images = False

pipeline_options instance-attribute

pipeline_options: ThreadedPdfPipelineOptions = pipeline_options

execute

execute(in_doc: InputDocument, raises_on_error: bool) -> ConversionResult

get_default_options classmethod

get_default_options() -> ThreadedPdfPipelineOptions

is_backend_supported classmethod

is_backend_supported(backend: AbstractDocumentBackend) -> bool

WordFormatOption pydantic-model

Bases: FormatOption

Fields:

Validators:

backend pydantic-field

backend: Type[AbstractDocumentBackend] = MsWordDocumentBackend

backend_options pydantic-field

backend_options: Optional[BackendOptions] = None

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

pipeline_cls pydantic-field

pipeline_cls: Type = SimplePipeline

pipeline_options pydantic-field

pipeline_options: Optional[PipelineOptions] = None

set_optional_field_default pydantic-validator

set_optional_field_default() -> Self

PowerpointFormatOption pydantic-model

Bases: FormatOption

Fields:

Validators:

backend pydantic-field

backend: Type[AbstractDocumentBackend] = MsPowerpointDocumentBackend

backend_options pydantic-field

backend_options: Optional[BackendOptions] = None

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

pipeline_cls pydantic-field

pipeline_cls: Type = SimplePipeline

pipeline_options pydantic-field

pipeline_options: Optional[PipelineOptions] = None

set_optional_field_default pydantic-validator

set_optional_field_default() -> Self

MarkdownFormatOption pydantic-model

Bases: FormatOption

Fields:

Validators:

backend pydantic-field

backend: Type[AbstractDocumentBackend] = MarkdownDocumentBackend

backend_options pydantic-field

backend_options: Optional[MarkdownBackendOptions] = None

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

pipeline_cls pydantic-field

pipeline_cls: Type = SimplePipeline

pipeline_options pydantic-field

pipeline_options: Optional[PipelineOptions] = None

set_optional_field_default pydantic-validator

set_optional_field_default() -> Self

AsciiDocFormatOption pydantic-model

Bases: FormatOption

Fields:

Validators:

backend pydantic-field

backend: Type[AbstractDocumentBackend] = AsciiDocBackend

backend_options pydantic-field

backend_options: Optional[BackendOptions] = None

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

pipeline_cls pydantic-field

pipeline_cls: Type = SimplePipeline

pipeline_options pydantic-field

pipeline_options: Optional[PipelineOptions] = None

set_optional_field_default pydantic-validator

set_optional_field_default() -> Self

HTMLFormatOption pydantic-model

Bases: FormatOption

Fields:

Validators:

backend pydantic-field

backend: Type[AbstractDocumentBackend] = HTMLDocumentBackend

backend_options pydantic-field

backend_options: Optional[HTMLBackendOptions] = None

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

pipeline_cls pydantic-field

pipeline_cls: Type = SimplePipeline

pipeline_options pydantic-field

pipeline_options: Optional[PipelineOptions] = None

set_optional_field_default pydantic-validator

set_optional_field_default() -> Self

SimplePipeline

SimplePipeline(pipeline_options: ConvertPipelineOptions)

Bases: ConvertPipeline

Simple conversion pipeline (SimpleModelPipeline).

This class is currently used for formats/backends that produce DoclingDocument output directly.

Methods:

Attributes:

artifacts_path instance-attribute

artifacts_path: Optional[Path] = None

build_pipe instance-attribute

build_pipe: List[Callable] = []

enrichment_pipe instance-attribute

enrichment_pipe = [DocumentPictureClassifier(enabled=do_picture_classification, artifacts_path=artifacts_path, options=DocumentPictureClassifierOptions(), accelerator_options=accelerator_options), picture_description_model]

keep_images instance-attribute

keep_images = False

pipeline_options instance-attribute

pipeline_options: ConvertPipelineOptions

execute

execute(in_doc: InputDocument, raises_on_error: bool) -> ConversionResult

get_default_options classmethod

get_default_options() -> ConvertPipelineOptions

is_backend_supported classmethod

is_backend_supported(backend: AbstractDocumentBackend)