GraniteDocling repetition stopping
Experimental VLM pipeline with custom repetition stopping criteria.
This script demonstrates the use of custom stopping criteria that detect repetitive location coordinate patterns in generated text and stop generation when such patterns are found.
What this example does
- Uses the GraniteDocling model with custom repetition stopping criteria injected
- Processes a PDF document or image and monitors for repetitive coordinate patterns
- Stops generation early when repetitive patterns are detected
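For intuition, here is a minimal sketch of the failure mode such a criterion guards against. This is not Docling's actual DocTagsRepetitionStopper; it only assumes, for illustration, that coordinates appear in the generated DocTags as runs of <loc_*> tags and shows a naive way to spot a group of coordinates that keeps repeating. The full example below then wires the real DocTagsRepetitionStopper into the VLM pipeline.

# Illustrative only: NOT Docling's DocTagsRepetitionStopper, just a naive sketch
# of the degenerate output the stopping criterion is meant to catch.
import re

def looks_like_loc_repetition(text: str, group: int = 4, min_repeats: int = 4) -> bool:
    """Naively check whether the tail of `text` repeats one coordinate group."""
    tags = re.findall(r"<loc_\d+>", text)
    tail = tags[-group * min_repeats :]
    if len(tail) < group * min_repeats:
        return False
    first = tail[:group]
    return all(tail[i : i + group] == first for i in range(0, len(tail), group))

print(looks_like_loc_repetition("<loc_10><loc_20><loc_90><loc_40>" * 6))  # True
print(looks_like_loc_repetition("<loc_1><loc_2><loc_3><loc_4>"))  # False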
import logging
from docling.datamodel import vlm_model_specs
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.models.utils.generation_utils import (
DocTagsRepetitionStopper,
)
from docling.pipeline.vlm_pipeline import VlmPipeline
# Set up logging to see when repetition stopping is triggered
logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message)s")
# Replace with a local path if preferred.
# source = "https://ibm.biz/docling-page-with-table" # Example that shows no repetitions.
source = "tests/data_scanned/old_newspaper.png" # Example that creates repetitions.
print(f"Processing document: {source}")
###### USING GRANITEDOCLING WITH CUSTOM REPETITION STOPPING
## Using standard Hugging Face Transformers (most portable, slowest)
custom_vlm_options = vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.model_copy()
# Uncomment this to use the MLX-accelerated version on Apple Silicon
# custom_vlm_options = vlm_model_specs.GRANITEDOCLING_MLX.model_copy()
# Create custom VLM options with repetition stopping criteria
custom_vlm_options.custom_stopping_criteria = [
    DocTagsRepetitionStopper(N=32)
]  # check for repetitions every 32 newly decoded tokens
pipeline_options = VlmPipelineOptions(
vlm_options=custom_vlm_options,
)
converter = DocumentConverter(
format_options={
InputFormat.IMAGE: PdfFormatOption(
pipeline_cls=VlmPipeline,
pipeline_options=pipeline_options,
),
}
)
doc = converter.convert(source=source).document
print(doc.export_to_markdown())
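# The repetition check interval is configurable: DocTagsRepetitionStopper(N=32) above
# inspects the decoded text every 32 new tokens. A smaller N reacts to loops sooner at
# the cost of more frequent checks; the value below is only an illustration, not a
# recommended setting.
# custom_vlm_options.custom_stopping_criteria = [DocTagsRepetitionStopper(N=8)]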
## Using a remote VLM inference service (for example vLLM) - uncomment to use
## Note: this variant additionally needs ApiVlmOptions and ResponseFormat
## (in recent Docling versions available from docling.datamodel.pipeline_options_vlm_model).
# custom_vlm_options = ApiVlmOptions(
#     url="http://localhost:8000/v1/chat/completions",  # LM Studio defaults to port 1234, vLLM to 8000
# params=dict(
# model=vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.repo_id,
# max_tokens=8192,
# skip_special_tokens=True, # needed for VLLM
# ),
# headers={
# "Authorization": "Bearer YOUR_API_KEY",
# },
# prompt=vlm_model_specs.GRANITEDOCLING_TRANSFORMERS.prompt,
# timeout=90,
# scale=2.0,
# temperature=0.0,
# response_format=ResponseFormat.DOCTAGS,
# custom_stopping_criteria=[
# DocTagsRepetitionStopper(N=1)
# ], # check for repetitions for every new chunk of the response stream
# )
# pipeline_options = VlmPipelineOptions(
# vlm_options=custom_vlm_options,
# enable_remote_services=True, # required when using a remote inference service.
# )
# converter = DocumentConverter(
# format_options={
# InputFormat.IMAGE: PdfFormatOption(
# pipeline_cls=VlmPipeline,
# pipeline_options=pipeline_options,
# ),
# }
# )
# doc = converter.convert(source=source).document
# print(doc.export_to_markdown())