diff --git a/docling/pipeline/vlm_pipeline.py b/docling/pipeline/vlm_pipeline.py index ec7d05a8..5cb6bf84 100644 --- a/docling/pipeline/vlm_pipeline.py +++ b/docling/pipeline/vlm_pipeline.py @@ -89,14 +89,7 @@ class VlmPipeline(PaginatedPipeline): "code": "lightblue", } - """ - if pipeline_options.artifacts_path is None: - self.artifacts_path = self.download_models_hf() - else: - self.artifacts_path = Path(pipeline_options.artifacts_path) - """ - - keep_images = ( + self.keep_images = ( self.pipeline_options.generate_page_images or self.pipeline_options.generate_picture_images or self.pipeline_options.generate_table_images @@ -429,9 +422,6 @@ class VlmPipeline(PaginatedPipeline): text_content = extract_text_from_backend(page, bbox) else: text_content = extract_inner_text(full_chunk) - # If it's code, wrap it with
tags
- if doc_label == DocItemLabel.CODE:
- text_content = f"{text_content}
"
doc.add_text(
label=doc_label,
text=text_content,
@@ -454,6 +444,3 @@ class VlmPipeline(PaginatedPipeline):
@classmethod
def is_backend_supported(cls, backend: AbstractDocumentBackend):
return isinstance(backend, PdfDocumentBackend)
-
- # def _turn_tags_into_doc(self, document_tags):
- # return DoclingDocument()