mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Removed special HTML code wrapping when exporting to a DoclingDocument; cleaned up comments
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
parent
b12f5ba80f
commit
b0935daec4
@@ -89,14 +89,7 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
"code": "lightblue",
|
"code": "lightblue",
|
||||||
}
|
}
|
||||||
|
|
||||||
"""
|
self.keep_images = (
|
||||||
if pipeline_options.artifacts_path is None:
|
|
||||||
self.artifacts_path = self.download_models_hf()
|
|
||||||
else:
|
|
||||||
self.artifacts_path = Path(pipeline_options.artifacts_path)
|
|
||||||
"""
|
|
||||||
|
|
||||||
keep_images = (
|
|
||||||
self.pipeline_options.generate_page_images
|
self.pipeline_options.generate_page_images
|
||||||
or self.pipeline_options.generate_picture_images
|
or self.pipeline_options.generate_picture_images
|
||||||
or self.pipeline_options.generate_table_images
|
or self.pipeline_options.generate_table_images
|
||||||
@@ -429,9 +422,6 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
text_content = extract_text_from_backend(page, bbox)
|
text_content = extract_text_from_backend(page, bbox)
|
||||||
else:
|
else:
|
||||||
text_content = extract_inner_text(full_chunk)
|
text_content = extract_inner_text(full_chunk)
|
||||||
# If it's code, wrap it with <pre><code> tags
|
|
||||||
if doc_label == DocItemLabel.CODE:
|
|
||||||
text_content = f"<pre><code>{text_content}</code></pre>"
|
|
||||||
doc.add_text(
|
doc.add_text(
|
||||||
label=doc_label,
|
label=doc_label,
|
||||||
text=text_content,
|
text=text_content,
|
||||||
@@ -454,6 +444,3 @@ class VlmPipeline(PaginatedPipeline):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def is_backend_supported(cls, backend: AbstractDocumentBackend):
|
def is_backend_supported(cls, backend: AbstractDocumentBackend):
|
||||||
return isinstance(backend, PdfDocumentBackend)
|
return isinstance(backend, PdfDocumentBackend)
|
||||||
|
|
||||||
# def _turn_tags_into_doc(self, document_tags):
|
|
||||||
# return DoclingDocument()
|
|
||||||
|
Loading…
Reference in New Issue
Block a user