Fix the doctags starting tag, which broke elements on the first line during assembly

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Maksym Lysak 2025-01-21 11:14:55 +01:00
parent 0fe12d819a
commit 88b9ac6706

View File

@@ -323,7 +323,7 @@ class VlmPipeline(PaginatedPipeline):
for line in lines:
line = line.strip()
line = line.replace("<doc_tag>", "")
line = line.replace("<doctag>", "")
if line.startswith("<paragraph>"):
prov_item = extract_bounding_box(line)
if self.force_backend_text: