Rename OCR option coverage_threshold to bitmap_area_threshold

Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
Michele Dolfi 2024-10-18 12:16:23 +02:00
parent c8734850ef
commit 63976fdc76
3 changed files with 4 additions and 4 deletions

View File

@ -22,7 +22,7 @@ class TableStructureOptions(BaseModel):
class OcrOptions(BaseModel): class OcrOptions(BaseModel):
kind: str kind: str
coverage_threshold: float = ( bitmap_area_threshold: float = (
0.05 # percentage of the area for a bitmap to be processed with OCR 0.05 # percentage of the area for a bitmap to be processed with OCR
) )

View File

@ -69,7 +69,7 @@ class BaseOcrModel:
coverage, ocr_rects = find_ocr_rects(page.size, bitmap_rects) coverage, ocr_rects = find_ocr_rects(page.size, bitmap_rects)
# return full-page rectangle if sufficiently covered with bitmaps # return full-page rectangle if sufficiently covered with bitmaps
if coverage > max(BITMAP_COVERAGE_TRESHOLD, self.options.coverage_threshold): if coverage > max(BITMAP_COVERAGE_TRESHOLD, self.options.bitmap_area_threshold):
return [ return [
BoundingBox( BoundingBox(
l=0, l=0,
@ -87,7 +87,7 @@ class BaseOcrModel:
rect rect
for rect in ocr_rects for rect in ocr_rects
if rect.area() / (page.size.width * page.size.height) if rect.area() / (page.size.width * page.size.height)
> self.options.coverage_threshold > self.options.bitmap_area_threshold
] ]
return ocr_rects return ocr_rects

View File

@ -47,7 +47,7 @@ def test_e2e_conversions(test_doc_path):
def test_ocr_coverage_threshold(test_doc_path): def test_ocr_coverage_threshold(test_doc_path):
pipeline_options = PdfPipelineOptions() pipeline_options = PdfPipelineOptions()
pipeline_options.do_ocr = True pipeline_options.do_ocr = True
pipeline_options.ocr_options.coverage_threshold = 1.1 pipeline_options.ocr_options.bitmap_area_threshold = 1.1
converter = DocumentConverter( converter = DocumentConverter(
format_options={ format_options={