mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
chore(ocr): revert layout updates
Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
parent
1181338737
commit
bf00fa1a9f
@ -266,11 +266,15 @@ class TesseractOcrCliModel(BaseOcrModel):
|
||||
text = row["text"]
|
||||
conf = row["conf"]
|
||||
|
||||
l, t = float(row["left"]), float(row["top"])
|
||||
r = l + float(row["width"])
|
||||
b = t + row["height"]
|
||||
left, top = float(row["left"]), float(row["top"])
|
||||
right = left + float(row["width"])
|
||||
bottom = top + row["height"]
|
||||
bbox = BoundingBox(
|
||||
l=l, t=t, r=r, b=b, coord_origin=CoordOrigin.TOPLEFT
|
||||
l=left,
|
||||
t=top,
|
||||
r=right,
|
||||
b=bottom,
|
||||
coord_origin=CoordOrigin.TOPLEFT,
|
||||
)
|
||||
rect = tesseract_box_to_bounding_rectangle(
|
||||
bbox,
|
||||
|
@ -194,11 +194,15 @@ class TesseractOcrModel(BaseOcrModel):
|
||||
# Extract text within the bounding box
|
||||
text = local_reader.GetUTF8Text().strip()
|
||||
confidence = local_reader.MeanTextConf()
|
||||
l, t = box["x"], box["y"]
|
||||
r = l + box["w"]
|
||||
b = t + box["h"]
|
||||
left, top = box["x"], box["y"]
|
||||
right = left + box["w"]
|
||||
bottom = top + box["h"]
|
||||
bbox = BoundingBox(
|
||||
l=l, t=t, r=r, b=b, coord_origin=CoordOrigin.TOPLEFT
|
||||
l=left,
|
||||
t=top,
|
||||
r=right,
|
||||
b=bottom,
|
||||
coord_origin=CoordOrigin.TOPLEFT,
|
||||
)
|
||||
rect = tesseract_box_to_bounding_rectangle(
|
||||
bbox,
|
||||
|
@ -54,7 +54,7 @@ def tesseract_box_to_bounding_rectangle(
|
||||
coord_origin=CoordOrigin.TOPLEFT,
|
||||
)
|
||||
if original_offset is not None:
|
||||
if not original_offset.coord_origin is CoordOrigin.TOPLEFT:
|
||||
if original_offset.coord_origin is not CoordOrigin.TOPLEFT:
|
||||
msg = f"expected coordinate origin to be {CoordOrigin.TOPLEFT.value}"
|
||||
raise ValueError(msg)
|
||||
if original_offset is not None:
|
||||
|
@ -13,44 +13,44 @@ def rotate_bounding_box(
|
||||
# Bounding rectangle starts with r_0 at the bottom left, whatever the
|
||||
# coordinate system. The other corners are then found by rotating counterclockwise
|
||||
bbox = bbox.to_top_left_origin(im_size[1])
|
||||
l, t, w, h = bbox.l, bbox.t, bbox.width, bbox.height
|
||||
left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height
|
||||
im_h, im_w = im_size
|
||||
angle = angle % 360
|
||||
if angle == 0:
|
||||
r_x0 = l
|
||||
r_y0 = t + h
|
||||
r_x1 = r_x0 + w
|
||||
r_x0 = left
|
||||
r_y0 = top + height
|
||||
r_x1 = r_x0 + width
|
||||
r_y1 = r_y0
|
||||
r_x2 = r_x0 + w
|
||||
r_y2 = r_y0 - h
|
||||
r_x2 = r_x0 + width
|
||||
r_y2 = r_y0 - height
|
||||
r_x3 = r_x0
|
||||
r_y3 = r_y0 - h
|
||||
r_y3 = r_y0 - height
|
||||
elif angle == 90:
|
||||
r_x0 = im_w - (t + h)
|
||||
r_y0 = l
|
||||
r_x0 = im_w - (top + height)
|
||||
r_y0 = left
|
||||
r_x1 = r_x0
|
||||
r_y1 = r_y0 + w
|
||||
r_x2 = r_x0 + h
|
||||
r_y2 = r_y0 + w
|
||||
r_y1 = r_y0 + width
|
||||
r_x2 = r_x0 + height
|
||||
r_y2 = r_y0 + width
|
||||
r_x3 = r_x0
|
||||
r_y3 = r_y0 + w
|
||||
r_y3 = r_y0 + width
|
||||
elif angle == 180:
|
||||
r_x0 = im_h - l
|
||||
r_y0 = im_w - (t + h)
|
||||
r_x1 = r_x0 - w
|
||||
r_x0 = im_h - left
|
||||
r_y0 = im_w - (top + height)
|
||||
r_x1 = r_x0 - width
|
||||
r_y1 = r_y0
|
||||
r_x2 = r_x0 - w
|
||||
r_y2 = r_y0 + h
|
||||
r_x2 = r_x0 - width
|
||||
r_y2 = r_y0 + height
|
||||
r_x3 = r_x0
|
||||
r_y3 = r_y0 + h
|
||||
r_y3 = r_y0 + height
|
||||
elif angle == 270:
|
||||
r_x0 = t + h
|
||||
r_y0 = im_h - l
|
||||
r_x0 = top + height
|
||||
r_y0 = im_h - left
|
||||
r_x1 = r_x0
|
||||
r_y1 = r_y0 - w
|
||||
r_x2 = r_x0 - h
|
||||
r_y2 = r_y0 - w
|
||||
r_x3 = r_x0 - h
|
||||
r_y1 = r_y0 - width
|
||||
r_x2 = r_x0 - height
|
||||
r_y2 = r_y0 - width
|
||||
r_x3 = r_x0 - height
|
||||
r_y3 = r_y0
|
||||
else:
|
||||
msg = (
|
||||
|
Loading…
Reference in New Issue
Block a user