mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
chore(ocr): revert layout updates
Signed-off-by: Clément Doumouro <clement.doumouro@gmail.com>
This commit is contained in:
parent
1181338737
commit
bf00fa1a9f
@ -266,11 +266,15 @@ class TesseractOcrCliModel(BaseOcrModel):
|
|||||||
text = row["text"]
|
text = row["text"]
|
||||||
conf = row["conf"]
|
conf = row["conf"]
|
||||||
|
|
||||||
l, t = float(row["left"]), float(row["top"])
|
left, top = float(row["left"]), float(row["top"])
|
||||||
r = l + float(row["width"])
|
right = left + float(row["width"])
|
||||||
b = t + row["height"]
|
bottom = top + row["height"]
|
||||||
bbox = BoundingBox(
|
bbox = BoundingBox(
|
||||||
l=l, t=t, r=r, b=b, coord_origin=CoordOrigin.TOPLEFT
|
l=left,
|
||||||
|
t=top,
|
||||||
|
r=right,
|
||||||
|
b=bottom,
|
||||||
|
coord_origin=CoordOrigin.TOPLEFT,
|
||||||
)
|
)
|
||||||
rect = tesseract_box_to_bounding_rectangle(
|
rect = tesseract_box_to_bounding_rectangle(
|
||||||
bbox,
|
bbox,
|
||||||
|
@ -194,11 +194,15 @@ class TesseractOcrModel(BaseOcrModel):
|
|||||||
# Extract text within the bounding box
|
# Extract text within the bounding box
|
||||||
text = local_reader.GetUTF8Text().strip()
|
text = local_reader.GetUTF8Text().strip()
|
||||||
confidence = local_reader.MeanTextConf()
|
confidence = local_reader.MeanTextConf()
|
||||||
l, t = box["x"], box["y"]
|
left, top = box["x"], box["y"]
|
||||||
r = l + box["w"]
|
right = left + box["w"]
|
||||||
b = t + box["h"]
|
bottom = top + box["h"]
|
||||||
bbox = BoundingBox(
|
bbox = BoundingBox(
|
||||||
l=l, t=t, r=r, b=b, coord_origin=CoordOrigin.TOPLEFT
|
l=left,
|
||||||
|
t=top,
|
||||||
|
r=right,
|
||||||
|
b=bottom,
|
||||||
|
coord_origin=CoordOrigin.TOPLEFT,
|
||||||
)
|
)
|
||||||
rect = tesseract_box_to_bounding_rectangle(
|
rect = tesseract_box_to_bounding_rectangle(
|
||||||
bbox,
|
bbox,
|
||||||
|
@ -54,7 +54,7 @@ def tesseract_box_to_bounding_rectangle(
|
|||||||
coord_origin=CoordOrigin.TOPLEFT,
|
coord_origin=CoordOrigin.TOPLEFT,
|
||||||
)
|
)
|
||||||
if original_offset is not None:
|
if original_offset is not None:
|
||||||
if not original_offset.coord_origin is CoordOrigin.TOPLEFT:
|
if original_offset.coord_origin is not CoordOrigin.TOPLEFT:
|
||||||
msg = f"expected coordinate origin to be {CoordOrigin.TOPLEFT.value}"
|
msg = f"expected coordinate origin to be {CoordOrigin.TOPLEFT.value}"
|
||||||
raise ValueError(msg)
|
raise ValueError(msg)
|
||||||
if original_offset is not None:
|
if original_offset is not None:
|
||||||
|
@ -13,44 +13,44 @@ def rotate_bounding_box(
|
|||||||
# Bounding rectangle start with r_0 at the bottom left whatever the
|
# Bounding rectangle start with r_0 at the bottom left whatever the
|
||||||
# coordinate system. Then other corners are found rotating counterclockwise
|
# coordinate system. Then other corners are found rotating counterclockwise
|
||||||
bbox = bbox.to_top_left_origin(im_size[1])
|
bbox = bbox.to_top_left_origin(im_size[1])
|
||||||
l, t, w, h = bbox.l, bbox.t, bbox.width, bbox.height
|
left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height
|
||||||
im_h, im_w = im_size
|
im_h, im_w = im_size
|
||||||
angle = angle % 360
|
angle = angle % 360
|
||||||
if angle == 0:
|
if angle == 0:
|
||||||
r_x0 = l
|
r_x0 = left
|
||||||
r_y0 = t + h
|
r_y0 = top + height
|
||||||
r_x1 = r_x0 + w
|
r_x1 = r_x0 + width
|
||||||
r_y1 = r_y0
|
r_y1 = r_y0
|
||||||
r_x2 = r_x0 + w
|
r_x2 = r_x0 + width
|
||||||
r_y2 = r_y0 - h
|
r_y2 = r_y0 - height
|
||||||
r_x3 = r_x0
|
r_x3 = r_x0
|
||||||
r_y3 = r_y0 - h
|
r_y3 = r_y0 - height
|
||||||
elif angle == 90:
|
elif angle == 90:
|
||||||
r_x0 = im_w - (t + h)
|
r_x0 = im_w - (top + height)
|
||||||
r_y0 = l
|
r_y0 = left
|
||||||
r_x1 = r_x0
|
r_x1 = r_x0
|
||||||
r_y1 = r_y0 + w
|
r_y1 = r_y0 + width
|
||||||
r_x2 = r_x0 + h
|
r_x2 = r_x0 + height
|
||||||
r_y2 = r_y0 + w
|
r_y2 = r_y0 + width
|
||||||
r_x3 = r_x0
|
r_x3 = r_x0
|
||||||
r_y3 = r_y0 + w
|
r_y3 = r_y0 + width
|
||||||
elif angle == 180:
|
elif angle == 180:
|
||||||
r_x0 = im_h - l
|
r_x0 = im_h - left
|
||||||
r_y0 = im_w - (t + h)
|
r_y0 = im_w - (top + height)
|
||||||
r_x1 = r_x0 - w
|
r_x1 = r_x0 - width
|
||||||
r_y1 = r_y0
|
r_y1 = r_y0
|
||||||
r_x2 = r_x0 - w
|
r_x2 = r_x0 - width
|
||||||
r_y2 = r_y0 + h
|
r_y2 = r_y0 + height
|
||||||
r_x3 = r_x0
|
r_x3 = r_x0
|
||||||
r_y3 = r_y0 + h
|
r_y3 = r_y0 + height
|
||||||
elif angle == 270:
|
elif angle == 270:
|
||||||
r_x0 = t + h
|
r_x0 = top + height
|
||||||
r_y0 = im_h - l
|
r_y0 = im_h - left
|
||||||
r_x1 = r_x0
|
r_x1 = r_x0
|
||||||
r_y1 = r_y0 - w
|
r_y1 = r_y0 - width
|
||||||
r_x2 = r_x0 - h
|
r_x2 = r_x0 - height
|
||||||
r_y2 = r_y0 - w
|
r_y2 = r_y0 - width
|
||||||
r_x3 = r_x0 - h
|
r_x3 = r_x0 - height
|
||||||
r_y3 = r_y0
|
r_y3 = r_y0
|
||||||
else:
|
else:
|
||||||
msg = (
|
msg = (
|
||||||
|
Loading…
Reference in New Issue
Block a user