mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
feat: export document pages as multimodal output (#54)
* feat: export document pages as multimodal output Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * create a single parquet output Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * add loading into HF datasets library Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * renaming Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> * cleanup Signed-off-by: Michele Dolfi <dol@zurich.ibm.com> --------- Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
@@ -71,6 +71,15 @@ class BoundingBox(BaseModel):
|
||||
|
||||
return out_bbox
|
||||
|
||||
def normalized(self, page_size: PageSize) -> "BoundingBox":
    """Return a copy of this box with coordinates divided by the page size.

    The horizontal edges (``l`` and ``r``) are divided by
    ``page_size.width`` and the vertical edges (``t`` and ``b``) by
    ``page_size.height``. The receiver itself is left unmodified; the
    scaling is applied to a deep copy.

    Args:
        page_size: Object exposing ``width`` and ``height`` used as the
            divisors.

    Returns:
        A new bounding box holding the scaled coordinates.
    """
    page_w = page_size.width
    page_h = page_size.height

    # Work on a deep copy so the caller's box keeps its coordinates.
    scaled = copy.deepcopy(self)

    # Vertical edges scale with the page height.
    scaled.t /= page_h
    scaled.b /= page_h

    # Horizontal edges scale with the page width.
    scaled.l /= page_w
    scaled.r /= page_w

    return scaled
|
||||
|
||||
def as_tuple(self):
|
||||
if self.coord_origin == CoordOrigin.TOPLEFT:
|
||||
return (self.l, self.t, self.r, self.b)
|
||||
|
||||
Reference in New Issue
Block a user