feat: Use new TableFormer model weights and default to accurate model version (#1100)

* feat: New tableformer model weights [WIP]

Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>

* Updated TableFormer (TF) version

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

* Updated tests after merging with main; switched to the accurate TableFormer (TF) model by default

Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>

---------

Signed-off-by: Christoph Auer <60343111+cau-git@users.noreply.github.com>
Signed-off-by: Maksym Lysak <mly@zurich.ibm.com>
Co-authored-by: Maksym Lysak <mly@zurich.ibm.com>
This commit is contained in:
Christoph Auer
2025-03-11 10:53:49 +01:00
committed by GitHub
parent 5e30381c0d
commit eb97357b05
43 changed files with 213 additions and 229 deletions

View File

@@ -210,7 +210,7 @@ def convert(
table_mode: Annotated[
TableFormerMode,
typer.Option(..., help="The mode to use in the table structure model."),
] = TableFormerMode.FAST,
] = TableFormerMode.ACCURATE,
enrich_code: Annotated[
bool,
typer.Option(..., help="Enable the code enrichment model in the pipeline."),

View File

@@ -99,7 +99,7 @@ class TableStructureOptions(BaseModel):
# are merged across table columns.
# False: Let table structure model define the text cells, ignore PDF cells.
)
mode: TableFormerMode = TableFormerMode.FAST
mode: TableFormerMode = TableFormerMode.ACCURATE
class OcrOptions(BaseModel):

View File

@@ -95,7 +95,7 @@ class TableStructureModel(BasePageModel):
repo_id="ds4sd/docling-models",
force_download=force,
local_dir=local_dir,
revision="v2.1.0",
revision="v2.2.0",
)
return Path(download_path)