mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
Update docs
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
parent
d8163b0865
commit
6ea6f29516
16
README.md
16
README.md
@ -110,6 +110,8 @@ This can improve output quality if you find that multiple columns in extracted t
|
|||||||
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
from docling.datamodel.pipeline_options import PipelineOptions
|
||||||
|
|
||||||
pipeline_options = PipelineOptions(do_table_structure=True)
|
pipeline_options = PipelineOptions(do_table_structure=True)
|
||||||
pipeline_options.table_structure_options.do_cell_matching = False # uses text cells predicted from table structure model
|
pipeline_options.table_structure_options.do_cell_matching = False # uses text cells predicted from table structure model
|
||||||
|
|
||||||
@ -119,6 +121,20 @@ doc_converter = DocumentConverter(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Since docling 1.14.0, you can control which TableFormer mode is used. Choose between `TableFormerMode.FAST` (default) and `TableFormerMode.ACCURATE` (better, but slower) to get better quality on difficult table structures.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from docling.datamodel.pipeline_options import PipelineOptions, TableFormerMode
|
||||||
|
|
||||||
|
pipeline_options = PipelineOptions(do_table_structure=True)
|
||||||
|
pipeline_options.table_structure_options.mode = TableFormerMode.ACCURATE # use more accurate TableFormer model
|
||||||
|
|
||||||
|
doc_converter = DocumentConverter(
|
||||||
|
artifacts_path=artifacts_path,
|
||||||
|
pipeline_options=pipeline_options,
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
### Impose limits on the document size
|
### Impose limits on the document size
|
||||||
|
|
||||||
You can limit the file size and the number of pages that are allowed to be processed per document:
|
You can limit the file size and the number of pages that are allowed to be processed per document:
|
||||||
|
@ -83,9 +83,9 @@ def main():
|
|||||||
# PyPdfium with OCR
|
# PyPdfium with OCR
|
||||||
# -----------------
|
# -----------------
|
||||||
# pipeline_options = PipelineOptions()
|
# pipeline_options = PipelineOptions()
|
||||||
# pipeline_options.do_ocr=False
|
# pipeline_options.do_ocr=True
|
||||||
# pipeline_options.do_table_structure=True
|
# pipeline_options.do_table_structure=True
|
||||||
# pipeline_options.table_structure_options.do_cell_matching = True
|
# pipeline_options.table_structure_options.do_cell_matching = False
|
||||||
|
|
||||||
# doc_converter = DocumentConverter(
|
# doc_converter = DocumentConverter(
|
||||||
# pipeline_options=pipeline_options,
|
# pipeline_options=pipeline_options,
|
||||||
|
Loading…
Reference in New Issue
Block a user