mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-27 04:24:45 +00:00
apply pre-commit with new path
Signed-off-by: Michele Dolfi <dol@zurich.ibm.com>
This commit is contained in:
parent
4cead1e38b
commit
4918e11cbe
@ -4,13 +4,13 @@ repos:
|
|||||||
hooks:
|
hooks:
|
||||||
- id: black
|
- id: black
|
||||||
name: Black
|
name: Black
|
||||||
entry: poetry run black docling examples tests
|
entry: poetry run black docling docs/examples tests
|
||||||
pass_filenames: false
|
pass_filenames: false
|
||||||
language: system
|
language: system
|
||||||
files: '\.py$'
|
files: '\.py$'
|
||||||
- id: isort
|
- id: isort
|
||||||
name: isort
|
name: isort
|
||||||
entry: poetry run isort docling examples tests
|
entry: poetry run isort docling docs/examples tests
|
||||||
pass_filenames: false
|
pass_filenames: false
|
||||||
language: system
|
language: system
|
||||||
files: '\.py$'
|
files: '\.py$'
|
||||||
@ -28,13 +28,13 @@ repos:
|
|||||||
files: '\.py$'
|
files: '\.py$'
|
||||||
- id: nbqa_black
|
- id: nbqa_black
|
||||||
name: nbQA Black
|
name: nbQA Black
|
||||||
entry: poetry run nbqa black examples
|
entry: poetry run nbqa black docs/examples
|
||||||
pass_filenames: false
|
pass_filenames: false
|
||||||
language: system
|
language: system
|
||||||
files: '\.ipynb$'
|
files: '\.ipynb$'
|
||||||
- id: nbqa_isort
|
- id: nbqa_isort
|
||||||
name: nbQA isort
|
name: nbQA isort
|
||||||
entry: poetry run nbqa isort examples
|
entry: poetry run nbqa isort docs/examples
|
||||||
pass_filenames: false
|
pass_filenames: false
|
||||||
language: system
|
language: system
|
||||||
files: '\.ipynb$'
|
files: '\.ipynb$'
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
import time
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from docling.datamodel.base_models import ConversionStatus
|
from docling.datamodel.base_models import ConversionStatus
|
||||||
|
@ -1,13 +1,10 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import time
|
|
||||||
|
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import (
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
PdfPipelineOptions,
|
|
||||||
)
|
|
||||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
@ -2,8 +2,13 @@ import logging
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Iterable
|
from typing import Any, Iterable
|
||||||
|
|
||||||
from docling_core.types.doc import DoclingDocument, NodeItem
|
from docling_core.types.doc import (
|
||||||
from docling_core.types.doc import PictureClassificationData, PictureItem, PictureClassificationClass
|
DoclingDocument,
|
||||||
|
NodeItem,
|
||||||
|
PictureClassificationClass,
|
||||||
|
PictureClassificationData,
|
||||||
|
PictureItem,
|
||||||
|
)
|
||||||
|
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
@ -36,10 +41,14 @@ class ExamplePictureClassifierEnrichmentModel(BaseEnrichmentModel):
|
|||||||
# uncomment this to interactively visualize the image
|
# uncomment this to interactively visualize the image
|
||||||
# element.image.pil_image.show()
|
# element.image.pil_image.show()
|
||||||
|
|
||||||
element.annotations.append(PictureClassificationData(
|
element.annotations.append(
|
||||||
provenance="example_classifier-0.0.1",
|
PictureClassificationData(
|
||||||
predicted_classes=[PictureClassificationClass(class_name="dummy", confidence=0.42)]
|
provenance="example_classifier-0.0.1",
|
||||||
))
|
predicted_classes=[
|
||||||
|
PictureClassificationClass(class_name="dummy", confidence=0.42)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
yield element
|
yield element
|
||||||
|
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import time
|
|
||||||
|
|
||||||
from docling.datamodel.base_models import InputFormat
|
from docling.datamodel.base_models import InputFormat
|
||||||
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
from docling.datamodel.pipeline_options import PdfPipelineOptions
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import time
|
|
||||||
|
|
||||||
from docling.document_converter import DocumentConverter
|
from docling.document_converter import DocumentConverter
|
||||||
|
|
||||||
|
@ -16,6 +16,7 @@ from docling.pipeline.standard_pdf_pipeline import StandardPdfPipeline
|
|||||||
|
|
||||||
_log = logging.getLogger(__name__)
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
input_paths = [
|
input_paths = [
|
||||||
Path("tests/data/wiki_duck.html"),
|
Path("tests/data/wiki_duck.html"),
|
||||||
@ -69,5 +70,7 @@ def main():
|
|||||||
|
|
||||||
with (out_path / f"{res.input.file.name}.yaml").open("w") as fp:
|
with (out_path / f"{res.input.file.name}.yaml").open("w") as fp:
|
||||||
fp.write(yaml.safe_dump(res.document.export_to_dict()))
|
fp.write(yaml.safe_dump(res.document.export_to_dict()))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
Loading…
Reference in New Issue
Block a user