mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 23:12:20 +00:00
docs: Add example and CSV format documentation
Signed-off-by: Tobias Strebitzer <tobias.strebitzer@magloft.com>
This commit is contained in:
parent
d91ea7b186
commit
79eed3ef08
@ -29,8 +29,7 @@ class CsvDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if isinstance(self.path_or_stream, BytesIO):
|
if isinstance(self.path_or_stream, BytesIO):
|
||||||
# Decode bytes to string for CSV reading
|
content = self.path_or_stream.getvalue().decode("utf-8")
|
||||||
content = self.path_or_stream.read().decode('utf-8')
|
|
||||||
self.csv_data = list(csv.reader(StringIO(content)))
|
self.csv_data = list(csv.reader(StringIO(content)))
|
||||||
elif isinstance(self.path_or_stream, Path):
|
elif isinstance(self.path_or_stream, Path):
|
||||||
with open(self.path_or_stream, 'r', newline='') as f:
|
with open(self.path_or_stream, 'r', newline='') as f:
|
||||||
|
32
docs/examples/run_csv.py
Normal file
32
docs/examples/run_csv.py
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from docling.datamodel.base_models import InputFormat
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.DEBUG)
|
||||||
|
|
||||||
|
def main():
|
||||||
|
# Convert CSV to Docling document:
|
||||||
|
source = "https://drive.google.com/uc?id=1zO8ekHWx9U7mrbx_0Hoxxu6od7uxJqWw&export=download"
|
||||||
|
converter = DocumentConverter()
|
||||||
|
result = converter.convert(source)
|
||||||
|
|
||||||
|
# Export Docling document:
|
||||||
|
out_path = Path("scratch")
|
||||||
|
print(f"Document converted." f"\nSaving output to: {str(out_path)}")
|
||||||
|
with (out_path / f"customers-100.md").open("w") as fp:
|
||||||
|
fp.write(result.document.export_to_markdown())
|
||||||
|
|
||||||
|
with (out_path / f"customers-100.json").open("w") as fp:
|
||||||
|
fp.write(json.dumps(result.document.export_to_dict()))
|
||||||
|
|
||||||
|
with (out_path / f"customers-100.yaml").open("w") as fp:
|
||||||
|
fp.write(yaml.safe_dump(result.document.export_to_dict()))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
@ -13,6 +13,7 @@ Below you can find a listing of all supported input and output formats.
|
|||||||
| Markdown | |
|
| Markdown | |
|
||||||
| AsciiDoc | |
|
| AsciiDoc | |
|
||||||
| HTML, XHTML | |
|
| HTML, XHTML | |
|
||||||
|
| CSV | |
|
||||||
| PNG, JPEG, TIFF, BMP | Image formats |
|
| PNG, JPEG, TIFF, BMP | Image formats |
|
||||||
|
|
||||||
Schema-specific support:
|
Schema-specific support:
|
||||||
|
Loading…
Reference in New Issue
Block a user