mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-31 14:34:40 +00:00
docs: Add example and CSV format documentation
Signed-off-by: Tobias Strebitzer <tobias.strebitzer@magloft.com>
This commit is contained in:
parent
d91ea7b186
commit
79eed3ef08
@ -29,8 +29,7 @@ class CsvDocumentBackend(DeclarativeDocumentBackend):
|
||||
|
||||
try:
|
||||
if isinstance(self.path_or_stream, BytesIO):
|
||||
# Decode bytes to string for CSV reading
|
||||
content = self.path_or_stream.read().decode('utf-8')
|
||||
content = self.path_or_stream.getvalue().decode("utf-8")
|
||||
self.csv_data = list(csv.reader(StringIO(content)))
|
||||
elif isinstance(self.path_or_stream, Path):
|
||||
with open(self.path_or_stream, 'r', newline='') as f:
|
||||
|
32
docs/examples/run_csv.py
Normal file
32
docs/examples/run_csv.py
Normal file
@ -0,0 +1,32 @@
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.document_converter import DocumentConverter
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
|
||||
def main():
    """Convert a sample CSV file and export it as Markdown, JSON and YAML.

    The source is fetched from a fixed URL and converted with a default
    ``DocumentConverter``. The resulting document is written to the
    ``scratch`` directory (created if missing) as ``customers-100.md``,
    ``customers-100.json`` and ``customers-100.yaml``.
    """
    # Convert CSV to Docling document:
    source = "https://drive.google.com/uc?id=1zO8ekHWx9U7mrbx_0Hoxxu6od7uxJqWw&export=download"
    converter = DocumentConverter()
    result = converter.convert(source)

    # Export Docling document:
    out_path = Path("scratch")
    # Create the output directory up front: opening a file inside a missing
    # directory raises FileNotFoundError.
    out_path.mkdir(parents=True, exist_ok=True)
    print(f"Document converted.\nSaving output to: {out_path}")

    # Build the dict representation once and reuse it for JSON and YAML.
    document_dict = result.document.export_to_dict()

    # Write with an explicit encoding so the output does not depend on the
    # platform's default locale encoding.
    with (out_path / "customers-100.md").open("w", encoding="utf-8") as fp:
        fp.write(result.document.export_to_markdown())

    with (out_path / "customers-100.json").open("w", encoding="utf-8") as fp:
        fp.write(json.dumps(document_dict))

    with (out_path / "customers-100.yaml").open("w", encoding="utf-8") as fp:
        fp.write(yaml.safe_dump(document_dict))
|
||||
|
||||
|
||||
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
|
||||
main()
|
@ -13,6 +13,7 @@ Below you can find a listing of all supported input and output formats.
|
||||
| Markdown | |
|
||||
| AsciiDoc | |
|
||||
| HTML, XHTML | |
|
||||
| CSV | |
|
||||
| PNG, JPEG, TIFF, BMP | Image formats |
|
||||
|
||||
Schema-specific support:
|
||||
|
Loading…
Reference in New Issue
Block a user