docs: Add example and CSV format documentation

Signed-off-by: Tobias Strebitzer <tobias.strebitzer@magloft.com>
This commit is contained in:
Tobias Strebitzer 2025-02-12 12:11:32 +08:00
parent d91ea7b186
commit 79eed3ef08
3 changed files with 34 additions and 2 deletions

View File

@ -29,8 +29,7 @@ class CsvDocumentBackend(DeclarativeDocumentBackend):
try:
if isinstance(self.path_or_stream, BytesIO):
# Decode bytes to string for CSV reading
content = self.path_or_stream.read().decode('utf-8')
content = self.path_or_stream.getvalue().decode("utf-8")
self.csv_data = list(csv.reader(StringIO(content)))
elif isinstance(self.path_or_stream, Path):
with open(self.path_or_stream, 'r', newline='') as f:

32
docs/examples/run_csv.py Normal file
View File

@ -0,0 +1,32 @@
import json
import logging
from pathlib import Path
import yaml
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter
logging.basicConfig(level=logging.DEBUG)
def main():
    """Convert a sample CSV file to a Docling document and export it.

    The source URL is handed to ``DocumentConverter.convert``; the resulting
    document is then written to the ``scratch`` directory as Markdown, JSON
    and YAML (``customers-100.md`` / ``.json`` / ``.yaml``).
    """
    # Convert CSV to Docling document:
    source = "https://drive.google.com/uc?id=1zO8ekHWx9U7mrbx_0Hoxxu6od7uxJqWw&export=download"
    converter = DocumentConverter()
    result = converter.convert(source)

    # Export Docling document:
    out_path = Path("scratch")
    # Create the output directory up front: opening a file inside a directory
    # that does not exist raises FileNotFoundError.
    out_path.mkdir(parents=True, exist_ok=True)
    print(f"Document converted.\nSaving output to: {out_path}")

    # Build the dict representation once and reuse it for JSON and YAML.
    document_dict = result.document.export_to_dict()

    # An explicit encoding keeps the output independent of the platform's
    # default codec (the Markdown export may contain non-ASCII text).
    with (out_path / "customers-100.md").open("w", encoding="utf-8") as fp:
        fp.write(result.document.export_to_markdown())

    with (out_path / "customers-100.json").open("w", encoding="utf-8") as fp:
        fp.write(json.dumps(document_dict))

    with (out_path / "customers-100.yaml").open("w", encoding="utf-8") as fp:
        fp.write(yaml.safe_dump(document_dict))


if __name__ == "__main__":
    main()

View File

@ -13,6 +13,7 @@ Below you can find a listing of all supported input and output formats.
| Markdown | |
| AsciiDoc | |
| HTML, XHTML | |
| CSV | |
| PNG, JPEG, TIFF, BMP | Image formats |
Schema-specific support: