feat: add convert_string to document-converter (#2069)

* feat: add convert_string to document-converter

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* fix unsupported operand type(s) for |: type and NoneType

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

* added tests for convert_string

Signed-off-by: Peter Staar <taa@zurich.ibm.com>

---------

Signed-off-by: Peter Staar <taa@zurich.ibm.com>
This commit is contained in:
Peter W. J. Staar
2025-08-12 11:02:38 +02:00
committed by GitHub
parent e2cca931be
commit b09033cb73
2 changed files with 82 additions and 1 deletions

View File

@@ -5,7 +5,9 @@ import threading
import time
from collections.abc import Iterable, Iterator
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from functools import partial
from io import BytesIO
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type, Union
@@ -275,6 +277,34 @@ class DocumentConverter:
"Conversion failed because the provided file has no recognizable format or it wasn't in the list of allowed formats."
)
@validate_call(config=ConfigDict(strict=True))
def convert_string(
    self,
    content: str,
    format: InputFormat,
    name: Optional[str] = None,
) -> ConversionResult:
    """Convert an in-memory string by wrapping it in a named document stream.

    The text is encoded as UTF-8, placed in a ``DocumentStream`` and passed
    to ``self.convert``.

    Args:
        content: The document text to convert.
        format: The input format of ``content``. Only ``InputFormat.MD`` and
            ``InputFormat.HTML`` are handled here.
        name: Name given to the document stream. When omitted or empty, a
            timestamp of the form ``YYYY-MM-DD_HH-MM-SS`` (local time) is
            used. The extension matching ``format`` (``.md`` / ``.html``)
            is appended if the name does not already end with it.

    Returns:
        The ``ConversionResult`` returned by ``self.convert``.

    Raises:
        ValueError: If ``format`` is neither Markdown nor HTML.
    """
    # Pick the extension for the stream name; both supported formats share
    # the rest of the logic below.
    if format == InputFormat.MD:
        suffix = ".md"
    elif format == InputFormat.HTML:
        suffix = ".html"
    else:
        raise ValueError(f"format {format} is not supported in `convert_string`")

    # Fall back to a timestamp so the stream always has a usable name.
    name = name or datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    if not name.endswith(suffix):
        name += suffix

    buff = BytesIO(content.encode("utf-8"))
    doc_stream = DocumentStream(name=name, stream=buff)
    return self.convert(doc_stream)
def _convert(
self, conv_input: _DocumentConversionInput, raises_on_error: bool
) -> Iterator[ConversionResult]: