mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
@@ -65,7 +65,7 @@ SMOLDOCLING_VLLM = InlineVlmOptions(
|
|||||||
SMOLVLM500_TRANSFORMERS = InlineVlmOptions(
|
SMOLVLM500_TRANSFORMERS = InlineVlmOptions(
|
||||||
repo_id="HuggingFaceTB/SmolVLM-500M-Instruct",
|
repo_id="HuggingFaceTB/SmolVLM-500M-Instruct",
|
||||||
prompt="Transcribe this image to plain text.",
|
prompt="Transcribe this image to plain text.",
|
||||||
response_format=ResponseFormat.DOCTAGS,
|
response_format=ResponseFormat.PLAINTEXT,
|
||||||
inference_framework=InferenceFramework.TRANSFORMERS,
|
inference_framework=InferenceFramework.TRANSFORMERS,
|
||||||
transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
|
transformers_model_type=TransformersModelType.AUTOMODEL_IMAGETEXTTOTEXT,
|
||||||
supported_devices=[
|
supported_devices=[
|
||||||
|
|||||||
@@ -161,7 +161,7 @@ class ThreadedMultiStageVlmPipelineOptions(PaginatedPipelineOptions):
|
|||||||
# text_opts = DOLPHIN_TRANSFORMERS.model_copy()
|
# text_opts = DOLPHIN_TRANSFORMERS.model_copy()
|
||||||
# text_opts.prompt = "<s>Read text in the image. <Answer/>"
|
# text_opts.prompt = "<s>Read text in the image. <Answer/>"
|
||||||
|
|
||||||
base_model = SMOLVLM500_TRANSFORMERS
|
base_model = SMOLVLM500_MLX
|
||||||
|
|
||||||
text_opts = base_model.model_copy()
|
text_opts = base_model.model_copy()
|
||||||
# text_opts.prompt = "Convert this page to docling."
|
# text_opts.prompt = "Convert this page to docling."
|
||||||
|
|||||||
Reference in New Issue
Block a user