formatted script

This commit is contained in:
Mislav 2025-03-17 22:08:18 +13:00
parent 955bed5c5c
commit 627abcd082

View File

@@ -182,12 +182,18 @@ class DocumentConverter:
) )
for format in self.allowed_formats for format in self.allowed_formats
} }
self.initialized_pipelines: Dict[Tuple[Type[BasePipeline], str], BasePipeline] = {} self.initialized_pipelines: Dict[
Tuple[Type[BasePipeline], str], BasePipeline
] = {}
def _get_pipeline_options_hash(self, pipeline_options: PipelineOptions) -> str: def _get_pipeline_options_hash(self, pipeline_options: PipelineOptions) -> str:
"""Generate a hash of pipeline options to use as part of the cache key.""" """Generate a hash of pipeline options to use as part of the cache key."""
options_str = str(pipeline_options.model_dump() if hasattr(pipeline_options, 'model_dump') else pipeline_options) options_str = str(
return hashlib.md5(options_str.encode('utf-8')).hexdigest() pipeline_options.model_dump()
if hasattr(pipeline_options, "model_dump")
else pipeline_options
)
return hashlib.md5(options_str.encode("utf-8")).hexdigest()
def initialize_pipeline(self, format: InputFormat): def initialize_pipeline(self, format: InputFormat):
"""Initialize the conversion pipeline for the selected format.""" """Initialize the conversion pipeline for the selected format."""
@@ -299,10 +305,16 @@ class DocumentConverter:
cache_key = (pipeline_class, options_hash) cache_key = (pipeline_class, options_hash)
if cache_key not in self.initialized_pipelines: if cache_key not in self.initialized_pipelines:
_log.info(f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}") _log.info(
self.initialized_pipelines[cache_key] = pipeline_class(pipeline_options=pipeline_options) f"Initializing pipeline for {pipeline_class.__name__} with options hash {options_hash}"
)
self.initialized_pipelines[cache_key] = pipeline_class(
pipeline_options=pipeline_options
)
else: else:
_log.debug(f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}") _log.debug(
f"Reusing cached pipeline for {pipeline_class.__name__} with options hash {options_hash}"
)
return self.initialized_pipelines[cache_key] return self.initialized_pipelines[cache_key]