mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-11 06:08:09 +00:00
feat: Add pipeline timings and toggle visualization, establish debug settings (#183)
* Add settings to turn visualization on or off
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add profiling code to all models
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Refactor and fix profiling codes
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Visualization codes output PNG to debug dir
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Fixes for time logging
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Optimize imports
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Update lockfile
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
* Add start_timestamps to ProfilingItem
  Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
---------
Signed-off-by: Christoph Auer <cau@zurich.ibm.com>
This commit is contained in:
62
docling/utils/profiling.py
Normal file
62
docling/utils/profiling.py
Normal file
@@ -0,0 +1,62 @@
|
||||
import time
from datetime import datetime, timezone
from enum import Enum
from typing import TYPE_CHECKING, List

import numpy as np
from pydantic import BaseModel

from docling.datamodel.settings import settings

if TYPE_CHECKING:
    from docling.datamodel.document import ConversionResult
|
||||
|
||||
|
||||
class ProfilingScope(str, Enum):
    """Scope label attached to a profiling entry.

    The ``str`` mix-in makes every member compare equal to (and serialize
    as) its plain string value.
    """

    # Entry refers to a page.
    PAGE = "page"

    # Entry refers to a document.
    DOCUMENT = "document"
|
||||
|
||||
|
||||
class ProfilingItem(BaseModel):
    """Collected timing measurements for one profiling key.

    The statistic helpers summarise ``times``. Each of them returns a plain
    Python ``float`` and yields ``nan`` when no measurement has been
    recorded yet, instead of handing the empty list to numpy (which emits
    empty-slice ``RuntimeWarning``s for mean/std).
    """

    # Scope the measurements belong to.
    scope: ProfilingScope
    # Number of recorded measurements.
    count: int = 0
    # Elapsed durations, one entry per measurement. TimeRecorder appends
    # differences of time.monotonic() readings here, i.e. seconds.
    # NOTE: pydantic copies field defaults per instance, so these list
    # literals are not shared between models.
    times: List[float] = []
    # Timestamp taken when each measurement started.
    start_timestamps: List[datetime] = []

    def avg(self) -> float:
        """Return the (unweighted) average of ``times``; ``nan`` if empty."""
        if not self.times:
            return float("nan")
        return float(np.average(self.times))

    def std(self) -> float:
        """Return the population standard deviation of ``times``; ``nan`` if empty."""
        if not self.times:
            return float("nan")
        return float(np.std(self.times))

    def mean(self) -> float:
        """Return the arithmetic mean of ``times``; ``nan`` if empty."""
        if not self.times:
            return float("nan")
        return float(np.mean(self.times))

    def percentile(self, perc: float) -> float:
        """Return the ``perc``-th percentile of ``times``; ``nan`` if empty.

        Args:
            perc: Percentile to compute, forwarded to ``numpy.percentile``
                (which expects a value between 0 and 100).
        """
        if not self.times:
            return float("nan")
        return float(np.percentile(self.times, perc))
|
||||
|
||||
|
||||
class TimeRecorder:
    """Context manager that records the wall time spent in a ``with`` block.

    If ``settings.debug.profile_pipeline_timings`` is truthy when the
    recorder is constructed, entering the block appends a start timestamp
    and leaving it appends the elapsed time to ``conv_res.timings[key]``
    and increments that entry's ``count``. Otherwise the recorder is a
    no-op.

    The flag is read once, at construction, so a single recorder behaves
    consistently even if the global setting changes while the block runs
    (re-reading it per call could otherwise touch attributes that were never
    set, or leave a start timestamp without a matching elapsed time).

    Exceptions raised inside the block are not suppressed; the elapsed time
    is still recorded for them.
    """

    def __init__(
        self,
        conv_res: "ConversionResult",
        key: str,
        scope: ProfilingScope = ProfilingScope.PAGE,
    ):
        """Bind the recorder to ``conv_res.timings[key]``.

        Args:
            conv_res: Object whose ``timings`` mapping receives the data.
            key: Name of the timing entry to append to.
            scope: Scope stored on the entry if it has to be created.
        """
        # Store the references unconditionally so the attributes always exist.
        self.conv_res = conv_res
        self.key = key
        # Snapshot of the global switch; see class docstring.
        self._enabled = bool(settings.debug.profile_pipeline_timings)

        if self._enabled and key not in conv_res.timings:
            conv_res.timings[key] = ProfilingItem(scope=scope)

    def __enter__(self):
        """Start the measurement and return the recorder itself."""
        if self._enabled:
            self.start = time.monotonic()
            # datetime.utcnow() is deprecated since Python 3.12. Stripping
            # tzinfo keeps the stored value a naive UTC datetime, exactly as
            # utcnow() produced, so existing consumers see the same data.
            started_at = datetime.now(timezone.utc).replace(tzinfo=None)
            self.conv_res.timings[self.key].start_timestamps.append(started_at)
        return self

    def __exit__(self, *args):
        """Stop the measurement and store the elapsed time.

        Returns ``None``, so an exception from the ``with`` body propagates.
        """
        if self._enabled:
            elapsed = time.monotonic() - self.start
            item = self.conv_res.timings[self.key]
            item.times.append(elapsed)
            item.count += 1
|
||||
Reference in New Issue
Block a user