Stub for implementing USPTO backend metadata extraction

Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
This commit is contained in:
Viktor Kuropiatnyk
2025-09-18 10:51:01 +02:00
parent 8322c2ea9b
commit 6455579a90
4 changed files with 10 additions and 0 deletions

View File

@@ -38,6 +38,10 @@ class AbstractDocumentBackend(ABC):
def supported_formats(cls) -> Set["InputFormat"]: def supported_formats(cls) -> Set["InputFormat"]:
pass pass
def extract_metadata(self) -> Dict[str, Any]:
    """Return backend-specific metadata for the document.

    This is an optional hook with a concrete default, not an abstract
    method: the body already supplies a usable fallback, and marking it
    abstract would make every backend that does not override it fail to
    instantiate with ``TypeError``. Backends that can extract metadata
    override this method.

    Returns:
        A new, empty dictionary (the default when a backend provides no
        metadata).
    """
    return {}
class PaginatedDocumentBackend(AbstractDocumentBackend): class PaginatedDocumentBackend(AbstractDocumentBackend):
"""DeclarativeDocumentBackend. """DeclarativeDocumentBackend.

View File

@@ -148,6 +148,10 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend):
f"name={self.file.name}) because the backend failed to init." f"name={self.file.name}) because the backend failed to init."
) )
@override
def extract_metadata(self) -> Dict[str, Any]:
return {}
class PatentUspto(ABC): class PatentUspto(ABC):
"""Parser of patent documents from the US Patent Office.""" """Parser of patent documents from the US Patent Office."""

View File

@@ -207,6 +207,7 @@ class ConversionResult(BaseModel):
confidence: ConfidenceReport = Field(default_factory=ConfidenceReport) confidence: ConfidenceReport = Field(default_factory=ConfidenceReport)
document: DoclingDocument = _EMPTY_DOCLING_DOC document: DoclingDocument = _EMPTY_DOCLING_DOC
metadata: Dict[str, Any] = {}
@property @property
@deprecated("Use document instead.") @deprecated("Use document instead.")

View File

@@ -38,6 +38,7 @@ class SimplePipeline(ConvertPipeline):
# a DoclingDocument straight. # a DoclingDocument straight.
with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT): with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
conv_res.document = conv_res.input._backend.convert() conv_res.document = conv_res.input._backend.convert()
conv_res.metadata = conv_res.input._backend.extract_metadata()
return conv_res return conv_res
def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus: def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus: