mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
Stub for implementing USPTO backend metadata extraction
Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
This commit is contained in:
@@ -38,6 +38,10 @@ class AbstractDocumentBackend(ABC):
|
||||
def supported_formats(cls) -> Set["InputFormat"]:
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def extract_metadata(self) -> Dict[str, Any]:
|
||||
return {}
|
||||
|
||||
|
||||
class PaginatedDocumentBackend(AbstractDocumentBackend):
|
||||
"""DeclarativeDocumentBackend.
|
||||
|
||||
@@ -148,6 +148,10 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend):
|
||||
f"name={self.file.name}) because the backend failed to init."
|
||||
)
|
||||
|
||||
@override
|
||||
def extract_metadata(self) -> Dict[str, Any]:
|
||||
return {}
|
||||
|
||||
|
||||
class PatentUspto(ABC):
|
||||
"""Parser of patent documents from the US Patent Office."""
|
||||
|
||||
@@ -207,6 +207,7 @@ class ConversionResult(BaseModel):
|
||||
confidence: ConfidenceReport = Field(default_factory=ConfidenceReport)
|
||||
|
||||
document: DoclingDocument = _EMPTY_DOCLING_DOC
|
||||
metadata: Dict[str, Any] = {}
|
||||
|
||||
@property
|
||||
@deprecated("Use document instead.")
|
||||
|
||||
@@ -38,6 +38,7 @@ class SimplePipeline(ConvertPipeline):
|
||||
# a DoclingDocument straight.
|
||||
with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
|
||||
conv_res.document = conv_res.input._backend.convert()
|
||||
conv_res.metadata = conv_res.input._backend.extract_metadata()
|
||||
return conv_res
|
||||
|
||||
def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
|
||||
|
||||
Reference in New Issue
Block a user