mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
Stub for implementing USPTO backend metadata extraction
Signed-off-by: Viktor Kuropiatnyk <vku@zurich.ibm.com>
This commit is contained in:
@@ -38,6 +38,10 @@ class AbstractDocumentBackend(ABC):
|
|||||||
def supported_formats(cls) -> Set["InputFormat"]:
|
def supported_formats(cls) -> Set["InputFormat"]:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def extract_metadata(self) -> Dict[str, Any]:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
class PaginatedDocumentBackend(AbstractDocumentBackend):
|
class PaginatedDocumentBackend(AbstractDocumentBackend):
|
||||||
"""DeclarativeDocumentBackend.
|
"""DeclarativeDocumentBackend.
|
||||||
|
|||||||
@@ -148,6 +148,10 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend):
|
|||||||
f"name={self.file.name}) because the backend failed to init."
|
f"name={self.file.name}) because the backend failed to init."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@override
|
||||||
|
def extract_metadata(self) -> Dict[str, Any]:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
|
||||||
class PatentUspto(ABC):
|
class PatentUspto(ABC):
|
||||||
"""Parser of patent documents from the US Patent Office."""
|
"""Parser of patent documents from the US Patent Office."""
|
||||||
|
|||||||
@@ -207,6 +207,7 @@ class ConversionResult(BaseModel):
|
|||||||
confidence: ConfidenceReport = Field(default_factory=ConfidenceReport)
|
confidence: ConfidenceReport = Field(default_factory=ConfidenceReport)
|
||||||
|
|
||||||
document: DoclingDocument = _EMPTY_DOCLING_DOC
|
document: DoclingDocument = _EMPTY_DOCLING_DOC
|
||||||
|
metadata: Dict[str, Any] = {}
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@deprecated("Use document instead.")
|
@deprecated("Use document instead.")
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ class SimplePipeline(ConvertPipeline):
|
|||||||
# a DoclingDocument straight.
|
# a DoclingDocument straight.
|
||||||
with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
|
with TimeRecorder(conv_res, "doc_build", scope=ProfilingScope.DOCUMENT):
|
||||||
conv_res.document = conv_res.input._backend.convert()
|
conv_res.document = conv_res.input._backend.convert()
|
||||||
|
conv_res.metadata = conv_res.input._backend.extract_metadata()
|
||||||
return conv_res
|
return conv_res
|
||||||
|
|
||||||
def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
|
def _determine_status(self, conv_res: ConversionResult) -> ConversionStatus:
|
||||||
|
|||||||
Reference in New Issue
Block a user