mirror of
https://github.com/DS4SD/docling.git
synced 2025-08-01 15:02:21 +00:00
refactor: change the name of the USPTO input format
Rename the USPTO patent input format so that its name reflects the typical file format (XML). Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
parent
0a35f45092
commit
c957901239
@ -110,7 +110,7 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend):
|
||||
@classmethod
|
||||
@override
|
||||
def supported_formats(cls) -> set[InputFormat]:
|
||||
return {InputFormat.PATENT_USPTO}
|
||||
return {InputFormat.XML_USPTO}
|
||||
|
||||
@override
|
||||
def convert(self) -> DoclingDocument:
|
@ -36,7 +36,7 @@ class InputFormat(str, Enum):
|
||||
ASCIIDOC = "asciidoc"
|
||||
MD = "md"
|
||||
XLSX = "xlsx"
|
||||
PATENT_USPTO = "uspto"
|
||||
XML_USPTO = "uspto"
|
||||
|
||||
|
||||
class OutputFormat(str, Enum):
|
||||
@ -56,7 +56,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
|
||||
InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"],
|
||||
InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
|
||||
InputFormat.XLSX: ["xlsx"],
|
||||
InputFormat.PATENT_USPTO: ["xml", "txt"],
|
||||
InputFormat.XML_USPTO: ["xml", "txt"],
|
||||
}
|
||||
|
||||
FormatToMimeType: Dict[InputFormat, List[str]] = {
|
||||
@ -83,7 +83,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
|
||||
InputFormat.XLSX: [
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
],
|
||||
InputFormat.PATENT_USPTO: ["application/xml", "text/plain"],
|
||||
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
|
||||
}
|
||||
|
||||
MimeTypeToFormat = {
|
||||
|
@ -15,7 +15,7 @@ from docling.backend.md_backend import MarkdownDocumentBackend
|
||||
from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
||||
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
||||
from docling.backend.msword_backend import MsWordDocumentBackend
|
||||
from docling.backend.patent_uspto_backend import PatentUsptoDocumentBackend
|
||||
from docling.backend.xml_uspto_backend import PatentUsptoDocumentBackend
|
||||
from docling.datamodel.base_models import (
|
||||
ConversionStatus,
|
||||
DoclingComponentType,
|
||||
@ -118,7 +118,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
|
||||
InputFormat.HTML: FormatOption(
|
||||
pipeline_cls=SimplePipeline, backend=HTMLDocumentBackend
|
||||
),
|
||||
InputFormat.PATENT_USPTO: FormatOption(
|
||||
InputFormat.XML_USPTO: FormatOption(
|
||||
pipeline_cls=SimplePipeline, backend=PatentUsptoDocumentBackend
|
||||
),
|
||||
InputFormat.IMAGE: FormatOption(
|
||||
|
@ -12,7 +12,7 @@ import yaml
|
||||
from docling_core.types import DoclingDocument
|
||||
from docling_core.types.doc import DocItemLabel, TableData, TextItem
|
||||
|
||||
from docling.backend.patent_uspto_backend import PatentUsptoDocumentBackend, XmlTable
|
||||
from docling.backend.xml_uspto_backend import PatentUsptoDocumentBackend, XmlTable
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.document import (
|
||||
ConversionResult,
|
||||
@ -45,7 +45,7 @@ def patents() -> list[tuple[Path, DoclingDocument]]:
|
||||
for in_path in patent_paths:
|
||||
in_doc = InputDocument(
|
||||
path_or_stream=in_path,
|
||||
format=InputFormat.PATENT_USPTO,
|
||||
format=InputFormat.XML_USPTO,
|
||||
backend=PatentUsptoDocumentBackend,
|
||||
)
|
||||
backend = PatentUsptoDocumentBackend(in_doc=in_doc, path_or_stream=in_path)
|
||||
|
Loading…
Reference in New Issue
Block a user