refactor: change the name of the USPTO input format

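Rename the USPTO patent input format from `PATENT_USPTO` to `XML_USPTO` so that its name reflects the typical file format (XML). The backend module is renamed accordingly, from `patent_uspto_backend` to `xml_uspto_backend`.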

Signed-off-by: Cesar Berrospi Ramis <75900930+ceberam@users.noreply.github.com>
This commit is contained in:
Cesar Berrospi Ramis 2024-12-13 15:06:47 +01:00 committed by Cesar Berrospi Ramis
parent 0a35f45092
commit c957901239
4 changed files with 8 additions and 8 deletions

View File

@ -110,7 +110,7 @@ class PatentUsptoDocumentBackend(DeclarativeDocumentBackend):
@classmethod
@override
def supported_formats(cls) -> set[InputFormat]:
return {InputFormat.PATENT_USPTO}
return {InputFormat.XML_USPTO}
@override
def convert(self) -> DoclingDocument:

View File

@ -36,7 +36,7 @@ class InputFormat(str, Enum):
ASCIIDOC = "asciidoc"
MD = "md"
XLSX = "xlsx"
PATENT_USPTO = "uspto"
XML_USPTO = "uspto"
class OutputFormat(str, Enum):
@ -56,7 +56,7 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
InputFormat.IMAGE: ["jpg", "jpeg", "png", "tif", "tiff", "bmp"],
InputFormat.ASCIIDOC: ["adoc", "asciidoc", "asc"],
InputFormat.XLSX: ["xlsx"],
InputFormat.PATENT_USPTO: ["xml", "txt"],
InputFormat.XML_USPTO: ["xml", "txt"],
}
FormatToMimeType: Dict[InputFormat, List[str]] = {
@ -83,7 +83,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
InputFormat.XLSX: [
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
],
InputFormat.PATENT_USPTO: ["application/xml", "text/plain"],
InputFormat.XML_USPTO: ["application/xml", "text/plain"],
}
MimeTypeToFormat = {

View File

@ -15,7 +15,7 @@ from docling.backend.md_backend import MarkdownDocumentBackend
from docling.backend.msexcel_backend import MsExcelDocumentBackend
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
from docling.backend.msword_backend import MsWordDocumentBackend
from docling.backend.patent_uspto_backend import PatentUsptoDocumentBackend
from docling.backend.xml_uspto_backend import PatentUsptoDocumentBackend
from docling.datamodel.base_models import (
ConversionStatus,
DoclingComponentType,
@ -118,7 +118,7 @@ def _get_default_option(format: InputFormat) -> FormatOption:
InputFormat.HTML: FormatOption(
pipeline_cls=SimplePipeline, backend=HTMLDocumentBackend
),
InputFormat.PATENT_USPTO: FormatOption(
InputFormat.XML_USPTO: FormatOption(
pipeline_cls=SimplePipeline, backend=PatentUsptoDocumentBackend
),
InputFormat.IMAGE: FormatOption(

View File

@ -12,7 +12,7 @@ import yaml
from docling_core.types import DoclingDocument
from docling_core.types.doc import DocItemLabel, TableData, TextItem
from docling.backend.patent_uspto_backend import PatentUsptoDocumentBackend, XmlTable
from docling.backend.xml_uspto_backend import PatentUsptoDocumentBackend, XmlTable
from docling.datamodel.base_models import InputFormat
from docling.datamodel.document import (
ConversionResult,
@ -45,7 +45,7 @@ def patents() -> list[tuple[Path, DoclingDocument]]:
for in_path in patent_paths:
in_doc = InputDocument(
path_or_stream=in_path,
format=InputFormat.PATENT_USPTO,
format=InputFormat.XML_USPTO,
backend=PatentUsptoDocumentBackend,
)
backend = PatentUsptoDocumentBackend(in_doc=in_doc, path_or_stream=in_path)