mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 20:58:11 +00:00
feat: add a backend parser for WebVTT files (#2288)
* feat: add a backend parser for WebVTT files Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com> * docs: update README with VTT support Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com> * docs: add description to supported formats Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com> * chore: upgrade docling-core to unescape WebVTT in markdown Pin the new release of docling-core 2.48.2. Do not escape HTML reserved characters when exporting WebVTT documents to markdown. Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com> * test: add missing copyright notice Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com> --------- Signed-off-by: Cesar Berrospi Ramis <ceb@zurich.ibm.com>
This commit is contained in:
committed by
GitHub
parent
b5628f1227
commit
46efaaefee
@@ -29,7 +29,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
|
|||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, images (PNG, TIFF, JPEG, ...), and more
|
* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, VTT, images (PNG, TIFF, JPEG, ...), and more
|
||||||
* 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
|
* 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
|
||||||
* 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
|
* 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
|
||||||
* ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
|
* ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
|
||||||
@@ -45,13 +45,13 @@ Docling simplifies document processing, parsing diverse formats — including ad
|
|||||||
* 📤 Structured [information extraction][extraction] \[🧪 beta\]
|
* 📤 Structured [information extraction][extraction] \[🧪 beta\]
|
||||||
* 📑 New layout model (**Heron**) by default, for faster PDF parsing
|
* 📑 New layout model (**Heron**) by default, for faster PDF parsing
|
||||||
* 🔌 [MCP server](https://docling-project.github.io/docling/usage/mcp/) for agentic applications
|
* 🔌 [MCP server](https://docling-project.github.io/docling/usage/mcp/) for agentic applications
|
||||||
|
* 💬 Parsing of Web Video Text Tracks (WebVTT) files
|
||||||
|
|
||||||
### Coming soon
|
### Coming soon
|
||||||
|
|
||||||
* 📝 Metadata extraction, including title, authors, references & language
|
* 📝 Metadata extraction, including title, authors, references & language
|
||||||
* 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
|
* 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
|
||||||
* 📝 Complex chemistry understanding (Molecular structures)
|
* 📝 Complex chemistry understanding (Molecular structures)
|
||||||
* 📝 Parsing of Web Video Text Tracks (WebVTT) files
|
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
572
docling/backend/webvtt_backend.py
Normal file
572
docling/backend/webvtt_backend.py
Normal file
@@ -0,0 +1,572 @@
|
|||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from io import BytesIO
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Annotated, ClassVar, Literal, Optional, Union, cast
|
||||||
|
|
||||||
|
from docling_core.types.doc import (
|
||||||
|
ContentLayer,
|
||||||
|
DocItemLabel,
|
||||||
|
DoclingDocument,
|
||||||
|
DocumentOrigin,
|
||||||
|
Formatting,
|
||||||
|
GroupLabel,
|
||||||
|
NodeItem,
|
||||||
|
)
|
||||||
|
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
|
||||||
|
from pydantic.types import StringConstraints
|
||||||
|
from typing_extensions import Self, override
|
||||||
|
|
||||||
|
from docling.backend.abstract_backend import DeclarativeDocumentBackend
|
||||||
|
from docling.datamodel.base_models import InputFormat
|
||||||
|
from docling.datamodel.document import InputDocument
|
||||||
|
|
||||||
|
_log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTTimestamp(BaseModel):
    """Model representing a WebVTT timestamp.

    A WebVTT timestamp is always interpreted relative to the current playback position
    of the media data that the WebVTT file is to be synchronized with.

    The accepted textual form is ``[hh:]mm:ss.ttt``: an optional hours part with at
    least two digits, two-digit minutes and seconds in the range 00-59, and exactly
    three digits of milliseconds.
    """

    model_config = ConfigDict(regex_engine="python-re")

    raw: Annotated[
        str,
        Field(
            description="A representation of the WebVTT Timestamp as a single string"
        ),
    ]

    _pattern: ClassVar[re.Pattern] = re.compile(
        r"^(?:(\d{2,}):)?([0-5]\d):([0-5]\d)\.(\d{3})$"
    )
    # Parsed components of ``raw``; populated by ``validate_raw``.
    _hours: int
    _minutes: int
    _seconds: int
    _millis: int

    @model_validator(mode="after")
    def validate_raw(self) -> Self:
        """Parse ``raw`` into hours, minutes, seconds and milliseconds.

        Raises:
            ValueError: If ``raw`` is not a well-formed WebVTT timestamp.
        """
        # ``fullmatch`` is required here: with ``match`` the trailing ``$`` would
        # also succeed just before a final newline, letting "00:01.000\n" through.
        m = self._pattern.fullmatch(self.raw)
        if not m:
            raise ValueError(f"Invalid WebVTT timestamp format: {self.raw}")
        self._hours = int(m.group(1)) if m.group(1) else 0
        # The pattern restricts minutes and seconds to 00-59, so no additional
        # range check is needed after a successful match.
        self._minutes = int(m.group(2))
        self._seconds = int(m.group(3))
        self._millis = int(m.group(4))

        return self

    @property
    def seconds(self) -> float:
        """A representation of the WebVTT Timestamp in seconds"""
        return (
            self._hours * 3600
            + self._minutes * 60
            + self._seconds
            + self._millis / 1000.0
        )

    @override
    def __str__(self) -> str:
        return self.raw
|
||||||
|
|
||||||
|
|
||||||
|
# Constrained string type for a cue identifier: a single, non-empty line.
# The negative lookahead rejects any value containing the "-->" timing separator,
# and the character class rejects line terminators (LF and CR).
_WebVTTCueIdentifier = Annotated[
    str, StringConstraints(strict=True, pattern=r"^(?!.*-->)[^\n\r]+$")
]
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTCueTimings(BaseModel):
    """Model representing WebVTT cue timings.

    Holds the start and end offsets of a cue and guarantees that the cue ends
    strictly after it starts.
    """

    start: Annotated[
        _WebVTTTimestamp, Field(description="Start time offset of the cue")
    ]
    end: Annotated[_WebVTTTimestamp, Field(description="End time offset of the cue")]

    @model_validator(mode="after")
    def check_order(self) -> Self:
        """Reject timings whose end is not strictly later than their start."""
        begins_at = self.start.seconds
        finishes_at = self.end.seconds
        if finishes_at <= begins_at:
            raise ValueError("End timestamp must be greater than start timestamp")
        return self

    @override
    def __str__(self):
        # Rendered the way the timing line appears in a WebVTT file.
        return " --> ".join((str(self.start), str(self.end)))
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTCueTextSpan(BaseModel):
    """Model representing a WebVTT cue text span.

    A text span is a non-empty run of plain characters; it may not contain line
    terminators, ampersands or the tag-opening character ``<``.
    """

    text: str
    span_type: Literal["text"] = "text"

    @field_validator("text", mode="after")
    @classmethod
    def validate_text(cls, value: str) -> str:
        """Ensure the span is non-empty and free of forbidden characters."""
        if value == "":
            raise ValueError("Cue text span cannot be empty")
        forbidden = {"\n", "\r", "&", "<"}
        if forbidden.intersection(value):
            raise ValueError("Cue text span contains invalid characters")
        return value

    @override
    def __str__(self):
        return self.text
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTCueVoiceSpan(BaseModel):
    """Model representing a WebVTT cue voice span.

    A voice span is written ``<v.class1.class2 Speaker>...</v>``: the annotation
    names the voice, the optional classes qualify it, and the components hold the
    spans nested inside the tag.
    """

    annotation: Annotated[
        str,
        Field(
            description=(
                "Cue span start tag annotation text representing the name of the voice"
            )
        ),
    ]
    classes: Annotated[
        list[str],
        Field(description="List of classes representing the cue span's significance"),
    ] = []
    components: Annotated[
        list["_WebVTTCueComponent"],
        Field(description="The components representing the cue internal text"),
    ] = []
    span_type: Literal["v"] = "v"

    @field_validator("annotation", mode="after")
    @classmethod
    def validate_annotation(cls, value: str) -> str:
        """Ensure the annotation is non-empty and has no forbidden characters.

        Raises:
            ValueError: If the annotation is empty or contains a line terminator,
                ``&`` or ``>``.
        """
        if any(ch in value for ch in {"\n", "\r", "&", ">"}):
            raise ValueError(
                "Cue span start tag annotation contains invalid characters"
            )
        if not value:
            # This message refers to the annotation, not to a cue text span.
            raise ValueError("Cue span start tag annotation cannot be empty")
        return value

    @field_validator("classes", mode="after")
    @classmethod
    def validate_classes(cls, value: list[str]) -> list[str]:
        """Ensure every class is non-empty and has no forbidden characters.

        Raises:
            ValueError: If a class is empty or contains whitespace, ``&``, ``<``,
                ``>`` or ``.``.
        """
        for item in value:
            if any(ch in item for ch in {"\t", "\n", "\r", " ", "&", "<", ">", "."}):
                raise ValueError(
                    "A cue span start tag class contains invalid characters"
                )
            if not item:
                raise ValueError("Cue span start tag classes cannot be empty")
        return value

    @override
    def __str__(self):
        # Classes are appended to the tag name, dot-separated: ``v.loud.fast``.
        tag = f"v.{'.'.join(self.classes)}" if self.classes else "v"
        inner = "".join(str(span) for span in self.components)
        return f"<{tag} {self.annotation}>{inner}</v>"
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTCueClassSpan(BaseModel):
    """Model representing a WebVTT cue class span (``<c>...</c>``)."""

    span_type: Literal["c"] = "c"
    components: list["_WebVTTCueComponent"]

    @override
    def __str__(self):
        # Serialize the nested spans and wrap them in the class tag.
        body = "".join(map(str, self.components))
        return "<c>" + body + "</c>"
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTCueItalicSpan(BaseModel):
    """Model representing a WebVTT cue italics span (``<i>...</i>``)."""

    span_type: Literal["i"] = "i"
    components: list["_WebVTTCueComponent"]

    @override
    def __str__(self):
        # Serialize the nested spans and wrap them in the italics tag.
        body = "".join(map(str, self.components))
        return "<i>" + body + "</i>"
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTCueBoldSpan(BaseModel):
    """Model representing a WebVTT cue bold span (``<b>...</b>``)."""

    span_type: Literal["b"] = "b"
    components: list["_WebVTTCueComponent"]

    @override
    def __str__(self):
        # Serialize the nested spans and wrap them in the bold tag.
        body = "".join(map(str, self.components))
        return "<b>" + body + "</b>"
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTCueUnderlineSpan(BaseModel):
    """Model representing a WebVTT cue underline span (``<u>...</u>``)."""

    span_type: Literal["u"] = "u"
    components: list["_WebVTTCueComponent"]

    @override
    def __str__(self):
        # Serialize the nested spans and wrap them in the underline tag.
        body = "".join(map(str, self.components))
        return "<u>" + body + "</u>"
|
||||||
|
|
||||||
|
|
||||||
|
# Tagged union of every supported cue span model. The concrete model is selected
# by the literal value of each member's ``span_type`` field, which is declared as
# the discriminator below.
_WebVTTCueComponent = Annotated[
    Union[
        _WebVTTCueTextSpan,
        _WebVTTCueClassSpan,
        _WebVTTCueItalicSpan,
        _WebVTTCueBoldSpan,
        _WebVTTCueUnderlineSpan,
        _WebVTTCueVoiceSpan,
    ],
    Field(discriminator="span_type", description="The WebVTT cue component"),
]
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTCueBlock(BaseModel):
    """Model representing a WebVTT cue block.

    The optional WebVTT cue settings list is not supported.
    The cue payload is limited to the following spans: text, class, italic, bold,
    underline, and voice.
    """

    model_config = ConfigDict(regex_engine="python-re")

    identifier: Optional[_WebVTTCueIdentifier] = Field(
        None, description="The WebVTT cue identifier"
    )
    timings: Annotated[_WebVTTCueTimings, Field(description="The WebVTT cue timings")]
    payload: Annotated[list[_WebVTTCueComponent], Field(description="The cue payload")]

    # Matches any supported start or end tag: group 1 is "/" for end tags, group 2
    # is the tag name (with dot-separated classes for voice tags) and group 3 is
    # the optional annotation.
    _pattern_block: ClassVar[re.Pattern] = re.compile(
        r"<(/?)(i|b|c|u|v(?:\.[^\t\n\r &<>.]+)*)(?:\s+([^>]*))?>"
    )
    _pattern_voice_tag: ClassVar[re.Pattern] = re.compile(
        r"^<v(?P<class>\.[^\t\n\r &<>]+)?"  # zero or more classes
        r"[ \t]+(?P<annotation>[^\n\r&>]+)>"  # required space and annotation
    )

    @field_validator("payload", mode="after")
    @classmethod
    def validate_payload(cls, payload):
        """Reject payloads whose serialized form contains the ``-->`` separator."""
        for component in payload:
            if "-->" in str(component):
                raise ValueError("Cue payload must not contain '-->'")
        return payload

    @classmethod
    def _close_span(
        cls,
        stack: list[list[_WebVTTCueComponent]],
        open_tags: list[tuple[str, str]],
    ) -> None:
        """Close the innermost open span and attach it to its parent.

        Args:
            stack: Children lists, one per open span plus the root list at index 0.
            open_tags: ``(name, raw start tag)`` for every span that is still open.
        """
        name, start_tag = open_tags.pop()
        children = stack.pop()
        parent = stack[-1]

        if name != "v":
            span_models = {
                "i": _WebVTTCueItalicSpan,
                "b": _WebVTTCueBoldSpan,
                "u": _WebVTTCueUnderlineSpan,
                "c": _WebVTTCueClassSpan,
            }
            parent.append(span_models[name](components=children))
            return

        voice_match = cls._pattern_voice_tag.match(start_tag)
        if voice_match is None:
            # A voice tag without a usable annotation cannot be modelled as a voice
            # span; keep its content in the parent instead of dropping the text.
            parent.extend(children)
            return

        class_string = voice_match.group("class")
        classes: list[str] = (
            [c for c in class_string.split(".") if c] if class_string else []
        )
        parent.append(
            _WebVTTCueVoiceSpan(
                annotation=voice_match.group("annotation").strip(),
                classes=classes,
                components=children,
            )
        )

    @classmethod
    def parse(cls, raw: str) -> "_WebVTTCueBlock":
        """Build a cue block from its textual representation.

        The first line is taken as the cue identifier when it does not contain
        ``-->`` and more lines follow. Payload lines are joined with a single space.
        End tags that do not match the innermost open span are ignored, and spans
        still open at the end of the payload are closed implicitly.

        Args:
            raw: The text of one cue block, without surrounding blank lines.

        Returns:
            The parsed cue block.

        Raises:
            ValueError: If the block is empty, has no valid cue timings, or
                contains content that fails model validation.
        """
        lines = raw.strip().splitlines()
        if not lines:
            raise ValueError("Cue block must have at least one line")
        identifier: Optional[_WebVTTCueIdentifier] = None
        timing_line = lines[0]
        if "-->" not in timing_line and len(lines) > 1:
            identifier = timing_line
            timing_line = lines[1]
            cue_lines = lines[2:]
        else:
            cue_lines = lines[1:]

        if "-->" not in timing_line:
            raise ValueError("Cue block must contain WebVTT cue timings")

        timing_parts = [part.strip() for part in timing_line.split("-->")]
        if len(timing_parts) != 2:
            raise ValueError("Cue timings must contain exactly one '-->' separator")
        start, end = timing_parts
        end = re.split(r"[ \t]", end)[0]  # ignore the cue settings list
        timings: _WebVTTCueTimings = _WebVTTCueTimings(
            start=_WebVTTTimestamp(raw=start), end=_WebVTTTimestamp(raw=end)
        )
        cue_text = " ".join(cue_lines).strip()

        # ``stack[0]`` collects the top-level payload; every open span pushes one
        # more list, so ``len(stack) == len(open_tags) + 1`` at all times.
        stack: list[list[_WebVTTCueComponent]] = [[]]
        open_tags: list[tuple[str, str]] = []

        pos = 0
        for match in cls._pattern_block.finditer(cue_text):
            if match.start() > pos:
                stack[-1].append(_WebVTTCueTextSpan(text=cue_text[pos : match.start()]))
            pos = match.end()

            # Voice tags may carry classes ("v.loud"); only the bare name matters
            # for nesting.
            name = match.group(2).split(".", 1)[0]
            if not match.group(1):
                open_tags.append((name, match.group(0)))
                stack.append([])
            elif open_tags and open_tags[-1][0] == name:
                cls._close_span(stack, open_tags)
            # Otherwise this is a stray or mis-nested end tag: ignore it rather
            # than popping a list that belongs to another span.

        if pos < len(cue_text):
            stack[-1].append(_WebVTTCueTextSpan(text=cue_text[pos:]))

        # Implicitly close whatever is still open. This also covers a voice span
        # whose end tag was omitted.
        while open_tags:
            cls._close_span(stack, open_tags)

        return cls(
            identifier=identifier,
            timings=timings,
            payload=stack[0],
        )

    def __str__(self):
        parts = []
        if self.identifier:
            parts.append(f"{self.identifier}\n")
        timings_line = str(self.timings)
        parts.append(timings_line + "\n")
        for idx, span in enumerate(self.payload):
            if idx == 0 and len(self.payload) == 1 and span.span_type == "v":
                # the end tag may be omitted for brevity
                parts.append(str(span).removesuffix("</v>"))
            else:
                parts.append(str(span))

        return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
class _WebVTTFile(BaseModel):
    """A model representing a WebVTT file.

    Only cue blocks are retained; comment (NOTE), STYLE and REGION blocks are
    skipped while parsing.
    """

    cue_blocks: list[_WebVTTCueBlock]

    # First line of a comment block: "NOTE" alone or followed by a space or tab.
    _pattern_note: ClassVar[re.Pattern] = re.compile(r"NOTE(?:[ \t]|$)")
    # First line of a style or region block: the keyword and optional blanks.
    _pattern_style_region: ClassVar[re.Pattern] = re.compile(
        r"(?:STYLE|REGION)[ \t]*$"
    )

    @staticmethod
    def verify_signature(content: str) -> bool:
        """Check that the content starts with a valid WebVTT signature.

        A leading byte order mark is tolerated. The string ``WEBVTT`` must be
        either the whole content or be followed by a space, a tab or a line
        terminator (LF or CR, so CRLF input is accepted too).

        Args:
            content: The decoded text of the file.

        Returns:
            True if the signature is valid, False otherwise.
        """
        if not content:
            return False
        content = content.removeprefix("\ufeff")
        if not content.startswith("WEBVTT"):
            return False
        return len(content) == 6 or content[6] in (" ", "\t", "\n", "\r")

    @classmethod
    def parse(cls, raw: str) -> "_WebVTTFile":
        """Parse the text of a WebVTT file into cue blocks.

        Blocks that cannot be parsed as cues are logged with a warning and skipped.

        Args:
            raw: The decoded text of the file.

        Returns:
            The parsed file with its successfully parsed cue blocks.

        Raises:
            ValueError: If the content does not start with a WebVTT signature.
        """
        # Drop an optional byte order mark and normalize newlines to LF
        raw = raw.removeprefix("\ufeff").replace("\r\n", "\n").replace("\r", "\n")

        # Check WebVTT signature
        if not cls.verify_signature(raw):
            raise ValueError("Invalid WebVTT file signature")

        # Strip "WEBVTT" header line
        _, _, body = raw.partition("\n")

        # Split into blocks separated by blank lines
        cues: list[_WebVTTCueBlock] = []
        for block in re.split(r"\n\s*\n", body.strip()):
            if not block.strip():
                # e.g. a file that only contains the header line
                continue
            first_line = block.split("\n", 1)[0]
            # NOTE/STYLE/REGION blocks are recognized by their first line only.
            # A block that carries cue timings is always treated as a cue, so an
            # identifier or payload line that merely starts with such a word is
            # not mistaken for a comment.
            if "-->" not in block and (
                cls._pattern_note.match(first_line)
                or cls._pattern_style_region.match(first_line)
            ):
                continue
            try:
                cues.append(_WebVTTCueBlock.parse(block))
            except ValueError as e:
                _log.warning("Failed to parse cue block:\n%s\n%s", block, e)

        return cls(cue_blocks=cues)

    def __iter__(self):
        return iter(self.cue_blocks)

    def __getitem__(self, idx):
        return self.cue_blocks[idx]

    def __len__(self):
        return len(self.cue_blocks)
|
||||||
|
|
||||||
|
|
||||||
|
class WebVTTDocumentBackend(DeclarativeDocumentBackend):
    """Declarative backend for WebVTT (.vtt) files.

    The backend loads the text of a WebVTT file and turns it into a
    DoclingDocument, following the W3C specs on https://www.w3.org/TR/webvtt1

    Every cue block becomes a group in the document body, in file order, holding
    the optional identifier, the timings and the text of the cue payload.
    """

    @override
    def __init__(self, in_doc: InputDocument, path_or_stream: Union[BytesIO, Path]):
        super().__init__(in_doc, path_or_stream)

        self.content: str = ""
        source = self.path_or_stream
        try:
            if isinstance(source, BytesIO):
                self.content = source.getvalue().decode("utf-8")
            elif isinstance(source, Path):
                with open(source, encoding="utf-8") as stream:
                    self.content = stream.read()
        except Exception as e:
            raise RuntimeError(
                "Could not initialize the WebVTT backend for file with hash "
                f"{self.document_hash}."
            ) from e

    @override
    def is_valid(self) -> bool:
        # The document is considered valid when it carries the WebVTT signature.
        return _WebVTTFile.verify_signature(self.content)

    @classmethod
    @override
    def supports_pagination(cls) -> bool:
        return False

    @override
    def unload(self):
        stream = self.path_or_stream
        if isinstance(stream, BytesIO):
            stream.close()
        self.path_or_stream = None

    @classmethod
    @override
    def supported_formats(cls) -> set[InputFormat]:
        return {InputFormat.VTT}

    @staticmethod
    def _add_text_from_component(
        doc: DoclingDocument, item: _WebVTTCueComponent, parent: Optional[NodeItem]
    ) -> None:
        """Append the text of a cue span component to the document as a TextItem.

        Italic, bold and underline spans set the matching formatting flag. For a
        span other than a plain text span, only its direct text children are used.
        Nothing is added when the resulting text is blank.

        TODO: address nesting
        """
        style = Formatting()
        flag_by_span = (
            (_WebVTTCueItalicSpan, "italic"),
            (_WebVTTCueBoldSpan, "bold"),
            (_WebVTTCueUnderlineSpan, "underline"),
        )
        for span_model, flag in flag_by_span:
            if isinstance(item, span_model):
                setattr(style, flag, True)
                break

        if isinstance(item, _WebVTTCueTextSpan):
            pieces = [item.text]
        else:
            # TODO: address nesting
            pieces = [
                child.text
                for child in item.components
                if isinstance(child, _WebVTTCueTextSpan)
            ]

        content = "".join(pieces).strip()
        if not content:
            return
        doc.add_text(
            label=DocItemLabel.TEXT,
            text=content,
            parent=parent,
            content_layer=ContentLayer.BODY,
            formatting=style,
        )

    @override
    def convert(self) -> DoclingDocument:
        _log.debug("Starting WebVTT conversion...")
        if not self.is_valid():
            raise RuntimeError("Invalid WebVTT document.")

        doc = DoclingDocument(
            name=self.file.stem or "file",
            origin=DocumentOrigin(
                filename=self.file.name or "file",
                mimetype="text/vtt",
                binary_hash=self.document_hash,
            ),
        )

        add_component = WebVTTDocumentBackend._add_text_from_component
        vtt: _WebVTTFile = _WebVTTFile.parse(self.content)
        for block in vtt.cue_blocks:
            block_group = doc.add_group(
                label=GroupLabel.SECTION,
                name="WebVTT cue block",
                parent=None,
                content_layer=ContentLayer.BODY,
            )

            # The identifier (when present) and the timings come first in the group.
            header_texts = []
            if block.identifier:
                header_texts.append(str(block.identifier))
            header_texts.append(str(block.timings))
            for header_text in header_texts:
                doc.add_text(
                    label=DocItemLabel.TEXT,
                    text=header_text,
                    parent=block_group,
                    content_layer=ContentLayer.BODY,
                )

            for cue_span in block.payload:
                if not isinstance(cue_span, _WebVTTCueVoiceSpan):
                    add_component(doc, cue_span, block_group)
                    continue

                voice_group = doc.add_group(
                    label=GroupLabel.INLINE,
                    name="WebVTT cue voice span",
                    parent=block_group,
                    content_layer=ContentLayer.BODY,
                )
                # Speaker label: "<annotation> (<class>, ...): "
                class_suffix = (
                    f" ({', '.join(cue_span.classes)})" if cue_span.classes else ""
                )
                doc.add_text(
                    label=DocItemLabel.TEXT,
                    text=f"{cue_span.annotation}{class_suffix}: ",
                    parent=voice_group,
                    content_layer=ContentLayer.BODY,
                )
                for item in cue_span.components:
                    add_component(doc, item, voice_group)

        return doc
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
import math
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from typing import TYPE_CHECKING, Dict, List, Optional, Type, Union
|
from typing import TYPE_CHECKING, Optional, Type, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from docling_core.types.doc import (
|
from docling_core.types.doc import (
|
||||||
@@ -14,9 +13,7 @@ from docling_core.types.doc import (
|
|||||||
)
|
)
|
||||||
from docling_core.types.doc.base import PydanticSerCtxKey, round_pydantic_float
|
from docling_core.types.doc.base import PydanticSerCtxKey, round_pydantic_float
|
||||||
from docling_core.types.doc.page import SegmentedPdfPage, TextCell
|
from docling_core.types.doc.page import SegmentedPdfPage, TextCell
|
||||||
from docling_core.types.io import (
|
from docling_core.types.io import DocumentStream
|
||||||
DocumentStream,
|
|
||||||
)
|
|
||||||
|
|
||||||
# DO NOT REMOVE; explicitly exposed from this location
|
# DO NOT REMOVE; explicitly exposed from this location
|
||||||
from PIL.Image import Image
|
from PIL.Image import Image
|
||||||
@@ -71,6 +68,7 @@ class InputFormat(str, Enum):
|
|||||||
METS_GBS = "mets_gbs"
|
METS_GBS = "mets_gbs"
|
||||||
JSON_DOCLING = "json_docling"
|
JSON_DOCLING = "json_docling"
|
||||||
AUDIO = "audio"
|
AUDIO = "audio"
|
||||||
|
VTT = "vtt"
|
||||||
|
|
||||||
|
|
||||||
class OutputFormat(str, Enum):
|
class OutputFormat(str, Enum):
|
||||||
@@ -82,7 +80,7 @@ class OutputFormat(str, Enum):
|
|||||||
DOCTAGS = "doctags"
|
DOCTAGS = "doctags"
|
||||||
|
|
||||||
|
|
||||||
FormatToExtensions: Dict[InputFormat, List[str]] = {
|
FormatToExtensions: dict[InputFormat, list[str]] = {
|
||||||
InputFormat.DOCX: ["docx", "dotx", "docm", "dotm"],
|
InputFormat.DOCX: ["docx", "dotx", "docm", "dotm"],
|
||||||
InputFormat.PPTX: ["pptx", "potx", "ppsx", "pptm", "potm", "ppsm"],
|
InputFormat.PPTX: ["pptx", "potx", "ppsx", "pptm", "potm", "ppsm"],
|
||||||
InputFormat.PDF: ["pdf"],
|
InputFormat.PDF: ["pdf"],
|
||||||
@@ -97,9 +95,10 @@ FormatToExtensions: Dict[InputFormat, List[str]] = {
|
|||||||
InputFormat.METS_GBS: ["tar.gz"],
|
InputFormat.METS_GBS: ["tar.gz"],
|
||||||
InputFormat.JSON_DOCLING: ["json"],
|
InputFormat.JSON_DOCLING: ["json"],
|
||||||
InputFormat.AUDIO: ["wav", "mp3"],
|
InputFormat.AUDIO: ["wav", "mp3"],
|
||||||
|
InputFormat.VTT: ["vtt"],
|
||||||
}
|
}
|
||||||
|
|
||||||
FormatToMimeType: Dict[InputFormat, List[str]] = {
|
FormatToMimeType: dict[InputFormat, list[str]] = {
|
||||||
InputFormat.DOCX: [
|
InputFormat.DOCX: [
|
||||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.template",
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.template",
|
||||||
@@ -130,6 +129,7 @@ FormatToMimeType: Dict[InputFormat, List[str]] = {
|
|||||||
InputFormat.METS_GBS: ["application/mets+xml"],
|
InputFormat.METS_GBS: ["application/mets+xml"],
|
||||||
InputFormat.JSON_DOCLING: ["application/json"],
|
InputFormat.JSON_DOCLING: ["application/json"],
|
||||||
InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
|
InputFormat.AUDIO: ["audio/x-wav", "audio/mpeg", "audio/wav", "audio/mp3"],
|
||||||
|
InputFormat.VTT: ["text/vtt"],
|
||||||
}
|
}
|
||||||
|
|
||||||
MimeTypeToFormat: dict[str, list[InputFormat]] = {
|
MimeTypeToFormat: dict[str, list[InputFormat]] = {
|
||||||
@@ -162,8 +162,8 @@ class Cluster(BaseModel):
|
|||||||
label: DocItemLabel
|
label: DocItemLabel
|
||||||
bbox: BoundingBox
|
bbox: BoundingBox
|
||||||
confidence: float = 1.0
|
confidence: float = 1.0
|
||||||
cells: List[TextCell] = []
|
cells: list[TextCell] = []
|
||||||
children: List["Cluster"] = [] # Add child cluster support
|
children: list["Cluster"] = [] # Add child cluster support
|
||||||
|
|
||||||
@field_serializer("confidence")
|
@field_serializer("confidence")
|
||||||
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
def _serialize(self, value: float, info: FieldSerializationInfo) -> float:
|
||||||
@@ -179,7 +179,7 @@ class BasePageElement(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class LayoutPrediction(BaseModel):
|
class LayoutPrediction(BaseModel):
|
||||||
clusters: List[Cluster] = []
|
clusters: list[Cluster] = []
|
||||||
|
|
||||||
|
|
||||||
class VlmPredictionToken(BaseModel):
|
class VlmPredictionToken(BaseModel):
|
||||||
@@ -201,14 +201,14 @@ class ContainerElement(
|
|||||||
|
|
||||||
|
|
||||||
class Table(BasePageElement):
|
class Table(BasePageElement):
|
||||||
otsl_seq: List[str]
|
otsl_seq: list[str]
|
||||||
num_rows: int = 0
|
num_rows: int = 0
|
||||||
num_cols: int = 0
|
num_cols: int = 0
|
||||||
table_cells: List[TableCell]
|
table_cells: list[TableCell]
|
||||||
|
|
||||||
|
|
||||||
class TableStructurePrediction(BaseModel):
|
class TableStructurePrediction(BaseModel):
|
||||||
table_map: Dict[int, Table] = {}
|
table_map: dict[int, Table] = {}
|
||||||
|
|
||||||
|
|
||||||
class TextElement(BasePageElement):
|
class TextElement(BasePageElement):
|
||||||
@@ -216,7 +216,7 @@ class TextElement(BasePageElement):
|
|||||||
|
|
||||||
|
|
||||||
class FigureElement(BasePageElement):
|
class FigureElement(BasePageElement):
|
||||||
annotations: List[PictureDataType] = []
|
annotations: list[PictureDataType] = []
|
||||||
provenance: Optional[str] = None
|
provenance: Optional[str] = None
|
||||||
predicted_class: Optional[str] = None
|
predicted_class: Optional[str] = None
|
||||||
confidence: Optional[float] = None
|
confidence: Optional[float] = None
|
||||||
@@ -234,12 +234,12 @@ class FigureElement(BasePageElement):
|
|||||||
|
|
||||||
class FigureClassificationPrediction(BaseModel):
|
class FigureClassificationPrediction(BaseModel):
|
||||||
figure_count: int = 0
|
figure_count: int = 0
|
||||||
figure_map: Dict[int, FigureElement] = {}
|
figure_map: dict[int, FigureElement] = {}
|
||||||
|
|
||||||
|
|
||||||
class EquationPrediction(BaseModel):
|
class EquationPrediction(BaseModel):
|
||||||
equation_count: int = 0
|
equation_count: int = 0
|
||||||
equation_map: Dict[int, TextElement] = {}
|
equation_map: dict[int, TextElement] = {}
|
||||||
|
|
||||||
|
|
||||||
class PagePredictions(BaseModel):
|
class PagePredictions(BaseModel):
|
||||||
@@ -254,9 +254,9 @@ PageElement = Union[TextElement, Table, FigureElement, ContainerElement]
|
|||||||
|
|
||||||
|
|
||||||
class AssembledUnit(BaseModel):
|
class AssembledUnit(BaseModel):
|
||||||
elements: List[PageElement] = []
|
elements: list[PageElement] = []
|
||||||
body: List[PageElement] = []
|
body: list[PageElement] = []
|
||||||
headers: List[PageElement] = []
|
headers: list[PageElement] = []
|
||||||
|
|
||||||
|
|
||||||
class ItemAndImageEnrichmentElement(BaseModel):
|
class ItemAndImageEnrichmentElement(BaseModel):
|
||||||
@@ -280,12 +280,12 @@ class Page(BaseModel):
|
|||||||
None # Internal PDF backend. By default it is cleared during assembling.
|
None # Internal PDF backend. By default it is cleared during assembling.
|
||||||
)
|
)
|
||||||
_default_image_scale: float = 1.0 # Default image scale for external usage.
|
_default_image_scale: float = 1.0 # Default image scale for external usage.
|
||||||
_image_cache: Dict[
|
_image_cache: dict[
|
||||||
float, Image
|
float, Image
|
||||||
] = {} # Cache of images in different scales. By default it is cleared during assembling.
|
] = {} # Cache of images in different scales. By default it is cleared during assembling.
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def cells(self) -> List[TextCell]:
|
def cells(self) -> list[TextCell]:
|
||||||
"""Return text cells as a read-only view of parsed_page.textline_cells."""
|
"""Return text cells as a read-only view of parsed_page.textline_cells."""
|
||||||
if self.parsed_page is not None:
|
if self.parsed_page is not None:
|
||||||
return self.parsed_page.textline_cells
|
return self.parsed_page.textline_cells
|
||||||
@@ -354,7 +354,7 @@ class OpenAiApiResponse(BaseModel):
|
|||||||
|
|
||||||
id: str
|
id: str
|
||||||
model: Optional[str] = None # returned by openai
|
model: Optional[str] = None # returned by openai
|
||||||
choices: List[OpenAiResponseChoice]
|
choices: list[OpenAiResponseChoice]
|
||||||
created: int
|
created: int
|
||||||
usage: OpenAiResponseUsage
|
usage: OpenAiResponseUsage
|
||||||
|
|
||||||
@@ -430,7 +430,7 @@ class PageConfidenceScores(BaseModel):
|
|||||||
|
|
||||||
|
|
||||||
class ConfidenceReport(PageConfidenceScores):
|
class ConfidenceReport(PageConfidenceScores):
|
||||||
pages: Dict[int, PageConfidenceScores] = Field(
|
pages: dict[int, PageConfidenceScores] = Field(
|
||||||
default_factory=lambda: defaultdict(PageConfidenceScores)
|
default_factory=lambda: defaultdict(PageConfidenceScores)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -394,6 +394,8 @@ class _DocumentConversionInput(BaseModel):
|
|||||||
mime = FormatToMimeType[InputFormat.PPTX][0]
|
mime = FormatToMimeType[InputFormat.PPTX][0]
|
||||||
elif ext in FormatToExtensions[InputFormat.XLSX]:
|
elif ext in FormatToExtensions[InputFormat.XLSX]:
|
||||||
mime = FormatToMimeType[InputFormat.XLSX][0]
|
mime = FormatToMimeType[InputFormat.XLSX][0]
|
||||||
|
elif ext in FormatToExtensions[InputFormat.VTT]:
|
||||||
|
mime = FormatToMimeType[InputFormat.VTT][0]
|
||||||
|
|
||||||
return mime
|
return mime
|
||||||
|
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ from docling.backend.msexcel_backend import MsExcelDocumentBackend
|
|||||||
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
from docling.backend.mspowerpoint_backend import MsPowerpointDocumentBackend
|
||||||
from docling.backend.msword_backend import MsWordDocumentBackend
|
from docling.backend.msword_backend import MsWordDocumentBackend
|
||||||
from docling.backend.noop_backend import NoOpBackend
|
from docling.backend.noop_backend import NoOpBackend
|
||||||
|
from docling.backend.webvtt_backend import WebVTTDocumentBackend
|
||||||
from docling.backend.xml.jats_backend import JatsDocumentBackend
|
from docling.backend.xml.jats_backend import JatsDocumentBackend
|
||||||
from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
|
from docling.backend.xml.uspto_backend import PatentUsptoDocumentBackend
|
||||||
from docling.datamodel.base_models import (
|
from docling.datamodel.base_models import (
|
||||||
@@ -170,6 +171,9 @@ def _get_default_option(format: InputFormat) -> FormatOption:
|
|||||||
pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
|
pipeline_cls=SimplePipeline, backend=DoclingJSONBackend
|
||||||
),
|
),
|
||||||
InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=NoOpBackend),
|
InputFormat.AUDIO: FormatOption(pipeline_cls=AsrPipeline, backend=NoOpBackend),
|
||||||
|
InputFormat.VTT: FormatOption(
|
||||||
|
pipeline_cls=SimplePipeline, backend=WebVTTDocumentBackend
|
||||||
|
),
|
||||||
}
|
}
|
||||||
if (options := format_to_default_options.get(format)) is not None:
|
if (options := format_to_default_options.get(format)) is not None:
|
||||||
return options
|
return options
|
||||||
|
|||||||
4
docs/index.md
vendored
4
docs/index.md
vendored
@@ -21,7 +21,7 @@ Docling simplifies document processing, parsing diverse formats — including ad
|
|||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, images (PNG, TIFF, JPEG, ...), and more
|
* 🗂️ Parsing of [multiple document formats][supported_formats] incl. PDF, DOCX, PPTX, XLSX, HTML, WAV, MP3, VTT, images (PNG, TIFF, JPEG, ...), and more
|
||||||
* 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
|
* 📑 Advanced PDF understanding incl. page layout, reading order, table structure, code, formulas, image classification, and more
|
||||||
* 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
|
* 🧬 Unified, expressive [DoclingDocument][docling_document] representation format
|
||||||
* ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
|
* ↪️ Various [export formats][supported_formats] and options, including Markdown, HTML, [DocTags](https://arxiv.org/abs/2503.11576) and lossless JSON
|
||||||
@@ -37,13 +37,13 @@ Docling simplifies document processing, parsing diverse formats — including ad
|
|||||||
* 📤 Structured [information extraction][extraction] \[🧪 beta\]
|
* 📤 Structured [information extraction][extraction] \[🧪 beta\]
|
||||||
* 📑 New layout model (**Heron**) by default, for faster PDF parsing
|
* 📑 New layout model (**Heron**) by default, for faster PDF parsing
|
||||||
* 🔌 [MCP server](https://docling-project.github.io/docling/usage/mcp/) for agentic applications
|
* 🔌 [MCP server](https://docling-project.github.io/docling/usage/mcp/) for agentic applications
|
||||||
|
* 💬 Parsing of Web Video Text Tracks (WebVTT) files
|
||||||
|
|
||||||
### Coming soon
|
### Coming soon
|
||||||
|
|
||||||
* 📝 Metadata extraction, including title, authors, references & language
|
* 📝 Metadata extraction, including title, authors, references & language
|
||||||
* 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
|
* 📝 Chart understanding (Barchart, Piechart, LinePlot, etc)
|
||||||
* 📝 Complex chemistry understanding (Molecular structures)
|
* 📝 Complex chemistry understanding (Molecular structures)
|
||||||
* 📝 Parsing of Web Video Text Tracks (WebVTT) files
|
|
||||||
|
|
||||||
## Get started
|
## Get started
|
||||||
|
|
||||||
|
|||||||
5
docs/usage/supported_formats.md
vendored
5
docs/usage/supported_formats.md
vendored
@@ -11,10 +11,11 @@ Below you can find a listing of all supported input and output formats.
|
|||||||
| PDF | |
|
| PDF | |
|
||||||
| DOCX, XLSX, PPTX | Default formats in MS Office 2007+, based on Office Open XML |
|
| DOCX, XLSX, PPTX | Default formats in MS Office 2007+, based on Office Open XML |
|
||||||
| Markdown | |
|
| Markdown | |
|
||||||
| AsciiDoc | |
|
| AsciiDoc | Human-readable, plain-text markup language for structured technical content |
|
||||||
| HTML, XHTML | |
|
| HTML, XHTML | |
|
||||||
| CSV | |
|
| CSV | |
|
||||||
| PNG, JPEG, TIFF, BMP, WEBP | Image formats |
|
| PNG, JPEG, TIFF, BMP, WEBP | Image formats |
|
||||||
|
| WebVTT | Web Video Text Tracks format for displaying timed text |
|
||||||
|
|
||||||
Schema-specific support:
|
Schema-specific support:
|
||||||
|
|
||||||
@@ -32,4 +33,4 @@ Schema-specific support:
|
|||||||
| Markdown | |
|
| Markdown | |
|
||||||
| JSON | Lossless serialization of Docling Document |
|
| JSON | Lossless serialization of Docling Document |
|
||||||
| Text | Plain text, i.e. without Markdown markers |
|
| Text | Plain text, i.e. without Markdown markers |
|
||||||
| Doctags | |
|
| [Doctags](https://arxiv.org/pdf/2503.11576) | Markup format for efficiently representing the full content and layout characteristics of a document |
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ authors = [
|
|||||||
requires-python = '>=3.9,<4.0'
|
requires-python = '>=3.9,<4.0'
|
||||||
dependencies = [
|
dependencies = [
|
||||||
'pydantic (>=2.0.0,<3.0.0)',
|
'pydantic (>=2.0.0,<3.0.0)',
|
||||||
'docling-core[chunking] (>=2.48.0,<3.0.0)',
|
'docling-core[chunking] (>=2.48.2,<3.0.0)',
|
||||||
'docling-parse (>=4.4.0,<5.0.0)',
|
'docling-parse (>=4.4.0,<5.0.0)',
|
||||||
"docling-ibm-models>=3.9.1,<4",
|
"docling-ibm-models>=3.9.1,<4",
|
||||||
'filetype (>=1.2.0,<2.0.0)',
|
'filetype (>=1.2.0,<2.0.0)',
|
||||||
|
|||||||
66
tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.itxt
vendored
Normal file
66
tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.itxt
vendored
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
item-0 at level 0: unspecified: group _root_
|
||||||
|
item-1 at level 1: section: group WebVTT cue block
|
||||||
|
item-2 at level 2: text: 00:11.000 --> 00:13.000
|
||||||
|
item-3 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-4 at level 3: text: Roger Bingham:
|
||||||
|
item-5 at level 3: text: We are in New York City
|
||||||
|
item-6 at level 1: section: group WebVTT cue block
|
||||||
|
item-7 at level 2: text: 00:13.000 --> 00:16.000
|
||||||
|
item-8 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-9 at level 3: text: Roger Bingham:
|
||||||
|
item-10 at level 3: text: We’re actually at the Lucern Hotel, just down the street
|
||||||
|
item-11 at level 1: section: group WebVTT cue block
|
||||||
|
item-12 at level 2: text: 00:16.000 --> 00:18.000
|
||||||
|
item-13 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-14 at level 3: text: Roger Bingham:
|
||||||
|
item-15 at level 3: text: from the American Museum of Natural History
|
||||||
|
item-16 at level 1: section: group WebVTT cue block
|
||||||
|
item-17 at level 2: text: 00:18.000 --> 00:20.000
|
||||||
|
item-18 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-19 at level 3: text: Roger Bingham:
|
||||||
|
item-20 at level 3: text: And with me is Neil deGrasse Tyson
|
||||||
|
item-21 at level 1: section: group WebVTT cue block
|
||||||
|
item-22 at level 2: text: 00:20.000 --> 00:22.000
|
||||||
|
item-23 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-24 at level 3: text: Roger Bingham:
|
||||||
|
item-25 at level 3: text: Astrophysicist, Director of the Hayden Planetarium
|
||||||
|
item-26 at level 1: section: group WebVTT cue block
|
||||||
|
item-27 at level 2: text: 00:22.000 --> 00:24.000
|
||||||
|
item-28 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-29 at level 3: text: Roger Bingham:
|
||||||
|
item-30 at level 3: text: at the AMNH.
|
||||||
|
item-31 at level 1: section: group WebVTT cue block
|
||||||
|
item-32 at level 2: text: 00:24.000 --> 00:26.000
|
||||||
|
item-33 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-34 at level 3: text: Roger Bingham:
|
||||||
|
item-35 at level 3: text: Thank you for walking down here.
|
||||||
|
item-36 at level 1: section: group WebVTT cue block
|
||||||
|
item-37 at level 2: text: 00:27.000 --> 00:30.000
|
||||||
|
item-38 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-39 at level 3: text: Roger Bingham:
|
||||||
|
item-40 at level 3: text: And I want to do a follow-up on the last conversation we did.
|
||||||
|
item-41 at level 1: section: group WebVTT cue block
|
||||||
|
item-42 at level 2: text: 00:30.000 --> 00:31.500
|
||||||
|
item-43 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-44 at level 3: text: Roger Bingham:
|
||||||
|
item-45 at level 3: text: When we e-mailed—
|
||||||
|
item-46 at level 1: section: group WebVTT cue block
|
||||||
|
item-47 at level 2: text: 00:30.500 --> 00:32.500
|
||||||
|
item-48 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-49 at level 3: text: Neil deGrasse Tyson:
|
||||||
|
item-50 at level 3: text: Didn’t we talk about enough in that conversation?
|
||||||
|
item-51 at level 1: section: group WebVTT cue block
|
||||||
|
item-52 at level 2: text: 00:32.000 --> 00:35.500
|
||||||
|
item-53 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-54 at level 3: text: Roger Bingham:
|
||||||
|
item-55 at level 3: text: No! No no no no; 'cos 'cos obviously 'cos
|
||||||
|
item-56 at level 1: section: group WebVTT cue block
|
||||||
|
item-57 at level 2: text: 00:32.500 --> 00:33.500
|
||||||
|
item-58 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-59 at level 3: text: Neil deGrasse Tyson:
|
||||||
|
item-60 at level 3: text: Laughs
|
||||||
|
item-61 at level 1: section: group WebVTT cue block
|
||||||
|
item-62 at level 2: text: 00:35.500 --> 00:38.000
|
||||||
|
item-63 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-64 at level 3: text: Roger Bingham:
|
||||||
|
item-65 at level 3: text: You know I’m so excited my glasses are falling off here.
|
||||||
1074
tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.json
vendored
Normal file
1074
tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
51
tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.md
vendored
Normal file
51
tests/data/groundtruth/docling_v2/webvtt_example_01.vtt.md
vendored
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
00:11.000 --> 00:13.000
|
||||||
|
|
||||||
|
Roger Bingham: We are in New York City
|
||||||
|
|
||||||
|
00:13.000 --> 00:16.000
|
||||||
|
|
||||||
|
Roger Bingham: We’re actually at the Lucern Hotel, just down the street
|
||||||
|
|
||||||
|
00:16.000 --> 00:18.000
|
||||||
|
|
||||||
|
Roger Bingham: from the American Museum of Natural History
|
||||||
|
|
||||||
|
00:18.000 --> 00:20.000
|
||||||
|
|
||||||
|
Roger Bingham: And with me is Neil deGrasse Tyson
|
||||||
|
|
||||||
|
00:20.000 --> 00:22.000
|
||||||
|
|
||||||
|
Roger Bingham: Astrophysicist, Director of the Hayden Planetarium
|
||||||
|
|
||||||
|
00:22.000 --> 00:24.000
|
||||||
|
|
||||||
|
Roger Bingham: at the AMNH.
|
||||||
|
|
||||||
|
00:24.000 --> 00:26.000
|
||||||
|
|
||||||
|
Roger Bingham: Thank you for walking down here.
|
||||||
|
|
||||||
|
00:27.000 --> 00:30.000
|
||||||
|
|
||||||
|
Roger Bingham: And I want to do a follow-up on the last conversation we did.
|
||||||
|
|
||||||
|
00:30.000 --> 00:31.500
|
||||||
|
|
||||||
|
Roger Bingham: When we e-mailed—
|
||||||
|
|
||||||
|
00:30.500 --> 00:32.500
|
||||||
|
|
||||||
|
Neil deGrasse Tyson: Didn’t we talk about enough in that conversation?
|
||||||
|
|
||||||
|
00:32.000 --> 00:35.500
|
||||||
|
|
||||||
|
Roger Bingham: No! No no no no; 'cos 'cos obviously 'cos
|
||||||
|
|
||||||
|
00:32.500 --> 00:33.500
|
||||||
|
|
||||||
|
Neil deGrasse Tyson: *Laughs*
|
||||||
|
|
||||||
|
00:35.500 --> 00:38.000
|
||||||
|
|
||||||
|
Roger Bingham: You know I’m so excited my glasses are falling off here.
|
||||||
22
tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.itxt
vendored
Normal file
22
tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.itxt
vendored
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
item-0 at level 0: unspecified: group _root_
|
||||||
|
item-1 at level 1: section: group WebVTT cue block
|
||||||
|
item-2 at level 2: text: 00:00.000 --> 00:02.000
|
||||||
|
item-3 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-4 at level 3: text: Esme (first, loud):
|
||||||
|
item-5 at level 3: text: It’s a blue apple tree!
|
||||||
|
item-6 at level 1: section: group WebVTT cue block
|
||||||
|
item-7 at level 2: text: 00:02.000 --> 00:04.000
|
||||||
|
item-8 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-9 at level 3: text: Mary:
|
||||||
|
item-10 at level 3: text: No way!
|
||||||
|
item-11 at level 1: section: group WebVTT cue block
|
||||||
|
item-12 at level 2: text: 00:04.000 --> 00:06.000
|
||||||
|
item-13 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-14 at level 3: text: Esme:
|
||||||
|
item-15 at level 3: text: Hee!
|
||||||
|
item-16 at level 2: text: laughter
|
||||||
|
item-17 at level 1: section: group WebVTT cue block
|
||||||
|
item-18 at level 2: text: 00:06.000 --> 00:08.000
|
||||||
|
item-19 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-20 at level 3: text: Mary (loud):
|
||||||
|
item-21 at level 3: text: That’s awesome!
|
||||||
376
tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.json
vendored
Normal file
376
tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.json
vendored
Normal file
@@ -0,0 +1,376 @@
|
|||||||
|
{
|
||||||
|
"schema_name": "DoclingDocument",
|
||||||
|
"version": "1.6.0",
|
||||||
|
"name": "webvtt_example_02",
|
||||||
|
"origin": {
|
||||||
|
"mimetype": "text/vtt",
|
||||||
|
"binary_hash": 12867774546881601731,
|
||||||
|
"filename": "webvtt_example_02.vtt"
|
||||||
|
},
|
||||||
|
"furniture": {
|
||||||
|
"self_ref": "#/furniture",
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "furniture",
|
||||||
|
"name": "_root_",
|
||||||
|
"label": "unspecified"
|
||||||
|
},
|
||||||
|
"body": {
|
||||||
|
"self_ref": "#/body",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/2"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/4"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/6"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "_root_",
|
||||||
|
"label": "unspecified"
|
||||||
|
},
|
||||||
|
"groups": [
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/0",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/0"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "WebVTT cue block",
|
||||||
|
"label": "section"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/1",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/2"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "WebVTT cue voice span",
|
||||||
|
"label": "inline"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/2",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/3"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/3"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "WebVTT cue block",
|
||||||
|
"label": "section"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/3",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/2"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/4"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/5"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "WebVTT cue voice span",
|
||||||
|
"label": "inline"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/4",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/6"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/5"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/9"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "WebVTT cue block",
|
||||||
|
"label": "section"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/5",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/4"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/7"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/8"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "WebVTT cue voice span",
|
||||||
|
"label": "inline"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/6",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/body"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/10"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/groups/7"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "WebVTT cue block",
|
||||||
|
"label": "section"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/groups/7",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/6"
|
||||||
|
},
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/11"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"$ref": "#/texts/12"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content_layer": "body",
|
||||||
|
"name": "WebVTT cue voice span",
|
||||||
|
"label": "inline"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"texts": [
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/0",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/0"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "00:00.000 --> 00:02.000",
|
||||||
|
"text": "00:00.000 --> 00:02.000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/1",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/1"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Esme (first, loud): ",
|
||||||
|
"text": "Esme (first, loud): "
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/2",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/1"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "It’s a blue apple tree!",
|
||||||
|
"text": "It’s a blue apple tree!",
|
||||||
|
"formatting": {
|
||||||
|
"bold": false,
|
||||||
|
"italic": false,
|
||||||
|
"underline": false,
|
||||||
|
"strikethrough": false,
|
||||||
|
"script": "baseline"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/3",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/2"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "00:02.000 --> 00:04.000",
|
||||||
|
"text": "00:02.000 --> 00:04.000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/4",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/3"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Mary: ",
|
||||||
|
"text": "Mary: "
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/5",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/3"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "No way!",
|
||||||
|
"text": "No way!",
|
||||||
|
"formatting": {
|
||||||
|
"bold": false,
|
||||||
|
"italic": false,
|
||||||
|
"underline": false,
|
||||||
|
"strikethrough": false,
|
||||||
|
"script": "baseline"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/6",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/4"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "00:04.000 --> 00:06.000",
|
||||||
|
"text": "00:04.000 --> 00:06.000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/7",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/5"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Esme: ",
|
||||||
|
"text": "Esme: "
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/8",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/5"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Hee!",
|
||||||
|
"text": "Hee!",
|
||||||
|
"formatting": {
|
||||||
|
"bold": false,
|
||||||
|
"italic": false,
|
||||||
|
"underline": false,
|
||||||
|
"strikethrough": false,
|
||||||
|
"script": "baseline"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/9",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/4"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "laughter",
|
||||||
|
"text": "laughter",
|
||||||
|
"formatting": {
|
||||||
|
"bold": false,
|
||||||
|
"italic": true,
|
||||||
|
"underline": false,
|
||||||
|
"strikethrough": false,
|
||||||
|
"script": "baseline"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/10",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/6"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "00:06.000 --> 00:08.000",
|
||||||
|
"text": "00:06.000 --> 00:08.000"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/11",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/7"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "Mary (loud): ",
|
||||||
|
"text": "Mary (loud): "
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"self_ref": "#/texts/12",
|
||||||
|
"parent": {
|
||||||
|
"$ref": "#/groups/7"
|
||||||
|
},
|
||||||
|
"children": [],
|
||||||
|
"content_layer": "body",
|
||||||
|
"label": "text",
|
||||||
|
"prov": [],
|
||||||
|
"orig": "That’s awesome!",
|
||||||
|
"text": "That’s awesome!",
|
||||||
|
"formatting": {
|
||||||
|
"bold": false,
|
||||||
|
"italic": false,
|
||||||
|
"underline": false,
|
||||||
|
"strikethrough": false,
|
||||||
|
"script": "baseline"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"pictures": [],
|
||||||
|
"tables": [],
|
||||||
|
"key_value_items": [],
|
||||||
|
"form_items": [],
|
||||||
|
"pages": {}
|
||||||
|
}
|
||||||
17
tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.md
vendored
Normal file
17
tests/data/groundtruth/docling_v2/webvtt_example_02.vtt.md
vendored
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
00:00.000 --> 00:02.000
|
||||||
|
|
||||||
|
Esme (first, loud): It’s a blue apple tree!
|
||||||
|
|
||||||
|
00:02.000 --> 00:04.000
|
||||||
|
|
||||||
|
Mary: No way!
|
||||||
|
|
||||||
|
00:04.000 --> 00:06.000
|
||||||
|
|
||||||
|
Esme: Hee!
|
||||||
|
|
||||||
|
*laughter*
|
||||||
|
|
||||||
|
00:06.000 --> 00:08.000
|
||||||
|
|
||||||
|
Mary (loud): That’s awesome!
|
||||||
77
tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.itxt
vendored
Normal file
77
tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.itxt
vendored
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
item-0 at level 0: unspecified: group _root_
|
||||||
|
item-1 at level 1: section: group WebVTT cue block
|
||||||
|
item-2 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/15-0
|
||||||
|
item-3 at level 2: text: 00:00:04.963 --> 00:00:08.571
|
||||||
|
item-4 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-5 at level 3: text: Speaker A:
|
||||||
|
item-6 at level 3: text: OK, I think now we should be recording
|
||||||
|
item-7 at level 1: section: group WebVTT cue block
|
||||||
|
item-8 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/15-1
|
||||||
|
item-9 at level 2: text: 00:00:08.571 --> 00:00:09.403
|
||||||
|
item-10 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-11 at level 3: text: Speaker A:
|
||||||
|
item-12 at level 3: text: properly.
|
||||||
|
item-13 at level 1: section: group WebVTT cue block
|
||||||
|
item-14 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/16-0
|
||||||
|
item-15 at level 2: text: 00:00:10.683 --> 00:00:11.563
|
||||||
|
item-16 at level 2: text: Good.
|
||||||
|
item-17 at level 1: section: group WebVTT cue block
|
||||||
|
item-18 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/17-0
|
||||||
|
item-19 at level 2: text: 00:00:13.363 --> 00:00:13.803
|
||||||
|
item-20 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-21 at level 3: text: Speaker A:
|
||||||
|
item-22 at level 3: text: Yeah.
|
||||||
|
item-23 at level 1: section: group WebVTT cue block
|
||||||
|
item-24 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/78-0
|
||||||
|
item-25 at level 2: text: 00:00:49.603 --> 00:00:53.363
|
||||||
|
item-26 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-27 at level 3: text: Speaker B:
|
||||||
|
item-28 at level 3: text: I was also thinking.
|
||||||
|
item-29 at level 1: section: group WebVTT cue block
|
||||||
|
item-30 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/113-0
|
||||||
|
item-31 at level 2: text: 00:00:54.963 --> 00:01:02.072
|
||||||
|
item-32 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-33 at level 3: text: Speaker B:
|
||||||
|
item-34 at level 3: text: Would be maybe good to create items,
|
||||||
|
item-35 at level 1: section: group WebVTT cue block
|
||||||
|
item-36 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/113-1
|
||||||
|
item-37 at level 2: text: 00:01:02.072 --> 00:01:06.811
|
||||||
|
item-38 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-39 at level 3: text: Speaker B:
|
||||||
|
item-40 at level 3: text: some metadata, some options that can be specific.
|
||||||
|
item-41 at level 1: section: group WebVTT cue block
|
||||||
|
item-42 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/150-0
|
||||||
|
item-43 at level 2: text: 00:01:10.243 --> 00:01:13.014
|
||||||
|
item-44 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-45 at level 3: text: Speaker A:
|
||||||
|
item-46 at level 3: text: Yeah, I mean I think you went even more than
|
||||||
|
item-47 at level 1: section: group WebVTT cue block
|
||||||
|
item-48 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/119-0
|
||||||
|
item-49 at level 2: text: 00:01:10.563 --> 00:01:12.643
|
||||||
|
item-50 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-51 at level 3: text: Speaker B:
|
||||||
|
item-52 at level 3: text: But we preserved the atoms.
|
||||||
|
item-53 at level 1: section: group WebVTT cue block
|
||||||
|
item-54 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/150-1
|
||||||
|
item-55 at level 2: text: 00:01:13.014 --> 00:01:15.907
|
||||||
|
item-56 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-57 at level 3: text: Speaker A:
|
||||||
|
item-58 at level 3: text: than me. I just opened the format.
|
||||||
|
item-59 at level 1: section: group WebVTT cue block
|
||||||
|
item-60 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/197-1
|
||||||
|
item-61 at level 2: text: 00:01:50.222 --> 00:01:51.643
|
||||||
|
item-62 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-63 at level 3: text: Speaker A:
|
||||||
|
item-64 at level 3: text: give it a try, yeah.
|
||||||
|
item-65 at level 1: section: group WebVTT cue block
|
||||||
|
item-66 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/200-0
|
||||||
|
item-67 at level 2: text: 00:01:52.043 --> 00:01:55.043
|
||||||
|
item-68 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-69 at level 3: text: Speaker B:
|
||||||
|
item-70 at level 3: text: Okay, talk to you later.
|
||||||
|
item-71 at level 1: section: group WebVTT cue block
|
||||||
|
item-72 at level 2: text: 62357a1d-d250-41d5-a1cf-6cc0eeceffcc/202-0
|
||||||
|
item-73 at level 2: text: 00:01:54.603 --> 00:01:55.283
|
||||||
|
item-74 at level 2: inline: group WebVTT cue voice span
|
||||||
|
item-75 at level 3: text: Speaker A:
|
||||||
|
item-76 at level 3: text: See you.
|
||||||
1240
tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.json
vendored
Normal file
1240
tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.json
vendored
Normal file
File diff suppressed because it is too large
Load Diff
77
tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.md
vendored
Normal file
77
tests/data/groundtruth/docling_v2/webvtt_example_03.vtt.md
vendored
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/15-0
|
||||||
|
|
||||||
|
00:00:04.963 --> 00:00:08.571
|
||||||
|
|
||||||
|
Speaker A: OK, I think now we should be recording
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/15-1
|
||||||
|
|
||||||
|
00:00:08.571 --> 00:00:09.403
|
||||||
|
|
||||||
|
Speaker A: properly.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/16-0
|
||||||
|
|
||||||
|
00:00:10.683 --> 00:00:11.563
|
||||||
|
|
||||||
|
Good.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/17-0
|
||||||
|
|
||||||
|
00:00:13.363 --> 00:00:13.803
|
||||||
|
|
||||||
|
Speaker A: Yeah.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/78-0
|
||||||
|
|
||||||
|
00:00:49.603 --> 00:00:53.363
|
||||||
|
|
||||||
|
Speaker B: I was also thinking.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/113-0
|
||||||
|
|
||||||
|
00:00:54.963 --> 00:01:02.072
|
||||||
|
|
||||||
|
Speaker B: Would be maybe good to create items,
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/113-1
|
||||||
|
|
||||||
|
00:01:02.072 --> 00:01:06.811
|
||||||
|
|
||||||
|
Speaker B: some metadata, some options that can be specific.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/150-0
|
||||||
|
|
||||||
|
00:01:10.243 --> 00:01:13.014
|
||||||
|
|
||||||
|
Speaker A: Yeah, I mean I think you went even more than
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/119-0
|
||||||
|
|
||||||
|
00:01:10.563 --> 00:01:12.643
|
||||||
|
|
||||||
|
Speaker B: But we preserved the atoms.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/150-1
|
||||||
|
|
||||||
|
00:01:13.014 --> 00:01:15.907
|
||||||
|
|
||||||
|
Speaker A: than me. I just opened the format.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/197-1
|
||||||
|
|
||||||
|
00:01:50.222 --> 00:01:51.643
|
||||||
|
|
||||||
|
Speaker A: give it a try, yeah.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/200-0
|
||||||
|
|
||||||
|
00:01:52.043 --> 00:01:55.043
|
||||||
|
|
||||||
|
Speaker B: Okay, talk to you later.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/202-0
|
||||||
|
|
||||||
|
00:01:54.603 --> 00:01:55.283
|
||||||
|
|
||||||
|
Speaker A: See you.
|
||||||
42
tests/data/webvtt/webvtt_example_01.vtt
vendored
Normal file
42
tests/data/webvtt/webvtt_example_01.vtt
vendored
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
WEBVTT
|
||||||
|
|
||||||
|
NOTE Copyright © 2019 World Wide Web Consortium. https://www.w3.org/TR/webvtt1/
|
||||||
|
|
||||||
|
00:11.000 --> 00:13.000
|
||||||
|
<v Roger Bingham>We are in New York City
|
||||||
|
|
||||||
|
00:13.000 --> 00:16.000
|
||||||
|
<v Roger Bingham>We’re actually at the Lucern Hotel, just down the street
|
||||||
|
|
||||||
|
00:16.000 --> 00:18.000
|
||||||
|
<v Roger Bingham>from the American Museum of Natural History
|
||||||
|
|
||||||
|
00:18.000 --> 00:20.000
|
||||||
|
<v Roger Bingham>And with me is Neil deGrasse Tyson
|
||||||
|
|
||||||
|
00:20.000 --> 00:22.000
|
||||||
|
<v Roger Bingham>Astrophysicist, Director of the Hayden Planetarium
|
||||||
|
|
||||||
|
00:22.000 --> 00:24.000
|
||||||
|
<v Roger Bingham>at the AMNH.
|
||||||
|
|
||||||
|
00:24.000 --> 00:26.000
|
||||||
|
<v Roger Bingham>Thank you for walking down here.
|
||||||
|
|
||||||
|
00:27.000 --> 00:30.000
|
||||||
|
<v Roger Bingham>And I want to do a follow-up on the last conversation we did.
|
||||||
|
|
||||||
|
00:30.000 --> 00:31.500 align:right size:50%
|
||||||
|
<v Roger Bingham>When we e-mailed—
|
||||||
|
|
||||||
|
00:30.500 --> 00:32.500 align:left size:50%
|
||||||
|
<v Neil deGrasse Tyson>Didn’t we talk about enough in that conversation?
|
||||||
|
|
||||||
|
00:32.000 --> 00:35.500 align:right size:50%
|
||||||
|
<v Roger Bingham>No! No no no no; 'cos 'cos obviously 'cos
|
||||||
|
|
||||||
|
00:32.500 --> 00:33.500 align:left size:50%
|
||||||
|
<v Neil deGrasse Tyson><i>Laughs</i>
|
||||||
|
|
||||||
|
00:35.500 --> 00:38.000
|
||||||
|
<v Roger Bingham>You know I’m so excited my glasses are falling off here.
|
||||||
15
tests/data/webvtt/webvtt_example_02.vtt
vendored
Normal file
15
tests/data/webvtt/webvtt_example_02.vtt
vendored
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
WEBVTT
|
||||||
|
|
||||||
|
NOTE Copyright © 2019 World Wide Web Consortium. https://www.w3.org/TR/webvtt1/
|
||||||
|
|
||||||
|
00:00.000 --> 00:02.000
|
||||||
|
<v.first.loud Esme>It’s a blue apple tree!
|
||||||
|
|
||||||
|
00:02.000 --> 00:04.000
|
||||||
|
<v Mary>No way!
|
||||||
|
|
||||||
|
00:04.000 --> 00:06.000
|
||||||
|
<v Esme>Hee!</v> <i>laughter</i>
|
||||||
|
|
||||||
|
00:06.000 --> 00:08.000
|
||||||
|
<v.loud Mary>That’s awesome!
|
||||||
57
tests/data/webvtt/webvtt_example_03.vtt
vendored
Normal file
57
tests/data/webvtt/webvtt_example_03.vtt
vendored
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
WEBVTT
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/15-0
|
||||||
|
00:00:04.963 --> 00:00:08.571
|
||||||
|
<v Speaker A>OK,
|
||||||
|
I think now we should be recording</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/15-1
|
||||||
|
00:00:08.571 --> 00:00:09.403
|
||||||
|
<v Speaker A>properly.</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/16-0
|
||||||
|
00:00:10.683 --> 00:00:11.563
|
||||||
|
Good.
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/17-0
|
||||||
|
00:00:13.363 --> 00:00:13.803
|
||||||
|
<v Speaker A>Yeah.</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/78-0
|
||||||
|
00:00:49.603 --> 00:00:53.363
|
||||||
|
<v Speaker B>I was also thinking.</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/113-0
|
||||||
|
00:00:54.963 --> 00:01:02.072
|
||||||
|
<v Speaker B>Would be maybe good to create items,</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/113-1
|
||||||
|
00:01:02.072 --> 00:01:06.811
|
||||||
|
<v Speaker B>some metadata,
|
||||||
|
some options that can be specific.</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/150-0
|
||||||
|
00:01:10.243 --> 00:01:13.014
|
||||||
|
<v Speaker A>Yeah,
|
||||||
|
I mean I think you went even more than</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/119-0
|
||||||
|
00:01:10.563 --> 00:01:12.643
|
||||||
|
<v Speaker B>But we preserved the atoms.</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/150-1
|
||||||
|
00:01:13.014 --> 00:01:15.907
|
||||||
|
<v Speaker A>than me.
|
||||||
|
I just opened the format.</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/197-1
|
||||||
|
00:01:50.222 --> 00:01:51.643
|
||||||
|
<v Speaker A>give it a try, yeah.</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/200-0
|
||||||
|
00:01:52.043 --> 00:01:55.043
|
||||||
|
<v Speaker B>Okay, talk to you later.</v>
|
||||||
|
|
||||||
|
62357a1d-d250-41d5-a1cf-6cc0eeceffcc/202-0
|
||||||
|
00:01:54.603 --> 00:01:55.283
|
||||||
|
<v Speaker A>See you.</v>
|
||||||
232
tests/test_backend_vtt.py
Normal file
232
tests/test_backend_vtt.py
Normal file
@@ -0,0 +1,232 @@
|
|||||||
|
# Assisted by watsonx Code Assistant
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from docling_core.types.doc import DoclingDocument
|
||||||
|
from pydantic import ValidationError
|
||||||
|
|
||||||
|
from docling.backend.webvtt_backend import (
|
||||||
|
_WebVTTCueItalicSpan,
|
||||||
|
_WebVTTCueTextSpan,
|
||||||
|
_WebVTTCueTimings,
|
||||||
|
_WebVTTCueVoiceSpan,
|
||||||
|
_WebVTTFile,
|
||||||
|
_WebVTTTimestamp,
|
||||||
|
)
|
||||||
|
from docling.datamodel.base_models import InputFormat
|
||||||
|
from docling.datamodel.document import ConversionResult
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
|
||||||
|
from .test_data_gen_flag import GEN_TEST_DATA
|
||||||
|
from .verify_utils import verify_document, verify_export
|
||||||
|
|
||||||
|
# Module-wide switch forwarded to the verify_* helpers in the end-to-end test.
# NOTE(review): presumably, when set, ground-truth files are (re)generated
# instead of being compared against -- confirm in verify_utils.
GENERATE = GEN_TEST_DATA
|
||||||
|
|
||||||
|
|
||||||
|
def test_vtt_cue_commponents():
    """Test WebVTT components.

    Exercises, in order, the timestamp model, the cue timings model, the cue
    text span model and the cue voice span model. Each section is introduced
    by a comment describing the scenario under test.
    """
    # Valid WebVTT timestamps, each paired with its expected value in seconds.
    valid_timestamps = [
        ("00:01:02.345", 1 * 60 + 2.345),
        ("12:34:56.789", 12 * 3600 + 34 * 60 + 56.789),
        ("02:34.567", 2 * 60 + 34.567),
        ("00:00:00.000", 0.0),
    ]
    for ts, expected_seconds in valid_timestamps:
        model = _WebVTTTimestamp(raw=ts)
        assert model.seconds == expected_seconds

    # Invalid WebVTT timestamps.
    invalid_timestamps = [
        "00:60:02.345",  # minutes > 59
        "00:01:60.345",  # seconds > 59
        "00:01:02.1000",  # milliseconds > 999
        "01:02:03",  # missing milliseconds
        "01:02",  # missing milliseconds
        ":01:02.345",  # extra : for missing hours
        "abc:01:02.345",  # invalid format
    ]
    for ts in invalid_timestamps:
        with pytest.raises(ValidationError):
            _WebVTTTimestamp(raw=ts)

    # The timestamp __str__ method.
    model = _WebVTTTimestamp(raw="00:01:02.345")
    assert str(model) == "00:01:02.345"

    # Valid cue timings.
    start = _WebVTTTimestamp(raw="00:10.005")
    end = _WebVTTTimestamp(raw="00:14.007")
    cue_timings = _WebVTTCueTimings(start=start, end=end)
    assert cue_timings.start == start
    assert cue_timings.end == end
    assert str(cue_timings) == "00:10.005 --> 00:14.007"

    # Invalid cue timings with end timestamp before start.
    start = _WebVTTTimestamp(raw="00:10.700")
    end = _WebVTTTimestamp(raw="00:10.500")
    with pytest.raises(ValidationError) as excinfo:
        _WebVTTCueTimings(start=start, end=end)
    assert "End timestamp must be greater than start timestamp" in str(excinfo.value)

    # Invalid cue timings with missing end.
    start = _WebVTTTimestamp(raw="00:10.500")
    with pytest.raises(ValidationError) as excinfo:
        _WebVTTCueTimings(start=start)
    assert "Field required" in str(excinfo.value)

    # Invalid cue timings with missing start.
    end = _WebVTTTimestamp(raw="00:10.500")
    with pytest.raises(ValidationError) as excinfo:
        _WebVTTCueTimings(end=end)
    assert "Field required" in str(excinfo.value)

    # Cue text span with valid text.
    valid_text = "This is a valid cue text span."
    span = _WebVTTCueTextSpan(text=valid_text)
    assert span.text == valid_text
    assert str(span) == valid_text

    # Cue text span with text containing newline characters.
    invalid_text = "This cue text span\ncontains a newline."
    with pytest.raises(ValidationError):
        _WebVTTCueTextSpan(text=invalid_text)

    # Cue text span with text containing ampersand.
    invalid_text = "This cue text span contains &."
    with pytest.raises(ValidationError):
        _WebVTTCueTextSpan(text=invalid_text)

    # Cue text span with text containing less-than sign.
    invalid_text = "This cue text span contains <."
    with pytest.raises(ValidationError):
        _WebVTTCueTextSpan(text=invalid_text)

    # Cue text span with empty text.
    with pytest.raises(ValidationError):
        _WebVTTCueTextSpan(text="")

    # Voice span: annotation validation.
    valid_annotation = "valid-annotation"
    invalid_annotation = "invalid\nannotation"
    with pytest.raises(ValidationError):
        _WebVTTCueVoiceSpan(annotation=invalid_annotation)
    assert _WebVTTCueVoiceSpan(annotation=valid_annotation)

    # Voice span: classes validation.
    annotation = "speaker name"
    valid_classes = ["class1", "class2"]
    invalid_classes = ["class\nwith\nnewlines", ""]
    with pytest.raises(ValidationError):
        _WebVTTCueVoiceSpan(annotation=annotation, classes=invalid_classes)
    assert _WebVTTCueVoiceSpan(annotation=annotation, classes=valid_classes)

    # Voice span: components validation.
    annotation = "speaker name"
    valid_components = [_WebVTTCueTextSpan(text="random text")]
    invalid_components = [123, "not a component"]
    with pytest.raises(ValidationError):
        _WebVTTCueVoiceSpan(annotation=annotation, components=invalid_components)
    assert _WebVTTCueVoiceSpan(annotation=annotation, components=valid_components)

    # Valid cue voice spans: serialization with and without classes.
    cue_span = _WebVTTCueVoiceSpan(
        annotation="speaker",
        classes=["loud", "clear"],
        components=[_WebVTTCueTextSpan(text="random text")],
    )

    expected_str = "<v.loud.clear speaker>random text</v>"
    assert str(cue_span) == expected_str

    cue_span = _WebVTTCueVoiceSpan(
        annotation="speaker",
        components=[_WebVTTCueTextSpan(text="random text")],
    )
    expected_str = "<v speaker>random text</v>"
    assert str(cue_span) == expected_str
|
||||||
|
|
||||||
|
|
||||||
|
def test_webvtt_file():
    """Test WebVTT files.

    Parses the three sample files under ``tests/data/webvtt`` and checks the
    number of cue blocks plus the structure of selected cues. Paths are
    relative, so the test expects to run from the repository root.
    """
    # Example 01: voice spans, one of them wrapping an italic span.
    with open("./tests/data/webvtt/webvtt_example_01.vtt", encoding="utf-8") as f:
        content = f.read()
    vtt = _WebVTTFile.parse(content)
    assert len(vtt) == 13
    block = vtt.cue_blocks[11]
    assert str(block.timings) == "00:32.500 --> 00:33.500"
    assert len(block.payload) == 1
    cue_span = block.payload[0]
    assert isinstance(cue_span, _WebVTTCueVoiceSpan)
    assert cue_span.annotation == "Neil deGrasse Tyson"
    assert not cue_span.classes
    assert len(cue_span.components) == 1
    comp = cue_span.components[0]
    assert isinstance(comp, _WebVTTCueItalicSpan)
    assert len(comp.components) == 1
    comp2 = comp.components[0]
    assert isinstance(comp2, _WebVTTCueTextSpan)
    assert comp2.text == "Laughs"

    # Example 02: serializing the parsed cue blocks must reproduce the file.
    with open("./tests/data/webvtt/webvtt_example_02.vtt", encoding="utf-8") as f:
        content = f.read()
    vtt = _WebVTTFile.parse(content)
    assert len(vtt) == 4
    reverse = (
        "WEBVTT\n\nNOTE Copyright © 2019 World Wide Web Consortium. "
        "https://www.w3.org/TR/webvtt1/\n\n"
    )
    reverse += "\n\n".join([str(block) for block in vtt.cue_blocks])
    assert content == reverse

    # Example 03: every cue block carries an identifier.
    with open("./tests/data/webvtt/webvtt_example_03.vtt", encoding="utf-8") as f:
        content = f.read()
    vtt = _WebVTTFile.parse(content)
    assert len(vtt) == 13
    for block in vtt:
        assert block.identifier
    block = vtt.cue_blocks[0]
    assert block.identifier == "62357a1d-d250-41d5-a1cf-6cc0eeceffcc/15-0"
    assert str(block.timings) == "00:00:04.963 --> 00:00:08.571"
    assert len(block.payload) == 1
    assert isinstance(block.payload[0], _WebVTTCueVoiceSpan)
    # Third cue has no voice tag, so its payload is a bare text span. (A
    # previous isinstance check here inspected the stale `cue_span` variable
    # left over from example 01 and verified nothing about this block.)
    block = vtt.cue_blocks[2]
    assert block.identifier == "62357a1d-d250-41d5-a1cf-6cc0eeceffcc/16-0"
    assert str(block.timings) == "00:00:10.683 --> 00:00:11.563"
    assert len(block.payload) == 1
    assert isinstance(block.payload[0], _WebVTTCueTextSpan)
    assert block.payload[0].text == "Good."
|
||||||
|
|
||||||
|
|
||||||
|
def test_e2e_vtt_conversions():
    """Convert each WebVTT sample and compare its exports with the ground truth.

    For every ``*.vtt`` file found under ``tests/data/webvtt`` the markdown
    export, the indented-text export and the document itself are passed to
    the verification helpers together with the matching ground-truth path.
    """
    samples_dir = Path("./tests/data/webvtt/")
    sample_files = sorted(samples_dir.rglob("*.vtt"))
    converter = DocumentConverter(allowed_formats=[InputFormat.VTT])

    for sample in sample_files:
        # Ground truth sits next to the samples' parent directory and reuses
        # the sample file name as a prefix for each export's extension.
        gt_prefix = str(
            sample.parent.parent.joinpath("groundtruth", "docling_v2", sample.name)
        )

        result: ConversionResult = converter.convert(sample)
        document: DoclingDocument = result.document

        markdown: str = document.export_to_markdown(escape_html=False)
        md_ok = verify_export(markdown, gt_prefix + ".md", generate=GENERATE)
        assert md_ok, "export to md"

        indented: str = document._export_to_indented_text(
            max_text_len=70, explicit_tables=False
        )
        itxt_ok = verify_export(indented, gt_prefix + ".itxt", generate=GENERATE)
        assert itxt_ok, "export to indented-text"

        assert verify_document(document, gt_prefix + ".json", GENERATE)
|
||||||
@@ -206,6 +206,11 @@ def test_guess_format(tmp_path):
|
|||||||
doc_path.write_text("xyz", encoding="utf-8")
|
doc_path.write_text("xyz", encoding="utf-8")
|
||||||
assert dci._guess_format(doc_path) is None
|
assert dci._guess_format(doc_path) is None
|
||||||
|
|
||||||
|
# Valid WebVTT
|
||||||
|
buf = BytesIO(Path("./tests/data/webvtt/webvtt_example_01.vtt").open("rb").read())
|
||||||
|
stream = DocumentStream(name="webvtt_example_01.vtt", stream=buf)
|
||||||
|
assert dci._guess_format(stream) == InputFormat.VTT
|
||||||
|
|
||||||
# Valid Docling JSON
|
# Valid Docling JSON
|
||||||
test_str = '{"name": ""}'
|
test_str = '{"name": ""}'
|
||||||
stream = DocumentStream(name="test.json", stream=BytesIO(f"{test_str}".encode()))
|
stream = DocumentStream(name="test.json", stream=BytesIO(f"{test_str}".encode()))
|
||||||
|
|||||||
13
uv.lock
generated
13
uv.lock
generated
@@ -1154,7 +1154,7 @@ requires-dist = [
|
|||||||
{ name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" },
|
{ name = "accelerate", marker = "extra == 'vlm'", specifier = ">=1.2.1,<2.0.0" },
|
||||||
{ name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" },
|
{ name = "beautifulsoup4", specifier = ">=4.12.3,<5.0.0" },
|
||||||
{ name = "certifi", specifier = ">=2024.7.4" },
|
{ name = "certifi", specifier = ">=2024.7.4" },
|
||||||
{ name = "docling-core", extras = ["chunking"], specifier = ">=2.48.0,<3.0.0" },
|
{ name = "docling-core", extras = ["chunking"], specifier = ">=2.48.2,<3.0.0" },
|
||||||
{ name = "docling-ibm-models", specifier = ">=3.9.1,<4" },
|
{ name = "docling-ibm-models", specifier = ">=3.9.1,<4" },
|
||||||
{ name = "docling-parse", specifier = ">=4.4.0,<5.0.0" },
|
{ name = "docling-parse", specifier = ">=4.4.0,<5.0.0" },
|
||||||
{ name = "easyocr", specifier = ">=1.7,<2.0" },
|
{ name = "easyocr", specifier = ">=1.7,<2.0" },
|
||||||
@@ -1233,7 +1233,7 @@ examples = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "docling-core"
|
name = "docling-core"
|
||||||
version = "2.48.1"
|
version = "2.48.2"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "jsonref" },
|
{ name = "jsonref" },
|
||||||
@@ -1247,9 +1247,9 @@ dependencies = [
|
|||||||
{ name = "typer" },
|
{ name = "typer" },
|
||||||
{ name = "typing-extensions" },
|
{ name = "typing-extensions" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/f9/0c/dce7f80e99e56570d143885fc40536107e8a39ef4de2888959e055b39607/docling_core-2.48.1.tar.gz", hash = "sha256:48cb77575dfd020a51413957e96b165e45f6d1027c641710fddb389dcb9b189c", size = 161311, upload-time = "2025-09-11T12:33:22.46Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/dd/e6/922de61f2a7b7d337ffc781f8e85f5581b12801fe193827066ccd6c5ba04/docling_core-2.48.2.tar.gz", hash = "sha256:01c12a1d3c9877c6658d0d6adf5cdcefd56cb814d8083860ba2d77ab882ac2d0", size = 161344, upload-time = "2025-09-22T08:39:41.431Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/90/fe/1b96120c9d94c97016716ccf46ad2708a2e76157e52dfcca4101db70fc21/docling_core-2.48.1-py3-none-any.whl", hash = "sha256:a3985999ac2067e15e589ef0f11ccde264deacaea403c0f94049242f10a6189a", size = 164330, upload-time = "2025-09-11T12:33:20.935Z" },
|
{ url = "https://files.pythonhosted.org/packages/97/bc/a77739cc31d7de2be9d6682f880761083a2038355e513e813a73a041c644/docling_core-2.48.2-py3-none-any.whl", hash = "sha256:d1f2fe9be9a9f7e7a2fb6ddcc9d9fcbf437bfb02e0c6005cdec1ece1cf4aed44", size = 164376, upload-time = "2025-09-22T08:39:39.704Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.optional-dependencies]
|
[package.optional-dependencies]
|
||||||
@@ -4936,6 +4936,9 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/20/8a/b35a615ae6f04550d696bb179c414538b3b477999435fdd4ad75b76139e4/pybase64-1.4.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:a370dea7b1cee2a36a4d5445d4e09cc243816c5bc8def61f602db5a6f5438e52", size = 54320, upload-time = "2025-07-27T13:03:27.495Z" },
|
{ url = "https://files.pythonhosted.org/packages/20/8a/b35a615ae6f04550d696bb179c414538b3b477999435fdd4ad75b76139e4/pybase64-1.4.2-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:a370dea7b1cee2a36a4d5445d4e09cc243816c5bc8def61f602db5a6f5438e52", size = 54320, upload-time = "2025-07-27T13:03:27.495Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d3/a9/8bd4f9bcc53689f1b457ecefed1eaa080e4949d65a62c31a38b7253d5226/pybase64-1.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9aa4de83f02e462a6f4e066811c71d6af31b52d7484de635582d0e3ec3d6cc3e", size = 56482, upload-time = "2025-07-27T13:03:28.942Z" },
|
{ url = "https://files.pythonhosted.org/packages/d3/a9/8bd4f9bcc53689f1b457ecefed1eaa080e4949d65a62c31a38b7253d5226/pybase64-1.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9aa4de83f02e462a6f4e066811c71d6af31b52d7484de635582d0e3ec3d6cc3e", size = 56482, upload-time = "2025-07-27T13:03:28.942Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/75/e5/4a7735b54a1191f61c3f5c2952212c85c2d6b06eb5fb3671c7603395f70c/pybase64-1.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83a1c2f9ed00fee8f064d548c8654a480741131f280e5750bb32475b7ec8ee38", size = 70959, upload-time = "2025-07-27T13:03:30.171Z" },
|
{ url = "https://files.pythonhosted.org/packages/75/e5/4a7735b54a1191f61c3f5c2952212c85c2d6b06eb5fb3671c7603395f70c/pybase64-1.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83a1c2f9ed00fee8f064d548c8654a480741131f280e5750bb32475b7ec8ee38", size = 70959, upload-time = "2025-07-27T13:03:30.171Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f4/56/5337f27a8b8d2d6693f46f7b36bae47895e5820bfa259b0072574a4e1057/pybase64-1.4.2-cp313-cp313-android_21_arm64_v8a.whl", hash = "sha256:0f331aa59549de21f690b6ccc79360ffed1155c3cfbc852eb5c097c0b8565a2b", size = 33888, upload-time = "2025-07-27T13:03:35.698Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e3/ff/470768f0fe6de0aa302a8cb1bdf2f9f5cffc3f69e60466153be68bc953aa/pybase64-1.4.2-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:69d3f0445b0faeef7bb7f93bf8c18d850785e2a77f12835f49e524cc54af04e7", size = 30914, upload-time = "2025-07-27T13:03:38.475Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/75/6b/d328736662665e0892409dc410353ebef175b1be5eb6bab1dad579efa6df/pybase64-1.4.2-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:2372b257b1f4dd512f317fb27e77d313afd137334de64c87de8374027aacd88a", size = 31380, upload-time = "2025-07-27T13:03:39.7Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ca/96/7ff718f87c67f4147c181b73d0928897cefa17dc75d7abc6e37730d5908f/pybase64-1.4.2-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:fb794502b4b1ec91c4ca5d283ae71aef65e3de7721057bd9e2b3ec79f7a62d7d", size = 38230, upload-time = "2025-07-27T13:03:41.637Z" },
|
{ url = "https://files.pythonhosted.org/packages/ca/96/7ff718f87c67f4147c181b73d0928897cefa17dc75d7abc6e37730d5908f/pybase64-1.4.2-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:fb794502b4b1ec91c4ca5d283ae71aef65e3de7721057bd9e2b3ec79f7a62d7d", size = 38230, upload-time = "2025-07-27T13:03:41.637Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/71/ab/db4dbdfccb9ca874d6ce34a0784761471885d96730de85cee3d300381529/pybase64-1.4.2-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d377d48acf53abf4b926c2a7a24a19deb092f366a04ffd856bf4b3aa330b025d", size = 71608, upload-time = "2025-07-27T13:03:47.01Z" },
|
{ url = "https://files.pythonhosted.org/packages/71/ab/db4dbdfccb9ca874d6ce34a0784761471885d96730de85cee3d300381529/pybase64-1.4.2-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d377d48acf53abf4b926c2a7a24a19deb092f366a04ffd856bf4b3aa330b025d", size = 71608, upload-time = "2025-07-27T13:03:47.01Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f2/58/7f2cef1ceccc682088958448d56727369de83fa6b29148478f4d2acd107a/pybase64-1.4.2-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:ab9cdb6a8176a5cb967f53e6ad60e40c83caaa1ae31c5e1b29e5c8f507f17538", size = 56413, upload-time = "2025-07-27T13:03:49.908Z" },
|
{ url = "https://files.pythonhosted.org/packages/f2/58/7f2cef1ceccc682088958448d56727369de83fa6b29148478f4d2acd107a/pybase64-1.4.2-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:ab9cdb6a8176a5cb967f53e6ad60e40c83caaa1ae31c5e1b29e5c8f507f17538", size = 56413, upload-time = "2025-07-27T13:03:49.908Z" },
|
||||||
@@ -4957,6 +4960,8 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/95/f0/c392c4ac8ccb7a34b28377c21faa2395313e3c676d76c382642e19a20703/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad59362fc267bf15498a318c9e076686e4beeb0dfe09b457fabbc2b32468b97a", size = 58103, upload-time = "2025-07-27T13:04:29.996Z" },
|
{ url = "https://files.pythonhosted.org/packages/95/f0/c392c4ac8ccb7a34b28377c21faa2395313e3c676d76c382642e19a20703/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ad59362fc267bf15498a318c9e076686e4beeb0dfe09b457fabbc2b32468b97a", size = 58103, upload-time = "2025-07-27T13:04:29.996Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/32/30/00ab21316e7df8f526aa3e3dc06f74de6711d51c65b020575d0105a025b2/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:01593bd064e7dcd6c86d04e94e44acfe364049500c20ac68ca1e708fbb2ca970", size = 60779, upload-time = "2025-07-27T13:04:31.549Z" },
|
{ url = "https://files.pythonhosted.org/packages/32/30/00ab21316e7df8f526aa3e3dc06f74de6711d51c65b020575d0105a025b2/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:01593bd064e7dcd6c86d04e94e44acfe364049500c20ac68ca1e708fbb2ca970", size = 60779, upload-time = "2025-07-27T13:04:31.549Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/a6/65/114ca81839b1805ce4a2b7d58bc16e95634734a2059991f6382fc71caf3e/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5b81547ad8ea271c79fdf10da89a1e9313cb15edcba2a17adf8871735e9c02a0", size = 74684, upload-time = "2025-07-27T13:04:32.976Z" },
|
{ url = "https://files.pythonhosted.org/packages/a6/65/114ca81839b1805ce4a2b7d58bc16e95634734a2059991f6382fc71caf3e/pybase64-1.4.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5b81547ad8ea271c79fdf10da89a1e9313cb15edcba2a17adf8871735e9c02a0", size = 74684, upload-time = "2025-07-27T13:04:32.976Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/99/bf/00a87d951473ce96c8c08af22b6983e681bfabdb78dd2dcf7ee58eac0932/pybase64-1.4.2-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:4157ad277a32cf4f02a975dffc62a3c67d73dfa4609b2c1978ef47e722b18b8e", size = 30924, upload-time = "2025-07-27T13:04:39.189Z" },
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/ae/43/dee58c9d60e60e6fb32dc6da722d84592e22f13c277297eb4ce6baf99a99/pybase64-1.4.2-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:e113267dc349cf624eb4f4fbf53fd77835e1aa048ac6877399af426aab435757", size = 31390, upload-time = "2025-07-27T13:04:40.995Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/e1/11/b28906fc2e330b8b1ab4bc845a7bef808b8506734e90ed79c6062b095112/pybase64-1.4.2-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:cea5aaf218fd9c5c23afacfe86fd4464dfedc1a0316dd3b5b4075b068cc67df0", size = 38212, upload-time = "2025-07-27T13:04:42.729Z" },
|
{ url = "https://files.pythonhosted.org/packages/e1/11/b28906fc2e330b8b1ab4bc845a7bef808b8506734e90ed79c6062b095112/pybase64-1.4.2-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:cea5aaf218fd9c5c23afacfe86fd4464dfedc1a0316dd3b5b4075b068cc67df0", size = 38212, upload-time = "2025-07-27T13:04:42.729Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/e4/2e/851eb51284b97354ee5dfa1309624ab90920696e91a33cd85b13d20cc5c1/pybase64-1.4.2-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a3e54dcf0d0305ec88473c9d0009f698cabf86f88a8a10090efeff2879c421bb", size = 71674, upload-time = "2025-07-27T13:04:49.294Z" },
|
{ url = "https://files.pythonhosted.org/packages/e4/2e/851eb51284b97354ee5dfa1309624ab90920696e91a33cd85b13d20cc5c1/pybase64-1.4.2-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a3e54dcf0d0305ec88473c9d0009f698cabf86f88a8a10090efeff2879c421bb", size = 71674, upload-time = "2025-07-27T13:04:49.294Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/a4/8e/3479266bc0e65f6cc48b3938d4a83bff045330649869d950a378f2ddece0/pybase64-1.4.2-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:753da25d4fd20be7bda2746f545935773beea12d5cb5ec56ec2d2960796477b1", size = 56461, upload-time = "2025-07-27T13:04:52.37Z" },
|
{ url = "https://files.pythonhosted.org/packages/a4/8e/3479266bc0e65f6cc48b3938d4a83bff045330649869d950a378f2ddece0/pybase64-1.4.2-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:753da25d4fd20be7bda2746f545935773beea12d5cb5ec56ec2d2960796477b1", size = 56461, upload-time = "2025-07-27T13:04:52.37Z" },
|
||||||
|
|||||||
Reference in New Issue
Block a user