chore: typo fix (#1465)

* typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

* chore: typo fix

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>

---------

Signed-off-by: nkh0472 <67589323+nkh0472@users.noreply.github.com>
This commit is contained in:
nkh0472
2025-04-28 14:52:09 +08:00
committed by GitHub
parent 3afbe6c969
commit a097ccd8d5
14 changed files with 19 additions and 19 deletions

View File

@@ -409,7 +409,7 @@ class MarkdownDocumentBackend(DeclarativeDocumentBackend):
)
return _txt
# restore original HTML by removing previouly added markers
# restore original HTML by removing previously added markers
for regex in [
rf"<pre>\s*<code>\s*{_START_MARKER}",
rf"{_STOP_MARKER}\s*</code>\s*</pre>",

View File

@@ -436,7 +436,7 @@ class MsWordDocumentBackend(DeclarativeDocumentBackend):
# Common styles for bullet and numbered lists.
# "List Bullet", "List Number", "List Paragraph"
# Identify wether list is a numbered list or not
# Identify whether list is a numbered list or not
# is_numbered = "List Bullet" not in paragraph.style.name
is_numbered = False
p_style_id, p_level = self._get_label_and_level(paragraph)

View File

@@ -91,7 +91,7 @@ class JatsDocumentBackend(DeclarativeDocumentBackend):
super().__init__(in_doc, path_or_stream)
self.path_or_stream = path_or_stream
# Initialize the root of the document hiearchy
# Initialize the root of the document hierarchy
self.root: Optional[NodeItem] = None
self.valid = False

View File

@@ -1,6 +1,6 @@
"""Backend to parse patents from the United States Patent Office (USPTO).
The parsers included in this module can handle patent grants pubished since 1976 and
The parsers included in this module can handle patent grants published since 1976 and
patent applications since 2001.
The original files can be found in https://bulkdata.uspto.gov.
"""
@@ -440,7 +440,7 @@ class PatentUsptoIce(PatentUspto):
)
elif name == self.Element.PARAGRAPH.value and text:
# remmove blank spaces added in paragraphs
# remove blank spaces added in paragraphs
text = re.sub("\\s+", " ", text)
if self.Element.ABSTRACT.value in self.property:
self.abstract = (
@@ -1697,7 +1697,7 @@ class XmlTable:
class HtmlEntity:
"""Provide utility functions to get the HTML entities of styled characters.
This class has been developped from:
This class has been developed from:
https://unicode-table.com/en/html-entities/
https://www.w3.org/TR/WD-math-970515/table03.html
"""
@@ -1896,7 +1896,7 @@ class HtmlEntity:
"""Get an HTML entity of a greek letter in ISO 8879.
Args:
The text to transform, as an ISO 8879 entitiy.
The text to transform, as an ISO 8879 entity.
Returns:
The HTML entity representing a greek letter. If the input text is not

View File

@@ -521,7 +521,7 @@ def convert( # noqa: C901
if image_export_mode != ImageRefMode.PLACEHOLDER:
pipeline_options.generate_page_images = True
pipeline_options.generate_picture_images = (
True # FIXME: to be deprecated in verson 3
True # FIXME: to be deprecated in version 3
)
pipeline_options.images_scale = 2

View File

@@ -234,7 +234,7 @@ class TableStructureModel(BasePageModel):
tcells = table_cluster.cells
tokens = []
for c in tcells:
# Only allow non empty stings (spaces) into the cells of a table
# Only allow non empty strings (spaces) into the cells of a table
if len(c.text.strip()) > 0:
new_cell = copy.deepcopy(c)
new_cell.rect = BoundingRectangle.from_bounding_box(

View File

@@ -151,7 +151,7 @@ class TesseractOcrModel(BaseOcrModel):
script = map_tesseract_script(script)
lang = f"{self.script_prefix}{script}"
# Check if the detected languge is present in the system
# Check if the detected language is present in the system
if lang not in self._tesserocr_languages:
msg = f"Tesseract detected the script '{script}' and language '{lang}'."
msg += " However this language is not installed in your system and will be ignored."