Create an XML backend for PubMed documents based on the pubmed_parser library

This commit is contained in:
lucas-morin 2024-12-05 13:20:00 +01:00
parent 6c818d0926
commit 7867014d0b

View File

@ -1,12 +1,10 @@
import hashlib
import logging import logging
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from pprint import pprint from typing import Set, Union
from typing import Final, Set, Union
import pubmed_parser # type: ignore import pubmed_parser # type: ignore
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from docling_core.types.doc import ( from docling_core.types.doc import (
DocItemLabel, DocItemLabel,
DoclingDocument, DoclingDocument,