mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Added example GUI
This commit is contained in:
parent
3220a592e7
commit
880de0379b
152
docs/examples/gui-gtk4.py
Normal file
152
docs/examples/gui-gtk4.py
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
import gi
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
|
||||||
|
# You need to install GTK 4 for Python on your system
|
||||||
|
gi.require_version("Gtk", "4.0")
|
||||||
|
from gi.repository import Gio, Gtk, Gdk, GLib
|
||||||
|
|
||||||
|
ALLOWED_EXTENSIONS = ["pdf"]
|
||||||
|
|
||||||
|
|
||||||
|
class DragDropWindow(Gtk.ApplicationWindow):
|
||||||
|
"""Main application window."""
|
||||||
|
|
||||||
|
doc_converter = None
|
||||||
|
file_dialog = None
|
||||||
|
target_folder = None
|
||||||
|
cur_file = None
|
||||||
|
|
||||||
|
def __init__(self, app):
|
||||||
|
Gtk.ApplicationWindow.__init__(self, application=app)
|
||||||
|
self.file_dialog = Gtk.FileDialog()
|
||||||
|
|
||||||
|
# Set up window
|
||||||
|
self.set_title("Docling")
|
||||||
|
self.set_default_size(500, 360)
|
||||||
|
|
||||||
|
# Main container
|
||||||
|
self.box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6)
|
||||||
|
self.set_child(self.box)
|
||||||
|
|
||||||
|
# Add the logo
|
||||||
|
self.logo = Gtk.Picture.new_for_filename("./docs/assets/logo.png")
|
||||||
|
self.box.append(self.logo)
|
||||||
|
|
||||||
|
# Label to indicate drop area
|
||||||
|
self.label = Gtk.Label(
|
||||||
|
label="Drop files (PDF, ..) here to start processing",
|
||||||
|
halign=Gtk.Align.CENTER,
|
||||||
|
)
|
||||||
|
self.box.append(self.label)
|
||||||
|
|
||||||
|
# Add some padding at the bottom
|
||||||
|
self.box.append(Gtk.Label(label="\n\n"))
|
||||||
|
|
||||||
|
# Enable drag-and-drop
|
||||||
|
file_drop_target = Gtk.DropTarget.new(Gio.File, Gdk.DragAction.COPY)
|
||||||
|
self.add_controller(file_drop_target)
|
||||||
|
file_drop_target.connect("drop", self.on_file_drop)
|
||||||
|
|
||||||
|
# Drop Hover Effect
|
||||||
|
file_drop_target.connect(
|
||||||
|
"enter", lambda _target, _x, _y: self.box.add_css_class("overlay-drag-area")
|
||||||
|
)
|
||||||
|
file_drop_target.connect(
|
||||||
|
"leave", lambda _target: self.box.remove_css_class("overlay-drag-area")
|
||||||
|
)
|
||||||
|
|
||||||
|
# Initialize converter
|
||||||
|
self.doc_converter = DocumentConverter()
|
||||||
|
|
||||||
|
def on_file_drop(self, target, value, _x, _y):
|
||||||
|
"""Handle user interaction."""
|
||||||
|
print(f"Feeling the drop on {target} {_x} {_y}")
|
||||||
|
self.box.remove_css_class("overlay-drag-area")
|
||||||
|
if not isinstance(value, Gio.File):
|
||||||
|
return False
|
||||||
|
if self.file_dialog is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
file_info = value.query_info("standard::content-type", 0, None)
|
||||||
|
content_type = file_info.get_content_type()
|
||||||
|
if content_type.startswith("application/pdf"):
|
||||||
|
file_name = value.get_basename()
|
||||||
|
file_paths = []
|
||||||
|
|
||||||
|
# Convert URI to local path with proper unescaping
|
||||||
|
for ext in ALLOWED_EXTENSIONS:
|
||||||
|
if file_name.lower().endswith("." + ext):
|
||||||
|
file_paths.append(value.get_path())
|
||||||
|
|
||||||
|
# File handling logic
|
||||||
|
for path in file_paths:
|
||||||
|
self.label.set_text(f"Processing file: {file_name}")
|
||||||
|
self.cur_file = path
|
||||||
|
self.file_dialog.select_folder(
|
||||||
|
parent=self,
|
||||||
|
cancellable=None,
|
||||||
|
callback=self.on_select_folder,
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.label.set_text("No valid files dropped")
|
||||||
|
|
||||||
|
def on_select_folder(self, file_dialog, result):
|
||||||
|
"""Call back from Save window."""
|
||||||
|
file = file_dialog.select_folder_finish(result)
|
||||||
|
self.target_folder = file.get_path()
|
||||||
|
self.convert_document(self.cur_file, self.target_folder)
|
||||||
|
|
||||||
|
def convert_document(self, input_doc_path, output_dir):
|
||||||
|
"""Starts the Docling processing."""
|
||||||
|
start_time = time.time()
|
||||||
|
if self.doc_converter is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
conv_res = self.doc_converter.convert(input_doc_path)
|
||||||
|
doc_filename = conv_res.input.file.stem
|
||||||
|
|
||||||
|
# Export tables
|
||||||
|
for table_ix, table in enumerate(conv_res.document.tables):
|
||||||
|
table_df: pd.DataFrame = table.export_to_dataframe()
|
||||||
|
|
||||||
|
# Save the table as csv
|
||||||
|
element_csv_filename = (
|
||||||
|
output_dir + "/" + f"{doc_filename}-table-{table_ix + 1}.csv"
|
||||||
|
)
|
||||||
|
print(f"Saving CSV table to {element_csv_filename}")
|
||||||
|
table_df.to_csv(element_csv_filename)
|
||||||
|
|
||||||
|
# Save the table as html
|
||||||
|
element_html_filename = (
|
||||||
|
output_dir
|
||||||
|
+ "/"
|
||||||
|
+ f"{doc_filename}-table-{table_ix + 1}.element_html_filenameml"
|
||||||
|
)
|
||||||
|
print(f"Saving HTML table to {element_html_filename}")
|
||||||
|
with open(element_html_filename, "w") as fp:
|
||||||
|
fp.write(table.export_to_html(doc=conv_res.document))
|
||||||
|
|
||||||
|
end_time = time.time() - start_time
|
||||||
|
self.label.set_text(f"Document converted in {end_time:.2f} sec.")
|
||||||
|
|
||||||
|
|
||||||
|
class Application(Gtk.Application):
|
||||||
|
def __init__(self):
|
||||||
|
Gtk.Application.__init__(self, application_id="ch.datalets.DoclingApp")
|
||||||
|
self.connect("activate", self.on_activate)
|
||||||
|
|
||||||
|
def on_activate(self, app):
|
||||||
|
self.win = DragDropWindow(app)
|
||||||
|
self.win.present()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
app = Application()
|
||||||
|
app.run(sys.argv)
|
Loading…
Reference in New Issue
Block a user