mirror of
https://github.com/DS4SD/docling.git
synced 2025-07-26 20:14:47 +00:00
Added example GUI
This commit is contained in:
parent
3220a592e7
commit
880de0379b
152
docs/examples/gui-gtk4.py
Normal file
152
docs/examples/gui-gtk4.py
Normal file
@ -0,0 +1,152 @@
|
||||
import gi
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from docling.document_converter import DocumentConverter
|
||||
|
||||
# You need to install GTK 4 for Python on your system
|
||||
gi.require_version("Gtk", "4.0")
|
||||
from gi.repository import Gio, Gtk, Gdk, GLib
|
||||
|
||||
# File-name extensions (lower case, without the leading dot) that a dropped
# file's name is compared against before it is queued for conversion.
ALLOWED_EXTENSIONS = ["pdf"]
|
||||
|
||||
|
||||
class DragDropWindow(Gtk.ApplicationWindow):
    """Main application window: drop a PDF, pick a folder, export its tables.

    Flow: a file is dropped onto the window, the user chooses an output
    folder in a ``Gtk.FileDialog``, then the document is converted with
    Docling and every table found is written to that folder as CSV and HTML.
    """

    # Docling converter; created once in __init__ and reused for every drop.
    doc_converter = None
    # Gtk.FileDialog used to ask the user for the output folder.
    file_dialog = None
    # Output folder picked by the user for the most recent conversion.
    target_folder = None
    # Local path of the dropped file that is waiting to be converted.
    cur_file = None

    def __init__(self, app):
        """Build the widget tree, wire up drag-and-drop and create the converter.

        Args:
            app: The ``Gtk.Application`` that owns this window.
        """
        super().__init__(application=app)
        self.file_dialog = Gtk.FileDialog()

        # Set up window
        self.set_title("Docling")
        self.set_default_size(500, 360)

        # Main container
        self.box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6)
        self.set_child(self.box)

        # Add the logo (path is relative to the current working directory)
        self.logo = Gtk.Picture.new_for_filename("./docs/assets/logo.png")
        self.box.append(self.logo)

        # Label to indicate drop area; also used for status messages
        self.label = Gtk.Label(
            label="Drop files (PDF, ..) here to start processing",
            halign=Gtk.Align.CENTER,
        )
        self.box.append(self.label)

        # Add some padding at the bottom
        self.box.append(Gtk.Label(label="\n\n"))

        # Enable drag-and-drop
        file_drop_target = Gtk.DropTarget.new(Gio.File, Gdk.DragAction.COPY)
        self.add_controller(file_drop_target)
        file_drop_target.connect("drop", self.on_file_drop)

        # Drop hover effect.
        # NOTE(review): no stylesheet defining "overlay-drag-area" is loaded
        # in this file, so the class only has a visible effect if CSS is
        # supplied elsewhere - confirm.
        file_drop_target.connect("enter", self._on_drag_enter)
        file_drop_target.connect("leave", self._on_drag_leave)

        # Initialize converter
        self.doc_converter = DocumentConverter()

    def _on_drag_enter(self, _target, _x, _y):
        """Highlight the drop area and report the action this target accepts."""
        self.box.add_css_class("overlay-drag-area")
        # The "enter" signal expects the preferred Gdk.DragAction as its
        # return value; COPY matches the action the target was created with.
        return Gdk.DragAction.COPY

    def _on_drag_leave(self, _target):
        """Remove the drop-area highlight when the drag leaves the window."""
        self.box.remove_css_class("overlay-drag-area")

    def on_file_drop(self, target, value, _x, _y):
        """Handle a drop: validate the file and ask for an output folder.

        Args:
            target: The ``Gtk.DropTarget`` that emitted the signal.
            value: The dropped value; only ``Gio.File`` instances are handled.
            _x: X coordinate of the drop (only logged).
            _y: Y coordinate of the drop (only logged).

        Returns:
            ``True`` when the drop was accepted and the folder dialog was
            opened, ``False`` otherwise (the "drop" signal expects a boolean).
        """
        print(f"Feeling the drop on {target} {_x} {_y}")
        self.box.remove_css_class("overlay-drag-area")
        if not isinstance(value, Gio.File) or self.file_dialog is None:
            return False

        try:
            file_info = value.query_info(
                "standard::content-type", Gio.FileQueryInfoFlags.NONE, None
            )
        except GLib.Error as err:
            # E.g. the file vanished or is not readable.
            print(f"Could not inspect dropped file: {err.message}")
            self.label.set_text("No valid files dropped")
            return False

        content_type = file_info.get_content_type() or ""
        file_name = value.get_basename() or ""
        # get_path() returns None for files without a local path.
        local_path = value.get_path()
        allowed_suffixes = tuple("." + ext for ext in ALLOWED_EXTENSIONS)

        is_valid = (
            content_type.startswith("application/pdf")
            and file_name.lower().endswith(allowed_suffixes)
            and local_path is not None
        )
        if not is_valid:
            self.label.set_text("No valid files dropped")
            return False

        self.label.set_text(f"Processing file: {file_name}")
        self.cur_file = local_path
        # Asynchronous: on_select_folder is invoked once the dialog closes.
        self.file_dialog.select_folder(
            parent=self,
            cancellable=None,
            callback=self.on_select_folder,
        )
        return True

    def on_select_folder(self, file_dialog, result):
        """Finish the folder dialog and start the conversion.

        Args:
            file_dialog: The ``Gtk.FileDialog`` that was shown.
            result: The async result to pass to ``select_folder_finish``.
        """
        try:
            folder = file_dialog.select_folder_finish(result)
        except GLib.Error:
            # select_folder_finish reports a dismissed/cancelled dialog as an
            # error; treat it as "user changed their mind", not a crash.
            self.label.set_text("No output folder selected")
            return

        folder_path = folder.get_path() if folder is not None else None
        if folder_path is None:
            self.label.set_text("No output folder selected")
            return

        self.target_folder = folder_path
        self.convert_document(self.cur_file, self.target_folder)

    def convert_document(self, input_doc_path, output_dir):
        """Convert a document with Docling and export each table it contains.

        For every table, two files are written into ``output_dir``:
        ``<stem>-table-<n>.csv`` and ``<stem>-table-<n>.html`` (n starts at 1).

        NOTE: this runs synchronously in the caller; when invoked from the
        folder-dialog callback the window does not update until it returns.

        Args:
            input_doc_path: Path of the document to convert.
            output_dir: Directory the exported tables are written to.
        """
        if self.doc_converter is None:
            return

        start_time = time.time()
        output_path = Path(output_dir)

        conv_res = self.doc_converter.convert(input_doc_path)
        doc_filename = conv_res.input.file.stem

        # Export tables
        for table_no, table in enumerate(conv_res.document.tables, start=1):
            table_df: pd.DataFrame = table.export_to_dataframe()

            # Save the table as csv
            element_csv_filename = output_path / f"{doc_filename}-table-{table_no}.csv"
            print(f"Saving CSV table to {element_csv_filename}")
            table_df.to_csv(element_csv_filename)

            # Save the table as html
            element_html_filename = (
                output_path / f"{doc_filename}-table-{table_no}.html"
            )
            print(f"Saving HTML table to {element_html_filename}")
            with open(element_html_filename, "w", encoding="utf-8") as fp:
                fp.write(table.export_to_html(doc=conv_res.document))

        elapsed = time.time() - start_time
        self.label.set_text(f"Document converted in {elapsed:.2f} sec.")
|
||||
|
||||
|
||||
class Application(Gtk.Application):
    """GTK application that presents one DragDropWindow when activated."""

    def __init__(self):
        """Register the application id and hook up the activate handler."""
        super().__init__(application_id="ch.datalets.DoclingApp")
        self.connect("activate", self.on_activate)

    def on_activate(self, app):
        """Create the main window for ``app``, keep a reference and show it."""
        window = DragDropWindow(app)
        self.win = window
        window.present()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    app = Application()
    # run() returns the application's exit status; pass it on to the shell
    # instead of always exiting with 0.
    sys.exit(app.run(sys.argv))
|
Loading…
Reference in New Issue
Block a user