mirror of
https://github.com/DS4SD/docling.git
synced 2025-12-08 12:48:28 +00:00
Commit Graph
Select branches
Hide Pull Requests
add-json-export-indentation
adr-model-stages
cau/dpv4-test-updates
cau/fix-layout-vlm-pipeline-artifacts-path
cau/layout-vlm-pipeline-page-images
cau/multi-stage-vlm-pipeline
cau/new-layout-processing
cau/pin-docling-parse-pre-3.2
cau/test-dp-word-lines
cau/test-pypdfium2-beta
copilot/fix-document-timeout-bug
copilot/fix-keyerror-in-docling
copilot/fix-page-range-bug
cp_main_20250602
demo
dev-granite-docling-table
dev/add-asr-pipeline
dev/add-granite-docling-extension
dev/add-granite-docling-preview
dev/add-r2l-tests
dev/add-reading-order-model
dev/add-two-stage-vlm
dev/analysis-for-granite-docling
dev/doctag_backend
dev/fix_msword_backend_identify_text_after_image
dev/table-orientation
dev/update-html-parser-with-h1
dev/update-to-latest-docling-parse-again
docs/add-extraction-script
elh/update_2stage_inference
extend-metadata-in-examples
gh-pages
main
mao/doctags
mly/smol-docling-integration
nli/fix_glm_utils
nli/fix_ocr_tests
nli/layout_dfine
nli/layout_heron2
nli/layout_rtdetr_v2
nli/layoutmodel_improvements
nli/tesseract_ocr_models
ocr-enrichment
pretest-core-2-51-0
propagate-core-fixes-20250502
remodel-lists-2
revert-803-refactor_viz
rtdl/docx_latex
rtdl/drawingml_import
vku/uspto_meta
#1
#10
#100
#101
#1010
#1015
#1017
#102
#1021
#1024
#1027
#103
#1038
#1039
#1040
#1041
#1051
#1052
#1053
#1054
#1055
#1057
#1061
#1062
#1077
#1096
#1097
#1098
#11
#110
#1100
#1106
#1107
#111
#1114
#1115
#1118
#1124
#1130
#1140
#1141
#1147
#1150
#1152
#1154
#1156
#1158
#1160
#1165
#1167
#117
#1173
#118
#1182
#1183
#1194
#1196
#1197
#1199
#12
#120
#1201
#121
#1210
#122
#1220
#1222
#1223
#123
#1231
#1238
#1239
#1241
#1244
#1247
#1248
#1261
#1263
#1268
#1270
#1286
#129
#1294
#1295
#13
#131
#1313
#1315
#1316
#1319
#132
#1320
#1326
#1328
#1332
#1334
#1337
#134
#1340
#1346
#135
#1350
#1355
#1359
#1363
#1371
#1375
#1377
#1378
#1379
#138
#1381
#1382
#1383
#1389
#139
#1392
#1399
#14
#140
#1400
#1402
#141
#1411
#1415
#1416
#1419
#1427
#1428
#143
#1430
#1436
#1442
#1449
#145
#1458
#1459
#1463
#1465
#1486
#149
#1490
#1492
#1494
#1496
#15
#150
#1500
#151
#1511
#1512
#152
#1520
#1523
#1524
#1525
#1526
#1527
#1528
#153
#1530
#1536
#1538
#154
#1548
#1549
#155
#1551
#1553
#1556
#1559
#156
#1560
#1561
#1563
#1566
#157
#1570
#1576
#1577
#158
#1582
#1583
#1587
#1589
#159
#1593
#1596
#16
#160
#1600
#1609
#161
#1610
#1615
#1617
#1619
#162
#1636
#164
#1658
#1659
#1660
#1663
#1664
#1665
#1667
#1671
#1673
#1676
#1679
#168
#1683
#1684
#1688
#1689
#169
#1691
#1698
#17
#170
#1700
#1701
#1706
#1707
#171
#1711
#1717
#1718
#1723
#1724
#1725
#1728
#173
#1734
#1735
#1745
#1746
#1747
#175
#1759
#1763
#1769
#177
#1772
#1775
#178
#179
#1791
#1795
#18
#180
#1802
#1804
#1808
#1810
#1812
#1815
#1816
#1819
#182
#1820
#1821
#1824
#1825
#1827
#183
#1836
#1838
#184
#1844
#1850
#1851
#1852
#1856
#1857
#186
#1863
#1866
#1867
#187
#1870
#1874
#1875
#1876
#188
#1884
#189
#1897
#1898
#1898
#19
#190
#1902
#1904
#1905
#1907
#1908
#1910
#1912
#1914
#1917
#1923
#1925
#1926
#1928
#193
#1931
#1934
#1937
#194
#1940
#1943
#1948
#1951
#1952
#196
#1960
#1969
#1970
#1971
#1975
#1981
#1982
#1984
#1986
#1988
#1989
#1992
#1995
#2
#20
#2001
#2002
#2006
#2011
#2017
#2018
#2024
#203
#2031
#2039
#2042
#2048
#2061
#2068
#2069
#2078
#2079
#2083
#2084
#2084
#2088
#2093
#2094
#2095
#2095
#21
#2100
#2105
#2106
#2110
#2111
#2112
#2113
#2114
#2114
#2122
#2123
#2124
#2126
#2131
#2132
#2133
#2138
#214
#2141
#2146
#2154
#2155
#2165
#2166
#2169
#217
#217
#2171
#2178
#218
#218
#2183
#2185
#2187
#219
#2199
#22
#2200
#2208
#2212
#2218
#2219
#2227
#2227
#2231
#2234
#2237
#2238
#224
#2242
#2244
#2251
#2252
#226
#2262
#2264
#2265
#2266
#2272
#228
#2281
#2284
#2284
#2287
#2288
#229
#2291
#2294
#2304
#2309
#2313
#2315
#2322
#2323
#2324
#233
#2339
#234
#2340
#2341
#235
#2357
#2359
#2361
#2365
#2366
#2371
#2372
#2373
#2378
#2378
#2382
#2383
#2388
#2391
#2394
#240
#2401
#2403
#2403
#2404
#2407
#2409
#2409
#241
#2410
#2411
#2413
#2415
#2418
#2420
#2421
#2422
#2423
#2424
#2425
#2426
#2427
#2429
#2430
#2431
#2433
#2436
#2441
#2442
#2445
#2445
#2447
#2452
#2453
#2454
#2458
#2459
#2468
#2473
#2474
#248
#2484
#2486
#2488
#2488
#2489
#2498
#2499
#2501
#2502
#2503
#251
#2511
#2512
#2513
#2517
#2519
#2520
#2521
#2526
#2527
#2530
#2531
#2533
#2541
#2543
#2546
#2548
#2549
#2553
#2563
#2569
#2571
#2573
#2578
#2582
#2585
#2587
#2587
#2588
#2589
#259
#2596
#2599
#26
#2600
#2605
#2613
#2618
#2622
#2622
#2624
#2627
#2636
#2637
#2638
#2639
#2640
#2641
#2644
#2645
#2645
#2648
#2649
#2651
#2653
#2656
#2658
#2659
#2660
#2662
#2664
#2665
#2669
#2671
#2674
#2676
#2676
#2678
#2678
#2682
#2682
#2689
#2692
#2693
#27
#2706
#2707
#2708
#2712
#2716
#2717
#2720
#2721
#2721
#2723
#2723
#2728
#2735
#2738
#2738
#2739
#2740
#2740
#2741
#2741
#275
#276
#279
#28
#282
#286
#29
#290
#3
#302
#305
#307
#31
#310
#312
#314
#315
#316
#319
#32
#320
#322
#323
#325
#33
#330
#332
#334
#339
#34
#340
#341
#349
#35
#350
#36
#37
#371
#374
#375
#378
#379
#38
#384
#388
#39
#392
#393
#396
#4
#40
#401
#407
#408
#409
#415
#416
#42
#429
#43
#430
#432
#44
#442
#449
#45
#451
#456
#457
#46
#466
#467
#468
#47
#472
#474
#475
#482
#484
#487
#49
#490
#492
#495
#496
#497
#5
#50
#500
#501
#502
#504
#51
#511
#512
#513
#514
#517
#52
#528
#53
#530
#531
#532
#533
#534
#537
#54
#544
#549
#550
#551
#552
#555
#556
#557
#558
#56
#569
#57
#58
#59
#593
#6
#604
#606
#608
#613
#615
#616
#618
#624
#628
#63
#630
#631
#633
#642
#65
#650
#655
#656
#662
#675
#679
#68
#69
#691
#693
#694
#695
#697
#698
#7
#70
#700
#701
#702
#708
#71
#716
#717
#718
#719
#72
#733
#735
#739
#742
#75
#752
#759
#769
#772
#777
#783
#786
#788
#79
#793
#8
#80
#800
#801
#803
#804
#805
#808
#81
#811
#814
#815
#816
#817
#818
#819
#82
#820
#821
#824
#825
#826
#827
#83
#830
#831
#832
#837
#839
#84
#841
#842
#843
#850
#852
#853
#854
#855
#856
#857
#86
#862
#868
#869
#872
#873
#874
#875
#876
#878
#88
#880
#881
#883
#896
#897
#90
#901
#903
#905
#91
#910
#912
#916
#919
#92
#929
#93
#932
#935
#940
#941
#945
#948
#949
#95
#951
#958
#96
#965
#966
#967
#98
#99
#999
v0.1.1
v0.2.0
v0.3.0
v0.3.1
v0.4.0
v1.0.0
v1.0.1
v1.0.2
v1.1.0
v1.1.1
v1.1.2
v1.10.0
v1.11.0
v1.12.0
v1.12.1
v1.12.2
v1.13.0
v1.13.1
v1.14.0
v1.15.0
v1.16.0
v1.16.1
v1.17.0
v1.18.0
v1.19.0
v1.19.1
v1.2.0
v1.2.1
v1.20.0
v1.3.0
v1.4.0
v1.5.0
v1.6.0
v1.6.1
v1.6.2
v1.6.3
v1.7.0
v1.7.1
v1.8.0
v1.8.1
v1.8.2
v1.8.3
v1.8.4
v1.8.5
v1.9.0
v2.0.0
v2.1.0
v2.10.0
v2.11.0
v2.12.0
v2.13.0
v2.14.0
v2.15.0
v2.15.1
v2.16.0
v2.17.0
v2.18.0
v2.19.0
v2.2.0
v2.2.1
v2.20.0
v2.21.0
v2.22.0
v2.23.0
v2.23.1
v2.24.0
v2.25.0
v2.25.1
v2.25.2
v2.26.0
v2.27.0
v2.28.0
v2.28.1
v2.28.2
v2.28.3
v2.28.4
v2.29.0
v2.3.0
v2.3.1
v2.30.0
v2.31.0
v2.31.1
v2.31.2
v2.32.0
v2.33.0
v2.34.0
v2.35.0
v2.36.0
v2.36.1
v2.37.0
v2.38.0
v2.38.1
v2.39.0
v2.4.0
v2.4.1
v2.4.2
v2.40.0
v2.41.0
v2.42.0
v2.42.1
v2.42.2
v2.43.0
v2.44.0
v2.45.0
v2.46.0
v2.47.0
v2.47.1
v2.48.0
v2.49.0
v2.5.0
v2.5.1
v2.5.2
v2.50.0
v2.51.0
v2.52.0
v2.53.0
v2.54.0
v2.55.0
v2.55.1
v2.56.0
v2.56.1
v2.57.0
v2.58.0
v2.59.0
v2.6.0
v2.60.0
v2.60.1
v2.61.0
v2.61.1
v2.61.2
v2.62.0
v2.63.0
v2.64.0
v2.7.0
v2.7.1
v2.8.0
v2.8.1
v2.8.2
v2.8.3
v2.9.0
Select branches
Hide Pull Requests
add-json-export-indentation
adr-model-stages
cau/dpv4-test-updates
cau/fix-layout-vlm-pipeline-artifacts-path
cau/layout-vlm-pipeline-page-images
cau/multi-stage-vlm-pipeline
cau/new-layout-processing
cau/pin-docling-parse-pre-3.2
cau/test-dp-word-lines
cau/test-pypdfium2-beta
copilot/fix-document-timeout-bug
copilot/fix-keyerror-in-docling
copilot/fix-page-range-bug
cp_main_20250602
demo
dev-granite-docling-table
dev/add-asr-pipeline
dev/add-granite-docling-extension
dev/add-granite-docling-preview
dev/add-r2l-tests
dev/add-reading-order-model
dev/add-two-stage-vlm
dev/analysis-for-granite-docling
dev/doctag_backend
dev/fix_msword_backend_identify_text_after_image
dev/table-orientation
dev/update-html-parser-with-h1
dev/update-to-latest-docling-parse-again
docs/add-extraction-script
elh/update_2stage_inference
extend-metadata-in-examples
gh-pages
main
mao/doctags
mly/smol-docling-integration
nli/fix_glm_utils
nli/fix_ocr_tests
nli/layout_dfine
nli/layout_heron2
nli/layout_rtdetr_v2
nli/layoutmodel_improvements
nli/tesseract_ocr_models
ocr-enrichment
pretest-core-2-51-0
propagate-core-fixes-20250502
remodel-lists-2
revert-803-refactor_viz
rtdl/docx_latex
rtdl/drawingml_import
vku/uspto_meta
#1
#10
#100
#101
#1010
#1015
#1017
#102
#1021
#1024
#1027
#103
#1038
#1039
#1040
#1041
#1051
#1052
#1053
#1054
#1055
#1057
#1061
#1062
#1077
#1096
#1097
#1098
#11
#110
#1100
#1106
#1107
#111
#1114
#1115
#1118
#1124
#1130
#1140
#1141
#1147
#1150
#1152
#1154
#1156
#1158
#1160
#1165
#1167
#117
#1173
#118
#1182
#1183
#1194
#1196
#1197
#1199
#12
#120
#1201
#121
#1210
#122
#1220
#1222
#1223
#123
#1231
#1238
#1239
#1241
#1244
#1247
#1248
#1261
#1263
#1268
#1270
#1286
#129
#1294
#1295
#13
#131
#1313
#1315
#1316
#1319
#132
#1320
#1326
#1328
#1332
#1334
#1337
#134
#1340
#1346
#135
#1350
#1355
#1359
#1363
#1371
#1375
#1377
#1378
#1379
#138
#1381
#1382
#1383
#1389
#139
#1392
#1399
#14
#140
#1400
#1402
#141
#1411
#1415
#1416
#1419
#1427
#1428
#143
#1430
#1436
#1442
#1449
#145
#1458
#1459
#1463
#1465
#1486
#149
#1490
#1492
#1494
#1496
#15
#150
#1500
#151
#1511
#1512
#152
#1520
#1523
#1524
#1525
#1526
#1527
#1528
#153
#1530
#1536
#1538
#154
#1548
#1549
#155
#1551
#1553
#1556
#1559
#156
#1560
#1561
#1563
#1566
#157
#1570
#1576
#1577
#158
#1582
#1583
#1587
#1589
#159
#1593
#1596
#16
#160
#1600
#1609
#161
#1610
#1615
#1617
#1619
#162
#1636
#164
#1658
#1659
#1660
#1663
#1664
#1665
#1667
#1671
#1673
#1676
#1679
#168
#1683
#1684
#1688
#1689
#169
#1691
#1698
#17
#170
#1700
#1701
#1706
#1707
#171
#1711
#1717
#1718
#1723
#1724
#1725
#1728
#173
#1734
#1735
#1745
#1746
#1747
#175
#1759
#1763
#1769
#177
#1772
#1775
#178
#179
#1791
#1795
#18
#180
#1802
#1804
#1808
#1810
#1812
#1815
#1816
#1819
#182
#1820
#1821
#1824
#1825
#1827
#183
#1836
#1838
#184
#1844
#1850
#1851
#1852
#1856
#1857
#186
#1863
#1866
#1867
#187
#1870
#1874
#1875
#1876
#188
#1884
#189
#1897
#1898
#1898
#19
#190
#1902
#1904
#1905
#1907
#1908
#1910
#1912
#1914
#1917
#1923
#1925
#1926
#1928
#193
#1931
#1934
#1937
#194
#1940
#1943
#1948
#1951
#1952
#196
#1960
#1969
#1970
#1971
#1975
#1981
#1982
#1984
#1986
#1988
#1989
#1992
#1995
#2
#20
#2001
#2002
#2006
#2011
#2017
#2018
#2024
#203
#2031
#2039
#2042
#2048
#2061
#2068
#2069
#2078
#2079
#2083
#2084
#2084
#2088
#2093
#2094
#2095
#2095
#21
#2100
#2105
#2106
#2110
#2111
#2112
#2113
#2114
#2114
#2122
#2123
#2124
#2126
#2131
#2132
#2133
#2138
#214
#2141
#2146
#2154
#2155
#2165
#2166
#2169
#217
#217
#2171
#2178
#218
#218
#2183
#2185
#2187
#219
#2199
#22
#2200
#2208
#2212
#2218
#2219
#2227
#2227
#2231
#2234
#2237
#2238
#224
#2242
#2244
#2251
#2252
#226
#2262
#2264
#2265
#2266
#2272
#228
#2281
#2284
#2284
#2287
#2288
#229
#2291
#2294
#2304
#2309
#2313
#2315
#2322
#2323
#2324
#233
#2339
#234
#2340
#2341
#235
#2357
#2359
#2361
#2365
#2366
#2371
#2372
#2373
#2378
#2378
#2382
#2383
#2388
#2391
#2394
#240
#2401
#2403
#2403
#2404
#2407
#2409
#2409
#241
#2410
#2411
#2413
#2415
#2418
#2420
#2421
#2422
#2423
#2424
#2425
#2426
#2427
#2429
#2430
#2431
#2433
#2436
#2441
#2442
#2445
#2445
#2447
#2452
#2453
#2454
#2458
#2459
#2468
#2473
#2474
#248
#2484
#2486
#2488
#2488
#2489
#2498
#2499
#2501
#2502
#2503
#251
#2511
#2512
#2513
#2517
#2519
#2520
#2521
#2526
#2527
#2530
#2531
#2533
#2541
#2543
#2546
#2548
#2549
#2553
#2563
#2569
#2571
#2573
#2578
#2582
#2585
#2587
#2587
#2588
#2589
#259
#2596
#2599
#26
#2600
#2605
#2613
#2618
#2622
#2622
#2624
#2627
#2636
#2637
#2638
#2639
#2640
#2641
#2644
#2645
#2645
#2648
#2649
#2651
#2653
#2656
#2658
#2659
#2660
#2662
#2664
#2665
#2669
#2671
#2674
#2676
#2676
#2678
#2678
#2682
#2682
#2689
#2692
#2693
#27
#2706
#2707
#2708
#2712
#2716
#2717
#2720
#2721
#2721
#2723
#2723
#2728
#2735
#2738
#2738
#2739
#2740
#2740
#2741
#2741
#275
#276
#279
#28
#282
#286
#29
#290
#3
#302
#305
#307
#31
#310
#312
#314
#315
#316
#319
#32
#320
#322
#323
#325
#33
#330
#332
#334
#339
#34
#340
#341
#349
#35
#350
#36
#37
#371
#374
#375
#378
#379
#38
#384
#388
#39
#392
#393
#396
#4
#40
#401
#407
#408
#409
#415
#416
#42
#429
#43
#430
#432
#44
#442
#449
#45
#451
#456
#457
#46
#466
#467
#468
#47
#472
#474
#475
#482
#484
#487
#49
#490
#492
#495
#496
#497
#5
#50
#500
#501
#502
#504
#51
#511
#512
#513
#514
#517
#52
#528
#53
#530
#531
#532
#533
#534
#537
#54
#544
#549
#550
#551
#552
#555
#556
#557
#558
#56
#569
#57
#58
#59
#593
#6
#604
#606
#608
#613
#615
#616
#618
#624
#628
#63
#630
#631
#633
#642
#65
#650
#655
#656
#662
#675
#679
#68
#69
#691
#693
#694
#695
#697
#698
#7
#70
#700
#701
#702
#708
#71
#716
#717
#718
#719
#72
#733
#735
#739
#742
#75
#752
#759
#769
#772
#777
#783
#786
#788
#79
#793
#8
#80
#800
#801
#803
#804
#805
#808
#81
#811
#814
#815
#816
#817
#818
#819
#82
#820
#821
#824
#825
#826
#827
#83
#830
#831
#832
#837
#839
#84
#841
#842
#843
#850
#852
#853
#854
#855
#856
#857
#86
#862
#868
#869
#872
#873
#874
#875
#876
#878
#88
#880
#881
#883
#896
#897
#90
#901
#903
#905
#91
#910
#912
#916
#919
#92
#929
#93
#932
#935
#940
#941
#945
#948
#949
#95
#951
#958
#96
#965
#966
#967
#98
#99
#999
v0.1.1
v0.2.0
v0.3.0
v0.3.1
v0.4.0
v1.0.0
v1.0.1
v1.0.2
v1.1.0
v1.1.1
v1.1.2
v1.10.0
v1.11.0
v1.12.0
v1.12.1
v1.12.2
v1.13.0
v1.13.1
v1.14.0
v1.15.0
v1.16.0
v1.16.1
v1.17.0
v1.18.0
v1.19.0
v1.19.1
v1.2.0
v1.2.1
v1.20.0
v1.3.0
v1.4.0
v1.5.0
v1.6.0
v1.6.1
v1.6.2
v1.6.3
v1.7.0
v1.7.1
v1.8.0
v1.8.1
v1.8.2
v1.8.3
v1.8.4
v1.8.5
v1.9.0
v2.0.0
v2.1.0
v2.10.0
v2.11.0
v2.12.0
v2.13.0
v2.14.0
v2.15.0
v2.15.1
v2.16.0
v2.17.0
v2.18.0
v2.19.0
v2.2.0
v2.2.1
v2.20.0
v2.21.0
v2.22.0
v2.23.0
v2.23.1
v2.24.0
v2.25.0
v2.25.1
v2.25.2
v2.26.0
v2.27.0
v2.28.0
v2.28.1
v2.28.2
v2.28.3
v2.28.4
v2.29.0
v2.3.0
v2.3.1
v2.30.0
v2.31.0
v2.31.1
v2.31.2
v2.32.0
v2.33.0
v2.34.0
v2.35.0
v2.36.0
v2.36.1
v2.37.0
v2.38.0
v2.38.1
v2.39.0
v2.4.0
v2.4.1
v2.4.2
v2.40.0
v2.41.0
v2.42.0
v2.42.1
v2.42.2
v2.43.0
v2.44.0
v2.45.0
v2.46.0
v2.47.0
v2.47.1
v2.48.0
v2.49.0
v2.5.0
v2.5.1
v2.5.2
v2.50.0
v2.51.0
v2.52.0
v2.53.0
v2.54.0
v2.55.0
v2.55.1
v2.56.0
v2.56.1
v2.57.0
v2.58.0
v2.59.0
v2.6.0
v2.60.0
v2.60.1
v2.61.0
v2.61.1
v2.61.2
v2.62.0
v2.63.0
v2.64.0
v2.7.0
v2.7.1
v2.8.0
v2.8.1
v2.8.2
v2.8.3
v2.9.0
-
af18215714
Rename docling backend to v4
Christoph Auer
2025-03-14 12:35:06 +01:00 -
fa16b12316
chore: move to docling-project org (#1160)
Michele Dolfi
2025-03-14 12:35:29 +01:00 -
b77f73beec
Text fixes, new test data
Christoph Auer
2025-03-14 11:44:09 +01:00 -
f94da44ec5
fix(html): handle nested empty lists (#1154)
Cesar Berrospi Ramis
2025-03-13 16:56:58 +01:00 -
e00f362405
Update tests, use TextCell.from_ocr property
Christoph Auer
2025-03-13 16:04:08 +01:00 -
0945973b79
fix: use first table row as col headers (#1156)
Panos Vagenas
2025-03-13 15:34:18 +01:00 -
6eb718f849
feat: equations to latex in MSWord backend (with inline groups) (#1114)
Rafael Teixeira de Lima
2025-03-13 15:12:22 +01:00 -
aa92a57fa9
fix: Pass tests, update docling-core to 2.22.0 (#1150)
Cesar Berrospi Ramis
2025-03-13 09:45:55 +01:00 -
6e06040da6
Fix tests
Christoph Auer
2025-03-12 20:04:17 +01:00 -
f1cce8ff07
Ground-truth files updated
Christoph Auer
2025-03-12 19:57:18 +01:00 -
519bc43e47
fix: update docling-core to 2.22.0
Cesar Berrospi Ramis
2025-03-12 19:38:03 +01:00 -
90b0f73d06
Update locks
Christoph Auer
2025-03-12 16:54:23 +01:00 -
9ebd7108f2
Add back DoclingParse v1 backend, pipeline options
Christoph Auer
2025-03-12 16:28:25 +01:00 -
8a45a2cafa
update test units
Christoph Auer
2025-03-12 12:07:03 +01:00 -
15282547cb
update test cases
Christoph Auer
2025-03-12 11:04:48 +01:00 -
18b4991aa4
Reset tests
Christoph Auer
2025-03-11 16:34:38 +01:00 -
a5089ef8f6
Merge branch 'cau/docling-parse-api' of github.com:DS4SD/docling into cau/docling-parse-api
Christoph Auer
2025-03-11 16:31:50 +01:00 -
1b9fcf0edf
Fix streams
Christoph Auer
2025-03-11 16:24:49 +01:00 -
31c86613e5
Fix streams
Christoph Auer
2025-03-11 16:24:49 +01:00 -
fbcde2cdeb
Merge branch 'main' of github.com:DS4SD/docling into cau/docling-parse-api
Christoph Auer
2025-03-11 16:06:55 +01:00 -
f411772569
Fixes and test updates
Christoph Auer
2025-03-11 16:06:28 +01:00 -
0dd596ff09
Draft implementation of Doctag backend
dev/doctag_backend
Maksym Lysak
2025-03-11 14:02:34 +01:00 -
78353f1697
Use docling-core with docling-parse types
Christoph Auer
2025-03-11 13:37:24 +01:00 -
17c5bf1242
chore: bump version to 2.26.0 [skip ci]
v2.26.0
github-actions[bot]
2025-03-11 11:12:43 +00:00 -
eb97357b05
feat: Use new TableFormer model weights and default to accurate model version (#1100)
Christoph Auer
2025-03-11 10:53:49 +01:00 -
5e30381c0d
perf: New revision code formula model and document picture classifier (#1140)
Matteo
2025-03-11 09:15:28 +00:00 -
099aa4da83
Updates for DoclingParseV3DocumentBackend
Christoph Auer
2025-03-10 17:11:20 +01:00 -
4d64c4c0b6
fix(CLI): fix help message for abort options (#1130)
Michele Dolfi
2025-03-07 14:47:49 +01:00 -
e1c49ad727
docs: add description of DOCLING_ARTIFACTS_PATH env var (#1124)
Michele Dolfi
2025-03-06 07:30:07 +01:00 -
a3c957ca6b
chore: bump version to 2.25.2 [skip ci]
v2.25.2
github-actions[bot]
2025-03-05 14:51:57 +00:00 -
c56ab3a66b
fix: Proper handling of orphan IDs in layout postprocessing (#1118)
Christoph Auer
2025-03-05 14:30:59 +01:00 -
655e95dd72
Upgrading docling core and adding groups
rtdl/docx_latex
Rafael Teixeira de Lima
2025-03-04 17:18:40 +01:00 -
5630c6b8fd
Merge branch 'main' into rtdl/docx_latex
Rafael Teixeira de Lima
2025-03-04 16:51:53 +01:00 -
357d41cc47
docs: Enrichment models (#1097)
Michele Dolfi
2025-03-04 14:24:38 +01:00 -
b1e79cadc7
chore: bump version to 2.25.1 [skip ci]
v2.25.1
github-actions[bot]
2025-03-03 00:56:40 +00:00 -
0c1e9391de
chore: use gh cache for huggingface models (#1096)
Michele Dolfi
2025-03-03 00:13:47 +01:00 -
8dc0562542
fix: enable locks for threadsafe pdfium (#1052)
Michele Dolfi
2025-03-02 20:06:44 +01:00 -
e25d557c06
refactor: add the contentlayer to html-backend (#1040)
Peter W. J. Staar
2025-03-02 10:37:53 -05:00 -
db3ceefd4a
docs: improve docs on token limit warning triggered by HybridChunker (#1077)
Panos Vagenas
2025-02-28 14:54:46 +01:00 -
de7b963b09
fix(html): use 'start' attribute when parsing ordered lists from HTML docs (#1062)
Cesar Berrospi Ramis
2025-02-27 09:46:57 +01:00 -
37dd8c1cc7
chore: bump version to 2.25.0 [skip ci]
v2.25.0
github-actions[bot]
2025-02-26 14:16:15 +00:00 -
3c9fe76b70
feat: [Experimental] Introduce VLM pipeline using HF AutoModelForVision2Seq, featuring SmolDocling model (#1054)
Christoph Auer
2025-02-26 14:43:26 +01:00 -
ab683e4fb6
feat(cli): add option for downloading all models, refine help messages (#1061)
Panos Vagenas
2025-02-26 13:27:29 +01:00 -
e197225739
fix: vlm using artifacts path (#1057)
Michele Dolfi
2025-02-26 08:33:50 +01:00 -
c84b973959
docs: extend chunking docs, add FAQ on token limit (#1053)
Panos Vagenas
2025-02-25 13:07:38 +01:00 -
1c75b52f85
re-built poetry.lock
mly/smol-docling-integration
Maksym Lysak
2025-02-24 17:37:35 +01:00 -
9ecec1d330
Updated poetry.lock
Maksym Lysak
2025-02-24 17:27:50 +01:00 -
923f766ada
Replaced remaining strings to appropriate enums
Maksym Lysak
2025-02-24 16:54:59 +01:00 -
a095a7c5b7
removing changes from base_pipeline
Maksym Lysak
2025-02-24 15:13:59 +01:00 -
a7a1f32b10
Added example on how to get original predicted doctags in minimal_smol_docling
Maksym Lysak
2025-02-24 14:39:18 +01:00 -
1dbedcbb4e
removed pipeline_options.generate_table_images from vlm_pipeline (deprecated in the pipelines)
Maksym Lysak
2025-02-24 14:17:06 +01:00 -
0c60ef199a
Moved keep_backend = True to vlm pipeline
Maksym Lysak
2025-02-13 17:53:03 +01:00 -
853544ba11
Addressing PR comments, added enabled property to SmolDocling, and related VLM pipeline option, few other minor things
Maksym Lysak
2025-02-13 17:19:53 +01:00 -
b0935daec4
Removed special html code wrapping when exporting to docling document, cleaned up comments
Maksym Lysak
2025-02-13 10:29:37 +01:00 -
b12f5ba80f
removed minimal_smol_docling example from CI checks
Maksym Lysak
2025-02-13 09:42:45 +01:00 -
66532eadb6
More elegant solution in removing the input prompt
Maksym Lysak
2025-02-12 18:48:48 +01:00 -
e486eb1720
Cleaned up unnecessary logging
Maksym Lysak
2025-02-12 17:56:37 +01:00 -
55fa4eb4e3
Fix repo id
Christoph Auer
2025-02-12 17:09:56 +01:00 -
6f9f4f4aee
Update minimal smoldocling example
Christoph Auer
2025-02-12 17:07:00 +01:00 -
b1df461ca8
Added captions for the images for SmolDocling assembly code, improved provenance definition for all elements
Maksym Lysak
2025-02-11 16:42:23 +01:00 -
d7abe1b1cd
Updated example of Smol Docling usage
Maksym Lysak
2025-02-11 13:53:19 +01:00 -
479ee239aa
New assembly code for latest model revision, updated prompt and parsing of doctags, updated logging
Maksym Lysak
2025-02-11 13:34:14 +01:00 -
7c4ab5c716
Moved artifacts_path for SmolDocling into vlm_options instead of global pipeline option
Maksym Lysak
2025-01-21 18:00:05 +01:00 -
f2751e11f9
Introduced SmolDoclingOptions to configure model parameters (such as query and artifacts path) via client code, see example in minimal_smol_docling. Provisioning for other potential vlm all-in-one models.
Maksym Lysak
2025-01-21 17:37:11 +01:00 -
88b9ac6706
Fixing doctags starting tag, that broke elements on first line during assembly
Maksym Lysak
2025-01-21 11:14:55 +01:00 -
0fe12d819a
Updated vlm pipeline assembly and smol docling model code to support updated doctags
Maksym Lysak
2025-01-17 17:54:55 +01:00 -
f6d123a01c
Flipped keep_backend to True for vlm_pipeline assembly to work
Maksym Lysak
2025-01-16 16:51:27 +01:00 -
9901729d8c
Exposed "force_backend_text" as pipeline parameter
Maksym Lysak
2025-01-16 14:23:59 +01:00 -
0dc3ac43b1
Added capability for vlm_pipeline to grab text from preconfigured backend
Maksym Lysak
2025-01-16 10:44:49 +01:00 -
e0929781f4
Added tokens/sec measurement, improved example
Maksym Lysak
2025-01-15 10:22:48 +01:00 -
437053572d
Replaced hardcoded otsl tokens with the ones from docling-core tokens.py enum
Maksym Lysak
2025-01-14 16:07:37 +01:00 -
2a43c199d5
Cleaned up logs, added pages to vlm_pipeline, basic timing per page measurement in smol_docling models
Maksym Lysak
2025-01-14 14:04:47 +01:00 -
61bb9dbba2
Properly propagating image data per page, together with predicted tags in VLM pipeline. This enables correct figure extraction and page numbers in provenances
Maksym Lysak
2025-01-13 15:21:19 +01:00 -
01c46e24b1
Fix for table span compute in vlm_pipeline
Maksym Lysak
2025-01-10 16:30:12 +01:00 -
ef079e4e78
Enabled figure support in vlm_pipeline
Maksym Lysak
2025-01-10 13:56:46 +01:00 -
1b968e4984
Fixes to preserve page image and demo export to html
Maksym Lysak
2025-01-10 10:50:35 +01:00 -
3c4c647615
WIP, first working code for inference of SmolDocling, and vlm pipeline assembly code, example included.
Maksym Lysak
2025-01-09 18:41:00 +01:00 -
03c8d45790
wip smolDocling inference and vlm pipeline
Maksym Lysak
2025-01-09 14:43:04 +01:00 -
1b0ead6907
fix(html): Parse text in div elements as TextItem (#1041)
Cesar Berrospi Ramis
2025-02-24 12:38:29 +01:00 -
dc3a388aa2
Skeleton for SmolDocling model and VLM Pipeline
Christoph Auer
2025-01-08 10:16:54 +01:00 -
1d17e7397a
test: avoid testing exact JSON in CSV backend (#1038)
Suehtam
2025-02-24 07:10:40 +00:00 -
d8a81c3168
chore: bump version to 2.24.0 [skip ci]
v2.24.0
github-actions[bot]
2025-02-20 18:31:20 +00:00 -
c93e36988f
feat: Implement new reading-order model (#916)
Christoph Auer
2025-02-20 17:51:17 +01:00 -
c031a7ae47
chore: bump version to 2.23.1 [skip ci]
v2.23.1
github-actions[bot]
2025-02-20 16:26:41 +00:00 -
1ac010354f
test: avoid testing exact JSON (#1027)
Cesar Berrospi Ramis
2025-02-20 16:20:07 +01:00 -
6796f0a132
fix: Runtime error when Pandas Series is not always of string type (#1024)
fanszoro
2025-02-20 22:41:41 +08:00 -
dfcc30dddb
chore: Update tests and lockfile (#1021)
Christoph Auer
2025-02-19 16:51:53 +01:00 -
27c04007bc
docs: revamp picture description example (#1015)
Panos Vagenas
2025-02-19 11:28:54 +01:00 -
7450050ace
refactor: upgrade BeautifulSoup4 with type hints (#999)
Cesar Berrospi Ramis
2025-02-18 11:30:47 +01:00 -
dadff50589
fix: Disable the TOKENIZERS_PARALLELISM in test_e2e_ocr_conversion.py to avoid warning messages from HF
nli/fix_ocr_tests
Nikos Livathinos
2025-02-18 10:58:11 +01:00 -
75db61127c
chore: bump version to 2.23.0 [skip ci]
v2.23.0
github-actions[bot]
2025-02-17 14:22:49 +00:00 -
6e75f0b5d3
fix: Revise DocTags, fix iterate_items to output content_layer in items (#965)
Maxim Lysak
2025-02-17 14:11:55 +01:00 -
77eb77bdc2
feat: Support cuda:n GPU device allocation (#694)
Ahmed Nassar
2025-02-17 11:31:13 +01:00 -
428b656793
feat(xml-jats): parse XML JATS documents (#967)
Cesar Berrospi Ramis
2025-02-17 10:43:31 +01:00 -
e1436a8b05
test: validate actual docitems in tests (#966)
Michele Dolfi
2025-02-14 17:47:53 +01:00 -
b5b1ddca3b
chore: Restore the orphan clusters
Nikos Livathinos
2025-02-14 11:13:54 +01:00 -
ffbde1d1b0
chore: bump version to 2.22.0 [skip ci]
v2.22.0
github-actions[bot]
2025-02-14 08:53:20 +00:00 -
00d9405b0a
feat: Add support for CSV input with new backend to transform CSV files to DoclingDocument (#945)
Tobias Strebitzer
2025-02-14 15:55:09 +08:00 -
7493d5b01f
docs: update example Dockerfile with download CLI (#929)
Michele Dolfi
2025-02-13 14:19:50 +01:00 -
af19c03f6e
fix: update Pillow constraints (#958)
Michele Dolfi
2025-02-13 14:19:37 +01:00