@comment{references.bib: bibliography database. Web-page navigation text and the line-number gutter from the file viewer were removed.}
@inproceedings{eschauzier_quweda_2023,
  author = {Eschauzier, Ruben and Taelman, Ruben and Verborgh, Ruben},
  title = {How does the link queue evolve during traversal-based query processing?},
  booktitle = {7th Workshop on Storing, Querying and Benchmarking Knowledge Graphs ({QuWeDa}) at {ISWC} 2023},
  pages = {26--33},
  year = {2023},
  location = {Athens, Greece},
  issn = {1613-0073},
  language = {und},
}
@article{hartig2016Ldql,
  author = {Hartig, Olaf and P{\'e}rez, Jorge},
  title = {{LDQL}: A query language for the {Web} of {Linked} {Data}},
  journal = {Journal of Web Semantics},
  volume = {41},
  pages = {9--29},
  year = {2016},
  month = dec,
  publisher = {Elsevier BV},
  issn = {1570-8268},
  doi = {10.1016/j.websem.2016.10.001},
  url = {https://doi.org/10.1016/j.websem.2016.10.001},
}
@article{Schmidt2008FoundationsOS,
  author = {Schmidt, Michael and Meier, Michael and Lausen, Georg},
  title = {Foundations of {SPARQL} query optimization},
  journal = {CoRR},
  volume = {abs/0812.3788},
  year = {2008},
  eprint = {0812.3788},
  archiveprefix = {arXiv},
  file = {:articles/Foundations of SPARQL Query Optimization.pdf:PDF},
  priority = {prio3},
}
@article{Sklavos2022EstimatingTC,
  author = {Sklavos, Anton and Fafalios, Pavlos and Tzitzikas, Yannis},
  title = {Estimating the Cost of Executing Link Traversal based {SPARQL} Queries},
  journal = {CoRR},
  volume = {abs/2210.09100},
  year = {2022},
  eprint = {2210.09100},
  archiveprefix = {arXiv},
  file = {:articles/Estimating the Cost of Executing Link Traversal based SPARQL Queries.pdf:PDF},
  groups = {Link traversal, optimisation technique},
  ranking = {rank3},
  readstatus = {read},
}
@inproceedings{Komamizu2017CROISSANTCR,
  author = {Komamizu, Takahiro and Amagasa, Toshiyuki and Kitagawa, Hiroyuki},
  title = {{CROISSANT}: centralized relational interface for web-scale {SPARQL} endpoints},
  booktitle = {Proceedings of the 19th International Conference on Information Integration and Web-based Applications \& Services},
  year = {2017},
  file = {:articles/CROISSANT Centralized Relational Interface for Web-scale SPARQL Endpoints.pdf:PDF},
  groups = {optimisation technique, push down},
  priority = {prio1},
  ranking = {rank3},
  readstatus = {read},
}
@inproceedings{Ghita2020WhiteboxCL,
  title = {White-box Compression: Learning and Exploiting Compact Table Representations},
  author = {Ghita, Bogdan Vladimir and Tom{\'e}, Diego G. and Boncz, Peter A.},
  year = {2020},
  booktitle = {Conference on Innovative Data Systems Research},
  file = {:articles/White-box Compression Learning and Exploiting Compact Table Representations.pdf:PDF},
}
@inproceedings{Atre2022TheCO,
  author = {Atre, Medha},
  title = {The Case of {SPARQL} {UNION}, {FILTER} and {DISTINCT}},
  booktitle = {Proceedings of the {ACM} Web Conference 2022},
  year = {2022},
}
@inproceedings{Zahmatkesh2016WhenAF,
  author = {Zahmatkesh, Shima and Della Valle, Emanuele and Dell'Aglio, Daniele},
  title = {When a {FILTER} Makes the Difference in Continuously Answering {SPARQL} Queries on Streaming and Quasi-Static {Linked} {Data}},
  booktitle = {International Conference on Web Engineering},
  year = {2016},
  file = {:articles/When a FILTER Makes the Difference in Continuously Answering SPARQL.pdf:PDF},
  groups = {optimisation technique},
  priority = {prio2},
  ranking = {rank2},
  readstatus = {skimmed},
}
@article{Lin2022ATM,
  author = {Lin, Xiaoqing and Jiang, Dongyang},
  title = {A Two-Phase Method for Optimization of the {SPARQL} Query},
  journal = {Journal of Sensors},
  volume = {2022},
  pages = {1--12},
  year = {2022},
  file = {:articles/A Two-Phase Method for Optimization of the SPARQL Query.pdf:PDF},
  priority = {prio1},
  ranking = {rank2},
  readstatus = {skimmed},
}
@inproceedings{10.1145/1804669.1804675,
  author = {Schmidt, Michael and Meier, Michael and Lausen, Georg},
  title = {Foundations of {SPARQL} Query Optimization},
  booktitle = {Proceedings of the 13th International Conference on Database Theory},
  series = {ICDT '10},
  pages = {4--33},
  year = {2010},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  location = {Lausanne, Switzerland},
  numpages = {30},
  isbn = {9781605589473},
  doi = {10.1145/1804669.1804675},
  url = {https://doi.org/10.1145/1804669.1804675},
  keywords = {complexity, RDF, query optimization, SPARQL algebra, SPARQL, semantic query optimization},
  abstract = {We study fundamental aspects related to the efficient processing of the SPARQL query language for RDF, proposed by the W3C to encode machine-readable information in the Semantic Web. Our key contributions are (i) a complete complexity analysis for all operator fragments of the SPARQL query language, which -- as a central result -- shows that the SPARQL operator Optional alone is responsible for the PSpace-completeness of the evaluation problem, (ii) a study of equivalences over SPARQL algebra, including both rewriting rules like filter and projection pushing that are well-known from relational algebra optimization as well as SPARQL-specific rewriting schemes, and (iii) an approach to the semantic optimization of SPARQL queries, built on top of the classical chase algorithm. While studied in the context of a theoretically motivated set semantics, almost all results carry over to the official, bag-based semantics and therefore are of immediate practical relevance.},
  file = {:articles/Foundations of SPARQL Query Optimization_2.pdf:PDF},
}
@inproceedings{Tinedo2019LamdaFlowAP,
  author = {Gracia Tinedo, Ra{\'u}l and S{\'a}nchez Artigas, Marc and Garc{\'i}a L{\'o}pez, Pedro and Moatti, Yosef and Gluszak, Filip},
  title = {{$\lambda$Flow}: Automatic Pushdown of Dataflow Operators Close to the Data},
  booktitle = {2019 19th {IEEE/ACM} International Symposium on Cluster, Cloud and Grid Computing ({CCGRID})},
  pages = {112--121},
  year = {2019},
  file = {:articles/λFlow Automatic Pushdown of Dataflow Operators Close to the Data.pdf:PDF},
  priority = {prio3},
}
@article{10.1145/1567274.1567278,
  author = {P{\'e}rez, Jorge and Arenas, Marcelo and Gutierrez, Claudio},
  title = {Semantics and Complexity of {SPARQL}},
  journal = {ACM Transactions on Database Systems},
  volume = {34},
  number = {3},
  articleno = {16},
  numpages = {45},
  year = {2009},
  month = sep,
  issue_date = {August 2009},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  issn = {0362-5915},
  doi = {10.1145/1567274.1567278},
  url = {https://doi.org/10.1145/1567274.1567278},
  keywords = {SPARQL, semantic Web, Complexity, query language, RDF},
  abstract = {SPARQL is the standard language for querying RDF data. In this article, we address systematically the formal study of the database aspects of SPARQL, concentrating in its graph pattern matching facility. We provide a compositional semantics for the core part of SPARQL, and study the complexity of the evaluation of several fragments of the language. Among other complexity results, we show that the evaluation of general SPARQL patterns is PSPACE-complete. We identify a large class of SPARQL patterns, defined by imposing a simple and natural syntactic restriction, where the query evaluation problem can be solved more efficiently. This restriction gives rise to the class of well-designed patterns. We show that the evaluation problem is coNP-complete for well-designed patterns. Moreover, we provide several rewriting rules for well-designed patterns whose application may have a considerable impact in the cost of evaluating SPARQL queries.},
  file = {:articles/Semantics and Complexity of SPARQL.pdf:PDF},
}
@article{Taelman2019,
  author = {Taelman, Ruben and Colpaert, Pieter and Mannens, Erik and Verborgh, Ruben},
  title = {Generating public transport data based on population distributions for {RDF} benchmarking},
  journal = {Semantic Web},
  volume = {10},
  number = {2},
  pages = {305--328},
  year = {2019},
  publisher = {IOS Press},
  issn = {2210-4968},
  doi = {10.3233/SW-180319},
  url = {https://doi.org/10.3233/SW-180319},
  keywords = {Public Transport; dataset generator; benchmarking; rdf; linked data},
  abstract = {When benchmarking RDF data management systems such as public transport route planners, system evaluation needs to happen under various realistic circumstances, which requires a wide range of datasets with different properties. Real-world datasets are almost ideal, as they offer these realistic circumstances, but they are often hard to obtain and inflexible for testing. For these reasons, synthetic dataset generators are typically preferred over real-world datasets due to their intrinsic flexibility. Unfortunately, many synthetic dataset that are generated within benchmarks are insufficiently realistic, raising questions about the generalizability of benchmark results to real-world scenarios. In order to benchmark geospatial and temporal RDF data management systems such as route planners with sufficient external validity and depth, we designed PoDiGG, a highly configurable generation algorithm for synthetic public transport datasets with realistic geospatial and temporal characteristics comparable to those of their real-world variants. The algorithm is inspired by real-world public transit network design and scheduling methodologies. This article discusses the design and implementation of PoDiGG and validates the properties of its generated datasets. Our findings show that the generator achieves a sufficient level of realism, based on the existing coherence metric and new metrics we introduce specifically for the public transport domain. Thereby, PoDiGG provides a flexible foundation for benchmarking RDF data management systems with geospatial and temporal data.},
  file = {:articles/Generating Public Transport Data based on Population Distributions for RDF Benchmarking.pdf:PDF},
  groups = {Link traversal, Benchmark},
  ranking = {rank4},
  readstatus = {skimmed},
}
@inproceedings{inproceedings,
  author = {Bellandi, Andrea and Furletti, Barbara and Grossi, Valerio and Romei, Andrea},
  title = {Ontology-Driven Association Rule Extraction: A Case Study},
  booktitle = {{CEUR} Workshop Proceedings},
  volume = {298},
  year = {2007},
  month = jan,
  file = {:articles/Ontology-Driven Association Rule Extraction A Case Study.pdf:PDF},
}
@inproceedings{Lynden2013AHA,
  title = {A Hybrid Approach to Linked Data Query Processing with Time Constraints},
  author = {Lynden, Steven J. and Kojima, Isao and Matono, Akiyoshi and Nakamura, Akihito and Yui, Makoto},
  year = {2013},
  booktitle = {LDOW},
  groups = {Link traversal},
  file = {:articles/A Hybrid Approach to Linked Data Query Processing with Time Constraints.pdf:PDF},
}
@article{Umbrich2014LinkTQ,
  author = {Umbrich, J{\"u}rgen and Hogan, Aidan and Polleres, Axel and Decker, Stefan},
  title = {Link traversal querying for a diverse {Web} of {Data}},
  journal = {Semantic Web},
  volume = {6},
  pages = {585--624},
  year = {2014},
  file = {:articles/Link traversal querying for a diverse Web of Data.pdf:PDF},
  groups = {Link traversal},
}
@inproceedings{Hartig2016,
  title = {Walking Without a Map: Ranking-Based Traversal for Querying Linked Data},
  author = {Hartig, Olaf and {\"O}zsu, M. Tamer},
  editor = {Groth, Paul and Simperl, Elena and Gray, Alasdair and Sabou, Marta and Kr{\"o}tzsch, Markus and Lecue, Freddy and Fl{\"o}ck, Fabian and Gil, Yolanda},
  booktitle = {The Semantic Web -- ISWC 2016},
  pages = {305--324},
  year = {2016},
  publisher = {Springer International Publishing},
  address = {Cham},
  isbn = {978-3-319-46523-4},
  abstract = {The traversal-based approach to execute queries over Linked Data on the WWW fetches data by traversing data links and, thus, is able to make use of up-to-date data from initially unknown data sources. While the downside of this approach is the delay before the query engine completes a query execution, user perceived response time may be improved significantly by returning as many elements of the result set as soon as possible. To this end, the query engine requires a traversal strategy that enables the engine to fetch result-relevant data as early as possible. The challenge for such a strategy is that the query engine does not know a priori which of the data sources discovered during the query execution will contain result-relevant data. In this paper, we investigate 14 different approaches to rank traversal steps and achieve a variety of traversal strategies. We experimentally study their impact on response times and compare them to a baseline that resembles a breadth-first traversal. While our experiments show that some of the approaches can achieve noteworthy improvements over the baseline in a significant number of cases, we also observe that for every approach, there is a non-negligible chance to achieve response times that are worse than the baseline.},
  groups = {Link traversal},
  ranking = {rank3},
  readstatus = {skimmed},
  file = {:articles/Walking Without a Map Ranking-Based Traversal for Querying Linked Data.pdf:PDF},
}
@inproceedings{Tiddi2014WalkingLD,
  author = {Tiddi, Ilaria and d'Aquin, Mathieu and Motta, Enrico},
  title = {Walking {Linked} {Data}: a Graph Traversal Approach to Explain Clusters},
  booktitle = {COLD},
  year = {2014},
  file = {:articles/Walking Linked Data a Graph Traversal Approach to Explain Clusters.pdf:PDF},
  groups = {Link traversal},
  ranking = {rank1},
  readstatus = {skimmed},
}
@inproceedings{Tiddi2014,
  title = {Dedalo: Looking for Clusters Explanations in a Labyrinth of Linked Data},
  author = {Tiddi, Ilaria and d'Aquin, Mathieu and Motta, Enrico},
  editor = {Presutti, Valentina and d'Amato, Claudia and Gandon, Fabien and d'Aquin, Mathieu and Staab, Steffen and Tordai, Anna},
  booktitle = {The Semantic Web: Trends and Challenges},
  pages = {333--348},
  year = {2014},
  publisher = {Springer International Publishing},
  address = {Cham},
  isbn = {978-3-319-07443-6},
  abstract = {We present Dedalo, a framework which is able to exploit Linked Data to generate explanations for clusters. In general, any result of a Knowledge Discovery process, including clusters, is interpreted by human experts who use their background knowledge to explain them. However, for someone without such expert knowledge, those results may be difficult to understand. Obtaining a complete and satisfactory explanation becomes a laborious and time-consuming process, involving expertise in possibly different domains. Having said so, not only does the Web of Data contain vast amounts of such background knowledge, but it also natively connects those domains. While the efforts put in the interpretation process can be reduced with the support of Linked Data, how to automatically access the right piece of knowledge in such a big space remains an issue. Dedalo is a framework that dynamically traverses Linked Data to find commonalities that form explanations for items of a cluster. We have developed different strategies (or heuristics) to guide this traversal, reducing the time to get the best explanation. In our experiments, we compare those strategies and demonstrate that Dedalo finds relevant and sophisticated Linked Data explanations from different areas.},
  groups = {Link traversal},
  ranking = {rank3},
  readstatus = {skimmed},
  file = {:articles/Dedalo Looking for Clusters Explanations in a Labyrinth of Linked Data.pdf:PDF},
}
@misc{verborgh2020,
  author = {Verborgh, Ruben and Taelman, Ruben},
  title = {Guided Link-Traversal-Based Query Processing},
  year = {2020},
  publisher = {arXiv},
  eprint = {2005.02239},
  archiveprefix = {arXiv},
  primaryclass = {cs.DB},
  doi = {10.48550/ARXIV.2005.02239},
  url = {https://arxiv.org/abs/2005.02239},
  copyright = {arXiv.org perpetual, non-exclusive license},
  keywords = {Databases (cs.DB), Information Retrieval (cs.IR), Social and Information Networks (cs.SI), FOS: Computer and information sciences},
  file = {:articles/Guided Link-Traversal-Based Query Processing.pdf:PDF},
  ranking = {rank5},
  readstatus = {read},
}
@article{Umbrich2011ComparingDS,
  author = {Umbrich, J{\"u}rgen and Hose, Katja and Karnstedt, Marcel and Harth, Andreas and Polleres, Axel},
  title = {Comparing data summaries for processing live queries over {Linked} {Data}},
  journal = {World Wide Web},
  volume = {14},
  pages = {495--544},
  year = {2011},
  file = {:articles/Comparing data summaries for processing live queries over Linked Data.pdf:PDF},
  groups = {Link traversal},
}
@misc{ColpaertMaterializedTREE,
  author = {Colpaert, Pieter},
  title = {Building materializable querying interfaces with the {TREE} hypermedia specification},
  year = {2022},
  url = {https://treecg.github.io/paper-materializable-interfaces/},
  file = {:articles/Building materializable querying interfaces with the TREE hypermedia specification.pdf:PDF},
  groups = {Link traversal fragmented RDF, fragmented RDF},
  ranking = {rank5},
  readstatus = {read},
}
@inproceedings{lancker2021LDS,
  author = {Van Lancker, D. and
            Colpaert, P. and
            Delva, H. and
            Van de Vyvere, B. and
            Rojas Mel{\'e}ndez, J. and
            Dedecker, R. and
            Michiels, P. and
            Buyle, R. and
            De Craene, A. and
            Verborgh, R.},
  title = {Publishing base registries as {Linked} {Data} {Event} {Streams}},
  booktitle = {Proceedings of the 21st International Conference on Web Engineering ({ICWE})},
  year = {2021},
  url = {https://raw.githubusercontent.com/ddvlanck/Publishing-Base-Registries-As-LDES/master/Linked-Data-Event-Streams.pdf},
  file = {:articles/Publishing base registries as Linked Data Event Streams.pdf:PDF},
  groups = {Link traversal, fragmented RDF},
  ranking = {rank3},
  readstatus = {read},
}
@article{Hartig2013AnOO,
  author = {Hartig, Olaf},
  title = {An Overview on Execution Strategies for {Linked} {Data} Queries},
  journal = {Datenbank-Spektrum},
  volume = {13},
  pages = {89--99},
  year = {2013},
  file = {:articles/An Overview on Execution Strategies for Linked Data Queries.pdf:PDF},
  groups = {Link traversal},
}
@inproceedings{Schmedding2011IncrementalSE,
  author = {Schmedding, Florian},
  title = {Incremental {SPARQL} Evaluation for Query Answering on {Linked} {Data}},
  booktitle = {COLD},
  year = {2011},
  file = {:articles/Incremental SPARQL Evaluation for Query.pdf:PDF},
  groups = {Link traversal},
  ranking = {rank4},
  readstatus = {skimmed},
}
@inproceedings{Hartig2011ZeroKnowledgeQP,
  title = {Zero-Knowledge Query Planning for an Iterator Implementation of Link Traversal Based Query Execution},
  author = {Hartig, Olaf},
  year = {2011},
  booktitle = {Extended Semantic Web Conference},
  groups = {Link traversal},
  readstatus = {skimmed},
  file = {:articles/Zero Knowledge Query Planning for an Iterator Implementation of Link Traversal Based Query Execution.pdf:PDF},
}
@inproceedings{Hartig2009ExecutingSQ,
  author = {Hartig, Olaf and Bizer, Christian and Freytag, Johann-Christoph},
  title = {Executing {SPARQL} Queries over the {Web} of {Linked} {Data}},
  booktitle = {International Workshop on the Semantic Web},
  year = {2009},
  file = {:articles/Executing SPARQL Queries over the Web of Linked Data.pdf:PDF},
  groups = {Link traversal},
}
@inproceedings{Hartig2014LinkedDQ,
  title = {Linked Data Query Processing Based on Link Traversal},
  author = {Hartig, Olaf},
  year = {2014},
  booktitle = {Linked Data Management},
  groups = {Link traversal},
  file = {:articles/Linked Data Query Processing.pdf:PDF},
}
@inproceedings{taelman2023,
  author = {Taelman, Ruben and Verborgh, Ruben},
  title = {Link Traversal Query Processing over Decentralized Environments with Structural Assumptions},
  booktitle = {Proceedings of the 22nd International Semantic Web Conference},
  year = {2023},
  month = nov,
  url = {https://comunica.github.io/Article-ISWC2023-SolidQuery/},
  groups = {Link traversal},
}
@techreport{spec:shacl,
  author = {Knublauch, Holger and Kontokostas, Dimitris},
  title = {Shapes Constraint Language ({SHACL})},
  institution = {W3C},
  type = {Recommendation},
  year = {2017},
  month = jul,
  url = {https://www.w3.org/TR/shacl/},
  groups = {specification},
}
@techreport{spec:solid,
  author = {Capadisli, Sarven},
  title = {Solid Technical Reports},
  institution = {W3C Solid Community Group},
  type = {Recommendation},
  year = {2023},
  month = feb,
  url = {https://solidproject.org/TR/},
  groups = {specification},
}
@techreport{spec:ldp,
  author = {Speicher, Steve and Arwe, John and Malhotra, Ashok},
  title = {Linked Data Platform 1.0},
  institution = {W3C},
  type = {Recommendation},
  year = {2015},
  month = feb,
  url = {https://www.w3.org/TR/ldp/},
  groups = {specification},
}
@techreport{spec:typeIndex,
  author = {Turdean, Timea and Arwe, John and Balseiro, Virginia and Capadisli, Sarven and Berners-Lee, Tim},
  title = {Type Indexes Version 1.0.0},
  institution = {W3C Solid Community Group},
  type = {Recommendation},
  year = {2022},
  month = aug,
  url = {https://solid.github.io/type-indexes/},
  groups = {specification},
}
@techreport{spec:hydra,
  author = {Lanthaler, Markus},
  title = {{Hydra} Core Vocabulary},
  institution = {Hydra W3C Community Group},
  type = {Recommendation},
  url = {https://www.hydra-cg.com/spec/latest/core/},
  groups = {specification},
  internal-note = {NOTE(review): author and institution added from the linked specification; year is still missing (living draft) -- add a year or access date once confirmed},
}
@techreport{spec:as,
  author = {Snell, James M. and Prodromou, Evan},
  title = {Activity Streams 2.0},
  institution = {W3C},
  type = {Recommendation},
  year = {2017},
  month = may,
  url = {https://www.w3.org/TR/activitystreams-core/},
  groups = {specification},
}
@techreport{spec:tree,
  author = {Colpaert, Pieter},
  title = {The {TREE} hypermedia specification},
  institution = {W3C TREE Hypermedia Community Group},
  type = {Recommendation},
  year = {2022},
  month = jul,
  url = {https://treecg.github.io/specification/},
  groups = {specification},
}
@inproceedings{hartig2012,
  author = {Hartig, Olaf and Freytag, Johann-Christoph},
  title = {Foundations of Traversal Based Query Execution over {Linked} {Data}},
  booktitle = {Conference on Hypertext and Social Media},
  series = {HT '12},
  pages = {43--52},
  year = {2012},
  publisher = {ACM},
  address = {New York, NY, USA},
  location = {Milwaukee, Wisconsin, USA},
  numpages = {10},
  isbn = {9781450313353},
  doi = {10.1145/2309996.2310005},
  url = {https://doi.org/10.1145/2309996.2310005},
  keywords = {link traversal based query execution, computability, query semantics, web of data, linked data},
  abstract = {Query execution over the Web of Linked Data has attracted much attention recently. A particularly interesting approach is link traversal based query execution which proposes to integrate the traversal of data links into the creation of query results. Hence -in contrast to traditional query execution paradigms- this does not assume a fixed set of relevant data sources beforehand; instead, the traversal process discovers data and data sources on the fly and, thus, enables applications to tap the full potential of the Web. While several authors have studied possibilities to implement the idea of link traversal based query execution and to optimize query execution in this context, no work exists that discusses theoretical foundations of the approach in general. Our paper fills this gap. We introduce a well-defined semantics for queries that may be executed using a link traversal based approach. Based on this semantics we formally analyze properties of such queries. In particular, we study the computability of queries as well as the implications of querying a potentially infinite Web of Linked Data. Our results show that query computation in general is not guaranteed to terminate and that for any given query it is undecidable whether the execution terminates. Furthermore, we define an abstract execution model that captures the integration of link traversal into the query execution process. Based on this model we prove the soundness and completeness of link traversal based query execution and analyze an existing implementation approach.},
  file = {:articles/Foundations of Traversal Based Query Execution.pdf:PDF},
  groups = {Link traversal},
  ranking = {rank5},
  readstatus = {read},
}
@article{Verborgh2016TriplePF,
  author = {Verborgh, Ruben and Vander Sande, Miel and Hartig, Olaf and Van Herwegen, Jo and De Vocht, Laurens and De Meester, Ben and Haesendonck, Gerald and Colpaert, Pieter},
  title = {{Triple} {Pattern} {Fragments}: A low-cost knowledge graph interface for the {Web}},
  journal = {Journal of Web Semantics},
  volume = {37--38},
  pages = {184--206},
  year = {2016},
  file = {:articles/Triple Pattern Fragments A low-cost knowledge graph interface for the Web.pdf:PDF},
}
@article{Hellerstein1998OptimizationTF,
  author = {Hellerstein, Joseph M.},
  title = {Optimization techniques for queries with expensive methods},
  journal = {ACM Transactions on Database Systems},
  volume = {23},
  pages = {113--157},
  year = {1998},
  file = {:articles/Optimization Techniques For Queries with Expensive.pdf:PDF},
  groups = {push down},
  ranking = {rank3},
  readstatus = {skimmed},
}
@misc{Zhang2002XmlAO,
  author = {Zhang, Xin and Pielech, Bradford and Rundensteiner, Elke A.},
  title = {{XML} Algebra Optimization},
  year = {2002},
  file = {:articles/XML Algebra Optimization.pdf:PDF},
  groups = {push down},
  ranking = {rank1},
  readstatus = {skimmed},
  internal-note = {NOTE(review): venue unknown, so typed as misc instead of inproceedings without booktitle -- restore the specific type once the venue is confirmed},
}
@article{Yang2021FlexPushdownDBHP,
  author = {Yang, Yifei and Youill, Matt and Woicik, Matthew and Liu, Yizhou and Yu, Xiangyao and Serafini, Marco and Aboulnaga, Ashraf and Stonebraker, Michael},
  title = {{FlexPushdownDB}: Hybrid Pushdown and Caching in a Cloud {DBMS}},
  journal = {Proceedings of the VLDB Endowment},
  volume = {14},
  pages = {2101--2113},
  year = {2021},
  file = {:articles/FlexPushdownDB Hybrid Pushdown and Caching.pdf:PDF},
  groups = {push down},
  ranking = {rank3},
  readstatus = {skimmed},
}
@inproceedings{bogaerts_rulemlrr_2021,
  author = {Bogaerts, Bart and Ketsman, Bas and Zeboudj, Younes and Aamer, Heba and Taelman, Ruben and Verborgh, Ruben},
  title = {Link Traversal with Distributed Subweb Specifications},
  booktitle = {Proceedings of the 5th International Joint Conference on Rules and Reasoning},
  editor = {Moschoyiannis, Sotiris and Pe{\~n}aloza, Rafael and Vanthienen, Jan and Soylu, Ahmet and Roman, Dumitru},
  series = {Lecture Notes in Computer Science},
  volume = {12851},
  pages = {62--79},
  year = {2021},
  month = sep,
  publisher = {Springer},
  isbn = {978-3-030-91167-6},
  doi = {10.1007/978-3-030-91167-6_5},
  url = {https://www.bartbogaerts.eu/articles/2021/005-RuleML-GuidedLink-SubwebSpec/SubwebSpecifications.pdf},
  file = {:articles/Link Traversal with Distributed Subweb Specifications.pdf:PDF},
  groups = {Link traversal},
}
@inproceedings{Delva2020,
  title = {Geospatial Partitioning of Open Transit Data},
  author = {Delva, Harm and Rojas, Juli{\'a}n Andr{\'e}s and Vandenberghe, Pieter-Jan and Colpaert, Pieter and Verborgh, Ruben},
  editor = {Bielikova, Maria and Mikkonen, Tommi and Pautasso, Cesare},
  booktitle = {Web Engineering},
  pages = {305--320},
  year = {2020},
  publisher = {Springer},
  address = {Cham},
  isbn = {978-3-030-50578-3},
  abstract = {Public transit operators often publish their open data as a single data dump, but developers with limited computational resources may not be able to process all this data. Existing work has already focused on fragmenting the data by departure time, so that data consumers can be more selective in the data they process. However, each fragment still contains data from the entire operator's service area. We build upon this idea by fragmenting geospatially as well as by departure time. Our method is robust to changes in the original data, such as the deletion or the addition of stops, which is crucial in scenarios where data publishers do not control the data itself. In this paper we explore popular clustering methods such as k-means and METIS, alongside two simple domain-specific methods of our own. We compare the effectiveness of each for the use case of client-side route planning, focusing on the ease of use of the data and the cacheability of the data fragments. Our results show that simply clustering stops by their proximity to 8 transport hubs yields the most promising results: queries are 2.4 times faster and download 4 times less data. More than anything though, our results show that the difference between clustering methods is small, and that engineers can safely choose practical and simple solutions. We expect that this insight also holds true for publishing other geospatial data such as road networks, sensor data, or points of interest.},
  groups = {application},
}
@inproceedings{Delva2020a,
  title = {Interactive Route Personalization Using Regions of Interest},
  author = {Delva, Harm and Smets, Annelien and Colpaert, Pieter and Ballon, Pieter and Verborgh, Ruben},
  editor = {Ko, In-Young and Murillo, Juan Manuel and Vuorimaa, Petri},
  booktitle = {Current Trends in Web Engineering},
  pages = {47--52},
  year = {2020},
  publisher = {Springer International Publishing},
  address = {Cham},
  isbn = {978-3-030-65665-2},
  abstract = {There is an abundance of services and applications that find the most efficient route between two places, people are not always interested in efficiency; sometimes we just want a pleasant route. Such routes are subjective though, and may depend on contextual factors that route planners are oblivious to. One possible solution is to automatically learn what a user wants, but this requires behavioral data, leading to a cold start problem. An alternative approach is to let the user express their desires explicitly, effectively helping them create the most pleasant route themselves. In this paper we provide a proof of concept of a client-side route planner that does exactly that. We aggregated the Point of Interest information from OpenStreetMap into Regions of Interest, and published the results on the Web. These regions are described semantically, enabling the route planner to align the user's input to what is known about their environment. Planning a 3 km long pedestrian route through a city center takes 5 s, but subsequent adjustments to the route require less than a second to compute. These execution times imply that our approach is feasible, although further optimizations are needed to bring this to the general public.},
  groups = {application},
}
% Zenodo software release. The repository identifier in the title is braced so
% that sentence-casing styles do not lower-case it.
@Software{wout_slabbinck_2023_7702089,
  author    = {Slabbinck, Wout},
  title     = {{TREEcg/LDES-timeseries}: v0.0.2},
  version   = {v0.0.2},
  publisher = {Zenodo},
  year      = {2023},
  month     = mar,
  doi       = {10.5281/zenodo.7702089},
  url       = {https://doi.org/10.5281/zenodo.7702089},
  groups    = {Benchmark},
}
% Demo paper presenting LODStats, a web application for Linked Open Data statistics.
@InProceedings{Ermilov2013,
  author    = {Ermilov, Ivan and Martin, Michael and Lehmann, Jens and Auer, S{\"o}ren},
  title     = {Linked Open Data Statistics: Collection and Exploitation},
  booktitle = {Knowledge Engineering and the Semantic Web},
  editor    = {Klinov, Pavel and Mouromtsev, Dmitry},
  publisher = {Springer},
  address   = {Berlin, Heidelberg},
  year      = {2013},
  pages     = {242--249},
  isbn      = {978-3-642-41360-5},
  groups    = {rdf statistic},
  abstract  = {This demo presents LODStats, a web application for collection and exploration of the Linked Open Data statistics. LODStats consists of two parts: the core collects statistics about the LOD cloud and publishes it on the LODStats web portal, a front-end for exploration of dataset statistics. Statistics are published both in human-readable and machine-readable formats, thus allowing consumption of the data through web front-end by the users as well as through an API by services and applications. As an example for the latter we showcase how to visualize the statistical data with the CubeViz application.},
}
% Doctoral dissertation of Roy Thomas Fielding (University of California, Irvine, 2000).
@PhdThesis{thomasFieldingPhdThesis,
  author = {Fielding, Roy Thomas},
  title  = {Architectural Styles and the Design of Network-based Software Architectures},
  school = {University of California, Irvine},
  year   = {2000},
}
% Empirical study of public SPARQL endpoints registered on the DataHub.
% The acronym in the title is braced so sentence-casing styles keep it upper-case;
% percent signs in the abstract are escaped so a style that prints it stays valid LaTeX.
@InProceedings{aranda2013,
  author    = {Buil-Aranda, Carlos and Hogan, Aidan and Umbrich, J{\"u}rgen and Vandenbussche, Pierre-Yves},
  title     = {{SPARQL} Web-Querying Infrastructure: Ready for Action?},
  booktitle = {Proceedings 12th ISWC},
  series    = {ISWC '13},
  publisher = {Springer-Verlag},
  address   = {Berlin, Heidelberg},
  year      = {2013},
  pages     = {277--293},
  numpages  = {17},
  isbn      = {9783642413377},
  doi       = {10.1007/978-3-642-41338-4_18},
  url       = {https://doi.org/10.1007/978-3-642-41338-4_18},
  file      = {:articles/SPARQL Web-Querying Infrastructure.pdf:PDF},
  groups    = {rdf statistic, statistic},
  abstract  = {Hundreds of public SPARQL endpoints have been deployed on the Web, forming a novel decentralised infrastructure for querying billions of structured facts from a variety of sources on a plethora of topics. But is this infrastructure mature enough to support applications? For 427 public SPARQL endpoints registered on the DataHub, we conduct various experiments to test their maturity. Regarding discoverability, we find that only one-third of endpoints make descriptive meta-data available, making it difficult to locate or learn about their content and capabilities. Regarding interoperability, we find patchy support for established SPARQL features like ORDER BY as well as (understandably) for new SPARQL 1.1 features. Regarding efficiency, we show that the performance of endpoints for generic queries can vary by up to 3--4 orders of magnitude. Regarding availability, based on a 27-month long monitoring experiment, we show that only 32.2\% of public endpoints can be expected to have (monthly) "two-nines" uptimes of 99--100\%.},
}
% smart-KG: combines Triple Pattern Fragments with shipping compressed KG partitions.
% System name and acronym in the title are braced to survive sentence-casing styles.
@InProceedings{Azzam2020,
  author    = {Azzam, Amr and Fern{\'a}ndez, Javier D. and Acosta, Maribel and Beno, Martin and Polleres, Axel},
  title     = {{SMART-KG}: Hybrid Shipping for {SPARQL} Querying on the Web},
  booktitle = {Proceedings of The Web Conference 2020},
  series    = {WWW '20},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Taipei, Taiwan},
  year      = {2020},
  pages     = {984--994},
  numpages  = {11},
  isbn      = {9781450370233},
  doi       = {10.1145/3366423.3380177},
  url       = {https://doi.org/10.1145/3366423.3380177},
  file      = {:articles/SMART-KG Hybrid Shipping for SPARQL Querying on the Web.pdf:PDF},
  groups    = {SPARQL enpoint, SPARQL interfaces},
  abstract  = {While Linked Data (LD) provides standards for publishing (RDF) and (SPARQL) querying Knowledge Graphs (KGs) on the Web, serving, accessing and processing such open, decentralized KGs is often practically impossible, as query timeouts on publicly available SPARQL endpoints show. Alternative solutions such as Triple Pattern Fragments (TPF) attempt to tackle the problem of availability by pushing query processing workload to the client side, but suffer from unnecessary transfer of irrelevant data on complex queries with large intermediate results. In this paper we present smart-KG, a novel approach to share the load between servers and clients, while significantly reducing data transfer volume, by combining TPF with shipping compressed KG partitions. Our evaluations show that smart-KG outperforms state-of-the-art client-side solutions and increases server-side availability towards more cost-effective and balanced hosting of open and decentralized KGs.},
}
% WiseKG: cost-model-based delegation of query load between servers and clients.
% The camel-case system name in the title is braced to survive sentence-casing styles.
@InProceedings{Azzam2021,
  author    = {Azzam, Amr and Aebeloe, Christian and Montoya, Gabriela and Keles, Ilkcan and Polleres, Axel and Hose, Katja},
  title     = {{WiseKG}: Balanced Access to Web Knowledge Graphs},
  booktitle = {Proceedings of the Web Conference 2021},
  series    = {WWW '21},
  publisher = {Association for Computing Machinery},
  address   = {New York, NY, USA},
  location  = {Ljubljana, Slovenia},
  year      = {2021},
  pages     = {1422--1434},
  numpages  = {13},
  isbn      = {9781450383127},
  doi       = {10.1145/3442381.3449911},
  url       = {https://doi.org/10.1145/3442381.3449911},
  file      = {:articles/WiseKG Balanced Access to Web Knowledge Graphs.pdf:PDF},
  groups    = {SPARQL enpoint, SPARQL interfaces},
  abstract  = {SPARQL query services that balance processing between clients and servers become more and more essential to handle the increasing load for open and decentralized knowledge graphs on the Web. To this end, Linked Data Fragments (LDF) have introduced a foundational framework that has sparked research exploring a spectrum of potential Web querying interfaces in between server-side query processing via SPARQL endpoints and client-side query processing of data dumps. Current proposals in between typically suffer from imbalanced load on either the client or the server. In this paper, to the best of our knowledge, we present the first work that combines both client-side and server-side query optimization techniques in a truly dynamic fashion: we introduce WiseKG, a system that employs a cost model that dynamically delegates the load between servers and clients by combining client-side processing of shipped partitions with efficient server-side processing of star-shaped sub-queries, based on current server workload and client capabilities. Our experiments show that WiseKG significantly outperforms state-of-the-art solutions in terms of average total query execution time per client, while at the same time decreasing network traffic and increasing server-side availability.},
}
% DAHCC resource paper. Authors are listed individually ("Last, Initial" joined by
% "and") so that styles can sort, abbreviate and truncate the list.
% NOTE(review): the first author was recorded as "Bram, S."; taken here to be
% Bram Steenwinckel -- confirm against the published paper.
@InProceedings{dahcc_resource,
  author    = {Steenwinckel, Bram and De Brouwer, M. and Stojchevska, M. and Van Der Donckt, J. and Nelis, J. and Ruyssinck, J. and van der Herten, J. and Casier, K. and Van Ooteghem, J. and Crombez, P. and De Turck, F. and Van Hoecke, S. and Ongenae, F.},
  title     = {{Data Analytics For Health and Connected Care: Ontology, Knowledge Graph and Applications}},
  booktitle = {Proceedings of the Sixteenth {EAI} Pervasive Healthcare Conference},
  publisher = {Springer},
  year      = {2022},
  month     = dec,
  url       = {https://dahcc.idlab.ugent.be},
  groups    = {statistic},
}
% Comunica resource paper (ISWC 2018). The acronym in the title is braced so
% sentence-casing styles keep it upper-case.
@InProceedings{comunica,
  author    = {Taelman, Ruben and Van Herwegen, Joachim and Vander Sande, Miel and Verborgh, Ruben},
  title     = {Comunica: a Modular {SPARQL} Query Engine for the Web},
  booktitle = {Proceedings 17th ISWC},
  year      = {2018},
  month     = oct,
  url       = {https://comunica.github.io/Article-ISWC2018-Resource/},
}
% JabRef metadata, kept verbatim because the tool parses these blocks itself.
% The first block records the database type (bibtex); the second lists the static
% groups whose names match the values used in the entries' "groups" fields.
@Comment{jabref-meta: databaseType:bibtex;}
@Comment{jabref-meta: grouping:
0 AllEntriesGroup:;
1 StaticGroup:application\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:Benchmark\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:fragmented RDF\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:Link traversal\;0\;0\;0x8a8a8aff\;\;\;;
1 StaticGroup:optimisation technique\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:push down\;0\;0\;0x8a8a8aff\;\;\;;
1 StaticGroup:rdf statistic\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:SPARQL interfaces\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:specification\;0\;1\;0x8a8a8aff\;\;\;;
1 StaticGroup:statistic\;0\;1\;0x8a8a8aff\;\;\;;
}