% SIGMOD 2016 demonstration paper presenting the SourceSight source-selection system.
% The system name is brace-protected in the title so sentence-casing styles keep its camelCase.
@inproceedings{rekatsinas:sigmod16,
  author    = {Rekatsinas, Theodoros and Deshpande, Amol and Dong, Luna and Getoor, Lise and Srivastava, Divesh},
  title     = {{SourceSight}: Enabling Effective Source Selection},
  booktitle = {SIGMOD},
  year      = {2016},
  abstract  = {Recently there has been a rapid increase in the number of data sources and data services, such as cloud-based data markets and data portals, that facilitate the collection, publishing and trading of data. Data sources typically exhibit large heterogeneity in the type and quality of data they provide. Unfortunately, when the number of data sources is large, it is difficult for users to reason about the actual usefulness of sources for their applications and the trade-offs between the benefits and costs of acquiring and integrating sources. In this demonstration we present SOURCESIGHT, a system that allows users to interactively explore a large number of heterogeneous data sources, and discover valuable sets of sources for diverse integration tasks. SOURCESIGHT uses a novel multi-level source quality index that enables effective source selection at different granularity levels, and introduces a collection of new techniques to discover and evaluate relevant sources for integration.},
}