% Bibliography entries, listed newest first (2018 down to 1994).
% Conventions used below: one field per line, authors in "Last, First" form,
% page ranges with a double hyphen, months as the standard three-letter macros,
% and braces around title words whose capitalisation must survive style recasing.

@inproceedings{328,
  author    = {Sridhar, Dhanya and Pujara, Jay and Getoor, Lise},
  title     = {Scalable Probabilistic Causal Structure Discovery},
  booktitle = {International Joint Conference on Artificial Intelligence (IJCAI)},
  year      = {2018},
  url       = {https://bitbucket.org/linqs/causpsl/src/master/},
  abstract  = {Complex causal networks underlie many real-world problems, from the regulatory interactions between genes to the environmental patterns used to understand climate change. Computational methods seek to infer these causal networks using observational data and domain knowledge. In this paper, we identify three key requirements for inferring the structure of causal networks for scientific discovery: (1) robustness to noise in observed measurements; (2) scalability to handle hundreds of variables; and (3) flexibility to encode domain knowledge and other structural constraints. We first formalize the problem of joint probabilistic causal structure discovery. We develop an approach using probabilistic soft logic (PSL) that exploits multiple statistical tests, supports efficient optimization over hundreds of variables, and can easily incorporate structural constraints, including imperfect domain knowledge. We compare our method against multiple well-studied approaches on biological and synthetic datasets, showing improvements of up to 20\% in F1-score over the best performing baseline in realistic settings.},
}

@inproceedings{332,
  author    = {Embar, Varun and Sridhar, Dhanya and Farnadi, Golnoosh and Getoor, Lise},
  title     = {Scalable Structure Learning for Probabilistic Soft Logic},
  booktitle = {IJCAI Workshop on Statistical Relational AI (StarAI)},
  year      = {2018},
  month     = jun,
  abstract  = {Statistical relational frameworks such as Markov logic networks and probabilistic soft logic (PSL) encode model structure with weighted first-order logical clauses. Learning these clauses from data is referred to as structure learning. Structure learning alleviates the manual cost of specifying models. However, this benefit comes with high computational costs; structure learning typically requires an expensive search over the space of clauses which involves repeated optimization of clause weights. In this paper, we propose the first two approaches to structure learning for PSL. We introduce a greedy search-based algorithm and a novel optimization method that trade-off scalability and approximations to the structure learning problem in varying ways. The highly scalable optimization method combines data-driven generation of clauses with a piecewise pseudolikelihood (PPLL) objective that learns model structure by optimizing clause weights only once. We compare both methods across five real-world tasks, showing that PPLL achieves an order of magnitude runtime speedup and AUC gains up to 15\% over greedy search.},
}

@inproceedings{335,
  author    = {Tomkins, Sabina and Isley, Steve and London, Ben and Getoor, Lise},
  title     = {Sustainability at Scale: Bridging the Intention-Behavior Gap with Sustainable Recommendations},
  booktitle = {Recommender Systems (RecSys)},
  year      = {2018},
  abstract  = {Finding sustainable products and evaluating their claims is a significant barrier facing sustainability-minded customers. Tools that reduce both these burdens are likely to boost the sale of sustainable products. However, it is difficult to determine the sustainability characteristics of these products --- there are a variety of certifications and definitions of sustainability, and quality labeling requires input from domain experts. In this paper, we propose a flexible probabilistic framework that uses domain knowledge to identify sustainable products and customers, and uses these labels to predict customer purchases. We evaluate our approach on grocery items from the Amazon catalog. Our proposed approach outperforms established recommender system models in predicting future purchases while jointly inferring sustainability scores for customers and products.},
}

@article{farnadi:mlj17,
  author  = {Farnadi, Golnoosh and Bach, Stephen H. and Moens, Marie-Francine and Getoor, Lise and De Cock, Martine},
  title   = {Soft quantification in statistical relational learning},
  journal = {Machine Learning},
  year    = {2017},
}

@inproceedings{pujara:emnlp17,
  author    = {Pujara, Jay and Augustine, Eriq and Getoor, Lise},
  title     = {Sparsity and Noise: Where Knowledge Graph Embeddings Fall Short},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2017},
  url       = {https://github.com/eriq-augustine/meta-kg},
}

@inproceedings{rekatsinas:sigmod16,
  author    = {Rekatsinas, Theodoros and Deshpande, Amol and Dong, Luna and Getoor, Lise and Srivastava, Divesh},
  title     = {{SourceSight}: Enabling Effective Source Selection},
  booktitle = {SIGMOD},
  year      = {2016},
  abstract  = {Recently there has been a rapid increase in the number of data sources and data services, such as cloud-based data markets and data portals, that facilitate the collection, publishing and trading of data. Data sources typically exhibit large heterogeneity in the type and quality of data they provide. Unfortunately, when the number of data sources is large, it is difficult for users to reason about the actual usefulness of sources for their applications and the trade-offs between the benefits and costs of acquiring and integrating sources. In this demonstration we present SOURCESIGHT, a system that allows users to interactively explore a large number of heterogeneous data sources, and discover valuable sets of sources for diverse integration tasks. SOURCESIGHT uses a novel multi-level source quality index that enables effective source selection at different granularity levels, and introduces a collection of new techniques to discover and evaluate relevant sources for integration.},
}

% Journal version; the page range is stored in "pages" so that styles print it.
@article{london:jmlr16,
  author   = {London, Ben and Huang, Bert and Getoor, Lise},
  title    = {Stability and Generalization in Structured Prediction},
  journal  = {Journal of Machine Learning Research},
  volume   = {17},
  pages    = {1--52},
  year     = {2016},
  note     = {to appear},
  keywords = {PAC-Bayes, generalization bounds, learning theory, structured prediction},
  abstract = {Structured prediction models have been found to learn effectively from a few large examples --- sometimes even just one. Despite empirical evidence, canonical learning theory cannot guarantee generalization in this setting because the error bounds decrease as a function of the number of examples. We therefore propose new PAC-Bayesian generalization bounds for structured prediction that decrease as a function of both the number of examples and the size of each example. Our analysis hinges on the stability of joint inference and the smoothness of the data distribution. We apply our bounds to several common learning scenarios, including max-margin and soft-max training of Markov random fields. Under certain conditions, the resulting error bounds can be far more optimistic than previous results and can even guarantee generalization from a single large example.},
}

@inproceedings{rekatsinas:sdm15,
  author       = {Rekatsinas, Theodoros and Ghosh, Saurav and Mekaru, Sumiko and Nsoesie, Elaine and Brownstein, John and Getoor, Lise and Ramakrishnan, Naren},
  title        = {{SourceSeer}: Forecasting Rare Disease Outbreaks Using Multiple Data Sources},
  booktitle    = {2015 SIAM International Conference on Data Mining (SDM15)},
  year         = {2015},
  publisher    = {SIAM},
  organization = {SIAM},
  note         = {Best Research Paper Award},
}

% Preprint sharing its title and authors with london:jmlr16. It has no journal,
% so it is typed as unpublished rather than as an article.
@unpublished{london:stability15,
  author   = {London, Ben and Huang, Bert and Getoor, Lise},
  title    = {Stability and Generalization in Structured Prediction},
  year     = {2015},
  note     = {Preprint},
  keywords = {PAC-Bayes, generalization bounds, learning theory, structured prediction},
}

% Doctoral thesis: the entry type carries the thesis kind, so no "type" override.
@phdthesis{london:thesis15,
  author = {London, Ben},
  title  = {On the Stability of Structured Prediction},
  school = {University of Maryland},
  year   = {2015},
}

@inproceedings{farnadi:ilp15,
  author    = {Farnadi, Golnoosh and Bach, Stephen H. and Blondeel, Marjon and Moens, Marie-Francine and Getoor, Lise and De Cock, Martine},
  title     = {Statistical Relational Learning with Soft Quantifiers},
  booktitle = {International Conference on Inductive Logic Programming (ILP)},
  year      = {2015},
  note      = {Winner of Best Student Paper award.},
}

@inproceedings{london:nips14ws,
  author    = {London, Ben and Huang, Bert and Getoor, Lise},
  title     = {On the Strong Convexity of Variational Inference},
  booktitle = {NIPS Workshop on Advances in Variational Inference},
  year      = {2014},
}

@inproceedings{moustafa:icde14,
  author    = {Moustafa, Walaa Eldin and Kimmig, Angelika and Deshpande, Amol and Getoor, Lise},
  title     = {Subgraph Pattern Matching over Uncertain Graphs with Identity Linkage Uncertainty},
  booktitle = {International Conference on Data Engineering (ICDE)},
  year      = {2014},
}

@inproceedings{bach:nips12,
  author    = {Bach, Stephen and Broecheler, Matthias and Getoor, Lise and O'Leary, Dianne},
  title     = {Scaling {MPE} Inference for Constrained Continuous {Markov} Random Fields with Consensus Optimization},
  booktitle = {NeurIPS},
  year      = {2012},
  abstract  = {Probabilistic graphical models are powerful tools for analyzing constrained, continuous domains. However, finding most-probable explanations (MPEs) in these models can be computationally expensive. In this paper, we improve the scalability of MPE inference in a class of graphical models with piecewise-linear and piecewise-quadratic dependencies and linear constraints over continuous domains. We derive algorithms based on a consensus-optimization framework and demonstrate their superior performance over state of the art. We show empirically that in a large-scale voter-preference modeling problem our algorithms scale linearly in the number of dependencies and constraints.},
}

@inproceedings{huang:social2012,
  author    = {Huang, Bert and Bach, Stephen and Norris, Eric and Pujara, Jay and Getoor, Lise},
  title     = {Social Group Modeling with Probabilistic Soft Logic},
  booktitle = {NeurIPS Workshop on SNSMA},
  year      = {2012},
  abstract  = {In this work, we show how to model the group affiliations of social media users using probabilistic soft logic. We consider groups of a broad variety, motivated by ideas from the social sciences on groups and their roles in social identity. By modeling group affiliations, we allow the possibility of efficient higher-level relational reasoning about the groups themselves, where the number of groups is relatively small compared to the number of users. We discuss preliminary results from experiments using real social media data collected from Twitter.},
}

@inproceedings{sharara:ase12,
  author    = {Sharara, Hossam and Singh, Lisa and Getoor, Lise and Mann, Janet},
  title     = {Stability vs. Diversity: Understanding the Dynamics of Actors in Time-varying Affiliation Networks},
  booktitle = {ICSI},
  pages     = {1--6},
  year      = {2012},
  abstract  = {Most networks contain embedded communities or groups that impact the overall gathering and dissemination of ideas and information. These groups consist of important or prominent individuals who actively participate in network activities over time. In this paper, we introduce a new method for identifying actors with prominent group memberships in time-varying affiliation networks. We define a prominent actor to be one who participates in the same group regularly (stable participation) and participates across different groups consistently (diverse participation), thereby having a position of structural influence in the network. Our proposed methods for quantifying stable and diverse participation takes into consideration the underlying semantics for group participation as well as the level of impact of an actor's history on his or her current behavior. We illustrate the semantics of our measures on real-world data sets with varying temporal connectivity structures.},
}

@inproceedings{zheleva:www10,
  author    = {Zheleva, Elena and Guiver, John and Mendes Rodrigues, Eduarda and Milic-Frayling, Natasa},
  title     = {Statistical Models of Music-listening Sessions in Social Media},
  booktitle = {19th International World Wide Web Conference (WWW)},
  year      = {2010},
}

@inproceedings{somasundaran:emnlp09,
  author    = {Somasundaran, Swapna and Namata, Galileo Mark and Wiebe, Janyce and Getoor, Lise},
  title     = {Supervised and Unsupervised Methods in Employing Discourse Relations for Improving Opinion Polarity Classification},
  booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)},
  year      = {2009},
  month     = aug,
}

% The DOI below is the one quoted in the note; the note itself is kept as exported.
@article{dietterich:ml08,
  author  = {Dietterich, Thomas and Domingos, Pedro and Getoor, Lise and Muggleton, Stephen and Tadepalli, Prasad},
  title   = {Structured machine learning: the next ten years},
  journal = {Machine Learning},
  volume  = {73},
  number  = {1},
  pages   = {3--23},
  year    = {2008},
  doi     = {10.1007/s10994-008-5079-1},
  note    = {Full version is available at http://dx.doi.org/10.1007/s10994-008-5079-1},
}

@article{islamaj:nar07,
  author  = {Islamaj, Rezarta and Getoor, Lise and Wilbur, W. John and Mount, Stephen},
  title   = {{SplicePort} - An interactive splice-site analysis tool},
  journal = {Nucleic Acids Research},
  year    = {2007},
}

@inproceedings{licamele:icdm06,
  author    = {Licamele, Louis and Getoor, Lise},
  title     = {Social Capital in Friendship-Event Networks},
  booktitle = {IEEE International Conference on Data Mining (ICDM)},
  year      = {2006},
  month     = dec,
}

@article{getoor:de03,
  author  = {Getoor, Lise},
  title   = {Structure Discovery Using Statistical Relational Learning},
  journal = {Data Engineering Bulletin},
  volume  = {26},
  number  = {3},
  pages   = {11--18},
  year    = {2003},
}

@inproceedings{getoor:sigmod01,
  author    = {Getoor, Lise and Koller, Daphne and Taskar, Benjamin},
  title     = {Selectivity estimation using probabilistic relational models},
  booktitle = {Proceedings of ACM-SIGMOD 2001 International Conference on Management of Data},
  year      = {2001},
}

% The next two entries share a title: a 1995 conference paper and a 1994 workshop paper.
@inproceedings{lansky:ijcai95,
  author    = {Lansky, Amy and Getoor, Lise},
  title     = {Scope and Abstraction: Two Criteria for Localized Planning},
  booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence},
  year      = {1995},
}

@inproceedings{lansky:tra94,
  author    = {Lansky, Amy and Getoor, Lise},
  title     = {Scope and Abstraction: Two Criteria for Localized Planning},
  booktitle = {Proceedings of the Workshop on Theory Reformulation and Abstraction},
  year      = {1994},
}