@inproceedings{key355,
  title     = {{BOWL}: {Bayesian} Optimization for Weight Learning in {Probabilistic Soft Logic}},
  booktitle = {{AAAI} Conference on Artificial Intelligence ({AAAI})},
  year      = {2020},
  abstract  = {Probabilistic soft logic (PSL) is a statistical relational learning framework that represents complex relational models with weighted first-order logical rules. The weights of the rules in PSL indicate their importance in the model and influence the effectiveness of the model on a given task. Existing weight learning approaches often attempt to learn a set of weights that maximizes some function of data likelihood. However, this does not always translate to optimal performance on a desired domain metric, such as accuracy or F1 score. In this paper, we introduce a new weight learning approach called Bayesian optimization for weight learning (BOWL) based on Gaussian process regression that directly optimizes weights on a chosen domain performance metric. The key to the success of our approach is a novel projection that captures the semantic distance between the possible weight configurations. Our experimental results show that our proposed approach outperforms likelihood-based approaches and yields up to a 10\% improvement across a variety of performance metrics. Further, we performed experiments to measure the scalability and robustness of our approach on various real world datasets.},
  author    = {Srinivasan, Sriram and Farnadi, Golnoosh and Getoor, Lise},
}

@inproceedings{357,
  title     = {Estimating Aggregate Properties In Relational Networks With Unobserved Data},
  booktitle = {{AAAI} Workshop on Statistical Relational Artificial Intelligence ({StarAI})},
  year      = {2020},
  abstract  = {Aggregate network properties such as cluster cohesion and the number of bridge nodes can be used to glean insights about a network{\textquoteright}s community structure, spread of influence and the resilience of the network to faults. Efficiently computing network properties when the network is fully observed has received significant attention (Wasserman and Faust 1994; Cook and Holder 2006), however the problem of computing aggregate network properties when there are missing data attributes has received little attention. Computing these properties for networks with missing attributes involves performing inference over the network. Statistical relational learning (SRL) and graph neural networks (GNNs) are two classes of machine learning approaches well suited for inferring missing attributes in a graph. In this paper, we study the effectiveness of these approaches in estimating aggregate properties on networks with missing attributes. We compare two SRL approaches and three GNNs. For these approaches we estimate these properties using point estimates such as MAP and mean. For SRL-based approaches that can infer a joint distribution over the missing attributes, we also estimate these properties as an expectation over the distribution. To compute the expectation tractably for probabilistic soft logic, one of the SRL approaches that we study, we introduce a novel sampling framework. In the experimental evaluation, using three benchmark datasets, we show that SRL-based approaches tend to outperform GNN-based approaches both in computing aggregate properties and predictive accuracy. Specifically, we show that estimating the aggregate properties as an expectation over the joint distribution outperforms point estimates.},
  author    = {Embar, Varun and Srinivasan, Sriram and Getoor, Lise},
}

@inproceedings{356,
  title     = {Tandem Inference: An Out-of-Core Streaming Algorithm For Very Large-Scale Relational Inference},
  booktitle = {{AAAI} Conference on Artificial Intelligence ({AAAI})},
  year      = {2020},
  abstract  = {Statistical relational learning (SRL) frameworks allow users to create large, complex graphical models using a compact, rule-based representation. However, these models can quickly become prohibitively large and not fit into machine memory. In this work we address this issue by introducing a novel technique called tandem inference (TI). The primary idea of TI is to combine grounding and inference such that both processes happen in tandem. TI uses an out-of-core streaming approach to overcome memory limitations. Even when memory is not an issue, we show that our proposed approach is able to do inference faster while using less memory than existing approaches. To show the effectiveness of TI, we use a popular SRL framework called Probabilistic Soft Logic (PSL). We implement TI for PSL by proposing a gradient-based inference engine and a streaming approach to grounding. We show that we are able to run an SRL model with over 1B cliques in under nine hours and using only 10 GB of RAM; previous approaches required more than 800 GB for this model and are infeasible on common hardware. To the best of our knowledge, this is the largest SRL model ever run.},
  author    = {Srinivasan, Sriram and Augustine, Eriq and Getoor, Lise},
}

@inproceedings{352,
  title     = {Identifying Facet Mismatches In Search Via Micrographs},
  booktitle = {International Conference on Information and Knowledge Management ({CIKM})},
  year      = {2019},
  abstract  = {E-commerce search engines are the primary means by which customers shop for products online. Each customer query contains multiple facets such as product type, color, brand, etc. A successful search engine retrieves products that are relevant to the query along each of these attributes. However, due to lexical (erroneous title, description, etc.) and behavioral irregularities (clicks or purchases of products that do not belong to the same facet as the query), some mismatched products are shown in the search results. These irregularities are often detected using simple binary classifiers like gradient boosted decision trees or logistic regression. Typically, these binary classifiers use strong independence assumptions between the samples and ignore structural relationships available in the data, such as the connections between products and queries. In this paper, we use the connections that exist between products and queries to identify a special kind of structure we refer to as a micrograph. Further, we make use of Statistical Relational Learning (SRL) to incorporate these micrographs in the data and pose the problem as a structured prediction problem. We refer to this approach as structured mismatch classification (smc). In addition, we show that naive addition of structure does not improve the performance of the model and hence introduce a variation of smc, strong smc (s2mc), which improves over the baseline by passing information from high-confidence predictions to lower confidence predictions. In our empirical evaluation we show that our proposed approach outperforms the baseline classification methods by up to 12\% in precision. Furthermore, we use quasi-Newton methods to make our method viable for real-time inference in a search engine and show that our approach is up to 150 times faster than existing ADMM-based solvers.},
  keywords  = {collective classification, defect, probabilistic soft logic, search, statistical relational language, structured prediction},
  author    = {Srinivasan, Sriram and Rao, Nikhil S. and Subbian, Karthik and Getoor, Lise},
  internal-note = {author surname corrected from "Subbaian" to "Subbian" per DBLP -- verify against the published paper},
}

@inproceedings{341,
  title     = {Lifted Hinge-Loss {Markov} Random Fields},
  booktitle = {{AAAI} Conference on Artificial Intelligence ({AAAI})},
  year      = {2019},
  abstract  = {Statistical relational learning models are powerful tools that combine ideas from first-order logic with probabilistic graphical models to represent complex dependencies. Despite their success in encoding large problems with a compact set of weighted rules, performing inference over these models is often challenging. In this paper, we show how to effectively combine two powerful ideas for scaling inference for large graphical models. The first idea, lifted inference, is a well-studied approach to speeding up inference in graphical models by exploiting symmetries in the underlying problem. The second idea is to frame Maximum a posteriori (MAP) inference as a convex optimization problem and use alternating direction method of multipliers (ADMM) to solve the problem in parallel. A well-studied relaxation to the combinatorial optimization problem defined for logical Markov random fields gives rise to a hinge-loss Markov random field (HLMRF) for which MAP inference is a convex optimization problem. We show how the formalism introduced for coloring weighted bipartite graphs using a color refinement algorithm can be integrated with the ADMM optimization technique to take advantage of the sparse dependency structures of HLMRFs. Our proposed approach, lifted hinge-loss Markov random fields (LHL-MRFs), preserves the structure of the original problem after lifting and solves lifted inference as distributed convex optimization with ADMM. In our empirical evaluation on real-world problems, we observe up to a three times speed up in inference over HL-MRFs.},
  author    = {Srinivasan, Sriram and Babaki, Behrouz and Farnadi, Golnoosh and Getoor, Lise},
  internal-note = {original month field read "11/2018", which is not a valid month and conflicts with year 2019 -- verify publication date},
}

@inproceedings{350,
  title     = {Tractable Marginal Inference for Hinge-Loss {Markov} Random Fields},
  booktitle = {{ICML} Workshop on Tractable Probabilistic Modeling ({TPM})},
  year      = {2019},
  month     = jun,
  abstract  = {Hinge-loss Markov random fields (HL-MRFs) are a class of undirected graphical models that has been successfully applied to model richly structured data. HL-MRFs are defined over a set of continuous random variables in the range [0,1], which makes computing the MAP convex. However, computation of marginal distributions remain intractable. In this paper, we introduce a novel sampling-based algorithm to compute marginal distributions. We define the notion of association blocks, which help identify islands of high probability, and propose a novel approach to sample from these regions. We validate our approach by estimating both average precision and various properties of a social network. We show that the proposed approach outperforms MAP estimates in both average precision and the accuracy of the properties by 20\% and 40\% respectively on the large social network.},
  author    = {Embar, Varun and Srinivasan, Sriram and Getoor, Lise},
}

@inproceedings{337,
  title     = {A Fairness-aware Hybrid Recommender System},
  booktitle = {{RecSys} Workshop on Responsible Recommendation ({FATREC})},
  year      = {2018},
  abstract  = {Recommender systems are used in a variety of domains affecting people{\textquoteright}s lives. This has raised concerns about possible biases and discrimination that such systems might exacerbate. There are two primary kinds of biases inherent in recommender systems: observation bias and bias stemming from imbalanced data. Observation bias exists due to a feedback loop which causes the model to learn to only predict recommendations similar to previous ones. Imbalance in data occurs when systematic societal, historical, or other ambient bias is present in the data. In this paper, we address both biases by proposing a hybrid fairness-aware recommender system. Our model provides efficient and accurate recommendations by incorporating multiple user-user and item-item similarity measures, content, and demographic information, while addressing recommendation biases. We implement our model using a powerful and expressive probabilistic programming language called probabilistic soft logic. We experimentally evaluate our approach on a popular movie recommendation dataset, showing that our proposed model can provide more accurate and fairer recommendations, compared to a state-of-the-art fair recommender system.},
  author    = {Farnadi, Golnoosh and Kouki, Pigi and Thompson, Spencer K. and Srinivasan, Sriram and Getoor, Lise},
}