% Bibliography database, one entry per publication.
% Citation keys are unchanged from the original export.
% Conventions used throughout:
%   - author names are written "Last, First" and joined with " and "
%   - month uses the predefined three-letter macros (jan ... dec), unquoted
%   - page ranges use a double hyphen
%   - words that must keep their capitalisation in titles are brace-protected
%   - non-ASCII characters are written as LaTeX escapes

@inproceedings{356,
  author    = {Srinivasan, Sriram and Augustine, Eriq and Getoor, Lise},
  title     = {Tandem Inference: An Out-of-Core Streaming Algorithm For Very Large-Scale Relational Inference},
  booktitle = {AAAI Conference on Artificial Intelligence (AAAI)},
  year      = {2020},
  abstract  = {Statistical relational learning (SRL) frameworks allow users to create large, complex graphical models using a compact, rule-based representation. However, these models can quickly become prohibitively large and not fit into machine memory. In this work we address this issue by introducing a novel technique called tandem inference (TI). The primary idea of TI is to combine grounding and inference such that both processes happen in tandem. TI uses an out-of-core streaming approach to overcome memory limitations. Even when memory is not an issue, we show that our proposed approach is able to do inference faster while using less memory than existing approaches. To show the effectiveness of TI, we use a popular SRL framework called Probabilistic Soft Logic (PSL). We implement TI for PSL by proposing a gradient-based inference engine and a streaming approach to grounding. We show that we are able to run an SRL model with over 1B cliques in under nine hours and using only 10 GB of RAM; previous approaches required more than 800 GB for this model and are infeasible on common hardware. To the best of our knowledge, this is the largest SRL model ever run.}
}

@inproceedings{350,
  author    = {Embar, Varun and Srinivasan, Sriram and Getoor, Lise},
  title     = {Tractable Marginal Inference for Hinge-Loss {Markov} Random Fields},
  booktitle = {ICML Workshop on Tractable Probabilistic Modeling (TPM)},
  year      = {2019},
  month     = jun,
  abstract  = {Hinge-loss Markov random fields (HL-MRFs) are a class of undirected graphical models that has been successfully applied to model richly structured data. HL-MRFs are defined over a set of continuous random variables in the range [0,1], which makes computing the MAP convex. However, computation of marginal distributions remains intractable. In this paper, we introduce a novel sampling-based algorithm to compute marginal distributions. We define the notion of association blocks, which help identify islands of high probability, and propose a novel approach to sample from these regions. We validate our approach by estimating both average precision and various properties of a social network. We show that the proposed approach outperforms MAP estimates in both average precision and the accuracy of the properties by 20\% and 40\% respectively on the large social network.}
}

@inproceedings{348,
  author    = {Augustine, Eriq and Rekatsinas, Theodoros and Getoor, Lise},
  title     = {Tractable Probabilistic Reasoning Through Effective Grounding},
  booktitle = {ICML Workshop on Tractable Probabilistic Modeling (TPM)},
  year      = {2019},
  abstract  = {Templated Statistical Relational Learning languages, such as Markov Logic Networks (MLNs) and Probabilistic Soft Logic (PSL), offer much of the expressivity of probabilistic graphical models in a compact form that is intuitive to both experienced modelers and domain experts. However, these languages have historically suffered from tractability issues stemming from the large size of the instantiated models and the complex joint inference performed over these models. Although much research has gone into improving the tractability of these languages using approximate or lifted inference, a relatively small amount of research has gone into improving tractability through efficient instantiation of these large models. In this position paper, we will draw attention to open research areas around efficiently instantiating templated probabilistic models.}
}

% Entries 334 and 338 share a title and author list but name different venues
% (a workshop and a conference); both are kept as separate records.

@inproceedings{334,
  author    = {Tomkins, Sabina and Farnadi, Golnoosh and Amantullah, Brian and Getoor, Lise and Minton, Steven},
  title     = {The Impact of Environmental Stressors on Human Trafficking},
  booktitle = {ICWSM Workshop on Beyond Online Data (BOD)},
  year      = {2018},
  abstract  = {Severe environmental events have extreme effects on all segments of society, including criminal activity. Extreme weather events, such as tropical storms, fires, and floods create instability in communities, and can be exploited by criminal organizations. Here we investigate the potential impact of catastrophic storms on the criminal activity of human trafficking. We propose three theories of how these catastrophic storms might impact trafficking and provide evidence for each. Researching human trafficking is made difficult by its illicit nature and the obscurity of high-quality data. Here, we analyze online advertisements for services which can be collected at scale and provide insights into traffickers{\textquoteright} behavior. To successfully combine relevant heterogeneous sources of information, as well as spatial and temporal structure, we propose a collective, probabilistic approach. We implement this approach with Probabilistic Soft Logic, a probabilistic programming framework which can flexibly model relational structure and for which inference of future locations is highly efficient. Furthermore, this framework can be used to model hidden structure, such as latent links between locations. Our proposed approach can model and predict how traffickers move. In addition, we propose a model which learns connections between locations. This model is then adapted to have knowledge of environmental events, and we demonstrate that incorporating knowledge of environmental events can improve prediction of future locations. While we have validated our models on the impact of severe weather on human trafficking, we believe our models can be generalized to a variety of other settings in which environmental events impact human behavior.}
}

@inproceedings{338,
  author    = {Tomkins, Sabina and Farnadi, Golnoosh and Amantullah, Brian and Getoor, Lise and Minton, Steven},
  title     = {The Impact of Environmental Stressors on Human Trafficking},
  booktitle = {International Conference on Data Mining (ICDM)},
  year      = {2018},
  abstract  = {Severe environmental events have extreme effects on all segments of society, including criminal activity. Extreme weather events, such as tropical storms, fires, and floods create instability in communities, and can be exploited by criminal organizations. Here we investigate the potential impact of catastrophic storms on the criminal activity of human trafficking. We propose three theories of how these catastrophic storms might impact trafficking and provide evidence for each. Researching human trafficking is made difficult by its illicit nature and the obscurity of high-quality data. Here, we analyze online advertisements for services which can be collected at scale and provide insights into traffickers{\textquoteright} behavior. To successfully combine relevant heterogeneous sources of information, as well as spatial and temporal structure, we propose a collective, probabilistic approach. We implement this approach with Probabilistic Soft Logic, a probabilistic programming framework which can flexibly model relational structure and for which inference of future locations is highly efficient. Furthermore, this framework can be used to model hidden structure, such as latent links between locations. Our proposed approach can model and predict how traffickers move. In addition, we propose a model which learns connections between locations. This model is then adapted to have knowledge of environmental events, and we demonstrate that incorporating knowledge of environmental events can improve prediction of future locations. While we have validated our models on the impact of severe weather on human trafficking, we believe our models can be generalized to a variety of other settings in which environmental events impact human behavior.}
}

@inproceedings{london:icml15,
  author    = {London, Ben and Huang, Bert and Getoor, Lise},
  title     = {The Benefits of Learning with Strongly Convex Approximate Inference},
  booktitle = {International Conference on Machine Learning (ICML)},
  year      = {2015},
  abstract  = {We explore the benefits of strongly convex free energies in variational inference, providing both theoretical motivation and a new meta-algorithm. Using the duality between strong convexity and stability, we prove a high-probability bound on the error of learned marginals that is inversely proportional to the modulus of convexity of the free energy, thereby motivating free energies whose moduli are constant with respect to the size of the graph. We identify sufficient conditions for $\Omega(1)$-strong convexity in two popular variational techniques: tree-reweighted and counting number entropies. Our insights for the latter suggest a novel counting number optimization framework, which guarantees strong convexity for any given modulus. Our experiments demonstrate that learning with a strongly convex free energy, using our optimization framework to guarantee a given modulus, results in substantially more accurate marginal probabilities, thereby validating our theoretical claims and the effectiveness of our framework.}
}

@article{skaggs:tois2014,
  author  = {Skaggs, Bradley and Getoor, Lise},
  title   = {Topic Modeling for {Wikipedia} Link Disambiguation},
  journal = {ACM Transactions on Information Systems},
  volume  = {32},
  number  = {3},
  year    = {2014}
}

% NOTE(review): year and volume below are kept exactly as found in the export;
% verify them against the publisher record. The page range was previously
% stored in a "chapter" field.

@article{getoor:tkde12b,
  author   = {Papadimitriou, Panagiotis and Tsaparas, Panayiotis and Fuxman, Ariel and Getoor, Lise},
  title    = {{TACI}: Taxonomy-Aware Catalog Integration},
  journal  = {IEEE Transactions on Knowledge and Data Engineering},
  volume   = {25},
  pages    = {1643--1655},
  year     = {2012},
  abstract = {A fundamental data integration task faced by online commercial portals and commerce search engines is the integration of products coming from multiple providers to their product catalogs. In this scenario, the commercial portal has its own taxonomy (the {\textquotedblleft}master taxonomy{\textquotedblright}), while each data provider organizes its products into a different taxonomy (the {\textquotedblleft}provider taxonomy{\textquotedblright}). In this paper, we consider the problem of categorizing products from the data providers into the master taxonomy, while making use of the provider taxonomy information. Our approach is based on a taxonomy-aware processing step that adjusts the results of a text-based classifier to ensure that products that are close together in the provider taxonomy remain close in the master taxonomy. We formulate this intuition as a structured prediction optimization problem. To the best of our knowledge, this is the first approach that leverages the structure of taxonomies in order to enhance catalog integration. We propose algorithms that are scalable and thus applicable to the large datasets that are typical on the Web. We evaluate our algorithms on real-world data and we show that taxonomy-aware classification provides a significant improvement over existing approaches.}
}

@inproceedings{zheleva:www09,
  author    = {Zheleva, Elena and Getoor, Lise},
  title     = {To Join or not to Join: The Illusion of Privacy in Social Networks with Mixed Public and Private User Profiles},
  booktitle = {18th International World Wide Web conference (WWW)},
  year      = {2009},
  month     = apr,
  note      = {Earlier version appears as CS-TR-4926.},
  keywords  = {anonymity online, groups, privacy, sensitive attribute inference, social networks}
}

% NOTE(review): the next entry carries technical-report metadata (number
% CS-TR-4926, University of Maryland, College Park) alongside a conference
% booktitle. It may be intended as a technical report; confirm with the
% authors before changing its type. All fields are kept as found.

@inproceedings{zheleva:umtr08,
  author       = {Zheleva, Elena and Getoor, Lise},
  title        = {To Join or not to Join: The Illusion of Privacy in Social Networks with Mixed Public and Private User Profiles},
  booktitle    = {The Web Conference (WWW)},
  number       = {CS-TR-4926},
  publisher    = {University of Maryland},
  organization = {University of Maryland},
  address      = {College Park},
  year         = {2009},
  note         = {An earlier version appears as CS-TR-4922, July 2008},
  keywords     = {anonymity online, groups, privacy, sensitive attribute inference, social networks}
}

@article{zheleva:tois08,
  author  = {Zheleva, Elena and Kolcz, Alek and Getoor, Lise},
  title   = {Trusting Spam Reporters: A Reporter-based Reputation System for Email Filtering},
  journal = {ACM Transactions on Information Systems},
  volume  = {27},
  number  = {1},
  year    = {2008},
  month   = dec,
  note    = {Full version in ACM library}
}

@inproceedings{kddpanel06,
  author    = {Piatetsky-Shapiro, Gregory and Grossman, Robert and Djeraba, Chabane and Feldman, Ronen and Getoor, Lise and Zaki, Mohammed},
  title     = {Is there a grand challenge or {X-prize} for data mining?},
  booktitle = {12th International Conference on Knowledge Discovery and Data Mining},
  year      = {2006}
}