@conference {key355, title = {BOWL: Bayesian Optimization for Weight Learning in Probabilistic Soft Logic}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)}, year = {2020}, abstract = {Probabilistic soft logic (PSL) is a statistical relational learning framework that represents complex relational models with weighted first-order logical rules. The weights of the rules in PSL indicate their importance in the model and influence the effectiveness of the model on a given task. Existing weight learning approaches often attempt to learn a set of weights that maximizes some function of data likelihood. However, this does not always translate to optimal performance on a desired domain metric, such as accuracy or F1 score. In this paper, we introduce a new weight learning approach called Bayesian optimization for weight learning (BOWL) based on Gaussian process regression that directly optimizes weights on a chosen domain performance metric. The key to the success of our approach is a novel projection that captures the semantic distance between the possible weight configurations. Our experimental results show that our proposed approach outperforms likelihood-based approaches and yields up to a 10\% improvement across a variety of performance metrics. Further, we performed experiments to measure the scalability and robustness of our approach on various real world datasets.}, author = {Sriram Srinivasan and Golnoosh Farnadi and Lise Getoor} } @conference {359, title = {Causal Relational Learning}, booktitle = {International Conference on Management of Data (SIGMOD)}, year = {2020}, abstract = {Causal inference is at the heart of empirical research in natural and social sciences and is critical for scientific discovery and informed decision making. The gold standard in causal inference is performing randomized controlled trials; unfortunately these are not always feasible due to ethical, legal, or cost constraints. 
As an alternative, methodologies for causal inference from observational data have been developed in statistical studies and social sciences. However, existing methods critically rely on restrictive assumptions such as the study population consisting of homogeneous elements that can be represented in a single flat table, where each row is referred to as a unit. In contrast, in many real-world settings, the study domain naturally consists of heterogeneous elements with complex relational structure, where the data is naturally represented in multiple related tables. In this paper, we present a formal framework for causal inference from such relational data. We propose a declarative language called CaRL for capturing causal background knowledge and assumptions, and specifying causal queries using simple Datalog-like rules. CaRL provides a foundation for inferring causality and reasoning about the effect of complex interventions in relational domains. We present an extensive experimental evaluation on real relational data to illustrate the applicability of CaRL in social sciences and healthcare.}, author = {Babak Salimi and Harsh Parikh and Moe Kayali and Sudeepa Roy and Lise Getoor and Dan Suciu} } @conference {360, title = {Contrastive Entity Linkage: Mining Variational Attributes from Large Catalogs for Entity Linkage}, booktitle = {Automated Knowledge Base Construction (AKBC)}, year = {2020}, abstract = {Presence of near identical, but distinct, entities called entity variations makes the task of data integration challenging. For example, in the domain of grocery products, variations share the same value for attributes such as brand, manufacturer and product line, but differ in other attributes, called variational attributes, such as package size and color. Identifying variations across data sources is an important task in itself and is crucial for identifying duplicates. 
However, this task is challenging as the variational attributes are often present as a part of unstructured text and are domain dependent. In this work, we propose our approach, Contrastive entity linkage, to identify both entity pairs that are the same and pairs that are variations of each other. We propose a novel unsupervised approach, VarSpot, to mine domain-dependent variational attributes present in unstructured text. The proposed approach reasons about both similarities and differences between entities and can easily scale to large sources containing millions of entities. We show the generality of our approach by performing experimental evaluation on three different domains. Our approach significantly outperforms state-of-the-art learning-based and rule-based entity linkage systems by up to 4\% F1 score when identifying duplicates, and up to 41\% when identifying entity variations.}, author = {Varun Embar and Bunyamin Sisman and Hao Wei and Xin Luna Dong and Christos Faloutsos and Lise Getoor} } @conference {357, title = {Estimating Aggregate Properties In Relational Networks With Unobserved Data}, booktitle = {AAAI Workshop on Statistical Relational Artificial Intelligence (StarAI)}, year = {2020}, abstract = {Aggregate network properties such as cluster cohesion and the number of bridge nodes can be used to glean insights about a network{\textquoteright}s community structure, spread of influence and the resilience of the network to faults. Efficiently computing network properties when the network is fully observed has received significant attention (Wasserman and Faust 1994; Cook and Holder 2006), however the problem of computing aggregate network properties when there is missing data attributes has received little attention. Computing these properties for networks with missing attributes involves performing inference over the network. 
Statistical relational learning (SRL) and graph neural networks (GNNs) are two classes of machine learning approaches well suited for inferring missing attributes in a graph. In this paper, we study the effectiveness of these approaches in estimating aggregate properties on networks with missing attributes. We compare two SRL approaches and three GNNs. For these approaches we estimate these properties using point estimates such as MAP and mean. For SRL-based approaches that can infer a joint distribution over the missing attributes, we also estimate these properties as an expectation over the distribution. To compute the expectation tractably for probabilistic soft logic, one of the SRL approaches that we study, we introduce a novel sampling framework. In the experimental evaluation, using three benchmark datasets, we show that SRL-based approaches tend to outperform GNN-based approaches both in computing aggregate properties and predictive accuracy. Specifically, we show that estimating the aggregate properties as an expectation over the joint distribution outperforms point estimates. }, author = {Varun Embar and Sriram Srinivasan and Lise Getoor} } @conference {356, title = {Tandem Inference: An Out-of-Core Streaming Algorithm For Very Large-Scale Relational Inference}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)}, year = {2020}, abstract = {Statistical relational learning (SRL) frameworks allow users to create large, complex graphical models using a compact, rule-based representation. However, these models can quickly become prohibitively large and not fit into machine memory. In this work we address this issue by introducing a novel technique called tandem inference (TI). The primary idea of TI is to combine grounding and inference such that both processes happen in tandem. TI uses an out-of-core streaming approach to overcome memory limitations. 
Even when memory is not an issue, we show that our proposed approach is able to do inference faster while using less memory than existing approaches. To show the effectiveness of TI, we use a popular SRL framework called Probabilistic Soft Logic (PSL). We implement TI for PSL by proposing a gradient-based inference engine and a streaming approach to grounding. We show that we are able to run an SRL model with over 1B cliques in under nine hours and using only 10 GB of RAM; previous approaches required more than 800 GB for this model and are infeasible on common hardware. To the best of our knowledge, this is the largest SRL model ever run. }, author = {Sriram Srinivasan and Eriq Augustine and Lise Getoor} } @conference {351, title = {Estimating Causal Effects of Tone in Online Debates}, booktitle = {International Joint Conference on Artificial Intelligence (IJCAI)}, year = {2019}, abstract = {Statistical methods applied to social media posts shed light on the dynamics of online dialogue. For example, users{\textquoteright} wording choices predict their persuasiveness and users adopt the language patterns of other dialogue participants. In this paper, we estimate the causal effect of reply tones in debates on linguistic and sentiment changes in subsequent responses. The challenge for this estimation is that a reply{\textquoteright}s tone and subsequent responses are confounded by the users{\textquoteright} ideologies on the debate topic and their emotions. To overcome this challenge, we learn representations of ideology using generative models of text. We study debates from 4Forums.com and compare annotated tones of replying such as emotional versus factual, or reasonable versus attacking. We show that our latent confounder representation reduces bias in ATE estimation. Our results suggest that factual and asserting tones affect dialogue and provide a methodology for estimating causal effects from text. 
}, author = {Dhanya Sridhar and Lise Getoor} } @article {363, title = {Generating and Understanding Personalized Explanations in Hybrid Recommender Systems}, journal = {ACM Transactions on Interactive Intelligent Systems}, year = {2019}, abstract = {Recommender systems are ubiquitous, and shape the way users access information and make decisions. As these systems become more complex, there is a growing need for transparency and interpretability. In this paper, we study the problem of generating and visualizing personalized explanations for recommender systems which incorporate signals from many different data sources. We use a flexible, extendable probabilistic programming approach, and show how we can generate real-time personalized recommendations. We then turn these personalized recommendations into explanations. We perform an extensive user study to evaluate the benefits of explanations for hybrid recommender systems. We conduct a crowd-sourced user study where our system generates personalized recommendations and explanations for real users of the last.fm music platform. First, we evaluate the performance of the recommendations in terms of perceived accuracy and novelty. Next, we experiment with 1) different explanation styles (e.g., user-based, item-based), 2) manipulating the number of explanation styles presented, and 3) manipulating the presentation format (e.g., textual vs. visual). We also apply a mixed-model statistical analysis to consider user personality traits as a control variable and demonstrate the usefulness of our approach in creating personalized hybrid explanations with different style, number, and format. 
Finally, we perform a post analysis which shows different preferences for explanation styles between experienced and novice last.fm users.}, author = {Pigi Kouki and James Schaffer and Jay Pujara and John O{\textquoteright}Donovan and Lise Getoor} } @conference {352, title = {Identifying Facet Mismatches In Search Via Micrographs}, booktitle = {International Conference on Information and Knowledge Management (CIKM)}, year = {2019}, abstract = {E-commerce search engines are the primary means by which customers shop for products online. Each customer query contains multiple facets such as product type, color, brand, etc. A successful search engine retrieves products that are relevant to the query along each of these attributes. However, due to lexical (erroneous title, description, etc.) and behavioral irregularities (clicks or purchases of products that do not belong to the same facet as the query), some mismatched products are shown in the search results. These irregularities are often detected using simple binary classifiers like gradient boosted decision trees or logistic regression. Typically, these binary classifiers use strong independence assumptions between the samples and ignore structural relationships available in the data, such as the connections between products and queries. In this paper, we use the connections that exist between products and query to identify a special kind of structure we refer to as a micrograph. Further, we make use of Statistical Relational Learning (SRL) to incorporate these micrographs in the data and pose the problem as a structured prediction problem. We refer to this approach as structured mismatch classification (smc). In addition, we show that naive addition of structure does not improve the performance of the model and hence introduce a variation of smc, strong smc (s2mc), which improves over the baseline by passing information from high-confidence predictions to lower confidence predictions. 
In our empirical evaluation we show that our proposed approach outperforms the baseline classification methods by up to 12\% in precision. Furthermore, we use quasi-Newton methods to make our method viable for real-time inference in a search engine and show that our approach is up to 150 times faster than existing ADMM-based solvers.}, keywords = {collective classification, defect, probabilistic soft logic, search, statistical relational language, structured prediction}, author = {Sriram Srinivasan and Nikhil S Rao and Karthik Subbaian and Lise Getoor} } @conference {341, title = {Lifted Hinge-Loss Markov Random Fields}, booktitle = {AAAI Conference on Artificial Intelligence (AAAI)}, year = {2019}, month = {11/2018}, abstract = {Statistical relational learning models are powerful tools that combine ideas from first-order logic with probabilistic graphical models to represent complex dependencies. Despite their success in encoding large problems with a compact set of weighted rules, performing inference over these models is often challenging. In this paper, we show how to effectively combine two powerful ideas for scaling inference for large graphical models. The first idea, lifted inference, is a well-studied approach to speeding up inference in graphical models by exploiting symmetries in the underlying problem. The second idea is to frame Maximum a posteriori (MAP) inference as a convex optimization problem and use alternating direction method of multipliers (ADMM) to solve the problem in parallel. A well-studied relaxation to the combinatorial optimization problem defined for logical Markov random fields gives rise to a hinge-loss Markov random field (HLMRF) for which MAP inference is a convex optimization problem. We show how the formalism introduced for coloring weighted bipartite graphs using a color refinement algorithm can be integrated with the ADMM optimization technique to take advantage of the sparse dependency structures of HLMRFs. 
Our proposed approach, lifted hinge-loss Markov random fields (LHL-MRFs), preserves the structure of the original problem after lifting and solves lifted inference as distributed convex optimization with ADMM. In our empirical evaluation on real-world problems, we observe up to a three times speed up in inference over HL-MRFs.}, author = {Sriram Srinivasan and Behrouz Babaki and Golnoosh Farnadi and Lise Getoor} } @conference {342, title = {Personalized Explanations for Hybrid Recommender Systems}, booktitle = {Intelligent User Interfaces (IUI)}, year = {2019}, abstract = {Hybrid recommender systems, which combine the strength of several information sources to provide recommendations, have emerged as a means to improve the quality of recommendations. Although such systems are highly effective, they are inherently complex. As a result, providing users with a visually-appealing and useful explanation for each recommendation poses a significant challenge. In this paper, we study the problems of generating and visualizing personalized explanations from hybrid recommender systems. We build upon a hybrid probabilistic graphical model and develop an approach to generate real-time recommendations along with personalized explanations. To study the benefits of explanations for hybrid recommender systems, we conduct a crowd-sourced user study where our system generates personalized recommendations and explanations for real users of the last.fm music platform. We also experiment with different presentation formats, such as textual or graphical. We experiment with 1) different explanation styles (e.g., user-based, item-based), 2) varying the volume (i.e., number) of the explanation styles, and 3) a variety of presentation formats (such as textual or visual). 
We apply a mixed model statistical analysis to consider the user personality traits as a control variable, and demonstrate the usefulness of our approach in creating personalized hybrid explanations with different style, volume, and format.}, author = {Pigi Kouki and James Schaffer and Jay Pujara and John O{\textquoteright}Donovan and Lise Getoor} } @conference {350, title = {Tractable Marginal Inference for Hinge-Loss Markov Random Fields}, booktitle = {ICML Workshop on Tractable Probabilistic Modeling (TPM)}, year = {2019}, month = {06/2019}, abstract = {Hinge-loss Markov random fields (HL-MRFs) are a class of undirected graphical models that has been successfully applied to model richly structured data. HL-MRFs are defined over a set of continuous random variables in the range [0,1], which makes computing the MAP convex. However, computation of marginal distributions remains intractable. In this paper, we introduce a novel sampling-based algorithm to compute marginal distributions. We define the notion of association blocks, which help identify islands of high probability, and propose a novel approach to sample from these regions. We validate our approach by estimating both average precision and various properties of a social network. We show that the proposed approach outperforms MAP estimates in both average precision and the accuracy of the properties by 20\% and 40\% respectively on the large social network.}, author = {Varun Embar and Sriram Srinivasan and Lise Getoor} } @conference {337, title = {A Fairness-aware Hybrid Recommender System}, booktitle = {RecSys Workshop on Responsible Recommendation (FATREC)}, year = {2018}, abstract = {

Recommender systems are used in a variety of domains affecting people{\textquoteright}s lives. This has raised concerns about possible biases and discrimination that such systems might exacerbate. There are two primary kinds of biases inherent in recommender systems: observation bias and bias stemming from imbalanced data. Observation bias exists due to a feedback loop which causes the model to learn to only predict recommendations similar to previous ones. Imbalance in data occurs when systematic societal, historical, or other ambient bias is present in the data. In this paper, we address both biases by proposing a hybrid fairness-aware recommender system. Our model provides efficient and accurate recommendations by incorporating multiple user-user and item-item similarity measures, content, and demographic information, while addressing recommendation biases. We implement our model using a powerful and expressive probabilistic programming language called probabilistic soft logic. We experimentally evaluate our approach on a popular movie recommendation dataset, showing that our proposed model can provide more accurate and fairer recommendations, compared to a state-of-the-art fair recommender system.

}, author = {Golnoosh Farnadi and Kouki, Pigi and Spencer K. Thompson and Sriram Srinivasan and Lise Getoor} } @conference {323, title = {A Structured Approach to Understanding Recovery and Relapse in AA}, booktitle = {The Web Conference (WWW)}, year = {2018}, abstract = {

Alcoholism, also known as Alcohol Use Disorder (AUD) is a serious problem affecting millions of people worldwide. Recovery from AUD is known to be challenging and often leads to relapse at various points after enrolling in a rehabilitation program such as Alcoholics Anonymous (AA). In this work, we take a structured approach to understand recovery and relapse from AUD using social media data. To do so, we combine linguistic and psychological attributes of users with relational features that capture useful structure in the user interaction network. We evaluate our models on AA-attending users extracted from the Twitter social network and predict recovery at two different points{\textemdash}90-days and 1 year after the user joins AA, respectively. Our experiments reveal that our structured approach is helpful in predicting recovery in these users. We perform extensive quantitative analysis of different groups of features and dependencies among them. Our analysis sheds light on the role of each feature group and how they combine to predict recovery and relapse. Finally, we present a qualitative analysis of different reasons behind users relapsing to AUD. Our models and analysis are helpful in making meaningful predictions in scenarios where only a subset of features are available and can potentially be helpful in identifying and preventing relapse early.

}, url = {https://github.com/yzhan202/zhang-www18-experiments}, author = {Zhang, Yue and Ramesh, Arti and Golbeck, Jennifer and Dhanya Sridhar and Lise Getoor} } @conference {330, title = {Estimating Causal Effects of Exercise from Mood Logging Data}, booktitle = {ICML Workshop on Causal Machine Learning (CausalML)}, year = {2018}, abstract = {

Mood and activity logging applications empower users to monitor their daily well-being and make informed health choices. To provide users with useful feedback that can improve quality of life, a critical task is understanding the causal effects of daily activities on mood and other wellness markers. In this work, we analyze observational data from EmotiCal, a recently developed mood-logging web application, to explore the effects of exercise on mood.\ We investigate several methodological choices for estimating the conditional average treatment effect, and highlight a novel use of textual data to improve the significance of our results.

}, author = {Dhanya Sridhar and Aaron Springer and Victoria Hollis and Steve Whittaker and Lise Getoor} } @conference {328, title = {Scalable Probabilistic Causal Structure Discovery}, booktitle = {International Joint Conference on Artificial Intelligence (IJCAI)}, year = {2018}, abstract = {

Complex causal networks underlie many real-world problems, from the regulatory interactions between genes to the environmental patterns used to understand climate change. Computational methods seek to infer these causal networks using observational data and domain knowledge. In this paper, we identify three key requirements for inferring the structure of causal networks for scientific discovery: (1) robustness to noise in observed measurements; (2) scalability to handle hundreds of variables; and (3) flexibility to encode domain knowledge and other structural constraints. We first formalize the problem of joint probabilistic causal structure discovery.\ We develop an approach using probabilistic soft logic (PSL) that exploits multiple statistical tests, supports efficient optimization over hundreds of variables, and can easily incorporate structural constraints, including imperfect domain knowledge. We compare our method against multiple well-studied approaches on biological and synthetic datasets, showing improvements of up to 20\% in F1-score over the best performing baseline in realistic settings.

}, url = {https://bitbucket.org/linqs/causpsl/src/master/}, author = {Dhanya Sridhar and Pujara, Jay and Lise Getoor} } @conference {332, title = {Scalable Structure Learning for Probabilistic Soft Logic}, booktitle = {IJCAI Workshop on Statistical Relational AI (StarAI)}, year = {2018}, month = {06/2018}, abstract = {

Statistical relational frameworks such as Markov logic networks and probabilistic soft logic (PSL) encode model structure with weighted first-order logical clauses. Learning these clauses from data is referred to as structure learning. Structure learning alleviates the manual cost of specifying models. However, this benefit comes with high computational costs; structure learning typically requires an expensive search over the space of clauses which involves repeated optimization of clause weights. In this paper, we propose the first two approaches to structure learning for PSL. We introduce a greedy search-based algorithm and a novel optimization method that trade off scalability and approximations to the structure learning problem in varying ways. The highly scalable optimization method combines data-driven generation of clauses with a piecewise pseudolikelihood (PPLL) objective that learns model structure by optimizing clause weights only once. We compare both methods across five real-world tasks, showing that PPLL achieves an order of magnitude runtime speedup and AUC gains up to 15\% over greedy search.

}, author = {Varun Embar and Dhanya Sridhar and Golnoosh Farnadi and Lise Getoor} } @conference {kouki:recsys17, title = {User Preferences for Hybrid Explanations}, booktitle = {11th ACM Conference on Recommender Systems (RecSys)}, year = {2017}, author = {Kouki, Pigi and Schaffer, James and Pujara, Jay and O{\textquoteright}Donovan, John and Lise Getoor} } @conference {sridhar:akbc17, title = {Using Noisy Extractions to Discover Causal Knowledge}, booktitle = {NIPS Workshop on Automated Knowledge Base Construction}, year = {2017}, author = {Dhanya Sridhar and Pujara, Jay and Lise Getoor} } @article {sridhar:bioinformatics16, title = {A Probabilistic Approach for Collective Similarity-based Drug-Drug Interaction Prediction}, journal = {Bioinformatics}, volume = {32}, year = {2016}, pages = {3175--3182}, abstract = {

MOTIVATION: As concurrent use of multiple medications becomes ubiquitous among patients, it is crucial to characterize both adverse and synergistic interactions between drugs. Statistical methods for prediction of putative drug-drug interactions (DDIs) can guide in vitro testing and cut down significant cost and effort. With the abundance of experimental data characterizing drugs and their associated targets, such methods must effectively fuse multiple sources of information and perform inference over the network of drugs.

RESULTS: We propose a probabilistic approach for jointly inferring unknown DDIs from a network of multiple drug-based similarities and known interactions. We use the highly scalable and easily extensible probabilistic programming framework Probabilistic Soft Logic. We compare against two methods including a state-of-the-art DDI prediction system across three experiments and show best performing improvements of more than 50\% in AUPR over both baselines. We find five novel interactions validated by external sources among the top-ranked predictions of our model.

AVAILABILITY AND IMPLEMENTATION: Final versions of all datasets and implementations will be made publicly available.

CONTACT: dsridhar@ucsc.edu.

}, author = {Dhanya Sridhar and Shobeir Fakhraei and Lise Getoor} } @conference {sridhar:kddws16, title = {Joint Probabilistic Inference of Causal Structure}, booktitle = {KDD Workshop on CD}, year = {2016}, abstract = {

Causal directed acyclic graphical models (DAGs) are powerful reasoning tools in the study and estimation of cause and effect in scientific and socio-behavioral phenomena. In many domains where the cause and effect structure is unknown, a key challenge in studying causality with DAGs is learning the structure of causal graphs directly from observational data. Traditional approaches to causal structure discovery are categorized as constraint-based or score-based approaches. Score-based methods perform greedy search over the space of models whereas constraint-based methods iteratively prune and orient edges using structural and statistical constraints. However, both types of approaches rely on heuristics that introduce false positives and negatives. In our work, we cast causal structure discovery as an inference problem and propose a joint probabilistic approach for optimizing over model structures. We use a recently introduced and highly efficient probabilistic programming framework known as Probabilistic Soft Logic (PSL) to encode constraint-based structure search. With this novel probabilistic approach to structure discovery, we leverage multiple independence tests and avoid early pruning and variable ordering. We compare our method to the notable PC algorithm on a well-studied synthetic dataset and show improvements in accuracy of predicting causal edges.

}, author = {Dhanya Sridhar and Lise Getoor} } @conference {sridhar:uaiws16, title = {Probabilistic Inference for Causal Structure Discovery}, booktitle = {UAI Workshop on Causation}, year = {2016}, author = {Dhanya Sridhar and Lise Getoor} } @conference {rekatsinas:sigmod16, title = {SourceSight: Enabling Effective Source Selection}, booktitle = {SIGMOD}, year = {2016}, abstract = {

Recently there has been a rapid increase in the number of data sources and data services, such as cloud-based data markets and data portals, that facilitate the collection, publishing and trading of data. Data sources typically exhibit large heterogeneity in the type and quality of data they provide. Unfortunately, when the number of data sources is large, it is difficult for users to reason about the actual usefulness of sources for their applications and the trade-offs between the benefits and costs of acquiring and integrating sources. In this demonstration we present SOURCESIGHT, a system that allows users to interactively explore a large number of heterogeneous data sources, and discover valuable sets of sources for diverse integration tasks. SOURCESIGHT uses a novel multi-level source quality index that enables effective source selection at different granularity levels, and introduces a collection of new techniques to discover and evaluate relevant sources for integration.

}, author = {Theodoros Rekatsinas and Amol Deshpande and Luna Dong and Lise Getoor and Divesh Srivastava} } @conference {fakhraei:kdd15, title = {Collective Spammer Detection in Evolving Multi-Relational Social Networks}, booktitle = {KDD}, year = {2015}, note = {Data and Code: https://github.com/shobeir/fakhraei_kdd2015}, abstract = {

Detecting unsolicited content and the spammers who create it is a long-standing challenge that affects all of us on a daily basis. The recent growth of richly-structured social networks has provided new challenges and opportunities in the spam detection landscape. Motivated by the Tagged.com social network, we develop methods to identify spammers in evolving multi-relational social networks. We model a social network as a time-stamped multi-relational graph where vertices represent users, and edges represent different activities between them. To identify spammer accounts, our approach makes use of structural features, sequence modelling, and collective reasoning. We leverage relational sequence information using k-gram features and probabilistic modelling with a mixture of Markov models. Furthermore, in order to perform collective reasoning and improve the predictive power of a noisy abuse reporting system, we develop a statistical relational model using hinge-loss Markov random fields (HL-MRFs), a class of probabilistic graphical models which are highly scalable. We use Graphlab Create and Probabilistic Soft Logic (PSL) to prototype and experimentally evaluate our solutions on internet-scale data from Tagged.com. Our experiments demonstrate the effectiveness of our approach, and show that models which incorporate the multi-relational nature of the social network significantly gain predictive performance over those that do not.

},
  author = {Shobeir Fakhraei and James Foulds and Madhusudana Shashanka and Lise Getoor}
}

% The abbreviated year in the CIDR venue name takes an apostrophe ('15).
@conference{rekatsinas:cidr15,
  title = {Finding Quality in Quantity: The Challenge of Discovering Valuable Sources for Integration},
  booktitle = {7th Biennial Conference on Innovative Data Systems Research (CIDR '15)},
  year = {2015},
  author = {Rekatsinas, Theodoros and Dong, Xin Luna and Lise Getoor and Srivastava, Divesh}
}

@conference{sridhar:acl15,
  title = {Joint Models of Disagreement and Stance in Online Debate},
  booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
  year = {2015},
  author = {Dhanya Sridhar and Foulds, James and Walker, Marilyn and Huang, Bert and Lise Getoor}
}

@conference{ramakrishnan:kdd14,
  title = {{\textquoteleft}Beating the news{\textquoteright} with EMBERS: Forecasting Civil Unrest using Open Source Indicators},
  booktitle = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  year = {2014},
  abstract = {

We describe the design, implementation, and evaluation of EMBERS, an automated, 24x7 continuous system for forecasting civil unrest across 10 countries of Latin America using open source indicators such as tweets, news sources, blogs, economic indicators, and other data sources. Unlike retrospective studies, EMBERS has been making forecasts into the future since Nov 2012 which have been (and continue to be) evaluated by an independent T\&E team (MITRE). Of note, EMBERS has successfully forecast the uptick and downtick of incidents during the June 2013 protests in Brazil. We outline the system architecture of EMBERS, individual models that leverage specific data sources, and a fusion and suppression engine that supports trading off specific evaluation criteria. EMBERS also provides an audit trail interface that enables the investigation of why specific predictions were made along with the data utilized for forecasting. Through numerous evaluations, we demonstrate the superiority of EMBERS over baserate methods and its capability to forecast significant societal happenings.\ 

},
  author = {Ramakrishnan, Naren and Butler, Patrick and Self, Nathan and Khandpur, Rupinder and Saraf, Parang and Wang, Wei and Cadena, Jose and Vullikanti, Anil and Korkmaz, Gizem and Kuhlman, Christopher and Marathe, Achla and Zhao, Liang and Ting, Hua and Huang, Bert and Srinivasan, Aravind and Trinh, Khoa and Lise Getoor and Katz, Graham and Doyle, Andy and Ackermann, Chris and Zavorin, Ilya and Ford, Jim and Summers, Kristin and Fayed, Youssef and Arredondo, Jaime and Gupta, Dipak and Mares, David}
}

% 2014 workshop and symposium papers on stance classification.
@conference{3,
  title = {Collective Stance Classification of Posts in Online Debate Forums},
  booktitle = {ACL Joint Workshop on Social Dynamics and Personal Attributes in Social Media},
  year = {2014},
  author = {Dhanya Sridhar and Lise Getoor and Walker, Marilyn}
}

@conference{sridhar:baylearn14,
  title = {Collective classification of stance and disagreement in online debate forums},
  booktitle = {Bay Area Machine Learning Symposium (BayLearn)},
  year = {2014},
  author = {Dhanya Sridhar and Foulds, James and Huang, Bert and Walker, Marilyn and Lise Getoor}
}

% 2012 entries.
@article{sharara:hj12,
  title = {Finding Prominent Actors in Dynamic Affiliation Networks},
  journal = {Human Journal},
  year = {2012},
  note = {Best Paper Award in ASE Conference 2012},
  author = {Sharara, Hossam and Singh, Lisa and Lise Getoor}
}

@conference{pujara:nips12,
  title = {Large-Scale Hierarchical Topic Models},
  booktitle = {NIPS Workshop on BigLearn},
  year = {2012},
  abstract = {

In the past decade, a number of advances in topic modeling have produced sophisticated models that are capable of generating hierarchies of topics. One challenge for these models is scalability: they are incapable of working at the massive scale of millions of documents and hundreds of thousands of terms. We address this challenge with a technique that learns a hierarchy of topics by iteratively applying topic models and processing subtrees of the hierarchy in parallel. This approach has a number of scalability advantages compared to existing techniques, and shows promising results in experiments assessing runtime and human evaluations of quality. We detail extensions to this approach that may further improve hierarchical topic modeling for large-scale applications.

},
  author = {Jay Pujara and Peter Skomoroch}
}

% Venue acronym is spelled NeurIPS (capital N only).
@conference{huang:social2012,
  title = {Social Group Modeling with Probabilistic Soft Logic},
  booktitle = {NeurIPS Workshop on SNSMA},
  year = {2012},
  abstract = {

In this work, we show how to model the group affiliations of social media users using probabilistic soft logic. We consider groups of a broad variety, motivated by ideas from the social sciences on groups and their roles in social identity. By modeling group affiliations, we allow the possibility of efficient higher-level relational reasoning about the groups themselves, where the number of groups is relatively small compared to the number of users. We discuss preliminary results from experiments using real social media data collected from Twitter.

},
  author = {Huang, Bert and Bach, Stephen and Norris, Eric and Pujara, Jay and Lise Getoor}
}

% Family-name-first authors need the "Last, First" comma form; without the
% comma BibTeX reads the final word as the family name.
@conference{ramesh:nips12,
  title = {User Role Prediction in Online Discussion Forums using Probabilistic Soft Logic},
  booktitle = {NeurIPS Workshop on PE},
  year = {2012},
  author = {Ramesh, Arti and Yoo, Jaebong and Shen, Shitian and Lise Getoor and Kim, Jihie}
}

@conference{sharara:icwsm11,
  title = {Differential Adaptive Diffusion: Understanding Diversity and Learning whom to Trust in Viral Marketing},
  booktitle = {ICWSM},
  year = {2011},
  abstract = {

Viral marketing mechanisms use the existing social network between customers to spread information about products and encourage product adoption. Existing viral marketing models focus on the dynamics of the diffusion process; however, they typically: (a) only consider a single product campaign and (b) fail to model the evolution of the social network, as the trust between individuals changes over time, during the course of multiple campaigns. In this work, we propose an adaptive viral marketing model which captures: (1) multiple different product campaigns, (2) the diversity in customer preferences among different product categories, and (3) changing confidence in peers{\textquoteright} recommendations over time. By applying our model to a real-world network extracted from the Digg social news website, we provide insights into the effects of network dynamics on the different products{\textquoteright} adoption. Our experiments show that our proposed model outperforms earlier non-adaptive diffusion models in predicting future product adoptions. We also show how this model can be used to explore new viral marketing strategies that are more successful than classic strategies which ignore the dynamic nature of social networks.

},
  author = {Sharara, Hossam and Rand, William and Lise Getoor}
}

% 2011 conference papers.
@conference{sharara:vast11,
  title = {G-PARE: A Visual Analytic Tool for Comparative Analysis of Uncertain Graphs},
  booktitle = {IEEE Conference on Visual Analytics Science and Technology (VAST)},
  year = {2011},
  keywords = {Comparative Analysis, Model Comparison, Uncertain Graphs, Visualizing Uncertainty},
  author = {Sharara, Hossam and Sopan, Awalin and Namata, Galileo Mark and Lise Getoor and Singh, Lisa}
}

@conference{minton:cmla11,
  title = {Improving Classifier Performance by Autonomously Collecting Background Knowledge from the Web},
  booktitle = {Tenth International Conference on Machine Learning and Applications},
  year = {2011},
  author = {Minton, Steve and Michelson, Matthew and See, Kane and Macskassy, Sofus and Gazen, Bora C. and Lise Getoor}
}

@conference{sharara:sunbelt11,
  title = {Multi-dimensional Trajectory Analysis for Career Histories},
  booktitle = {International Sunbelt Social Networks Conference (Sunbelt XXXI)},
  year = {2011},
  author = {Sharara, Hossam and Halgin, Daniel and Lise Getoor and Borgatti, Steve}
}

% A whole proceedings volume: the people responsible are editors, so the
% entry is typed as proceedings and the names live in the editor field.
@proceedings{getoor:icml11,
  title = {Proceedings of the 28th International Conference on Machine Learning},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning},
  year = {2011},
  editor = {Lise Getoor and Scheffer, Tobias}
}

@article{sharara:snam10,
  title = {Understanding Actor Loyalty to Event-Based Groups in Affiliation Networks},
  journal = {Journal of Advances in Social Networks Analysis and Mining},
  volume = {1},
  number = {2},
  year = {2011},
  month = apr,
  pages = {115--126},
  author = {Sharara, Hossam and Singh, Lisa and Lise Getoor and Mann, Janet}
}

% Chapter in an edited book (it carries both author and editor), so it is
% typed as incollection with the book title in booktitle.
@incollection{namata:lmbook10,
  title = {A Survey of Link Mining Tasks for Analyzing Noisy and Incomplete Networks},
  booktitle = {Link Mining: Models, Algorithms, and Applications},
  volume = {1},
  year = {2010},
  pages = {107--133},
  publisher = {Springer},
  organization = {Springer},
  edition = {1},
  chapter = {4},
  abstract = {

Many data sets of interest today are best described as networks or graphs of interlinked entities. Examples include Web and text collections, social networks and social media sites, information, transaction and communication networks, and all manner of scientific networks, including biological networks. Unfortunately, often the data collection and extraction process for gathering these network data sets is imprecise, noisy, and/or incomplete. In this chapter, we review a collection of link mining algorithms that are well suited to analyzing and making inferences about networks, especially in the case where the data is noisy or missing.

},
  author = {Galileo Namata and Hossam Sharara and Lise Getoor},
  editor = {Philip Yu and Jiawei Han and Christos Faloutsos}
}

% 2010 workshop and conference papers.
@conference{sharara:win10,
  title = {An Active Learning Approach for Identifying Key Opinion Leaders},
  booktitle = {The 2nd Workshop on Information in Networks (WIN)},
  year = {2010},
  author = {Sharara, Hossam and Lise Getoor and Norton, Myra}
}

% Workshop name spelled the same way as in zheleva:nips10 below.
@conference{sharara:nips2010-nad,
  title = {Active Surveying},
  booktitle = {NIPS Workshop on Networks Across Disciplines: Theory and Applications},
  year = {2010},
  author = {Sharara, Hossam and Lise Getoor and Norton, Myra}
}

@conference{sharara:sunbelt10,
  title = {Active Surveying for Leadership Identification},
  booktitle = {The International Sunbelt Social Networks Conference XXX},
  year = {2010},
  author = {Sharara, Hossam and Norton, Myra and Lise Getoor}
}

% Encyclopedia articles.
@article{sen:eml10,
  title = {Collective Classification},
  journal = {Encyclopedia of Machine Learning},
  year = {2010},
  author = {Sen, Prithviraj and Namata, Galileo Mark and Bilgic, Mustafa and Lise Getoor}
}

@article{sharara:eml10,
  title = {Group Detection},
  journal = {Encyclopedia of Machine Learning},
  year = {2010},
  author = {Sharara, Hossam and Lise Getoor}
}

@conference{zheleva:nips10,
  title = {Higher-order Graphical Models for Classification in Social and Affiliation Networks},
  booktitle = {NIPS Workshop on Networks Across Disciplines: Theory and Applications},
  year = {2010},
  author = {Zheleva, Elena and Lise Getoor and Sarawagi, Sunita}
}

@conference{sen:vldb10,
  title = {Read-Once Functions and Query Evaluation in Probabilistic Databases},
  booktitle = {International Conference on Very Large Data Bases},
  year = {2010},
  author = {Sen, Prithviraj and Deshpande, Amol and Lise Getoor}
}

% 2009 entries.
@conference{sen:uai09,
  title = {Bisimulation-based Approximate Lifted Inference},
  booktitle = {Uncertainty in Artificial Intelligence},
  year = {2009},
  author = {Sen, Prithviraj and Deshpande, Amol and Lise Getoor}
}

@conference{zheleva:kdd09,
  title = {Co-evolution of Social and Affiliation Networks},
  booktitle = {15th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)},
  year = {2009},
  month = jun,
  author = {Zheleva, Elena and Sharara, Hossam and Lise Getoor}
}

% Chapter in an edited book (it carries both author and editor), so it is
% typed as incollection with the book title in booktitle.
@incollection{namata:tmbook09,
  title = {Collective Classification for Text Classification},
  booktitle = {Text Mining: Classification, Clustering, and Applications},
  volume = {1},
  year = {2009},
  pages = {51--69},
  publisher = {Taylor and Francis Group},
  organization = {Taylor and Francis Group},
  edition = {1},
  chapter = {3},
  abstract = {

Text classification, the classification of text documents according to categories or topics, is an important component of any text processing system. There is a large body of work which makes use of content---the words appearing in the documents, the structure of the documents---and external sources to build accurate document classifiers. In addition, there is a growing body of literature on methods which attempt to make use of the link structure among the documents in order to improve document classification performance. Text documents can be connected together in a variety of ways. The most common link structure is the citation graph: e.g., papers cite other papers and webpages link to other webpages. But links among papers can be constructed from other relationships such as co-author, co-citation, appearance at a conference venue, and others. All of these can be combined together to create an interlinked collection of text documents. In these cases, we are often not interested in determining the topic of just a single document, but we have a collection of unlabeled (or partially labeled) documents, and we want to correctly infer values for all of the missing labels.

},
  author = {Galileo Namata and Prithviraj Sen and Mustafa Bilgic and Lise Getoor},
  editor = {Mehran Sahami and Ashok Srivastava}
}

% 2009 conference papers; months use the standard three-letter macros.
@conference{barash:wsm09,
  title = {Distinguishing Knowledge vs Social Capital in Social Media with Roles and Context},
  booktitle = {International Conference on Weblogs and Social Media},
  year = {2009},
  month = may,
  author = {Barash, Vladimir and Smith, Marc and Lise Getoor and Welser, Howard}
}

@conference{sharara:asonam09,
  title = {The Dynamics of Actor Loyalty to Groups in Affiliation Networks},
  booktitle = {International Conference on Advances in Social Networks Analysis and Mining},
  year = {2009},
  month = jul,
  author = {Sharara, Hossam and Singh, Lisa and Lise Getoor and Mann, Janet}
}

@conference{sayyadi:sdm09,
  title = {Future Rank: Ranking Scientific Articles by Predicting their Future PageRank},
  booktitle = {2009 SIAM International Conference on Data Mining (SDM09)},
  year = {2009},
  month = apr,
  author = {Sayyadi, Hassan and Lise Getoor}
}

% Chapter in an edited book (it carries both author and editor), so it is
% typed as incollection with the book title in booktitle.
@incollection{deshpande:mmudchapter09,
  title = {Graphical Models for Uncertain Data},
  booktitle = {Managing and Mining Uncertain Data},
  volume = {1},
  year = {2009},
  pages = {1--34},
  publisher = {Springer},
  organization = {Springer},
  edition = {1},
  chapter = {1},
  abstract = {

Graphical models are a popular and well-studied framework for compact representation of a joint probability distribution over a large number of interdependent variables, and for efficient reasoning about such a distribution. They have been proven useful in a wide range of domains from natural language processing to computer vision to bioinformatics. In this chapter, we present an approach to using graphical models for managing and querying large-scale uncertain databases. We present a unified framework based on the concepts from graphical models that can model not only tuple-level and attribute-level uncertainties, but can also handle arbitrary correlations that may be present among the data; our framework can also naturally capture shared correlations where the same uncertainties and correlations occur repeatedly in the data. We develop an efficient strategy for query evaluation over such probabilistic databases by casting the query processing problem as an inference problem in an appropriately constructed graphical model, and present optimizations specific to probabilistic databases that enable efficient query evaluation. We conclude the chapter with a discussion of related and future work on these topics.

},
  author = {Amol Deshpande and Lise Getoor and Prithviraj Sen},
  editor = {Charu Aggarwal}
}

% 2009 entries; months use the standard three-letter macros.
@conference{schnaitter:vldb09,
  title = {Index Interactions in Physical Design Tuning: Modeling, Analysis, and Applications},
  booktitle = {International Conference on Very Large Data Bases},
  year = {2009},
  author = {Schnaitter, Karl and Polyzotis, Neoklis and Lise Getoor}
}

@conference{saha:sdm09,
  title = {On Maximum Coverage in the Streaming Model \& Application to Multi-topic Blog-Watch},
  booktitle = {2009 SIAM International Conference on Data Mining (SDM09)},
  year = {2009},
  month = apr,
  author = {Saha, Barna and Lise Getoor}
}

@conference{somasundaran:textgraphs09,
  title = {Opinion Graphs for Polarity and Discourse Classification},
  booktitle = {TextGraphs-4: Graph-based Methods for Natural Language Processing},
  year = {2009},
  month = aug,
  author = {Somasundaran, Swapna and Namata, Galileo Mark and Lise Getoor and Wiebe, Janyce}
}

@article{sen:vldbj09,
  title = {PrDB: Managing and Exploiting Rich Correlations in Probabilistic Databases},
  journal = {VLDB Journal, special issue on uncertain and probabilistic databases},
  year = {2009},
  author = {Sen, Prithviraj and Deshpande, Amol and Lise Getoor}
}

% Doctoral thesis: the entry type carries the degree, so no type override.
@phdthesis{sen:thesis09,
  title = {Representing and Querying Uncertain Data},
  year = {2009},
  school = {University of Maryland, College Park},
  author = {Sen, Prithviraj}
}

@conference{somasundaran:emnlp09,
  title = {Supervised and Unsupervised Methods in Employing Discourse Relations for Improving Opinion Polarity Classification},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  year = {2009},
  month = aug,
  author = {Somasundaran, Swapna and Namata, Galileo Mark and Wiebe, Janyce and Lise Getoor}
}

% 2008 entries.
@article{sen:aimag08,
  title = {Collective Classification in Network Data},
  journal = {AI Magazine},
  volume = {29},
  number = {3},
  year = {2008},
  pages = {93--106},
  author = {Sen, Prithviraj and Namata, Galileo Mark
and Bilgic, Mustafa and Lise Getoor and Gallagher, Brian and Eliassi-Rad, Tina}
}

% 2008 entries; page ranges use the double-hyphen form.
@article{sen:dmkd08,
  title = {Cost-Sensitive Learning with Conditional Markov Networks},
  journal = {Data Mining and Knowledge Discovery, Special Issue on Utility Based Data Mining},
  volume = {17},
  number = {2},
  year = {2008},
  month = oct,
  pages = {136--163},
  author = {Sen, Prithviraj and Lise Getoor}
}

@conference{sen:vldb08,
  title = {Exploiting Shared Correlations in Probabilistic Databases},
  booktitle = {International Conference on Very Large Data Bases},
  year = {2008},
  author = {Sen, Prithviraj and Deshpande, Amol and Lise Getoor}
}

@conference{saha:snakdd08,
  title = {Group Proximity Measure for Recommending Groups in Online Social Networks},
  booktitle = {2nd ACM SIGKDD Workshop on Social Network Mining and Analysis (SNA-KDD)},
  year = {2008},
  author = {Saha, Barna and Lise Getoor}
}

@article{kang:tvcg08,
  title = {Interactive Entity Resolution in Relational Data: A Visual Analytic Tool and Its Evaluation},
  journal = {IEEE Transactions on Visualization and Computer Graphics},
  volume = {14},
  number = {5},
  year = {2008},
  pages = {999--1014},
  author = {Kang, Hyunmo and Lise Getoor and Shneiderman, Ben and Bilgic, Mustafa and Licamele, Louis}
}

@conference{smith:cikm07-ssm,
  title = {Leveraging Social Context for Searching Social Media},
  booktitle = {CIKM Workshop on Search in Social Media},
  year = {2008},
  author = {Smith, Marc and Barash, Vladimir and Lise Getoor and Lauw, Hady}
}

% 2007 entries.
@conference{kang:vast07,
  title = {C-GROUP: A Visual Analytic Tool for Pairwise Analysis of Dynamic Group Membership},
  booktitle = {Visual Analytics Science and Technology (VAST)},
  year = {2007},
  author = {Kang, Hyunmo and Lise Getoor and Singh, Lisa}
}

@conference{namata:cikm07,
  title = {A Dual-View Approach to Interactive Network Visualization},
  booktitle = {ACM Conference on Information and Knowledge Management},
  year = {2007},
  author = {Namata, Galileo Mark and Staats,
Brian and Lise Getoor and Shneiderman, Ben}
}

% 2007 entries; months use the standard three-letter macros.
@conference{kang:iv07,
  title = {GeoDDupe: A Novel Interface for Interactive Entity Resolution in Geospatial Data},
  booktitle = {International Conference on Information Visualization},
  year = {2007},
  publisher = {IEEE Computer Society},
  organization = {IEEE Computer Society},
  author = {Kang, Hyunmo and Sehgal, Vivek and Lise Getoor}
}

% Titles carry no trailing period; the bibliography style adds punctuation.
@article{singh:de07,
  title = {Increasing the predictive power of affiliation networks},
  journal = {IEEE Data Engineering Bulletin},
  volume = {30},
  number = {2},
  year = {2007},
  month = jul,
  author = {Singh, Lisa and Lise Getoor}
}

% Numbered technical report: the issuing body goes in institution.
@techreport{sen:um-tr07,
  title = {Link-based Classification},
  number = {CS-TR-4858},
  year = {2007},
  month = feb,
  institution = {University of Maryland},
  type = {Technical Report},
  author = {Sen, Prithviraj and Lise Getoor}
}

@article{hung:tocl,
  title = {Probabilistic Interval XML},
  journal = {ACM Transactions on Computational Logic (TOCL)},
  year = {2007},
  author = {Hung, Edward and Lise Getoor and Subrahmanian, V. S.}
}

@conference{sen:dune07,
  title = {Representing Tuple and Attribute Uncertainty in Probabilistic Databases},
  booktitle = {Workshop on Data Mining of Uncertain Data (ICDM)},
  year = {2007},
  author = {Sen, Prithviraj and Deshpande, Amol and Lise Getoor}
}

@conference{sen:icde07,
  title = {Representing and Querying Correlated Tuples in Probabilistic Databases},
  booktitle = {International Conference on Data Engineering},
  year = {2007},
  author = {Sen, Prithviraj and Deshpande, Amol}
}

@article{kang:kdd07,
  title = {Visual Analysis of Dynamic Group Membership in Temporal Social Networks},
  journal = {SIGKDD Explorations, Special Issue on Visual Analytics},
  volume = {9},
  number = {2},
  year = {2007},
  month = dec,
  pages = {13--21},
  author = {Kang, Hyunmo and Lise Getoor and Singh, Lisa}
}

% booktitle holds only the venue name; authors and title have their own fields.
@conference{singh:iv07,
  title = {Visual mining of multi-modal social networks at different abstraction levels},
  booktitle = {IEEE Conference on Information Visualization - Symposium of Visual Data Mining (IV-VDM)},
  year = {2007},
  author = {Singh, Lisa and Beard, Mitchell and Lise Getoor and Blake, M. Brian}
}

% 2006 entries.
@conference{sen:sim_lacs06,
  title = {Cost-Sensitive Learning with Conditional Markov Networks},
  booktitle = {SIAM Data Mining Workshop on Link Analysis, Counterterrorism and Security},
  year = {2006},
  author = {Sen, Prithviraj and Lise Getoor}
}

@conference{sen:icml06,
  title = {Cost-Sensitive Learning with Conditional Markov Networks},
  booktitle = {International Conference on Machine Learning},
  year = {2006},
  author = {Sen, Prithviraj and Lise Getoor}
}

@conference{bilgic:vast06,
  title = {D-Dupe: An Interactive Tool for Entity Resolution in Social Networks},
  booktitle = {Visual Analytics Science and Technology (VAST)},
  year = {2006},
  month = oct,
  address = {Baltimore},
  author = {Bilgic, Mustafa and Licamele, Louis and Lise Getoor and Shneiderman, Ben}
}

@conference{sen:srl06,
  title = {Empirical Comparison of Approximate Inference Algorithms for Networked Data},
  booktitle = {ICML Workshop on Statistical Relational Learning (SRL)},
  year = {2006},
  author = {Sen, Prithviraj and Lise Getoor}
}

@conference{entity-res-geodata,
  title = {Entity Resolution in Geospatial Data Integration},
  booktitle = {ACM GIS},
  year = {2006},
  author = {Sehgal, Vivek and Lise Getoor and Viechnicki, Peter}
}

@conference{zhao:sna06,
  title = {Event Classification and Relationship Labeling in Affiliation Networks},
  booktitle = {ICML Workshop on Statistical Network Analysis (SNA)},
  year = {2006},
  author = {Zhao, Bin and Sen, Prithviraj and Lise Getoor}
}

% 2005 entries.
@conference{bilgic:gd05,
  title = {D-Dupe: An Interactive Tool for Entity Resolution in Social Networks},
  booktitle = {International Symposium on Graph Drawing},
  series = {Lecture Notes in Computer Science},
  volume = {3843},
  year = {2005},
  month = sep,
  pages =
{505--507},
  publisher = {Springer},
  organization = {Springer},
  author = {Bilgic, Mustafa and Licamele, Louis and Lise Getoor and Shneiderman, Ben},
  editor = {Patrick Healy and Nikola S. Nikolov}
}

% Page ranges use the double-hyphen form.
@conference{singh:icdm05,
  title = {Pruning Social Networks Using Structural Properties and Descriptive Attributes},
  booktitle = {IEEE International Conference on Data Mining (ICDM)},
  year = {2005},
  pages = {773--776},
  author = {Singh, Lisa and Lise Getoor and Licamele, Louis}
}

@article{getoor:aimj04,
  title = {Understanding Tuberculosis Epidemiology Using Probabilistic Relational Models},
  journal = {AI in Medicine Journal},
  volume = {30},
  year = {2004},
  pages = {233--256},
  author = {Lise Getoor and Rhee, Jeanne and Koller, Daphne and Small, Peter}
}

% 2003 conference papers.
@conference{hung:icde03,
  title = {PXML: A Probabilistic Semistructured Data Model and Algebra},
  booktitle = {Proceedings of the IEEE International Conference on Data Engineering},
  year = {2003},
  author = {Hung, Edward and Lise Getoor and Subrahmanian, V. S.}
}

@conference{hung:icdt03,
  title = {Probabilistic Interval XML},
  booktitle = {Proceedings of the International Conference on Database Theory},
  year = {2003},
  author = {Hung, Edward and Lise Getoor and Subrahmanian, V.
S.}
}

% Earliest entries (1995-2001).
@conference{getoor:ijcaiws01,
  title = {Probabilistic Models of Text and Link Structure for Hypertext Classification},
  booktitle = {IJCAI Workshop on Text Learning: Beyond Supervision},
  year = {2001},
  author = {Lise Getoor and Segal, Eran and Benjamin Taskar and Koller, Daphne}
}

@conference{getoor:webkdd99,
  title = {Using Probabilistic Relational Models for Collaborative Filtering},
  booktitle = {Working Notes of the KDD Workshop on Web Usage Analysis and User Profiling},
  year = {1999},
  author = {Lise Getoor and Mehran Sahami}
}

% The "fi" ligatures lost in text extraction are restored in title and venue.
@conference{chajewska:uai98,
  title = {Utility Elicitation as a Classification Problem},
  booktitle = {Uncertainty in Artificial Intelligence},
  year = {1998},
  author = {Chajewska, Urszula and Lise Getoor and Norman, Joseph and Shahar, Yuval}
}

% A name suffix uses the three-part "Last, Jr., First" form.
@conference{lansky:aaiss95,
  title = {The Collage/Khoros Link: Planning for Image Processing Tasks},
  booktitle = {Proceedings of the AAAI Spring Symposium on Integrated Planning Applications},
  year = {1995},
  author = {Lansky, Amy and Friedman, Mark and Lise Getoor and Schmidler, Scott and Short, Jr., Nick}
}