% Publication entries in this section are listed newest first.
% Conventions used below:
%   - one field per line, braces as value delimiters
%   - author/editor names in "Last, First" form, joined by " and "
%   - month given as the standard three-letter macro (unquoted)
%   - page ranges written with a double hyphen
%   - acronyms and proper names in titles are brace-protected
% Citation keys are kept exactly as they were so existing citations resolve.

@inproceedings{360,
  author    = {Embar, Varun and Sisman, Bunyamin and Wei, Hao and Dong, Xin Luna and Faloutsos, Christos and Getoor, Lise},
  title     = {Contrastive Entity Linkage: Mining Variational Attributes from Large Catalogs for Entity Linkage},
  booktitle = {Automated Knowledge Base Construction (AKBC)},
  year      = {2020},
  abstract  = {Presence of near identical, but distinct, entities called entity variations makes the task of data integration challenging. For example, in the domain of grocery products, variations share the same value for attributes such as brand, manufacturer and product line, but differ in other attributes, called variational attributes, such as package size and color. Identifying variations across data sources is an important task in itself and is crucial for identifying duplicates. However, this task is challenging as the variational attributes are often present as a part of unstructured text and are domain dependent. In this work, we propose our approach, Contrastive entity linkage, to identify both entity pairs that are the same and pairs that are variations of each other. We propose a novel unsupervised approach, VarSpot, to mine domain-dependent variational attributes present in unstructured text. The proposed approach reasons about both similarities and differences between entities and can easily scale to large sources containing millions of entities. We show the generality of our approach by performing experimental evaluation on three different domains. Our approach significantly outperforms state-of-the-art learning-based and rule-based entity linkage systems by up to 4\% F1 score when identifying duplicates, and up to 41\% when identifying entity variations.},
}

@inproceedings{330,
  author    = {Sridhar, Dhanya and Springer, Aaron and Hollis, Victoria and Whittaker, Steve and Getoor, Lise},
  title     = {Estimating Causal Effects of Exercise from Mood Logging Data},
  booktitle = {ICML Workshop on Causal Machine Learning (CausalML)},
  year      = {2018},
  abstract  = {Mood and activity logging applications empower users to monitor their daily well-being and make informed health choices. To provide users with useful feedback that can improve quality of life, a critical task is understanding the causal effects of daily activities on mood and other wellness markers. In this work, we analyze observational data from EmotiCal, a recently developed mood-logging web application, to explore the effects of exercise on mood. We investigate several methodological choices for estimating the conditional average treatment effect, and highlight a novel use of textual data to improve the significance of our results.},
}

@inproceedings{sridhar:acl15,
  author    = {Sridhar, Dhanya and Foulds, James and Walker, Marilyn and Huang, Bert and Getoor, Lise},
  title     = {Joint Models of Disagreement and Stance in Online Debate},
  booktitle = {Annual Meeting of the Association for Computational Linguistics (ACL)},
  year      = {2015},
}

@inproceedings{grycner:emnlp15,
  author    = {Grycner, Adam and Weikum, Gerhard and Pujara, Jay and Foulds, James and Getoor, Lise},
  title     = {{RELLY}: Inferring Hypernym Relationships Between Relational Phrases},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2015},
}

@inproceedings{grycner:akbc2014,
  author    = {Grycner, Adam and Weikum, Gerhard and Pujara, Jay and Foulds, James and Getoor, Lise},
  title     = {A Unified Probabilistic Approach for Semantic Clustering of Relational Phrases},
  booktitle = {NeurIPS},
  year      = {2014},
  abstract  = {The task of finding synonymous relational phrases is important in natural language understanding problems such as question answering and paraphrase detection. While this task has been addressed by many previous systems, each of these existing approaches is limited either in expressivity or in scalability. To address this challenge, we present a large-scale statistical relational method for clustering relational phrases using Probabilistic Soft Logic (PSL) [1]. To assess the quality of our approach, we evaluated it relative to a set of baseline methods. The proposed technique was found to outperform the baselines for both clustering and link prediction, and was shown to be scalable enough to be applied to 200,000 relational phrases.},
}

@inproceedings{ramakrishnan:kdd14,
  author    = {Ramakrishnan, Naren and Butler, Patrick and Self, Nathan and Khandpur, Rupinder and Saraf, Parang and Wang, Wei and Cadena, Jose and Vullikanti, Anil and Korkmaz, Gizem and Kuhlman, Christopher and Marathe, Achla and Zhao, Liang and Ting, Hua and Huang, Bert and Srinivasan, Aravind and Trinh, Khoa and Getoor, Lise and Katz, Graham and Doyle, Andy and Ackermann, Chris and Zavorin, Ilya and Ford, Jim and Summers, Kristin and Fayed, Youssef and Arredondo, Jaime and Gupta, Dipak and Mares, David},
  title     = {{\textquoteleft}Beating the news{\textquoteright} with {EMBERS}: Forecasting Civil Unrest using Open Source Indicators},
  booktitle = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  year      = {2014},
  abstract  = {We describe the design, implementation, and evaluation of EMBERS, an automated, 24x7 continuous system for forecasting civil unrest across 10 countries of Latin America using open source indicators such as tweets, news sources, blogs, economic indicators, and other data sources. Unlike retrospective studies, EMBERS has been making forecasts into the future since Nov 2012 which have been (and continue to be) evaluated by an independent T\&E team (MITRE). Of note, EMBERS has successfully forecast the uptick and downtick of incidents during the June 2013 protests in Brazil. We outline the system architecture of EMBERS, individual models that leverage specific data sources, and a fusion and suppression engine that supports trading off specific evaluation criteria. EMBERS also provides an audit trail interface that enables the investigation of why specific predictions were made along with the data utilized for forecasting. Through numerous evaluations, we demonstrate the superiority of EMBERS over baserate methods and its capability to forecast significant societal happenings.},
}

@inproceedings{3,
  author    = {Sridhar, Dhanya and Getoor, Lise and Walker, Marilyn},
  title     = {Collective Stance Classification of Posts in Online Debate Forums},
  booktitle = {ACL Joint Workshop on Social Dynamics and Personal Attributes in Social Media},
  year      = {2014},
}

@inproceedings{sridhar:baylearn14,
  author    = {Sridhar, Dhanya and Foulds, James and Huang, Bert and Walker, Marilyn and Getoor, Lise},
  title     = {Collective classification of stance and disagreement in online debate forums},
  booktitle = {Bay Area Machine Learning Symposium (BayLearn)},
  year      = {2014},
}

@inproceedings{barash:wsm09,
  author    = {Barash, Vladimir and Smith, Marc and Getoor, Lise and Welser, Howard},
  title     = {Distinguishing Knowledge vs Social Capital in Social Media with Roles and Context},
  booktitle = {International Conference on Weblogs and Social Media},
  year      = {2009},
  month     = may,
}

@inproceedings{somasundaran:textgraphs09,
  author    = {Somasundaran, Swapna and Namata, Galileo Mark and Getoor, Lise and Wiebe, Janyce},
  title     = {Opinion Graphs for Polarity and Discourse Classification},
  booktitle = {TextGraphs-4: Graph-based Methods for Natural Language Processing},
  year      = {2009},
  month     = aug,
}

@inproceedings{somasundaran:emnlp09,
  author    = {Somasundaran, Swapna and Namata, Galileo Mark and Wiebe, Janyce and Getoor, Lise},
  title     = {Supervised and Unsupervised Methods in Employing Discourse Relations for Improving Opinion Polarity Classification},
  booktitle = {Conference on Empirical Methods in Natural Language Processing},
  year      = {2009},
  month     = aug,
}

@incollection{islamaj:fga-book07,
  author    = {Islamaj, Rezarta and Getoor, Lise and Wilbur, W. John},
  editor    = {Liu, Huan and Motoda, Hiroshi},
  title     = {A Feature Generation Algorithm with Applications to Biological Sequence Classification},
  booktitle = {Computational Methods of Feature Selection},
  volume    = {1},
  chapter   = {18},
  pages     = {355--376},
  publisher = {Chapman and Hall/CRC Press},
  edition   = {1},
  year      = {2008},
}

@inproceedings{islamaj:icdm07,
  author    = {Islamaj, Rezarta and Getoor, Lise and Wilbur, W. John},
  title     = {Characterizing {RNA} secondary-structure features and their effects on splice-site prediction},
  booktitle = {IEEE ICDM Workshop on Mining and Management of Biological Data},
  year      = {2007},
}

@article{islamaj:bmc07,
  author    = {Islamaj, Rezarta and Getoor, Lise and Wilbur, W. John and Mount, Stephen},
  title     = {Features generated for computational splice-site prediction correspond to functional elements},
  journal   = {BMC Bioinformatics},
  volume    = {8},
  number    = {410},
  year      = {2007},
  month     = oct,
  note      = {Electronic version is available at http://www.biomedcentral.com/1471-2105/8/410},
  keywords  = {feature generation, functional biological signals, splice-site},
}

@article{islamaj:nar07,
  author    = {Islamaj, Rezarta and Getoor, Lise and Wilbur, W. John and Mount, Stephen},
  title     = {{SplicePort} - An interactive splice-site analysis tool},
  journal   = {Nucleic Acids Research},
  year      = {2007},
}

@inproceedings{islamaj:pkdd06,
  author    = {Islamaj, Rezarta and Getoor, Lise and Wilbur, W. John},
  title     = {Feature Generation Algorithm: an Application to Splice Site Prediction},
  booktitle = {Knowledge Discovery in Databases: {PKDD} 2006},
  series    = {Lecture Notes in Computer Science},
  volume    = {4213},
  pages     = {553--560},
  publisher = {Springer},
  address   = {Berlin, Germany},
  year      = {2006},
  month     = sep,
}

@inproceedings{islamaj:fsdm06,
  author    = {Islamaj, Rezarta and Getoor, Lise and Wilbur, W. John},
  title     = {A Feature Generation Algorithm for Sequences with Application to Splice Site Prediction},
  booktitle = {International Workshop on Feature Selection for Data Mining (FSDM)},
  address   = {Bethesda, Maryland},
  year      = {2006},
  month     = apr,
}