% Bibliography entries, listed newest first (2018 down to 1997).
% Lines outside of entries, such as these, are ignored by BibTeX.
% Conventions used below: one field per line, names in "Last, First" form,
% page ranges written with a double hyphen, acronyms in titles brace-protected.

@inproceedings{333,
  author    = {Tomkins, Sabina and Getoor, Lise and Chen, Yunfei and Zhang, Yi},
  title     = {A Socio-linguistic Model for Cyberbullying Detection},
  booktitle = {International Conference on Advances in Social Networks Analysis and Mining (ASONAM)},
  year      = {2018},
  abstract  = {Cyberbullying is a serious threat to both the short and long-term well-being of social media users. Addressing this problem in online environments demands the ability to automatically detect cyberbullying and to identify the roles that participants assume in social interactions. As cyberbullying occurs within online communities, it is also vital to understand the group dynamics that support bullying behavior. To this end, we propose a socio-linguistic model which jointly detects cyberbullying content in messages, discovers latent text categories, identifies participant roles and exploits social interactions. While our method makes use of content that is labeled as bullying, it does not require category, role or relationship labels. Furthermore, as bullying labels are often subjective, noisy and inconsistent, an important contribution of our paper is effective methods for leveraging inconsistent labels. Rather than discard inconsistent labels, we evaluate different methods for learning from them, demonstrating that incorporating uncertainty allows for better generalization. Our proposed socio-linguistic model achieves an 18\% improvement over state-of-the-art methods.},
}

@inproceedings{329,
  author    = {Chang, Johnnie and Chen, Robert and Pujara, Jay and Getoor, Lise},
  title     = {Clustering System Data using Aggregate Measures},
  booktitle = {Machine Learning and Systems (MLSys)},
  year      = {2018},
  abstract  = {Many real-world systems generate a tremendous amount of data cataloging the actions, responses, and internal states. Prominent examples include user logs on web servers, instrumentation of source code, and performance statistics in large data centers. The magnitude of this data makes it impossible to log individual events, but instead requires capturing aggregate statistics at a coarser granularity, resulting in statistical distributions instead of discrete values. We survey several popular statistical distance measures and demonstrate how appropriate statistical distances can allow meaningful clustering of web log data.},
}

@inproceedings{tomkins:lld2017,
  author    = {Tomkins, Sabina and Getoor, Lise and Chen, Yunfei and Zhang, Yi},
  title     = {Detecting Cyber-bullying from Sparse Data and Inconsistent Labels},
  booktitle = {Learning with Limited Labeled Data (LLD) NIPS Workshop},
  year      = {2017},
}

@inproceedings{pujara:starai15,
  author    = {Pujara, Jay and London, Ben and Getoor, Lise and Cohen, William},
  title     = {Online Inference for Knowledge Graph Construction},
  booktitle = {Workshop on Statistical Relational AI},
  year      = {2015},
}

@article{pujara:aimag15,
  author  = {Pujara, Jay and Miao, Hui and Getoor, Lise and Cohen, William},
  title   = {Using Semantics \& Statistics to Turn Data into Knowledge},
  journal = {AI Magazine},
  volume  = {36},
  number  = {1},
  pages   = {65--74},
  year    = {2015},
}

@inproceedings{ramakrishnan:kdd14,
  author    = {Ramakrishnan, Naren and Butler, Patrick and Self, Nathan and Khandpur, Rupinder and Saraf, Parang and Wang, Wei and Cadena, Jose and Vullikanti, Anil and Korkmaz, Gizem and Kuhlman, Christopher and Marathe, Achla and Zhao, Liang and Ting, Hua and Huang, Bert and Srinivasan, Aravind and Trinh, Khoa and Getoor, Lise and Katz, Graham and Doyle, Andy and Ackermann, Chris and Zavorin, Ilya and Ford, Jim and Summers, Kristin and Fayed, Youssef and Arredondo, Jaime and Gupta, Dipak and Mares, David},
  title     = {{`Beating the news'} with {EMBERS}: Forecasting Civil Unrest using Open Source Indicators},
  booktitle = {ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
  year      = {2014},
  abstract  = {We describe the design, implementation, and evaluation of EMBERS, an automated, 24x7 continuous system for forecasting civil unrest across 10 countries of Latin America using open source indicators such as tweets, news sources, blogs, economic indicators, and other data sources. Unlike retrospective studies, EMBERS has been making forecasts into the future since Nov 2012 which have been (and continue to be) evaluated by an independent T\&E team (MITRE). Of note, EMBERS has successfully forecast the uptick and downtick of incidents during the June 2013 protests in Brazil. We outline the system architecture of EMBERS, individual models that leverage specific data sources, and a fusion and suppression engine that supports trading off specific evaluation criteria. EMBERS also provides an audit trail interface that enables the investigation of why specific predictions were made along with the data utilized for forecasting. Through numerous evaluations, we demonstrate the superiority of EMBERS over baserate methods and its capability to forecast significant societal happenings.},
}

@inproceedings{pujara:iswc13,
  author    = {Pujara, Jay and Miao, Hui and Getoor, Lise and Cohen, William},
  title     = {Knowledge Graph Identification},
  booktitle = {International Semantic Web Conference (ISWC)},
  year      = {2013},
  note      = {Winner of Best Student Paper award},
}

% The next two entries share a title but were presented at different venues
% (see their booktitle fields), so both are kept under their own keys.

@inproceedings{pujara:slg13,
  author    = {Pujara, Jay and Miao, Hui and Getoor, Lise and Cohen, William},
  title     = {Large-Scale Knowledge Graph Identification using {PSL}},
  booktitle = {ICML Workshop on Structured Learning (SLG)},
  year      = {2013},
}

@inproceedings{pujara:sbd13,
  author    = {Pujara, Jay and Miao, Hui and Getoor, Lise and Cohen, William},
  title     = {Large-Scale Knowledge Graph Identification using {PSL}},
  booktitle = {AAAI Fall Symposium on Semantics for Big Data},
  year      = {2013},
}

@inproceedings{pujara:akbc13,
  author    = {Pujara, Jay and Miao, Hui and Getoor, Lise and Cohen, William},
  title     = {Ontology-Aware Partitioning for Knowledge Graph Identification},
  booktitle = {CIKM Workshop on Automatic Knowledge Base Construction},
  year      = {2013},
}

@inproceedings{rastegari:icml13,
  author    = {Rastegari, Mohammad and Choi, Jonghyun and Fakhraei, Shobeir and Daum{\'e} III, Hal and Davis, Larry},
  title     = {Predictable Dual-View Hashing},
  booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML-13)},
  pages     = {1328--1336},
  publisher = {JMLR},
  year      = {2013},
}

@article{Polymeropoulos:SchizRes09,
  author   = {Polymeropoulos, Mihales and Licamele, Louis and Volpi, Simona and Mack, Kendra and Mitkus, Shruti and Carstea, Eugene and Getoor, Lise and Lavedan, Christian},
  title    = {Common effect of antipsychotics on the biosynthesis and regulation of fatty acids and cholesterol supports a key role of lipid homeostasis in schizophrenia},
  journal  = {Schizophrenia Research},
  year     = {2009},
  keywords = {bioinformatics gene expression analysis antipsychotic pharmacogenetics},
}

@inproceedings{chen:nips09-wkshp,
  author    = {Chen, Daozheng and Bilgic, Mustafa and Getoor, Lise and Jacobs, David},
  title     = {Efficient Resource-constrained Retrospective Analysis of Long Video Sequences},
  booktitle = {NIPS Workshop on Adaptive Sensing, Active Learning and Experimental Design: Theory, Methods and Applications},
  year      = {2009},
}

% The two entries below are chapters inside edited books: "title" is the
% chapter title and "booktitle" is the title of the containing book.

@incollection{bhattacharya:mgd-book06,
  author    = {Bhattacharya, Indrajit and Getoor, Lise},
  title     = {Entity Resolution in Graphs},
  booktitle = {Mining Graph Data},
  editor    = {Cook, Diane and Holder, Lawrence},
  volume    = {1},
  chapter   = {13},
  pages     = {311--344},
  publisher = {Wiley},
  edition   = {1},
  year      = {2006},
  abstract  = {In many applications, there are a variety of ways of referring to the same underlying real-world entity. For example, J. Doe, Jonathan Doe, and Jon Doe may all refer to the same person. In addition, entity references may be linked or grouped together. For example, Jonathan Doe may be married to Jeanette Doe and may have dependents James Doe, Jason Doe, and Jacqueline Doe, and Jon Doe may be married to Jean Doe and J. Doe may have dependents Jim Doe, Jason Doe, and Jackie Doe. Given such data, we can build a graph from the entity references, where the nodes are the entity references and edges (or often hyperedges) in the graph indicate links among the references.

    However, the problem is that for any real-world entity there may well be more than one node in the graph that refers to that entity. In the example above, we may have three nodes all referring to the individual Jonathan Doe, two nodes referring to Jeanette Doe, two nodes referring to each of James Doe, Jason Doe, and Jacqueline Doe. Further, because the edges are defined over entity references, rather than entities themselves, the graph does not accurately reflect the relationships between entities. For example, until we realize that Jon Doe refers to the same person as Jonathan Doe, we may not think that Jon Doe has any children, and until we realize that J. Doe refers to the same person as Jonathan Doe, we will not realize that he is married.},
}

@incollection{getoor:lbc-book-ch05,
  author    = {Getoor, Lise},
  title     = {Link-based Classification},
  booktitle = {Advanced Methods for Knowledge Discovery from Complex Data},
  editor    = {Maulik, Ujjwal and Holder, Lawrence and Cook, Diane},
  volume    = {1},
  chapter   = {7},
  pages     = {189--207},
  publisher = {Springer-Verlag},
  edition   = {1},
  year      = {2005},
  abstract  = {A key challenge for machine learning is the problem of mining richly structured data sets, where the objects are linked in some way due to either an explicit or implicit relationship that exists between the objects. Links among the objects demonstrate certain patterns, which can be helpful for many machine learning tasks and are usually hard to capture with traditional statistical models. Recently there has been a surge of interest in this area, fuelled largely by interest in web and hypertext mining, but also by interest in mining social networks, bibliographic citation data, epidemiological data and other domains best described using a linked or graph structure. In this chapter we propose a framework for modeling link distributions, a link-based model that supports discriminative models describing both the link distributions and the attributes of linked objects. We use a structured logistic regression model, capturing both content and links. We systematically evaluate several variants of our link-based model on a range of data sets including both web and citation collections. In all cases, the use of the link distribution improves classification performance.},
}

@inproceedings{chajewska:mdm98,
  author    = {Chajewska, Urszula and Norman, Joseph and Getoor, Lise},
  title     = {Using Classification Techniques for Utility Elicitation: A Comparison between Standard Gamble and Visual Analog Scale Methods},
  booktitle = {Twentieth Anniversary Meeting of the Society for Medical Decision Making},
  year      = {1998},
}

@inproceedings{chajewska:uai98,
  author    = {Chajewska, Urszula and Getoor, Lise and Norman, Joseph and Shahar, Yuval},
  title     = {Utility Elicitation as a Classification Problem},
  booktitle = {Uncertainty in Artificial Intelligence},
  year      = {1998},
}

@inproceedings{chajewska:aaaiss98,
  author    = {Chajewska, Urszula and Getoor, Lise and Norman, Joseph},
  title     = {Utility Elicitation as a Classification Problem},
  booktitle = {Proceedings of the AAAI Spring Symposium Series on Interactive and Mixed Initiative Decision-Theoretic Systems},
  year      = {1998},
}

% NOTE(review): the given names "Gregor" and "Bjorn" in the entry below are
% reproduced as found; confirm their spelling (and any accents) against the
% published paper.

@inproceedings{getoor:aaai97,
  author    = {Getoor, Lise and Ottosson, Gregor and Fromherz, Markus and Carlson, Bjorn},
  title     = {Effective Redundant Constraints for Online Scheduling},
  booktitle = {Proceedings of the Fourteenth National Conference on Artificial Intelligence},
  year      = {1997},
}