% Journal article: Muthiah et al., AI Magazine, volume 37, number 2 (2016).
% The page range uses the double hyphen so the bibliography style typesets the dash.
@article{muthiah:aimag16,
  author   = {Muthiah, Sathappan and Huang, Bert and Arredondo, Jaime and Mares, David and Getoor, Lise and Katz, Graham and Ramakrishnan, Naren},
  title    = {Capturing Planned Protests from Open Source Indicators},
  journal  = {AI Magazine},
  volume   = {37},
  number   = {2},
  pages    = {63--75},
  year     = {2016},
  abstract = {Civil unrest events (protests, strikes, and {\textquotedblleft}occupy{\textquotedblright} events) are common occurrences in both democracies and authoritarian regimes. The study of civil unrest is a key topic for political scientists as it helps capture an important mechanism by which citizenry express themselves. In countries where civil unrest is lawful, qualitative analysis has revealed that more than 75 percent of the protests are planned, organized, or announced in advance; therefore detecting references to future planned events in relevant news and social media is a direct way to develop a protest forecasting system. We report on a system for doing that in this article. It uses a combination of keyphrase learning to identify what to look for, probabilistic soft logic to reason about location occurrences in extracted results, and time normalization to resolve future time mentions. We illustrate the application of our system to 10 countries in Latin America: Argentina, Brazil, Chile, Colombia, Ecuador, El Salvador, Mexico, Paraguay, Uruguay, and Venezuela. Results demonstrate our successes in capturing significant societal unrest in these countries with an average lead time of 4.08 days. We also study the selective superiorities of news media versus social media (Twitter, Facebook) to identify relevant trade-offs.},
}

% Journal article: Rekatsinas et al., Statistical Analysis and Data Mining.
% NOTE(review): the chapter field below is not used by standard article styles;
%   379 looks like a start page misfiled by the exporter. Confirm the real page
%   range and move it to a pages field (e.g. first--last) once verified.
% NOTE(review): the citation key ends in 2016 while year is 2015. The key is kept
%   as-is so existing citations keep resolving; confirm the publication year.
@article{rekatsinas:sam2016,
  author   = {Rekatsinas, Theodoros and Ghosh, Saurav and Mekaru, Sumiko and Nsoesie, Elaine and Brownstein, John and Getoor, Lise and Ramakrishnan, Naren},
  title    = {Forecasting Rare Disease Outbreaks Using Multiple Data Sources},
  journal  = {Statistical Analysis and Data Mining},
  year     = {2015},
  chapter  = {379},
  note     = {Best of SDM 2015, Special Issue},
  abstract = {Rapidly increasing volumes of news feeds from diverse data sources, such as online newspapers, Twitter and online blogs are proving to be extremely valuable resources in helping anticipate, detect, and forecast outbreaks of rare diseases. This paper presents SourceSeer, a novel algorithmic framework that combines spatio-temporal topic models with source-based anomaly detection techniques to effectively forecast the emergence and progression of infectious rare diseases. SourceSeer is capable of discovering the location focus of each source allowing sources to be used as experts with varying degrees of authoritativeness. To fuse the individual source predictions into a final outbreak prediction we employ a multiplicative weights algorithm taking into account the accuracy of each source. We evaluate the performance of SourceSeer using incidence data for hantavirus syndromes in multiple countries of Latin America provided by HealthMap over a timespan of fifteen months. We demonstrate that SourceSeer makes predictions of increased accuracy compared to several baselines and is capable of forecasting disease outbreaks in a timely manner even when no outbreaks were previously reported.},
}