% Bibliography database (biblatex format; Zotero / Better BibTeX export).
@article{adcock_measurement_2001,
  title = {Measurement {{Validity}}: {{A Shared Standard}} for {{Qualitative}} and {{Quantitative Research}}},
  shorttitle = {Measurement {{Validity}}},
  author = {Adcock, Robert and Collier, David},
  date = {2001-09},
  journaltitle = {American Political Science Review},
  volume = {95},
  number = {3},
  pages = {529--546},
  publisher = {{Cambridge University Press}},
  issn = {0003-0554, 1537-5943},
  abstract = {Scholars routinely make claims that presuppose the validity of the observations and measurements that operationalize their concepts. Yet, despite recent advances in political science methods, surprisingly little attention has been devoted to measurement validity. We address this gap by exploring four themes. First, we seek to establish a shared framework that allows quantitative and qualitative scholars to assess more effectively, and communicate about, issues of valid measurement. Second, we underscore the need to draw a clear distinction between measurement issues and disputes about concepts. Third, we discuss the contextual specificity of measurement claims, exploring a variety of measurement strategies that seek to combine generality and validity by devoting greater attention to context. Fourth, we address the proliferation of terms for alternative measurement validation procedures and offer an account of the three main types of validation most relevant to political scientists.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/9XTPCM8L/Adcock_Collier_2001_Measurement Validity.pdf;/home/nathante/Zotero/storage/PNSZW6MZ/91C7A9800DB26A76EBBABC5889A50C8B.html}
}

@unpublished{aliapoulios_gospel_2021,
  title = {The {{Gospel According}} to {{Q}}: {{Understanding}} the {{QAnon Conspiracy}} from the {{Perspective}} of {{Canonical Information}}},
  shorttitle = {The {{Gospel According}} to {{Q}}},
  author = {Aliapoulios, Max and Papasavva, Antonis and Ballard, Cameron and De Cristofaro, Emiliano and Stringhini, Gianluca and Zannettou, Savvas and Blackburn, Jeremy},
  date = {2021-05-20},
  eprint = {2101.08750},
  eprinttype = {arxiv},
  eprintclass = {cs},
  abstract = {The QAnon conspiracy theory claims that a cabal of (literally) blood-thirsty politicians and media personalities are engaged in a war to destroy society. By interpreting cryptic "drops" of information from an anonymous insider calling themself Q, adherents of the conspiracy theory believe that Donald Trump is leading them in an active fight against this cabal. QAnon has been covered extensively by the media, as its adherents have been involved in multiple violent acts, including the January 6th, 2021 seditious storming of the US Capitol building. Nevertheless, we still have relatively little understanding of how the theory evolved and spread on the Web, and the role played in that by multiple platforms. To address this gap, we study QAnon from the perspective of "Q" themself. We build a dataset of 4,949 canonical Q drops collected from six "aggregation sites," which curate and archive them from their original posting to anonymous and ephemeral image boards. We expose that these sites have a relatively low (overall) agreement, and thus at least some Q drops should probably be considered apocryphal. We then analyze the Q drops' contents to identify topics of discussion and find statistically significant indications that drops were not authored by a single individual. Finally, we look at how posts on Reddit are used to disseminate Q drops to wider audiences. We find that dissemination was (initially) limited to a few sub-communities and that, while heavy-handed moderation decisions have reduced the overall issue, the "gospel" of Q persists on the Web.},
  keywords = {Computer Science - Computers and Society,Computer Science - Social and Information Networks},
  file = {/home/nathante/Zotero/storage/V96424CW/Aliapoulios et al_2021_The Gospel According to Q.pdf;/home/nathante/Zotero/storage/USF2Z7ZX/2101.html}
}

@article{araujo_automated_2020,
  title = {Automated {{Visual Content Analysis}} ({{AVCA}}) in {{Communication Research}}: {{A Protocol}} for {{Large Scale Image Classification}} with {{Pre-Trained Computer Vision Models}}},
  shorttitle = {Automated {{Visual Content Analysis}} ({{AVCA}}) in {{Communication Research}}},
  author = {Araujo, Theo and Lock, Irina and family=Velde, given=Bob, prefix=van de, useprefix=true},
  date = {2020-10-01},
  journaltitle = {Communication Methods and Measures},
  volume = {14},
  number = {4},
  pages = {239--265},
  publisher = {{Routledge}},
  issn = {1931-2458},
  abstract = {The increasing volume of images published online in a wide variety of contexts requires communication researchers to address this reality by analyzing visual content at a large scale. Ongoing advances in computer vision to automatically detect objects, concepts, and features in images provide a promising opportunity for communication research. We propose a research protocol for Automated Visual Content Analysis (AVCA) to enable large-scale content analysis of images. It offers inductive and deductive ways to use commercial pre-trained models for theory building in communication science. Using the example of corporations’ website images on sustainability, we show in a step-by-step fashion how to classify a large sample (N = 21,876) of images with unsupervised and supervised machine learning, as well as custom models. The possibilities and pitfalls of these approaches are discussed, ethical issues are addressed, and application examples for future communication research are detailed.},
  file = {/home/nathante/Zotero/storage/YUAKMGKV/Araujo et al_2020_Automated Visual Content Analysis (AVCA) in Communication Research.pdf}
}

@article{bachl_correcting_2017,
  title = {Correcting {{Measurement Error}} in {{Content Analysis}}},
  author = {Bachl, Marko and Scharkow, Michael},
  date = {2017-04-03},
  journaltitle = {Communication Methods and Measures},
  shortjournal = {Communication Methods and Measures},
  volume = {11},
  number = {2},
  pages = {87--104},
  issn = {1931-2458, 1931-2466},
  langid = {english},
  file = {/home/nathante/Zotero/storage/3D3G9IP7/Bachl & Scharkow (2017) Correcting Measurement Error in CA.pdf;/home/nathante/Zotero/storage/76CKDXD8/Bachl und Scharkow - 2017 - Correcting Measurement Error in Content Analysis.pdf}
}

@article{baden_three_2022,
  title = {Three {{Gaps}} in {{Computational Text Analysis Methods}} for {{Social Sciences}}: {{A Research Agenda}}},
  shorttitle = {Three {{Gaps}} in {{Computational Text Analysis Methods}} for {{Social Sciences}}},
  author = {Baden, Christian and Pipal, Christian and Schoonvelde, Martijn and family=Velden, given=Mariken A. C. G, prefix=van der, useprefix=true},
  date = {2022-01-02},
  journaltitle = {Communication Methods and Measures},
  shortjournal = {Communication Methods and Measures},
  volume = {16},
  number = {1},
  pages = {1--18},
  issn = {1931-2458, 1931-2466},
  abstract = {We identify three gaps that limit the utility and obstruct the progress of computational text analysis methods (CTAM) for social science research. First, we contend that CTAM development has prioritized technological over validity concerns, giving limited attention to the operationalization of social scientific measurements. Second, we identify a mismatch between CTAMs’ focus on extracting specific contents and document-level patterns, and social science researchers’ need for measuring multiple, often complex contents in the text. Third, we argue that the dominance of English language tools depresses comparative research and inclusivity toward scholarly commu nities examining languages other than English. We substantiate our claims by drawing upon a broad review of methodological work in the computa tional social sciences, as well as an inventory of leading research publications using quantitative textual analysis. Subsequently, we discuss implications of these three gaps for social scientists’ uneven uptake of CTAM, as well as the field of computational social science text research as a whole. Finally, we propose a research agenda intended to bridge the identified gaps and improve the validity, utility, and inclusiveness of CTAM.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/4HHJ9VCN/Baden et al. - 2022 - Three Gaps in Computational Text Analysis Methods .pdf}
}

@book{barocas_fairness_2019,
  title = {Fairness in {{Machine Learning}}},
  author = {Barocas, Solon and Hardt, Moritz and Narayanan, Arvind},
  date = {2019},
  publisher = {{fairmlbook.org}},
  langid = {english},
  file = {/home/nathante/Zotero/storage/UJ59IVEC/Barocas et al_2019_Fairness in Machine Learning.pdf}
}

@inproceedings{bender_dangers_2021,
  title = {On the {{Dangers}} of {{Stochastic Parrots}}: {{Can Language Models Be Too Big}}? 🦜},
  shorttitle = {On the {{Dangers}} of {{Stochastic Parrots}}},
  booktitle = {Proceedings of the 2021 {{ACM Conference}} on {{Fairness}}, {{Accountability}}, and {{Transparency}}},
  author = {Bender, Emily M. and Gebru, Timnit and McMillan-Major, Angelina and Shmitchell, Shmargaret},
  date = {2021-03-03},
  series = {{{FAccT}} '21},
  pages = {610--623},
  publisher = {{Association for Computing Machinery}},
  location = {{New York, NY, USA}},
  abstract = {The past 3 years of work in NLP have been characterized by the development and deployment of ever larger language models, especially for English. BERT, its variants, GPT-2/3, and others, most recently Switch-C, have pushed the boundaries of the possible both through architectural innovations and through sheer size. Using these pretrained models and the methodology of fine-tuning them for specific tasks, researchers have extended the state of the art on a wide array of tasks as measured by leaderboards on specific benchmarks for English. In this paper, we take a step back and ask: How big is too big? What are the possible risks associated with this technology and what paths are available for mitigating those risks? We provide recommendations including weighing the environmental and financial costs first, investing resources into curating and carefully documenting datasets rather than ingesting everything on the web, carrying out pre-development exercises evaluating how the planned approach fits into research and development goals and supports stakeholder values, and encouraging research directions beyond ever larger language models.},
  isbn = {978-1-4503-8309-7},
  file = {/home/nathante/Zotero/storage/VIEBVAWK/Bender et al_2021_On the Dangers of Stochastic Parrots.pdf}
}

@article{blackwell_multiple_2012,
  title = {Multiple {{Overimputation}}: {{A Unified Approach}} to {{Measurement Error}} and {{Missing Data}}},
  author = {Blackwell, Matthew and Honaker, James and King, Gary},
  date = {2012},
  pages = {50},
  abstract = {Although social scientists devote considerable effort to mitigating measurement error during data collection, they usually ignore the issue during data analysis. And although many statistical methods have been proposed for reducing measurement error-induced biases, few have been widely used because of implausible assumptions, high levels of model dependence, difficult computation, or inapplicability with multiple mismeasured variables. We develop an easy-to-use alternative without these problems; it generalizes the popular multiple imputation (mi) framework by treating missing data problems as a special case of extreme measurement error and corrects for both. Like mi, the proposed “multiple overimputation” (mo) framework is a simple two-step procedure. First, multiple (≈ 5) completed copies of the data set are created where cells measured without error are held constant, those missing are imputed from the distribution of predicted values, and cells (or entire variables) with measurement error are “overimputed,” that is imputed from the predictive distribution with observation-level priors defined by the mismeasured values and available external information, if any. In the second step, analysts can then run whatever statistical method they would have run on each of the overimputed data sets as if there had been no missingness or measurement error; the results are then combined via a simple averaging procedure. We also offer easy-to-use open source software that implements all the methods described herein.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/CZAYZNIY/Blackwell et al_2012_Multiple Overimputation.pdf}
}

@article{blackwell_unified_2017,
  title = {A {{Unified Approach}} to {{Measurement Error}} and {{Missing Data}}: {{Overview}} and {{Applications}}},
  shorttitle = {A {{Unified Approach}} to {{Measurement Error}} and {{Missing Data}}},
  author = {Blackwell, Matthew and Honaker, James and King, Gary},
  date = {2017-08},
  journaltitle = {Sociological Methods \& Research},
  shortjournal = {Sociological Methods \& Research},
  volume = {46},
  number = {3},
  pages = {303--341},
  issn = {0049-1241, 1552-8294},
  abstract = {Although social scientists devote considerable effort to mitigating measurement error during data collection, they often ignore the issue during data analysis. And although many statistical methods have been proposed for reducing measurement error-induced biases, few have been widely used because of implausible assumptions, high levels of model dependence, difficult computation, or inapplicability with multiple mismeasured variables. We develop an easy-to-use alternative without these problems; it generalizes the popular multiple imputation (MI) framework by treating missing data problems as a limiting special case of extreme measurement error and corrects for both. Like MI, the proposed framework is a simple two-step procedure, so that in the second step researchers can use whatever statistical method they would have if there had been no problem in the first place. We also offer empirical illustrations, open source software that implements all the methods described herein, and a companion article with technical details and extensions.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/AWQFGUYU/Blackwell et al. - 2017 - A Unified Approach to Measurement Error and Missin.pdf}
}

@article{blackwell_unified_2017-1,
  title = {A {{Unified Approach}} to {{Measurement Error}} and {{Missing Data}}: {{Details}} and {{Extensions}}},
  shorttitle = {A {{Unified Approach}} to {{Measurement Error}} and {{Missing Data}}},
  author = {Blackwell, Matthew and Honaker, James and King, Gary},
  date = {2017-08-01},
  journaltitle = {Sociological Methods \& Research},
  shortjournal = {Sociological Methods \& Research},
  volume = {46},
  number = {3},
  pages = {342--369},
  publisher = {{SAGE Publications Inc}},
  issn = {0049-1241},
  abstract = {We extend a unified and easy-to-use approach to measurement error and missing data. In our companion article, Blackwell, Honaker, and King give an intuitive overview of the new technique, along with practical suggestions and empirical applications. Here, we offer more precise technical details, more sophisticated measurement error model specifications and estimation procedures, and analyses to assess the approach’s robustness to correlated measurement errors and to errors in categorical variables. These results support using the technique to reduce bias and increase efficiency in a wide variety of empirical research.},
  langid = {english},
  keywords = {imputation,inference,measurement error,missing data,modeling,multiple overimputation,selection},
  file = {/home/nathante/Zotero/storage/DNEG8WCP/Blackwell et al_2017_A Unified Approach to Measurement Error and Missing Data.pdf;/home/nathante/Zotero/storage/XH3BDRFS/Blackwell et al_2017_A Unified Approach to Measurement Error and Missing Data.pdf}
}

@article{boukes_whats_2020,
  title = {What’s the {{Tone}}? {{Easy Doesn}}’t {{Do It}}: {{Analyzing Performance}} and {{Agreement Between Off-the-Shelf Sentiment Analysis Tools}}},
  shorttitle = {What’s the {{Tone}}?},
  author = {Boukes, Mark and family=Velde, given=Bob, prefix=van de, useprefix=true and Araujo, Theo and Vliegenthart, Rens},
  date = {2020-04-02},
  journaltitle = {Communication Methods and Measures},
  volume = {14},
  number = {2},
  pages = {83--104},
  publisher = {{Routledge}},
  issn = {1931-2458},
  abstract = {This article scrutinizes the method of automated content analysis to measure the tone of news coverage. We compare a range of off-the-shelf sentiment analysis tools to manually coded economic news as well as examine the agreement between these dictionary approaches themselves. We assess the performance of five off-the-shelf sentiment analysis tools and two tailor-made dictionary-based approaches. The analyses result in five conclusions. First, there is little overlap between the off-the-shelf tools; causing wide divergence in terms of tone measurement. Second, there is no stronger overlap with manual coding for short texts (i.e., headlines) than for long texts (i.e., full articles). Third, an approach that combines individual dictionaries achieves a comparably good performance. Fourth, precision may increase to acceptable levels at higher levels of granularity. Fifth, performance of dictionary approaches depends more on the number of relevant keywords in the dictionary than on the number of valenced words as such; a small tailor-made lexicon was not inferior to large established dictionaries. Altogether, we conclude that off-the-shelf sentiment analysis tools are mostly unreliable and unsuitable for research purposes – at least in the context of Dutch economic news – and manual validation for the specific language, domain, and genre of the research project at hand is always warranted.},
  file = {/home/nathante/Zotero/storage/HXRTCXAZ/Boukes et al_2020_What’s the Tone.pdf}
}

@article{boumans_taking_2015,
  title = {Taking {{Stock}} of the {{Toolkit}}},
  author = {Boumans, Jelle W. and Trilling, Damian},
  date = {2015-11},
  journaltitle = {Digital Journalism},
  volume = {4},
  number = {1},
  pages = {8--23},
  publisher = {{Informa UK Limited}},
  issn = {2167-082X}
}

@article{breiman_statistical_2001,
  title = {Statistical {{Modeling}}: {{The Two Cultures}} (with Comments and a Rejoinder by the Author)},
  shorttitle = {Statistical {{Modeling}}},
  author = {Breiman, Leo},
  date = {2001-08},
  journaltitle = {Statistical Science},
  volume = {16},
  number = {3},
  pages = {199--231},
  publisher = {{Institute of Mathematical Statistics}},
  issn = {0883-4237, 2168-8745},
  abstract = {There are two cultures in the use of statistical modeling to reach conclusions from data. One assumes that the data are generated by a given stochastic data model. The other uses algorithmic models and treats the data mechanism as unknown. The statistical community has been committed to the almost exclusive use of data models. This commitment has led to irrelevant theory, questionable conclusions, and has kept statisticians from working on a large range of interesting current problems. Algorithmic modeling, both in theory and practice, has developed rapidly in fields outside statistics. It can be used both on large complex data sets and as a more accurate and informative alternative to data modeling on smaller data sets. If our goal as a field is to use data to solve problems, then we need to move away from exclusive dependence on data models and adopt a more diverse set of tools.},
  file = {/home/nathante/Zotero/storage/7ANK3STI/Breiman_2001_Statistical Modeling.pdf;/home/nathante/Zotero/storage/CHU57W33/1009213726.html}
}

@article{budak_better_2021,
  title = {Better {{Crowdcoding}}: {{Strategies}} for {{Promoting Accuracy}} in {{Crowdsourced Content Analysis}}},
  shorttitle = {Better {{Crowdcoding}}},
  author = {Budak, Ceren and Garrett, R. Kelly and Sude, Daniel},
  date = {2021-04-03},
  journaltitle = {Communication Methods and Measures},
  volume = {15},
  number = {2},
  pages = {141--155},
  publisher = {{Routledge}},
  issn = {1931-2458},
  abstract = {In this work, we evaluate different instruction strategies to improve the quality of crowdcoding for the concept of civility. We test the effectiveness of training, codebooks, and their combination through 2 × 2 experiments conducted on two different populations – students and Amazon Mechanical Turk workers. In addition, we perform simulations to evaluate the trade-off between cost and performance associated with different instructional strategies and the number of human coders. We find that training improves crowdcoding quality, while codebooks do not. We further show that relying on several human coders and applying majority rule to their assessments significantly improves performance.}
}

@book{buonaccorsi_measurement_2010,
  title = {Measurement {{Error}}: {{Models}}, {{Methods}}, and {{Applications}}},
  shorttitle = {Measurement {{Error}}},
  author = {Buonaccorsi, John P.},
  date = {2010-07-19},
  publisher = {{Chapman and Hall/CRC}},
  location = {{New York}},
  abstract = {Over the last 20 years, comprehensive strategies for treating measurement error in complex models and accounting for the use of extra data to estimate measurement error parameters have emerged. Focusing on both established and novel approaches, Measurement Error: Models, Methods, and Applications provides an overview of the main techniques and illu},
  isbn = {978-0-429-15035-7},
  pagetotal = {464},
  file = {/home/nathante/Zotero/storage/E8KV2QMH/Buonaccorsi_2010_Measurement Error.pdf}
}

@article{burggraaff_through_2020,
  title = {Through a Different Gate: {{An}} Automated Content Analysis of How Online News and Print News Differ},
  shorttitle = {Through a Different Gate},
  author = {Burggraaff, Christiaan and Trilling, Damian},
  date = {2020-01},
  journaltitle = {Journalism},
  shortjournal = {Journalism},
  volume = {21},
  number = {1},
  pages = {112--129},
  issn = {1464-8849, 1741-3001},
  abstract = {We investigate how news values differ between online and print news articles. We hypothesize that print and online articles differ in terms of news values because of differences in the routines used to produce them. Based on a quantitative automated content analysis of N\,=\,762,095 Dutch news items, we show that online news items are more likely to be follow-up items than print items, and that there are further differences regarding news values like references to persons, the power elite, negativity, and positivity. In order to conduct this large-scale analysis, we developed innovative methods to automatically code a wide range of news values. In particular, this article demonstrates how techniques such as sentiment analysis, named entity recognition, supervised machine learning, and automated queries of external databases can be combined and used to study journalistic content. Possible explanations for the difference found between online and offline news are discussed.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/3DN8D8QJ/Burggraaff und Trilling - 2020 - Through a different gate An automated content ana.pdf}
}

@article{burscher_teaching_2014,
  title = {Teaching the {{Computer}} to {{Code Frames}} in {{News}}: {{Comparing Two Supervised Machine Learning Approaches}} to {{Frame Analysis}}},
  shorttitle = {Teaching the {{Computer}} to {{Code Frames}} in {{News}}},
  author = {Burscher, Björn and Odijk, Daan and Vliegenthart, Rens and family=Rijke, given=Maarten, prefix=de, useprefix=true and family=Vreese, given=Claes H., prefix=de, useprefix=true},
  date = {2014-07-03},
  journaltitle = {Communication Methods and Measures},
  shortjournal = {Communication Methods and Measures},
  volume = {8},
  number = {3},
  pages = {190--206},
  issn = {1931-2458, 1931-2466},
  langid = {english}
}

@article{burscher_using_2015,
  title = {Using {{Supervised Machine Learning}} to {{Code Policy Issues}}: {{Can Classifiers Generalize}} across {{Contexts}}?},
  shorttitle = {Using {{Supervised Machine Learning}} to {{Code Policy Issues}}},
  author = {Burscher, Bjorn and Vliegenthart, Rens and De Vreese, Claes H.},
  date = {2015-05},
  journaltitle = {The ANNALS of the American Academy of Political and Social Science},
  shortjournal = {The ANNALS of the American Academy of Political and Social Science},
  volume = {659},
  number = {1},
  pages = {122--131},
  issn = {0002-7162, 1552-3349},
  abstract = {Content analysis of political communication usually covers large amounts of material and makes the study of dynamics in issue salience a costly enterprise. In this article, we present a supervised machine learning approach for the automatic coding of policy issues, which we apply to news articles and parliamentary questions. Comparing computer-based annotations with human annotations shows that our method approaches the performance of human coders. Furthermore, we investigate the capability of an automatic coding tool, which is based on supervised machine learning, to generalize across contexts. We conclude by highlighting implications for methodological advances and empirical theory testing.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/697KE5ZI/Burscher et al. - 2015 - Using Supervised Machine Learning to Code Policy I.pdf}
}

@article{calude_deluge_2017,
  title = {The {{Deluge}} of {{Spurious Correlations}} in {{Big Data}}},
  author = {Calude, Cristian S. and Longo, Giuseppe},
  date = {2017-09-01},
  journaltitle = {Foundations of Science},
  shortjournal = {Found Sci},
  volume = {22},
  number = {3},
  pages = {595--612},
  issn = {1572-8471},
  abstract = {Very large databases are a major opportunity for science and data analytics is a remarkable new field of investigation in computer science. The effectiveness of these tools is used to support a “philosophy” against the scientific method as developed throughout history. According to this view, computer-discovered correlations should replace understanding and guide prediction and action. Consequently, there will be no need to give scientific meaning to phenomena, by proposing, say, causal relations, since regularities in very large databases are enough: “with enough data, the numbers speak for themselves”. The “end of science” is proclaimed. Using classical results from ergodic theory, Ramsey theory and algorithmic information theory, we show that this “philosophy” is wrong. For example, we prove that very large databases have to contain arbitrary correlations. These correlations appear only due to the size, not the nature, of data. They can be found in “randomly” generated, large enough databases, which—as we will prove—implies that most correlations are spurious. Too much information tends to behave like very little information. The scientific method can be enriched by computer mining in immense databases, but not replaced by it.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/QKXXLRSH/Calude_Longo_2017_The Deluge of Spurious Correlations in Big Data.pdf}
}

@book{carroll_measurement_2006,
  title = {Measurement {{Error}} in {{Nonlinear Models}}},
  author = {Carroll, Raymond J and Ruppert, David and Stefanski, Leonard A and Crainiceanu, Ciprian M},
  date = {2006},
  series = {Monographs on {{Statistics}} and {{Applied Probability}}},
  edition = {2},
  number = {105},
  publisher = {{Chapman \& Hall/CRC}},
  location = {{Boca Raton}},
  langid = {english},
  pagetotal = {484},
  file = {/home/nathante/Zotero/storage/K4V878P6/Carroll et al_2006_Measurement Error in Nonlinear Models.pdf}
}

@report{chan_automation-coerced_2022,
  type = {preprint},
  title = {Automation-Coerced, Increased Dilution of Correlation},
  author = {Chan, Chung-hong},
  date = {2022-05-19},
  institution = {{SocArXiv}},
  abstract = {Automated data-making methods in content analysis —like all measurements— are fallible. The purpose of this simulation study is to show this fallibility can lead to the correlation dilution effect: the biased estimation of true effect size towards zero, or, in other words, the unexpected reduction in statistical power. An alternative way to measure the performance of automated procedures, which focuses on the retention of statistical power, is proposed. This paper ends with best practices regarding planning, executing, and reporting of automated content analyses.},
  file = {/home/nathante/Zotero/storage/JGAH7KJS/Chan - 2022 - Automation-coerced, increased dilution of correlat.pdf}
}

@article{chen_alternative_2015,
  title = {Alternative Errors-in-Variables Models and Their Applications in Finance Research},
  author = {Chen, Hong-Yi and Lee, Alice C. and Lee, Cheng-Few},
  date = {2015-11-01},
  journaltitle = {The Quarterly Review of Economics and Finance},
  shortjournal = {The Quarterly Review of Economics and Finance},
  volume = {58},
  pages = {213--227},
  issn = {1062-9769},
  abstract = {Specification error and measurement error are two major issues in finance research. The main purpose of this paper is (i) to review and extend existing errors-in-variables (EIV) estimation methods, including classical method, grouping method, instrumental variable method, mathematical programming method, maximum likelihood method, LISREL method, and the Bayesian approach; (ii) to investigate how EIV estimation methods have been used to finance related studies, such as cost of capital, capital structure, investment equation, and test capital asset pricing models; and (iii) to give a more detailed explanation of the methods used by Almeida et al. (2010).},
  langid = {english},
  keywords = {Capital asset pricing model,Capital structure,Cost of capital,Errors-in-variables,Investment equation,Measurement error},
  file = {/home/nathante/Zotero/storage/DQQQVE8K/S1062976914001057.html}
}

@article{chiu_spin_2017,
  title = {‘{{Spin}}’ in Published Biomedical Literature: {{A}} Methodological Systematic Review},
  shorttitle = {‘{{Spin}}’ in Published Biomedical Literature},
  author = {Chiu, Kellia and Grundy, Quinn and Bero, Lisa},
  editor = {Boutron, Isabelle},
  date = {2017-09-11},
  journaltitle = {PLOS Biology},
  shortjournal = {PLoS Biol},
  volume = {15},
  number = {9},
  pages = {e2002173},
  issn = {1545-7885},
  langid = {english},
  file = {/home/nathante/Zotero/storage/5ZPKBUV9/Chiu et al. (2017).pdf;/home/nathante/Zotero/storage/CWU2AZI6/Chiu et al. - 2017 - ‘Spin’ in published biomedical literature A metho.pdf}
}

@misc{cjadams_jigsaw_2019,
title = {Jigsaw {{Unintended Bias}} in {{Toxicity Classification}}},
author = {{cjadams} and {Daniel Borkan} and {inversion} and {Jeffery Sorensen} and {Lucas Dixon} and {Lucy Vasserman} and {nithum}},
date = {2019},
publisher = {{Kaggle}}
}

@article{colleoni_echo_2014,
title = {Echo {{Chamber}} or {{Public Sphere}}? {{Predicting Political Orientation}} and {{Measuring Political Homophily}} in {{Twitter Using Big Data}}},
shorttitle = {Echo {{Chamber}} or {{Public Sphere}}?},
author = {Colleoni, Elanor and Rozza, Alessandro and Arvidsson, Adam},
date = {2014-04-01},
journaltitle = {Journal of Communication},
shortjournal = {Journal of Communication},
volume = {64},
number = {2},
pages = {317--332},
issn = {0021-9916},
abstract = {This paper investigates political homophily on Twitter. Using a combination of machine learning and social network analysis we classify users as Democrats or as Republicans based on the political content shared. We then investigate political homophily both in the network of reciprocated and nonreciprocated ties. We find that structures of political homophily differ strongly between Democrats and Republicans. In general, Democrats exhibit higher levels of political homophily. But Republicans who follow official Republican accounts exhibit higher levels of homophily than Democrats. In addition, levels of homophily are higher in the network of reciprocated followers than in the nonreciprocated network. We suggest that research on political homophily on the Internet should take the political culture and practices of users seriously.},
file = {/home/nathante/Zotero/storage/T9R2UPEF/Colleoni et al_2014_Echo Chamber or Public Sphere.pdf;/home/nathante/Zotero/storage/IVJ4I8CA/4085994.html}
}

@article{courtney_automatic_2020,
title = {Automatic Translation, Context, and Supervised Learning in Comparative Politics},
author = {Courtney, Michael and Breen, Michael and McMenamin, Iain and McNulty, Gemma},
date = {2020},
journaltitle = {Journal of Information Technology \& Politics},
volume = {17},
number = {3},
pages = {208--217},
publisher = {{Taylor \& Francis}}
}

@article{dobbrick_enhancing_2021,
title = {Enhancing {{Theory-Informed Dictionary Approaches}} with “{{Glass-box}}” {{Machine Learning}}: {{The Case}} of {{Integrative Complexity}} in {{Social Media Comments}}},
shorttitle = {Enhancing {{Theory-Informed Dictionary Approaches}} with “{{Glass-box}}” {{Machine Learning}}},
author = {Dobbrick, Timo and Jakob, Julia and Chan, Chung-Hong and Wessler, Hartmut},
date = {2021-11-17},
journaltitle = {Communication Methods and Measures},
volume = {0},
number = {0},
pages = {1--18},
publisher = {{Routledge}},
issn = {1931-2458},
abstract = {Dictionary-based approaches to computational text analysis have been shown to perform relatively poorly, particularly when the dictionaries rely on simple bags of words, are not specified for the domain under study, and add word scores without weighting. While machine learning approaches usually perform better, they offer little insight into (a) which of the assumptions underlying dictionary approaches (bag-of-words, domain transferability, or additivity) impedes performance most, and (b) which language features drive the algorithmic classification most strongly. To fill both gaps, we offer a systematic assumption-based error analysis, using the integrative complexity of social media comments as our case in point. We show that attacking the additivity assumption offers the strongest potential for improving dictionary performance. We also propose to combine off-the-shelf dictionaries with supervised “glass box” machine learning algorithms (as opposed to the usual “black box” machine learning approaches) to classify texts and learn about the most important features for classification. This dictionary-plus-supervised-learning approach performs similarly well as classic full-text machine learning or deep learning approaches, but yields interpretable results in addition, which can inform theory development on top of enabling a valid classification.},
file = {/home/nathante/Zotero/storage/TVUYGPSE/Dobbrick et al_2021_Enhancing Theory-Informed Dictionary Approaches with “Glass-box” Machine.pdf}
}

@article{elsherief_hate_2018,
title = {Hate {{Lingo}}: {{A Target-Based Linguistic Analysis}} of {{Hate Speech}} in {{Social Media}}},
shorttitle = {Hate {{Lingo}}},
author = {ElSherief, Mai and Kulkarni, Vivek and Nguyen, Dana and Wang, William Yang and Belding, Elizabeth},
date = {2018-06-15},
journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media},
volume = {12},
number = {1},
issn = {2334-0770},
abstract = {While social media empowers freedom of expression and individual voices, it also enables anti-social behavior, online harassment, cyberbullying, and hate speech. In this paper, we deepen our understanding of online hate speech by focusing on a largely neglected but crucial aspect of hate speech -- its target: either directed towards a specific person or entity, or generalized towards a group of people sharing a common protected characteristic. We perform the first linguistic and psycholinguistic analysis of these two forms of hate speech and reveal the presence of interesting markers that distinguish these types of hate speech. Our analysis reveals that Directed hate speech, in addition to being more personal and directed, is more informal, angrier, and often explicitly attacks the target (via name calling) with fewer analytic words and more words suggesting authority and influence. Generalized hate speech, on the other hand, is dominated by religious hate, is characterized by the use of lethal words such as murder, exterminate, and kill; and quantity words such as million and many. Altogether, our work provides a data-driven analysis of the nuances of online-hate speech that enables not only a deepened understanding of hate speech and its social implications, but also its detection.},
issue = {1},
langid = {english},
keywords = {generalized hate},
file = {/home/nathante/Zotero/storage/6RZTDKS4/ElSherief et al_2018_Hate Lingo.pdf}
}

@article{erickson_two-step_2002,
title = {Two-{{Step GMM Estimation}} of the {{Errors-in-Variables Model Using High-Order Moments}}},
author = {Erickson, Timothy and Whited, Toni M.},
date = {2002},
journaltitle = {Econometric Theory},
volume = {18},
number = {3},
eprint = {3533649},
eprinttype = {jstor},
pages = {776--799},
publisher = {{Cambridge University Press}},
issn = {0266-4666},
abstract = {We consider a multiple mismeasured regressor errors-in-variables model where the measurement and equation errors are independent and have moments of every order but otherwise are arbitrarily distributed. We present parsimonious two-step generalized method of moments (GMM) estimators that exploit overidentifying information contained in the high-order moments of residuals obtained by "partialling out" perfectly measured regressors. Using high-order moments requires that the GMM covariance matrices be adjusted to account for the use of estimated residuals instead of true residuals defined by population projections. This adjustment is also needed to determine the optimal GMM estimator. The estimators perform well in Monte Carlo simulations and in some cases minimize mean absolute error by using moments up to seventh order. We also determine the distributions for functions that depend on both a GMM estimate and a statistic not jointly estimated with the GMM estimate.},
file = {/home/nathante/Zotero/storage/WV3FS83S/Erickson_Whited_2002_Two-Step GMM Estimation of the Errors-in-Variables Model Using High-Order.pdf}
}

@article{felderer_using_nodate,
title = {Using {{Double Machine Learning}} to {{Understand Nonresponse}} in the {{Recruitment}} of a {{Mixed-Mode Online Panel}}},
author = {Felderer, Barbara and Kueck, Jannis and Spindler, Martin},
journaltitle = {Social Science Computer Review},
pages = {21},
abstract = {Survey scientists increasingly face the problem of high-dimensionality in their research as digitization makes it much easier to construct high-dimensional (or “big”) data sets through tools such as online surveys and mobile applications. Machine learning methods are able to handle such data, and they have been successfully applied to solve predictive problems. However, in many situations, survey statisticians want to learn about causal relationships to draw conclusions and be able to transfer the findings of one survey to another. Standard machine learning methods provide biased estimates of such relationships. We introduce into survey statistics the double machine learning approach, which gives approximately unbiased estimators of parameters of interest, and show how it can be used to analyze survey nonresponse in a high-dimensional panel setting. The double machine learning approach here assumes unconfoundedness of variables as its identification strategy. In high-dimensional settings, where the number of potential confounders to include in the model is too large, the double machine learning approach secures valid inference by selecting the relevant confounding variables.},
langid = {english},
file = {/home/nathante/Zotero/storage/V36TN6SF/Felderer et al. - Using Double Machine Learning to Understand Nonres.pdf}
}

@inproceedings{fiesler_reddit_2018,
title = {Reddit Rules! {{Characterizing}} an Ecosystem of Governance.},
booktitle = {Proceedings of the {{International AAAI Conference}} on {{Web}} and {{Social Media}}},
author = {Fiesler, Casey and Jiang, Jialun "Aaron" and McCann, Joshua and Frye, Kyle and Brubaker, Jed R.},
date = {2018},
pages = {72--81},
publisher = {{AAAI}},
location = {{Stanford, CA}},
eventtitle = {{{ICWSM}}},
file = {/home/nathante/Zotero/storage/65MQFFUB/Fiesler et al. - 2018 - Reddit rules! Characterizing an ecosystem of gover.pdf;/home/nathante/Zotero/storage/75956PAL/Fiesler et al. - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/HHY4DJB6/Fiesler - Reddit Rules! Characterizing an Ecosystem of Gover.pdf}
}

@article{fong_machine_2021,
title = {Machine {{Learning Predictions}} as {{Regression Covariates}}},
author = {Fong, Christian and Tyler, Matthew},
date = {2021-10},
journaltitle = {Political Analysis},
volume = {29},
number = {4},
pages = {467--484},
issn = {1047-1987, 1476-4989},
abstract = {In text, images, merged surveys, voter files, and elsewhere, data sets are often missing important covariates, either because they are latent features of observations (such as sentiment in text) or because they are not collected (such as race in voter files). One promising approach for coping with this missing data is to find the true values of the missing covariates for a subset of the observations and then train a machine learning algorithm to predict the values of those covariates for the rest. However, plugging in these predictions without regard for prediction error renders regression analyses biased, inconsistent, and overconfident. We characterize the severity of the problem posed by prediction error, describe a procedure to avoid these inconsistencies under comparatively general assumptions, and demonstrate the performance of our estimators through simulations and a study of hostile political dialogue on the Internet. We provide software implementing our approach.},
langid = {english},
keywords = {classification,inference,instrumental variables,machine learning},
file = {/home/nathante/Zotero/storage/D52UU9YC/Fong - Online Appendix for Machine Learning Predictions a.pdf;/home/nathante/Zotero/storage/RF8VGCKM/Fong_Tyler_2021_Machine Learning Predictions as Regression Covariates.pdf;/home/nathante/Zotero/storage/SUIH8GNP/Fong und Tyler - 2021 - Machine Learning Predictions as Regression Covaria.pdf;/home/nathante/Zotero/storage/WP4QKQL2/462A74A46A97C20A17CF640BDA72B826.html}
}

@article{fong_online_nodate,
title = {Online {{Appendix}} for {{Machine Learning Predictions}} as {{Regression Covariates}}},
author = {Fong, Christian},
pages = {38},
langid = {english},
file = {/home/nathante/Zotero/storage/R78H4JIM/Fong - Online Appendix for Machine Learning Predictions a.pdf}
}

@inproceedings{fortuna_toxic_2020,
title = {Toxic, {{Hateful}}, {{Offensive}} or {{Abusive}}? {{What Are We Really Classifying}}? {{An Empirical Analysis}} of {{Hate Speech Datasets}}},
shorttitle = {Toxic, {{Hateful}}, {{Offensive}} or {{Abusive}}?},
booktitle = {Proceedings of the 12th {{Language Resources}} and {{Evaluation Conference}}},
author = {Fortuna, Paula and Soler, Juan and Wanner, Leo},
date = {2020-05},
pages = {6786--6794},
publisher = {{European Language Resources Association}},
location = {{Marseille, France}},
abstract = {The field of the automatic detection of hate speech and related concepts has raised a lot of interest in the last years. Different datasets were annotated and classified by means of applying different machine learning algorithms. However, few efforts were done in order to clarify the applied categories and homogenize different datasets. Our study takes up this demand. We analyze six different publicly available datasets in this field with respect to their similarity and compatibility. We conduct two different experiments. First, we try to make the datasets compatible and represent the dataset classes as Fast Text word vectors analyzing the similarity between different classes in a intra and inter dataset manner. Second, we submit the chosen datasets to the Perspective API Toxicity classifier, achieving different performances depending on the categories and datasets. One of the main conclusions of these experiments is that many different definitions are being used for equivalent concepts, which makes most of the publicly available datasets incompatible. Grounded in our analysis, we provide guidelines for future dataset collection and annotation.},
eventtitle = {{{LREC}} 2020},
isbn = {979-10-95546-34-4},
langid = {english},
file = {/home/nathante/Zotero/storage/D4ZXDYTH/Fortuna et al_2020_Toxic, Hateful, Offensive or Abusive.pdf}
}

@book{fuller_measurement_1987,
title = {Measurement Error Models},
author = {Fuller, Wayne A.},
date = {1987},
series = {Wiley Series in Probability and Mathematical Statistics},
publisher = {{Wiley}},
location = {{New York}},
isbn = {978-0-471-86187-4},
langid = {english},
pagetotal = {440},
keywords = {Error analysis (Mathematics),Regression analysis},
file = {/home/nathante/Zotero/storage/HD88JCCY/Fuller_1987_Measurement error models.pdf}
}

@article{geis_statistical_2021,
title = {Statistical {{Power}} in {{Content Analysis Designs}}: {{How Effect Size}}, {{Sample Size}} and {{Coding Accuracy Jointly Affect Hypothesis Testing}} – {{A Monte Carlo Simulation Approach}}.},
shorttitle = {Statistical {{Power}} in {{Content Analysis Designs}}},
author = {Geiß, Stefan},
date = {2021-03-01},
journaltitle = {Computational Communication Research},
volume = {3},
number = {1},
pages = {61--89},
issn = {2665-9085, 2665-9085},
abstract = {This study uses Monte Carlo simulation techniques to estimate the minimum required levels of intercoder reliability in content analysis data for testing correlational hypotheses, depending on sample size, effect size and coder behavior under uncertainty. The ensuing procedure is analogous to power calculations for experimental designs. In most widespread sample size/effect size settings, the rule-of-thumb that chance-adjusted agreement should be ≥.80 or ≥.667 corresponds to the simulation results, resulting in acceptable α and β error rates. However, this simulation allows making precise power calculations that can consider the specifics of each study’s context, moving beyond one-size-fits-all recommendations. Studies with low sample sizes and/or low expected effect sizes may need coder agreement above .800 to test a hypothesis with sufficient statistical power. In studies with high sample sizes and/or high expected effect sizes, coder agreement below .667 may suffice. Such calculations can help in both evaluating and in designing studies. Particularly in pre-registered research, higher sample sizes may be used to compensate for low expected effect sizes and/or borderline coding reliability (e.g. when constructs are hard to measure). I supply equations, easy-to-use tables and R functions to facilitate use of this framework, along with example code as online appendix.},
langid = {english},
file = {/home/nathante/Zotero/storage/QJNTR5AU/Geiß - 2021 - Statistical Power in Content Analysis Designs How.pdf}
}

@article{gilardi_social_2022,
title = {Social {{Media}} and {{Political Agenda Setting}}},
author = {Gilardi, Fabrizio and Gessler, Theresa and Kubli, Maël and Müller, Stefan},
date = {2022-01-02},
journaltitle = {Political Communication},
shortjournal = {Political Communication},
volume = {39},
number = {1},
pages = {39--60},
issn = {1058-4609, 1091-7675},
langid = {english},
file = {/home/nathante/Zotero/storage/S7BXIGP3/Gilardi et al. - 2022 - Social Media and Political Agenda Setting.pdf}
}

@book{gillespie_custodians_2018,
title = {Custodians of the {{Internet}}: Platforms, Content Moderation, and the Hidden Decisions That Shape Social Media},
shorttitle = {Custodians of the Internet},
author = {Gillespie, Tarleton},
date = {2018},
publisher = {{Yale University Press}},
location = {{New Haven}},
abstract = {"Most users want their Twitter feed, Facebook page, and YouTube comments to be free of harassment and porn. Whether faced with 'fake news' or livestreamed violence, 'content moderators'--who censor or promote user-posted content--have never been more important. This is especially true when the tools that social media platforms use to curb trolling, ban hate speech, and censor pornography can also silence the speech you need to hear. [The author] provides an overview of current social media practices and explains the underlying rationales for how, when, and why these policies are enforced. In doing so, [the author] highlights that content moderation receives too little public scrutiny even as it is shapes social norms and creates consequences for public discourse, cultural production, and the fabric of society. Based on interviews with content moderators, creators, and consumers, this...book is...for anyone who's ever clicked 'like' or 'retweet.'"--},
isbn = {978-0-300-17313-0},
pagetotal = {288},
keywords = {Business & Economics / Industries / Media & Communications,Censorship,Computers / Web / Social Media,Political Science / Censorship,Social media,Social Science / Media Studies},
annotation = {OCLC: on1005113962},
file = {/home/nathante/Zotero/storage/I84YKU5K/Gillespie_2018_Custodians of the Internet.pdf}
}

@article{gonzalez-bailon_signals_2015,
title = {Signals of {{Public Opinion}} in {{Online Communication}}: {{A Comparison}} of {{Methods}} and {{Data Sources}}},
shorttitle = {Signals of {{Public Opinion}} in {{Online Communication}}},
author = {González-Bailón, Sandra and Paltoglou, Georgios},
date = {2015-05-01},
journaltitle = {The ANNALS of the American Academy of Political and Social Science},
shortjournal = {The ANNALS of the American Academy of Political and Social Science},
volume = {659},
number = {1},
pages = {95--107},
publisher = {{SAGE Publications Inc}},
issn = {0002-7162},
abstract = {This study offers a systematic comparison of automated content analysis tools. The ability of different lexicons to correctly identify affective tone (e.g., positive vs. negative) is assessed in different social media environments. Our comparisons examine the reliability and validity of publicly available, off-the-shelf classifiers. We use datasets from a range of online sources that vary in the diversity and formality of the language used, and we apply different classifiers to extract information about the affective tone in these datasets. We first measure agreement (reliability test) and then compare their classifications with the benchmark of human coding (validity test). Our analyses show that validity and reliability vary with the formality and diversity of the text; we also show that ready-to-use methods leave much space for improvement when analyzing domain-specific content and that a machine-learning approach offers more accurate predictions across communication domains.},
langid = {english},
keywords = {content analysis,information diversity,language formality,lexicon-based methods,machine learning,sentiment analysis,text mining}
}

@article{gorwa_algorithmic_2020,
title = {Algorithmic Content Moderation: {{Technical}} and Political Challenges in the Automation of Platform Governance},
shorttitle = {Algorithmic Content Moderation},
author = {Gorwa, Robert and Binns, Reuben and Katzenbach, Christian},
date = {2020-01-01},
journaltitle = {Big Data \& Society},
shortjournal = {Big Data \& Society},
volume = {7},
number = {1},
pages = {2053951719897945},
publisher = {{SAGE Publications Ltd}},
issn = {2053-9517},
abstract = {As government pressure on major technology companies builds, both firms and legislators are searching for technical solutions to difficult platform governance puzzles such as hate speech and misinformation. Automated hash-matching and predictive machine learning tools – what we define here as algorithmic moderation systems – are increasingly being deployed to conduct content moderation at scale by major platforms for user-generated content such as Facebook, YouTube and Twitter. This article provides an accessible technical primer on how algorithmic moderation works; examines some of the existing automated tools used by major platforms to handle copyright infringement, terrorism and toxic speech; and identifies key political and ethical issues for these systems as the reliance on them grows. Recent events suggest that algorithmic moderation has become necessary to manage growing public expectations for increased platform responsibility, safety and security on the global stage; however, as we demonstrate, these systems remain opaque, unaccountable and poorly understood. Despite the potential promise of algorithms or ‘AI’, we show that even ‘well optimized’ moderation systems could exacerbate, rather than relieve, many existing problems with content policy as enacted by platforms for three main reasons: automated moderation threatens to (a) further increase opacity, making a famously non-transparent set of practices even more difficult to understand or audit, (b) further complicate outstanding issues of fairness and justice in large-scale sociotechnical systems and (c) re-obscure the fundamentally political nature of speech decisions being executed at scale.},
langid = {english},
keywords = {algorithms,artificial intelligence,content moderation,copyright,Platform governance,toxic speech},
file = {/home/nathante/Zotero/storage/HKY4DC38/Gorwa et al_2020_Algorithmic content moderation.pdf}
}

@article{grimmer_machine_2021,
title = {Machine {{Learning}} for {{Social Science}}: {{An Agnostic Approach}}},
shorttitle = {Machine {{Learning}} for {{Social Science}}},
author = {Grimmer, Justin and Roberts, Margaret E. and Stewart, Brandon M.},
date = {2021},
journaltitle = {Annual Review of Political Science},
volume = {24},
number = {1},
pages = {395--419},
abstract = {Social scientists are now in an era of data abundance, and machine learning tools are increasingly used to extract meaning from data sets both massive and small. We explain how the inclusion of machine learning in the social sciences requires us to rethink not only applications of machine learning methods but also best practices in the social sciences. In contrast to the traditional tasks for machine learning in computer science and statistics, when machine learning is applied to social scientific data, it is used to discover new concepts, measure the prevalence of those concepts, assess causal effects, and make predictions. The abundance of data and resources facilitates the move away from a deductive social science to a more sequential, interactive, and ultimately inductive approach to inference. We explain how an agnostic approach to machine learning methods focused on the social science tasks facilitates progress across a wide range of questions.},
keywords = {machine learning,research design,text as data},
file = {/home/nathante/Zotero/storage/N4PR8YCM/Grimmer et al_2021_Machine Learning for Social Science.pdf}
}

@article{grimmer_machine_2021-1,
title = {Machine {{Learning}} for {{Social Science}}: {{An Agnostic Approach}}},
shorttitle = {Machine {{Learning}} for {{Social Science}}},
author = {Grimmer, Justin and Roberts, Margaret E. and Stewart, Brandon M.},
date = {2021-05-11},
journaltitle = {Annual Review of Political Science},
shortjournal = {Annu. Rev. Polit. Sci.},
volume = {24},
number = {1},
pages = {395--419},
issn = {1094-2939, 1545-1577},
abstract = {Social scientists are now in an era of data abundance, and machine learning tools are increasingly used to extract meaning from data sets both massive and small. We explain how the inclusion of machine learning in the social sciences requires us to rethink not only applications of machine learning methods but also best practices in the social sciences. In contrast to the traditional tasks for machine learning in computer science and statistics, when machine learning is applied to social scientific data, it is used to discover new concepts, measure the prevalence of those concepts, assess causal effects, and make predictions. The abundance of data and resources facilitates the move away from a deductive social science to a more sequential, interactive, and ultimately inductive approach to inference. We explain how an agnostic approach to machine learning methods focused on the social science tasks facilitates progress across a wide range of questions.},
langid = {english},
internal-note = {NOTE(review): apparent duplicate of grimmer_machine_2021 under a different key — consider merging and keeping one key (or aliasing via ids = {...}) after checking which key is cited.},
file = {/home/nathante/Zotero/storage/NTS5M7DS/Grimmer et al. - 2021 - Machine Learning for Social Science An Agnostic A.pdf}
}

@article{grimmer_text_2013,
title = {Text as {{Data}}: {{The Promise}} and {{Pitfalls}} of {{Automatic Content Analysis Methods}} for {{Political Texts}}},
shorttitle = {Text as {{Data}}},
author = {Grimmer, Justin and Stewart, Brandon M.},
date = {2013},
journaltitle = {Political Analysis},
volume = {21},
number = {3},
pages = {267--297},
issn = {1047-1987, 1476-4989},
abstract = {Politics and political conflict often occur in the written and spoken word. Scholars have long recognized this, but the massive costs of analyzing even moderately sized collections of texts have hindered their use in political science research. Here lies the promise of automated text analysis: it substantially reduces the costs of analyzing large collections of text. We provide a guide to this exciting new area of research and show how, in many instances, the methods have already obtained part of their promise. But there are pitfalls to using automated methods—they are no substitute for careful thought and close reading and require extensive and problem-specific validation. We survey a wide range of new methods, provide guidance on how to validate the output of the models, and clarify misconceptions and errors in the literature. To conclude, we argue that for automated text methods to become a standard tool for political scientists, methodologists must contribute new methods and new methods of validation.},
langid = {english},
file = {/home/nathante/Zotero/storage/TZULVLRZ/Grimmer_Stewart_2013_Text as Data.pdf;/home/nathante/Zotero/storage/P8HGL73A/F7AAC8B2909441603FEB25C156448F20.html}
}

@book{grimmer_text_2022,
title = {Text as {{Data}}: {{A New Framework}} for {{Machine Learning}} and the {{Social Sciences}}},
shorttitle = {Text as {{Data}}},
author = {Grimmer, Justin and Roberts, Margaret E. and Stewart, Brandon M.},
date = {2022-01-04},
eprint = {dL40EAAAQBAJ},
eprinttype = {googlebooks},
publisher = {{Princeton University Press}},
abstract = {A guide for using computational text analysis to learn about the social world From social media posts and text messages to digital government documents and archives, researchers are bombarded with a deluge of text reflecting the social world. This textual data gives unprecedented insights into fundamental questions in the social sciences, humanities, and industry. Meanwhile new machine learning tools are rapidly transforming the way science and business are conducted. Text as Data shows how to combine new sources of data, machine learning tools, and social science research design to develop and evaluate new insights.Text as Data is organized around the core tasks in research projects using text—representation, discovery, measurement, prediction, and causal inference. The authors offer a sequential, iterative, and inductive approach to research design. Each research task is presented complete with real-world applications, example methods, and a distinct style of task-focused research.Bridging many divides—computer science and social science, the qualitative and the quantitative, and industry and academia—Text as Data is an ideal resource for anyone wanting to analyze large collections of text in an era when data is abundant and computation is cheap, but the enduring challenges of social science remain.Overview of how to use text as dataResearch design for a world of data delugeExamples from across the social sciences and industry},
isbn = {978-0-691-20799-5},
langid = {english},
pagetotal = {360},
keywords = {Computers / Data Science / Data Analytics,Computers / Data Science / Data Modeling & Design,Computers / Data Science / Machine Learning,Social Science / Methodology,Social Science / Sociology / General}
}

@article{guess_how_2019,
title = {How {{Accurate Are Survey Responses}} on {{Social Media}} and {{Politics}}?},
author = {Guess, Andrew and Munger, Kevin and Nagler, Jonathan and Tucker, Joshua},
date = {2019-04-03},
journaltitle = {Political Communication},
shortjournal = {Political Communication},
volume = {36},
number = {2},
pages = {241--258},
issn = {1058-4609, 1091-7675},
langid = {english}
}

@article{gummer_using_2022,
title = {Using {{Google Trends Data}} to {{Learn More About Survey Participation}}},
author = {Gummer, Tobias and Oehrlein, Anne-Sophie},
date = {2022-09-20},
journaltitle = {Social Science Computer Review},
shortjournal = {Social Science Computer Review},
pages = {089443932211291},
issn = {0894-4393, 1552-8286},
abstract = {As response rates continue to decline, the need to learn more about the survey participation process remains an important task for survey researchers. Search engine data may be one possible source for learning about what information some potential respondents are looking up about a survey when they are making a participation decision. In the present study, we explored the potential of search engine data for learning about survey participation and how it can inform survey design decisions. We drew on freely available Google Trends (GT) data to learn about the use of Google Search with respect to our case study: participation in the Family Research and Demographic Analysis (FReDA) panel survey. Our results showed that some potential respondents were using Google Search to gather information on the FReDA survey. We also showed that the additional data obtained via GT can help survey researchers to discover topics of interest to respondents and geographically stratified search patterns. Moreover, we introduced different approaches for obtaining data via GT, discussed the challenges that come with these data, and closed with practical recommendations on how survey researchers might utilize GT data to learn about survey participation.},
langid = {english}
}

@article{guo_who_2020,
title = {Who Is Responsible for {{Twitter}}’s Echo Chamber Problem? {{Evidence}} from 2016 {{U}}.{{S}}. Election Networks},
shorttitle = {Who Is Responsible for {{Twitter}}’s Echo Chamber Problem?},
author = {Guo, Lei and Rohde, Jacob A. and Wu, H. Denis},
date = {2020-01-28},
journaltitle = {Information, Communication \& Society},
shortjournal = {Information, Communication \& Society},
volume = {23},
number = {2},
pages = {234--251},
issn = {1369-118X, 1468-4462},
langid = {english}
}

@article{gwet_computing_2008,
title = {Computing Inter-Rater Reliability and Its Variance in the Presence of High Agreement},
author = {Gwet, Kilem Li},
date = {2008},
journaltitle = {British Journal of Mathematical and Statistical Psychology},
volume = {61},
number = {1},
pages = {29--48},
issn = {2044-8317},
abstract = {Pi (π) and kappa (κ) statistics are widely used in the areas of psychiatry and psychological testing to compute the extent of agreement between raters on nominally scaled data. It is a fact that these coefficients occasionally yield unexpected results in situations known as the paradoxes of kappa. This paper explores the origin of these limitations, and introduces an alternative and more stable agreement coefficient referred to as the AC1 coefficient. Also proposed are new variance estimators for the multiple-rater generalized π and AC1 statistics, whose validity does not depend upon the hypothesis of independence between raters. This is an improvement over existing alternative variances, which depend on the independence assumption. A Monte-Carlo simulation study demonstrates the validity of these variance estimators for confidence interval construction, and confirms the value of AC1 as an improved alternative to existing inter-rater reliability statistics.},
langid = {english},
file = {/home/nathante/Zotero/storage/2Y58TMMP/000711006X126600.html}
}

@article{haber_causal_2018,
title = {Causal Language and Strength of Inference in Academic and Media Articles Shared in Social Media ({{CLAIMS}}): {{A}} Systematic Review},
shorttitle = {Causal Language and Strength of Inference in Academic and Media Articles Shared in Social Media ({{CLAIMS}})},
author = {Haber, Noah and Smith, Emily R. and Moscoe, Ellen and Andrews, Kathryn and Audy, Robin and Bell, Winnie and Brennan, Alana T. and Breskin, Alexander and Kane, Jeremy C. and Karra, Mahesh and McClure, Elizabeth S. and Suarez, Elizabeth A. and {on behalf of the CLAIMS research team}},
editor = {Dorta-González, Pablo},
date = {2018-05-30},
journaltitle = {PLOS ONE},
shortjournal = {PLoS ONE},
volume = {13},
number = {5},
pages = {e0196346},
issn = {1932-6203},
langid = {english},
file = {/home/nathante/Zotero/storage/79U7LRPJ/Haber et al. (2018) Causal language and strength of inference in academic and media articles shared in social media (CLAIMS).pdf;/home/nathante/Zotero/storage/RSPSPK2X/Haber et al. - 2018 - Causal language and strength of inference in acade.pdf}
}

@article{hand_classifier_2006,
title = {Classifier {{Technology}} and the {{Illusion}} of {{Progress}}},
author = {Hand, David J.},
date = {2006-02-01},
journaltitle = {Statistical Science},
shortjournal = {Statist. Sci.},
volume = {21},
number = {1},
issn = {0883-4237},
file = {/home/nathante/Zotero/storage/2PPZII9T/Hand - 2006 - Classifier Technology and the Illusion of Progress.pdf}
}

@article{hardin_regression-calibration_2003,
title = {The {{Regression-calibration Method}} for {{Fitting Generalized Linear Models}} with {{Additive Measurement Error}}},
author = {Hardin, James W. and Schmiediche, Henrik and Carroll, Raymond J.},
date = {2003-12-01},
journaltitle = {The Stata Journal},
shortjournal = {The Stata Journal},
volume = {3},
number = {4},
pages = {361--372},
publisher = {{SAGE Publications}},
issn = {1536-867X},
abstract = {This paper discusses and illustrates the method of regression calibration. This is a straightforward technique for fitting models with additive measurement error. We present this discussion in terms of generalized linear models (GLMs) following the notation defined in Hardin and Carroll (2003). Discussion will include specified measurement error, measurement error estimated by replicate error-prone proxies, and measurement error estimated by instrumental variables. The discussion focuses on software developed as part of a small business innovation research (SBIR) grant from the National Institutes of Health (NIH).},
langid = {english},
keywords = {generalized linear models,instrumental variables,measurement error,regression calibration,replicate measures,st0050},
file = {/home/nathante/Zotero/storage/5WZVFPXQ/Hardin et al_2003_The Regression-calibration Method for Fitting Generalized Linear Models with.pdf}
}

@article{hase_computational_2022,
title = {Der „{{Computational Turn}}“: Ein „interdisziplinärer {{Turn}}“? {{Ein}} Systematischer {{Überblick}} Zur {{Nutzung}} Der Automatisierten {{Inhaltsanalyse}} in Der {{Journalismusforschung}}},
shorttitle = {Der „{{Computational Turn}}“},
author = {Hase, Valerie and Mahl, Daniela and Schäfer, Mike S.},
date = {2022},
journaltitle = {Medien \& Kommunikationswissenschaft},
shortjournal = {M\&K},
volume = {70},
number = {1-2},
pages = {60--78},
issn = {1615-634X},
abstract = {Themen journalistischer Berichterstattung durch maschinelles Lernen identifizieren oder Nachrichtendiffusion automatisiert messen: Die Anwendungsmöglichkeiten der automatisierten Inhaltsanalyse in der Journalismusforschung scheinen vielfältig. Aber wie wird die computerbasierte Methode bisher eingesetzt - und welche Konsequenzen hat der „Computational Turn“ der Kommunikationswissenschaft, besonders im Hinblick auf Interdisziplinarität? Dieser Beitrag fasst auf Basis eines systematischen Literaturüberblicks zusammen, wie die automatisierte Inhaltsanalyse im Forschungsfeld der Journalismusforschung genutzt wird. Dabei zeigt sich, dass die zunehmende Nutzung der Methode ein Indikator für methodische Interdisziplinarität in der ohnehin interdisziplinären Kommunikationswissenschaft ist. Gleichzeitig finden sich kaum Hinweise auf eine Zunahme theoretischer Interdisziplinarität, z. B. Rückgriffe auf fachfremde Theorien. Auch im Hinblick auf praktische Interdisziplinarität, z. B. Kooperationen mit anderen Disziplinen, wird unser Fach keineswegs interdisziplinärer. Vielmehr findet eine Verschiebung zugunsten technischer Disziplinen statt. Der „Computational Turn“ der Kommunikationswissenschaft ist daher zumindest bisher nur teils als „interdisziplinärer Turn“ zu verstehen. , Possibilities of applying automated content analysis in journalism research include, for example, machine learning to identify topics in journalistic coverage or measuring news diffusion via automated approaches. But how has the computational method been applied thus far? And what are consequences of the “computational turn” in communication research, especially concerning interdisciplinarity? Based on a systematic literature review, this article summarizes the use of automated content analysis in journalism research. Results illustrate an increasing use of the method by communication scientists as yet another indicator of methodological interdisciplinarity in communication research. 
However, there is little evidence of an increase in theoretical interdisciplinarity: Studies relying on computational methods do not increasingly refer to theories from other disciplines. With respect to practical interdisciplinarity, for instance collaborations, our discipline is by no means becoming more interdisciplinary. Instead, we find a shift in favor of technical disciplines. At least up to now, the “computational turn” in communication research should not be equated with an “interdisciplinary turn”.},
file = {/home/nathante/Zotero/storage/IWVHZAWM/Hase et al. - 2022 - Der „Computational Turn“ ein „interdisziplinärer .pdf}
}

@article{hausman_mismeasured_2001,
title = {Mismeasured {{Variables}} in {{Econometric Analysis}}: {{Problems}} from the {{Right}} and {{Problems}} from the {{Left}}},
shorttitle = {Mismeasured {{Variables}} in {{Econometric Analysis}}},
author = {Hausman, Jerry},
date = {2001-12},
journaltitle = {Journal of Economic Perspectives},
volume = {15},
number = {4},
pages = {57--67},
issn = {0895-3309},
abstract = {The effect of mismeasured variables in the most straightforward regression analysis with a single regressor variable leads to a least squares estimate that is downward biased in magnitude toward zero. I begin by reviewing classical issues involving mismeasured variables. I then consider three recent developments for mismeasurement econometric models. The first issue involves difficulties in using instrumental variables. A second involves the consistent estimators that have recently been developed for mismeasured nonlinear regression models. Finally, I return to mismeasured left hand side variables, where I will focus on issues in binary choice models and duration models.},
langid = {english},
keywords = {Multiple or Simultaneous Equation Models: General},
file = {/home/nathante/Zotero/storage/3M539ACE/Hausman_2001_Mismeasured Variables in Econometric Analysis.pdf;/home/nathante/Zotero/storage/4BN25KNR/articles.html}
}

@article{hayes_answering_2007,
title = {Answering the {{Call}} for a {{Standard Reliability Measure}} for {{Coding Data}}},
author = {Hayes, Andrew F. and Krippendorff, Klaus},
date = {2007-04-01},
journaltitle = {Communication Methods and Measures},
volume = {1},
number = {1},
pages = {77--89},
publisher = {{Routledge}},
issn = {1931-2458},
abstract = {In content analysis and similar methods, data are typically generated by trained human observers who record or transcribe textual, pictorial, or audible matter in terms suitable for analysis. Conclusions from such data can be trusted only after demonstrating their reliability. Unfortunately, the content analysis literature is full of proposals for so-called reliability coefficients, leaving investigators easily confused, not knowing which to choose. After describing the criteria for a good measure of reliability, we propose Krippendorff's alpha as the standard reliability measure. It is general in that it can be used regardless of the number of observers, levels of measurement, sample sizes, and presence or absence of missing data. To facilitate the adoption of this recommendation, we describe a freely available macro written for SPSS and SAS to calculate Krippendorff's alpha and illustrate its use with a simple example.}
}

@inproceedings{hede_toxicity_2021,
title = {From {{Toxicity}} in {{Online Comments}} to {{Incivility}} in {{American News}}: {{Proceed}} with {{Caution}}},
shorttitle = {From {{Toxicity}} in {{Online Comments}} to {{Incivility}} in {{American News}}},
booktitle = {Proceedings of the 16th {{Conference}} of the {{European Chapter}} of the {{Association}} for {{Computational Linguistics}}: {{Main Volume}}},
author = {Hede, Anushree and Agarwal, Oshin and Lu, Linda and Mutz, Diana C. and Nenkova, Ani},
date = {2021},
pages = {2620--2630},
publisher = {{Association for Computational Linguistics}},
location = {{Online}},
eventtitle = {Proceedings of the 16th {{Conference}} of the {{European Chapter}} of the {{Association}} for {{Computational Linguistics}}: {{Main Volume}}},
langid = {english},
file = {/home/nathante/Zotero/storage/53RFCQSU/Hede et al. - 2021 - From Toxicity in Online Comments to Incivility in .pdf}
}

@article{heidenreich_discontentment_2022,
title = {Discontentment Trumps {{Euphoria}}: {{Interacting}} with {{European Politicians}}’ Migration-Related Messages on Social Media},
author = {Heidenreich, Tobias and Eberl, Jakob-Moritz and Lind, Fabienne and Boomgaarden, Hajo G.},
date = {2022},
journaltitle = {New Media \& Society},
pages = {14614448221074648},
publisher = {{SAGE Publications Sage UK: London, England}}
}

@article{hillard_computer-assisted_2008,
title = {Computer-{{Assisted Topic Classification}} for {{Mixed-Methods Social Science Research}}},
author = {Hillard, Dustin and Purpura, Stephen and Wilkerson, John},
date = {2008-05-15},
journaltitle = {Journal of Information Technology \& Politics},
shortjournal = {Journal of Information Technology \& Politics},
volume = {4},
number = {4},
pages = {31--46},
issn = {1933-1681, 1933-169X},
langid = {english}
}

@article{hopkins_method_2010,
title = {A {{Method}} of {{Automated Nonparametric Content Analysis}} for {{Social Science}}},
author = {Hopkins, Daniel J. and King, Gary},
date = {2010-01},
journaltitle = {American Journal of Political Science},
volume = {54},
number = {1},
pages = {229--247},
issn = {00925853, 15405907},
langid = {english},
file = {/home/nathante/Zotero/storage/55EKSIUK/Hopkins und King - 2010 - A Method of Automated Nonparametric Content Analys.pdf}
}

@article{hopp_correlating_2020,
title = {Correlating {{Self-Report}} and {{Trace Data Measures}} of {{Incivility}}: {{A Proof}} of {{Concept}}},
shorttitle = {Correlating {{Self-Report}} and {{Trace Data Measures}} of {{Incivility}}},
author = {Hopp, Toby and Vargo, Chris J. and Dixon, Lucas and Thain, Nithum},
date = {2020-10-01},
journaltitle = {Social Science Computer Review},
shortjournal = {Social Science Computer Review},
volume = {38},
number = {5},
pages = {584--599},
publisher = {{SAGE Publications Inc}},
issn = {0894-4393},
abstract = {This study correlated self-report and trace data measures of political incivility. Specifically, we asked respondents to provide estimates of the degree to which they engage in uncivil political communication online. These estimates were then compared to computational measures of uncivil social media discussion behavior. The results indicated that those who self-disclose uncivil online behavior also tend to generate content on social media that is uncivil as identified by Google’s Perspective application programming interface. Taken as a whole, this work suggests that combining self-report and behavioral trace data may be a fruitful means of developing multimethod measures of complex communication behaviors.},
langid = {english},
keywords = {computational social sciences,incivility,political discussion,survey,toxicity},
file = {/home/nathante/Zotero/storage/I6YWVQW4/Hopp et al_2020_Correlating Self-Report and Trace Data Measures of Incivility.pdf}
}

@article{hopp_social_2019,
title = {Social {{Capital}} as an {{Inhibitor}} of {{Online Political Incivility}}: {{An Analysis}} of {{Behavioral Patterns Among Politically Active Facebook Users}}},
shorttitle = {Social {{Capital}} as an {{Inhibitor}} of {{Online Political Incivility}}},
author = {Hopp, Toby and Vargo, Chris J.},
date = {2019-09-13},
journaltitle = {International Journal of Communication},
volume = {13},
number = {0},
pages = {21},
issn = {1932-8036},
abstract = {This study examines the relationship between social capital and uncivil political communication online using a sample of politically active Facebook users and their Facebook post data. Theory suggests that social capital, in both its bonded and bridged forms, may inhibit the frequency and severity of online political incivility. The results here indicate that bonded social capital is negatively associated with political incivility on Facebook. Bridged capital is not, however, statistically related to posting uncivil content on Facebook.},
issue = {0},
langid = {english},
keywords = {bonded social capital,bridged social capital,incivility,political discussion},
file = {/home/nathante/Zotero/storage/UREW3WG6/Hopp_Vargo_2019_Social Capital as an Inhibitor of Online Political Incivility.pdf}
}

@online{hosseini_deceiving_2017,
title = {Deceiving {{Google}}'s {{Perspective API Built}} for {{Detecting Toxic Comments}}},
author = {Hosseini, Hossein and Kannan, Sreeram and Zhang, Baosen and Poovendran, Radha},
date = {2017-02-26},
number = {arXiv:1702.08138},
eprint = {1702.08138},
eprinttype = {arxiv},
abstract = {Social media platforms provide an environment where people can freely engage in discussions. Unfortunately, they also enable several problems, such as online harassment. Recently, Google and Jigsaw started a project called Perspective, which uses machine learning to automatically detect toxic language. A demonstration website has been also launched, which allows anyone to type a phrase in the interface and instantaneously see the toxicity score [1]. In this paper, we propose an attack on the Perspective toxic detection system based on the adversarial examples. We show that an adversary can subtly modify a highly toxic phrase in a way that the system assigns significantly lower toxicity score to it. We apply the attack on the sample phrases provided in the Perspective website and show that we can consistently reduce the toxicity scores to the level of the non-toxic phrases. The existence of such adversarial examples is very harmful for toxic detection systems and seriously undermines their usability.},
pubstate = {preprint},
keywords = {Computer Science - Computers and Society,Computer Science - Machine Learning,Computer Science - Social and Information Networks},
file = {/home/nathante/Zotero/storage/7DNERYPW/Hosseini et al_2017_Deceiving Google's Perspective API Built for Detecting Toxic Comments.pdf;/home/nathante/Zotero/storage/AJM3CAWA/1702.html}
}

@inproceedings{hua_characterizing_2020,
title = {Characterizing {{Twitter Users Who Engage}} in {{Adversarial Interactions}} against {{Political Candidates}}},
booktitle = {Proceedings of the 2020 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}},
author = {Hua, Yiqing and Naaman, Mor and Ristenpart, Thomas},
date = {2020-04-21},
pages = {1--13},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
abstract = {Social media provides a critical communication platform for political figures, but also makes them easy targets for harassment. In this paper, we characterize users who adversarially interact with political figures on Twitter using mixed-method techniques. The analysis is based on a dataset of 400 thousand users' 1.2 million replies to 756 candidates for the U.S. House of Representatives in the two months leading up to the 2018 midterm elections. We show that among moderately active users, adversarial activity is associated with decreased centrality in the social graph and increased attention to candidates from the opposing party. When compared to users who are similarly active, highly adversarial users tend to engage in fewer supportive interactions with their own party's candidates and express negativity in their user profiles. Our results can inform the design of platform moderation mechanisms to support political figures countering online harassment.},
isbn = {978-1-4503-6708-0},
keywords = {online harassment,political candidates,twitter,user behavior},
file = {/home/nathante/Zotero/storage/LJMBWTZH/Hua et al_2020_Characterizing Twitter Users Who Engage in Adversarial Interactions against.pdf}
}

@inproceedings{hullman_worst_2022,
title = {The {{Worst}} of {{Both Worlds}}: {{A Comparative Analysis}} of {{Errors}} in {{Learning}} from {{Data}} in {{Psychology}} and {{Machine Learning}}},
shorttitle = {The {{Worst}} of {{Both Worlds}}},
booktitle = {Proceedings of the 2022 {{AAAI}}/{{ACM Conference}} on {{AI}}, {{Ethics}}, and {{Society}}},
author = {Hullman, Jessica and Kapoor, Sayash and Nanayakkara, Priyanka and Gelman, Andrew and Narayanan, Arvind},
date = {2022-07-26},
pages = {335--348},
publisher = {{ACM}},
location = {{Oxford United Kingdom}},
eventtitle = {{{AIES}} '22: {{AAAI}}/{{ACM Conference}} on {{AI}}, {{Ethics}}, and {{Society}}},
isbn = {978-1-4503-9247-1},
langid = {english},
file = {/home/nathante/Zotero/storage/D5R2AWJG/Hullman et al. - 2022 - The Worst of Both Worlds A Comparative Analysis o.pdf;/home/nathante/Zotero/storage/TVV3M3QL/Hullman et al. (2022) The Worst of Both Worlds.pdf}
}

@inproceedings{im_synthesized_2020,
title = {Synthesized {{Social Signals}}: {{Computationally-Derived Social Signals}} from {{Account Histories}}},
shorttitle = {Synthesized {{Social Signals}}},
booktitle = {Proceedings of the 2020 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}},
author = {Im, Jane and Tandon, Sonali and Chandrasekharan, Eshwar and Denby, Taylor and Gilbert, Eric},
date = {2020-04-21},
pages = {1--12},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
abstract = {Social signals are crucial when we decide if we want to interact with someone online. However, social signals are typically limited to the few that platform designers provide, and most can be easily manipulated. In this paper, we propose a new idea called synthesized social signals (S3s): social signals computationally derived from an account's history, and then rendered into the profile. Unlike conventional social signals such as profile bios, S3s use computational summarization to reduce receiver costs and raise the cost of faking signals. To demonstrate and explore the concept, we built Sig, an extensible Chrome extension that computes and visualizes S3s. After a formative study, we conducted a field deployment of Sig on Twitter, targeting two well-known problems on social media: toxic accounts and misinformation. Results show that Sig reduced receiver costs, added important signals beyond conventionally available ones, and that a few users felt safer using Twitter as a result. We conclude by reflecting on the opportunities and challenges S3s provide for augmenting interaction on social platforms.},
isbn = {978-1-4503-6708-0},
keywords = {social computing,social media,social platform,social signals},
file = {/home/nathante/Zotero/storage/RY5ENJPR/Im et al_2020_Synthesized Social Signals.pdf}
}

@article{jacobucci_machine_2020,
title = {Machine {{Learning}} and {{Psychological Research}}: {{The Unexplored Effect}} of {{Measurement}}},
shorttitle = {Machine {{Learning}} and {{Psychological Research}}},
author = {Jacobucci, Ross and Grimm, Kevin J.},
date = {2020-05-01},
journaltitle = {Perspectives on Psychological Science},
shortjournal = {Perspect Psychol Sci},
volume = {15},
number = {3},
pages = {809--816},
publisher = {{SAGE Publications Inc}},
issn = {1745-6916},
abstract = {Machine learning (i.e., data mining, artificial intelligence, big data) has been increasingly applied in psychological science. Although some areas of research have benefited tremendously from a new set of statistical tools, most often in the use of biological or genetic variables, the hype has not been substantiated in more traditional areas of research. We argue that this phenomenon results from measurement errors that prevent machine-learning algorithms from accurately modeling nonlinear relationships, if indeed they exist. This shortcoming is showcased across a set of simulated examples, demonstrating that model selection between a machine-learning algorithm and regression depends on the measurement quality, regardless of sample size. We conclude with a set of recommendations and a discussion of ways to better integrate machine learning with statistics as traditionally practiced in psychological science.},
langid = {english},
keywords = {data mining,machine learning,measurement error,psychometrics,structural-equation modeling},
file = {/home/nathante/Zotero/storage/IYLNBMSN/Jacobucci_Grimm_2020_Machine Learning and Psychological Research.pdf}
}

@inproceedings{jain_adversarial_2018,
title = {Adversarial {{Text Generation}} for {{Google}}'s {{Perspective API}}},
booktitle = {2018 {{International Conference}} on {{Computational Science}} and {{Computational Intelligence}} ({{CSCI}})},
author = {Jain, Edwin and Brown, Stephan and Chen, Jeffery and Neaton, Erin and Baidas, Mohammad and Dong, Ziqian and Gu, Huanying and Artan, Nabi Sertac},
date = {2018-12},
pages = {1136--1141},
abstract = {With the preponderance of harassment and abuse, social media platforms and online discussion platforms seek to curb toxic comments. Google's Perspective aims to help platforms classify toxic comments. We have created a pipeline to modify toxic comments to evade Perspective. This pipeline uses existing adversarial machine learning attacks to find the optimal perturbation which will evade the model. Since these attacks typically target images, as opposed to discrete text data, we include a process to generate text candidates from perturbed features and select candidates to retain syntactic similarity. We demonstrated that using a model with just 10,000 queries, changing three words in each comment evades Perspective 25\% of the time, suggesting that building a surrogate model may not require many queries and a more robust approach is needed to improve the toxic comment classifier accuracy.},
eventtitle = {2018 {{International Conference}} on {{Computational Science}} and {{Computational Intelligence}} ({{CSCI}})},
keywords = {Adversarial,Adversarial machine learning,Deep Learning,Google,Google Perspective,Machine Learning,Natural Language Processing,Perturbation methods,Semantics,Syntactics,Task analysis},
file = {/home/nathante/Zotero/storage/HND429IV/Jain et al_2018_Adversarial Text Generation for Google's Perspective API.pdf;/home/nathante/Zotero/storage/3BH3ARY2/8947631.html}
}

@article{jiang_addressing_2021,
ids = {jiang_addressing_2021-1},
title = {Addressing {{Measurement Error}} in {{Random Forests Using Quantitative Bias Analysis}}},
author = {Jiang, Tammy and Gradus, Jaimie L. and Lash, Timothy L. and Fox, Matthew P.},
date = {2021-09-01},
journaltitle = {American Journal of Epidemiology},
shortjournal = {American Journal of Epidemiology},
volume = {190},
number = {9},
pages = {1830--1840},
issn = {0002-9262},
abstract = {Although variables are often measured with error, the impact of measurement error on machine-learning predictions is seldom quantified. The purpose of this study was to assess the impact of measurement error on the performance of random-forest models and variable importance. First, we assessed the impact of misclassification (i.e., measurement error of categorical variables) of predictors on random-forest model performance (e.g., accuracy, sensitivity) and variable importance (mean decrease in accuracy) using data from the National Comorbidity Survey Replication (2001–2003). Second, we created simulated data sets in which we knew the true model performance and variable importance measures and could verify that quantitative bias analysis was recovering the truth in misclassified versions of the data sets. Our findings showed that measurement error in the data used to construct random forests can distort model performance and variable importance measures and that bias analysis can recover the correct results. This study highlights the utility of applying quantitative bias analysis in machine learning to quantify the impact of measurement error on study results.},
file = {/home/nathante/Zotero/storage/3BRTJHQW/Jiang et al. - 2021 - Addressing Measurement Error in Random Forests Usi.pdf;/home/nathante/Zotero/storage/4XIUHUVG/Jiang et al_2021_Addressing Measurement Error in Random Forests Using Quantitative Bias Analysis.pdf;/home/nathante/Zotero/storage/UK34NAWR/6123935.html}
}

@article{johnson_learning_2020,
title = {Learning from the Past and Considering the Future of Chemicals in the Environment},
author = {Johnson, Andrew C. and Jin, Xiaowei and Nakada, Norihide and Sumpter, John P.},
date = {2020-01-24},
journaltitle = {Science},
shortjournal = {Science},
volume = {367},
number = {6476},
pages = {384--387},
issn = {0036-8075, 1095-9203},
abstract = {Knowledge of the hazards and associated risks from chemicals discharged to the environment has grown considerably over the past 40 years. This improving awareness stems from advances in our ability to measure chemicals at low environmental concentrations, recognition of a range of effects on organisms, and a worldwide growth in expertise. Environmental scientists and companies have learned from the experiences of the past; in theory, the next generation of chemicals will cause less acute toxicity and be less environmentally persistent and bioaccumulative. However, researchers still struggle to establish whether the nonlethal effects associated with some modern chemicals and substances will have serious consequences for wildlife. Obtaining the resources to address issues associated with chemicals in the environment remains a challenge.},
langid = {english},
file = {/home/nathante/Zotero/storage/KWTMTJBA/Johnson et al. (2017) Measurement error and the replication crisis.pdf;/home/nathante/Zotero/storage/VDIKYHGM/Johnson et al. - 2020 - Learning from the past and considering the future .pdf}
}

@article{junger_unboxing_2022,
title = {Unboxing {{Computational Social Media Research From}} a {{Datahermeneutical Perspective}}: {{How Do Scholars Address}} the {{Tension Between Automation}} and {{Interpretation}}?},
author = {Jünger, Jakob and Geise, Stephanie and Hännelt, Maria},
date = {2022},
journaltitle = {International Journal of Communication},
volume = {16},
pages = {1482--1505},
issn = {1932-8036},
abstract = {Communication researchers have fruitfully applied computational methods in their analysis of communication processes. However, the automation of scientific data collection and analysis confronts scholars with fundamental epistemological and practical challenges. Particularly, automation implies that the processing of data is highly standardized for all cases. In the context of social science research, this contrasts with the expectation that meaning is always attributed in individual interaction processes. Based on a literature review of peer-reviewed journal articles, our study explores the resulting tension between automated and interpretive research. We first analyze the extent to which automated methods play a role in social media research. We then identify the challenges and limitations researchers addressed in their studies. On this basis, we propose steps for a data hermeneutical perspective that combines computational methods with interpretive approaches.},
keywords = {automated data collection,computational communication science,computational methods,computational social science,data hermeneutics,interpretive paradigm,process-generated data}
}

@article{kaplan_big_2014,
|
||
title = {Big {{Data}} and {{Large Sample Size}}: {{A Cautionary Note}} on the {{Potential}} for {{Bias}}},
|
||
shorttitle = {Big {{Data}} and {{Large Sample Size}}},
|
||
author = {Kaplan, Robert M. and Chambers, David A. and Glasgow, Russell E.},
|
||
date = {2014},
|
||
journaltitle = {Clinical and Translational Science},
|
||
volume = {7},
|
||
number = {4},
|
||
pages = {342--346},
|
||
issn = {1752-8062},
|
||
abstract = {A number of commentaries have suggested that large studies are more reliable than smaller studies and there is a growing interest in the analysis of “big data” that integrates information from many thousands of persons and/or different data sources. We consider a variety of biases that are likely in the era of big data, including sampling error, measurement error, multiple comparisons errors, aggregation error, and errors associated with the systematic exclusion of information. Using examples from epidemiology, health services research, studies on determinants of health, and clinical trials, we conclude that it is necessary to exercise greater caution to be sure that big sample size does not lead to big inferential errors. Despite the advantages of big studies, large sample size can magnify the bias associated with error resulting from sampling or study design. Clin Trans Sci 2014; Volume \#: 1–5},
|
||
langid = {english},
|
||
keywords = {bias,big data,research methods,sampling},
|
||
file = {/home/nathante/Zotero/storage/PTGVP2WW/Kaplan et al_2014_Big Data and Large Sample Size.pdf;/home/nathante/Zotero/storage/KBURTV5N/cts.html}
|
||
}
|
||
|
||
@article{kim_distorting_2021,
|
||
title = {The {{Distorting Prism}} of {{Social Media}}: {{How Self-Selection}} and {{Exposure}} to {{Incivility Fuel Online Comment Toxicity}}},
|
||
shorttitle = {The {{Distorting Prism}} of {{Social Media}}},
|
||
author = {Kim, Jin Woo and Guess, Andrew and Nyhan, Brendan and Reifler, Jason},
|
||
date = {2021-12-01},
|
||
journaltitle = {Journal of Communication},
|
||
shortjournal = {Journal of Communication},
|
||
volume = {71},
|
||
number = {6},
|
||
pages = {922--946},
|
||
issn = {0021-9916},
|
||
abstract = {Though prior studies have analyzed the textual characteristics of online comments about politics, less is known about how selection into commenting behavior and exposure to other people’s comments changes the tone and content of political discourse. This article makes three contributions. First, we show that frequent commenters on Facebook are more likely to be interested in politics, to have more polarized opinions, and to use toxic language in comments in an elicitation task. Second, we find that people who comment on articles in the real world use more toxic language on average than the public as a whole; levels of toxicity in comments scraped from media outlet Facebook pages greatly exceed what is observed in comments we elicit on the same articles from a nationally representative sample. Finally, we demonstrate experimentally that exposure to toxic language in comments increases the toxicity of subsequent comments.},
|
||
file = {/home/nathante/Zotero/storage/T89NGBE4/Kim et al_2021_The Distorting Prism of Social Media.pdf;/home/nathante/Zotero/storage/Q8JWJ7LZ/6363640.html}
|
||
}
|
||
|
||
@article{king_analyzing_2001,
title = {Analyzing {{Incomplete Political Science Data}}: {{An Alternative Algorithm}} for {{Multiple Imputation}}},
shorttitle = {Analyzing {{Incomplete Political Science Data}}},
author = {King, Gary and Honaker, James and Joseph, Anne and Scheve, Kenneth},
date = {2001-03},
journaltitle = {American Political Science Review},
volume = {95},
number = {1},
pages = {49--69},
publisher = {{Cambridge University Press}},
issn = {1537-5943, 0003-0554},
abstract = {We propose a remedy for the discrepancy between the way political scientists analyze data with missing values and the recommendations of the statistics community. Methodologists and statisticians agree that “multiple imputation” is a superior approach to the problem of missing data scattered through one’s explanatory and dependent variables than the methods currently used in applied data analysis. The discrepancy occurs because the computational algorithms used to apply the best multiple imputation models have been slow, difficult to implement, impossible to run with existing commercial statistical packages, and have demanded considerable expertise. We adapt an algorithm and use it to implement a general-purpose, multiple imputation model for missing data. This algorithm is considerably faster and easier to use than the leading method recommended in the statistics literature. We also quantify the risks of current missing data practices, illustrate how to use the new procedure, and evaluate this alternative through simulated data as well as actual empirical examples. Finally, we offer easy-to-use software that implements all methods discussed.},
langid = {english},
file = {/home/nathante/Zotero/storage/SRZTVUSY/King et al_2001_Analyzing Incomplete Political Science Data.pdf;/home/nathante/Zotero/storage/EJP7I7RQ/9E712982CCE2DE79A574FE98488F212B.html}
}

@article{kleinberg_algorithmic_2018,
title = {Algorithmic {{Fairness}}},
author = {Kleinberg, Jon and Ludwig, Jens and Mullainathan, Sendhil and Rambachan, Ashesh},
date = {2018},
journaltitle = {AEA Papers and Proceedings},
volume = {108},
pages = {22--27},
issn = {2574-0768},
abstract = {Concerns that algorithms may discriminate against certain groups have led to numerous efforts to 'blind' the algorithm to race. We argue that this intuitive perspective is misleading and may do harm. Our primary result is exceedingly simple, yet often overlooked. A preference for fairness should not change the choice of estimator. Equity preferences can change how the estimated prediction function is used (e.g., different threshold for different groups) but the function itself should not change. We show in an empirical example for college admissions that the inclusion of variables such as race can increase both equity and efficiency.},
langid = {english},
keywords = {Cluster Analysis,Factor Models; Equity; Justice; Inequality; and Other Normative Criteria and Measurement; Higher Education,Multiple or Simultaneous Equation Models: Classification Methods,Non-labor Discrimination,Principal Components,Research Institutions; Economics of Minorities; Races; Indigenous Peoples; and Immigrants},
file = {/home/nathante/Zotero/storage/67KVXZIU/Kleinberg et al_2018_Algorithmic Fairness.pdf;/home/nathante/Zotero/storage/TSV3T4KE/articles.html}
}

@article{knox_testing_2022,
title = {Testing {{Causal Theories}} with {{Learned Proxies}}},
author = {Knox, Dean and Lucas, Christopher and Cho, Wendy K. Tam},
date = {2022-05-12},
journaltitle = {Annual Review of Political Science},
shortjournal = {Annu. Rev. Polit. Sci.},
volume = {25},
number = {1},
pages = {419--441},
issn = {1094-2939, 1545-1577},
abstract = {Social scientists commonly use computational models to estimate proxies of unobserved concepts, then incorporate these proxies into subsequent tests of their theories. The consequences of this practice, which occurs in over two-thirds of recent computational work in political science, are underappreciated. Imperfect proxies can reflect noise and contamination from other concepts, producing biased point estimates and standard errors. We demonstrate how analysts can use causal diagrams to articulate theoretical concepts and their relationships to estimated proxies, then apply straightforward rules to assess which conclusions are rigorously supportable. We formalize and extend common heuristics for “signing the bias”—a technique for reasoning about unobserved confounding—to scenarios with imperfect proxies. Using these tools, we demonstrate how, in often-encountered research settings, proxy-based analyses allow for valid tests for the existence and direction of theorized effects. We conclude with best-practice recommendations for the rapidly growing literature using learned proxies to test causal theories.},
langid = {english},
file = {/home/nathante/Zotero/storage/T64YP7NJ/Knox et al. - 2022 - Testing Causal Theories with Learned Proxies.pdf}
}

@book{krippendorff_content_2018,
title = {Content Analysis: {{An}} Introduction to Its Methodology},
author = {Krippendorff, Klaus},
date = {2018},
publisher = {{SAGE}}
}

@article{krippendorff_estimating_1970,
title = {Estimating the {{Reliability}}, {{Systematic Error}} and {{Random Error}} of {{Interval Data}}},
author = {Krippendorff, Klaus},
date = {1970-04-01},
journaltitle = {Educational and Psychological Measurement},
shortjournal = {Educational and Psychological Measurement},
volume = {30},
number = {1},
pages = {61--70},
publisher = {{SAGE Publications Inc}},
issn = {0013-1644},
langid = {english},
file = {/home/nathante/Zotero/storage/YSDM7Z7Q/Krippendorff_1970_Estimating the Reliability, Systematic Error and Random Error of Interval Data.pdf}
}

@article{krippendorff_reliability_2004,
title = {Reliability in {{Content Analysis}}},
author = {Krippendorff, Klaus},
date = {2004},
journaltitle = {Human Communication Research},
volume = {30},
number = {3},
pages = {411--433},
issn = {1468-2958},
abstract = {In a recent article in this journal, Lombard, Snyder-Duch, and Bracken (2002) surveyed 200 content analyses for their reporting of reliability tests, compared the virtues and drawbacks of five popular reliability measures, and proposed guidelines and standards for their use. Their discussion revealed that numerous misconceptions circulate in the content analysis literature regarding how these measures behave and can aid or deceive content analysts in their effort to ensure the reliability of their data. This article proposes three conditions for statistical measures to serve as indices of the reliability of data and examines the mathematical structure and the behavior of the five coefficients discussed by the authors, as well as two others. It compares common beliefs about these coefficients with what they actually do and concludes with alternative recommendations for testing reliability in content analysis and similar data-making efforts.},
langid = {english},
file = {/home/nathante/Zotero/storage/GBK9844Z/j.1468-2958.2004.tb00738.html}
}

@article{kroon_beyond_2022,
title = {Beyond {{Counting Words}}: {{Assessing Performance}} of {{Dictionaries}}, {{Supervised Machine Learning}}, and {{Embeddings}} in {{Topic}} and {{Frame Classification}}},
shorttitle = {Beyond {{Counting Words}}},
author = {Kroon, Anne C. and family=Meer, given=Toni, prefix=van der, useprefix=true and Vliegenthart, Rens},
date = {2022-10-01},
journaltitle = {Computational Communication Research},
volume = {4},
number = {2},
pages = {528--570},
issn = {2665-9085, 2665-9085},
langid = {english}
}

@inproceedings{kurrek_towards_2020,
title = {Towards a {{Comprehensive Taxonomy}} and {{Large-Scale Annotated Corpus}} for {{Online Slur Usage}}},
booktitle = {Proceedings of the {{Fourth Workshop}} on {{Online Abuse}} and {{Harms}}},
author = {Kurrek, Jana and Saleem, Haji Mohammad and Ruths, Derek},
date = {2020-11},
pages = {138--149},
publisher = {{Association for Computational Linguistics}},
location = {{Online}},
abstract = {Abusive language classifiers have been shown to exhibit bias against women and racial minorities. Since these models are trained on data that is collected using keywords, they tend to exhibit a high sensitivity towards pejoratives. As a result, comments written by victims of abuse are frequently labelled as hateful, even if they discuss or reclaim slurs. Any attempt to address bias in keyword-based corpora requires a better understanding of pejorative language, as well as an equitable representation of targeted users in data collection. We make two main contributions to this end. First, we provide an annotation guide that outlines 4 main categories of online slur usage, which we further divide into a total of 12 sub-categories. Second, we present a publicly available corpus based on our taxonomy, with 39.8k human annotated comments extracted from Reddit. This corpus was annotated by a diverse cohort of coders, with Shannon equitability indices of 0.90, 0.92, and 0.87 across sexuality, ethnicity, and gender. Taken together, our taxonomy and corpus allow researchers to evaluate classifiers on a wider range of speech containing slurs.},
eventtitle = {{{ALW-EMNLP}} 2020},
file = {/home/nathante/Zotero/storage/8EURY5H3/Kurrek et al_2020_Towards a Comprehensive Taxonomy and Large-Scale Annotated Corpus for Online.pdf}
}

@article{lazer_meaningful_2021,
title = {Meaningful Measures of Human Society in the Twenty-First Century},
author = {Lazer, David and Hargittai, Eszter and Freelon, Deen and Gonzalez-Bailon, Sandra and Munger, Kevin and Ognyanova, Katherine and Radford, Jason},
date = {2021-07},
journaltitle = {Nature},
volume = {595},
number = {7866},
pages = {189--196},
publisher = {{Nature Publishing Group}},
issn = {1476-4687},
abstract = {Science rarely proceeds beyond what scientists can observe and measure, and sometimes what can be observed proceeds far ahead of scientific understanding. The twenty-first century offers such a moment in the study of human societies. A vastly larger share of behaviours is observed today than would have been imaginable at the close of the twentieth century. Our interpersonal communication, our movements and many of our everyday actions, are all potentially accessible for scientific research; sometimes through purposive instrumentation for scientific objectives (for example, satellite imagery), but far more often these objectives are, literally, an afterthought (for example, Twitter data streams). Here we evaluate the potential of this massive instrumentation—the creation of techniques for the structured representation and quantification—of human behaviour through the lens of scientific measurement and its principles. In particular, we focus on the question of how we extract scientific meaning from data that often were not created for such purposes. These data present conceptual, computational and ethical challenges that require a rejuvenation of our scientific theories to keep up with the rapidly changing social realities and our capacities to capture them. We require, in other words, new approaches to manage, use and analyse data.},
issue = {7866},
langid = {english},
keywords = {Scientific community}
}

@comment{NOTE(review): lazer_meaningful_2021-1 below is an exact duplicate of lazer_meaningful_2021; consider merging the two keys (e.g., via biblatex ids = {lazer_meaningful_2021-1}).}

@article{lazer_meaningful_2021-1,
title = {Meaningful Measures of Human Society in the Twenty-First Century},
author = {Lazer, David and Hargittai, Eszter and Freelon, Deen and Gonzalez-Bailon, Sandra and Munger, Kevin and Ognyanova, Katherine and Radford, Jason},
date = {2021-07},
journaltitle = {Nature},
volume = {595},
number = {7866},
pages = {189--196},
publisher = {{Nature Publishing Group}},
issn = {1476-4687},
abstract = {Science rarely proceeds beyond what scientists can observe and measure, and sometimes what can be observed proceeds far ahead of scientific understanding. The twenty-first century offers such a moment in the study of human societies. A vastly larger share of behaviours is observed today than would have been imaginable at the close of the twentieth century. Our interpersonal communication, our movements and many of our everyday actions, are all potentially accessible for scientific research; sometimes through purposive instrumentation for scientific objectives (for example, satellite imagery), but far more often these objectives are, literally, an afterthought (for example, Twitter data streams). Here we evaluate the potential of this massive instrumentation—the creation of techniques for the structured representation and quantification—of human behaviour through the lens of scientific measurement and its principles. In particular, we focus on the question of how we extract scientific meaning from data that often were not created for such purposes. These data present conceptual, computational and ethical challenges that require a rejuvenation of our scientific theories to keep up with the rapidly changing social realities and our capacities to capture them. We require, in other words, new approaches to manage, use and analyse data.},
issue = {7866},
langid = {english},
keywords = {Scientific community}
}

@article{lederer_short_nodate,
title = {A Short Introduction to the {{SIMEX}} and {{MCSIMEX}}},
author = {Lederer, Wolfgang and Küchenhoff, Helmut},
pages = {5},
langid = {english},
file = {/home/nathante/Zotero/storage/5SPGL6VF/Lederer und Küchenhoff - A short Introduction to the SIMEX and.pdf}
}

@inproceedings{lima_characterizing_2020,
title = {Characterizing ({{Un}})Moderated {{Textual Data}} in {{Social Systems}}},
booktitle = {2020 {{IEEE}}/{{ACM International Conference}} on {{Advances}} in {{Social Networks Analysis}} and {{Mining}} ({{ASONAM}})},
author = {Lima, Lucas and Reis, Julio C. S. and Melo, Philipe and Murai, Fabrício and Benevenuto, Fabrício},
date = {2020-12},
pages = {430--434},
issn = {2473-991X},
abstract = {Despite the valuable social interactions that online media promote, these systems provide space for speech that would be potentially detrimental to different groups of people. The moderation of content imposed by many social media has motivated the emergence of a new social system for free speech named Gab, which lacks moderation of content. This article characterizes and compares moderated textual data from Twitter with a set of unmoderated data from Gab. In particular, we analyze distinguishing characteristics of moderated and unmoderated content in terms of linguistic features, evaluate hate speech and its different forms in both environments. Our work shows that unmoderated content presents different psycholinguistic features, more negative sentiment and higher toxicity. Our findings support that unmoderated environments may have proportionally more online hate speech. We hope our analysis and findings contribute to the debate about hate speech and benefit systems aiming at deploying hate speech detection approaches.},
eventtitle = {2020 {{IEEE}}/{{ACM International Conference}} on {{Advances}} in {{Social Networks Analysis}} and {{Mining}} ({{ASONAM}})},
keywords = {Blogs,Gab,Hate Speech,Linguistics,Media,Moderated Content,Social Network,Social networking (online),Statistical analysis,Toxicology,Twitter,Unmoderated Content,Voice activity detection},
file = {/home/nathante/Zotero/storage/MK4JUJR4/Lima et al_2020_Characterizing (Un)moderated Textual Data in Social Systems.pdf}
}

@article{lind_greasing_2021,
title = {Greasing the Wheels for Comparative Communication Research: {{Supervised}} Text Classification for Multilingual Corpora},
author = {Lind, Fabienne and Heidenreich, Tobias and Kralj, Christoph and Boomgaarden, Hajo G},
date = {2021},
journaltitle = {Computational Communication Research},
volume = {3},
number = {3},
publisher = {{Amsterdam University Press}}
}

@report{lockhart_whats_2022,
type = {preprint},
title = {What’s in a {{Name}}? {{Name-Based Demographic Inference}} and the {{Unequal Distribution}} of {{Misrecognition}}},
shorttitle = {What’s in a {{Name}}?},
author = {Lockhart, Jeffrey W and King, Molly M. and Munsch, Christin},
date = {2022-09-06},
institution = {{SocArXiv}},
abstract = {Academics and companies increasingly draw on large datasets to understand the social world. Name-based demographic ascription tools are widespread for imputing information like gender and race that are often missing from these large datasets, but these approaches have drawn criticism on ethical, empirical, and theoretical grounds. Employing a survey of all authors listed on articles in sociology, economics, and communications journals in the Web of Science between 2015 and 2020, we compared self-identified demographics with name-based imputations of gender and race/ethnicity for 19,924 scholars across four gender ascription tools (genderize.io, M3-inference, R’s `predictrace` and `gender` packages) and four race/ethnicity ascription tools (ethnicolor’s Florida and North Carolina voter models, and R’s `predictrace` and wru packages). We find substantial inequalities in how these tools misgender and misrecognize the race/ethnicity of authors, distributing erroneous ascriptions unevenly along other demographic traits. Because of the empirical and ethical consequences of these errors, scholars need to be cautious with the use of name-based demographic imputation, particularly when studying subgroups. We recommend five principles for the responsible use of name-based demographic ascription.},
langid = {english},
file = {/home/nathante/Zotero/storage/HC5MCC2C/Lockhart et al. - 2022 - What’s in a Name Name-Based Demographic Inference.pdf}
}

@article{loken_measurement_2017,
title = {Measurement Error and the Replication Crisis},
author = {Loken, Eric and Gelman, Andrew},
date = {2017-02-10},
journaltitle = {Science},
volume = {355},
number = {6325},
pages = {584--585},
publisher = {{American Association for the Advancement of Science}},
file = {/home/nathante/Zotero/storage/EK6AQXQE/Loken_Gelman_2017_Measurement error and the replication crisis.pdf}
}

@article{lorcher_discussing_2017,
title = {Discussing Climate Change Online. {{Topics}} and Perceptions in Online Climate Change Communication in Different Online Public Arenas},
author = {Lörcher, Ines and Taddicken, Monika},
date = {2017-05-18},
journaltitle = {Journal of Science Communication},
shortjournal = {JCOM},
volume = {16},
number = {02},
pages = {A03},
issn = {1824-2049},
abstract = {How users discuss climate change online is one of the crucial questions (science) communication scholars address nowadays. This study contributes by approaching the issue through the theoretical concept of online public arenas. The diversity of topics and perceptions in the climate change discourse is explored by comparing different arenas. German journalistic articles and their reader comments as well as scientific expert blogs are analyzed by quantitative manual and automated content analysis (n=5,301). Findings demonstrate a larger diversity of topics and interpretations in arenas with low barriers to communication. Overall, climate change skepticism is rare, but mostly present in lay publics.},
file = {/home/nathante/Zotero/storage/QPTH7GQZ/Lörcher und Taddicken - 2017 - Discussing climate change online. Topics and perce.pdf}
}

@article{lovejoy_assessing_2014,
title = {Assessing the {{Reporting}} of {{Reliability}} in {{Published Content Analyses}}: 1985–2010},
shorttitle = {Assessing the {{Reporting}} of {{Reliability}} in {{Published Content Analyses}}},
author = {Lovejoy, Jennette and Watson, Brendan R. and Lacy, Stephen and Riffe, Daniel},
date = {2014-07-03},
journaltitle = {Communication Methods and Measures},
shortjournal = {Communication Methods and Measures},
volume = {8},
number = {3},
pages = {207--221},
issn = {1931-2458, 1931-2466},
langid = {english}
}

@article{mahl_noise_2022,
title = {Noise {{Pollution}}: {{A Multi-Step Approach}} to {{Assessing}} the {{Consequences}} of ({{Not}}) {{Validating Search Terms}} on {{Automated Content Analyses}}},
author = {Mahl, Daniela and family=Nordheim, given=Gerret, prefix=von, useprefix=true and Guenther, Lars},
date = {2022-09-23},
journaltitle = {Digital Journalism},
shortjournal = {Digital Journalism},
pages = {1--23},
publisher = {{Routledge}},
issn = {2167-0811}
}

@article{maier_applying_2018,
title = {Applying {{LDA Topic Modeling}} in {{Communication Research}}: {{Toward}} a {{Valid}} and {{Reliable Methodology}}},
author = {Maier, Daniel and Waldherr, A. and Miltner, P. and Wiedemann, G. and Niekler, A. and Keinert, A. and Pfetsch, B. and Heyer, G. and Reber, U. and Häussler, T. and others},
date = {2018-02},
journaltitle = {Communication Methods and Measures},
volume = {12},
number = {2-3},
pages = {93--118},
publisher = {{Informa UK Limited}},
issn = {1931-2466}
}

@inproceedings{mall_four_2020,
title = {Four {{Types}} of {{Toxic People}}: {{Characterizing Online Users}}’ {{Toxicity}} over {{Time}}},
shorttitle = {Four {{Types}} of {{Toxic People}}},
booktitle = {Proceedings of the 11th {{Nordic Conference}} on {{Human-Computer Interaction}}: {{Shaping Experiences}}, {{Shaping Society}}},
author = {Mall, Raghvendra and Nagpal, Mridul and Salminen, Joni and Almerekhi, Hind and Jung, Soon-Gyo and Jansen, Bernard J.},
date = {2020-10-25},
number = {37},
pages = {1--11},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
abstract = {Identifying types of online users’ toxic behavior reveals important insights from social media interactions, including whether a user becomes “radicalized” (more toxic) or “pacified” (less toxic) over time. In this research, we design two metrics to identify toxic user types: F score that captures the changes in a user’s toxicity, and G score that captures the direction of the shift taking place in the user’s toxicity pattern. We apply these metrics to a dataset of 4M user comments from Reddit by defining four toxic user types based on the toxicity scores of a user’s comments: (a) Steady Users whose toxicity scores are steady over time, (b) Fickle-Minded Users that switch between toxic and non-toxic commenting, (c) Pacified Users whose commenting becomes less toxic in time, and (d) Radicalized Users that become gradually toxic. Findings from the Reddit dataset indicate that fickle-minded users form the largest group (31.2\%), followed by pacified (25.8\%), radicalized (25.4\%), and steadily toxic users (17.6\%). The results suggest that the most typical behavior type of toxicity is switching between toxic and non-toxic commenting. This research has implications for preserving the user-friendliness of online communities by identifying continuously toxic users and users in danger of becoming radicalized (in terms of their toxic behavior), and designing interventions to mitigate these behavior types. Using the metrics we have defined, identifying these user types becomes possible. More research is needed to understand why these patterns take place and how they could be mitigated.},
isbn = {978-1-4503-7579-5},
keywords = {online toxicity,Reddit,social media behavior,user analysis},
file = {/home/nathante/Zotero/storage/27PC9H72/Mall et al_2020_Four Types of Toxic People.pdf}
}

@article{malloch_estimation_2021,
title = {Estimation with {{Errors}} in {{Variables}} via the {{Characteristic Function}}*},
author = {Malloch, H and Philip, R and Satchell, S},
date = {2021-10-18},
journaltitle = {Journal of Financial Econometrics},
shortjournal = {Journal of Financial Econometrics},
pages = {nbab011},
issn = {1479-8409},
abstract = {Errors in variables in linear regression continue to be a significant empirical issue in financial econometrics. We propose using the characteristic function (CF) to obtain estimates for linear models with errors in the variables. By assuming that the explanatory variable follows a flexible double gamma distribution, we obtain closed-form expressions for the analytic CF of the data generating process. We show that our method performs well relative to existing techniques that address error-in-variables (EIVs) through simulations. We further extend our CF technique to a multivariate setting where it continues to produce accurate estimates. We illustrate the performance of our procedure by estimating the capital asset pricing model and a two-factor model.},
file = {/home/nathante/Zotero/storage/FFQ9NE3N/6400037.html}
}

@article{mangold_metrics_2016,
title = {Metrics of {{News Audience Polarization}}: {{Same}} or {{Different}}?},
author = {Mangold, Frank and Scharkow, Michael},
date = {2016},
pages = {26},
abstract = {Although media and communication scholars have suggested various ana lytical methods for measuring and comparing news audience polarization across countries, we lack a systematic assessment of the metrics produced by these techniques. Using survey data from the 2016 Reuters Institute Digital News Report on news use in 26 countries, we address this gap through a resampling simulation experiment. Our simulation revealed a strong impact of analytical choices, which invited disparate interpretations in terms of how polarized news audiences are, how strongly audience polariza tion structurally varies between news environments, and how news audience polarization is distributed cross-nationally. Alternative choices led to pro found differences in the compatibility, consistency, and validity of the empiri cal news audience polarization estimates. We conclude from these results that a more precise methodological understanding of news audience polar ization metrics informs our capability to draw meaningful inferences from empirical work.},
langid = {english},
file = {/home/nathante/Zotero/storage/C33ZMIUK/Mangold und Scharkow - 2016 - Metrics of News Audience Polarization Same or Dif.pdf}
}

@article{mcnamara_not_2022,
title = {Not Just “Big” Data: {{Importance}} of Sample Size, Measurement Error, and Uninformative Predictors for Developing Prognostic Models for Digital Interventions},
shorttitle = {Not Just “Big” Data},
author = {McNamara, Mary E. and Zisser, Mackenzie and Beevers, Christopher G. and Shumake, Jason},
date = {2022-06},
journaltitle = {Behaviour Research and Therapy},
shortjournal = {Behaviour Research and Therapy},
volume = {153},
pages = {104086},
issn = {00057967},
abstract = {There is strong interest in developing a more efficient mental health care system. Digital interventions and predictive models of treatment prognosis will likely play an important role in this endeavor. This article reviews the application of popular machine learning models to the prediction of treatment prognosis, with a particular focus on digital interventions. Assuming that the prediction of treatment prognosis will involve modeling a complex combination of interacting features with measurement error in both the predictors and outcomes, our simulations suggest that to optimize complex prediction models, sample sizes in the thousands will be required. Machine learning methods capable of discovering complex interactions and nonlinear effects (e.g., decision tree ensembles such as gradient boosted machines) perform particularly well in large samples when the predictors and outcomes have virtually no measurement error. However, in the presence of moderate measurement error, these methods provide little or no benefit over regularized linear regression, even with very large sample sizes (N = 100,000) and a non-linear ground truth. Given these sample size requirements, we argue that the scalability of digital interventions, especially when used in combination with optimal measurement practices, provides one of the most effective ways to study treatment prediction models. We conclude with suggestions about how to implement these algorithms into clinical practice.},
langid = {english},
file = {/home/nathante/Zotero/storage/VF7RGRD4/McNamara et al. - 2022 - Not just “big” data Importance of sample size, me.pdf}
}

@article{merkley_are_2020,
title = {Are {{Experts}} ({{News}}){{Worthy}}? {{Balance}}, {{Conflict}}, and {{Mass Media Coverage}} of {{Expert Consensus}}},
shorttitle = {Are {{Experts}} ({{News}}){{Worthy}}?},
author = {Merkley, Eric},
date = {2020-07-03},
journaltitle = {Political Communication},
shortjournal = {Political Communication},
volume = {37},
number = {4},
pages = {530--549},
issn = {1058-4609, 1091-7675},
langid = {english},
file = {/home/nathante/Zotero/storage/U8ZU363W/Merkley - 2020 - Are Experts (News)Worthy Balance, Conflict, and M.pdf}
}

@article{millimet_accounting_2022,
title = {Accounting for {{Skewed}} or {{One-Sided Measurement Error}} in the {{Dependent Variable}}},
author = {Millimet, Daniel L. and Parmeter, Christopher F.},
date = {2022-01},
journaltitle = {Political Analysis},
shortjournal = {Polit. Anal.},
volume = {30},
number = {1},
pages = {66--88},
issn = {1047-1987, 1476-4989},
abstract = {While classical measurement error in the dependent variable in a linear regression framework results only in a loss of precision, nonclassical measurement error can lead to estimates, which are biased and inference which lacks power. Here, we consider a particular type of nonclassical measurement error: skewed errors. Unfortunately, skewed measurement error is likely to be a relatively common feature of many outcomes of interest in political science research. This study highlights the bias that can result even from relatively “small” amounts of skewed measurement error, particularly, if the measurement error is heteroskedastic. We also assess potential solutions to this problem, focusing on the stochastic frontier model and Nonlinear Least Squares. Simulations and three replications highlight the importance of thinking carefully about skewed measurement error as well as appropriate solutions.},
langid = {english},
file = {/home/nathante/Zotero/storage/2W869JT8/Millimet und Parmeter - 2022 - Accounting for Skewed or One-Sided Measurement Err.pdf}
}

@inproceedings{mitchell_model_2019,
title = {Model {{Cards}} for {{Model Reporting}}},
booktitle = {Proceedings of the {{Conference}} on {{Fairness}}, {{Accountability}}, and {{Transparency}}},
author = {Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit},
date = {2019-01-29},
pages = {220--229},
publisher = {{ACM}},
location = {{Atlanta GA USA}},
eventtitle = {{{FAT}}* '19: {{Conference}} on {{Fairness}}, {{Accountability}}, and {{Transparency}}},
isbn = {978-1-4503-6125-5},
langid = {english},
file = {/home/nathante/Zotero/storage/ZHZ9CP8M/Mitchell et al. - 2019 - Model Cards for Model Reporting.pdf}
}

@inproceedings{mitchell_model_2019-1,
title = {Model {{Cards}} for {{Model Reporting}}},
booktitle = {Proceedings of the {{Conference}} on {{Fairness}}, {{Accountability}}, and {{Transparency}}},
author = {Mitchell, Margaret and Wu, Simone and Zaldivar, Andrew and Barnes, Parker and Vasserman, Lucy and Hutchinson, Ben and Spitzer, Elena and Raji, Inioluwa Deborah and Gebru, Timnit},
date = {2019-01-29},
series = {{{FAT}}* '19},
pages = {220--229},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
abstract = {Trained machine learning models are increasingly used to perform high-impact tasks in areas such as law enforcement, medicine, education, and employment. In order to clarify the intended use cases of machine learning models and minimize their usage in contexts for which they are not well suited, we recommend that released models be accompanied by documentation detailing their performance characteristics. In this paper, we propose a framework that we call model cards, to encourage such transparent model reporting. Model cards are short documents accompanying trained machine learning models that provide benchmarked evaluation in a variety of conditions, such as across different cultural, demographic, or phenotypic groups (e.g., race, geographic location, sex, Fitzpatrick skin type [15]) and intersectional groups (e.g., age and race, or sex and Fitzpatrick skin type) that are relevant to the intended application domains. Model cards also disclose the context in which models are intended to be used, details of the performance evaluation procedures, and other relevant information. While we focus primarily on human-centered machine learning models in the application fields of computer vision and natural language processing, this framework can be used to document any trained machine learning model. To solidify the concept, we provide cards for two supervised models: One trained to detect smiling faces in images, and one trained to detect toxic comments in text. We propose model cards as a step towards the responsible democratization of machine learning and related artificial intelligence technology, increasing transparency into how well artificial intelligence technology works. We hope this work encourages those releasing trained machine learning models to accompany model releases with similar detailed evaluation numbers and other relevant documentation.},
isbn = {978-1-4503-6125-5},
keywords = {datasheets,disaggregated evaluation,documentation,ethical considerations,fairness evaluation,ML model evaluation,model cards},
internal-note = {NOTE(review): appears to duplicate mitchell_model_2019 (same paper, different export); consider merging and aliasing via the biblatex ids field},
file = {/home/nathante/Zotero/storage/4T2GRQ6M/Mitchell et al_2019_Model Cards for Model Reporting.pdf}
}

@article{mittos_and_2020,
title = {“{{And We Will Fight}} for {{Our Race}}!” {{A Measurement Study}} of {{Genetic Testing Conversations}} on {{Reddit}} and 4chan},
author = {Mittos, Alexandros and Zannettou, Savvas and Blackburn, Jeremy and De Cristofaro, Emiliano},
date = {2020-05-26},
journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media},
volume = {14},
pages = {452--463},
issn = {2334-0770},
abstract = {Progress in genomics has enabled the emergence of a booming market for “direct-to-consumer” genetic testing. Nowadays, companies like 23andMe and AncestryDNA provide affordable health, genealogy, and ancestry reports, and have already tested tens of millions of customers. At the same time, alt- and far-right groups have also taken an interest in genetic testing, using them to attack minorities and prove their genetic “purity.” In this paper, we present a measurement study shedding light on how genetic testing is being discussed on Web communities in Reddit and 4chan. We collect 1.3M comments posted over 27 months on the two platforms, using a set of 280 keywords related to genetic testing. We then use NLP and computer vision tools to identify trends, themes, and topics of discussion. Our analysis shows that genetic testing attracts a lot of attention on Reddit and 4chan, with discussions often including highly toxic language expressed through hateful, racist, and misogynistic comments. In particular, on 4chan's politically incorrect board (/pol/), content from genetic testing conversations involves several alt-right personalities and openly antisemitic rhetoric, often conveyed through memes. Finally, we find that discussions build around user groups, from technology enthusiasts to communities promoting fringe political views.},
langid = {english},
file = {/home/nathante/Zotero/storage/MJ4ZR4IJ/Mittos et al_2020_“And We Will Fight for Our Race.pdf;/home/nathante/Zotero/storage/XRRGGABM/Fong & Tyler (2020).pdf}
}

@book{mooney_monte_1997,
title = {Monte {{Carlo}} Simulation},
author = {Mooney, Christopher Z.},
date = {1997},
publisher = {{Sage Publications, Inc}},
location = {{Thousand Oaks, CA, US}},
abstract = {The statistics of classical parametric inference inform us about how the world works to the extent necessary assumptions are met. When certain regression assumptions are violated, or are under suspicion of violation, Monte Carlo simulation can be a way out. For example, it allows exploration of parameter estimation granting a variety of distributions—uniform, Pareto, exponential, normal, lognormal, chi-square, Student's t, mixture or beta. Monte Carlo simulation can be used to compare estimator properties from multiequation systems, for example, 2-stage vs 3-stage estimators. Furthermore, it promises considerable payoff in the study of valuable statistics that are simply calculated but about which little is known inferentially, for example, the median or the absolute average deviation. The logic of Monte Carlo simulation is presented, a population of interest is simulated, and how to prepare the computer algorithm is explained. (PsycINFO Database Record (c) 2016 APA, all rights reserved)},
isbn = {978-0-8039-5943-9},
pagetotal = {viii, 103},
keywords = {Social Sciences,Statistical Analysis},
file = {/home/nathante/Zotero/storage/N7PS594X/1997-08836-000.html}
}

@article{muddiman_reclaiming_2019,
ids = {muddiman_reclaiming_2019-1},
title = {({{Re}}){{Claiming Our Expertise}}: {{Parsing Large Text Corpora With Manually Validated}} and {{Organic Dictionaries}}},
shorttitle = {({{Re}}){{Claiming Our Expertise}}},
author = {Muddiman, Ashley and McGregor, Shannon C. and Stroud, Natalie Jomini},
date = {2019-04-03},
journaltitle = {Political Communication},
volume = {36},
number = {2},
pages = {214--226},
publisher = {{Routledge}},
issn = {1058-4609},
abstract = {Content analysis of large-scale textual data sets poses myriad problems, particularly when researchers seek to analyze content that is both theoretically derived and context dependent. In this piece, we detail the approach we developed to tackle the analysis of the context-dependent content of political incivility. After describing our manually validated organic dictionaries approach, we compare the method to others we could have used and then replicate the method in a different—but still context-dependent—project examining political issue content on social media. We conclude by summarizing the strengths and weaknesses of the approach and offering suggestions for future research that can refine and expand the method.},
keywords = {computer-aided content analysis,incivility,news comments,news issues,Twitter},
file = {/home/nathante/Zotero/storage/MKDWDL4K/Muddiman et al_2019_(Re)Claiming Our Expertise.pdf}
}

@article{mueller_twitter_2020,
title = {Twitter Made Me Do It! {{Twitter}}'s Tonal Platform Incentive and Its Effect on Online Campaigning},
author = {Mueller, Samuel David and Saeltzer, Marius},
date = {2020-12-12},
journaltitle = {Information, Communication \& Society},
shortjournal = {Information, Communication \& Society},
pages = {1--26},
issn = {1369-118X, 1468-4462},
langid = {english},
file = {/home/nathante/Zotero/storage/XKGW5R5V/Mueller und Saeltzer - 2020 - Twitter made me do it! Twitter's tonal platform in.pdf}
}

@article{nab_mecor_2021,
title = {Mecor: {{An R}} Package for Measurement Error Correction in Linear Regression Models with a Continuous Outcome},
shorttitle = {Mecor},
author = {Nab, Linda and family=Smeden, given=Maarten, prefix=van, useprefix=true and Keogh, Ruth H. and Groenwold, Rolf H. H.},
date = {2021-09-01},
journaltitle = {Computer Methods and Programs in Biomedicine},
shortjournal = {Computer Methods and Programs in Biomedicine},
volume = {208},
pages = {106238},
issn = {0169-2607},
abstract = {Measurement error in a covariate or the outcome of regression models is common, but is often ignored, even though measurement error can lead to substantial bias in the estimated covariate-outcome association. While several texts on measurement error correction methods are available, these methods remain seldomly applied. To improve the use of measurement error correction methodology, we developed mecor, an R package that implements measurement error correction methods for regression models with a continuous outcome. Measurement error correction requires information about the measurement error model and its parameters. This information can be obtained from four types of studies, used to estimate the parameters of the measurement error model: an internal validation study, a replicates study, a calibration study and an external validation study. In the package mecor, regression calibration methods and a maximum likelihood method are implemented to correct for measurement error in a continuous covariate in regression analyses. Additionally, methods of moments methods are implemented to correct for measurement error in the continuous outcome in regression analyses. Variance estimation of the corrected estimators is provided in closed form and using the bootstrap.},
langid = {english},
keywords = {Maximum likelihood,Measurement error correction,Method of moments,Regression calibration},
file = {/home/nathante/Zotero/storage/P95Z6A7N/Nab et al_2021_Mecor.pdf}
}

@article{nab_quantitative_2020,
title = {Quantitative {{Bias Analysis}} for a {{Misclassified Confounder}}: {{A Comparison Between Marginal Structural Models}} and {{Conditional Models}} for {{Point Treatments}}},
shorttitle = {Quantitative {{Bias Analysis}} for a {{Misclassified Confounder}}},
author = {Nab, Linda and Groenwold, Rolf H. H. and family=Smeden, given=Maarten, prefix=van, useprefix=true and Keogh, Ruth H.},
date = {2020-11},
journaltitle = {Epidemiology},
volume = {31},
number = {6},
pages = {796--805},
issn = {1044-3983},
abstract = {Observational data are increasingly used with the aim of estimating causal effects of treatments, through careful control for confounding. Marginal structural models estimated using inverse probability weighting (MSMs-IPW), like other methods to control for confounding, assume that confounding variables are measured without error. The average treatment effect in an MSM-IPW may however be biased when a confounding variable is error prone. Using the potential outcome framework, we derive expressions for the bias due to confounder misclassification in analyses that aim to estimate the average treatment effect using an marginal structural model estimated using inverse probability weighting (MSM-IPW). We compare this bias with the bias due to confounder misclassification in analyses based on a conditional regression model. Focus is on a point-treatment study with a continuous outcome. Compared with bias in the average treatment effect in a conditional model, the bias in an MSM-IPW can be different in magnitude but is equal in sign. Also, we use a simulation study to investigate the finite sample performance of MSM-IPW and conditional models when a confounding variable is misclassified. Simulation results indicate that confidence intervals of the treatment effect obtained from MSM-IPW are generally wider, and coverage of the true treatment effect is higher compared with a conditional model, ranging from overcoverage if there is no confounder misclassification to undercoverage when there is confounder misclassification. Further, we illustrate in a study of blood pressure-lowering therapy, how the bias expressions can be used to inform a quantitative bias analysis to study the impact of confounder misclassification, supported by an online tool.},
langid = {american},
file = {/home/nathante/Zotero/storage/TIKY8Z49/Nab et al_2020_Quantitative Bias Analysis for a Misclassified Confounder.pdf;/home/nathante/Zotero/storage/YPZQ4NGF/Quantitative_Bias_Analysis_for_a_Misclassified.7.html}
}

@online{nicholls_deep_nodate,
title = {Deep Learning Models for Multilingual Supervised Political Text Classification},
author = {Nicholls, Thomas and Culpepper, Pepper D},
pubstate = {preprint},
internal-note = {NOTE(review): no date or url recorded -- locate the preprint and fill these in}
}

@online{noauthor_jigsaw_nodate,
title = {Jigsaw {{Unintended Bias}} in {{Toxicity Classification}}},
abstract = {Detect toxicity across a diverse range of conversations},
langid = {english},
file = {/home/nathante/Zotero/storage/7A9N58UN/data.html}
}

@online{noauthor_place_nodate,
title = {Place {{Your Order}} - {{Amazon}}.Com {{Checkout}}},
internal-note = {NOTE(review): looks like an accidentally captured Amazon checkout page, not a citable source -- verify it is uncited and consider removing}
}

@book{noble_algorithms_2018,
title = {Algorithms of {{Oppression}}: {{How Search Engines Reinforce Racism}}},
shorttitle = {Algorithms of {{Oppression}}},
author = {Noble, Safiya Umoja},
date = {2018-02-20},
edition = {Illustrated edition},
publisher = {{NYU Press}},
location = {{New York}},
isbn = {978-1-4798-3724-3},
langid = {english},
pagetotal = {248}
}

@article{obermeyer_dissecting_2019,
title = {Dissecting Racial Bias in an Algorithm Used to Manage the Health of Populations},
author = {Obermeyer, Ziad and Powers, Brian and Vogeli, Christine and Mullainathan, Sendhil},
date = {2019-10-25},
journaltitle = {Science},
volume = {366},
number = {6464},
eprint = {31649194},
eprinttype = {pmid},
pages = {447--453},
issn = {0036-8075, 1095-9203},
abstract = {Racial bias in health algorithms The U.S. health care system uses commercial algorithms to guide health decisions. Obermeyer et al. find evidence of racial bias in one widely used algorithm, such that Black patients assigned the same level of risk by the algorithm are sicker than White patients (see the Perspective by Benjamin). The authors estimated that this racial bias reduces the number of Black patients identified for extra care by more than half. Bias occurs because the algorithm uses health costs as a proxy for health needs. Less money is spent on Black patients who have the same level of need, and the algorithm thus falsely concludes that Black patients are healthier than equally sick White patients. Reformulating the algorithm so that it no longer uses costs as a proxy for needs eliminates the racial bias in predicting who needs extra care. Science, this issue p. 447; see also p. 421 Health systems rely on commercial prediction algorithms to identify and help patients with complex health needs. We show that a widely used algorithm, typical of this industry-wide approach and affecting millions of patients, exhibits significant racial bias: At a given risk score, Black patients are considerably sicker than White patients, as evidenced by signs of uncontrolled illnesses. Remedying this disparity would increase the percentage of Black patients receiving additional help from 17.7 to 46.5\%. The bias arises because the algorithm predicts health care costs rather than illness, but unequal access to care means that we spend less money caring for Black patients than for White patients. Thus, despite health care cost appearing to be an effective proxy for health by some measures of predictive accuracy, large racial biases arise. We suggest that the choice of convenient, seemingly effective proxies for ground truth can be an important source of algorithmic bias in many contexts. A health algorithm that uses health costs as a proxy for health needs leads to racial bias against Black patients.},
langid = {english},
file = {/home/nathante/Zotero/storage/HWSVCC4M/447.html}
}

@incollection{oehmer-pedrazzi_automated_2023,
title = {Automated Content Analysis},
booktitle = {Standardisierte Inhaltsanalyse in der Kommunikationswissenschaft – Standardized Content Analysis in Communication Research},
author = {Hase, Valerie},
editor = {Oehmer-Pedrazzi, Franziska and Kessler, Sabrina Heike and Humprecht, Edda and Sommer, Katharina and Castro, Laia},
date = {2023},
pages = {23--36},
publisher = {{Springer Fachmedien Wiesbaden}},
location = {{Wiesbaden}},
abstract = {Abstract Due to the rise in processing power, advancements in machine learning, and the availability of large text corpora online, the use of computational methods including automated content analysis has rapidly increased. Automated content analysis is applied and developed across disciplines such as computer science, linguistics, political science, economics and – increasingly – communication science. This chapter offers a theoretical and applied introduction to the method, including promises and pitfalls associated with the method.},
isbn = {978-3-658-36178-5 978-3-658-36179-2},
langid = {ngerman},
file = {/home/nathante/Zotero/storage/EYMUDQCL/Hase - 2023 - Automated Content Analysis.pdf}
}

@article{opperhuizen_framing_2019,
ids = {opperhuizen_framing_2019-1},
title = {Framing a {{Conflict}}! {{How Media Report}} on {{Earthquake Risks Caused}} by {{Gas Drilling}}: {{A Longitudinal Analysis Using Machine Learning Techniques}} of {{Media Reporting}} on {{Gas Drilling}} from 1990 to 2015},
shorttitle = {Framing a {{Conflict}}! {{How Media Report}} on {{Earthquake Risks Caused}} by {{Gas Drilling}}},
author = {Opperhuizen, Alette Eva and Schouten, Kim and Klijn, Erik Hans},
date = {2019-04-04},
journaltitle = {Journalism Studies},
shortjournal = {Journalism Studies},
volume = {20},
number = {5},
pages = {714--734},
issn = {1461-670X, 1469-9699},
langid = {english},
file = {/home/nathante/Zotero/storage/DJMSRL6B/Opperhuizen et al. - 2019 - Framing a Conflict! How Media Report on Earthquake.pdf}
}

@article{papasavva_is_2020,
title = {``{{Is}} It a {{Qoincidence}}?'': {{A First Step Towards Understanding}} and {{Characterizing}} the {{QAnon Movement}} on {{Voat.co}}},
shorttitle = {``{{Is}} It a {{Qoincidence}}?''},
author = {Papasavva, Antonis and Blackburn, Jeremy and Stringhini, Gianluca and Zannettou, Savvas and De Cristofaro, Emiliano},
date = {2020},
langid = {english},
keywords = {Computer Science,Computers and Society,cs.CY},
internal-note = {NOTE(review): no journal or venue recorded; keywords suggest an arXiv preprint -- verify and add eprint/eprinttype fields},
file = {/home/nathante/Zotero/storage/U8M5WSNH/Papasavva et al_2020_Is it a Qoincidence.pdf}
}

@article{papasavva_raiders_2020,
title = {Raiders of the {{Lost Kek}}: 3.5 {{Years}} of {{Augmented}} 4chan {{Posts}} from the {{Politically Incorrect Board}}},
shorttitle = {Raiders of the {{Lost Kek}}},
author = {Papasavva, Antonis and Zannettou, Savvas and De Cristofaro, Emiliano and Stringhini, Gianluca and Blackburn, Jeremy},
date = {2020-05-26},
journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media},
volume = {14},
pages = {885--894},
issn = {2334-0770},
abstract = {This paper presents a dataset with over 3.3M threads and 134.5M posts from the Politically Incorrect board (/pol/) of the imageboard forum 4chan, posted over a period of almost 3.5 years (June 2016-November 2019). To the best of our knowledge, this represents the largest publicly available 4chan dataset, providing the community with an archive of posts that have been permanently deleted from 4chan and are otherwise inaccessible. We augment the data with a set of additional labels, including toxicity scores and the named entities mentioned in each post. We also present a statistical analysis of the dataset, providing an overview of what researchers interested in using it can expect, as well as a simple content analysis, shedding light on the most prominent discussion topics, the most popular entities mentioned, and the toxicity level of each post. Overall, we are confident that our work will motivate and assist researchers in studying and understanding 4chan, as well as its role on the greater Web. For instance, we hope this dataset may be used for cross-platform studies of social media, as well as being useful for other types of research like natural language processing. Finally, our dataset can assist qualitative work focusing on in-depth case studies of specific narratives, events, or social theories.},
langid = {english},
file = {/home/nathante/Zotero/storage/VI53KWD3/Papasavva et al_2020_Raiders of the Lost Kek.pdf}
}

@article{pearl_fusion_1986,
title = {Fusion, Propagation, and Structuring in Belief Networks},
author = {Pearl, Judea},
date = {1986-09-01},
journaltitle = {Artificial Intelligence},
shortjournal = {Artificial Intelligence},
volume = {29},
number = {3},
pages = {241--288},
issn = {0004-3702},
abstract = {Belief networks are directed acyclic graphs in which the nodes represent propositions (or variables), the arcs signify direct dependencies between the linked propositions, and the strengths of these dependencies are quantified by conditional probabilities. A network of this sort can be used to represent the generic knowledge of a domain expert, and it turns into a computational architecture if the links are used not merely for storing factual knowledge but also for directing and activating the data flow in the computations which manipulate this knowledge. The first part of the paper deals with the task of fusing and propagating the impacts of new information through the networks in such a way that, when equilibrium is reached, each proposition will be assigned a measure of belief consistent with the axioms of probability theory. It is shown that if the network is singly connected (e.g. tree-structured), then probabilities can be updated by local propagation in an isomorphic network of parallel and autonomous processors and that the impact of new information can be imparted to all propositions in time proportional to the longest path in the network. The second part of the paper deals with the problem of finding a tree-structured representation for a collection of probabilistically coupled propositions using auxiliary (dummy) variables, colloquially called “hidden causes.” It is shown that if such a tree-structured representation exists, then it is possible to uniquely uncover the topology of the tree by observing pairwise dependencies among the available propositions (i.e., the leaves of the tree). The entire tree structure, including the strengths of all internal relationships, can be reconstructed in time proportional to n log n, where n is the number of leaves.},
langid = {english},
file = {/home/nathante/Zotero/storage/RQ4HHQDE/Pearl_1986_Fusion, propagation, and structuring in belief networks.pdf;/home/nathante/Zotero/storage/TJGHM5D8/000437028690072X.html}
}

@article{pepe_insights_2007,
title = {Insights into Latent Class Analysis of Diagnostic Test Performance},
author = {Pepe, Margaret Sullivan and Janes, Holly},
date = {2007-04-01},
journaltitle = {Biostatistics},
shortjournal = {Biostatistics},
volume = {8},
number = {2},
pages = {474--484},
issn = {1465-4644},
abstract = {Latent class analysis is used to assess diagnostic test accuracy when a gold standard assessment of disease is not available but results of multiple imperfect tests are. We consider the simplest setting, where 3 tests are observed and conditional independence (CI) is assumed. Closed-form expressions for maximum likelihood parameter estimates are derived. They show explicitly how observed 2- and 3-way associations between test results are used to infer disease prevalence and test true- and false-positive rates. Although interesting and reasonable under CI, the estimators clearly have no basis when it fails. Intuition for bias induced by conditional dependence follows from the analytic expressions. Further intuition derives from an Expectation Maximization (EM) approach to calculating the estimates. We discuss implications of our results and related work for settings where more than 3 tests are available. We conclude that careful justification of assumptions about the dependence between tests in diseased and nondiseased subjects is necessary in order to ensure unbiased estimates of prevalence and test operating characteristics and to provide these estimates clinical interpretations. Such justification must be based in part on a clear clinical definition of disease and biological knowledge about mechanisms giving rise to test results.},
file = {/home/nathante/Zotero/storage/MI5DX4GP/Pepe_Janes_2007_Insights into latent class analysis of diagnostic test performance.pdf;/home/nathante/Zotero/storage/4HJEMBH2/232752.html}
}

@article{pilny_using_2019,
ids = {pilny_using_2019-1},
title = {Using {{Supervised Machine Learning}} in {{Automated Content Analysis}}: {{An Example Using Relational Uncertainty}}},
shorttitle = {Using {{Supervised Machine Learning}} in {{Automated Content Analysis}}},
author = {Pilny, Andrew and McAninch, Kelly and Slone, Amanda and Moore, Kelsey},
date = {2019-10-02},
journaltitle = {Communication Methods and Measures},
volume = {13},
number = {4},
pages = {287--304},
publisher = {{Routledge}},
issn = {1931-2458},
abstract = {The goal of this research is to make progress towards using supervised machine learning for automated content analysis dealing with complex interpretations of text. For Step 1, two humans coded a sub-sample of online forum posts for relational uncertainty. For Step 2, we evaluated reliability, in which we trained three different classifiers to learn from those subjective human interpretations. Reliability was established when two different metrics of inter-coder reliability could not distinguish whether a human or a machine coded the text on a separate hold-out set. Finally, in Step 3 we assessed validity. To accomplish this, we administered a survey in which participants described their own relational uncertainty/certainty via text and completed a questionnaire. After classifying the text, the machine’s classifications of the participants’ text positively correlated with the subjects’ own self-reported relational uncertainty and relational satisfaction. We discuss our results in line with areas of computational communication science, content analysis, and interpersonal communication.},
file = {/home/nathante/Zotero/storage/6W4S82UP/Pilny et al_2019_Using Supervised Machine Learning in Automated Content Analysis.pdf;/home/nathante/Zotero/storage/VZHKQWIE/19312458.2019.html}
}

@article{pipal_if_2022,
title = {If {{You Have Choices}}, {{Why Not Choose}} (and {{Share}}) {{All}} of {{Them}}? {{A Multiverse Approach}} to {{Understanding News Engagement}} on {{Social Media}}},
shorttitle = {If {{You Have Choices}}, {{Why Not Choose}} (and {{Share}}) {{All}} of {{Them}}?},
author = {Pipal, Christian and Song, Hyunjin and Boomgaarden, Hajo G.},
date = {2022-03-02},
journaltitle = {Digital Journalism},
shortjournal = {Digital Journalism},
pages = {1--21},
issn = {2167-0811, 2167-082X},
langid = {english}
}

@article{rajadesingan_quick_2020,
title = {Quick, {{Community-Specific Learning}}: {{How Distinctive Toxicity Norms Are Maintained}} in {{Political Subreddits}}},
shorttitle = {Quick, {{Community-Specific Learning}}},
author = {Rajadesingan, Ashwin and Resnick, Paul and Budak, Ceren},
date = {2020-05-26},
journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media},
volume = {14},
pages = {557--568},
issn = {2334-0770},
abstract = {Online communities about similar topics may maintain very different norms of interaction. Past research identifies many processes that contribute to maintaining stable norms, including self-selection, pre-entry learning, post-entry learning, and retention. We analyzed political subreddits that had distinctive, stable levels of toxic comments on Reddit, in order to identify the relative contribution of these four processes. Surprisingly, we find that the largest source of norm stability is pre-entry learning. That is, newcomers' first comments in these distinctive subreddits differ from those same people's prior behavior in other subreddits. Through this adjustment, they nearly match the toxicity level of the subreddit they are joining. We also show that behavior adjustments are community-specific and not broadly transformative. That is, people continue to post toxic comments at their previous rates in other political subreddits. Thus, we conclude that in political subreddits, compatible newcomers are neither born nor made– they make local adjustments on their own.},
langid = {english},
file = {/home/nathante/Zotero/storage/CTW2MSVA/Rajadesingan et al_2020_Quick, Community-Specific Learning.pdf}
}

@report{ralund_measurement_2022,
  type = {preprint},
  title = {Measurement Error and Model Instability in Automated Text Analysis: The Case of Topic Models},
  shorttitle = {Measurement Error and Model Instability in Automated Text Analysis},
  author = {Ralund, Snorre and Carlsen, Hjalmar Bang and Klemmensen, Robert and Lassen, David Dreyer},
  date = {2022-09-29},
  institution = {SocArXiv},
  abstract = {Topic models (TMs) have become the de facto standard for automated content analysis in the social sciences. Some problematic aspects of the models, however, have been recently identified. These mainly concern the high variability of solutions that is the result of both preprocessing and non-deterministic inference in high-dimensional and very large solution spaces. Whether current model selection and validation practices are sufficient to ensure precise and unbiased measurement has been the subject of debate. This paper argues that the prevalent practices of model selection and validation for ensuring unbiased measurement are indeed insufficient. This paper focus is on the use of summarization techniques during labeling, indirect validation techniques, and the employment of heuristics for model selection. Our results document non-trivial biases and show that current model validation techniques lead to arbitrary research outcomes. The study concludes by providing recommendations on ways to validate work based on topic models.}
}

@article{rauchfleisch_false_2020,
  title = {The {{False}} Positive Problem of Automatic Bot Detection in Social Science Research},
  author = {Rauchfleisch, Adrian and Kaiser, Jonas},
  date = {2020-10-22},
  journaltitle = {PLOS ONE},
  shortjournal = {PLOS ONE},
  volume = {15},
  number = {10},
  pages = {e0241045},
  publisher = {Public Library of Science},
  issn = {1932-6203},
  abstract = {The identification of bots is an important and complicated task. The bot classifier "Botometer" was successfully introduced as a way to estimate the number of bots in a given list of accounts and, as a consequence, has been frequently used in academic publications. Given its relevance for academic research and our understanding of the presence of automated accounts in any given Twitter discourse, we are interested in Botometer’s diagnostic ability over time. To do so, we collected the Botometer scores for five datasets (three verified as bots, two verified as human; n = 4,134) in two languages (English/German) over three months. We show that the Botometer scores are imprecise when it comes to estimating bots; especially in a different language. We further show in an analysis of Botometer scores over time that Botometer's thresholds, even when used very conservatively, are prone to variance, which, in turn, will lead to false negatives (i.e., bots being classified as humans) and false positives (i.e., humans being classified as bots). This has immediate consequences for academic research as most studies in social science using the tool will unknowingly count a high number of human users as bots and vice versa. We conclude our study with a discussion about how computational social scientists should evaluate machine learning systems that are developed for identifying bots.},
  langid = {english},
  keywords = {Automation,Machine learning,Scientists,Social communication,Social media,Social research,Social sciences,Twitter},
  file = {/home/nathante/Zotero/storage/CSEHIDQE/Rauchfleisch_Kaiser_2020_The False positive problem of automatic bot detection in social science research.pdf;/home/nathante/Zotero/storage/37AK3T2Q/article.html}
}

@unpublished{reiss_reporting_2022,
  title = {Reporting {{Supervised Text Analysis}} for {{Communication Science}}},
  author = {Reiss, Michael and Kobilke, Lara and Stoll, Anke},
  date = {2022-06-10},
  venue = {Annual Conference of the Methods Section of the German Communication Section, Munich}
}

@article{rettberg_algorithmic_2022-1,
  title = {Algorithmic Failure as a Humanities Methodology: {{Machine}} Learning's Mispredictions Identify Rich Cases for Qualitative Analysis},
  shorttitle = {Algorithmic Failure as a Humanities Methodology},
  author = {Rettberg, Jill Walker},
  date = {2022-07},
  journaltitle = {Big Data \& Society},
  shortjournal = {Big Data \& Society},
  volume = {9},
  number = {2},
  pages = {205395172211312},
  issn = {2053-9517, 2053-9517},
  abstract = {This commentary tests a methodology proposed by Munk et al. (2022) for using failed predictions in machine learning as a method to identify ambiguous and rich cases for qualitative analysis. Using a dataset describing actions performed by fictional characters interacting with machine vision technologies in 500 artworks, movies, novels and videogames, I trained a simple machine learning algorithm (using the kNN algorithm in R) to predict whether or not an action was active or passive using only information about the fictional characters. Predictable actions were generally unemotional and unambiguous activities where machine vision technologies were treated as simple tools. Unpredictable actions, that is, actions that the algorithm could not correctly predict, were more ambivalent and emotionally loaded, with more complex power relationships between characters and technologies. The results thus support Munk et al.'s theory that failed predictions can be productively used to identify rich cases for qualitative analysis. This test goes beyond simply replicating Munk et al.'s results by demonstrating that the method can be applied to a broader humanities domain, and that it does not require complex neural networks but can also work with a simpler machine learning algorithm. Further research is needed to develop an understanding of what kinds of data the method is useful for and which kinds of machine learning are most generative. To support this, the R code required to produce the results is included so the test can be replicated. The code can also be reused or adapted to test the method on other datasets.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/MPCYENAB/Rettberg - 2022 - Algorithmic failure as a humanities methodology M.pdf}
}

@article{rice_machine_2021,
  title = {Machine Coding of Policy Texts with the {{Institutional Grammar}}},
  author = {Rice, Douglas and Siddiki, Saba and Frey, Seth and Kwon, Jay H. and Sawyer, Adam},
  date = {2021},
  journaltitle = {Public Administration},
  volume = {99},
  number = {2},
  pages = {248--262},
  issn = {1467-9299},
  abstract = {The Institutional Grammar (IG) is used to analyse the syntactic structure of statements constituting institutions (e.g., policies, regulations, and norms) that indicate behavioural constraints and parameterize features of institutionally governed domains. Policy and administration scholars have made considerable progress in methodologically developing the IG, offering increasingly clear guidelines for IG-based coding, identifying unique considerations for applying the IG to different types of institutions, and expanding its syntactic scope. However, while validated as a robust institutional analysis approach, the resource and time commitment associated with its application has precipitated concerns over whether the IG might ever enjoy widespread use. Needed now in the methodological development of the IG are reliable and accessible (i.e., open source) approaches that reduce the costs associated with its application. We propose an automated approach leveraging computational text analysis and natural language processing. We then present results from an evaluation in the context of food system regulations.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/C7ZBPYPY/padm.html}
}

@article{rieder_fabrics_2021,
  title = {The Fabrics of Machine Moderation: {{Studying}} the Technical, Normative, and Organizational Structure of {{Perspective API}}},
  shorttitle = {The Fabrics of Machine Moderation},
  author = {Rieder, Bernhard and Skop, Yarden},
  date = {2021-07-01},
  journaltitle = {Big Data \& Society},
  shortjournal = {Big Data \& Society},
  volume = {8},
  number = {2},
  pages = {20539517211046181},
  publisher = {SAGE Publications Ltd},
  issn = {2053-9517},
  abstract = {Over recent years, the stakes and complexity of online content moderation have been steadily raised, swelling from concerns about personal conflict in smaller communities to worries about effects on public life and democracy. Because of the massive growth in online expressions, automated tools based on machine learning are increasingly used to moderate speech. While ‘design-based governance’ through complex algorithmic techniques has come under intense scrutiny, critical research covering algorithmic content moderation is still rare. To add to our understanding of concrete instances of machine moderation, this article examines Perspective API, a system for the automated detection of ‘toxicity’ developed and run by the Google unit Jigsaw that can be used by websites to help moderate their forums and comment sections. The article proceeds in four steps. First, we present our methodological strategy and the empirical materials we were able to draw on, including interviews, documentation, and GitHub repositories. We then summarize our findings along five axes to identify the various threads Perspective API brings together to deliver a working product. The third section discusses two conflicting organizational logics within the project, paying attention to both critique and what can be learned from the specific case at hand. We conclude by arguing that the opposition between ‘human’ and ‘machine’ in speech moderation obscures the many ways these two come together in concrete systems, and suggest that the way forward requires proactive engagement with the design of technologies as well as the institutions they are embedded in.},
  langid = {english},
  keywords = {Algorithmic content moderation,Google Jigsaw,machine learning,moral engineering,Perspective API,platformization},
  file = {/home/nathante/Zotero/storage/XQZZZJU6/Rieder_Skop_2021_The fabrics of machine moderation.pdf}
}

@inproceedings{salminen_online_2018,
  title = {Online {{Hate Interpretation Varies}} by {{Country}}, {{But More}} by {{Individual}}: {{A Statistical Analysis Using Crowdsourced Ratings}}},
  shorttitle = {Online {{Hate Interpretation Varies}} by {{Country}}, {{But More}} by {{Individual}}},
  booktitle = {2018 {{Fifth International Conference}} on {{Social Networks Analysis}}, {{Management}} and {{Security}} ({{SNAMS}})},
  author = {Salminen, Joni and Veronesi, Fabio and Almerekhi, Hind and Jung, Soon-Gyo and Jansen, Bernard J.},
  date = {2018-10},
  pages = {88--94},
  abstract = {Hate is prevalent in online social media. This has resulted in a considerable amount of research in detecting and scoring it. Most computational efforts involve machine learning with crowdsourced ratings as training data. A prominent example of this is the Perspective API, a tool by Google to score toxicity of online comments. However, a major issue in the existing approaches is the lack of consideration for the subjective nature of online hate. While there is research that shows the intensity of hate varies and the hate depends on the context, there is no research that systematically investigates how hate interpretation varies by country or individual. In this exploratory research, we undertake this challenge. We sample crowd workers from 50 countries, have them score the same social media comments for toxicity and then evaluate the differences in the scores, altogether 18,125 ratings. We find that the interpretation score differences among countries are highly significant. However, the hate interpretations vary more by the individual raters than by countries. These findings suggest that hate scoring systems should consider user-level features when scoring and automating the processing of online hate.},
  eventtitle = {2018 {{Fifth International Conference}} on {{Social Networks Analysis}}, {{Management}} and {{Security}} ({{SNAMS}})},
  keywords = {Dictionaries,Facebook,hateinterpretation,Media,Online hate,Security,social media,Task analysis,YouTube},
  file = {/home/nathante/Zotero/storage/WWS3JFLS/Salminen et al_2018_Online Hate Interpretation Varies by Country, But More by Individual.pdf;/home/nathante/Zotero/storage/7IY8BXP4/8554954.html}
}

@article{salminen_topic-driven_2020,
  title = {Topic-Driven Toxicity: {{Exploring}} the Relationship between Online Toxicity and News Topics},
  shorttitle = {Topic-Driven Toxicity},
  author = {Salminen, Joni and Sengün, Sercan and Corporan, Juan and Jung, Soon-gyo and Jansen, Bernard J.},
  date = {2020-02-21},
  journaltitle = {PLOS ONE},
  shortjournal = {PLOS ONE},
  volume = {15},
  number = {2},
  pages = {e0228723},
  publisher = {Public Library of Science},
  issn = {1932-6203},
  abstract = {Hateful commenting, also known as ‘toxicity’, frequently takes place within news stories in social media. Yet, the relationship between toxicity and news topics is poorly understood. To analyze how news topics relate to the toxicity of user comments, we classify topics of 63,886 online news videos of a large news channel using a neural network and topical tags used by journalists to label content. We score 320,246 user comments from those videos for toxicity and compare how the average toxicity of comments varies by topic. Findings show that topics like Racism, Israel-Palestine, and War \& Conflict have more toxicity in the comments, and topics such as Science \& Technology, Environment \& Weather, and Arts \& Culture have less toxic commenting. Qualitative analysis reveals five themes: Graphic videos, Humanistic stories, History and historical facts, Media as a manipulator, and Religion. We also observe cases where a typically more toxic topic becomes non-toxic and where a typically less toxic topic becomes “toxicified” when it involves sensitive elements, such as politics and religion. Findings suggest that news comment toxicity can be characterized as topic-driven toxicity that targets topics rather than as vindictive toxicity that targets users or groups. Practical implications suggest that humanistic framing of the news story (i.e., reporting stories through real everyday people) can reduce toxicity in the comments of an otherwise toxic topic.},
  langid = {english},
  keywords = {Internet,Language,Machine learning,Racial discrimination,Religion,Russia,Social media,Toxicity},
  file = {/home/nathante/Zotero/storage/V8AU2PHU/Salminen et al_2020_Topic-driven toxicity.pdf;/home/nathante/Zotero/storage/GZMK7WD6/article.html}
}

@article{scharkow_content_2017,
  title = {Content Analysis, Automatic},
  author = {Scharkow, Michael},
  date = {2017},
  journaltitle = {The international encyclopedia of communication research methods},
  pages = {1--14},
  publisher = {John Wiley \& Sons, Inc. Hoboken, NJ, USA},
  file = {/home/nathante/Zotero/storage/VU8JC2YH/Scharkow_2017_Content analysis, automatic.pdf}
}

@article{scharkow_how_2017,
  title = {How {{Measurement Error}} in {{Content Analysis}} and {{Self-Reported Media Use Leads}} to {{Minimal Media Effect Findings}} in {{Linkage Analyses}}: {{A Simulation Study}}},
  shorttitle = {How {{Measurement Error}} in {{Content Analysis}} and {{Self-Reported Media Use Leads}} to {{Minimal Media Effect Findings}} in {{Linkage Analyses}}},
  author = {Scharkow, Michael and Bachl, Marko},
  date = {2017-07-03},
  journaltitle = {Political Communication},
  volume = {34},
  number = {3},
  pages = {323--343},
  publisher = {Routledge},
  issn = {1058-4609},
  abstract = {In the debate on minimal media effects and their causes, methodological concerns about measurement are rarely discussed. We argue that even in state-of-the-art media-effects studies that combine measures of media messages and media use (i.e., linkage analyses), measurement error in both the media content analysis and the media use self-reports will typically lead to severely downward-biased effect estimates. We demonstrate this phenomenon using a large Monte Carlo simulation with varying parameters of the content analysis and the survey study. Results show that measurement error in the content analysis and media use variables does indeed lead to smaller effect estimates, especially when the media messages of interest are relatively rare. We discuss these findings as well as possible remedies and implications for future research.},
  keywords = {content analysis,Corrigendum,linkage analysis,media effects,media use,Monte Carlo simulation,reliability},
  file = {/home/nathante/Zotero/storage/M5A6LIZQ/Scharkow_Bachl_2017_How Measurement Error in Content Analysis and Self-Reported Media Use Leads to.pdf}
}

@article{scharkow_thematic_2013,
  title = {Thematic Content Analysis Using Supervised Machine Learning: {{An}} Empirical Evaluation Using {{German}} Online News},
  shorttitle = {Thematic Content Analysis Using Supervised Machine Learning},
  author = {Scharkow, Michael},
  date = {2013-02-01},
  journaltitle = {Quality \& Quantity},
  shortjournal = {Qual Quant},
  volume = {47},
  number = {2},
  pages = {761--773},
  issn = {1573-7845},
  abstract = {In recent years, two approaches to automatic content analysis have been introduced in the social sciences: semantic network analysis and supervised text classification. We argue that, although less linguistically sophisticated than semantic parsing techniques, statistical machine learning offers many advantages for applied communication research. By using manually coded material for training, supervised classification seamlessly bridges the gap between traditional and automatic content analysis. In this paper, we briefly introduce the conceptual foundations of machine learning approaches to text classification and discuss their application in social science research. We then evaluate their potential in an experimental study in which German online news was coded with established thematic categories. Moreover, we investigate whether and how linguistic preprocessing can improve classification quality. Results indicate that supervised text classification is generally robust and reliable for some categories, but may even be useful when it fails.},
  langid = {english},
  keywords = {Bayesian classifier,Content analysis,Machine learning,Online news},
  file = {/home/nathante/Zotero/storage/L6G36ZJV/Scharkow_2013_Thematic content analysis using supervised machine learning.pdf}
}

@article{schwartz_neglected_1985,
  title = {The {{Neglected Problem}} of {{Measurement Error}} in {{Categorical Data}}},
  author = {Schwartz, Joseph E.},
  date = {1985-05},
  journaltitle = {Sociological Methods \& Research},
  shortjournal = {Sociological Methods \& Research},
  volume = {13},
  number = {4},
  pages = {435--466},
  issn = {0049-1241, 1552-8294},
  abstract = {The problems created by measurement error are entirely ignored in the vast majority of statistical analyses. To adjust for the effects of measurement error requires both a theory, or model, of measurement and estimates of the relevant measurement parameters (e.g., reliability coefficients). A fairly well-developed measurement theory for interval level data has been known for quite some time. A corresponding measurement theory for categorical data is not widely known even though such data are at least as important in the social sciences as interval data. Nevertheless, such a theory exists in the statistical journals. The primary purpose of this article is pedagogical: that is, to present the foundation of this theory for binary variables, the simplest type of categorical variable, and to demonstrate that the consequences of measurement errors in binary data are different from and probably more serious than the effects of measurement errors in interval level data. The principal reason for this is that measurement errors in a binary variable are likely to have a nonzero mean and will always be negatively correlated with the underlying true scores. The former has the effect of biasing the sample estimate of the mean, often to such a degree that the likelihood that a 95\% confidence interval will contain the population mean is almost nil.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/3HPFWPK6/Schwartz (1985) The Neglected Problem of Measurement Error in Categorical Data.pdf}
}

@article{song_validations_2020,
  title = {In {{Validations We Trust}}? {{The Impact}} of {{Imperfect Human Annotations}} as a {{Gold Standard}} on the {{Quality}} of {{Validation}} of {{Automated Content Analysis}}},
  shorttitle = {In {{Validations We Trust}}?},
  author = {Song, Hyunjin and Tolochko, Petro and Eberl, Jakob-Moritz and Eisele, Olga and Greussing, Esther and Heidenreich, Tobias and Lind, Fabienne and Galyga, Sebastian and Boomgaarden, Hajo G.},
  date = {2020-07-03},
  journaltitle = {Political Communication},
  shortjournal = {Political Communication},
  volume = {37},
  number = {4},
  pages = {550--572},
  issn = {1058-4609, 1091-7675},
  abstract = {Political communication has become one of the central arenas of innovation in the application of automated analysis approaches to ever-growing quantities of digitized texts. However, although researchers routinely and conveniently resort to certain forms of human coding to validate the results derived from automated procedures, in practice the actual “quality assurance” of such a “gold standard” often goes unchecked. Contemporary practices of validation via manual annotations are far from being acknowledged as best practices in the literature, and the reporting and interpretation of validation procedures differ greatly. We systematically assess the connection between the quality of human judgment in manual annotations and the relative performance evaluations of automated procedures against true standards by relying on large-scale Monte Carlo simulations. The results from the simulations confirm that there is a substantially greater risk of a researcher reaching an incorrect conclusion regarding the performance of automated procedures when the quality of manual annotations used for validation is not properly ensured. Our contribution should therefore be regarded as a call for the systematic application of high-quality manual validation materials in any political communication study, drawing on automated text analysis procedures.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/FIX97HQ6/Song et al. - 2020 - In Validations We Trust The Impact of Imperfect H.pdf}
}

@article{stoll_developing_2023,
  title = {Developing an {{Incivility Dictionary}} for {{German Online Discussions}} – a {{Semi-Automated Approach Combining Human}} and {{Artificial Knowledge}}},
  author = {Stoll, Anke and Wilms, Lena and Ziegele, Marc},
  date = {2023-02-05},
  journaltitle = {Communication Methods and Measures},
  shortjournal = {Communication Methods and Measures},
  pages = {1--19},
  issn = {1931-2458, 1931-2466},
  langid = {english}
}

@article{stoll_supervised_2020,
  title = {Supervised Machine Learning mit Nutzergenerierten Inhalten: Oversampling für nicht balancierte Trainingsdaten},
  shorttitle = {Supervised Machine Learning mit Nutzergenerierten Inhalten},
  author = {Stoll, Anke},
  date = {2020-05},
  journaltitle = {Publizistik},
  shortjournal = {Publizistik},
  volume = {65},
  number = {2},
  pages = {233--251},
  issn = {0033-4006, 1862-2569},
  langid = {ngerman},
  file = {/home/nathante/Zotero/storage/LYDJHHFJ/Stoll - 2020 - Supervised Machine Learning mit Nutzergenerierten .pdf}
}

@article{su_uncivil_2018,
  title = {Uncivil and Personal? {{Comparing}} Patterns of Incivility in Comments on the {{Facebook}} Pages of News Outlets},
  shorttitle = {Uncivil and Personal?},
  author = {Su, Leona Yi-Fan and Xenos, Michael A. and Rose, Kathleen M. and Wirz, Christopher and Scheufele, Dietram A. and Brossard, Dominique},
  date = {2018-10},
  journaltitle = {New Media \& Society},
  shortjournal = {New Media \& Society},
  volume = {20},
  number = {10},
  pages = {3678--3699},
  issn = {1461-4448, 1461-7315},
  abstract = {Social media and its embedded user commentary are playing increasingly influential roles in the news process. However, researchers’ understanding of the social media commenting environment remains limited, despite rising concerns over uncivil comments. Accordingly, this study used a supervised machine learning–based method of content analysis to examine the extent and patterns of incivility in the comment sections of 42 US news outlets’ Facebook pages over an 18-month period in 2015–2016. These outlets were selected as being broadly representative of national, local, conservative, and liberal-news media. The findings provide the first empirical evidence that both the level and the targets of incivility in the comments posted on news outlets’ Facebook pages vary greatly according to such entities’ general type and ideological stance.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/ZSEXMRA3/Su et al. - 2018 - Uncivil and personal Comparing patterns of incivi.pdf}
}

@unpublished{tay_conceptual_2022,
  title = {A {{Conceptual Framework}} for {{Investigating}} and {{Mitigating Machine Learning Measurement Bias}} ({{MLMB}}) in {{Psychological Assessment}}},
  author = {Tay, Louis and Woo, Sang Eun and Hickman, Louis and Booth, Brandon and D'Mello, Sidney K.},
  date = {2022},
  howpublished = {Working Draft},
  file = {/home/nathante/Zotero/storage/ELRHYNKL/Tayetal.2021MachineLearningMeasurementBias.pdf}
}

@article{teblunthuis_effects_2021,
  ids = {teblunthuis_effects_2020},
  title = {Effects of {{Algorithmic Flagging}} on {{Fairness}}: {{Quasi-experimental Evidence}} from {{Wikipedia}}},
  shorttitle = {Effects of {{Algorithmic Flagging}} on {{Fairness}}},
  author = {TeBlunthuis, Nathan and Hill, Benjamin Mako and Halfaker, Aaron},
  date = {2021-04-22},
  journaltitle = {Proceedings of the ACM on Human-Computer Interaction},
  shortjournal = {Proc. ACM Hum.-Comput. Interact.},
  volume = {5},
  eprint = {2006.03121},
  eprinttype = {arxiv},
  pages = {56:1--56:27},
  abstract = {Online community moderators often rely on social signals such as whether or not a user has an account or a profile page as clues that users may cause problems. Reliance on these clues can lead to "overprofiling'' bias when moderators focus on these signals but overlook the misbehavior of others. We propose that algorithmic flagging systems deployed to improve the efficiency of moderation work can also make moderation actions more fair to these users by reducing reliance on social signals and making norm violations by everyone else more visible. We analyze moderator behavior in Wikipedia as mediated by RCFilters, a system which displays social signals and algorithmic flags, and estimate the causal effect of being flagged on moderator actions. We show that algorithmically flagged edits are reverted more often, especially those by established editors with positive social signals, and that flagging decreases the likelihood that moderation actions will be undone. Our results suggest that algorithmic flagging systems can lead to increased fairness in some contexts but that the relationship is complex and contingent.},
  issue = {CSCW1},
  keywords = {ai,causal inference,community norms,fairness,machine learning,moderation,online communities,peer production,sociotechnical systems,wikipedia},
  file = {/home/nathante/Zotero/storage/8KVI8QKZ/TeBlunthuis et al. - 2021 - Effects of Algorithmic Flagging on Fairness Quasi.pdf;/home/nathante/Zotero/storage/E2RPTEMM/TeBlunthuis et al_2021_Effects of Algorithmic Flagging on Fairness.pdf;/home/nathante/Zotero/storage/LAJEZ9JV/TeBlunthuis et al. - 2021 - Effects of Algorithmic Flagging on Fairness Quasi.pdf;/home/nathante/Zotero/storage/NWM56G48/TeBlunthuis et al_2020_The effects of algorithmic flagging on fairness.pdf;/home/nathante/Zotero/storage/YBYI7VSP/2006.html}
}

@inproceedings{teblunthuis_measuring_2021,
  title = {Measuring {{Wikipedia Article Quality}} in {{One Dimension}} by {{Extending ORES}} with {{Ordinal Regression}}},
  booktitle = {17th {{International Symposium}} on {{Open Collaboration}}},
  author = {TeBlunthuis, Nathan},
  date = {2021-09-15},
  series = {{{OpenSym}} 2021},
  pages = {1--10},
  publisher = {Association for Computing Machinery},
  location = {New York, NY, USA},
  abstract = {Organizing complex peer production projects and advancing scientific knowledge of open collaboration each depend on the ability to measure quality. Wikipedia community members and academic researchers have used article quality ratings for purposes like tracking knowledge gaps and studying how political polarization shapes collaboration. Even so, measuring quality presents many methodological challenges. The most widely used systems use quality assesements on discrete ordinal scales, but such labels can be inconvenient for statistics and machine learning. Prior work handles this by assuming that different levels of quality are “evenly spaced” from one another. This assumption runs counter to intuitions about degrees of effort needed to raise Wikipedia articles to different quality levels. I describe a technique extending the Wikimedia Foundations’ ORES article quality model to address these limitations. My method uses weighted ordinal regression models to construct one-dimensional continuous measures of quality. While scores from my technique and from prior approaches are correlated, my approach improves accuracy for research datasets and provides evidence that the “evenly spaced” assumption is unfounded in practice on English Wikipedia. I conclude with recommendations for using quality scores in future research and include the full code, data, and models.},
  isbn = {978-1-4503-8500-8},
  keywords = {datasets,machine learning,measurement,methods,online communities,peer production,quality,sociotechnical systems,statistics,Wikipedia},
  file = {/home/nathante/Zotero/storage/5PU87696/Teblunthuis_2021_Measuring Wikipedia Article Quality in One Dimension by Extending ORES with.pdf}
}

@article{theocharis_dynamics_2020,
  title = {The {{Dynamics}} of {{Political Incivility}} on {{Twitter}}},
  author = {Theocharis, Yannis and Barberá, Pablo and Fazekas, Zoltán and Popa, Sebastian Adrian},
  date = {2020-04},
  journaltitle = {SAGE Open},
  shortjournal = {SAGE Open},
  volume = {10},
  number = {2},
  pages = {215824402091944},
  issn = {2158-2440, 2158-2440},
  abstract = {Online incivility and harassment in political communication have become an important topic of concern among politicians, journalists, and academics. This study provides a descriptive account of uncivil interactions between citizens and politicians on Twitter. We develop a conceptual framework for understanding the dynamics of incivility at three distinct levels: macro (temporal), meso (contextual), and micro (individual). Using longitudinal data from the Twitter communication mentioning Members of Congress in the United States across a time span of over a year and relying on supervised machine learning methods and topic models, we offer new insights about the prevalence and dynamics of incivility toward legislators. We find that uncivil tweets represent consistently around 18\% of all tweets mentioning legislators, but with spikes that correspond to controversial policy debates and political events. Although we find evidence of coordinated attacks, our analysis reveals that the use of uncivil language is common to a large number of users.},
  langid = {english},
  file = {/home/nathante/Zotero/storage/6W97WXV2/Theocharis et al. - 2020 - The Dynamics of Political Incivility on Twitter.pdf}
}

@article{trilling_scaling_2018,
title = {Scaling up {{Content Analysis}}},
author = {Trilling, Damian and Jonkman, Jeroen G. F.},
date = {2018-04-03},
journaltitle = {Communication Methods and Measures},
volume = {12},
number = {2-3},
pages = {158--174},
publisher = {{Routledge}},
issn = {1931-2458},
abstract = {Employing a number of different standalone programs is a prevalent approach among communication scholars who use computational methods to analyze media content. For instance, a researcher might use a specific program or a paid service to scrape some content from the Web, then use another program to process the resulting data, and finally conduct statistical analysis or produce some visualizations in yet another program. This makes it hard to build reproducible workflows, and even harder to build on the work of earlier studies. To improve this situation, we propose and discuss four criteria that a framework for automated content analysis should fulfill: scalability, free and open source, adaptability, and accessibility via multiple interfaces. We also describe how to put these considerations into practice, discuss their feasibility, and point toward future developments.},
file = {/home/nathante/Zotero/storage/8EAAYQQE/Trilling_Jonkman_2018_Scaling up Content Analysis.pdf}
}

@article{van_atteveldt_validity_2021,
title = {The {{Validity}} of {{Sentiment Analysis}}: {{Comparing Manual Annotation}}, {{Crowd-Coding}}, {{Dictionary Approaches}}, and {{Machine Learning Algorithms}}},
shorttitle = {The {{Validity}} of {{Sentiment Analysis}}},
author = {family=Atteveldt, given=Wouter, prefix=van, useprefix=true and family=Velden, given=Mariken A. C. G., prefix=van der, useprefix=true and Boukes, Mark},
date = {2021-04-03},
journaltitle = {Communication Methods and Measures},
volume = {15},
number = {2},
pages = {121--140},
issn = {1931-2458},
abstract = {Sentiment is central to many studies of communication science, from negativity and polarization in political communication to analyzing product reviews and social media comments in other sub-fields. This study provides an exhaustive comparison of sentiment analysis methods, using a validation set of Dutch economic headlines to compare the performance of manual annotation, crowd coding, numerous dictionaries and machine learning using both traditional and deep learning algorithms. The three main conclusions of this article are that: (1) The best performance is still attained with trained human or crowd coding; (2) None of the used dictionaries come close to acceptable levels of validity; and (3) machine learning, especially deep learning, substantially outperforms dictionary-based methods but falls short of human performance. From these findings, we stress the importance of always validating automatic text analysis methods before usage. Moreover, we provide a recommended step-by-step approach for (automated) text analysis projects to ensure both efficiency and validity.},
keywords = {Automated Approaches,Evaluation,Manual Annotation,Measurement,Sentiment Analysis,Validity},
file = {/home/nathante/Zotero/storage/M658DYHG/van Atteveldt et al_2021_The Validity of Sentiment Analysis.pdf}
}

@article{van_smeden_reflection_2020,
title = {Reflection on Modern Methods: Five Myths about Measurement Error in Epidemiological Research},
shorttitle = {Reflection on Modern Methods},
author = {family=Smeden, given=Maarten, prefix=van, useprefix=true and Lash, Timothy L and Groenwold, Rolf H H},
date = {2020-02-01},
journaltitle = {International Journal of Epidemiology},
shortjournal = {International Journal of Epidemiology},
volume = {49},
number = {1},
pages = {338--347},
issn = {0300-5771},
abstract = {Epidemiologists are often confronted with datasets to analyse which contain measurement error due to, for instance, mistaken data entries, inaccurate recordings and measurement instrument or procedural errors. If the effect of measurement error is misjudged, the data analyses are hampered and the validity of the study’s inferences may be affected. In this paper, we describe five myths that contribute to misjudgments about measurement error, regarding expected structure, impact and solutions to mitigate the problems resulting from mismeasurements. The aim is to clarify these measurement error misconceptions. We show that the influence of measurement error in an epidemiological data analysis can play out in ways that go beyond simple heuristics, such as heuristics about whether or not to expect attenuation of the effect estimates. Whereas we encourage epidemiologists to deliberate about the structure and potential impact of measurement error in their analyses, we also recommend exercising restraint when making claims about the magnitude or even direction of effect of measurement error if not accompanied by statistical measurement error corrections or quantitative bias analysis. Suggestions for alleviating the problems or investigating the structure and magnitude of measurement error are given.},
file = {/home/nathante/Zotero/storage/GPMMPFYB/van Smeden et al_2020_Reflection on modern methods.pdf;/home/nathante/Zotero/storage/9FVJBERI/5671729.html}
}

@article{vermeer_online_2020,
title = {Online {{News User Journeys}}: {{The Role}} of {{Social Media}}, {{News Websites}}, and {{Topics}}},
shorttitle = {Online {{News User Journeys}}},
author = {Vermeer, Susan and Trilling, Damian and Kruikemeier, Sanne and family=Vreese, given=Claes, prefix=de, useprefix=true},
date = {2020-10-20},
journaltitle = {Digital Journalism},
shortjournal = {Digital Journalism},
volume = {8},
number = {9},
pages = {1114--1141},
issn = {2167-0811, 2167-082X},
langid = {english},
file = {/home/nathante/Zotero/storage/NPE7CB6S/Vermeer et al. - 2020 - Online News User Journeys The Role of Social Medi.pdf}
}

@article{votta_going_2023,
title = {Going {{Micro}} to {{Go Negative}}?: {{Targeting Toxicity}} Using {{Facebook}} and {{Instagram Ads}}},
shorttitle = {Going {{Micro}} to {{Go Negative}}?},
author = {Votta, Fabio and Noroozian, Arman and Dobber, Tom and Helberger, Natali and family=Vreese, given=Claes, prefix=de, useprefix=true},
date = {2023-02-01},
journaltitle = {Computational Communication Research},
volume = {5},
number = {1},
pages = {1--50},
issn = {2665-9085},
langid = {english}
}

@article{wallach_big_2019,
title = {Big {{Data}}, {{Machine Learning}}, and the {{Social Sciences}}: {{Fairness}}, {{Accountability}}, and {{Transparency}}},
shorttitle = {Big {{Data}}, {{Machine Learning}}, and the {{Social Sciences}}},
author = {Wallach, Hanna},
date = {2019-01-16},
journaltitle = {Medium},
abstract = {This essay is a (near) transcript of a talk I recently gave at a NIPS 2014 workshop on “Fairness, Accountability, and Transparency in Machine Learning,” organized by Solon Barocas and Moritz Hardt.},
langid = {american},
file = {/home/nathante/Zotero/storage/XYTVY7WV/big-data-machine-learning-and-the-social-sciences-fairness-accountability-and-transparency.html}
}

@article{weber_extracting_2018,
title = {Extracting {{Latent Moral Information}} from {{Text Narratives}}: {{Relevance}}, {{Challenges}}, and {{Solutions}}},
shorttitle = {Extracting {{Latent Moral Information}} from {{Text Narratives}}},
author = {Weber, René and Mangus, J. Michael and Huskey, Richard and Hopp, Frederic R. and Amir, Ori and Swanson, Reid and Gordon, Andrew and Khooshabeh, Peter and Hahn, Lindsay and Tamborini, Ron},
date = {2018-04-03},
journaltitle = {Communication Methods and Measures},
volume = {12},
number = {2-3},
pages = {119--139},
publisher = {{Routledge}},
issn = {1931-2458},
abstract = {Moral Foundations Theory (MFT) and the Model of Intuitive Morality and Exemplars (MIME) contend that moral judgments are built on a universal set of basic moral intuitions. A large body of research has supported many of MFT’s and the MIME’s central hypotheses. Yet, an important prerequisite of this research—the ability to extract latent moral content represented in media stimuli with a reliable procedure—has not been systematically studied. In this article, we subject different extraction procedures to rigorous tests, underscore challenges by identifying a range of reliabilities, develop new reliability test and coding procedures employing computational methods, and provide solutions that maximize the reliability and validity of moral intuition extraction. In six content analytical studies, including a large crowd-based study, we demonstrate that: (1) traditional content analytical approaches lead to rather low reliabilities; (2) variation in coding reliabilities can be predicted by both text features and characteristics of the human coders; and (3) reliability is largely unaffected by the detail of coder training. We show that a coding task with simplified training and a coding technique that treats moral foundations as fast, spontaneous intuitions leads to acceptable inter-rater agreement, and potentially to more valid moral intuition extractions. While this study was motivated by issues related to MFT and MIME research, the methods and findings in this study have implications for extracting latent content from text narratives that go beyond moral information. Accordingly, we provide a tool for researchers interested in applying this new approach in their own work.}
}

@article{weld_adjusting_2022,
title = {Adjusting for {{Confounders}} with {{Text}}: {{Challenges}} and an {{Empirical Evaluation Framework}} for {{Causal Inference}}},
shorttitle = {Adjusting for {{Confounders}} with {{Text}}},
author = {Weld, Galen and West, Peter and Glenski, Maria and Arbour, David and Rossi, Ryan A. and Althoff, Tim},
date = {2022-05-31},
journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media},
volume = {16},
pages = {1109--1120},
issn = {2334-0770},
abstract = {Causal inference studies using textual social media data can provide actionable insights on human behavior. Making accurate causal inferences with text requires controlling for confounding which could otherwise impart bias. Recently, many different methods for adjusting for confounders have been proposed, and we show that these existing methods disagree with one another on two datasets inspired by previous social media studies. Evaluating causal methods is challenging, as ground truth counterfactuals are almost never available. Presently, no empirical evaluation framework for causal methods using text exists, and as such, practitioners must select their methods without guidance. We contribute the first such framework, which consists of five tasks drawn from real world studies. Our framework enables the evaluation of any casual inference method using text. Across 648 experiments and two datasets, we evaluate every commonly used causal inference method and identify their strengths and weaknesses to inform social media researchers seeking to use such methods, and guide future improvements. We make all tasks, data, and models public to inform applications and encourage additional research.},
langid = {english},
keywords = {Web and Social Media},
file = {/home/nathante/Zotero/storage/LD3DS8GA/Weld et al_2022_Adjusting for Confounders with Text.pdf}
}

@article{wiernik_obtaining_2020,
title = {Obtaining {{Unbiased Results}} in {{Meta-Analysis}}: {{The Importance}} of {{Correcting}} for {{Statistical Artifacts}}},
shorttitle = {Obtaining {{Unbiased Results}} in {{Meta-Analysis}}},
author = {Wiernik, Brenton M. and Dahlke, Jeffrey A.},
date = {2020-03},
journaltitle = {Advances in Methods and Practices in Psychological Science},
shortjournal = {Advances in Methods and Practices in Psychological Science},
volume = {3},
number = {1},
pages = {94--123},
issn = {2515-2459, 2515-2467},
abstract = {Most published meta-analyses address only artifactual variance due to sampling error and ignore the role of other statistical and psychometric artifacts, such as measurement error variance (due to factors including unreliability of measurements, group misclassification, and variable treatment strength) and selection effects (including range restriction or enhancement and collider biases). These artifacts can have severe biasing effects on the results of individual studies and meta-analyses. Failing to account for these artifacts can lead to inaccurate conclusions about the mean effect size and between-studies effect-size heterogeneity, and can influence the results of meta-regression, publication-bias, and sensitivity analyses. In this article, we provide a brief introduction to the biasing effects of measurement error variance and selection effects and their relevance to a variety of research designs. We describe how to estimate the effects of these artifacts in different research designs and correct for their impacts in primary studies and meta-analyses. We consider meta-analyses of correlations, observational group differences, and experimental effects. We provide R code to implement the corrections described.},
langid = {english},
file = {/home/nathante/Zotero/storage/HNEUY89U/Wiernik und Dahlke - 2020 - Obtaining Unbiased Results in Meta-Analysis The I.pdf}
}

@article{williams_bayesian_1998,
title = {Bayesian Classification with {{Gaussian}} Processes},
author = {Williams, C.K.I. and Barber, D.},
date = {1998-12},
journaltitle = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
volume = {20},
number = {12},
pages = {1342--1351},
issn = {1939-3539},
abstract = {We consider the problem of assigning an input vector to one of m classes by predicting P(c|x) for c=1,...,m. For a two-class problem, the probability of class one given x is estimated by /spl sigma/(y(x)), where /spl sigma/(y)=1/(1+e/sup -y/). A Gaussian process prior is placed on y(x), and is combined with the training data to obtain predictions for new x points. We provide a Bayesian treatment, integrating over uncertainty in y and in the parameters that control the Gaussian process prior the necessary integration over y is carried out using Laplace's approximation. The method is generalized to multiclass problems (m{$>$}2) using the softmax function. We demonstrate the effectiveness of the method on a number of datasets.},
eventtitle = {{{IEEE Transactions}} on {{Pattern Analysis}} and {{Machine Intelligence}}},
keywords = {Bayesian methods,Computer Society,Gaussian noise,Gaussian processes,Logistics,Monte Carlo methods,Process control,Training data,Uncertain systems,Uncertainty},
file = {/home/nathante/Zotero/storage/BL4LP5X2/Williams_Barber_1998_Bayesian classification with Gaussian processes.pdf;/home/nathante/Zotero/storage/TX4DTGA2/735807.html}
}

@article{wozniak_event-centered_2021,
title = {The {{Event-Centered Nature}} of {{Global Public Spheres}}: {{The UN Climate Change Conferences}}, {{Fridays}} for {{Future}}, and the ({{Limited}}) {{Transnationalization}} of {{Media Debates}}},
author = {Wozniak, Antal and Wessler, Hartmut and Chan, Chung-hong and Lück, Julia},
date = {2021-01-14},
journaltitle = {International Journal of Communication},
volume = {15},
pages = {688--714},
abstract = {Research has shown how unpremeditated events can influence media attention and media framing. But how do staged political events influence patterns of news coverage across countries, and are such changes sustainable beyond the immediate event context? We examined whether the UN climate change conferences are conducive to an emergence of a transnational public sphere by triggering issue convergence and increased transnational interconnectedness across national media debates. An automated content analysis of climate change coverage in newspapers from Germany, India, South Africa, and the United States between 2012 and 2019 revealed largely event-focused reporting. Media coverage quickly returned to preconference patterns after each conference. References to foreign countries showed almost no relationship to the climate change conferences’ coverage. We found similar results for the effects of the Fridays for Future movement. The significance of these events lies less in long-term changes in media reporting but more in short-term attention generation and coordinated message production.},
keywords = {climate change coverage,comparative research,media content analysis,media events,time series analysis,transnational public sphere},
file = {/home/nathante/Zotero/storage/GR8HDAYJ/Wozniak et al. - 2021 - The Event-Centered Nature of Global Public Spheres.pdf}
}

@book{yi_handbook_2021,
title = {Handbook of {{Measurement Error Models}}},
editor = {Yi, Grace Y. and Delaigle, Aurore and Gustafson, Paul},
date = {2021-10-17},
publisher = {{Chapman and Hall/CRC}},
location = {{New York}},
abstract = {Measurement error arises ubiquitously in applications and has been of long-standing concern in a variety of fields, including medical research, epidemiological studies, economics, environmental studies, and survey research. While several research monographs are available to summarize methods and strategies of handling different measurement error problems, research in this area continues to attract extensive attention. The Handbook of Measurement Error Models provides overviews of various topics on measurement error problems. It collects carefully edited chapters concerning issues of measurement error and evolving statistical methods, with a good balance of methodology and applications. It is prepared for readers who wish to start research and gain insights into challenges, methods, and applications related to error-prone data. It also serves as a reference text on statistical methods and applications pertinent to measurement error models, for researchers and data analysts alike. Features: Provides an account of past development and modern advancement concerning measurement error problems Highlights the challenges induced by error-contaminated data Introduces off-the-shelf methods for mitigating deleterious impacts of measurement error Describes state-of-the-art strategies for conducting in-depth research},
isbn = {978-1-315-10127-9},
pagetotal = {592},
file = {/home/nathante/Zotero/storage/47CS3UND/Yi et al_2021_Handbook of Measurement Error Models.pdf}
}

@inproceedings{zannettou_measuring_2020,
title = {Measuring and {{Characterizing Hate Speech}} on {{News Websites}}},
booktitle = {12th {{ACM Conference}} on {{Web Science}}},
author = {Zannettou, Savvas and Elsherief, Mai and Belding, Elizabeth and Nilizadeh, Shirin and Stringhini, Gianluca},
date = {2020-07-06},
series = {{{WebSci}} '20},
pages = {125--134},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
abstract = {The Web has become the main source for news acquisition. At the same time, news discussion has become more social: users can post comments on news articles or discuss news articles on other platforms like Reddit. These features empower and enable discussions among the users; however, they also act as the medium for the dissemination of toxic discourse and hate speech. The research community lacks a general understanding on what type of content attracts hateful discourse and the possible effects of social networks on the commenting activity on news articles. In this work, we perform a large-scale quantitative analysis of 125M comments posted on 412K news articles over the course of 19 months. We analyze the content of the collected articles and their comments using temporal analysis, user-based analysis, and linguistic analysis, to shed light on what elements attract hateful comments on news articles. We also investigate commenting activity when an article is posted on either 4chan’s Politically Incorrect board (/pol/) or six selected subreddits. We find statistically significant increases in hateful commenting activity around real-world divisive events like the “Unite the Right” rally in Charlottesville and political events like the second and third 2016 US presidential debates. Also, we find that articles that attract a substantial number of hateful comments have different linguistic characteristics when compared to articles that do not attract hateful comments. Furthermore, we observe that the post of a news articles on either /pol/ or the six subreddits is correlated with an increase of (hateful) commenting activity on the news articles.},
isbn = {978-1-4503-7989-2},
file = {/home/nathante/Zotero/storage/GPCWVQLY/Zannettou et al_2020_Measuring and Characterizing Hate Speech on News \;Websites.pdf}
}

@report{zhang_how_2021,
type = {preprint},
title = {How {{Using Machine Learning Classification}} as a {{Variable}} in {{Regression Leads}} to {{Attenuation Bias}} and {{What}} to {{Do About It}}},
author = {Zhang, Han},
date = {2021-05-29},
institution = {{SocArXiv}},
abstract = {Social scientists have increasingly been applying machine learning algorithms to big data to measure theoretical concepts and then using these machine-predicted variables in regression. This article first demonstrates that directly inserting binary predictions (i.e. classification) without regard for prediction error will generally lead to the attenuation bias of slope coefficients or marginal effect estimates. We then propose five estimators with which to obtain consistent estimates of the coefficients. The estimators require validation data; both machine prediction and true values can be used. Monte Carlo simulations are used to demonstrate the effectiveness and robustness of the proposed estimators. We summarize the pattern of usage of machine learning predictions in 12 recent publications in the top social science journals, apply our proposed estimators to four of them, and offer some practical recommendations. We develop an R package (CCER) to help researchers use the proposed estimators.},
langid = {english},
file = {/home/nathante/Zotero/storage/HYJ5LBR6/Zhang - 2021 - How Using Machine Learning Classification as a Var.pdf}
}

@article{zhao_assumptions_2013,
title = {Assumptions behind {{Intercoder Reliability Indices}}},
author = {Zhao, Xinshu and Liu, Jun S. and Deng, Ke},
date = {2013-01-01},
journaltitle = {Annals of the International Communication Association},
volume = {36},
number = {1},
pages = {419--480},
publisher = {{Routledge}},
issn = {2380-8985},
file = {/home/nathante/Zotero/storage/TDF2I55Y/Zhao et al_2013_Assumptions behind Intercoder Reliability Indices.pdf;/home/nathante/Zotero/storage/64NWAITD/23808985.2013.html}
}
