diff --git a/Ecology_REU_Flyer.docx b/Ecology_REU_Flyer.docx new file mode 100644 index 0000000..14d3419 Binary files /dev/null and b/Ecology_REU_Flyer.docx differ diff --git a/dissertation_proposals/teblunthuis_ecology.pdf b/dissertation_proposals/teblunthuis_ecology.pdf new file mode 100644 index 0000000..78cbcca Binary files /dev/null and b/dissertation_proposals/teblunthuis_ecology.pdf differ diff --git a/dissertations/nathante_uw_2021/ETD_version.tex b/dissertations/nathante_uw_2021/ETD_version.tex new file mode 100644 index 0000000..9b4d793 --- /dev/null +++ b/dissertations/nathante_uw_2021/ETD_version.tex @@ -0,0 +1,321 @@ + \documentclass[12pt]{memoir} + +\usepackage{cdsc-memoir} +% there are two chapter styles: cdsc-article and cdsc-memo +% memo assumes that you remove the "\\" and the email address from the +% \author field below as well as that you will comment out the +% \published tag +\chapterstyle{cdsc-article} + +\usepackage[utf8]{inputenc} +\usepackage{wrapfig} +\usepackage[T1]{fontenc} +\usepackage{textcomp} +% \usepackage[garamond]{mathdesign} +\let\circledS\undefined + +\usepackage[letterpaper,left=1in,right=1in,top=1in,bottom=1in]{geometry} + +% packages i use in essentially every document +\usepackage{graphicx} +\usepackage{enumerate} +\newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX +\newcommand{\maxwidth}{\linewidth} +% packages i use in many documents but leave off by default +\usepackage{amsmath, amsthm, amssymb} +\usepackage{dcolumn} +% \usepackage{endfloat} + +% import and customize urls +\usepackage[usenames,dvipsnames]{color} +\usepackage[breaklinks]{hyperref} + +\hypersetup{colorlinks=true, linkcolor=Black, citecolor=Black, filecolor=Blue, + urlcolor=Blue, unicode=true} + +\usepackage{xcolor} +\definecolor{shadecolor}{rgb}{.97, .97, .97} +\definecolor{messagecolor}{rgb}{0, 0, 0} +\definecolor{warningcolor}{rgb}{1, 0, 1} +\definecolor{errorcolor}{rgb}{1, 0, 0} +\definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345} 
+ +\definecolor{mygreen}{HTML}{43bf71} + +% list of footnote symbols for \thanks{} +\makeatletter +\renewcommand*{\@fnsymbol}[1]{\ensuremath{\ifcase#1\or *\or \dagger\or \ddagger\or + \mathsection\or \mathparagraph\or \|\or **\or \dagger\dagger + \or \ddagger\ddagger \else\@ctrerr\fi}} +\makeatother +\newcommand*\samethanks[1][\value{footnote}]{\footnotemark[#1]} + +% add bibliographic stuff +\usepackage[american]{babel} +\usepackage{csquotes} +\usepackage[natbib=true, style=apa, backend=biber]{biblatex} +%\addbibresource{ecological_models.bib} +%\addbibresource{ch1_intro.bib} +\addbibresource{articlequality.bib} +\addbibresource{equalogy_refs.bib} +\addbibresource{refs.bib} +\addbibresource{ReadingTime.bib} +\addbibresource{ores_fairness.bib} +\DeclareLanguageMapping{american}{american-apa} + +\defbibheading{secbib}[\bibname]{% + \section*{#1}% + \markboth{#1}{#1}% + \baselineskip 14.2pt% + \prebibhook} + +\def\citepos#1{\citeauthor{#1}'s (\citeyear{#1})} +\def\citespos#1{\citeauthor{#1}' (\citeyear{#1})} + + + +% memoir function to take out of the space out of the whitespace lists +\firmlists + +% \newcommand*\abstract[1]{ + +% LATEX NOTE: these lines will import vc stuff after running `make vc` which +% will add version control information to the bottom of each page. 
This can be +% useful for keeping track of which version of a document somebody has: +% \input{vc} +% \pagestyle{cdsc-page-git} + +% LATEX NOTE: this alternative line will just input a timestamp at the +% build process, useful for Overleaf +% \pagestyle{cdsc-page-overleaf} + +% \definecolor{fgcolor}{rgb}{0.345, 0.345, 0.345} +% \newcommand{\hlnum}[1]{\textcolor[rgb]{0.686,0.059,0.569}{#1}}% +% \newcommand{\hlstr}[1]{\textcolor[rgb]{0.192,0.494,0.8}{#1}}% +% \newcommand{\hlcom}[1]{\textcolor[rgb]{0.678,0.584,0.686}{\textit{#1}}}% +% \newcommand{\hlopt}[1]{\textcolor[rgb]{0,0,0}{#1}}% +% \newcommand{\hlstd}[1]{\textcolor[rgb]{0.345,0.345,0.345}{#1}}% +% \newcommand{\hlkwa}[1]{\textcolor[rgb]{0.161,0.373,0.58}{\textbf{#1}}}% +% \newcommand{\hlkwb}[1]{\textcolor[rgb]{0.69,0.353,0.396}{#1}}% +% \newcommand{\hlkwc}[1]{\textcolor[rgb]{0.333,0.667,0.333}{#1}}% +% \newcommand{\hlkwd}[1]{\textcolor[rgb]{0.737,0.353,0.396}{\textbf{#1}}}% +% \let\hlipl\hlkwb + +% \usepackage{framed} +% \makeatletter +% \newenvironment{kframe}{% +% \def\at@end@of@kframe{}% +% \ifinner\ifhmode% +% \def\at@end@of@kframe{\end{minipage}}% +% \begin{minipage}{\columnwidth}% +% \fi\fi% +% \def\FrameCommand##1{\hskip\@totalleftmargin \hskip-\fboxsep +% \colorbox{shadecolor}{##1}\hskip-\fboxsep +% % There is no \\@totalrightmargin, so: +% \hskip-\linewidth \hskip-\@totalleftmargin \hskip\columnwidth}% +% \MakeFramed {\advance\hsize-\width +% \@totalleftmargin\z@ \linewidth\hsize +% \@setminipage}}% +% {\par\unskip\endMakeFramed% +% \at@end@of@kframe} +% \makeatother + +% \definecolor{shadecolor}{rgb}{.97, .97, .97} +% \definecolor{messagecolor}{rgb}{0, 0, 0} +% \definecolor{warningcolor}{rgb}{1, 0, 1} +% \definecolor{errorcolor}{rgb}{1, 0, 0} +% \newenvironment{knitrout}{}{} % an empty environment to be redefined in TeX + +\usepackage{alltt} + +\definecolor{c77a1d2}{RGB}{119,161,210} +\definecolor{bf9837}{RGB}{191,152,55} +\definecolor{cc0c0c0}{RGB}{192,192,192} +\def \globalscale {0.2} + 
+\definecolor{mycomp}{RGB}{250,198,49} +\definecolor{mymut}{RGB}{13,8,135} + + +%\usepackage{wrapfig} +\usepackage{tikz} +\usepackage{booktabs} +\usepackage{multicol} + +% TODO make table of contents HERE + +% TODO add Acknowledgements HERE + + +% \begin{acks} + +% \end{acks} + + +% TODO add Dedication HERE + +% Add Chapter Titles + +\usepackage{subcaption} +\def\citepos#1{{\hypersetup{citecolor=black}\citeauthor{#1}}'s \citep{#1}} +\def\citespos#1{{\hypersetup{citecolor=black}\citeauthor{#1}}' \citep{#1}} +\let\oldciteauthor=\citeauthor +\def\citeauthor#1{{\hypersetup{citecolor=black}\oldciteauthor{#1}}} +%% +\usepackage[htt]{hyphenat} +\usepackage{commath} +\usepackage{mathtools} + +\renewcommand{\widetilde}[1]{\mathbin{% + \stackrel{\sim}{\smash{#1} \rule{0pt}{1.15ex}}% + }} + + +\let\oldnorm\norm % <-- Store original \norm as \oldnorm +\let\norm\undefined % <-- "Undefine" \norm +\DeclarePairedDelimiter\norm{\lVert}{\rVert} + +%% end of the preamble, start of the body of the document source. 
+\hyphenation{shit-gun-con-trol-lers-say com-mer-cial-real-est-ate real-est-ate sub-red-dit sub-red-dits real-est-ate-in-vest-ing fin-an-cial-in-de-pen-dence in-fin-ite-war-fare vint-age-aud-io rus-sia-la-go march-ag-ainst-trump} +\IfFileExists{upquote.sty}{\usepackage{upquote}}{} + +\def\Slash{\slash\hspace{0pt}} + +% \chapterstyle{thatcher} + +% this one is also good and more formal +% \chapterstyle{thatcher} + +% \renewcommand*{\chapterheadstart}{\begingroup +% \vspace*{\beforechapskip}% +% \begin{adjustwidth}{}{-\chapindent}% +% \hrulefill +% \smash{\rule{0.4pt}{15mm}} +% \end{adjustwidth}\endgroup} +\usepackage{longtable} +\usepackage{color, colortbl} +\definecolor{lavenderblue}{rgb}{0.9, 0.9, 0.98} +\usepackage{graphicx} +\usepackage{multirow} +\usepackage{svg} +\usepackage{afterpage} + +%% magic command to not add links on \citeauthor +\usepackage{etoolbox} +\makeatletter +\pretocmd{\NAT@citexnum}{\@ifnum{\NAT@ctype>\z@}{\let\NAT@hyper@\relax}{}}{}{} +\makeatother + +\DeclareMathOperator*{\argmin}{arg\,min} % thin space, limits underneath in displays +\DeclareMathOperator*{\argmax}{arg\,max} % thin space, limits underneath in displays + + +\begin{document} +\tableofcontents +\listoffigures +\listoftables +\chapter*[Acknowledgments]{Acknowledgments} + +I am grateful to the many academic friends, colleagues, and mentors who have cultivated my intellectual development, helped me work on these ideas, and in every other way made possible my success. +I would especially like to thank the members of the Community Data Science Collective, and in particular Aaron Shaw, Sohyeon Hwang, Jeremy Foote, Carl Colglazier, Floor Fiers, Sejal Khatri, Stefania Druga, Nicholas Vincent, and Kaylea Champion, for their helpful feedback on parts of this work. +Also thanks to Mako and Aaron for their innovation, dedication and care in organizing this very special research group. 
+I am also grateful to my collaborators I have not yet mentioned: Isabella Brown, Laura (Alia) Levi, Nicole McGinnis, Tilman Bayer, Olga Vasileva, and Aaron Halfaker. +Special thanks to Daryn McElroy for her work to externally validate our clusters. +Thanks to Mark Kott for his excellent course on mathematical ecology, which inspired an important turning point in the direction of this work, and to Carmen Gonzalez and Matthew Powers for their fantastic course on fieldwork research methods. The importance of this education in qualitative research to this work surprised me, but I doubt it would surprise them. +I am also grateful to the organizers and participants in the social computing reading group (SCRG) at the University of Washington. My participation in this reading group has been invaluable to any ability I have to make contributions to social computing or HCI. +I owe special gratitude to my 20 interview participants for their time and knowledge. +I am thankful to the organizers and members of UAW Local 4121 for their strength and solidarity. +Thanks to Jason Baumgartner and pushshift.io for the Reddit data archive. +This work was made possible by generous financial support from the National Science Foundation grants IIS-1908850 and IIS-1910202 and GRFP2016220885 and was facilitated through the use of the advanced computational infrastructure provided by the Hyak supercomputer system at the University of Washington. + +\chapter*[Dedication]{Dedication} +To Amanda, my dear full mutualist. 
+ + +\chapter*[Preface to Chapter 1]{Preface to Chapter 1} +Several paragraphs at the beginning of the following chapter are adapted from text I wrote for a grant proposal submitted to the National Science Foundation (\url{https://www.nsf.gov/awardsearch/showAward?AWD_ID=1910202}, 1910202). +\begin{refsection} +\chapter[An Ecology of Digital Affiliation]{Introduction: An Ecology of Digital Affiliation} +\input{ch1_intro.tex} +% \end{refsection} +% \begin{refsection} +\chapter*[Preface to Chapter 2]{Preface to Chapter 2} +The following chapter is a collaborative work with Benjamin Mako Hill. + +\noindent It was honored with a Top Paper award from the Computational Methods Division of the International Communication Association's 2021 annual meeting. An early version of this chapter was presented at the 2020 International Conference on Computational Social Science (IC2S2 2020). +\chapter[Identifying Competition and Mutualism]{Identifying Competition and Mutualism Between Online Groups} +\input{ch2_identifying.tex} +% \end{refsection} +% \begin{refsection} +\chapter*[Preface to Chapter 3]{Preface to Chapter 3} + +An important finding from Chapter 2 is that mutualism is much more common than competition among overlapping subreddits. This finding was also surprising because ecological theory and prior results in social computing suggest that greater niche overlaps result in stronger competition. Furthermore, theories of organizational ecology were insufficient for explaining the reasons why overlapping online communities exist in the first place. Therefore, the qualitative investigation presented in Chapter 3 provided important explanation and validation of the quantitative finding of widespread mutualism in terms of the experiences and understandings of active participants in overlapping subreddits. 
If the findings from Chapter 3 had been known in advance of Chapter 2's study, Chapter 2 would have been more likely to anticipate widespread mutualism and may have been designed to explain it. + +Because Chapters 2, 3, and 4 are each written as stand-alone articles, some parts of the background section of Chapter 3, most notably the first 3 paragraphs of §3.2, make some of the same points as the background section of Chapter 2. +Also, the interview recruitment process uses an earlier version of the clustering algorithm from Chapter 2 (before it was improved during a revise and resubmit process). The second paragraph of §3.5 summarizes the clustering procedure. Readers of Chapter 2 may quickly pass over these paragraphs. + +This chapter is a collaborative work with Charles Kiene, Isabella Brown, Laura (Alia) Levi, Nicole McGinnis, and Benjamin Mako Hill and is under review in Proceedings of the ACM on Human-Computer Interaction: Computer Supported Cooperative Work. + + \chapter[No Community Can Do Everything]{No Community Can Do Everything: Why People Participate in Similar Online Communities} +\input{equalogy.tex} +% \end{refsection} +% \begin{refsection} +\chapter*[Preface to Chapter 4]{Preface to Chapter 4} +As was the case with Chapter 3, Chapter 4 is written as a stand-alone article building upon Chapter 3. It repeats some of the same motivating points in the first paragraph of §4.1, and the first two paragraphs of §4.2. + +This study also reuses the clustering procedure from Chapter 2, but on a larger dataset. The first three paragraphs of §4.3 describe the clustering procedure. Those who have read Chapter 2 may quickly pass over these paragraphs, noting that the sample size, dimensionality of LSI, and the number of clusters are different from Chapter 2. 
+ +\chapter[Dynamics of Ecological Adaptation]{Dynamics of Ecological Adaptation in Online Communities} +\input{ch4_competitive_exclusion.tex} +% \end{refsection} +% \begin{refsection} +\chapter[Future Directions]{Future Directions in the Ecology of Online Communities} +\input{ch5_conclusion.tex} +\end{refsection} +\appendix +\begin{refsection} +\chapter*[Preface to Appendix A]{Preface to Appendix A} +The following appendix is published in the Proceedings of The 17th International Symposium on Open Collaboration. + +\chapter[Measuring Article Quality]{Measuring Wikipedia Article Quality in One Dimension by Extending ORES with Ordinal Regression} +\input{appendix_A_articlequality.tex} +\end{refsection} +\chapter*[Preface to Appendix B]{Preface to Appendix B} + The following appendix is a collaborative work with Tilman Bayer and Olga Vasileva and is published in the Proceedings of The 15th International Symposium on Open Collaboration. +\begin{refsection} + \chapter[Dwelling on Wikipedia]{Dwelling on Wikipedia: Investigating time spent by global encyclopedia readers} +\input{appendix_B_readingtime.tex} +\end{refsection} +\chapter*[Preface to Appendix C]{Preface to Appendix C} + The following appendix is a collaborative work with Benjamin Mako Hill and Aaron Halfaker and is published in the Proceedings of ACM on Human-Computer Interaction: Computer Supported Cooperative Work. 
+\begin{refsection} + \chapter[Effects of Algorithmic Flagging on Fairness]{Effects of Algorithmic Flagging on Fairness: Quasi-experimental Evidence from Wikipedia} +\input{appendix_C_oresfairness.tex} +\end{refsection} + +% \appendix + +%\renewcommand{\thechapter}{A} after \chapter{Test Appendix} + +% \addtocontents{toc}{\setlength\cftchapternumwidth{1em}} +% \renewcommand\thechapter{} + + + +% \begin{refsection} +% \chapter[Future Directions]{Future Directions in the Ecology of Online Communities} +% \input{appendix_B.tex} +% \end{refsection} +% \begin{refsection} +% \chapter[Future Directions]{Future Directions in the Ecology of Online Communities} +% \input{appendix_C.tex} +% \end{refsection} + + + +% bibliography here +% \setcounter{biburlnumpenalty}{9001} +% \printbibliography[title = {References}, heading=secbib] +\end{document} + +% LocalWords: diff --git a/dissertations/nathante_uw_2021/Makefile b/dissertations/nathante_uw_2021/Makefile new file mode 100644 index 0000000..30767c0 --- /dev/null +++ b/dissertations/nathante_uw_2021/Makefile @@ -0,0 +1,32 @@ +#!/usr/bin/make + +all: ETD_version.pdf + pdftk copyright_page.pdf title_page.pdf abstract.pdf ETD_version.pdf cat output diss_ecology_of_online_communities.pdf + +# use the following section for Rnw/knitr documents +# all: $(patsubst %.Rnw,%.pdf,$(wildcard *.Rnw)) +# %.tex: %.Rnw +# Rscript -e "library(knitr); knit('$<')" + +%.pdf: %.tex + latexmk -f -pdf $< + +clean: + latexmk -C *.tex + rm -f *.tmp *.run.xml + rm -f vc + rm -f *.bbl + +# the following lines are useful for Rnw/knitr +# rm -rf cache/ figure/ +# rm -f *.tex + +viewpdf: all + evince *.pdf + +vc: resources/vc-git + +pdf: all + +.PHONY: clean all +# .PRECIOUS: %.tex diff --git a/dissertations/nathante_uw_2021/ReadingTime.bib b/dissertations/nathante_uw_2021/ReadingTime.bib new file mode 100644 index 0000000..41b8e8d --- /dev/null +++ b/dissertations/nathante_uw_2021/ReadingTime.bib @@ -0,0 +1,818 @@ + +@article{hill_wikipedia_2013, + title 
= {The {{Wikipedia}} Gender Gap Revisited: Characterizing Survey Response Bias with Propensity Score Estimation}, + volume = {8}, + shorttitle = {The {{Wikipedia Gender Gap Revisited}}}, + number = {6}, + journal = {PLoS ONE}, + doi = {10.1371/journal.pone.0065782}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + month = jun, + year = {2013} +} + +@inproceedings{antin_social_2012, + address = {{New York, NY, USA}}, + series = {{{CHI}} '12}, + title = {Social Desirability Bias and Self-Reports of Motivation: A Study of {{Amazon Mechanical Turk}} in the {{US}} and {{India}}}, + isbn = {978-1-4503-1015-4}, + shorttitle = {Social {{Desirability Bias}} and {{Self}}-Reports of {{Motivation}}}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + publisher = {{ACM}}, + doi = {10.1145/2207676.2208699}, + author = {Antin, Judd and Shaw, Aaron}, + year = {2012}, + keywords = {distributed work,social desirability,motivation,crowdsourcing,amazon mechanical turk}, + pages = {2925--2934} +} + +@article{preece_reader--leader_2009, + title = {The Reader-to-Leader Framework: Motivating Technology-Mediated Social Participation}, + volume = {1}, + issn = {1944-3900}, + shorttitle = {The {{Reader}}-to-{{Leader Framework}}}, + number = {1}, + journal = {AIS Transactions on Human-Computer Interaction}, + author = {Preece, Jennifer and Shneiderman, Ben}, + year = {2009}, + pages = {13-32} +} + +@inproceedings{arazy_functional_2015, + address = {{New York, NY}}, + series = {{{CSCW}} '15}, + title = {Functional Roles and Career Paths in {{Wikipedia}}}, + isbn = {978-1-4503-2922-4}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + publisher = {{ACM}}, + doi = {10.1145/2675133.2675257}, + author = {Arazy, Ofer and Ortega, Felipe and Nov, Oded and Yeo, Lisa and Balila, Adam}, + year = {2015}, + keywords = {peer-production,functional roles,role 
transitions,ORGANIZATIONAL structure,wikipedia}, + pages = {1092--1105} +} + +@inproceedings{warncke-wang_misalignment_2015, + title = {Misalignment {{Between Supply}} and {{Demand}} of {{Quality Content}} in {{Peer Production Communities}}}, + language = {en}, + urldate = {2016-08-15}, + booktitle = {Ninth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + url = {http://www.aaai.org/ocs/index.php/ICWSM/ICWSM15/paper/view/10591}, + author = {{Warncke-Wang}, Morten and Ranjan, Vivek and Terveen, Loren and Hecht, Brent}, + month = apr, + year = {2015} +} + +@article{stvilia_issues_2009, + title = {Issues of Cross-Contextual Information Quality Evaluation\textemdash{{The}} Case of {{Arabic}}, {{English}}, and {{Korean Wikipedias}}}, + volume = {31}, + issn = {07408188}, + language = {en}, + number = {4}, + journal = {Library \& Information Science Research}, + doi = {10.1016/j.lisr.2009.07.005}, + author = {Stvilia, Besiki and {Al-Faraj}, Abdullah and Yi, Yong Jeong}, + month = dec, + year = {2009}, + pages = {232-239} +} + +@article{johnson_emergence_2014, + title = {Emergence of Power Laws in Online Communities: {{The}} Role of Social Mechanisms and Preferential Attachment.}, + volume = {38}, + shorttitle = {Emergence of {{Power Laws}} in {{Online Communities}}}, + number = {3}, + urldate = {2017-04-26}, + journal = {Management Information Systems Quarterly}, + url = {http://aisel.aisnet.org/cgi/viewcontent.cgi?article=3193\&context=misq}, + author = {Johnson, Steven L. 
and Faraj, Samer and Kudaravalli, Srinivas}, + year = {2014}, + pages = {795--808} +} + +@article{boyd_critical_2012, + title = {Critical {{Questions For Big Data}}: {{Provocations}} for a Cultural, Technological, and Scholarly Phenomenon}, + volume = {15}, + issn = {1369-118X, 1468-4462}, + shorttitle = {{{CRITICAL QUESTIONS FOR BIG DATA}}}, + language = {en}, + number = {5}, + journal = {Information, Communication \& Society}, + doi = {10.1080/1369118X.2012.678878}, + author = {{boyd}, danah and Crawford, Kate}, + month = jun, + year = {2012}, + pages = {662-679} +} + +@article{shaw_pipeline_2018, + title = {The Pipeline of Online Participation Inequalities: The Case of {{Wikipedia}} Editing}, + volume = {68}, + issn = {0021-9916}, + shorttitle = {The {{Pipeline}} of {{Online Participation Inequalities}}}, + language = {en}, + number = {1}, + journal = {Journal of Communication}, + doi = {10.1093/joc/jqx003}, + author = {Shaw, Aaron and Hargittai, Eszter}, + month = feb, + year = {2018}, + pages = {143-168} +} + +@article{pal_exponentiated_2006, + title = {Exponentiated {{Weibull}} Distribution}, + volume = {66}, + copyright = {Copyright (c)}, + issn = {1973-2201}, + language = {en}, + number = {2}, + journal = {Statistica}, + doi = {10.6092/issn.1973-2201/493}, + author = {Pal, Manisha and Ali, M. Masoom and Woo, Jungsoo}, + year = {2006}, + pages = {139-147} +} + +@article{gupta_exponentiated_2001, + title = {Exponentiated {{Exponential Family}}: {{An Alternative}} to {{Gamma}} and {{Weibull Distributions}}}, + volume = {43}, + copyright = {\textcopyright{} 2001 WILEY-VCH Verlag Berlin GmbH, Fed. Rep. of Germany}, + issn = {1521-4036}, + shorttitle = {Exponentiated {{Exponential Family}}}, + language = {en}, + number = {1}, + journal = {Biometrical Journal}, + doi = {10.1002/1521-4036(200102)43:1<117::AID-BIMJ117>3.0.CO;2-R}, + author = {Gupta, Rameshwar D. 
and Kundu, Debasis}, + month = feb, + year = {2001}, + keywords = {Fisher Information matrix,Gamma distribution,Hazard rate ordering,Likelihood ratio ordering,Maximum Likelihood Estimator,Stochastic ordering,Weibull distribution}, + pages = {117-130} +} + +@inproceedings{liu_understanding_2010, + address = {{New York, NY, USA}}, + series = {{{SIGIR}} '10}, + title = {Understanding {{Web Browsing Behaviors Through Weibull Analysis}} of {{Dwell Time}}}, + isbn = {978-1-4503-0153-4}, + booktitle = {Proceedings of the 33rd {{International ACM SIGIR Conference}} on {{Research}} and {{Development}} in {{Information Retrieval}}}, + publisher = {{ACM}}, + doi = {10.1145/1835449.1835513}, + author = {Liu, Chao and White, Ryen W. and Dumais, Susan}, + year = {2010}, + keywords = {dwell time,user behaviors,web browsing,Weibull analysis}, + pages = {379--386} +} + +@article{mitzenmacher_brief_2004, + title = {A {{Brief History}} of {{Generative Models}} for {{Power Law}} and {{Lognormal Distributions}}}, + volume = {1}, + issn = {1542-7951}, + number = {2}, + journal = {Internet Mathematics}, + doi = {10.1080/15427951.2004.10129088}, + author = {Mitzenmacher, Michael}, + month = jan, + year = {2004}, + pages = {226-251} +} + +@inproceedings{miquel-ribe_cultural_2016, + address = {{New York, NY, USA}}, + series = {{{SMSociety}} '16}, + title = {Cultural {{Identities}} in {{Wikipedias}}}, + isbn = {978-1-4503-3938-4}, + booktitle = {Proceedings of the 7th 2016 {{International Conference}} on {{Social Media}} \& {{Society}}}, + publisher = {{ACM}}, + doi = {10.1145/2930971.2930996}, + author = {{Miquel-Rib{\'e}}, Marc and Laniado, David}, + year = {2016}, + keywords = {Wikipedia,Online Communities,Analytics \& Data Mining,Cross-cultural studies,Cultural Identity}, + pages = {24:1--24:10} +} + +@inproceedings{lehmann_reader_2014, + address = {{New York, NY, USA}}, + series = {{{HT}} '14}, + title = {Reader {{Preferences}} and {{Behavior}} on {{Wikipedia}}}, + isbn = 
{978-1-4503-2954-5}, + booktitle = {Proceedings of the 25th {{ACM Conference}} on {{Hypertext}} and {{Social Media}}}, + publisher = {{ACM}}, + doi = {10.1145/2631775.2631805}, + author = {Lehmann, Janette and {M{\"u}ller-Birn}, Claudia and Laniado, David and Lalmas, Mounia and Kaltenbrunner, Andreas}, + year = {2014}, + keywords = {article quality,reader,wikipedia,human factors,editor,engagement,measurement,reading behavior,reading interest}, + pages = {88--97} +} + +@article{baliamounelutz_analysis_2003, + title = {An Analysis of the Determinants and Effects of {{ICT}} Diffusion in Developing Countries}, + volume = {10}, + copyright = {Copyright \textcopyright{} 2003 IOS Press}, + issn = {1554-0170}, + language = {en}, + number = {3}, + journal = {Information Technology for Development}, + doi = {10.1002/itdj.1590100303}, + author = {Baliamoune-Lutz, Mina}, + month = jun, + year = {2003}, + pages = {151-169} +} + +@article{pearce_digital_2013, + title = {Digital {{Divides From Access}} to {{Activities}}: {{Comparing Mobile}} and {{Personal Computer Internet Users}}}, + volume = {63}, + copyright = {\textcopyright{} 2013 International Communication Association}, + issn = {1460-2466}, + shorttitle = {Digital {{Divides From Access}} to {{Activities}}}, + language = {en}, + number = {4}, + journal = {Journal of Communication}, + doi = {10.1111/jcom.12045}, + author = {Pearce, Katy E. 
and Rice, Ronald E.}, + month = aug, + year = {2013}, + pages = {721-744} +} + +@article{marler_mobile_2018, + title = {Mobile Phones and Inequality: {{Findings}}, Trends, and Future Directions}, + volume = {20}, + issn = {1461-4448}, + shorttitle = {Mobile Phones and Inequality}, + language = {en}, + number = {9}, + journal = {New Media \& Society}, + doi = {10.1177/1461444818765154}, + author = {Marler, Will}, + month = sep, + year = {2018}, + pages = {3498-3520} +} + +@article{asadi_motivating_2013, + title = {Motivating and Discouraging Factors for {{Wikipedians}}: The Case Study of {{Persian Wikipedia}}}, + volume = {62}, + issn = {0024-2535}, + shorttitle = {Motivating and Discouraging Factors for {{Wikipedians}}}, + number = {4/5}, + journal = {Library Review}, + doi = {10.1108/LR-10-2012-0114}, + author = {Asadi, Saeid and Ghafghazi, Shadi and R. Jamali, Hamid}, + month = jul, + year = {2013}, + pages = {237-252} +} + +@article{ojanpera_engagement_2017, + title = {Engagement in the {{Knowledge Economy}}: {{Regional Patterns}} of {{Content Creation}} with a {{Focus}} on {{Sub}}-{{Saharan Africa}}}, + volume = {13}, + issn = {1544-7529}, + shorttitle = {Engagement in the {{Knowledge Economy}}}, + language = {en}, + number = {0}, + urldate = {2018-10-22}, + journal = {Information Technologies \& International Development}, + url = {https://itidjournal.org/index.php/itid/article/view/1479}, + author = {Ojanper{\"a}, Sanna and Graham, Mark and Straumann, Ralph and Sabbata, Stefano De and Zook, Matthew}, + month = mar, + year = {2017}, + keywords = {digital divide,domain registrations,geographies of knowledge,GitHub,information geographies,international development}, + pages = {19} +} + +@article{he_the_tower_of_babel.jpg:_nodate, + title = {The\_{{Tower}}\_of\_{{Babel}}.Jpg: {{Diversity}} of {{Visual Encyclopedic Knowledge Across Wikipedia Language Editions}}}, + language = {en}, + author = {He, Shiqing and Lin, Allen Yilun and Adar, Eytan and Hecht, Brent}, + 
pages = {10} +} + +@inproceedings{halfaker_user_2015, + address = {{Republic and Canton of Geneva, Switzerland}}, + series = {{{WWW}} '15}, + title = {User {{Session Identification Based}} on {{Strong Regularities}} in {{Inter}}-Activity {{Time}}}, + isbn = {978-1-4503-3469-3}, + booktitle = {Proceedings of the 24th {{International Conference}} on {{World Wide Web}}}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + doi = {10.1145/2736277.2741117}, + author = {Halfaker, Aaron and Keyes, Os and Kluver, Daniel and {Thebault-Spieker}, Jacob and Nguyen, Tien and Shores, Kenneth and Uduwage, Anuradha and {Warncke-Wang}, Morten}, + year = {2015}, + keywords = {activity,modeling,metrics,analytics,human behavior,regularities,user session}, + pages = {410--418} +} + +@article{kocielnik_reciprocity_2018, + title = {Reciprocity and {{Donation}}: {{How Article Topic}}, {{Quality}} and {{Dwell Time Predict Banner Donation}} on {{Wikipedia}}}, + volume = {2}, + issn = {25730142}, + shorttitle = {Reciprocity and {{Donation}}}, + language = {en}, + number = {CSCW}, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + doi = {10.1145/3274360}, + author = {Kocielnik, Rafal and Keyes, Os and Morgan, Jonathan T. and Taraborelli, Dario and McDonald, David W. and Hsieh, Gary}, + month = nov, + year = {2018}, + pages = {1-20} +} + +@article{kiesler_response_1986, + title = {Response {{Effects}} in the {{Electronic Survey}}}, + volume = {50}, + issn = {0033-362X}, + language = {en}, + number = {3}, + journal = {Public Opinion Quarterly}, + doi = {10.1086/268992}, + author = {Kiesler, Sara and Sproull, Lee S.}, + month = jan, + year = {1986}, + pages = {402-413} +} + +@article{phillips_effects_1972, + title = {Some {{Effects}} of "{{Social Desirability}}" in {{Survey Studies}}}, + volume = {77}, + issn = {0002-9602}, + number = {5}, + journal = {American Journal of Sociology}, + doi = {10.1086/225231}, + author = {Phillips, Derek L. 
and Clancy, Kevin J.}, + month = mar, + year = {1972}, + pages = {921-940} +} + +@article{clauset_power-law_2009, + title = {Power-{{Law Distributions}} in {{Empirical Data}}}, + volume = {51}, + issn = {0036-1445}, + number = {4}, + journal = {SIAM Review}, + doi = {10.1137/070710111}, + author = {Clauset, A. and Shalizi, C. and Newman, M.}, + month = nov, + year = {2009}, + pages = {661-703} +} + +@article{stumpf_critical_2012, + title = {Critical {{Truths About Power Laws}}}, + volume = {335}, + copyright = {Copyright \textcopyright{} 2012, American Association for the Advancement of Science}, + issn = {0036-8075, 1095-9203}, + language = {en}, + number = {6069}, + journal = {Science}, + doi = {10.1126/science.1216142}, + author = {Stumpf, Michael P. H. and Porter, Mason A.}, + month = feb, + year = {2012}, + pages = {665-666}, + pmid = {22323807} +} + +@article{pepinsky_visual_2018, + title = {Visual Heuristics for Marginal Effects Plots}, + volume = {5}, + issn = {2053-1680}, + language = {en}, + number = {1}, + journal = {Research \& Politics}, + doi = {10.1177/2053168018756668}, + author = {Pepinsky, Thomas B.}, + month = jan, + year = {2018}, + pages = {2053168018756668} +} + +@inproceedings{singer_why_2017, + archivePrefix = {arXiv}, + title = {Why {{We Read Wikipedia}}}, + language = {en}, + booktitle = {Proceedings of the 26th {{International Conference}} on {{World Wide Web}} - {{WWW}} '17}, + doi = {10.1145/3038912.3052716}, + author = {Singer, Philipp and Lemmerich, Florian and West, Robert and Zia, Leila and Wulczyn, Ellery and Strohmaier, Markus and Leskovec, Jure}, + year = {2017}, + keywords = {Computer Science - Digital Libraries,Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks}, + pages = {1591-1600} +} + +@inproceedings{lemmerich_why_2019, + address = {{New York, NY, USA}}, + series = {{{WSDM}} '19}, + title = {Why the {{World Reads Wikipedia}}: {{Beyond English Speakers}}}, + isbn = 
{978-1-4503-5940-5}, + shorttitle = {Why the {{World Reads Wikipedia}}}, + booktitle = {Proceedings of the {{Twelfth ACM International Conference}} on {{Web Search}} and {{Data Mining}}}, + publisher = {{ACM}}, + doi = {10.1145/3289600.3291021}, + author = {Lemmerich, Florian and {S{\'a}ez-Trumper}, Diego and West, Robert and Zia, Leila}, + year = {2019}, + keywords = {motivation,survey,wikipedia,cross-cultural analysis,log analysis,multi-language}, + pages = {618--626} +} + +@inproceedings{paranjape_improving_2016, + address = {{New York, NY, USA}}, + series = {{{WSDM}} '16}, + title = {Improving {{Website Hyperlink Structure Using Server Logs}}}, + isbn = {978-1-4503-3716-8}, + booktitle = {Proceedings of the {{Ninth ACM International Conference}} on {{Web Search}} and {{Data Mining}}}, + publisher = {{ACM}}, + doi = {10.1145/2835776.2835832}, + author = {Paranjape, Ashwin and West, Robert and Zia, Leila and Leskovec, Jure}, + year = {2016}, + keywords = {log analysis,browsing,link prediction,navigation}, + pages = {615--624} +} + +@inproceedings{yi_beyond_2014, + address = {{New York, NY, USA}}, + series = {{{RecSys}} '14}, + title = {Beyond {{Clicks}}: {{Dwell Time}} for {{Personalization}}}, + isbn = {978-1-4503-2668-1}, + shorttitle = {Beyond {{Clicks}}}, + booktitle = {Proceedings of the 8th {{ACM Conference}} on {{Recommender Systems}}}, + publisher = {{ACM}}, + doi = {10.1145/2645710.2645724}, + author = {Yi, Xing and Hong, Liangjie and Zhong, Erheng and Liu, Nanthan Nan and Rajan, Suju}, + year = {2014}, + keywords = {collaborative filtering,dwell time,content recommendation,learning to rank,personalization}, + pages = {113--120} +} + +@inproceedings{balachandran_modeling_2014, + address = {{New York, NY, USA}}, + series = {{{MobiCom}} '14}, + title = {Modeling {{Web Quality}}-of-Experience on {{Cellular Networks}}}, + isbn = {978-1-4503-2783-1}, + booktitle = {Proceedings of the 20th {{Annual International Conference}} on {{Mobile Computing}} and 
{{Networking}}}, + publisher = {{ACM}}, + doi = {10.1145/2639108.2639137}, + author = {Balachandran, Athula and Aggarwal, Vaneet and Halepovic, Emir and Pang, Jeffrey and Seshan, Srinivasan and Venkataraman, Shobha and Yan, He}, + year = {2014}, + keywords = {performance,web browsing,cellular network,quality of experience (qoe)}, + pages = {213--224} +} + +@inproceedings{yin_silence_2013, + address = {{New York, NY, USA}}, + series = {{{KDD}} '13}, + title = {Silence Is {{Also Evidence}}: {{Interpreting Dwell Time}} for {{Recommendation}} from {{Psychological Perspective}}}, + isbn = {978-1-4503-2174-7}, + shorttitle = {Silence Is {{Also Evidence}}}, + booktitle = {Proceedings of the 19th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} and {{Data Mining}}}, + publisher = {{ACM}}, + doi = {10.1145/2487575.2487663}, + author = {Yin, Peifeng and Luo, Ping and Lee, Wang-Chien and Wang, Min}, + year = {2013}, + keywords = {recommendation,dwell time,psychological}, + pages = {989--997} +} + +@article{soler-adillon_wikipedia_2017, + title = {Wikipedia Access and Contribution: {{Language}} Choice in Multilingual Communities . 
{{A}} Case Study}, + volume = {0}, + copyright = {Copyright (c) 2017 Joan Soler-Adillon, Pere Freixa}, + issn = {2340-5236}, + shorttitle = {Wikipedia Access and Contribution}, + language = {en}, + number = {57}, + journal = {An{\`a}lisi}, + doi = {10.5565/rev/analisi.3109}, + author = {{Soler-Adillon}, Joan and Freixa, Pere}, + month = dec, + year = {2017}, + keywords = {literacy,Internet,internet,Wikipedia,Viquipèdia,alfabetització,alfabetización,competence,competencia,competència,cultura digital,digital culture,estudiantes,estudiants,informació,información,information,linguistic choice,multilingualism,multilingüisme,multilingüismo,opció lingüística,opción lingüística,students,universidad,universitat,university}, + pages = {63-80} +} + +@article{graham_warped_2008, + title = {Warped {{Geographies}} of {{Development}}: {{The Internet}} and {{Theories}} of {{Economic Development}}}, + volume = {2}, + copyright = {\textcopyright{} 2008 The Author. Journal Compilation \textcopyright{} 2008 Blackwell Publishing Ltd}, + issn = {1749-8198}, + shorttitle = {Warped {{Geographies}} of {{Development}}}, + language = {en}, + number = {3}, + journal = {Geography Compass}, + doi = {10.1111/j.1749-8198.2008.00093.x}, + author = {Graham, Mark}, + year = {2008}, + pages = {771-789} +} + +@article{graham_uneven_2014, + title = {Uneven {{Geographies}} of {{User}}-{{Generated Information}}: {{Patterns}} of {{Increasing Informational Poverty}}}, + volume = {104}, + issn = {0004-5608}, + shorttitle = {Uneven {{Geographies}} of {{User}}-{{Generated Information}}}, + number = {4}, + journal = {Annals of the Association of American Geographers}, + doi = {10.1080/00045608.2014.910087}, + author = {Graham, Mark and Hogan, Bernie and Straumann, Ralph K. 
and Medhat, Ahmed}, + month = jul, + year = {2014}, + keywords = {representation,Wikipedia,geographies of knowledge,geografía de la Internet,geografías del conocimiento,geoweb,Internet geography,representación,互联网地理,再现,地理网络,知识地理,维基百科}, + pages = {746-764} +} + +@article{graham_geography_2013, + title = {Geography and the Future of Big Data, Big Data and the Future of Geography}, + volume = {3}, + issn = {2043-8206}, + language = {en}, + number = {3}, + journal = {Dialogues in Human Geography}, + doi = {10.1177/2043820613513121}, + author = {Graham, Mark and Shelton, Taylor}, + month = nov, + year = {2013}, + pages = {255-261} +} + +@article{fiesler_participant_2018, + title = {``{{Participant}}'' {{Perceptions}} of {{Twitter Research Ethics}}}, + volume = {4}, + issn = {2056-3051}, + language = {en}, + number = {1}, + journal = {Social Media + Society}, + doi = {10.1177/2056305118763366}, + author = {Fiesler, Casey and Proferes, Nicholas}, + month = jan, + year = {2018}, + pages = {2056305118763366} +} + +@article{napoli_emerging_2014, + title = {The {{Emerging Mobile Internet Underclass}}: {{A Critique}} of {{Mobile Internet Access}}}, + volume = {30}, + issn = {0197-2243}, + shorttitle = {The {{Emerging Mobile Internet Underclass}}}, + number = {5}, + journal = {The Information Society}, + doi = {10.1080/01972243.2014.944726}, + author = {Napoli, Philip M. and Obar, Jonathan A.}, + month = oct, + year = {2014}, + keywords = {Internet,digital divide,access,mobile Internet,smartphones}, + pages = {323-334} +} + +@article{scheerder_determinants_2017, + title = {Determinants of {{Internet}} Skills, Uses and Outcomes. 
{{A}} Systematic Review of the Second- and Third-Level Digital Divide}, + volume = {34}, + issn = {0736-5853}, + number = {8}, + journal = {Telematics and Informatics}, + doi = {10.1016/j.tele.2017.07.007}, + author = {Scheerder, Anique and {van Deursen}, Alexander and {van Dijk}, Jan}, + month = dec, + year = {2017}, + keywords = {Internet skills,Digital divide,Internet outcomes,Internet use,Systematic literature review}, + pages = {1607-1624} +} + +@article{buchi_modeling_2016, + title = {Modeling the Second-Level Digital Divide: {{A}} Five-Country Study of Social Differences in {{Internet}} Use}, + volume = {18}, + issn = {1461-4448}, + shorttitle = {Modeling the Second-Level Digital Divide}, + language = {en}, + number = {11}, + journal = {New Media \& Society}, + doi = {10.1177/1461444815604154}, + author = {B{\"u}chi, Moritz and Just, Natascha and Latzer, Michael}, + month = dec, + year = {2016}, + pages = {2703-2722} +} + +@article{deursen_compoundness_2017, + title = {The Compoundness and Sequentiality of Digital Inequality}, + volume = {11}, + copyright = {cc\_by\_nc\_nd}, + issn = {1932-8036}, + language = {en}, + journal = {International Journal of Communication}, + author = {{van Deursen}, Alexander J. A. M. and Helsper, Ellen and Eynon, Rebecca and {van Dijk}, Jan A. G. M.}, + month = jan, + year = {2017}, + pages = {452-473} +} + +@article{deursen_toward_2015, + title = {Toward a {{Multifaceted Model}} of {{Internet Access}} for {{Understanding Digital Divides}}: {{An Empirical Investigation}}}, + volume = {31}, + issn = {0197-2243}, + shorttitle = {Toward a {{Multifaceted Model}} of {{Internet Access}} for {{Understanding Digital Divides}}}, + number = {5}, + journal = {The Information Society}, + doi = {10.1080/01972243.2015.1069770}, + author = {van Deursen, Alexander J. A. M. and van Dijk, Jan A. G. 
M.}, + month = oct, + year = {2015}, + keywords = {digital divide,skills,motivation,Internet access,material access,usage}, + pages = {379-391} +} + +@article{donner_exploring_2011, + title = {Exploring {{Mobile}}-Only {{Internet Use}}: {{Results}} of a {{Training Study}} in {{Urban South Africa}}}, + volume = {5}, + copyright = {The International Journal of Communication is an academic journal. As such, it is dedicated to the open exchange of information. For this reason, IJoC is freely available to individuals and institutions. Copies of this journal or articles in this journal may be distributed for research or educational purposes free of charge and without permission. However, commercial use of the IJoC website or the articles contained herein is expressly prohibited without the written consent of the editor. Authors who publish in The International Journal of Communication will release their articles under the Creative Commons Attribution Non-Commercial No Derivatives (by-nc-nd) license . This license allows anyone to copy and distribute the article for non-commercial purposes provided that appropriate attribution is given. For details of the rights authors grants users of their work, see the "human-readable summary" of the license , with a link to the full license. (Note that "you" refers to a user, not an author, in the summary.) This journal utilizes the LOCKSS system to create a distributed archiving system among participating libraries and permits those libraries to create permanent archives of the journal for purposes of preservation and restoration. The publisher perpetually authorizes participants in the LOCKSS system to archive and restore our publication through the LOCKSS System for the benefit of all LOCKSS System participants. Specifically participating libraries may: Collect and preserve currently accessible materials; Use material consistent with original license terms; Provide copies to other LOCKSS appliances for purposes of audit and repair. 
~ Fair Use The U.S. Copyright Act of 1976 specifies, in Section 107, the terms of the Fair Use exception: Notwithstanding the provisions of sections 106 and 106A, the fair use of a copyrighted work, including such use by reproduction in copies or phonorecords or by any other means specified by that section, for purposes such as criticism, comment, news reporting, teaching (including multiple copies for classroom use), scholarship, or research, is not an infringement of copyright. In determining whether the use made of a work in any particular case is a fair use the factors to be considered shall include: the purpose and character of the use, including whether such use is of a commercial nature or is for nonprofit educational purposes; the nature of the copyrighted work; the amount and substantiality of the portion used in relation to the copyrighted work as a whole; \& the effect of the use upon the potential market for or value of the copyrighted work. The fact that a work is unpublished shall not itself bar a finding of fair use if such finding is made upon consideration of all the above factors. In accord with these provisions, the International Journal of Communication believes in the vigorous assertion and defense of Fair Use by scholars engaged in academic research, teaching and non-commercial publishing. Thus, we view the inclusion of ``quotations'' from existing print, visual, audio and audio-visual texts to be appropriate examples of Fair Use, as are reproductions of visual images for the purpose of scholarly analysis. We encourage authors to obtain appropriate permissions to use materials originally produced by others, but do not require such permissions as long as the usage of such materials falls within the boundaries of Fair Use. The International Journal of Communication encourages authors to employ fair use in their scholarly publishing wherever appropriate. 
Fair use is the right to use unlicensed copyrighted material (whether it is text, images, audio-visual, or other) in your own work, in some circumstances. We consult the Code of Best Practices in Fair Use for Scholarly Research in Communication , created by the International Communication Association and endorsed by the National Communication Association, and you should too. If you have any questions about whether fair use applies to your uses of copyrighted material (whether it is text, images, audio-visual, or other) in your scholarship, simply include your rationale, grounded in the Best Practices, as a supplementary document with your submission.}, + issn = {1932-8036}, + shorttitle = {Exploring {{Mobile}}-Only {{Internet Use}}}, + language = {en}, + number = {0}, + urldate = {2019-03-27}, + journal = {International Journal of Communication}, + url = {https://ijoc.org/index.php/ijoc/article/view/750}, + author = {Donner, Jonathan and Gitau, Shikoh and Marsden, Gary}, + month = apr, + year = {2011}, + pages = {24} +} + +@article{hargittai_second-level_2002, + title = {Second-{{Level Digital Divide}}: {{Differences}} in {{People}}'s {{Online Skills}}}, + volume = {7}, + copyright = {Copyright (c)}, + issn = {13960466}, + shorttitle = {Second-{{Level Digital Divide}}}, + language = {en-US}, + number = {4}, + journal = {First Monday}, + doi = {10.5210/fm.v7i4.942}, + author = {Hargittai, Eszter}, + month = apr, + year = {2002} +} + +@book{stinchcombe_constructing_1987, + address = {{Chicago}}, + title = {Constructing Social Theories}, + isbn = {978-0-226-77484-8}, + language = {English}, + publisher = {{University of Chicago Press}}, + author = {Stinchcombe, Arthur L}, + year = {1987}, + note = {OCLC: 970416061} +} + +@inproceedings{kim_modeling_2014, + address = {{New York, NY, USA}}, + series = {{{WSDM}} '14}, + title = {Modeling {{Dwell Time}} to {{Predict Click}}-Level {{Satisfaction}}}, + isbn = {978-1-4503-2351-2}, + booktitle = {Proceedings of the 7th {{ACM 
International Conference}} on {{Web Search}} and {{Data Mining}}}, + publisher = {{ACM}}, + doi = {10.1145/2556195.2556220}, + author = {Kim, Youngho and Hassan, Ahmed and White, Ryen W. and Zitouni, Imed}, + year = {2014}, + keywords = {user behavior,click satisfaction.,dwell time analysis}, + pages = {193--202} +} + +@inproceedings{jansen_analysis_2003, + address = {{Las Vegas, Nevada}}, + title = {An {{Analysis}} of {{Web Documents Retrieved}} and {{Viewed}}}, + language = {en}, + booktitle = {International {{Conference}} on {{Internet Computing}}}, + publisher = {{CSREA Press}}, + author = {Jansen, Bernard J and Spink, Amanda}, + year = {2003}, + pages = {65-69} +} + +@misc{davies_mediums_2013, + title = {Medium's Metric That Matters: {{Total Time Reading}}}, + shorttitle = {Medium's Metric That Matters}, + urldate = {2019-03-30}, + journal = {Data Lab}, + url = {https://medium.com/data-lab/mediums-metric-that-matters-total-time-reading-86c4970837d5}, + author = {Davies, Pete}, + month = nov, + year = {2013} +} + +@article{okoli_wikipedia_2014, + title = {Wikipedia in the Eyes of Its Beholders: {{A}} Systematic Review of Scholarly Research on {{Wikipedia}} Readers and Readership}, + volume = {65}, + copyright = {\textcopyright{} 2014 ASIS\&T}, + issn = {2330-1643}, + shorttitle = {Wikipedia in the Eyes of Its Beholders}, + language = {en}, + number = {12}, + journal = {Journal of the Association for Information Science and Technology}, + doi = {10.1002/asi.23162}, + author = {Okoli, Chitu and Mehdi, Mohamad and Mesgari, Mostafa and Nielsen, Finn {\AA}rup and Lanam{\"a}ki, Arto}, + year = {2014}, + keywords = {Internet,knowledge,reading}, + pages = {2381-2403} +} + +@inproceedings{priedhorsky_measuring_2017, + address = {{New York, NY, USA}}, + series = {{{CSCW}} '17}, + title = {Measuring {{Global Disease}} with {{Wikipedia}}: {{Success}}, {{Failure}}, and a {{Research Agenda}}}, + isbn = {978-1-4503-4335-0}, + shorttitle = {Measuring {{Global Disease}} with 
{{Wikipedia}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + publisher = {{ACM}}, + doi = {10.1145/2998181.2998183}, + author = {Priedhorsky, Reid and Osthus, Dave and Daughton, Ashlynn R. and Moran, Kelly R. and Generous, Nicholas and Fairchild, Geoffrey and Deshpande, Alina and Del Valle, Sara Y.}, + year = {2017}, + keywords = {disease,epidemiology,forecasting,modeling,wikipedia}, + pages = {1812--1834} +} + +@inproceedings{gorbatai_exploring_2011, + address = {{New York, NY, USA}}, + series = {{{WikiSym}} '11}, + title = {Exploring {{Underproduction}} in {{Wikipedia}}}, + isbn = {978-1-4503-0909-7}, + booktitle = {Proceedings of the 7th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + publisher = {{ACM}}, + doi = {10.1145/2038558.2038595}, + author = {Gorbat{\^a}i, Andreea D.}, + year = {2011}, + keywords = {collective production,social goods,underproduction}, + pages = {205--206} +} + +@article{bell_extensive_2001, + title = {Extensive {{Reading}}: {{Speed}} and {{Comprehension}}}, + volume = {1}, + issn = {1533-242X}, + shorttitle = {Extensive {{Reading}}}, + language = {en}, + number = {1}, + journal = {Reading Matrix: An International Online Journal}, + author = {Bell, Timothy I.}, + year = {2001}, + keywords = {Comparative Analysis,Foreign Countries,Reading Comprehension,Reading Instruction,Reading Rate,Reading Tests,Scores,Teaching Methods} +} + +@article{bochkarev_average_2012, + archivePrefix = {arXiv}, + primaryClass = {cs}, + title = {Average Word Length Dynamics as Indicator of Cultural Changes in Society}, + urldate = {2019-04-01}, + journal = {arXiv:1208.6109 [cs]}, + url = {http://arxiv.org/abs/1208.6109}, + author = {Bochkarev, Vladimir V. and Shevlyakova, Anna V. 
and Solovyev, Valery D.}, + month = aug, + year = {2012}, + keywords = {Computer Science - Computation and Language,91F20,J.5} +} + +@article{scheerder_determinants_2017-1, + title = {Determinants of {{Internet}} Skills, Uses and Outcomes. {{A}} Systematic Review of the Second- and Third-Level Digital Divide}, + volume = {34}, + issn = {0736-5853}, + number = {8}, + journal = {Telematics and Informatics}, + doi = {10.1016/j.tele.2017.07.007}, + author = {Scheerder, Anique and {van Deursen}, Alexander and {van Dijk}, Jan}, + month = dec, + year = {2017}, + keywords = {Internet skills,Digital divide,Internet outcomes,Internet use,Systematic literature review}, + pages = {1607-1624} +} + +@article{willems_equity_2012, + title = {Equity Considerations for Open Educational Resources in the Glocalization of Education}, + volume = {33}, + issn = {0158-7919}, + number = {2}, + journal = {Distance Education}, + doi = {10.1080/01587919.2012.692051}, + author = {Willems, Julie and Bossu, Carina}, + month = aug, + year = {2012}, + keywords = {equity,education,e-inclusion,open educational resources}, + pages = {185-199} +} + +@article{reagle_gender_2011, + title = {Gender {{Bias}} in {{Wikipedia}} and {{Britannica}}}, + volume = {5}, + copyright = {The International Journal of Communication is an academic journal. As such, it is dedicated to the open exchange of information. For this reason, IJoC is freely available to individuals and institutions. Copies of this journal or articles in this journal may be distributed for research or educational purposes free of charge and without permission. However, commercial use of the IJoC website or the articles contained herein is expressly prohibited without the written consent of the editor. Authors who publish in The International Journal of Communication will release their articles under the Creative Commons Attribution Non-Commercial No Derivatives (by-nc-nd) license . 
This license allows anyone to copy and distribute the article for non-commercial purposes provided that appropriate attribution is given. For details of the rights authors grants users of their work, see the "human-readable summary" of the license , with a link to the full license. (Note that "you" refers to a user, not an author, in the summary.) This journal utilizes the LOCKSS system to create a distributed archiving system among participating libraries and permits those libraries to create permanent archives of the journal for purposes of preservation and restoration. The publisher perpetually authorizes participants in the LOCKSS system to archive and restore our publication through the LOCKSS System for the benefit of all LOCKSS System participants. Specifically participating libraries may: Collect and preserve currently accessible materials; Use material consistent with original license terms; Provide copies to other LOCKSS appliances for purposes of audit and repair. ~ Fair Use The U.S. Copyright Act of 1976 specifies, in Section 107, the terms of the Fair Use exception: Notwithstanding the provisions of sections 106 and 106A, the fair use of a copyrighted work, including such use by reproduction in copies or phonorecords or by any other means specified by that section, for purposes such as criticism, comment, news reporting, teaching (including multiple copies for classroom use), scholarship, or research, is not an infringement of copyright. In determining whether the use made of a work in any particular case is a fair use the factors to be considered shall include: the purpose and character of the use, including whether such use is of a commercial nature or is for nonprofit educational purposes; the nature of the copyrighted work; the amount and substantiality of the portion used in relation to the copyrighted work as a whole; \& the effect of the use upon the potential market for or value of the copyrighted work. 
The fact that a work is unpublished shall not itself bar a finding of fair use if such finding is made upon consideration of all the above factors. In accord with these provisions, the International Journal of Communication believes in the vigorous assertion and defense of Fair Use by scholars engaged in academic research, teaching and non-commercial publishing. Thus, we view the inclusion of ``quotations'' from existing print, visual, audio and audio-visual texts to be appropriate examples of Fair Use, as are reproductions of visual images for the purpose of scholarly analysis. We encourage authors to obtain appropriate permissions to use materials originally produced by others, but do not require such permissions as long as the usage of such materials falls within the boundaries of Fair Use. The International Journal of Communication encourages authors to employ fair use in their scholarly publishing wherever appropriate. Fair use is the right to use unlicensed copyrighted material (whether it is text, images, audio-visual, or other) in your own work, in some circumstances. We consult the Code of Best Practices in Fair Use for Scholarly Research in Communication , created by the International Communication Association and endorsed by the National Communication Association, and you should too. 
If you have any questions about whether fair use applies to your uses of copyrighted material (whether it is text, images, audio-visual, or other) in your scholarship, simply include your rationale, grounded in the Best Practices, as a supplementary document with your submission.}, + issn = {1932-8036}, + language = {en}, + number = {0}, + urldate = {2019-06-24}, + journal = {International Journal of Communication}, + url = {https://ijoc.org/index.php/ijoc/article/view/777}, + author = {Reagle, Joseph and Rhue, Lauren}, + month = aug, + year = {2011}, + pages = {21} +} + + diff --git a/dissertations/nathante_uw_2021/UWPhDThesis_Template_2013_Updated_11.29.2015.docx b/dissertations/nathante_uw_2021/UWPhDThesis_Template_2013_Updated_11.29.2015.docx new file mode 100644 index 0000000..2ac0236 Binary files /dev/null and b/dissertations/nathante_uw_2021/UWPhDThesis_Template_2013_Updated_11.29.2015.docx differ diff --git a/dissertations/nathante_uw_2021/abstract.pdf b/dissertations/nathante_uw_2021/abstract.pdf new file mode 100644 index 0000000..9dacaaa Binary files /dev/null and b/dissertations/nathante_uw_2021/abstract.pdf differ diff --git a/dissertations/nathante_uw_2021/appendix_A_articlequality.tex b/dissertations/nathante_uw_2021/appendix_A_articlequality.tex new file mode 100644 index 0000000..32f84dc --- /dev/null +++ b/dissertations/nathante_uw_2021/appendix_A_articlequality.tex @@ -0,0 +1,312 @@ + +% \baselineskip 24ptn + +%% +%% The "title" command has an optional parameter, +%% allowing the author to define a "short title" to be used in page headers. +%% Sneha suggests changing the title suggests it should make reference to ORES. +%% Abstract 150 words +\chapterprecishere{ +% Most explanations of changes in online group size focus on internal factors like social structures or design decisions. +% do not make the , and render critical questions like “which other groups are a given group's strongest competitors or mutualists?” unanswerable. 
+Organizing complex peer production projects and advancing scientific knowledge of open collaboration each depend on the ability to measure quality. Article quality ratings on English language Wikipedia have been widely used by both Wikipedia community members and academic researchers for purposes like tracking knowledge gaps and studying how political polarization shapes collaboration. Even so, measuring quality presents many methodological challenges. The most widely used systems use labels on discrete ordinal scales when assessing quality, but such labels can be inconvenient for statistics and machine learning. Prior work handles this by assuming that different levels of quality are ``evenly spaced'' from one another. This assumption runs counter to intuitions about the relative degrees of effort needed to raise Wikipedia encyclopedia articles to different quality levels. Furthermore, models from prior work are fit to datasets that oversample high-quality articles. This limits their accuracy for representative samples of articles or revisions. I describe a technique extending the Wikimedia Foundation's ORES article quality model to address these limitations. My method uses weighted ordinal regression models to construct one-dimensional continuous measures of quality. While scores from my technique and from prior approaches are correlated, my approach improves accuracy for research datasets and provides evidence that the ``evenly spaced'' assumption is unfounded in practice on English Wikipedia. I conclude with recommendations for using quality scores in future research and include the full code, data, and models. +} + +\section{Introduction} \label{sec:introduction} +% LATEX NOTE: This alphabet below is here so we can measure the line-length of +% different layouts. Typesetters suggest that an average line-length of +% between 45-90 characters and a rule of thumb for typesetting is that you +% should be able to fit between 2-3 alphabets on one line. 
Generally speaking, +% the shorter the line length, the better -- and the smaller the linespacing +% can become. The following line is 3 alphabets (73 characters). + +% Kaylea suggests adding "support learning" to the motivation in reference to how wikiedu uses the ORES quality measures. +% This first paragraph is very Wikipedia-centric. +Measuring content quality in peer production projects like Wikipedia is important so projects can learn about themselves and track progress. Measuring quality also helps build confidence that information is accurate and supports monitoring how well an encyclopedia includes diverse subject areas to identify gaps needing attention \citep{redi_taxonomy_2021}. Measuring quality enables tracking and evaluating the progress of subprojects and initiatives organized to fill the gaps \citep{halfaker_interpolating_2017, warncke-wang_success_2015}. Raising an article to a high standard of quality is a recognized achievement among contributors, so assessing quality can help motivate contributions \citep{ayers_how_2008,forte_why_2005}. In these ways, measuring quality can be of key importance to advancing the priorities of the Wikimedia movement and is also important to other kinds of open collaboration \citep{champion_underproduction_2021}. + +Measuring quality also presents methodological and ontological challenges. How can ``quality'' be conceptualized so that measurement of the goals of a project and the value it produces can be precise and accurate? +Language editions of Wikipedia, including English, peer produce quality labels that have been useful both for motivating and coordinating project work and for enabling research. +Epistemic virtues of this approach stem from the community-constructed criteria for assessment and from formalized procedures for third-party evaluation organized by WikiProjects. 
These systems also have two important limitations: (1) ratings are likely to lag behind changes in article quality, and (2) quality is assessed on a discrete ordinal scale, which violates typical assumptions in statistical analysis. Both limitations are surmountable. + +The machine learning framework introduced by \citet{warncke-wang_tell_2013}, further developed by \citet{halfaker_interpolating_2017}, implemented by the Objective Revision Evaluation Service\footnote{\url{https://www.mediawiki.org/wiki/ORES} (\url{https://perma.cc/TH6L-KFT6})} (ORES) article quality models and adopted by several research studies of Wikipedia article quality \citep[e.g.][]{halfaker_ores_2020, kocielnik_reciprocity_2018, shi_wisdom_2019, warncke-wang_success_2015} was designed to address the first limitation by using article assessments at the time they were made as ``ground truth.'' Article quality might drift in the periods between assessments, but it seems safe to assume that new quality assessments are accurate at the time they are made. A model trained on recent assessments can predict what quality label an article would receive if assessed in its current state. + +%In this paper, I build on these models to address the second limitation by developing a one-dimensional measurement of article quality that does not assume that the quality levels are evenly spaced. + +This paper introduces a method for constructing interpretable one-dimensional measures of article quality from Wikipedia quality assessments and the ORES article quality model. The method improves upon prior approaches in two important ways. First, by using inverse probability weighting to calibrate the model, it is more accurate for typical research applications, and second, it does not depend on the assumption that quality levels are ``evenly spaced,'' which threatens the validity of prior research \citep{halfaker_interpolating_2017, arazy_evolutionary_2019}. 
In addition, this paper helps us understand the validity of previous work by analyzing the performance of the ORES quality model and testing the ``evenly spaced'' assumption. + +In §\ref{sec:background}, I provide a brief overview of quality measurement in peer production research, in which I foreground the importance of the assumptions needed to use machine learning predictions in downstream analysis---particularly the ``evenly spaced'' assumption used by \citet{halfaker_interpolating_2017} to justify the use of a handpicked weighted sum to combine article class probabilities. Next, in §\ref{sec:methods}, I describe how to build accurate ordinal quality models that are appropriately calibrated for analyses of representative samples of Wikipedia articles or revisions. I also briefly explain how ordinal regression provides an interpretable one-dimensional measure of quality and how it relaxes the ``evenly spaced'' assumption. Finally, in §\ref{sec:results} I present the results of my analysis to (1) show how the precision of the measurement depends on proper calibration and (2) demonstrate that the ``evenly spaced'' assumption is violated. Despite this, I find that scores from the ordinal models are highly correlated with those from prior work so the ``evenly spaced'' assumption may be acceptable in some applications. I conclude in §\ref{sec:discussion} with recommendations for measuring article quality in future research. + +\section{Background} +\label{sec:background} + + +% first point: measuring quality can help peer production projects +% second point: measuring quality can help science + +% Mako thinks this is cute and it's fine to keep it but the bit about freezing mercury in the discussion takes it a bit far. +Measurement is important to science as available knowledge often constrains the development of improved tools for advancing knowledge. 
For example, in the book \textit{Inventing Temperature}, Hasok \citeauthor{chang_inventing_2004} \citep{chang_inventing_2004}, the philosopher and historian of science, documents how extending theories of heat beyond the range of human sense perception required scientists to develop new types of thermometers. This in turn required better knowledge of heat and of the properties of thermometric materials, such as the freezing point of mercury. Part of the challenge of scientific advancement is that measurement devices developed under certain conditions may give unexpected results outside of the range in which they are calibrated: a thermometer will give impossibly low temperature readings when its mercury unexpectedly freezes. Today, machine learning models are used to extend the range of quality measurements in peer production research, but state-of-the-art machine learning models can be quite sensitive to the nuances of how their training data are selected \citep{recht_imagenet_2019}. +% This project introduces a new measurement device for measuring article quality and provides assurance that the measurement is reasonably accurate over the range of a given dataset. + +\subsection{Measuring Quality in Peer Production} + + +As described in §\ref{sec:introduction}, measuring quality has been of great importance to peer production projects like Wikipedia and in the construction of knowledge about how such projects work. The foundation of article quality measurement in Wikipedia has been the peer production of article quality assessment organized by WikiProjects who develop criteria for articles in their domain \citep{phoebe_ayers_how_2008}. This enables quality assessment to be consistent across different subject areas, but the procedures for assessing quality are tailored to the values of each WikiProject. Yet, like human sense perception of temperature, these quality assessments are limited in that they require human time and attention. 
In addition, humans' limited ability to discriminate between levels on a scale limits the sensitivity of quality assessments. Articles are assessed irregularly and infrequently at the discretion of volunteer editors. Therefore, for most article revisions, it is not known what quality class the article would be assigned if it were newly assessed. + +% This paragraph is a bit lit reviewy and nonessential to the argument. Cut or reowrk. +Researchers have proposed many ideas to extend the range of quality measurement beyond the direct perception of Wikipedians, such as page length \citep{blumenstock_size_2008}, persistent word revisions \citep{adler_content-driven_2007, biancani_measuring_2014}, collaboration network structures \citep{raman_classifying_2020}, and template-based flaw detection \citep{anderka_predicting_2012}. Carefully constructed indexes benchmarked against English language Wikipedia quality assessments might allow quality measurement of articles that have not been assessed or in projects that have underproduced article assessments \citep{lewoniewski_relative_2017}. However, such indexes may lack emic validity if they fail to capture important aspects of quality or if notions of quality vary between linguistic communities and might even shape the editing activity in unexpected ways that could ultimately defeat their purpose \citep{goodhart_problems_1984,strathern_improving_1997}. Peer-produced quality labels depend on the limited capacity of volunteer communities to coordinate quality assessment, but also provide impressive validity for evaluating projects on their own terms. + +\subsection{Article Quality Models Extend Measurement to Unassessed Articles} + +Perhaps the most successful approaches to extending the range of quality measurements use machine learning models trained on available article quality assessments to predict the quality of revisions that have not been assessed. 
The ORES article quality model (henceforth ORES) implements this approach, but other similar article quality predictors have been developed \citep{anderka_breakdown_2012,dang_quality_2016,zhang_history-based_2018,druck_learning_2008,sarkar_stre_2019,raman_classifying_2020}, and additional features including those based on language models can substantially improve classification performance compared to ORES \citep{schmidt_article_2019}. The ORES model is a tree-based classifier that predicts the quality class of a Wikipedia article at the time it is assessed.\footnote{The system uses cross-validation to select among candidates that include random-forest and boosted decision tree models.} These tree-based models are reasonable for practical purposes with the reported ability to predict within one level of the true quality class with 90\% accuracy (although in §\ref{sec:accuracy} I find a decline in accuracy in a more recent dataset). Yet, since these models do not account for the ordering of quality labels, the use of these predictions in downstream analysis introduces complicated methodological challenges. 
+ + +The ORES classifiers are fit using \texttt{scikit-learn}\footnote{\url{https://scikit-learn.org/stable/}(\url{https://perma.cc/5Y8B-W8T5})} through minimization of the multinomial deviance as shown \citep{pedregosa_scikit-learn_2011,hastie_elements_2018}: +% = -\sum_{k=1}^{K}I(y=\mathcal{G}_k)f_k(x) + log(sum_{l=1}^K(e^{f_l(x)})) +\begin{equation} + L(y_i,p(x_i)) = -\sum_{k=1}^K{I(y_i=\mathcal{G}_{i,k})\mathrm{log}~p_k(x_i)} +\label{eq:multinomial.loglik} +\end{equation} + +\noindent For each article $i$ with predictors $x_i$ that has been labeled with a quality class $y_i$, the ORES model outputs an estimated probability $p_k(x_i)$ that the article belongs to each quality class $k \in \{\mathrm{\textit{stub}}, \mathrm{\textit{start}}, \mathrm{\textit{C-class}}, \mathrm{\textit{B-class}}, \mathrm{\textit{Good article (GA)}}, \mathrm{\textit{Featured article (FA)}}\}$. The predicted probabilities $p(x_i)$ sum to one so the ORES model outputs a unit vector for each article. If $\mathcal{G}_{i,k}$, the most probable quality class (MPQC) according to the model, is the true label, then $I(y_i=\mathcal{G}_{i,k})$ equals $1$ ($I$ is the indicator function) and the log predicted probability $p_k(x_i)$ of the correct class is subtracted from the loss $L(y_i,p(x_i))$. Note that this model does not use the fact that article quality classes are ordered. If it did, then it would have to penalize an incorrect classification of a \textit{Good article} as \textit{C-class} more than a classification of a \textit{Good article} as \textit{B-class}. In this model, different quality classes have no intrinsic rank or ordering and thus are akin to different categories of article subjects like animals, vegetables, or minerals. + +The MPQC is perhaps the most natural way to use the ORES output to measure quality. 
It has been used in several studies including to provide evidence that politically polarized collaboration on Wikipedia leads to high quality articles \citep{shi_wisdom_2019} and to understand the relationship between article quality and donation \citep{kocielnik_reciprocity_2018}. However, the MPQC is limited in that it does not measure quality differences between articles that have the same MPQC. Consider two hypothetical articles; the first has the multinomial prediction $(0.1,0.3,0.4,0.075,0.075,0)$ and the second has the prediction $(0.075,0.075,0.4,0.3,0.1,0)$. The MPQC will assign both the \textit{C-class} label even though the first article has an even chance at being a \textit{Stub} or \textit{Start-class} while the second article has an even chance at being a \textit{B-class} or even a \textit{Good article}. At best, the MPQC has limited sensitivity to subtle variations or gradual changes in quality \citep{halfaker_interpolating_2017}. + +\subsection{Combining Scores for Granular Measurement} + +To further extend the range of article quality measurement within article quality classes, \citet{halfaker_interpolating_2017} constructed a numerical quality score using a linear combination (a weighted sum) of the elements of the multinomial prediction $p(x_i)$. This is advantageous from a statistical perspective as it naturally provides a continuous measure of quality which can typically justify a normal or log-normal statistical model. It can also support higher-order aggregations for measuring the quality of a set of articles \citep{halfaker_interpolating_2017}. \citeauthor{halfaker_interpolating_2017} handpicks the coefficients $[0,1,2,3,4,5]$ to make a linear combination of the predictions under the assumption ``that the ordinal quality scale developed by Wikipedia editors is roughly cardinal and evenly spaced,'' which I refer to as the ``evenly spaced'' assumption. 
It essentially says that a \textit{Start-class} article has one more unit of quality than a \textit{Stub-class} article, and that a \textit{C-class} article has one more unit of quality than a \textit{Start-class} article and so on. This approach is being adopted by other researchers including \citet{arazy_evolutionary_2019}. + +The considerable degree of effort and expertise required to raise articles to higher levels of quality raises doubt in the assumption \citep{jemielniak_common_2014}. Higher quality levels correspond to increasing completeness, encyclopedic character, usefulness to wider audiences, incorporation of multimedia, polished citations, and adherence to Wikipedia's policies. The English language Wikipedia editing guideline on content assessment\footnote{\url{https://en.wikipedia.org/w/index.php?title=Wikipedia:Content_assessment&oldid=1023695750} (\url{https://perma.cc/2JUV-6SD})} defines a \textit{Good article} as ``useful to nearly all readers, with no obvious problems'' and a \textit{Featured article} as ``professional, outstanding and thorough.'' According to Wikipedians, it can take ``three to six months of full time work'' to write a \emph{Featured article}.\footnote{Public statement by Stuart Yeates, an expert Wikipedian; quoted with permission. \url{https://lists.wikimedia.org/hyperkitty/list/wiki-research-l@lists.wikimedia.org/message/7U35LHAXRWEPABN75DOTPOIEA2VYCTQQ/} (\url{https://perma.cc/9V4P-WRXR})} Are we to assume that the difference in quality between a \textit{Good article} and a \textit{Featured article} is measurably the same as that between a \textit{Stub} defined as ``little more than a dictionary definition'' and a \textit{Start-class} that is ``a very basic description of the topic?'' How could we even answer this question? +%This paper provides a methodology to answer it, but the answer depends on how quality is measured. 
+ +If the ``evenly spaced'' assumption is reasonable, then \citeauthor{halfaker_interpolating_2017}'s weighted sum approach is too. But if increasing Wikipedia article classes do not represent roughly equal improvements in quality, this may threaten the accuracy of analysis dependent on the assumption. Suppose that a \textit{B-class} article has not 1, but 2 units of quality greater than a \textit{C-class} article, then \citeauthor{halfaker_interpolating_2017} could have underestimated the improvement in the knowledge gap of women scientists, which was considerably driven by improvement in \textit{B-class} articles. In the next section, I provide a straightforward extension of the ORES article quality model based on ordinal regression that can both relax the ``evenly spaced'' assumption and provide a better calibrated and more accurate one-dimensional measure of quality. + +%I now describe my implementation of the approach. I will then evaluate my model in terms of predictive accuracy, the spacing of quality levels, and comparison with prior approaches. + + +\section{Data, Methods and Measures} +\label{sec:methods} + +%\citeauthor{halfaker_interpolating_2017} \cite{halfaker_interpolating_2017} constructed a one-dimensional measure of article quality using handpicked linear combination of the ORES category predictions assuming that quality levels are evenly spaced. I choose the linear combination + +I use Bayesian ordinal regression models that use the ORES predicted probabilities to predict the quality class labels and quantify the distance between quality classes. I now provide a brief overview of ordinal regression as needed to explain my approach to measuring quality. Understanding ordinal regression depends on background knowledge of odds and generalized linear models. I recommend \citet{mcelreath_statistical_2018} for reference. 
+ +\subsection{Bayesian Ordinal Regression} + +Ordinal regression predicts quality class membership using a single linear model for all classes and identifies boundaries between classes using the log cumulative odds link function shown below in Eq. \ref{eq:ordinal.regression}. The log cumulative odds is not the only possible choice of link function, but it is the most common, is the easiest to interpret, and is appropriate here. + +\begin{align} + \mathrm{log}&~\frac{\mathrm{Pr}(y_i \le k)}{1 - \mathrm{Pr}(y_i \le k)} = \alpha_k - \phi_i \label{eq:ordinal.regression} \\ + \phi_i &= B x_i \nonumber +\end{align} +\noindent As in Eq. \ref{eq:multinomial.loglik}, $y_i$ is the quality label for article $i$. The left hand side of Eq. \ref{eq:ordinal.regression} gives the log odds that $y_i$ is less than or equal to quality level $k$. The ordinal quality measure is given by a linear model $\phi_i = B x_i$ ($x_i$ is a vector of transformed ORES scores for article $i$). Key to interpreting $\phi_i$ as a quality measure are the intercept parameters $\alpha_k$ for each quality level $k$. The log cumulative odds (the log odds that article $i$ has quality less than or equal to $k$) are given by the difference between the intercept and the linear model $\alpha_k - \phi_i$. Therefore, if $\phi_i = \alpha_k$ then the chances that $y_i \le k$ equal the chances that $y_i > k$. When $\phi_i$ is less than $\alpha_k$, the quality of article $i$ is probably less than or equal to quality level $k$. As $\phi_i - \alpha_k$ increases so do the chances that article $i$ is of quality better than $k$. In this way, the threshold parameters $\alpha_k$ define quantitative article quality levels on the scale of the ordinal quality measure $\phi_i$. + +Informally, an ordinal regression model maps a linear regression model to the ordinal scale using the log cumulative odds link function. It does this by inferring thresholds that partition the range of linear predictions. 
When the linear predictor for an article crosses a threshold, the probability that the article has quality greater than that corresponding to the threshold begins to increase. + +Bayesian inference allows interpreting model parameters like $\phi_i$ and $\alpha_k$ as random variables and provides accurate quantification of uncertainty in thresholds and predictions. I fit models using the R package Bayesian regression modeling using Stan (\texttt{brms}) \citep{burkner_brms_2017} version 2.15.0. I use the default priors for ordinal regression, which are weakly informative. Due to the large sample size, the data overwhelm the priors and the priors have little influence over results. I confirmed this by fitting equivalent frequentist models using the \texttt{polr} function in the \texttt{MASS} R package \citep{venables_modern_2002} and found that the estimates of intercepts and coefficients were very close. + +% "all useful information" not strictly true +The six quality scores output by the ORES article quality classifier are perfectly collinear by construction because they sum to one. This means they cannot all be included in the same regression model. Since interpreting the coefficients is not important, I take the linear transformation of the ORES scores using appropriately weighted principal component analysis and use the first five principal components as the independent variables. This is simpler and more statistically efficient than a model selection procedure. + +%I fit 3 ordinal regression models, one for each of the units of analysis using weights as described below in §\ref{sec:data}. The use of different weights is important to ensure that the model, and therefore the resulting quality scale is well calibrated to the chosen unit of analysis as shown in Figure \ref{fig:calibration}. 
To further demonstrate the importance of calibrating the models to the correct unit of analysis, I report the accuracy of each model (and of the MPQC) on each weighted dataset in §\ref{sec:accuracy}. + +\subsection{Dataset and Model Calibration} +\label{sec:data} + +I draw a new random sample of 5,000 articles from each quality class to develop my models. I first reuse code from the \texttt{articlequality}\footnote{\url{https://pypi.org/project/articlequality} (\url{https://perma.cc/8R4H-MAZ9})} Python package to process the March 2020 XML dumps for English Wikipedia and extract up-to-date article quality labels. I then select pages that have been assessed by a member of at least one WikiProject. Following prior work, if an article is assessed at different levels according to more than one WikiProject, I assign it to the highest such level and I drop articles having the rarely used \emph{A-class} quality level \citep{halfaker_interpolating_2017,warncke-wang_success_2015,warncke-wang_tell_2013}. Next, I use the \texttt{revscoring}\footnote{\url{https://pypi.org/project/revscoring} (\url{https://perma.cc/3HFN-V23Z})} Python package to obtain the ORES scores of the labeled article versions. Some of these versions have been deleted leading to missing observations at each quality level. Table \ref{tab:sample} shows the number of articles sampled in each quality class. I reserve a random sample of 2000 articles which I use in reporting my results and fit my ordinal regression models on the remainder. + +%For a fair comparison of predictive accuracy, I holdout a random sample of r2[['n.holdout']] articles. +%From these labeled articles I draw a new stratified sample to enable the use of a smaller sample that is ``balanced,'' meaning that it has equal sample sizes for all article classes as shown in Table \ref{tab:sample}. +The ORES article quality classifiers are fit on a ``balanced'' dataset having an equal number of articles in each quality class. 
Thus, an ORES score is the probability that an article is a member of a quality class under the assumption that the article was drawn from a population where each quality class contains an equal number of articles. Simply put, the model has learned from its training data that each quality class is about the same size. + +\begin{figure} +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\maxwidth]{figures/calibration-1} +\end{knitrout} +\caption{Calibration of each predictive quality model on datasets representative of each unit of analysis (article, revision, quality class). Each chart shows, for each quality class, the miscalibration of a model (columns) with respect to a dataset weighted to represent a unit of analysis (rows). The y-axis shows difference between the true probability of the quality class and the average predicted probability of that class, given a chosen unit of analysis. Points close to zero indicate good calibration. For example, the top-left chart shows that the article model is well-calibrated to the dataset on which it was fit and the middle-left chart shows that the article model predicts that articles are \textit{Stubs} with probability greater than the frequency of \textit{Stubs} in a random sample of revisions. Error bars show 95\% confidence intervals. \label{fig:calibration}} +\end{figure} + + +This is not representative of the overall article quality on Wikipedia, which is highly skewed with over 3 million \textit{Stubs} but only around \textit{7,000} \textit{Featured articles} as shown in Table \ref{tab:sample}. Although using a balanced dataset likely improves the accuracy of the ORES models, for the ordinal regression models, the choice of unit of analysis presents a trade-off between accuracy in a representative sample of articles or revisions and accuracy within each quality class. 
+Constructing a balanced dataset by oversampling is a common practice in machine learning because it can improve predictive performance. However, oversampling can also lead to badly calibrated predictive probabilities as shown in Fig. \ref{fig:calibration}. Calibration means that, on average, the predicted probability of a quality class equals the average true probability of that class for the unit of analysis. + +The ``balanced'' dataset on which ORES is trained has the \textit{quality class} unit of analysis because each quality class has equal representation. However, researchers are more interested in analyzing representative samples of \textit{articles} or \textit{revisions}. For example, the article unit of analysis would be used to estimate the average quality of a random sample of articles and the revision unit of analysis might be used to model the change in the quality of an encyclopedia over time. +Weighting allows the use of the balanced dataset to estimate a model as if the dataset were a uniform random sample of a different unit of analysis. +My method uses a balanced dataset to fit ordinal regression models with inverse probability weighting to calibrate each model to the unit of analysis of a research project. +For example, each article in the model calibrated to the article unit of analysis is weighted by the probability of its quality class in the population of articles divided by the probability of its quality class in the sample. The size of the sample and the weights for the article and revision levels of analysis are also shown in Table \ref{tab:sample}. +% It turns out that the ``evenly spaced'' assumption is sensitive to the unit of analysis. + + +\begin{table} +\caption{Number of articles sampled at each quality level} +% latex table generated in R 4.0.4 by xtable 1.8-4 package +% +\begin{tabular}{lrrrrr} + \hline +Label & No. of articles & No. 
of revisions & Sample size & Article weights & Revision weights \\ + \hline +Stub & 3,359,351 & 12,005,611 & 4,969 & 4.23 & 2.52 \\ + Start & 1,019,038 & 7,828,335 & 4,979 & 1.28 & 1.64 \\ + C & 235,655 & 3,889,639 & 4,988 & 0.30 & 0.81 \\ + B & 128,875 & 3,640,591 & 4,990 & 0.16 & 0.76 \\ + GA & 31,808 & 924,468 & 4,999 & 0.04 & 0.19 \\ + FA & 7,438 & 365,255 & 4,995 & 0.01 & 0.08 \\ + \hline +\end{tabular} + +\label{tab:sample} +\end{table} + +% This requires dropping one of the scores, but it is not obvious which one should be dropped. For both the weighted and unweighted models, I fit six models each dropping a different scores and then use approximate leave-one-out cross validation (LOO-CV) implemented in the \textsc{loo} R package to choose \cite{vehtari_practical_2017}. LOO-CV takes advantage of the Bayesian model to accurately and reliable calculate the expected log out-of-sample pointwise predictive accuracy (ELPD) using Pareto smoothed importance sampling. The choice does not matter much as the standard errors of the ELPD differences are not much smaller than the differences themselves. As shown in Table \ref{tab:loo.comparison}, the best models according to the ELPD have the \textit{start-class} score removed for models with weights and with the \textit{stub-class} score removed for the unweighted models. I therefore use these models from here on. + +\section{Results} +\label{sec:results} +I first report my findings about the spacing of the quality classes in each of the models in §\ref{sec:spacing}. Quality classes are not evenly spaced, especially when articles or revisions are the unit of analysis. Next, in §\ref{sec:accuracy}, I report the accuracy of each of the models and the uncertainty of the ordinal quality scale. All models perform similarly to or better than the MPQC within the pertinent unit of analysis. 
The unweighted model provides the best accuracy and lowest uncertainty across the entire range of quality levels, but is poorly calibrated for other units of analysis. Finally, in §\ref{sec:correlation}, I show that all quality measures are highly correlated, but the ordinal quality measures agree with one another more than with the ``evenly spaced'' measure. + + + +\begin{figure} +\centering +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\maxwidth]{figures/fig_spacing-1} +\end{knitrout} +\caption{Quality scores and predictions of the ordinal regression models. Columns in the grid of charts correspond to the ordinal quality model calibrated to the indicated unit of analysis and rows correspond to sampled articles having the indicated level of quality as assessed by Wikipedians. Each chart shows the histogram of scores, thresholds inferred by the ordinal model with 95\% credible intervals colored in gray, and colors indicating when the model makes correct or incorrect predictions. The thresholds are not evenly spaced, especially in the \textit{revision model} and \textit{article model}, which place more weight on lower quality classes. These two models infer that the gaps between \textit{Stub} and \textit{Start} and between \textit{Start} and \textit{C-class} articles are considerably wider than the gap between \textit{C-class} and \textit{B-class} articles. \label{fig:spacing}} +\end{figure} + + +\subsection{Spacing of Quality Classes} +\label{sec:spacing} + +The grid of charts in Fig. \ref{fig:spacing} shows quality scores and thresholds for each model (columns) and article quality level (rows). Each chart shows the histogram of quality scores $\phi_i$ given to articles having the true quality label corresponding to the row of the grid. The histograms are colored to indicate regions where the model correctly predicts that articles belong to their true class. 
Vertical dashed lines show the thresholds inferred by the model with 95\% credible intervals colored in gray. Different models have different ranges of scores, so Fig. \ref{fig:spacing} shows results normalized between 0 and 1. + + + + +No matter the unit of analysis, article quality classes are not evenly spaced. The quality class model provides a quality scale in which \textit{Featured} articles take up $27\%$ of the scale and are expected to score in the range of $[0.73, 1]$, but probable \textit{C-class} articles only span $14\%$ of the scale in the range $[0.31, 0.45]$. Researchers are likely to be interested in models calibrated to the article or revision units of analysis, and in these cases, the quality classes are far from evenly spaced. The \textit{revision model} assigns $28\%$ of the scale to \textit{Stubs}, from $0$ to $0.28$. It assigns \textit{C-class} articles the smallest part of the scale, only $4\%$ of it, from $0.54$ to $0.58$. The \textit{article model} is even more extreme. It assigns \textit{Stubs} to the interval $[0, 0.39]$, $39\%$ of the scale, and the space between thresholds defining the range of \textit{C-class} articles is so narrow that it virtually never predicts that an article will be C-class. In general terms, the \textit{quality class model} gives relatively equal amounts of space to each quality class compared to the other models, while reserving nearly the top half of the scale for the top 2 quality classes. The \textit{revision model} and \textit{article model} do the opposite and use the bottom half of the scale to account for differences within the bottom two quality classes, leave some room for \textit{B-class} articles, but squeeze the top end of the scale and \textit{C-class} articles into relatively small intervals. + + +%spacing between the levels is relatively even compared to the other units of analysis. 
A greater range of the ordinal quality scale is given to \textit{Featured} articles than to \textit{Good} articles, and a smaller range is given to \textit{C-class} and \textit{B-class} articles. Things are quite different in circumstances more likely to be of interest to researchers: when the units of analysis are revisions or articles. In both cases a large range of the scale is taken by \textit{Stub} and \textit{Start-class} articles at bottom of the scale; \textit{C-class} articles have a quite small range of the scale, perhaps due to the difficulty in distinguishing them from \textit{B} or \textit{Start-class} articles; and \textit{Good} and \textit{Featured} articles are given some part of the scale, but substantially less than when the unit of analysis is the quality class. + + +\subsection{Accuracy and Uncertainty} +\label{sec:accuracy} + +I evaluate predictive performance in terms of \textit{accuracy}, the proportion of predictions of article quality that are correct. To allow comparison with the reported accuracy of the ORES quality models, I also report \textit{off-by-one accuracy}, which includes predictions within one level of the true quality class among correct predictions. + +\begin{table} +\caption{Accuracy of quality prediction models depends on the unit of analysis. The greatest accuracy and off-by-one accuracy scores are highlighted. Models are more accurate when calibrated on the same unit of analysis on which they are evaluated. Compared to the MPQC, the ordinal quality models have better accuracy when revisions or articles are the unit of analysis. When the quality class is the unit of analysis, the ordinal quality model has worse accuracy, but predicts within one quality class with slightly better accuracy. \label{tab:accuracy}} +% latex table generated in R 4.0.4 by xtable 1.8-4 package +% +\begin{tabular}{lllll} + \hline +Unit of analysis & Model & Ordinal model? 
& Accuracy & Off-by-one accuracy \\ + \hline +Quality class & Article & Yes & 0.33 & 0.75 \\ + Quality class & Revision & Yes & 0.44 & 0.84 \\ + Quality class & Quality class & Yes & 0.52 & \cellcolor{mygreen}0.87 \\ + Quality class & ORES MPQC & No & \cellcolor{mygreen}0.55 & 0.86 \\ + \hline +Revision & Article & Yes & 0.57 & 0.87 \\ + Revision & Revision & Yes & \cellcolor{mygreen}0.61 & \cellcolor{mygreen}0.92 \\ + Revision & Quality class & Yes & 0.54 & 0.88 \\ + Revision & ORES MPQC & No & 0.58 & 0.9 \\ + \hline +Article & Article & Yes & \cellcolor{mygreen}0.76 & \cellcolor{mygreen}0.97 \\ + Article & Revision & Yes & 0.73 & 0.96 \\ + Article & Quality class & Yes & 0.63 & 0.92 \\ + Article & ORES MPQC & No & 0.65 & 0.94 \\ + \hline +\end{tabular} + + \end{table} + +As shown in Table \ref{tab:accuracy}, the ordinal regression models have better predictive ability than the MPQC except when the unit of analysis is the quality class. In this case, the best ordinal quality model has worse accuracy than the MPQC but slightly better off-by-one accuracy. Table \ref{tab:accuracy} shows accuracy and off-by-one accuracy weighted for each unit of analysis. Accuracy for a given unit of analysis depends on having a model fit to data representative of that unit of analysis. Accuracy scores are higher when greater weight is placed on lower article quality classes, suggesting that it is easier to discriminate between these classes. + +The ORES article quality model has been quickly adopted by researchers, but its accuracy is limited. While off-by-one accuracy is above 90\% when the article is the unit of analysis, the MPQC only predicts the correct quality class 55\% of the time when the quality class is the unit of analysis. + +The trade-offs in selecting a unit of analysis on which to calibrate the models are further illustrated by Fig. \ref{fig:uncertainty}, which plots the size of the 95\% credible intervals as a function of the quality scores for each model. As in Fig. 
\ref{fig:spacing}, quality scores in this plot are rescaled between 0 and 1. The models calibrated to articles or revisions have more certainty in the lower range of the quality scale compared to the model that places equal weight in all quality classes. This comes with a trade-off for the higher range of quality. While the \textit{quality class model} has relatively low uncertainty across the entire range of quality, the \textit{revision model} and \textit{article model} have greater uncertainty at higher levels of quality. + +\begin{figure} +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\maxwidth]{figures/uncertainty-1} +\end{knitrout} +\caption{Uncertainty in ordinal quality scores for models calibrated at each unit of analysis. Points show the size of the 95\% credible interval for the ordinal quality score for each article in the dataset. The quality class model has low uncertainty across the range of quality. Models calibrated to the revision and article levels of analysis have less uncertainty at the low end of the quality scale, but greater uncertainty at the higher end of the scale. \label{fig:uncertainty}} +\end{figure} + +\subsection{Correlation Between Scores} +\label{sec:correlation} + +Although the models have different predictive performances and uncertainties, as measures of quality, they are nearly perfectly correlated with one another as shown in Fig. \ref{fig:correlation}. For each quality score, including the ``evenly spaced'' weighted sum, Fig. \ref{fig:correlation} shows a scatter plot and two correlation statistics: Kendall's $\tau$ and Pearson's $r$. Pearson's $r$ is the standard linear correlation coefficient and Kendall's $\tau$ is a nonparametric rank-based correlation defined as the probability that the quality scores will agree about which of any two articles has higher quality minus the probability that they will disagree. 
+ +According to Pearson's $r$ all the quality scores are highly correlated with correlation coefficients of about $0.98$ or higher. Kendall's $\tau$ measures nonlinear correlation and reveals discrepancies between the ordinal models and the ``evenly spaced'' measures. The Pearson correlation between scores from the \textit{revision model} and the scores from the \textit{quality class model} is about the same as the correlation between the \textit{revision model} scores and the ``evenly spaced'' scores ($r=0.98$). However, according to Kendall's $\tau$, scores from the \textit{revision model} are more similar to those from the \textit{quality class model} ($\tau=0.98$) than to the scores from the ``evenly spaced'' approach ($\tau=0.9$). + +The evenly spaced model is more likely to disagree with the model-based scores than any of the model-based scores are to disagree with one another as visualized in the scatter plots in Fig. \ref{fig:correlation}. Disagreement between the ``evenly spaced'' method and the ordinal models is greatest among articles in the middle of the quality range. + +\begin{figure} +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\maxwidth]{figures/score_correlation-1} +\end{knitrout} + \caption{Correlations between quality measures show that the different approaches to measuring quality are quite similar. ``Evenly spaced'' uses a weighted sum of the ORES scores with handpicked coefficients \citep{halfaker_interpolating_2017}. Lower values of Kendall's $\tau$, a nonparametric rank correlation statistic, compared to Pearson's $r$ suggest nonlinear differences between the weighted sum and the other measures. 
\label{fig:correlation}} +\end{figure} + + +\section{Discussion} +\label{sec:discussion} +Past efforts to extend the measurement of Wikipedia article quality from peer-produced article quality assessments to unassessed versions of articles and from the discrete to the continuous domain have relied upon machine learning and expedient but untested assumptions like that quality levels are ``evenly spaced.'' +% I argued in §\ref{sec:background} that using machine learning to extend the article quality measurement from the direct observation of human article assessment to unobserved articles and from the discrete levels to a continuous scale might be analogous to how thermometry extended into new extremes of hot and cold where assumptions like the liquidity of mercury break down. Scientists, unaware that mercury has a solid state were baffled and misled by impossibly low temperature readings from thermometers in which the mercury had unexpectedly frozen \cite{chang_inventing_2004}. +While I suggest technical improvements for statistical models for measuring quality, I also find that scores from my models are highly correlated to those obtained under the ``evenly spaced'' assumption. + +I set out to provide a better way to convert the probability vector output by the ORES article quality model into a continuous scale and to test the assumption that the quality levels are evenly spaced. I used ordinal regression models to infer spacing between quality levels and used the linear predictor of these models as a continuous measure of quality. While I found in §\ref{sec:spacing} that the quality levels are not evenly spaced and that the spacing depends on the unit of analysis to which the models are calibrated, I also showed in §\ref{sec:correlation} that the model-based quality measures are highly, although not perfectly, correlated with the ``evenly spaced'' measure. This provides some assurance that past results built on this measure are unlikely to mislead. 
That said, I recommend that future work adopt appropriately calibrated model-based quality measures instead of the ``evenly spaced'' approach, and I argue that it is important to improve the accuracy of article quality predictors to enable more precise article quality measurement. + +\subsection{Recommendations for Measuring Article Quality} +How should future researchers approach the question of how to measure Wikipedia article quality? While I cannot provide a final or complete answer to the question, I believe the exercise reported in this paper provides some insights on which to base recommendations. It is important to note that I consider here only approaches to measuring quality that assume the use of a good predictor of article quality assessment, such as the ORES quality model. I do not consider other approaches such as those based on indexes \citep{lewoniewski_relative_2017} described in §\ref{sec:background}. + +\subsubsection{Use the principal components of ORES scores for statistical control of article quality} +In many statistical analyses, the only purpose of measuring quality will be as a statistical control or adjustment. For example, \citet{zhang_crowd_2017} used the MPQC as a control variable in a propensity score matching analysis of promotion to \textit{Featured article} status, but as argued in §\ref{sec:methods}, the MPQC provides less information than the vector of ORES scores. Using the principal components is simpler than using an ordinal quality model. I recommend obtaining ORES scores for your dataset, taking the principal components, and dropping the least significant one to remove collinearity. + +\subsubsection{Use ordinal quality scores when article quality is an independent variable} +\label{sec:qciv} +In other cases, research questions will ask how article quality is related to an outcome of interest, like how \citet{kocielnik_reciprocity_2018} set out to explore factors associated with donations to the Wikimedia Foundation. 
They use the MPQC as an independent variable, which complicates their analysis. Although they conclude that ``pages with higher quality attract more donations,'' this is not strictly true. They actually found a nonlinear relationship where readers of \textit{B-class} articles were more likely to donate than readers of \textit{Featured articles}. Using a continuous measure of quality is more convenient when the average linear relationship is the target of inference. + +I recommend using an ordinal regression model appropriate to the downstream unit of analysis because this will justify the interpretation of the measure. If the downstream unit of analysis differs substantively from those used here, such as if different selection criteria are applied, I recommend reusing my code to calibrate a new ordinal regression model to a new dataset. Otherwise, reusing one of my models should be adequate. Finally, in the Bayesian framework, the scores are interpretable as random variables. This provides a justification for incorporating the variance of these scores as measurement errors to improve estimation in downstream analysis \citep{mcelreath_statistical_2018}. + +% Although the ``evenly spaced'' scores and the scores based on ordinal regression are highly correlated, there are a number of reasons to prefer my approach. +% The most important is simply that it requires no strong assumptions about the relationships between levels of article quality. Rather, it learns both the spacing between quality levels and the best combination of ORES scores for predicting article quality assessment. + +% Second, the scores have grounded statistical interpretations as the linear predictor in an ordinal quality model. Given the intercepts from the model, the scores are directly interpretable as a probability distribution over article quality classes. 
+ + +\subsubsection{Use the MPQC or ordinal quality scores when article quality is the dependent variable} + +Using the MPQC as the outcome in an ordinal regression model, as is done by \citet{shi_wisdom_2019} in their analysis of Wikipedia articles with politically polarized editors, is a reasonable choice as long as it provides sufficient variation and a more granular quality measure is not needed. Although it is theoretically possible that using the MPQC might introduce statistical bias because it is less accurate than ordinal quality scores for units of analysis other than the quality class and omits variation within quality classes, such threats to validity do not seem more significant than the threat introduced by inaccurate predictions. If the MPQC does not provide sufficient granularity and a continuous measure is desired as in \citet{halfaker_interpolating_2017} or \citet{arazy_evolutionary_2019}, I recommend using a measure based on ordinal regression as described in §\ref{sec:qciv}. + + +\subsection{Limitations} + +Although intuitions about the varying degrees of effort required to develop articles with different levels of quality led me to question the ``evenly spaced'' assumption, my findings that quality classes are not evenly spaced do not necessarily reflect relative degrees of effort. Rather, spaces between levels are chosen to link a linear model to ordinal data. The spacing of intervals depends on the ability of the ORES scores to predict quality classes. The ORES article quality model has relative difficulty classifying \textit{C-class} and \textit{B-class} articles \citep{halfaker_interpolating_2017}. Perhaps the differences between these quality classes are minor compared to the other classes. Maybe ORES lacks the features or ability to model these differences and the space between these classes will grow if its predictive performance improves. + +The usefulness of article quality scores depends on the accuracy of the model. 
The ORES quality models are accurate enough to be useful for researchers, but they still only predict the correct quality class 55\% of the time on a balanced dataset. Of course, this limits the accuracy of the ordinal regression models reported here. +Furthermore, while the ORES quality models were designed with carefully chosen features intended to limit biases \citep{halfaker_ores_2020}, it is still quite plausible that the accuracy of predictive quality models may vary depending on characteristics of the article \citep{kleinberg_inherent_2016}. Such inaccuracies may introduce bias, threaten downstream analysis or lead to unanticipated consequences of collaboration tools built upon the models \citep{teblunthuis_effects_2021}. Therefore, improving the accuracy of article quality prediction models is important to the validity of future article quality research. Adopting machine learning models that can incorporate ordinal loss functions is a promising direction and can reduce the need for auxiliary ordinal regression models \citep{cardoso_learning_2007}. + +This paper only considers measuring article quality for English language Wikipedia, but expanding knowledge of collaborative encyclopedia production depends on studying other languages as audiences and collaborative dynamics can greatly vary between projects \citep{hecht_tower_2010,lemmerich_why_2019,teblunthuis_dwelling_2019}. Other languages carry out quality assessments \citep{lewoniewski_relative_2017}, and some of these have been used to build ORES article quality models. Future work should extend this project to provide multilingual article quality measures in one continuous dimension. + +An additional limitation stems from the likelihood that peer-produced quality labels are biased. 
For instance, the English Wikipedia community has a well-documented pattern of discrimination against content associated with marginalized groups such as biographies of women \citep{tripodi_ms_2021, menking_people_2019} and indigenous knowledge \citep{van_der_velden_decentering_2013}. Although demonstrating biases in article quality assessment is a task for future research, if Wikipedians' assessments of article quality are biased then model predictions of quality will almost certainly be as well. + + +\section{Conclusion} +Measuring article quality in one continuous dimension is a valuable tool for studying the peer production of information goods because it provides granularity and is amenable to statistical analysis. Prior approaches extended ORES article quality prediction into a continuous measure under the ``evenly spaced'' assumption. I showed how to use ordinal regression models to transform the ORES predictions into a continuous measure of quality that is interpretable as a probability distribution over article quality levels, provides an account of its own uncertainty and does not assume that quality levels are ``evenly spaced.'' Calibrating the models to the chosen unit of analysis improves accuracy for research applications. I recommend that future work adopt this approach when article quality is an independent variable in a statistical analysis. + +\section{Code and Data Availability} +Code, data and instructions for replicating or reusing this analysis are available in the Harvard Dataverse at \url{https://doi.org/10.7910/DVN/U5V0G1}. + +\section*{Acknowledgements} + +I am grateful to the members of the Community Data Science Collective for their feedback on early drafts of this work including Kaylea Champion, Sneha Narayan, Jeremy Foote, and Benjamin Mako Hill. I would also like to thank Aaron Halfaker for encouraging me to write this after seeing a preliminary version. 
Thanks to Stuart Yeates and other participants in the \texttt{wiki-research-l} mailing list \url{wiki-research-l@lists.wikimedia.org} for answering my questions about measuring article quality and effort. Finally, thank you to the anonymous OpenSym reviewers whose careful and constructive feedback improved the paper. + + +% bibliography here +\setcounter{biburlnumpenalty}{9001} +\printbibliography[title = {References}, heading=secbib] diff --git a/dissertations/nathante_uw_2021/appendix_B_readingtime.tex b/dissertations/nathante_uw_2021/appendix_B_readingtime.tex new file mode 100644 index 0000000..abd94a9 --- /dev/null +++ b/dissertations/nathante_uw_2021/appendix_B_readingtime.tex @@ -0,0 +1,659 @@ + +\chapterprecishere{ +Much existing knowledge about global consumption of peer-produced information goods is supported by data on Wikipedia page view counts and surveys. In 2017, the Wikimedia Foundation began measuring the time readers spend on a given page view (dwell time), enabling a more detailed understanding of such reading patterns. In this paper, we validate and model this new data source and, building on existing findings, use regression analysis to test hypotheses about how patterns in reading time vary between global contexts. +Consistent with prior findings from self-report data, our complementary analysis of behavioral data provides evidence that Global South readers are more likely to use Wikipedia to gain in-depth understanding of a topic. We find that Global South readers spend more time per page view and that this difference is amplified on desktop devices, which are thought to be better suited for in-depth information seeking tasks. +% Olga's comment: I would switch "patterns in reading time vary between global contexts" above with the previous mention of Global South. i.e. "Here we build on findings about patterns of readership varying between global contexts from recent large-scale..." 
and then below say "how patterns in reading time vary between the Global South and Global North" +%We also observe patterns consistent with skills gaps between Global South and Global North audiences as Global South readers also seem to spend more time +% consider knowledge gaps +%Here, we use this data to answer questions like: How does time spent vary from language edition to language edition, or between different kinds of readers or articles? How can we determine whether a new design change increases the time spent by readers? +%This report explores this newly available data, to provide insights on how people use Wikipedia that can inform and influence our future product direction. +%This data allows us to answer questions like: +%We validate this data and begin to answer questions such as the ones above. We observe the limitations of the data, most notably a high rate (57\%) of missing data on mobile devices. It is important to consider these shortcomings, but we believe that the data can be fruitfully applied to improve our current knowledge of how people read Wikipedia. We used regression analyses to explore how factors like page length, device choice, and the locations of readers are related to reading times. We believe that our results for device choice and reader location offer behavioral data to corroborate findings from a large-scale survey of Wikipedia readers of 14 language editions \cite{lemmerich_why_2019}. +} + +\section{Introduction} + +%How does Wikipedia readership vary across different geographic and developmental contexts? +% Perhaps this can be more general +How do Wikipedia readers vary across different geographic and developmental contexts? +%information-seeking tasks differ between people in countries compared to more developed countries? 
+A recent study of readers of different Wikipedia language editions found that readers in countries with a lower human development index (HDI) were more likely to read for in-depth understanding compared to readers in high-HDI countries \citep{lemmerich_why_2019}. However, this study is limited by the use of self-reported data, which can be biased by effects of social desirability and self-selection due to the volunteer nature of web-based surveys \citep{antin_social_2012, hill_wikipedia_2013, kiesler_response_1986, phillips_effects_1972}. Our study provides additional support for this finding from large-scale observation of reading behavior across contexts with varying levels of development. + +Wikipedia contributors generally start as Wikipedia readers. Therefore, understanding and better supporting readership is important for the continued growth of the Wikimedia movement \citep{preece_reader--leader_2009}. +In 2017, the Wikimedia Foundation's web team introduced new instrumentation to measure the amount of time Wikipedia readers spend on the pages they view. We utilize this newly available data source, which provides additional information over the widely used page view data. With reading times in our field of view, it becomes clear that not all views are created equal. Some page views seem to involve in-depth reading, yet most are quite short. + + +%In that sense, results such as ours also contribute to the knowledge about open collaboration processes, complementing research that is based on the detailed contributor data available from MediaWiki wikis. + +% \url{https://meta.wikimedia.org/wiki/Research:Which_parts_of_an_article_do_readers_read\#Eyetracking}{eye 1tracking}, +% +%\url{https://meta.wikimedia.org/wiki/Research:Which_parts_of_an_article_do_readers_read +% maybe we should replace this figure a similar one showing the medians for each group. 
+ +\begin{figure} +\centering +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\columnwidth]{figures/GN_session_device_plot-1} +\end{knitrout} +%\includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/17.png} +\caption{Marginal effects plot showing dwell times on Wikipedia pages predicted by our regression model. Compared to readers in the Global North, readers in the Global South spend substantially more time reading when on desktop devices.\label{fig:model1bplot}} +\end{figure} + +We begin our analysis by evaluating the quality of the adopted approach for measuring reading times. We find limitations including a high rate of missing data on mobile devices and a low rate of invalid (missing or negative) measurements. However, we believe that the data can be generally informative as long as these limitations are considered. We then present a summary of the data and estimate the total time spent reading Wikipedia. + +Next we evaluate probability models for reading time data. In addition to validating assumptions that underlie the use of parametric statistics and regression models used in answering our research questions, model selection can also help evaluate theorized data generating processes that predict when a given model will be a good fit for the data \citep{mitzenmacher_brief_2004, stumpf_critical_2012}. For instance, Liu et al. (2010) analyze dwell times using Weibull models, finding evidence for ``screen-and-glean'' patterns in which people first spend a short amount of time to assess a web page, and then decide whether to read it in-depth \citep{liu_understanding_2010}. We evaluate several probability distributions on the data from Wikipedia readers, and find that the Weibull model is not a good fit, but that the log-normal distribution fits the data well enough to justify using the geometric mean as a metric. + +Finally, we return to our study of global reading behavior. 
Consistent with the results of Lemmerich et al., we find that readers in countries with lower HDI or in the so-called Global South spend more time reading per page view compared to readers in the Global North or in countries with higher HDI \citep{lemmerich_why_2019}. Moreover, this difference is amplified where we would expect users to consume information in depth: on the desktop (non-mobile) site. While we also hypothesized that the difference would likewise be greater in the last page-view in a session, this idea was not supported by our data analysis. We demonstrate these patterns using both multivariate regressions and a simple non-parametric analysis. + +% We also report on how page length, device use, and the property of being the last page view in a session relate to reading time. + +\section{Background} + +\subsection{Wikipedia readership} + +Reading behavior on Wikipedia has been studied extensively, with a 2014 literature review listing 99 publications by 2011 \citep{okoli_wikipedia_2014}. Page view count data is central to this body of work when it comes to quantifying the attention readers give to particular topics or entire Wikipedia language editions. According to Priedhorsky et al., ``the most common application [of page view data] is detection and measurement of popular news topics or events,'' with other uses including forecasting attempts (of e.g. box office revenues) and the study of Wikipedia's own processes \citep{priedhorsky_measuring_2017}. As an example of research using it to examine information imbalances, building on earlier work by Gorbatâi and others, Warncke-Wang et al. compared page view data with article quality ratings, and found ``misalignment between supply and demand'', as the Wikipedia articles with the most views were often not the highest quality \citep{gorbatai_exploring_2011, warncke-wang_misalignment_2015}. 
+Other, less frequently used research strategies include using click streams and session lengths \citep{halfaker_user_2015, paranjape_improving_2016}. + +%\cite{singer_why_2017} + +% let's elaborate on issues with self-selection bias in Wikipedia surveys down here. +Surveys are another important source of information about Wikipedia readership \citep{okoli_wikipedia_2014}. %p.23 +As mentioned in the introduction, such voluntarily self-reported data are subject to participation and social desirability biases. Participation biases from self-selection may have had significant effects in the case of a previous Wikipedia reader and editor survey \citep{hill_wikipedia_2013}. + +Some previous research on Wikipedia readership has already used an approximation of reading time that assumes that the end of a page view is always marked by a new web-request originating from the same IP and user agent \citep{singer_why_2017}. Apart from the limitations arising from using the IP/user agent combination as a substitute for a user ID, this approach also does not allow measuring the dwell time for the last page view in a session.%the latter limitation shared with Google Analytics + +%We do not investigate these differences any further in this report because we lack knowledge of the specific contexts of each community and their audiences which would be necessary to adequately explain them. Instead, we present an analysis of the relationship between reading time and the development level of reader's countries to offer a more general explanation of one factor that might make a difference. + +% TODO make sure that we are clear that dwell times == reading times +\subsection{Dwell times and information seeking} +It has long been observed that page view numbers can paint a misleading picture of the amount of attention spent by web readers, or the information value a web site provides to them. 
An early study of search engine users found in 2003 that typical reading times were ``substantially less than has been previously reported using survey data'' \citep{jansen_analysis_2003}. In more recent years, metrics based on page dwell time (or total time spent on a site) have been adopted more widely. +%[mention Google Analytics' "Time on Page" and its last-in session limitation here?] +A prominent example is the online publishing platform Medium.com, which in 2013 declared ``Total Time Reading'' (TTR) as their ``Only Metric That Matters.'' Distancing themselves from widely adopted web analytics metrics such as page views or active users, they argue that the act of reading should be seen as the most relevant form of user engagement for content websites \citep{davies_mediums_2013}. + +Much prior work on web page dwell times focuses on applications in information retrieval and content recommendation \citep[e.g.,][]{kim_modeling_2014,yi_beyond_2014,yin_silence_2013}. Long dwell times can signal successful information retrieval in search applications because they suggest that the user has found sought information \citep{kim_modeling_2014}. Liu et al. analyzed dwell time data collected through a web browser plugin to characterize types of web content \citep{liu_understanding_2010}. However, factors beyond content may influence dwell times including psychological processes of decision making and individualized styles of content consumption \citep{yin_silence_2013}. As we compare Wikipedia readers using mobile and desktop devices it is worth noting that dwell times are likely to be longer on desktop computers compared to mobile devices \citep{yi_beyond_2014}. + + +% Screen and glean +% use in recommendation systems +% psychology + +\subsection{Global device and knowledge gaps} +%Geographic distribution of content production on Wikipedia +% cultural context content +% a lot of these ideas suggest that people with lower skills are less likely to engage. 
+ +% subsection device gaps +% tie this to our hypotheses about mobile +% We really need a theory of how information needs between +We seek to understand differences in Wikipedia's audience between the areas roughly known as the Global North and the Global South. Lemmerich et al. show empirical differences between self-reported information seeking behavior between such contexts \citep{lemmerich_why_2019}. These differences are likely related to digital divides or gaps between the knowledge, information and technology resources commonly available in different contexts, which can lead to systematic differences in reading behavior. + +For people to use the Internet (or Wikipedia), they have to be able to connect to it, but not all forms of access are equally suited for a given task \citep{deursen_toward_2015}. Deursen et al. suggest that personal computers will be better for in-depth information seeking, while mobile devices, which are often close at hand, have advantages for social interaction \citep{deursen_toward_2015}. As Internet access becomes more ubiquitous, gaps in skills and knowledge about how to use the Internet are increasingly salient digital dividers and can be reinforced by device gaps \citep{deursen_compoundness_2017,hargittai_second-level_2002}. For instance, in many parts of the non-western world, mobile phones diffused before PCs, and skills for PC usage may be less widespread \citep{napoli_emerging_2014, pearce_digital_2013}. We contribute new information about the interaction between device use around the world and how people read Wikipedia. + +Gaps in skills and knowledge may also help explain gaps in who contributes to Wikipedia \citep{shaw_pipeline_2018}. Wikipedia promises to advance over traditional modes of knowledge production in which dominant western attitudes shape what people and places will be included and how they will be represented in authoritative sources like encyclopedias \citep{graham_uneven_2014}. 
In theory, peer production can empower people around the world to add their local knowledge of their places to Wikipedia. Yet even as global access to Wikipedia grows, it is slow to fulfill these promises. Gaps in coverage of cultural knowledge reflect and reinforce structural digital divides at many levels that ``disadvantage many of the world's informational peripheries'' \citep{graham_uneven_2014}. These gaps in Wikipedia's coverage help motivate a better understanding of global readership. + + + +%Differences in reading times across global contexts may relate to gaps in access, skills, or knowledge needed to efficiently discover, filter, and interpret information. + + +% Olga's badass paragraph +In this paper, we use the Human Development Index (HDI) and the Global South/Global North regional classification as means of comparing countries separated by varying levels of development. We recognize that both are insufficient for defining economic development. Furthermore, these concepts and our measures of them only provide an incomplete understanding of the unique identities and motivations of cultures within an information-seeking context. What's more, they do not take into consideration inequality within a geographic region due to minority populations, which may affect the utility of averages such as GDP, income, and life expectancy. We hope that this work provides a basis of study that may be continued with work that takes into account individual cultural context, internet accessibility, and internal inequality. + +%"content rich depth searches might be better conducted on personal computers or laptops while handheld devices might be most appropriate for using social media as a vehicle for social interaction." 
+ +%\cite{buchi_modeling_2016} +%\cite{scheerder_determinants_2017} +% cite Hargattai on skills +%\cite{shaw_pipeline_2018} + + + +\section{Methods} +\subsection{Collecting reading time data} + +Our data collection instrument, the reading depth plugin +%works by running JavaScript in the client browser which sends two messages to the server during a page view.\footnote{\url{https://meta.wikimedia.org/wiki/Schema:ReadingDepth}} The first message is sent when the page is loaded and the second message is sent when it is unloaded. The page unloaded event sends values from timers that measure, among other things, the amount of time that the page was visible in the visitor's browser window. +%More specifically, the plugin +uses the page visibility API to measure \emph{time visible}, the total amount of time that the page was in a visible browser tab.\footnote{See \url{https://meta.wikimedia.org/wiki/Schema:ReadingDepth} \textit{archived at} \url{https://perma.cc/JK75-Y6DH} and \url{https://developer.mozilla.org/en-US/docs/Web/API/Page\_Visibility\_API} \textit{archived at} +\url{https://perma.cc/79PB-389J}} The instrument also records a second candidate measure of reading time: \emph{total time.} This is simply the entire time the page was loaded in the browser. We used this variable for data validation and in robustness checks. We chose to focus on \emph{time visible} because it excludes time when the user could not possibly have been reading the page. This is similar to the client-side approach described in Yi et al. (2014) \citep{yi_beyond_2014}. + +Beginning November 20th 2017, we logged events from a 0.1\% sample of visitor sessions.\footnote{Sessions are based on a random identifier recorded in the browser's \textit{sessionStorage}, which expires at the end of each browser session. This is more privacy-friendly than the common approach (as used in e.g. 
Google Analytics) of tracking users via a cookie, in that the session identifier is not sent with every request to Wikimedia servers. It also differs from session cookies in that a new identifier will be used for links opened in a new browser tab or window.} The sampling rate was increased to 10\% on September 25, 2018 to support future studies at a higher level of granularity.%\footnote{\url{https://phabricator.wikimedia.org/T205176}} + +Since we care about the reading behavior of humans, we identify bots using user agent strings and exclude them from all of our analyses.\footnote{See \url{https://meta.wikimedia.org/wiki/Research:Page_view/Tags\#Spider} \textit{archived at} \url{https://perma.cc/3NSL-X6L2}} + +\subsection{Missing data} +We are only able to collect data from web browsers that support the APIs on which the instrument depends. Also, we excluded certain user agents that were found to send data unreliably in our testing, namely the default Android browser, versions of Chrome earlier than 39, Safari, and all browsers running on versions of iOS older than 11.3. We also do not collect data from browsers that have not enabled JavaScript or that have enabled Do Not Track.\footnote{See \url{https://en.wikipedia.org/wiki/Do_Not_Track} \textit{archived at} \url{https://perma.cc/J368-ZYBD}} + +Even when the above conditions are met, in some cases we are still not able to collect data. Sometimes we observe a page loaded event, indicating that a user in our sample opened a page, but we do not observe a corresponding event indicating that the user has left the page (a page unloaded event). This issue affects 57\% of records on the mobile site and about 5\% of records on the desktop site. 
The likely explanation for why many mobile views are affected is that many mobile browsers will fail to send a page-unloaded event in certain situations, such as when the user closes the browser app using the app switcher.\footnote{We are planning to remedy this issue in future versions of the instrumentation, by making use of alternatives to the page unloaded event available in modern browsers, e.g. the Page Lifecycle API introduced in Google Chrome in 2018. +%(https://phabricator.wikimedia.org/T219212) +} We only include page views for which we observe exactly 1 page loaded event and 1 page unloaded event and remove 0.016\% of page unloaded events where, for unknown reasons, the instrument recorded a page visible time that was less than 0 or undefined. + +\subsection{Taking a sample} + +Because Wikipedia is so widely read, even a 0.1\% sample results in an amount of data exceeding the statistical requirements of this analysis. We therefore conduct our analysis on random sub-samples of the collected data. + +To ensure that all Wikipedia language projects are fairly and adequately represented in our sample, we use stratified sampling by assigning a \emph{weight} to each group that adjusts the probability that members of the group are chosen in the sample. This introduces a \emph{known bias} in the resulting sample, which is corrected using the \emph{weights} in ways analogous to weighted averaging. +For estimating total reading time, and for distribution selection, we stratify by wiki, taking up to 20,000 data points for each wiki and excluding wikis that have fewer than 300 data points. This leaves us with 242 wikis in our sample. In the multivariate analysis below, we stratify by wiki, by the country of the reader's approximate location, and by whether or not we think that the user is on a mobile device. We sample up to 200 data points for each stratum and analyze a sample of 285 wikis. 
+ +% per Casey Fiesler this should go in the methods section +\subsection{Ethical considerations} +Our approach in this paper relies on large-scale observational data collected by monitoring the behavior of Wikipedia visitors. We neither see nor speak to the humans on the other side of the screen. In addition to the empirical limitations discussed below, this approach is subject to epistemic limitations. It makes those behaviors that we can observe through browser APIs visible, while obscuring those we cannot. It cannot speak to how people in different countries understand their experience of Wikipedia \citep{graham_geography_2013}. Furthermore, ``big data'' approaches carry critical and novel ethical risks that are not easily understood in conventional informed-consent and human subjects research frameworks \citep{boyd_critical_2012}. + +Wikimedia's privacy policy endeavors to clearly communicate that the information we use here will be collected, but we do not consider this an ethical license to use this data however we see fit.\footnote{See \url{https://foundation.wikimedia.org/wiki/Privacy_policy} \textit{archived at} \url{https://perma.cc/C4VQ-HWRT}} We chose an analysis that we believe poses minimal risk to Wikipedia visitors' expectations, trust, and autonomy \citep{fiesler_participant_2018}.\footnote{We followed the Wikimedia Foundation's (WMF) guidelines and processes for conducting research. As it is not a federally funded institution, research at the WMF is not supervised by an institutional review board (IRB).} +Each observation of individuals in our study was aggregated with many others at a coarse level of granularity. We chose to study the country level partly because our geolocation measure is most accurate at that level, but also because it is very coarse. We do not track people from one session to another, and do not look at the content of the pages they visit other than the page length. 
We exclude people from our analysis who indicate a wish for privacy by enabling Do Not Track in their browsers, and will discard any session identifiers remaining in the data collected for this analysis after it is complete. + +%We acknowledge that these steps do not guarantee that everyone whose behavior we analyze would be comfortable with traces of their activity being so used. + + +\begin{figure}[t] +\begin{center} +%\includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/1.png} +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\columnwidth]{figures/histograms_1-1} +\end{knitrout} + +\end{center} + +\caption{The distribution of dwell times across 242 language editions of Wikipedia. The top chart shows a histogram of dwell times less than one hour long (the x-axis is truncated to 300 seconds for clarity). In this chart we can see that the median dwell time is about 25 seconds long and that the distribution of dwell times is very skewed, with the arithmetic mean far from the median. The y-axis represents the probability that a given page view is in a given box. In the lower figure, the dwell times are log-transformed and the data appear bell-shaped, with some skew to the right.} +\label{fig:hists} +\end{figure} + +\section{Distribution of reading times} +Here we present summary statistics and a high level description of reading behavior on Wikipedia in terms of dwell times. When someone opens a given page on Wikipedia, how long do they typically stay on the page? Are reading times highly skewed? How much does reading behavior vary across different language editions of Wikipedia? How much time does all of humanity spend reading Wikipedia? + +\subsection{Wikipedia as a whole} + +In general, the distribution of reading times is very skewed (see \hyperref[fig:hists]{Figure \ref*{fig:hists}}). The median reading time is 25 seconds and the 75th percentile is 75.1 seconds. 
This skewness pushes the arithmetic mean far from most of the mass of the distribution. Therefore, the geometric means, medians, and other percentiles have more utility within our discussion of reading times. + +\subsection{Total time spent} + +Based on our data, we estimate that humanity spent about \emph{672,349 years} reading Wikipedia from November 2017 through October 2018. We calculated this estimate by multiplying the mean reading time on each Wikipedia wiki by the number of page views on that wiki, excluding readers using the mobile apps and identified bots. It is possible that some people leave Wikipedia pages visible in their browsers for extended periods of time without reading. To make our estimates of total reading time in this section somewhat conservative, we truncated every page view longer than 1 hour to 1 hour. + +\subsection{Variation between different language editions} + + +% We hope these plots will be readers who may wish to know how reading times compare between their wikis of interest. +\hyperref[fig:kernelplots]{Figure \ref*{fig:kernelplots}} shows kernel density estimates of the distribution of page visible times on several Wikipedia language editions selected to highlight projects of different sizes and of different cultures. These are Arabic (ar), German (de), English (en), Spanish (es), Hindi (hi), Dutch (nl) and Punjabi (pa). As above, we place unscaled data side-by-side with log-transformed data. Only the log-transformed plots show the full range of the data. 
Similar kernel density plots for other languages as well as box-and-whisker plots are available in our online supplement.\footnote{Available at \url{https://w.wiki/5Jo}.} + +\begin{figure} + +%\includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/4.png} +\centering + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\columnwidth]{figures/kernelplots-1} +\end{knitrout} +\caption{Kernel density plots of the distribution of dwell times on a selection of wikis. Spanish, Hindi, and Arabic appear to have longer reading times while English and Punjabi appear to have somewhat shorter reading times. In general, the distribution is very skewed, as these example wikis demonstrate.\label{fig:kernelplots}} + +\end{figure} + +%We observe a great deal of variation in the distribution of reading times between different language editions. + +\begin{table}[t] +\centering +\begin{tabular}[]{@{}llllll@{}} +\toprule +wiki & 5\% & 25\% & 50\% & 75\% & 95\% \\ +\midrule +all wikis & 1.8 & 8.0 & 25.0 & 75.1 & 439.1 \\ +ar & 5.2 & 5.2 & 21.5 & 69.9 & 371.7 \\ +de & 14.1 & 14.1 & 14.1 & 56.6 & 482.7 \\ +en & 37.2 & 37.2 & 37.2 & 37.2 & 262.4 \\ +es & 23.3 & 23.3 & 23.3 & 65.5 & 616.4 \\ +hi & 2.5 & 11.4 & 31.4 & 82.6 & 360.5 \\ +nl & 6.1 & 6.1 & 15.9 & 60.1 & 441.8 \\ +pa & 2.0 & 7.2 & 19.5 & 55.4 & 303.1 \\ +\bottomrule +\end{tabular} +\caption{Percentiles for reading times (in seconds) on selected Wikipedia editions\label{tab:wikilangpercentiles}} +\end{table} + + +\section{Univariate model selection} + +\subsection{Motivation} + +Analysts of reading times on Wikipedia will wish to make parametric assumptions to justify the use of statistical models for evaluating experiments, drawing comparisons between different samples of reading times, and performing multivariate analyses as we do below. 
This requires assuming +a probability distribution with interpretable parameters such +as mean, variance, and shape parameters. Fitting parametric +distributions to data allows us to estimate these parameters +and to statistically test changes in the parameters. However, +parametric models can mislead if they don't fit the data well. Below, we evaluate several models. + + +\subsection{Candidate models} +We consider the following distributions in our model-selection process. + +\textbf{Log-normal distribution:} This is a normal distribution, but on a logarithmic scale. Differences in means between log-normal samples can be tested using t-tests. Such advantages make the log-normal distribution a common choice in analyzing skewed data, even when it is not a perfect fit. + +% This gives it convenient properties because its parameters the mean and variance of the log-transformed data. + +\textbf{Lomax (Pareto Type II) Distribution:} Datasets on human behavior often exhibit power-law distributions, meaning that the probability of extreme events, while still low, is much greater than would be predicted by a normal (or log-normal) distribution \citep{clauset_power-law_2009}. We fit the Lomax Distribution, a commonly used long-tailed distribution with two parameters that assumes that power law dynamics occur over the whole range of the data. + +\textbf{Weibull Distribution:} Liu et al. model reading times on web pages using a Weibull Distribution \citep{liu_understanding_2010}. This model has two parameters: {\bfseries {${\displaystyle \lambda }$}}, a scale parameter, and {\bfseries {${\displaystyle k}$}}, a shape parameter. The Weibull distribution can be a useful model because of the intuitive interpretation of {\bfseries {${\displaystyle k}$}}. If {${\displaystyle k>1}$}, then reading behavior exhibits positive aging, which means that the longer someone stays on a page, the more likely they are to leave the page at any moment. 
Conversely {${\displaystyle k<1}$} is interpreted as negative aging, which means that as someone remains on a page, they become less likely to leave the page at any given moment. The Weibull distribution is often used in the context of reliability engineering for modeling the chances that a given part will fail at a given moment. + +\textbf{Exponentiated Weibull Distribution:} The Weibull model assumes that the rate of readers leaving a page changes monotonically over time. This implies there must be either negative aging, positive aging, or no aging. It excludes more complicated dynamic processes where positive aging gives way to negative aging after a point in time. The exponentiated Weibull distribution is a three-parameter generalization of the Weibull distribution that relaxes this constraint \citep{pal_exponentiated_2006}. The extra degree of freedom will allow this model to fit a greater range of empirical distributions compared to the two-parameter Weibull model. + + %Therefore if the data show that the likelihood of a reader leaving a page first increases and then decreases (or vice versa) then assumptions of the Weibull model are violated. + + +%We also considered the \url{https://en.wikipedia.org/wiki/gamma_distribution}{gamma distribution} and the \url{https://en.wikipedia.org/wiki/exponential_distribution}{exponential distribution}, but we will not go into depth about them here. We didn't have a strong motivation for these models and they did not fit the data well. + +\subsection{Methods} + +Our method for model selection is inspired in part by Liu et al., who compared the log-normal distribution to the Weibull distribution of dwell times on a large sample of web pages \citep{liu_understanding_2010}. 
They fit both models to data for each web page, and then compare two measures of model fit: the log-likelihood, which measures the probability of the data given the model (higher is better), and the Kolmogorov-Smirnov distance (KS-distance), which is the maximum difference between the model CDF and the empirical CDF (lower is better). For the sample of web pages they consider, the Weibull model outperformed the log-normal model in a large majority of cases according to both goodness-of-fit measures. + +Similar to the approach of Liu et al., we fit each of the models we consider on reading time data, separately for each Wikipedia project \citep{liu_understanding_2010}. In addition to the KS-distance, we also use KS-tests of the null hypothesis that the model is a good fit for the data to evaluate goodness-of-fit \citep{clauset_power-law_2009}. For the sample sizes we use, passing the KS-test is a high bar. + +Adding parameters can increase model fit without improving out-of-sample predictive performance or explanatory power. To make a fair comparison between models with different numbers of parameters, we use the Akaike information criterion (AIC) and the Bayesian information criterion (BIC) instead of the log-likelihood. Both criteria attempt to quantify the amount of information lost by the model (lower is better), by evaluating the log likelihood, and adding a penalty for the model parameters. The difference between AIC and BIC is that the BIC's penalty for additional parameters grows with the sample size.\footnote{We provide a more detailed example of this procedure in our online supplement at \url{https://w.wiki/5Jo}.} + +%This allows us to go beyond Liu et al. by evaluating whether each distribution is a plausible model, instead of just whether one distribution is a better fit + +%We also use the KS distance to evaluate goodness-of-fit. The +%The KS-test is quite sensitive to deviations between the model and the data, especially in large samples. than another. 
+ +%compare two distributions that each have 2 parameters, but the models we consider have different numbers of parameters (the exponentiated Weibull model has 3 parameters and the exponential model has only 1). Adding parameters can increase model fit without improving out-of-sample predictive performance or explanatory power. To avoid the risk over-fitting and to make a fair comparison between models we + +Following Liu et al., we build these goodness-of-fit measures for each wiki and rank them from best to worst \citep{liu_understanding_2010}. For each distribution, we report the mean and median of these ranks. In addition, we report the mean and median p-values of the KS-tests as well as the number and proportion of wikis that pass the KS-test for each model. + +We fit the models using SciPy. The exponentiated Weibull, Weibull, and Lomax models were fit using maximum likelihood estimation and the log-normal distributions were fit using the method of moments. + + +%We also use diagnostic plots to compare the empirical and modeled distributions of the data in order to explain where models are failing to fit the data. Because the data is skewed, we log the X axis of these plots. + +%The diagnostic plots are shown with data on English Wikipedia. On this wiki, the exponentiated Weibull model is the best fit, followed by the Lomax model and then the log normal model and only the exponentiated Weibull model passes the KS test. + +\section{Results} + +%\subsection{Goodness-of-fit metrics} + +\hyperref[tab:gof]{Table \ref*{tab:gof}} below shows the results of this procedure. The Lomax, exponentiated Weibull, and Log-normal all fit the data reasonably well. All pass the KS-test for many wikis, and are in a three-way tie for best median rank according to AIC. Despite this, none of our candidate models pass the KS test for all wikis: There are 28 wikis where all 4 models fail to pass at the 95\% level, and 13 wikis where they all fail at the 97.5\% level. 
+ +% this table is broken +\begin{table*} +\centering +\begin{footnotesize} +\centering +\begin{tabular}[]{@{}lllllllllllll@{}} +\toprule +\emph{model} & \multicolumn{2}{c}{AIC rank} & \multicolumn{2}{c}{BIC rank} & \multicolumn{2}{c}{KS rank} & \multicolumn{2}{c}{KS p-value} & +\multicolumn{2}{c}{KS 95\%} & \multicolumn{2}{c}{KS 97.5\%} \\ +\midrule +& mean & med. & mean & med. & mean & med. & mean & med. & mean & passing & mean & passing \\ +Lomax & 1.78 & 2 & 1.70 & 1 & 2.09 & 2 & 0.26 & 0.17 & 0.79 & 192 & 0.87 & 211 \\ +Log-normal & 2.20 & 2 & 2.10 & 2 & 2.33 & 2 & 0.27 & 0.17 & 0.71 & 173 & 0.79 & 191\\ +Expon. Weibull & 2.15 & 2 & 2.34 & 3 & 2.11 & 2 & 0.29 & 0.23 & 0.77 & 187 & 0.84 & 203 \\ +Weibull & 3.98 & 4 & 3.94 & 4 & 3.84 & 4 & 0.07 & 0.00 & 0.24 & 59 & 0.30 & 72\\ +\bottomrule +\end{tabular} +\end{footnotesize} +\caption{Goodness of fit statistics resulting from the model selection process on 242 wikis. The Lomax, log-normal, and exponentiated Weibull distributions fit the data reasonably well, but the Lomax most often fits the best. The ``mean'' columns under KS 95\% and KS 97.5\% refer to the proportion of wikis passing KS-tests at the 95\% and 97.5\% significance levels, and the ``passing'' columns state the absolute number. \label{tab:gof}} +\end{table*} + +The Lomax distribution is the best fit across all wikis according to all metrics. With only 2 parameters, it has a lower AIC and BIC than the three-parameter exponentiated Weibull distribution and passes the KS-test 79\% of the time at the 95\% confidence level. +The exponentiated Weibull model fits the data better than the log-normal model in terms of passing KS-tests and with respect to AIC. However, the log-normal is better in terms of BIC, which imposes a greater penalty on the additional parameter of the exponentiated Weibull model. + +The Weibull model fits substantially worse than the Lomax, log-normal, and exponentiated Weibull in terms of all of our goodness-of-fit metrics. 
In this respect, our results differ from those of Liu et al., who observed the Weibull model fitting dwell time data better than the Log-normal model \citep{liu_understanding_2010}. We observe that for dwell times on Wikipedia, the Log-normal model is the better fit. While substantially worse than the Lomax model, the Log-normal model still passes the KS-test at the 95\% level for about 71\% of wikis in the sample. + +\subsection{Discussion} + +We found that the Lomax, exponentiated Weibull, and log-normal models all fit the data within reason. We now discuss how each of these models can be applied to understanding Wikipedia reading behavior. + +\textbf{Lomax (Pareto Type II) Distribution:} That the Lomax model fits well suggests that Wikipedia reading times may follow a power law. Mitzenmacher (2004) describes several possible data generating processes for power law (Pareto) and log-normal distributions \citep{mitzenmacher_brief_2004}. Rich-get-richer dynamics such as preferential attachment are commonly associated with power law distributions, and a mixture of Log-normal distributions can also generate a power law \citep{mitzenmacher_brief_2004}. Deeper exploration of potential power-law dynamics in reading behavior is a potential avenue for future research. + +%On the other hand, it is intuitive that a mixture of different log-normal processes may be involved in reading time, such as an exploration process mixed with a reading process or even a mixture of behavior patterns associated with different types of information consumption. + + \textbf{Log-Normal Distribution:} The log-normal model does not fit the data perfectly, but it fits well enough to be useful. It frequently passes KS-tests, and is preferred to the exponentiated Weibull by the BIC. Even though the Lomax model typically fits the data better, assuming a log-normal model justifies using t-tests to compare differences in geometric means when evaluating experiments. 
Furthermore, assuming log-normality can help justify using ordinary least squares to estimate regression models in multivariate analysis (as we do below) instead of models that require maximum likelihood estimation. + + \textbf{Weibull Distribution:} The Weibull model did not fit the data well. %This was somewhat disappointing because we had hoped to analyze reading behavior in terms of the inferred parameter that indicates positive or negative aging. + While Liu et al. observed that the Weibull model out-performed the log-normal model on their datasets, we (along with \citet{yin_silence_2013}) observe the opposite. However, the exponentiated Weibull model generalizes the Weibull, is a good fit for the data, and can help us explain why the Weibull does not fit the data well. + + \textbf{Exponentiated Weibull Distribution:} The exponentiated Weibull has 3 parameters \citep{pal_exponentiated_2006}. Two are shape parameters ({${\displaystyle \alpha >0}$} and {${\displaystyle \gamma >0}$}) and one is a scale parameter ({${\displaystyle \lambda >0}$}). The major qualitative distinctions in interpreting the model depend on the shape parameters. In many cases the parameters can be interpreted in terms of a transition from negative to positive aging (or vice versa) after some threshold. However, if either {${\displaystyle \gamma >1}$}, {${\displaystyle \alpha <1}$} or {${\displaystyle \gamma <1}$}, {${\displaystyle \alpha >1}$} then qualitative interpretation may require closer inspection of estimated hazard functions. + +Inconveniently, we estimated {${\displaystyle \alpha >1}$} and {${\displaystyle \gamma <1}$} for all but one of the 285 Wikipedia projects we analyzed. This limits the usefulness of exponentiated Weibull models for large-scale analysis on many wikis because the parameters are outside the area where the model leads directly to intuitive qualitative interpretations. 
However, by plotting the estimated hazard function we can see over what range of the data the hazard function is decreasing or increasing, accelerating or decelerating. + +\begin{figure} + + +%\includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/5.png} +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\columnwidth]{figures/hazardplot-1} +\end{knitrout} + +\caption{Hazard functions for the parametric models estimated on English Wikipedia. The exponentiated Weibull model (the best fit to the data) indicates that the hazard rate increases in the first seconds of a page view, after which we observe negative aging.\label{fig:hazards}} + +\end{figure} + +In \hyperref[fig:hazards]{Figure \ref*{fig:hazards}} we observe that, on English Wikipedia, the log-normal and exponentiated Weibull models both indicate a brief period of positive aging, during which the instantaneous rate of page-leaving increases, followed by negative aging. This helps explain why the Weibull model is not a good fit for the data compared to the log-normal and exponentiated Weibull models: the Weibull distribution cannot model a non-monotonic hazard function. While Liu et al. found it to be a good model for the distribution of dwell times in data collected through a web browser plugin, our analysis suggests that the behavior of Wikipedia readers may be somewhat more complex. Perhaps whereas Liu et al. operationalized ``screen-and-glean'' as a monotonically decreasing hazard function, Wikipedia readers require more than 1 or 2 seconds to ``screen'' the page and during these first few moments, their hazard of leaving it increases. 
+ +%This proposition can be tested if the use of a feature that provides information about the content of a page before it is opened (such as page previews) leads to monotonically decreasing hazard rates.\footnote{https://www.mediawiki.org/wiki/Page\_Previews}% I don't understand the preceding sentence. --TB + +%\subsubsection{Distribution fitting plots} + +%To further explore how well these distributions fit the data, we present a series of diagnostic plots that compare the empirical distribution of the data with the model predicted distributions. For each of the four models under consideration (Lomax, Log-normal, exponentiated Weibull, Weibull), we present a density plot, a distribution plot, and a quantile-quantile plot (Q-Q plot). The density plots compare the probability density function of the estimated parametric model to the normalized histogram of the data. Similarly the distribution plots compare the estimated cumulative distribution to the empirical distribution. The Q-Q plots plot the values of the quantile function for the data on the x-axis and for the estimated model on the y-axis. These plots can help us explain diagnose ways that the data diverge from each of the models. We present the x-axis of all these plots on a logarithmic scale to improve the visibility of the data. +%We show these plots for data from English Wikipedia. For this wiki, the liklihood-based goodness-of-fit measures indicate that the exponentiated Weibull model is the best fit (BIC = 19321) followed in order by the Lomax (BIC = 19351), the Log-normal (BIC = 19373) and the Weibull (BIC = 20111), but the log-normal model is the only model that passes the KS test ({${\displaystyle p}$} = 0.089). 
+ + +% {\scalefont{0.52741}\begin{longtable}{>{\RaggedRight}p{0.47143\linewidth}>{\RaggedRight}p{0.47143\linewidth}} +% \hspace*{0pt}\ignorespaces{}\hspace*{0pt}\begin{center}\uline{}\begin{minipage}{1.0\linewidth}\begin{center}\includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/6.png}\end{center}\myfigurewithoutcaption{6}\end{minipage} \textbf{Figure 2.2.{\mbox{$~$}}}\emph{ The Lomax model accurately estimates the rate of long reading times, but its monotonic density overestimates the probability of very short reading times and underestimates that of reading times in the range of 1 110 seconds.}\end{center}\begin{center}\uline{}\begin{minipage}{1.0\linewidth}\begin{center}\includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/7.png}\end{center}\myfigurewithoutcaption{7}\end{minipage} \textbf{Figure 2.4.{\mbox{$~$}}}\emph{ The Exponentiated Weibull model fits the data somewhat better than the Log-normal model, but still overestimates the occurrence of very short reading times.}\end{center}&\hspace*{0pt}\ignorespaces{}\hspace*{0pt}\begin{center}\uline{}\begin{minipage}{1.0\linewidth}\begin{center}\includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/8.png}\end{center}\myfigurewithoutcaption{8}\end{minipage} \textbf{Figure 2.3.{\mbox{$~$}}}\emph{ The Log-normal model fits the data well, but overestimates the probability of very short reading times and underestimates the probability of very long reading times.}\end{center}\begin{center}\uline{}\begin{minipage}{1.0\linewidth}\begin{center}\includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/9.png}\end{center}\myfigurewithoutcaption{9}\end{minipage} \textbf{Figure 2.5.{\mbox{$~$}}}\emph{ The Weibull model is not a good fit for the data. On a log scale, the PDF is not only monotonically decreasing, it is concave up everywhere. 
It greatly overestimates the probability of very short and very long reading times while under estimating the probability of reading times between 10 and 1000 seconds.}\end{center} +% \end{longtable} +% } + +%\cite{graham_warped_2008} argues that despite the metaphor of cyberspace as transcending space and time, that information technology + +\section{Reading time and global contexts} +% each of these are 1-3 paragraphs on things other people have done and 1 short paragraph about how we contribute +Now we return to our analysis of Wikipedia readers in a global context. Our analysis is most closely inspired by Lemmerich et al.'s large-scale global survey of Wikipedia readers. They found that readers in lower-HDI countries are more likely to use Wikipedia in educational contexts and for intrinsic learning, but not for fact-checking \citep{lemmerich_why_2019}. Such motivations and contexts are likely to involve longer sessions and dwell times compared to fact-checking \citep{lemmerich_why_2019, singer_why_2017}. +Therefore, we predict that readers in lower-HDI countries and in the Global South are more likely to have longer dwell times on Wikipedia articles. + +\textbf{H1:} Readers in countries with lower HDI (or the Global South) are more likely to spend more time reading each page they visit compared to readers in countries with higher HDI (or the Global North). + +%Despite the prior evidence in favor of \textbf{H1}, distributions of device gaps, reader fluency, internet skills, and internet connectivity might drive reading times in less developed and global South countries compared to more developed and Global North countries. We attempt to build confidence that such factors do not drive the observed relationship in two ways. First we use multivariate regression to statistically control for observable factors such as device load time, device type, and page length. 
We also attempt to increase support for the theory that different kinds of information seeking drive the reading time gap by testing additional predictions of it \cite{stinchcombe_constructing_1987}. + +We also test a second prediction of the theory that Global South readers are more likely to use Wikipedia for in-depth understanding. If desktop devices have advantages for reading to gain in-depth understanding then users may be more likely +to choose these devices for such tasks (when they have the choice). Furthermore, Global South readers may also experience gaps limiting their access to desktop devices, and when they do have access may be likely to take advantage of such opportunities by reading longer. Therefore, we expect users in countries within the Global South designation (or with lower HDIs) to read even longer on desktop devices. + +\textbf{H2:} The difference between the reading times of readers in countries with lower HDI compared to readers in higher-HDI countries will be greater on desktop than on mobile devices. + +Based on the ``screen-and-glean'' model of information seeking behavior that Liu et al. observed on the web \citep{liu_understanding_2010}, we propose that reading of articles for in-depth understanding is most likely to take place in the last page view in a session. Differences in reading time in other page views might be attributable to less efficient ``screening''---gaps in the skills required to efficiently sift through Wikipedia pages to find the page with the information sought. However, the final page view in a session may reflect ``gleaning''---information consumption. If so, then the last page view in a session provides an opportunity to isolate information consumption from information seeking. 
+ +Therefore if the gap between low and high development context readers is attributable to types of information seeking tasks, and in-depth reading tasks require more time spent ``gleaning,'' then we predict that the gap between reading time in low versus high HDI countries will also be amplified on the last page view in a session. + +\textbf{H3:} The difference between reading times in countries with lower HDI and countries with higher HDI will be greater on the last page view in a session than on other page views. + +On the other hand, a ``skills gap'' with respect to information screening may drive an opposite result. The gap between reading times in the Global South and the Global North may shrink on the last page view in a session if Global South readers are less efficient at filtering information. + +\subsection{Methods and measures} + +The EventLogging system records the date and time the page was viewed. +We include \emph{Day-Of-Week} and \emph{Month} as statistical controls for seasonal and weekly reading patterns. Including \emph{NthInSession} statistically adjusts for the number of pages a reader has viewed so far in the session. \emph{Revision Length}, the size of the wiki-page, measured in bytes, roughly accounts for the amount of content on the page. We use two other measures from the instrument to statistically adjust for page load time: \emph{time till first paint}, the time from the request until the browser starts to render any part of the page; and \emph{dom interactive time}, the time from the request until the user can interact with the page.\footnote{See \url{https://developer.mozilla.org/en-US/docs/Web/API/PerformanceNavigationTiming/domInteractive} \textit{archived at} \url{https://perma.cc/RRA8-8SQG}. DOM refers to the page's ``document object model'' structure.} + +We obtain the \emph{page length}, measured in bytes at the time the page was viewed, by merging the EventLogging data with the edit history. 
To understand how reading behavior on \emph{mobile} devices differs from behavior on non-mobile (i.e. desktop) devices, we assume that visitors to mobile web-hosts (e.g. en.m.wikipedia.org) are using mobile devices and that visitors to non-mobile web-hosts (e.g. en.wikipedia.org) are on non-mobile (desktop) devices. + +We determine the approximate country in which a reader is located from the MaxMind GeoIP database, which is integrated with the Wikimedia analytics pipeline.\footnote{See \url{https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Geolocation} \textit{archived at} \url{https://perma.cc/C36T-2E4E}} We use the United Nations' human development index (\emph{HDI}) to measure the development level of the country.\footnote{From \url{http://hdr.undp.org/en/data} \textit{archived at} \url{https://perma.cc/SLQ3-HS8S}. The HDI is a number between 0 and 1.} We lack geolocation data before March 3rd 2018, which limits our analysis of reading times in the global context to the period from then until September 28th 2018. We standardize the HDI by centering to 0 and scaling it by the standard deviation (taken at the country level) because the partial residual plots of the interaction term between (unscaled) HDI and mobile were very skewed. This also allows us to interpret results in terms of standard deviations. + +We also use the established regional classifications of Global North and Global South\footnote{See \url{https://meta.wikimedia.org/wiki/List_of_countries_by_regional_classification} \textit{archived at} \url{https://perma.cc/WHN7-GB9D}} as a second, dichotomous, measure of development. Finally, the EventLogging instrumentation retains a session token with which we measure whether or not a given page view is the \emph{last-in-session}. We also statistically adjust for the number of pages viewed in the session so far (\emph{Nth in session}). 
+ +\subsubsection{Models} +We test the three hypotheses using two regression models that differ only in how they represent economic development. \emph{Model 1a} uses the human development index (HDI) and \emph{model 1b} uses the Global North / Global South regional classification. Here is the specification of \emph{model 1a}: + +\begin{small} + +\[ Y=B_{0}+B_{1}HDI+B_{2}Mobile+B_{3}Mobile~x~HDI\] +\[+ B_{4}RevisionLength+B_{5}DayOfWeek+B_{6}Month\] +\[+ B_{7}NthInSession+B_{8}LastInSession\] +\[+B_{9}HDI~x~LastInSession+B_{10}Mobile~x~LastInSession\] +\[+B_{11}FirstPaint+B_{12}DomInteractiveTime\] + +% \textbf{Model 1b:} +% \[Y=B_{0}+B_{1}GlobalNorth+B_{2}Mobile+B_{3}Mobile:GlobalNorth\] +% \[B_{4}RevisionLength+B_{5}DayOfWeek+B_{6}Month\] +% \[+ B_{7}NthInSession+B_{8}LastInSession\] +% \[+B_{9}GlobalNorth:LastInSession+B_{10}Mobile:LastInSession\] +% \[+B_{11}FirstPaint+B_{12}DomInteractiveTime\] +\end{small} + +The formula for \emph{model 1b} is the same except for using \emph{GlobalNorth} terms instead of \emph{HDI}. + +We consider \textbf{H1} supported if {${\displaystyle B_{1}<0}$} in both models; \textbf{H2} if {${\displaystyle B_{3}>0}$}; and \textbf{H3} if {${\displaystyle B_{9}<0}$}. Because interaction terms can be difficult to interpret qualitatively, we will present marginal effect (ME) plots to assist in qualitative interpretation of the observed relationships \citep{pepinsky_visual_2018}. + +We explored alternative model specifications that include higher order terms and additional interaction terms. We choose to present \emph{model 1a} and \emph{model 1b} because more complex models neither substantively improve the explained variance and the predictive performance nor lead to qualitatively different conclusions. We fit both models using weighted ordinary least squares estimation in R on a stratified sample of size 9,873,641. 
+ + +\subsection{Non-parametric Analysis} + +Our multivariate regression analysis assumes a parametric model and, as we saw in the univariate analysis above, the assumption of log-normality may not be valid for every Wiki. Therefore, we also provide a simple non-parametric analysis based on median reading times. Unlike the regression analysis, the non-parametric analysis does not include statistical controls or afford statistical hypothesis tests, but it avoids having to depend on assumptions about the distribution. We construct a $2 \times 2 \times 2$ table of users depending on whether they are in the Global North or Global South, on a mobile or desktop device, and on the last page view in their session or not. The medians of each cell of the table validate that our findings are not driven by the normality assumption alone. + + + +\section{Results} + +%\subsection{Regression Analysis} + +We use marginal effects (ME) plots to interpret our regression models.\footnote{Full regression tables are available in the appendix.} A marginal effects plot shows how the model's predicted outcome varies with respect to one or more of the predictors when other terms of the model are held constant at some typical value \citep{pepinsky_visual_2018}. Since we are interested in comparing reading times between last-in-session page views and other page views, we create two marginal effects plots for each model: one for last-in-session page views and one for non-last-in-session page views. Similarly, we also break down predicted reading times by device type. + +For each marginal effects plot, the y-axis shows the model predicted values and the x-axis shows the values of the predictor variables. In the marginal effects plots shown here, uncertainty intervals represent confidence intervals of the parameter estimates, not uncertainty about the model predictions. 
Uncertainty about model predictions in this case is generally very high, as our models explain only a small fraction (about 7\%) of the variance in reading times. + +% start out just considering mobile devices, non-last-in-session +\subsection{Hypothesis 1: Global context and reading times} + +We find support for \textbf{H1}: that readers in higher-HDI countries ({${\displaystyle \mathrm {B} =-0.20,~SE=0.002}$}) or in the Global North ({${\displaystyle \mathrm {B} =-0.27,~SE=0.002}$}) are likely to spend less time on each page than readers in lower HDI countries or in the Global South. For illustration, our ME plot for \emph{model 1a} (\hyperref[fig:model1aplot]{figure \ref*{fig:model1aplot}}) shows that, for non-last-in-session page views, a prototypical reader on a desktop device in a country with an HDI one standard deviation below the mean is predicted to spend about 25 seconds on a given non-last-in-session page view compared to the predicted 18 seconds spent by an average reader in a country with an HDI one standard deviation above the mean. Similarly, per our ME plot for \emph{model 1b} (\hyperref[fig:model1bplot]{figure \ref*{fig:model1bplot}}), for last-in-session page views on desktop devices, a prototypical Global North reader is predicted to spend around 42 seconds per page view compared to the 50 seconds spent by a prototypical Global South reader. + +%Similarly, model 1b predicts that a user in Global South country will spend 130\% as much time reading a page as an equivalent reader in a Global North country. 
+ +% \begin{figure} +% \includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/11.png} +% \caption{ Marginal effects plot showing how the time spent on pages depends on the development level of the country they are in.} +% \end{figure} + +% \begin{figure} +% \includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/12.png} +% \caption{Marginal effects plot showing how the time spent on pages depends on the development level of the country they are in.} +% \end{figure} + + +% \begin{figure} +% \includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/16.png} +% \caption{Marginal effects plot showing how the time spent on pages depends on whether a reader is on whether they are on their last page view in a session, and the development level of the country they are in.} +% \end{figure} +\begin{figure}[t] +\centering +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\columnwidth]{figures/model1aplot-1} +\end{knitrout} +\caption{\label{fig:model1aplot} Marginal effects plot showing the relationship between HDI and reading time predicted by \emph{model 1a}. The negative slope of the lines shows that lower-HDI readers have longer reading times, and the difference in slopes between devices shows that the relationship between HDI and reading time is more pronounced on desktop devices. The ribbons reflect 95\% confidence intervals of the model coefficients. The x-axis units represent standard deviations from the mean HDI.} +\end{figure} + + +\subsection{Hypothesis 2: Global context and mobile devices} + +%We proposed \textbf{H2:} that in the device gap between mobile and desktop devices such that desktop devices are superior tools for gaining an in-depth understanding that the reading time gap between lower-HDI and greater-HDI readers would be greater in magnitude on desktop devices. 
+ +We also find support for \textbf{H2}: that readers in the Global North ({${\displaystyle \mathrm {B} =0.15,~SE=0.002}$}) or higher-HDI ({${\displaystyle \mathrm {B} =0.11,~SE=0.002}$}) countries are likely to spend even less time reading compared to Global South or lower-HDI readers when they are on a desktop device compared to a mobile device. This is clearly visible as a difference in slopes in \hyperref[fig:model1aplot]{figure \ref*{fig:model1aplot}}. Indeed, for page views other than the last-in-session, the predicted reading time for prototypical readers in countries with an HDI 1 standard deviation below the mean decreases from 25 seconds on desktop devices to 22 seconds on mobile devices, but the reverse is true for readers in higher-HDI countries. In a country with an HDI 1 standard deviation above the mean, an otherwise comparable reader is predicted to read for about 19 seconds on mobile and about 17 seconds on desktop. The ME plot for \emph{model 1b} (\hyperref[fig:model1bplot]{figure \ref*{fig:model1bplot}}) shows that for the prototypical reader, the gap between Global South and Global North is greater on desktop devices (about 5 seconds) than on mobile devices (about 3 seconds). + +%that geometric means of reading times on mobile devices and desktop devices are 22 and 26 seconds respectively, a difference of 4 seconds. However, this moderate gap grows to midni + + +% \begin{figure} +% \includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/13.png} +% \caption{Marginal effects plot from Model 1 showing how the time spent on pages varies with type of device and on whether a page view is the last in a session.} +% \end{figure} + +%Much of this variation seems to be driven by the least developed countries where people read Wikipedia. 
In model 1b, where we label countries as Global South or Global North, readers in Global South countries appear to use mobile and Desktop devices in the same way on average, but readers in Global North countries spend more time reading on Mobile than on Desktop. + + +% \begin{figure} +% \includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/14.png} +% \caption{Marginal effects plot showing how the time spent on pages depends on whether a reader is on the kind of device they are using, and the development level of the country they are in.} +% \end{figure} + +% \begin{figure} +% \includegraphics[width=1.0\linewidth,height=6.5in,keepaspectratio]{../images/15.png} +% \caption{Marginal effects plot showing how the time spent on pages depends on the kind of device they are using, and the development level of the country they are in.} \end{figure} + +\subsection{Hypothesis 3: Global context and last-in-session} + +Based on the ``screen-and-glean'' results by Liu et al.\ \citep{liu_understanding_2010}, we expected in-depth reading to be most likely in the last page view in a session, and thus predicted \textbf{H3:} the difference in reading times between lower-HDI countries and higher-HDI countries will be amplified in the last page view in a session. However, we do not find support for this hypothesis, which would have been indicated by a negative regression coefficient for the interaction term between development and last-in-session. Instead we find positive coefficients for \emph{HDI:Last in session} ({${\displaystyle \mathrm {B} =0.06,~SE=0.002}$}) in \emph{model 1a} and for \emph{Global North:Last in session} ({${\displaystyle \mathrm {B} =0.08,~SE=0.002}$}) in \emph{model 1b}. + +%Note that there are still sizable reading time gaps between Global South and Global North readers and between devices when we look at last-in-session page views. 
These gaps appear larger in the marginal effects plots only because reading times are longer on average, and our model's logarithmic scale magnifies these differences, not because of the interaction term. + +% for the R&R it would be really cool to make a better version of this that shows how the medians as a tree-flow diagram. +\subsection{Non-parametric Analysis} + +\hyperref[tab:nonparametric]{Table \ref*{tab:nonparametric}} shows the median time pages are visible by the user's economic region, device and whether a page is the last viewed in the user's session. Consistent with \textbf{H1}, median users in the Global South spend more time on pages compared to median users in the Global North regardless of device or session stage. Consistent with \textbf{H2}, the difference between Global South and Global North users is clearly more pronounced on desktop compared to mobile. In contrast to the prediction of \textbf{H3}, but in line with the findings from our parametric analysis, we do not observe an accentuation of the difference between Global South and Global North users in the last page view in a session. + +\begin{small} +\begin{table}[b] +\centering +\begin{tabular}[]{@{}llll@{}} +\toprule +Economic-region & Desktop & Last-in-session & Time-visible\\ +\midrule + North & False & False & 20.1 \\ + South & False & False & 21.5 \\ + North & True & False & 16.1 \\ + South & True & False & 21.8 \\ + North & False & True & 28.1 \\ + South & False & True & 28.7 \\ + North & True & True & 39.8 \\ + South & True & True & 43.6 \\ +\bottomrule +\end{tabular} +\caption{Table of median reading times by last-in-session, economic region, and device type. Reading times in the Global South are greater than in the Global North in all categories, and are markedly greater on desktop compared to mobile devices. 
\label{tab:nonparametric}} +\end{table} +\end{small} +\subsection{Page length} + +In addition to the above results on reading times and global contexts, we also examined how reading times relate to page length. The association between page length and reading times is small and positive ({${\displaystyle \mathrm {B} =0.17,SE=0.0004}$}). +%as shown by the marginal effects plot in figure \hyperref{fig:pagelenplot}. +Pages on Wikipedia vary greatly in length: from just a few bytes up to 2,000,000 bytes. +%Our model estimates that the difference between the shortest and the longest page lengths can account for a difference in typical reading times from about 5 seconds to about 45 seconds. +If a page were to double its length, our model would predict a marginal increase in reading times of a factor of 1.2. For example, a page with 10000 bytes has a predicted reading time of 25 seconds, which for a page with twice that length (20000 bytes) increases to 30 seconds.\footnote{See our online supplement at \url{https://w.wiki/5Jo} for a marginal effects plot. Page length refers to the size of the wikitext source of the page measured in bytes. Not every byte corresponds to a character of readable text. Wikitext source also includes code for formatting, using templates, or embedding images. Additionally, some characters, especially in non-Latin alphabets, may take up multiple bytes. Still our results confirm that for longer Wikipedia articles, only a fraction of the text is read in a typical page view. Assuming a reading speed of around 250 words per minute and an average word length of 5 characters in English (not including spaces and punctuation), these 30 seconds would only suffice to read through less than 1000 of these 20000 bytes \citep{bell_extensive_2001, bochkarev_average_2012}.} + +\section{Limitations} +Two important technical limitations of our dwell time data affect our ability to compare reader behavior between mobile phone and PC devices. 
The first is missing data on mobile devices, discussed above. This missing data likely introduces a negative bias to our measures of reading time on mobile devices because we believe observations are more likely to be lost when users switch tasks from the browser and subsequently return to reading. This bias may be quite significant as the issue affects a large proportion of our sample. + +%Therefore, we do not believe that user behaviors that may generate the appearance of long reading times do not correspond to reading. +%An additional limitation arises from the missing data described above. It is possible that we are missing data in ways that may potentially confound our results, especially, but not exclusively, in terms of the comparison between mobile and non-mobile devices. + +%(the lunch break problem ). +The second limitation occurs when readers leave a page visible in the browser at times when they are not directly reading it. For example, a user may have multiple windows visible while only looking at one of them, or may leave a browser window visible and move away from the computer for a long period of time. In general, the best we can hope to observe is that a page is visible in a browser. We cannot, through this instrument alone, know with confidence that an individual is reading. This limitation leads to positive bias in our measures of reading time. To partially address this limitation, we refit our regression models after removing observations with dwell times greater than 1 hour (assuming that these observations contain a higher ratio of ``visible but not reading'' cases), and found that our results were not substantively affected by the change. + + +%We think that, especially combined with the first limitation described above (mobile devices failing to log a "page unloaded" event), this issue is likely to affect the data from desktop devices more often, thus limiting our ability to compare mobile phone and PC devices. 
+ +It is possible that this positive bias may correlate with our analytic variables. Perhaps last-in-session views may be particularly subject to this source of bias and may contribute to the gap we observe between reading times in last-in-session page views compared to others. We designed our analysis of H1 and H2 to account for differences between last-in-session and other page views, and found that the sign of the observed differences remained the same whether the view was the last in a session or not. We did not find support for H3, which considered differences within last-in-session page views. + +%Conclusions based on H3 would be more likely to be affected by this possible issue, but the data did not support H3 anyway. + +%aIn our patametric analysis by interacting \textit{last-in-session} with our analytic variables for these hypotheses (\textit{mobile}, \textit{HDI}, and \textit{GlobalNorth}). +%We don't see an a priori reason why such a reading pause would occur more often at specific points of a reading session. (Note that because of the way our instrumentation works, the user would normally need to become active again for the last view of a session to conclude and register a dwell time.) However, we can't completely rule out this possibility either based on our data alone. + +Additional steps could be taken to construct new measures of reading that would not suffer this limitation through browser instrumentation to track mouse movements or scroll positions. However, such steps should be taken with care as additional data collection may negatively affect users in terms of privacy, browser responsiveness, page load times, and power consumption. + +Finally, readers should keep in mind that we analyzed observational, not experimental, data with the intention to describe correlations between our variables, not to demonstrate causal relationships. 
We used ordinary least squares analysis, but future analysis might better account for the hierarchical structure of our data using multilevel modeling. + +\subsection{Alternative explanations} + +% could be good to cite something else on knowledge gap there +Furthermore, there are several plausible alternative explanations that we cannot rule out in the presented analysis. The observed reading time gap between more and less developed countries may be due to factors other than the types of information seeking tasks in which readers are engaged. For instance, if readers experience knowledge gaps in less developed countries, they may be likely to read in languages that are not their primary language, and thus spend more time reading regardless of task \citep{graham_uneven_2014}. A future iteration of this project may partially address this limitation by accounting for whether a Wikipedia edition is a common primary language in the reader's country. + +Another alternative explanation may be that the gap between readers in more and less developed countries is partly due to time spent on exploration (``screening'') rather than on content consumption (``gleaning''). Our finding rejecting \textbf{H3}, +%that the last page view in a session is not associated with an amplified gap between Global South and Global North readers +suggests this, as Global South readers have longer dwell times on non-last-in-session page views compared to Global North readers. +We also observe shorter non-last-in-session page views on desktop devices compared to mobile for Global North readers, but for Global South readers such page views are about the same length no matter what device is used. This unexpected result would be consistent with a skills gap experienced by Global South readers who may have greater difficulty finding sought information, especially when using desktop devices \citep{deursen_toward_2015}. 
The present analysis offers only tentative support for this claim, but we suggest it as an avenue for future research. + +% propagate the changes in the below paragraph to the report +Global South readers may also be more sensitive to the price of downloading data and thus they may avoid opening pages that they are unlikely to read in-depth. Future work might use data from the Wikipedia Zero project to study the relationship between price sensitivity and Wikipedia audiences. More generally, drawing conclusions about information seeking from our analysis rests on strong assumptions about relationships between task type and reading times. Future work on information seeking behavior on Wikipedia testing these assumptions would help validate such conclusions. + + +% \begin{figure} +% \centering +% <>= +% p <- ggplot(pagelen.plot.data,aes(x=x,y=predicted,ymax=conf.high,ymin=conf.low)) +% p <- p + geom_ribbon(alpha=0.3) + geom_line() +% p <- p + ylab("Time visible (seconds)") +% p <- p + xlab("Page length (log bytes)") +% p <- p + theme(legend.position = "none") +% p +% @ +% \caption{Marginal effects plot showing how the time spent on pages depends on page length according to Model 1a. \label{fig:pagelenplot}} +% \end{figure} + + +\section{Discussion and Conclusion} + +%\subsection{} + +In an analysis of novel data from Wikipedia, measuring the time that web pages are visible in the browser window as an approximation of reading time, we investigated patterns of reader behavior across global contexts and found systematic differences consistent with greater use for in-depth understanding in lower-HDI countries compared to higher-HDI countries. We believe this analysis should strengthen confidence in similar findings from surveys of reader behavior because our data have complementary strengths and limitations compared to self-report data. 
+ +We conclude that Global South readers are more likely to engage in in-depth information seeking when reading Wikipedia compared to Global North readers. Consistent with Lemmerich et al.'s survey results \citep{lemmerich_why_2019}, we find that readers in lower-HDI countries have longer reading times than readers in higher-HDI countries, and that this difference is greater for users of non-mobile (desktop) devices. + +The observed relationships are quite similar whether measured using the human development index (HDI) or dichotomized economic region (Global South / Global North). These relationships are supported not only by the regression models, but also by non-parametric analysis. +While Wikipedia readers increasingly use mobile devices to visit Wikipedia, they are likely to spend the most time reading when they are in the last page view of a desktop session. This is exactly when we expect them to gain in-depth understandings of topics. + +We lack evidence to fully explain our findings in terms of structural and socioeconomic differences between the Global North and Global South. One possibility is that the gap in reading times reflects differences in information seeking and content understanding skills \citep{deursen_toward_2015, shaw_pipeline_2018}. That we did not observe the gap between global contexts widen in last-in-session page views tentatively suggests that Global South readers are more likely to struggle to find and filter information on Wikipedia compared to Global North readers. + +However, given the evidence that Wikipedia readers in the Global South are more likely to engage in deeper information seeking tasks \citep{lemmerich_why_2019}, we conjecture that the gap in reading times may be explained by the quality and accessibility of the information on Wikipedia relative to alternatives available in the reader's contexts. 
Wikipedia may not be perfect, but given historical inequalities in education and knowledge production between the Global South and Global North \citep{graham_uneven_2014}, it still might be competitive compared to other sources, especially when it comes to encyclopedic content about the Global South, content in local languages, and information not otherwise available for free to Internet users. This would explain why Global South readers would be more likely to choose Wikipedia when seeking in-depth information. Future research might test this hypothesis in audience surveys or by adapting approaches previously applied to gender comparisons on English Wikipedia \citep{reagle_gender_2011}. + + +%We considered While we However, we did not find our predicted relationship between reading behavior in the last page view in a session and the association between + +%device use is associated with an amplified gap between Global South and Global North readers supports the idea that + +%All of these results are consistent with the proposition that readers in the Global South are more likely to engage in deep information seeking tasks compared to readers in the Global North. + +%Our analysis reading time is generally consistent with findings from the survey study, which suggested that readers in Global South countries are most likely to engage in more intensive information seeking tasks. + +%We also considered whether the relationship would be amplified in the last view in a browser session, which we expect to be associated with content consumption as opposed to discovery. While we do observe that all readers dwell for longer in the last page view in a session, and that readers in developing countries appear to read longer, we do not observe the gap between readers in less developed and more developed countries amplified in the last view in the session. 
+ +Another contribution of this study is to vet the reading time data to understand its limitations and to conduct model selection to justify parametric assumptions for future analysts. We found a high rate of missing data on mobile, among other less significant irregularities. Future analysts should keep this in mind and work to improve the coverage. We found that the log-normal distribution often fits the data well, and therefore adopted the use of geometric means as a metric for comparing samples of reading times. This also helped support our decision to adopt ordinary least squares regression analysis for multivariate comparison. However, we also found that exponentiated Weibull and Lomax probability models were often an even better fit. Future researchers might explore how reader behavior may generate data in processes consistent with these models. + +%To further complement the approaches to studying Wikipedia audiences described here, we suggest that future researchers conduct field studies of Wikipedia readers. Direct, in-person observation of Wikipedia readers can help us understand how people use Wikipedia in much greater detail than surveys and browser instrumentation can allow. + +The reading time data we used in this study is a promising tool for future researchers to improve upon studies of page views for understanding Wikipedia's audiences. For example, recent research has shown widespread misalignment between how often articles are visited and the quality of those articles \citep{warncke-wang_misalignment_2015}. However, we have observed that not all views are created equal. Future studies on the relationship between content production and content consumption on Wikipedia might use reading time data to learn about how content consumption might change depending on article quality. + + +%One anticipated application of reading time data is for evaluating design interventions intended to improve the user experience of Wikipedia visitors. 
We recommend that analysts and designers use geometric means as a metric for comparing reading behavior between treatments or between sites. The distribution of reading times is very skewed and therefore the arithmetic mean can be misleading. Moreover, for most wikis, the Log-normal distribution is a good fit to the data, and this justifies the use of geometric means. + +\section*{Acknowledgements} +We are grateful to the anonymous reviewers, whose observations helped improve the paper. Special thanks to the web team at the Wikimedia Foundation that built the instrumentation, to Zareen Farooqui, who conducted initial data quality vetting as part of her data analyst Outreachy internship at WMF, and to the Foundation's Analytics Engineering team for supporting the data analysis infrastructure used in this work. Thanks to those who provided comments on various stages of this research, including Kaylea Champion, other members of the Community Data Science Collective, Johnathan Morgan, Aaron Halfaker, Isaac Johnson, Miriam Redi, Abbey Ripstra, and other members of the Wikimedia research team. Special thanks to Benjamin Mako Hill for his comments and advice. This work was completed while Nathan TeBlunthuis was a PhD student at the University of Washington and in his capacity as a Wikimedia Foundation contractor and affiliate. It was also supported by the National Science Foundation (GRFP-2016220885). 
+ +%\balance{} + +\setcounter{biburlnumpenalty}{9001} +\printbibliography[title = {References}, heading=secbib] + +\clearpage +\small +\begin{table*}[htbp] +\centering +%\addto\captionsenglish{\renewcommand{\figurename}{Appendix:}} + +\caption{Regression tables for models 1a and 1b.} +\begin{tabular}{l c c } +\hline + & Model 1a & Model 1b \\ +\hline +Intercept & $1.3660 \; (0.0085)^{***}$ & $1.3791 \; (0.0085)^{***}$ \\ +Global North & & $-0.2680 \; (0.0022)^{***}$ \\ +mobile : Global North & & $0.1490 \; (0.0024)^{***}$ \\ +mobile : Last in Session & $-0.6332 \; (0.0021)^{***}$ & $-0.6349 \; (0.0021)^{***}$ \\ +Global North : Last in Session & & $0.0830 \; (0.0024)^{***}$ \\ +Human development index & $-0.1961 \; (0.0018)^{***}$ & \\ +mobile : HDI & $0.1133 \; (0.0019)^{***}$ & \\ +HDI : Last in Session & $0.0632 \; (0.0019)^{***}$ & \\ +Revision length (bytes) & $0.1752 \; (0.0004)^{***}$ & $0.1758 \; (0.0004)^{***}$ \\ +time to first paint & $-0.0164 \; (0.0006)^{***}$ & $-0.0171 \; (0.0006)^{***}$ \\ +time to dom interactive & $0.0025 \; (0.0009)^{**}$ & $0.0024 \; (0.0009)^{**}$ \\ +mobilemobile & $-0.0118 \; (0.0023)^{***}$ & $-0.0142 \; (0.0023)^{***}$ \\ +sessionlength & $-0.0001 \; (0.0000)^{***}$ & $-0.0001 \; (0.0000)^{***}$ \\ +Last in session & $0.8632 \; (0.0023)^{***}$ & $0.8575 \; (0.0023)^{***}$ \\ +nthinsession & $0.0002 \; (0.0000)^{***}$ & $0.0002 \; (0.0000)^{***}$ \\ +dayofweekMon & $0.0939 \; (0.0020)^{***}$ & $0.0926 \; (0.0020)^{***}$ \\ +dayofweekSat & $0.0169 \; (0.0020)^{***}$ & $0.0175 \; (0.0020)^{***}$ \\ +dayofweekSun & $0.0322 \; (0.0020)^{***}$ & $0.0332 \; (0.0020)^{***}$ \\ +dayofweekThu & $0.0561 \; (0.0019)^{***}$ & $0.0548 \; (0.0019)^{***}$ \\ +dayofweekTue & $0.0349 \; (0.0020)^{***}$ & $0.0326 \; (0.0020)^{***}$ \\ +dayofweekWed & $0.0757 \; (0.0019)^{***}$ & $0.0743 \; (0.0019)^{***}$ \\ +usermonth4 & $0.0095 \; (0.0096)$ & $0.0083 \; (0.0096)$ \\ +usermonth5 & $0.0108 \; (0.0095)$ & $0.0104 \; (0.0095)$ \\ +usermonth6 & 
$-0.0102 \; (0.0097)$ & $-0.0103 \; (0.0097)$ \\ +usermonth7 & $-0.0494 \; (0.0097)^{***}$ & $-0.0491 \; (0.0097)^{***}$ \\ +usermonth8 & $-0.0119 \; (0.0097)$ & $-0.0121 \; (0.0097)$ \\ +usermonth9 & $0.0382 \; (0.0076)^{***}$ & $0.0370 \; (0.0076)^{***}$ \\ +usermonth10 & $-0.0004 \; (0.0075)$ & $0.0010 \; (0.0075)$ \\ +\hline +R$^2$ & 0.0721 & 0.0725 \\ +Adj. R$^2$ & 0.0720 & 0.0725 \\ +Num. obs. & 9873641 & 9873641 \\ +RMSE & 14.2330 & 14.2297 \\ +\hline +\multicolumn{3}{l}{\scriptsize{$^{***}p<0.001$, $^{**}p<0.01$, $^*p<0.05$}} +\end{tabular} +\end{table*} diff --git a/dissertations/nathante_uw_2021/appendix_C_oresfairness.tex b/dissertations/nathante_uw_2021/appendix_C_oresfairness.tex new file mode 100644 index 0000000..0740ec4 --- /dev/null +++ b/dissertations/nathante_uw_2021/appendix_C_oresfairness.tex @@ -0,0 +1,1143 @@ + +\chapterprecishere{ +Online community moderators often rely on social signals such as whether or not a user has an account or a profile page as clues that users may cause problems. Reliance on these clues can lead to ``overprofiling'' bias when moderators focus on these signals but overlook the misbehavior of others. We propose that algorithmic flagging systems deployed to improve the efficiency of moderation work can also make moderation actions more fair to these users by reducing reliance on social signals and making norm violations by everyone else more visible. We analyze moderator behavior in Wikipedia as mediated by RCFilters, a system which displays social signals and algorithmic flags, and estimate the causal effect of being flagged on moderator actions. We show that algorithmically flagged edits are reverted more often, especially those by established editors with positive social signals, and that flagging decreases the likelihood that moderation actions will be undone. Our results suggest that algorithmic flagging systems can lead to increased fairness in some contexts but that the relationship is complex and contingent. 
+} + +%\renewcommand{\shortauthors}{TeBlunthuis et al.} + +% uncomment to make a printable/editable version + + + + +%We believe that understanding the design of algorithmic governance systems requires accounting for how and to what extent the system serves both the surveillence and nudging functions. We analyze an algorithmic triage system in the wild + +% Too much emphasis on criminal justice and claiming that as a major part of the contribution is creating extra work. Don't do that unless you really need it. + +% \fontsize{12pt}{24pt} +% \selectfont + +\section{Introduction} + +% Paragraph motivating the question: What are we concerned about why does it matter?% What's the CSCW problem we're working on? It's a popular topic right now. + +% I need to develop / focus my concepts of enforcement or monitoring work. Use ostrom? + +% Bring in blackwell and bowker and star and mary douglas more here. Blackwell's important for signaling that this is CSCW work, not FAT*. +% Salient signals go in the introduction, but the point must be that they signal membership in suspicious categories. +% broaden the focus beyond online communities to broader questions of algorithmic governance --- including the criminal justice system. +% Bring back surveillence/visibility and profiling in the intro. +% don't make it about online communities! + + +% Algorithms and the problem of scale +Online community moderators are responsible for reviewing the torrents of user-generated content for spam, vandalism, attacks, and other violations of community norms and rules. In many large online communities, a small number of moderators---often volunteers---will be responsible for reviewing thousands or millions of actions and taking steps to stop and mitigate problematic behaviors \citep{gillespie_custodians_2018}. 
To help focus their attention within this deluge, moderators typically rely on social signals \citep{donath_social_2014} that indicate that a user's contributions are made in good faith and of high quality \citep{kraut_building_2012}. Common signals include visible reputation scores, user profiles, experience, and registration status \citep{broughton_wikipedia_2008, kraut_building_2012}. +For example, since new users are often more likely to engage in bad behaviors, moderators might scrutinize contributions from newcomers more closely \citep{kraut_building_2012, potthast_automatic_2008}. +However, directing limited moderation attention based on social signals can introduce unfairness through ``overprofiling'' that occurs when moderators focus their attention on users with signals associated with bad behaviors while ignoring others engaged in similar or worse behaviors \citep{de_laat_profiling_2016}. +For this reason, and because relying on social signals can still place enormous demands on limited moderator resources, online communities are increasingly adopting algorithmic flagging systems to direct moderators toward problematic actions \citep{chandrasekharan_crossmod:_2019, halfaker_ores:_2020}. + +%Discriminating by attributes like anonymity or newness does not raise the same constitutional concerns as discrimination against protected classes such as race or religion. + +% For example, an algorithmic flagging system might lead to increased scrutiny for newcomers while letting more established users of the hook for the same behavior. + +Although the consequences are very different, these systems share salient commonalities with algorithmic flagging systems used in employment, college admissions, and criminal justice. All of these systems use predictions of whether an outcome will occur to flag certain individuals as more or less likely sources of problems. All leave final decisions to a human judge. 
+The use of these systems when people's lives are at stake has rightfully attracted criticism based on how algorithms engage in misrepresentation and discrimination \citep{campolo_ai_2017, oneil_weapons_2018,barocas_fairness_2019}. +% A number of scholars have argued that understanding whether these types of algorithmic flagging system are more or less ``fair'' depends on how humans use them to make decisions in the context of broader sociotechnical systems \cite{kleinberg_human_2018, selbst_fairness_2019, stevenson_algorithmic_2019}. +On the other hand, advocates of algorithmic prediction in criminal justice argue that algorithms---even those that are measurably biased in their predictions---might still be less discriminatory than decisions made by biased human judges alone \citep{kleinberg_human_2018, stevenson_assessing_2017}. + +% Existing research in social computing suggests the answer is not obvious. +%Indeed, we argue that it partly depends on whether one adopts a psychological engineering or institutionalist perspective on online community governance \cite{frey_this_2019}. In the psychological engineering approach +%Both approaches might predict that algorithmic flagging can increase fairness in two senses: first by drawing moderator attention toward anti-normative behaviors by underprofiled users. +%But the perspectives disagree about whether algorithmic flagging will lead to more fair moderator actions for over-profiled users. Psychological perspectives, specifically dual-process or behavioral economic models might predict that algorithms interfere with formal rationality as moderators might be easily nudged by algorithmic flags into making hasty judgements \cite{tversky_judgment_1974,caraban_23_2019}. From an institutional perspective, that as a ``carrier of formal rationality'' an algorithmic triage system can increase compliance with higher-order norms \cite{lindebaum_insights_2019,weber_economy_1978}. 
+ + +Can algorithmic flagging systems in online community moderation similarly reduce reliance on social signals and lead to more fair outcomes? We aim to answer this question through a field evaluation of an algorithmic flagging system called RCFilters, which was deployed on 23 different Wikipedia language editions from January 2019 to March 2020. RCFilters flags contributions identified by the Objective Revision Evaluation Service (ORES) machine learning system as likely to be damaging \citep{halfaker_ores:_2020}. These flags are shown along with existing social signals of quality. We take advantage of a set of arbitrary thresholds built into RCFilters to conduct a quasi-experimental analysis that estimates the causal effect of algorithmic flagging on moderation decisions and that seeks to measure whether algorithmic flags lead to better or worse outcomes for users who are likely to be overscrutinized \textit{ex ante}. +%"cues are created, propogated, and interpreted to become signals" +% halfak suggests being 'more relaxed in the introduction' +Our results suggest that algorithmic flagging can lead to more fair outcomes but that this effect may depend on the specifics of the social signals in question. +% In support of the idea that flagging can lead to more fair outcomes, we find that the effect of algorithmic flagging on the rate of sanctioning of actions by unregistered contributors was lower than the effect for logged-in users who are typically seen as more trustworthy. However, results from similar tests for users lacking profile pages who appear to be new to Wikipedia are mixed at best. + +%We find that algorithmic flags reduce over-profiling of anonymous (IP) Wikipedia\footnote{Editing Wikipedia does not require account registration. Changes can be made "anonymously" and an IP address of the user submitting the change is recorded in leu of a username. 
Thus these editors are often referred to as "IP editors"} editors as flagging causes a greater increase in the likelihood of reversion for registered editors compared to IP editors. Flagging increases fairness of sanctioning for IP editors as reverts of their flagged edits are less likely to be sanctioned for violating second-order norms. However, our analysis of editors without profile pages, a visible sign of newcomer status, does not support these conclusions. Our methods exploit discontinuous thresholds in algorithmic triage systems, but we suggest that designers should consider alternative approaches to surfacing model predictions in review interfaces. At least in some cases, algorithmic triage can improve fair treatment of contributors with visible traits that may face discrimination, but impacts on moderator actions likely depend on the institutional context. + +% contributions to CSCW +Our paper makes several contributions. +First, our work answers calls to analyze the impacts of algorithms \textit{in situ} \citep{selbst_fairness_2019, stevenson_assessing_2017, zhu_value-sensitive_2018} by offering an empirical evaluation of an algorithmic flagging system in an important social computing context. +Second, our analysis contributes to an ongoing debate over when and how algorithms might lead to more or less fair outcomes for individuals subject to profiling by human decision makers. +% In this regard, our findings provide some reason for optimism as well caution about drawing general claims about the effect of algorithms on fairness in general. +% Furthermore, our work suggests over-profiling bias as a mechanism by which moderation practices erect barriers to participation by contributors displaying social signals. +Third, our work offers a methodological contribution by presenting a novel quasi-experimental approach that can act as a template for future non-interventional studies of causal effects of algorithmic decision support systems. 
+Finally, our work contributes to social computing system design by suggesting improvements to algorithmic flagging and filtering systems. + +% and contextual factors that suggest boundary conditions for when algorithmic predictions can improve fairness. +% focuses on fairness, the interaction between algorithmic flagging and user identity signals that may be used by moderators as signs of misbehavior, and on the contrast between institutional and psychological engineering perspectives on online community design and governance \cite{frey_this_2019}. + +\section{Background} + +% Regulating behavior in online communities? + +\subsection{Moderation in Online Communities} +% Why regulate behavior? +% Comment from overleaf: De Laat might help us make a normative argument, but we can also make it ourselves. +Contemporary online communities are flooded with harassment, spam, misinformation, disinformation, and hate. Users of social media systems frequently and flagrantly violate community and platform rules, various laws, and norms of decency and decorum. Even users acting in good faith can do damage by taking conversations off-topic, undermining the stated purpose of communities, and lowering the quality of discourse or the knowledge goods being produced. Protecting online communities from unwanted activity are content moderators---many of them volunteers---that \citet{gillespie_custodians_2018} has described as ``custodians of the Internet.'' +% Although governance in social computing systems involves much more than just content moderation \citep{frey_this_2019}, +Moderation work typically involves three tasks: namely, reviewing content or activity, mitigating damage caused by a problematic behavior, and sanctioning users in different ways \citep{gillespie_custodians_2018, seering_moderator_2019, jiang_moderation_2019, kiene_technological_2019}. 
+% Some of the most common forms of sanctions by moderators---and the focus of this study---involve deleting or undoing the actions of users in ways that also serve to mitigate damage + +\citet{grimmelmann_virtues_2015} defined moderation as ``governance mechanisms that structure participation in a community to facilitate cooperation and prevent abuse.'' % and suggests that it is core to regulating behavior in online communities and social media platforms. +Discussions of content moderation often focus on individuals occupying formal roles as moderators with special rights and responsibilities. For example, several of the moderators in \citepos{gillespie_custodians_2018} account are professional moderators working for major platforms such as Facebook and Twitter. Several moderators, and nearly all of them on platforms such as Reddit and Discord \citep{matias_going_2016, jiang_moderation_2019, kiene_technological_2019}, work as volunteers but occupy similar positions of formal authority and responsibility. +That said, the work of moderation is also distributed across regular community members \citep{lampe_slashdot_2004, kiene_surviving_2016}. In Wikipedia, for example, the bulk of moderation activity as defined by \citeauthor{grimmelmann_virtues_2015} occurs as normal users review, vet and undo the work of others to mitigate damage and sanction users they believe have behaved badly \citep{piskorski_testing_2017}. + +% Two alternative conceptions of governance in online communities are the ``psychological engineering'' and an organizational, sociological, institutional the commons-based ``institutional'' approaches \cite{frey_this_2019}. Represented by the book ``Building successful online communities'', the engineering approach draws on social psychological theories of motivation and behavior to inform top-down designs that optimize engagement \cite{kraut_regulating_2012}. 
While rules and norms have a place in the engineering approach, they are instituted through a top-down process where a designer creates and enforces rules to regulate behavior. By contrast, in the institutional approach norms, rules, and structures are constructed through bottom-up processes in communities. + +% TODO move down +% Institutional models, by contrast can both enlist community members to help scale governance work and empower them to participate in constructing legitimate rules and norms at multiple levels of governance \cite{frey_this_2019}. On Reddit, voting systems effect on form of moderation by aggregating judgements from many users to raise or lower the visibility of content. But this distributed system co-exists with a wide array of rule-based systems exhibiting ``implicit fudalism'' as power is concentrated in community moderators and platform administrators \cite{schneider_admins_2019, fiesler_reddit_2018}. But norms can also be instituted in more participatory ways. For example, on Wikipedia ``first-order norms'' govern right ways of editing articles, but ``second-order norms'' govern the enforcement actions taken against violations of first order norms \cite{coleman_social_1988,piskorski_testing_2017}. Second order norms can be maintained when they are enforced through forms of ``meta-moderation'' such as when third parties undo moderator's decisions \cite{lampe_slashdot_2004}. + +%Constitutional levels of governance can institute formal structures by which communities can create and change their own rules \cite{frey_this_2019,ostrom_governing_1990}. + +%Frey et al. propose supplanting instituting ``constitutional levels'' of governance in online communities to govern the construction and modification of rules, norms, and structures at lower levels \cite{frey_designing_2019}. 
But +%On Wikipedia for instance, combines elements of distributed has many different formal roles such as ``administrators'' who can ban users and ``patrollers'' who can edit more frequently and use some special tools (i.e. Huggle) for reviewing a large number of edits, any and user can contribute to moderation work by reviewing and undoing changes. +% Communities on this end of the spectrum include those using +% can distribute moderation work among community members with different roles. +%There are many possible devices by which communities pursue these goals including reputation systems, collaborative block lists \cite{blackwell_classification_2017}, documenting rules, or creating barriers to entry. +% Governance mechanisms can be classified by whether they are proactive (i.e. systemsp that throttle activity, only publish approved content, or depend on privileges) or reactive (i.e. content is published and then moderated) \cite{kraut_regulating_2012}. We focus on reactive systems in this paper. Two common and interdependent reactive patterns in online community governance are sanctioning misbehavior and removing problematic content. +%Comments from Charlie: +% Groomsman virtues of moderation for definition of moderation +% what kind of moderation are we talking about here? There's volunteer moderation and there is paid moderation. Our theory might apply to both, but we study a context of volunteer moderation where any user can contribute. + +% % Define rules / normsg +% \subsubsection{Rules and norms} + +%\TODO{explain governance probalems on Wikipedia in more depth.} We maybe don't need that. This isn't opensym. +\subsubsection{Sanctions} +% Define norm enforcement + +Sanctioning involves enforcing norms in ways that attempt to discourage future misbehavior. It is a core part of moderation work because it encourages compliance with norms by communicating that rules will be enforced \citep{jhaver_did_2019, srinivasan_content_2019}. 
Although it also serves to mitigate damage, removing content is a common form of sanctioning because it communicates that an action was inappropriate \citep{piskorski_testing_2017}. \citet{halfaker_dont_2011} showed that removing content is an effective sanction and results in higher quality contributions by the reverted contributor in Wikipedia. Similarly, \citet{srinivasan_content_2019} found that people whose comments were removed from Reddit were less likely to violate norms in the future. + +% punish and deter bad behavior are key to instituting online community governance. +% Removing content +Although the goal of most sanctioning is to steer participants toward productive behaviors, the effect is often to deter participation. This can be particularly problematic with well-meaning newcomers who often violate norms because they have not yet learned the ropes \citep{adler_content-driven_2007, halfaker_dont_2011, halfaker_rise_2013}. +Sanctioned newcomers are less likely to continue participating, especially in the absence of clear explanations from moderators \citep{jhaver_did_2019, kraut_regulating_2012, potthast_automatic_2008, halfaker_rise_2013, teblunthuis_revisiting_2018}. +On Wikipedia and similar communities, high rates of sanctioning can help explain declines in participation and may be an obstacle to building a community that includes diverse participants \citep{halfaker_rise_2013, teblunthuis_revisiting_2018, lam_wp:clubhouse?:_2011}. +% Although socializing newcomers to community rules and norms can improve retention and adherence to community norms, it often easier for users to devote their time and energy somewhere else \cite{narayan_wikipedia_2017, morgan_evaluating_2018, halfaker_snuggle:_2014, kiene_surviving_2016}. + + + +%\subsubsection{Quis custodiet ipsos custodes?} +\subsubsection{Meta-norms} + +% Here is where I want to introduce second-order norms. +No moderation system is perfect. 
Moderators inevitably make mistakes and apply sanctions in ways that are arbitrary and unfair. This is particularly challenging to avoid in distributed moderation models used on sites such as Slashdot or Wikipedia where moderation is conducted by large and diverse groups of untrained and loosely coordinated users. +Sanctions can be particularly demotivating to newcomers who feel that sanctions are unfair and incorrect \citep{srinivasan_content_2019, jhaver_did_2019, gillespie_custodians_2018}. +% For example, Wikipedia editors who were blocked were more likely to return to productive, norm-compliant participation when they believed the block was fair \cite{chang_trajectories_2019}. +Consequently, steps that make moderation more fair might decrease the negative effects of sanctions on community growth. + +One way to improve fairness in moderation is through governance structures that enforce accountability \citep{frey_this_2019}. +Toward this end, Slashdot famously created tools for ``meta-moderation'' that allowed all users to evaluate the decisions of moderators \citep{lampe_slashdot_2004}. Users whose moderation decisions were controversial or at odds with the opinions of other Slashdot members would not be given moderation privileges again. +Although formal systems for meta-moderation remain rare, behaviors that take action against controversial sanctions are common and serve a similar social function \citep{crawford_what_2016}. +``Meta-norms,'' which prescribe when and how one should issue sanctions against violations of first-order norms \citep{horne_enforcement_2001} are particularly relevant. \citet{reagle_be_2010} documented the formalization of meta-norms on Wikipedia and \citet{piskorski_testing_2017} showed how Wikipedia users maintain meta-norms by undoing sanctions in ways that effectively sanction the originally sanctioning user. 
+ +% Sanctions can be controversial if they violate second-order norms about what kinds of behavior should be sanctioned, when such norms are contested, or when enforcement is inconsistent or unaccountable . + +% Creating barriers that slow participation is a second approach to maintaining order by intentionally limiting growth \cite{kiene_surviving_2016, lin_better_2017}. But in peer production communities like Wikipedia, barriers to growth may also constitute barriers to expanding the quality of diverse knowledge and knowledge-producers \cite{lam_wp:clubhouse?:_2011}. + +% community development as online communities face a dilemma between regulating behavior and attracting participants \citep{teblunthuis_revisiting_2018, halfaker_rise_2013, halfaker_dont_2011}. + +% \cite{halfaker_rise_2013} found that newcomers to Wikipedia were less likely to continue contributing to the encyclopedia after being sanctioned and \cite{teblunthuis_revisiting_2018} replicated this finding in a population of other Wikis. + +% This paper contributes to understanding how algorithmic and social signals are related to the fairness of sanctioning by analyze how effects of algorithmic flagging on fair sanctioning differ between groups of users with varying social signals. + +\subsubsection{Flagging and Algorithmic Triage} + +Moderators in large online communities can face incredible challenges in scaling their work to handle an enormous mass of content and user activity \citep{gillespie_custodians_2018, kiene_technological_2019, seering_moderator_2019, seering_shaping_2017}. In interviews conducted by \citet{kiene_technological_2019}, small teams of volunteers tasked with maintaining order in large communities described their work as akin to ``running a small city.'' +Some platforms deal with scale by employing more paid moderators. However, the work involved can be exploitative, challenging, traumatizing, and expensive \citep{roberts_commercial_2016}. 
Volunteer moderator teams frequently find it difficult to identify, train, and integrate new members as they grow \citep{kiene_surviving_2016}. On average, teams become less likely to add new members as their communities grow \citep{shaw_laboratories_2014}. + +For these reasons and others, it is often impossible for communities to scale moderation resources such that human moderators can review all activity. +% Problem of scale in norm enforcement +% For moderators to sanction behavior, they must first observe it. +As a result, many moderation systems implement flagging so that a wider group of users can report content for review by moderators \citep{grimmelmann_virtues_2015}. +If users reliably flag problematic behaviors, flagging can mitigate issues of scale because moderators focus their attention on behavior that is flagged. Obviously, flagging is far from a perfect solution. +From the perspective of a flagged user, flagging can seem arbitrary and opaque \citep{crawford_what_2016}. +From a moderator perspective, flagging is flawed because disgruntled users can coordinate to overwhelm moderators and target opposing viewpoints \citep{crawford_what_2016}. +Finally, given that traditional flagging systems continue to rely on volunteer labor, they often fail to fully address issues of scale, leaving many bad actions unflagged, unreviewed, and unsanctioned. + +To address this final limitation, communities have turned to algorithmic flagging systems that use computer programs to automatically mark content for review by human moderators \citep{kiene_technological_2019,kiene_who_2020,seering_shaping_2017}. Although some of these systems rely on keywords, regular expressions, or heuristics, more advanced and flexible versions of these systems use predictions from machine learning models. 
These systems are seen as promising answers to the problem of moderation at scale because they can easily be used to review an enormous volume of behaviors, they may be less vulnerable to strategic flagging, and they may be more reliable than human reviewers. + +Algorithmic flagging systems can be thought of as human-in-the-loop versions of similar computational systems that engage in fully automated moderation. For example, numerous digital platforms utilize the PhotoDNA system to automatically identify and remove child pornography \citep{gillespie_custodians_2018}. Similarly, Wikipedia's ClueBot NG uses a machine learning predictor to automatically remove vandalism \citep{geiger_when_2013}. Although they play a critical role in reducing moderation workloads, fully automated systems are uncertain enough in most of their assessments that they are typically only considered useful in defending against the most clear-cut examples of misbehavior \citep{gillespie_custodians_2018}. +% Furthermore, in user-organized communities, moderation decisions are an important part of building shared meaning, a task not easily left to a fully automated system \cite{seering_moderator_2019}. + +Some machine learning systems that are designed to classify bad behavior are used as a form of algorithmic triage. While the most egregious examples of bad behavior are dealt with by automatic systems, other possible norm violations are flagged for review by human moderators. +For example, Reddit allows moderators to define a system of rules based on regular expressions to automatically flag content for further review \citep{jhaver_human-machine_2019}. Algorithmic flagging systems based on machine learning occupy the vanguard of online activity regulation and numerous examples have been described in recent scholarship. 
+% Applied machine learning research endeavors to predict deviant behavior in online communities such as +% \citet{liu_forecasting_2018} describes a systems to predict when conversations on Instagram will turn hostile. +\citet{chandrasekharan_crossmod:_2019} described a system for Reddit communities to share information and collaborate on automatic flagging that accounts for differences between rules of different communities. +\citet{wulczyn_ex_2017} presented a system for classifying harassing behavior on Wikipedia. Finally, \citet{halfaker_ores:_2020} developed the ORES system to predict the quality of contributions and content on Wikipedia. + +% I don't understand what the point of the next sentence is: mako +% It's me being self indulgent with org comm stuff that means "more context please!" +% When online communities adopt algorithmic triage systems in efforts to scale up regulation, these systems become imbricated with practices, norms, and structures already instituted in the preexisting governance systems \cite{leonardi_when_2011}. + +% Expand to include other designs of algorithms for detecting norm violations or misbehavior +% However, this study focuses on settings where an algorithm might flag content to make it visible to a human who can make an enforcement decision. + +% Define flagging and filtering +% This section needs more special attention. +% Gotta integrate this section. 
+% \section{Algorithmic Flagging and Fairness} +% \subsection{Statistical discrimination and social signals} + +% A longer version will + +% \subsection{Discriminatory sanctioning of over-profiled users} +\subsection{Will Algorithmic Flagging Decrease Discrimination Of Overprofiled Users?} + +One of the most important debates in contemporary technology policy is the degree to which the introduction of algorithms into socially consequential decision making leads to more or less fair outcomes \citep{chouldechova_fair_2017, kleinberg_human_2018, oneil_weapons_2018, selbst_fairness_2019}. Much of this debate focuses on arguments about whether algorithms will amplify or entrench discrimination and on biases introduced by training data \citep{barocas_fairness_2019, campolo_ai_2017, sap_risk_2019}. +Discrimination is the differential treatment of individuals based on membership in a group. Economists of discrimination distinguish between taste-based and statistical discrimination \citep{becker_economics_1957, bertrand_field_2016, phelps_statistical_1972}. Taste-based discrimination is driven by preferences for members of one group and includes both ideologically-driven racism and implicit bias. Statistical discrimination occurs when social signals---visible and socially salient characteristics, such as group memberships---are instrumental in driving decisions. Statistical discrimination can also lead to unequal outcomes for certain groups. +% in ways that are unfair but can be justified if differential treatment may be worth the price of expediency \citep{bertrand_field_2016}. +%Taste-based and statistical discrimination cn be difficult to tell apart in real-world empirical settings, but field experiments can help. \citep{bertrand_field_2016}. \cite{bertrand_are_2004} conducted an audit study in a labor market. They applied for jobs using resumes of simulated job applicants with either high or low levels of experience level and either white or black sounding names. 
They observed racial discrimination as white applicants were much more likely to receive an interview invitation compared to black applicants. Now, under a hypothesis of statistical discrimination, additional information about experience levels should reduce reliance on race as a signal of performance, and the gap between white and black applicants should decrease within the group of high quality resumes. However, \cite{bertrand_are_2004} found the opposite --- the gap between white and black sounding applicants was greater in the group of high-quality resumes. Taste-based discrimination is a plausible mechanism for this finding as more information about applicants amplified rather than decreased the gap as predicted by statistical discrimination, but it is difficult to rule out alternative explanations. + +\subsubsection{Social Signals} +\label{sec:social.signals} +% Introduce social signaling theory and explain why we need social signals to function in society. Without prototypes we can't make quick judgements about each other. + +% A big part of the problem is trust. For an online encyclopedia social signals like profiles and identity make someone seem trustworthy. Someone who has not provided these signals is more difficult to identify, stereotype and therefore apprarently less trustworty. +% Introduce social signalling theory. + +Although most discussions of discrimination focus on high-stakes contexts such as banking, labor markets, and criminal justice, moderation in online communities is also ripe for statistical discrimination based on visible social signals. +When interacting in face-to-face groups, people can observe---and discriminate on the basis of---visible signals of status, group membership, psychological states, or cultural identity \citep{donath_social_2014, pentland_honest_2008, ridgeway_status:_2019}. 
Because the invisibility of these signals in online communities creates a barrier to regulation, sociability, and cooperation, +communities use devices such as profile images and biographies, avatars, or visualizations of activity as tools for self-presentation and signals of membership \citep{lampe_familiar_2007, donath_social_2014}. +Disclosing information on profiles can provide signals helpful for people using prototypes \citep{grabner-krauter_trust_2015}, building social capital \citep{ellison_connection_2011}, and developing trust \citep{ma_self-disclosure_2017}. Formal reputation systems such as karma on Reddit and Slashdot or badges on StackExchange can be important signals of commitment, quality, and trustworthiness \citep{grimmelmann_virtues_2015, lampe_role_2012, merchant_signals_2019}. + +Even without user profiles or formal reputation systems, participants in online communities use subtle signals to draw conclusions about each other \citep{donath_signals_2007, ellison_managing_2006, jacobson_impression_1999}. Sparse cues such as usernames or communication styles can be signals of personality, gender, and identity \citep{donath_social_2014, hancock_impression_2001, herring_gender_2000}. Tests of community-specific technical or cultural knowledge can identify newcomers and, similar to formal reputation systems, they may be more challenging to fake than biographical information \citep{bernstein_4chan_2011, donath_social_2014, grimmelmann_virtues_2015}. +In peer production projects, prior contributions can be inspected for information about expertise, work styles, and the future value of a newcomer \citep{marlow_impression_2013}. + +%The usefulness of signals from profiles and behaviors depends on the stability of user identities. +In several online communities such as Wikipedia, users can elect to participate anonymously, under more-or-less stable pseudonyms, or using their real names. 
Masking signals of gender, race, age, (dis)ability, or status can appear to equalize and free individuals from oppressive prejudices and stereotypes \citep{dubrovsky_equalization_1991, friedman_social_2001}. +On the other hand, the presence or absence of a stable user identity is itself an essential signal because persistent identities make it possible to build up reputation, social capital, and trust and the inability to do so is associated with misbehavior \citep{grabner-krauter_trust_2015, hill_hidden_2020}. +% Therefore, adopting anonymous or unstable identities may reflect a lack of interest in building ties or status within a group: a suspicious sign of potential misbehavior. + +\subsubsection{Will algorithmic flagging reduce overprofiling?} + +Online community moderators can use social signals to discover and respond to misbehavior, but this can lead to statistical discrimination. +Wikipedia's \textit{Missing Manual} advises would-be vandal fighters on Wikipedia to ``consider the source'' when ``estimating the likelihood that an edit is vandalism'' \citep{broughton_wikipedia_2008}. +Because newcomers and anonymous users are more likely to violate rules, moderators may rely on social signals of newness to find bad behaviors or to decide if an ambiguous contribution was made in bad faith. +Increased scrutiny and skepticism can translate into an increased likelihood of sanction, simply for being new or anonymous. +% Users with little history of posting to forum, a reputational signal like karma on Reddit, or points or achievements on StackOverflow. In communities with cheap pseudonyms it might be easy for rule breakers to evade sanctions by creating new accounts \cite{friedman_social_2001}. +Statistical discrimination emerges because moderators are more likely to scrutinize and sanction new or anonymous contributors who have legitimate reasons for contributing. + +% Newcomers are not the only groups that might be subject to statistical discrimination this way. 
+Ethical philosophers have objected to the way social signals are used in online moderation activity. Dutch philosopher Paul de Laat adopted the concept of ``profiling'' from legal scholar Frederick Schauer to argue against the use---and even the public display of---social signals such as registration status and experience levels in the user interfaces used for moderation because they are prone to ``overuse'' \citep{de_laat_use_2015, de_laat_profiling_2016}. It should be noted that discriminating by attributes such as newness does not raise the same legal or constitutional concerns as discrimination against protected classes such as race or religion. Online communities establish their own norms and may choose to protect or target certain attributes on the basis of a specific community's values. +For example, while discussing Wikipedia, de Laat argues that ``overuse'' is unethical, immoral, and inconsistent with the community's founding principles of transparency and equality. Drawing on de Laat, we refer to individuals with social signals that elicit undue scrutiny as ``overprofiled.'' +% We modify de Laat’s vocabulary to call such editors ``over-profiled.'' On the other hand other kinds of editors will be ``underprofiled'' as their contributions may be less likely to come under scrutiny. + +% As in statistical discrimination, when moderators over-use such signals they ``overused'' characteristics, who face statistical discrimination, are ``over-profiled'' and that other individuals are ``underprofiled.'' +% may have legitimate reasons for editing anonymously or editing through a new account. To simplify language, we say that individuals with + +% Therefore, new accounts in particular are suspect and likely face more scrutiny. 
+Although an important debate continues over the use of algorithmic predictions in domains like criminal sentencing, proponents of algorithms argue that they could reduce discrimination and inequality \citep{kleinberg_human_2018, stevenson_assessing_2017}. Algorithms can reproduce statistical discrimination, but they might be less biased than the alternative: human decisions that would presumably rely heavily, if perhaps subconsciously, on salient social signals such as race. Critics suggest that algorithms simply obscure this discrimination behind complex mathematical models that are difficult to understand, interrogate, or challenge. + +Although this debate is challenging to resolve in the case of criminal justice, algorithmic flagging in online community moderation provides a setting with lower stakes and more detailed data. +%Although the social signals and contexts are substantially different, similar social and psychological processes may be in play. +If we apply arguments proposing that algorithms can reduce discrimination to community moderation, we would conclude that algorithmic triage systems would reduce the impact of discrimination among overprofiled individuals by making misbehavior by all kinds of users visible to moderators. If algorithmic flagging reduces overprofiling bias, then it will have a smaller effect on overprofiled users than on others. If algorithms simply reproduce discrimination, we would find no such difference. +% NOTE: I think this paragraph is redundant... -mako +% When the system flags an action, it will increase the likelihood that a moderator responds with a sanction. How much flagging increases the likelihood of a sanction depends on the counterfactual: \emph{what would have happened if the action had not been flagged?} Therefore, if algorithmic flagging reduces over-profiling then it will have a greater effect on underprofiled contributors than on over-profiled ones. 
On the other hand, introducing algorithmic predictions should be of little consequence to taste-based discrimination, which would occur if Wikipedia moderators revert anonymous editors because they dislike them. So we ask: +This leads us to our first research question: +\textit{\textbf{[RQ1]} How will flagging an action change the likelihood an action is sanctioned for overprofiled editors compared with others?} + +Algorithmic fairness researchers use specific criteria to quantify biases encoded in algorithmic predictors and the fairness of resulting decisions \citep{chouldechova_fair_2017, barocas_fairness_2019, mitchell_prediction-based_2020}. +These criteria are often developed for settings where model predictions are equivalent to decisions. For example, \citet{kusner_counterfactual_2017} define demographic parity in terms of model predictions, whereas \citet{mitchell_prediction-based_2020} define it in terms of human decisions. In algorithmic flagging, decisions are informed by algorithms but left to humans. +Therefore, we distinguish between the fairness of predictions and the fairness of decisions and refer to our criteria as ``decision system fairness metrics'' following \citepos{mitchell_prediction-based_2020} use of the term ``decision system.'' + +%We now interpret our research questions in terms of actual fairness criteria which are analogous to algorithmic fairness criteria. 
+ +We first consider demographic parity, as shown in Equation \ref{eq:demographic.parity}, which means that the probability of a decision ($D$) is statistically independent of a protected attribute ($A$) \citep{kusner_counterfactual_2017, barocas_fairness_2019}: + +\begin{equation} + P(D = 1 \vert A = 0) = P(D = 1 \vert A = 1) +\label{eq:demographic.parity} +\end{equation} +\noindent An algorithmic flagging system will have demographic parity concerning registration status if the probability that an action is flagged is the same for actions by overprofiled and underprofiled editors. +Our analysis of RQ1 thus evaluates how flagging shapes demographic parity for sanctioning decisions. + +% \noindent In RQ1 we ask if flagging will have stronger effects for editors that are not over-profiled editors than for editors that are. That is, we are interested in absolute change in consequential demographic disparity, shown in Equation \ref{eq:change.exp.parity}. + +% \begin{gather} +% \left\vert\Delta_A P(S \vert \widehat{Y} = i) \right\vert = \left\vert P(S \vert A = 1, \widehat{Y}=i) - P(S | A = 0, \widehat{Y}=i)\right\vert \\ +% \left\vert\Delta_A P(S \vert \widehat{Y} = 1) - \Delta_A P(S \vert \widehat{Y} = 0)\right\vert \label{eq:change.exp.parity} +% \end{gather} + +% \noindent Where $\left\vert\Delta_A P(S \vert \widehat{Y} = 1)\right\vert$ is the consequential demographic disparity for unflagged actions. +% % +% But if statistical discrimination or over-profiling are active then an + +% Thus it is important to consider how judges or moderators will use an algorithmic predictor along side social signals in practice. + +% Probably the degree to which algorithms substititute for identity is a function of the quality of the algorithm, how much users trust it, and how much discrimination is taste-based vs statistical. 
+ +% A group is discriminated against when a relevant For example, a judge discriminates against black defendants if they are less likely to be released on bail than apparently identical defendants of a different race. That said, there are multiple mechanisms that may lead to patterns of discrimination. ``Statistical discrimination'' would occur if the reason the judge discriminates is that the judge knows that, all else being equal, black defendants are less likely to appear in court. In this case the judge is discriminating because doing so advances the judge's goal of carrying out an efficient and orderly judicial process. However, the judge's discrimination might instead be attributable to ideological racism, or a ``taste'' disfavoring releasing black defendants \citep{bertrand_field_2016}. The distinction between taste-based and statistical discrimination is salient because statistical dissemination might be considered an acceptable form of differential treatment between groups, particularly if historical oppression is not a factor, as in discrimination against newcomers in regulating an online community. Indeed we think that statistical, but not taste-based discrimination against new and anonymous contributors is likely in online communities. + +% consider deleting this paragraph entirely + + %Importantly, the visibility of such characteristics to moderators is sufficient for ``over-profiling.'' Including them as predictors in an algorithm is not necessary. + +% Blend it in a bit better. + +% For example? +\subsection{Will Algorithmic Flagging Increase Fairness?} + +A system might lack demographic parity by sanctioning one group more than others but still be justifiable if all sanctions are fair. +What does it mean for a sanction to be fair? The subject of fairness in algorithmic systems is a major subject of debate in computing and AI. 
There are several different approaches to conceptualizing fairness, and no algorithmic predictor can satisfy them all \citep{barocas_fairness_2019, caraban_23_2019, kleinberg_inherent_2016, mitchell_prediction-based_2020, yin_understanding_2019,wallach_big_2019}. +While such approaches focus on discrimination built into machine learning programs, we seek a concept of fairness that reflects the standards of relevant communities of practice. We find one in the concept of ``meta-norms'' from social psychology and James Coleman's sociological conception of norm maintenance. Drawing from these sources, we define unfair sanctions as those that a community is unwilling to let stand---i.e., sanctions that are themselves the subject of sanction \citep{coleman_social_1988, horne_enforcement_2001, piskorski_testing_2017}. +% \citet{piskorski_testing_2017} apply and validate this concept for norms governing revert actions on Wikipedia. +For example, norms in Wikipedia govern right and wrong ways of editing wiki pages. Sanctions of first-order norm violations are governed by meta-norms about what sorts of contributions merit sanction. Following \citet{piskorski_testing_2017}, we describe a sanction as \emph{controversial}---i.e., in likely violation of a meta-norm---if it, in turn, is sanctioned by a third community member. + +A controversial sanction suggests that the initial edit was not truly damaging (i.e., $D=1$ but $Y=0$ where $Y=1$ means an edit was truly damaging). Thus, a controversial sanction is analogous to a false positive classification by a machine predictor ($ \widehat{Y}=1$ but $Y=0$, where $\widehat{Y}=1$ means the machine predicts that an edit is damaging). The false positive rate quantifies the amount of unfair treatment a group experiences, but it does not compare unfair treatment between groups. Therefore, it is not, strictly speaking, an algorithmic fairness criterion. 
However, changes in the false positive rate of the decision system (shown in Equation \ref{eq:change.sanction.fpr}) quantify how flagging is increasing or decreasing the rate of unfair sanctions. + +\begin{equation} + P(D=1 \vert Y=0, \widehat{Y}=1) - P(D=1 \vert Y=0, \widehat{Y}=0) +\label{eq:change.sanction.fpr} +\end{equation} + +\noindent Relying on this definition of fairness, our second research question asks how algorithmic flagging shapes the fairness of sanctioning in terms of the rate of sanctions for meta-norm violations: \textit{\textbf{[RQ2]} How will flagging an action change the chances it receives a controversial sanction?} + +Influential theoretical frameworks in social computing seem to predict competing answers to this second question. +% We consider two competing theories of how flags will shape the consistency of first-order norm enforcement against over-profiled users. +First, dual-process models of behavioral economics suggest that people will tend to rely on ``salient signals'' for rapid decision making in conditions of uncertainty and imperfect information \citep{bordalo_salience_2012, kleinberg_human_2018, tversky_judgment_1974}. When human moderators use social signals to choose behavior to review or sanction, these attributes serve as salient signals but remain far from perfect signals of quality. +% important term related to salient signal is "cue" +Algorithmic flags provide an additional salient signal but are also far from perfect \citep{halfaker_ores:_2020}. Indeed, algorithmic flagging systems are typically designed to minimize the risk of missing bad behaviors by surfacing large numbers of false positives (i.e., non-problematic behaviors) and relying on human moderators to make final decisions. +Of course, if human moderators use algorithmic flags as salient signals, they may reproduce algorithms' false predictions. In this case, controversial sanctions will increase. 
+ + +% In a mental model of moderation work characterized by rapid decision making, flagging might function as a cue triggering moderators to issue a sanction when given more careful inspection they might have not. + +A second perspective suggests that algorithmic flags can increase fairness. +% As noted above, the Wikipedia community is governed not only by norms about correct ways of editing articles, but also by higher-order norms about right ways of constructing or enforcing first-order norms. +Several online communities have institutionalized rules, norms and meta-norms and act as highly bureaucratic organizations \citep{butler_dont_2008, piskorski_testing_2017}. +Max Weber described how bureaucratic organizations construct and use two concepts of what he called ``rationality:'' substantive rationality and formal rationality \citep{weber_economy_1978}. +% Neither concept is precisely the same as the common usage of ``rationality'' as an ideal form of utility maximizing decision making. +Substantive rationality refers to how bureaucratic organizations use policies, routines and hierarchy to define their collective values and goals. +Formal rationality refers to the use of calculated decision making, such as that involving productivity or financial metrics, in the pursuit of goals \citep{lindebaum_insights_2019}. +Following Weber, \citet{kreiss_limits_2011} argued that increasing substantive rationality through bureaucratic policies in online communities can lead to more fair outcomes. + +Although less explored by scholars of online communities, there are also reasons to believe that increasing formal rationality in moderation decisions might also enhance fairness, at least in online communities with mature normative systems. In such contexts, algorithmic flagging systems can enact formal rationality by estimating the probability and displaying an authoritative signal that an action runs afoul of shared behavioral standards. 
Adopting algorithmic flagging can thus mark a shift away from idiosyncratic individual decision-making and toward increasing the use of formalized rationality. Through this lens, an algorithmic flagging system---even one that encodes biases---can be a ``carrier of formal rationality'' \citep{lindebaum_insights_2019}, leading to governance that is more in line with community meta-norms and to a decrease in controversial sanctions. + +Next, we consider how changes in the false positive rate of the decision system depend on overprofiling. This corresponds to evaluating decision system fairness in terms of equality of opportunity (shown in Equation \ref{eq:balance}) \citep{hardt_equality_2016, mitchell_prediction-based_2020}: +\begin{equation} + P(D = 1 \vert Y = 0, A=0) = P(D = 1 \vert Y=0, A=1) +\label{eq:balance} +\end{equation} + +\noindent Equality of opportunity is satisfied when the false positive rate of a decision system does not depend on the protected attribute. +Equality of opportunity for registration status would mean that registered and unregistered editors that make good edits have equal chances of having their contributions accepted. + + +% \noindent As with demographic parity above, we propose an analogous consequential fairness criterion for community moderation systems. Consequential equality of opportunity, (shown in Equation \ref{eq:experienced.balance}) for registration status would mean that the rate of improper sanctions for unregistered editors equals that for registered editors. + +% \begin{equation} +% P(S = 1 \vert Y = 0, A=0) = P(S = 1 \vert Y=0 , A=1) +% \label{eq:experienced.balance} +% \end{equation} + + +% \begin{equation} +% P(S=1 \vert Y=0, \widehat{Y}=1) - P(S=1 \vert Y=0, \widehat{Y}=0) +% \label{eq:change.sanction.fpr} +% \end{equation} + +%\noindent Our results for RQ2 for unregistered editors show that flagging decreases the rate of controversial sanctions. 
While controversial sanctions do not precisely correspond to false-positive sanctions, we take this finding as evidence that flagging decreases the sanctioning false positive rate. + +% \subsection{Will algorithmic flagging decrease discrimination of over-profiled users?} +% \subsection{Second order norms and over-profiling} + +Our third research question asks whether algorithmic flagging systems will increase or decrease equality of opportunity: \textit{\textbf{[RQ3]} Within the set of sanctioned actions, how will the effect of flagging an action on controversial sanctions depend on whether contributors are overprofiled?} + +% whether flagging affects controversial sanctioning for over-profiled contributors compared to underprofiled contributors. +Once again, influential theoretical frameworks in social computing research seem to point in opposite directions. Under dual-process psychological models, both social signals and algorithmic flags might cue moderators to issue sanctions and might substitute for one another. In this case, we would hypothesize that flagging would have a more positive effect on controversial sanctions among underprofiled contributors, who had previously been relatively ignored, than it does among the overprofiled individuals, who were always scrutinized. +Conversely, if the larger effect of algorithmic flagging is helping moderators comply with meta-norms, it simply will not matter whether contributors are overprofiled. 
+ +\section{Empirical Setting} +\label{sec:empirical} + +% \subsection{Sociotechnical evaluation of algorithmic systems} +%\TODO{Split this paragraph in two so that we have one paragraph about the need for sociotechnical evaluation and merge the rest with the methods.} +\begin{figure}[t] + \centering +\begin{tikzpicture} + + + \node[anchor=west](flags) at (-7,2.7) {ORES Flags}; +% \node[anchor=west](pagetitle) at (-4.7,2.7) {Page titles}; + \node[anchor=west](userpagelink) at (-1.7,2.7) {User profile link}; + \node[anchor=west](unregistered) at (1.4,2.7) {Unregistered editor}; + + + \begin{scope} + \node[anchor=south, inner sep=0] (image) at (0,0) {\includegraphics[width=\textwidth]{figures/rcfilters_example_2.png}}; + \draw [-stealth,ultra thick] (flags.220) -- ++(0,-0.4); +% \draw [-stealth,ultra thick] (pagetitle.south) -- ++(0,-0.4); + \draw [-stealth,ultra thick] (unregistered.200) -- (1.5,2); + \draw [-stealth,ultra thick] (userpagelink.280) -- (0.2,1.6); +\end{scope} +\end{tikzpicture} + + \caption[Screenshot of edit metadata shown in RCFilters.]{Screenshot of Wikipedia edit metadata on Special:RecentChanges with RCFilters enabled. Highlighted edits with a colored circle to the left side of other metadata are flagged by ORES. Different circles and highlight colors (white, yellow, orange and red in the figure) correspond to different levels of confidence that the edit is damaging. Users can configure which colors are shown. Visible social signals include registration status (i.e., whether a username or an IP address is shown) and whether an editor's user page and user talk page exist. RCFilters does not specifically flag edits by new accounts, but does support filtering changes by newcomers.} + \label{fig:rcfilters} +\end{figure} + +% introduce here? +% We study moderator behavior in the context of the ORES algorithm for edit quality prediction on Wikipedia and the RCFilters flagging and filtering user-interface that it powers \cite{halfaker_ores:_2020}. 
As shown in Figure \ref{fig:rcfilters}, this system displays algorithmic predictions alongside visible indicators of membership in salient social categories for reviewing actions on the encyclopedia. Similar to other designs for algorithmic triage systems with humans-in-the loop \cite[e.g.][]{chandrasekharan_crossmod:_2019}, flags are triggered when ORES' prediction confidence crosses arbitrary operating points or thresholds. Similarly, RCFilters enables users to view only that subset of edits above a threshold. These features allow a systematic statistical analysis of edits near to the threshold to provide causal inferences of the effect of algorithmic triage on moderation decisions. In addition, because algorithmic flags are presented to moderators alongside information about membership in categories associated with objectionable contributions, we can test predictions of our theories about how algorithmic flags will differently affect individuals with or without visible social signals. + +%when was it introduced? + +We aim to answer our three research questions through a field evaluation of an algorithmic flagging system called RCFilters, which was deployed on 23 different Wikipedia language editions between January 2019 and March 2020. RCFilters stands for ``Recent Changes filters.'' The term ``Recent Changes'' refers to a page on Wikipedia that allows viewers to see the most recent changes made to the site.\footnote{For example, the Recent Changes page for English Wikipedia is available here: \url{https://en.wikipedia.org/wiki/Special:RecentChanges} (Archived: \url{https://perma.cc/BNZ3-E9D5})} As Figure \ref{fig:rcfilters} shows, RCFilters adds a set of flags represented as colored dots on the left side of the list of recent contributions. Social signals are also visible, including registration status and whether a user has created a profile page. 
Although dense with information regarding recent edits and hyperlinks, the page is immediately understandable to Wikipedia moderators. When deployed, the RCFilters interface appears both on ``Recent Changes'' as well as on ``watchlists''---a special version of ``Recent Changes'' that shows only edits to the subset of pages that a user has elected to follow. RCFilters must be enabled by each user on their Wikipedia user preferences page. + +Algorithmic flagging in the RCFilters system is powered by the ORES edit quality models trained to predict whether edits are labeled ``damaging'' or ``not damaging.'' The models are gradient boosted decision trees trained on a mixture of human-labeled Wikipedia edits and edits made by established editors that are assumed to be ``not damaging.'' + +It should be noted that ORES models do not merely reproduce profiling patterns typical of moderation on Wikipedia. +The interface for labeling training data obscures social signals from the volunteer Wikipedians doing labeling work and its models are predictive of damage from users that are not anonymous or newcomers. +Nevertheless, as discussed in §\ref{sec:threats}, ORES encodes biases against unregistered editors and---to a lesser extent---against editors without user pages. +ORES was designed neither to merely support quality control in Wikipedia, nor to optimize precision, recall, or fairness but to enact Wikipedian principles of openness, transparency, and community accountability---to ``deploy efficient machine learning at scale for content moderation \ldots\ in ways that enable volunteers to develop and deploy advanced technologies on their own terms'' \citep{halfaker_ores:_2020}. More information on the philosophy, design and implementation of ORES can be found in \citet{halfaker_ores:_2020}. 
+ + +\section{Methods} +% \subsection{Analytic Approach} + +Our analysis is based on a regression discontinuity design (RDD) that aims to estimate the causal effects of flagging by RCFilters on moderator behavior in Wikipedia \citep{imbens_regression_2008, jacob_practical_2012, lee_regression_2010}. Common in empirical economics, RDDs are quasi-experimental in that they resemble a randomized control trial for data points in the neighborhood of an arbitrary cutoff \citep{jacob_practical_2012, lee_regression_2010}. +% NOTE: i don't think this point below is critical-mako +% Considerable attention to the ethics and usability of algorithmic systems in the machine learning community aims to provide more transparent or ``fair'' predictors with a focuses on statistical and optimization problems, but from the perspectives of sociotechnical systems and value-sensitive algorithmic design it is important to expand the scope of design and evaluation to consider the user experience and how the introduction and use of new technologies interacts with social structures and shapes work processes \citep{selbst_fairness_2019, zhu_value-sensitive_2018}. +RDDs model how an outcome depends on this cutoff and a continuous ``forcing variable.'' The idea behind an RDD is that observations immediately below and above the cutoff will be equal in expectation after adjusting for any underlying (i.e., ``secular'') trend. For example, RDDs used in econometrics might estimate the effect of passing a test by comparing the outcomes of people who barely passed and failed. +One benefit of an RDD over a field experiment based on A/B tests is that it can provide ecological validity and support causal claims without subjecting users to intervention without consent \citep{lane_big_2015, jouhki_facebooks_2016}. 
+% While even observational studies of social media can raise concerns and violate user's privacy expectations \citep{boyd_critical_2012, fiesler_participant_2018}, Wikipedia editing is generally considered public and open to scrutiny. An RDD can provide evidence of causal effects without intervention and violating user's expectations of privacy. +Although they remain rare in computing, RDDs have been used in recent publications in social computing \citep{narayan_all_2019, hill_hidden_2020}. + +Our forcing variable is the score from the ORES machine learning system. Our cut-off variables are a set of arbitrarily chosen operating points used by RCFilters. Our outcomes are constructed by creating two variables that indicate whether a revision's author is overprofiled as well as variables that indicate whether each revision was reverted or subject to a controversial revert. We discuss each in turn before introducing our analytic approach. + +\subsection{Data and Measures} + +We build our dataset from two publicly available tables of Wikimedia history published by the Wikimedia Foundation (WMF).\footnote{\url{https://wikitech.wikimedia.org/wiki/Analytics/Data\_Lake/Edits/Mediawiki\_history} (Archived: \url{https://perma.cc/CPM6-PY6F}); \url{https://dumps.wikimedia.org/other/mediawiki\_history/readme.html} (Archived: \url{https://perma.cc/3DDJ-9FXS})} +% by running spark scripts on the Wikimedia analytics cluster. +Although Wikipedia is published and collaborated on in several languages, the vast majority of knowledge regarding collaboration on Wikipedia is derived from studies of English Wikipedia \citep{hecht_tower_2010, hara_cross-cultural_2010}. To support generalizability, we analyze data from 23 language editions of Wikipedia where edit quality flags are displayed in the RCFilters interface. 
+To ensure that we have variation in our outcomes, we exclude wikis with fewer than three edits above and below each threshold (see §\ref{sec:thresholds}) from each sub-analysis. +For all of our analyses, our unit of analysis is the \emph{revision}. Revisions correspond to a single edit to a page by a participant on Wikipedia. We exclude revisions by bots since we care about how algorithmic flagging and social signals are used by human moderators. +Following guidance for RDDs \citep{lee_regression_2010}, we include only revisions very near to RCFilters thresholds, with ORES scores within 0.03 of the thresholds. + + +%This means that different wikis may be included in different models. For each model we report the quantity of edits from each wiki and how many fall on either side of the thresholds. +To manage the total size of our dataset, we analyze a sample that we construct by stratifying along several dimensions: Wikipedia language edition; user registration status (§\ref{sec:signal}); whether the editor has a user page or not (§\ref{sec:signal}); whether an edit was reverted in 2 hours, 48 hours, or 30 days; and whether the revert was controversial (§\ref{sec:controversial}). +% Most Wikipedia edits comply with norms, and accordingly the ORES scores are left-skewed, therefore we also stratify our sample by the decile of the ORES scores. +Then, we sample 5000 edits from within unique combinations of the variables. If there are fewer than 5000 edits in a given stratum, we include all of them. +% Stratified sampling introduces a known bias in our sample and +We adjust for this stratification using sample weights throughout our analysis. 
+Since RCFilters was introduced to different wikis at different times, % but we wish to estimate the average effect for edits to any of the wikis in our sample, +we sample edits during the period immediately following the introduction of ORES but weight our sample according to the number of edits to each wiki over the entire study period. +The numbers of observations sampled at each threshold, from each Wiki, and for each model are available in the supplementary material. + + +\subsubsection{ORES scores and RCfilter thresholds} +\label{sec:thresholds} + +% \begin{figure}[t] +% \centering +% \includegraphics[width=0.7\textwidth]{resources/Ores_Thresholds.png} +% \caption[Screenshot showing RCFilters thresholds for English Wikipedia.]{Screenshot of Special:OresModels from English Wikipedia showing levels of precision and recall corresponding to different flags in RCFilters.} +% \label{fig:ores_thresholds} +% \end{figure} + +The continuous forcing variable used in our RDD analysis is a score from the ORES algorithm described in §\ref{sec:empirical}. Scores range from 0 to 1 and reflect the predicted probability that a revision is damaging. Because the ORES system has been under continual development over time, we obtain ORES scores created at the times revisions were made from a log maintained by the WMF. +The treatments in our analysis are whether edits to Wikipedia are flagged by RCFilters. These flags are applied if, and only if, a score from ORES exceeds a threshold. +This use of thresholds at arbitrary operating points is a feature of most algorithmic flagging systems. +% To know whether an edit was flagged in RCFilters, we need to obtain the ORES score that was assigned to the edit and the thresholds that were active at the time the edit was reverted. 
+The intuition behind our RDD is that---after adjusting for small differences in quality associated with marginally higher or lower scores---edits with ORES scores immediately above and below an arbitrary threshold will be similarly likely to receive both first-order and controversial sanctions. Consequently, any discontinuous change in reverts at one of the thresholds used by RCFilters can be attributed to the flag. + +RCFilters uses multiple thresholds corresponding to green, yellow, orange, and red flags. By default, only orange and red flags are shown, but users can configure which colors to display. Green flags and filters are intended to help Wikipedia editors find good edits. +% As we are interested in flagging for the purposes of finding damaging edits we consider them no further. +Our analysis considers only red, orange, and yellow flags, which correspond to thresholds making different trade-offs between precision (the proportion of flagged edits that are truly damaging) and recall (the proportion of truly damaging edits that are flagged). The red flag is labeled ``very likely damaging'' and corresponds to a high precision threshold. Orange flags correspond to a ``likely damaging'' label with greater recall but less precision. Edits with a yellow flag are ``maybe damaging'' with a high recall but lower precision. +%A special page displays the thresholds and their corresponding levels of precision and recall. +%Figure \ref{fig:ores_thresholds} shows this page for English Wikipedia.\footnote{\url{https://en.wikipedia.org/wiki/Special:OresModels}} +% \subsubsection{RCFilters thresholds} +% ORES edit classifier damaging scores31} +% Our key estimand is a set of dichotomous variables, typically referred to as $\tau$ in RDD analysis, that indicate whether or a given edit is above or a cutoff threshold. +%for each wiki from the public mirror of the ORES scores database hosted by the Wikimedia foundation's quarry service. 
+% \subsection{RCFilters thresholds} +RCFilters' thresholds are truly arbitrary and have changed over time and across language editions in response to shifts in the precision and recall of ORES models and in response to community feedback. +% Because new models were deployed during our study period, scraping the page where the active thresholds are displayed would not provide the correct thresholds that were in use when an edit was made or that moderators reviewing changes would observe. +% Fortunately, the configuration determining the thresholds, the trained ORES models, the code to run them are open source, and the exact time that changes are deployed is published at the Wikimedia foundation's server admin log. So we wrote a script to combine this information to determine the precise thresholds that were active for each edit. +We were able to collect data on threshold configuration, fully trained ORES models, code, and the precise time that changes were deployed in the WMF server admin log. We combined these data to identify the precise thresholds that were active for each revision in our dataset. + +\subsubsection{Sanctions} + +% cite some more stuff that uses reverts and sanctioning. +% Should we mention Twinkle? +Our outcome variable for answering RQ1 must capture sanctioning in Wikipedia. Following a large body of other social computing research, we measure sanctions as identity reverts \citep[e.g.,][]{halfaker_dont_2011, halfaker_rise_2013, teblunthuis_revisiting_2018, piskorski_testing_2017}. Identity reverts occur when a user undoes another user's edit by restoring a page to an earlier state and are measured by comparing hashes of page revisions \citep{halfaker_dont_2011}. + +That said, identity reverts are an imperfect measure of sanctioning. It is also possible for an individual to ``self-revert'' by undoing their own edit. We therefore only treat a revision as reverted if it was undone, but not by a self-revert. 
We also limit our measure of sanctioning to revisions that are undone within 48 hours to avoid problems related to mass revert actions such as ``blanking'' of pages that result in false positives. We are confident that 48 hours is a reasonable window because most damage to Wikipedia will be undone within that amount of time \citep{geiger_when_2013} and a 48-hour window will include reverts caused by RCFilters since any effect of RCFilters is likely to occur quickly. + +\subsubsection{Controversial sanctions} +\label{sec:controversial} + +Our outcome variable for answering RQ2 and RQ3 measures controversial sanctions. We follow \citet{piskorski_testing_2017} by measuring controversial sanctions as identity reverts that are subsequently reverted by a third party. Specifically, we label a sanction as controversial if the sanction is undone by a third editor who was not the original editor or the reverting editor. Such interactions likely correspond to cases in which a third party observes the initial revert, disagrees with the initial sanction and then acts to reverse the sanction. + + +\subsubsection{Social signals} + +\label{sec:signal} +Answering our RQ1 and RQ3 requires that we identify underprofiled and overprofiled individuals in our empirical setting. Drawing from research and documentation for Wikipedia moderators, we identify two such measures displayed in the RCFilters interface shown in Figure \ref{fig:rcfilters}. +%Although much of interface is likely inscrutable to those without experience editing Wikipedia, there are several clear social signals of edit quality clearly displayed on ``Recent Changes'' pages and watchlists---and on the versions of these pages augmented by RCFilters---that will be understood by experienced Wikipedia editors engaged in moderation. +Our first measure is whether an editor was logged into an account. Unregistered editors act on Wikipedia without logging in and registered contributors are those that edit with accounts. 
Because they are identified by their IP address rather than by a chosen username, unregistered editors are also referred to as ``IP editors'' or ``anons.'' Unregistered editors are associated with misbehavior and have long had a controversial status on Wikipedia \citep{mcdonald_privacy_2019}. Geiger and Ribes described how tools for moderators highlight unregistered editors \citep{geiger_work_2010}. +% That said, communities such as Wikipedia may wish to allow anonymous contributions due to the benefits anonymity may provide. Anonymity may help diversify participation as those who face targeted harassment based on their identities are likely to seek anonymity \cite{forte_privacy_2017}. Anonymity may also increase productive contribution by removing the frictions of creating an account or logging in \cite{mcdonald_privacy_2019}. When wikis on other platforms disallowed unregistered editing this decreased norm and rule violation, but also decreased beneficial contributions. +De Laat argued that unregistered editors on Wikipedia are overprofiled in that they are at higher risk to have their contributions rejected unfairly \citep{de_laat_use_2015, de_laat_profiling_2016}. +% Such barriers to contribution may limit community growth and diversity, as users with vulnerable identities may seek anonymity and blocking contributions from unregistered contributors can decrease positive contributions to peer production projects \cite{hill_hidden_2020, forte_privacy_2017}. +% That said, overuse of social characteristics such as experience levels, reputation, and registration status is instrumental for moderators to deal with the ``problem of scale'' and efficiently regulate online spaces \cite{gillespie_custodians_2018, de_laat_profiling_2016}. 
+% Recently, concerns about privacy and vandalism related to the use of IP addresses for edit attribution sparked discussions about alternatives, including proposals to ban anonymous editors from creating pages or even to eliminate anonymous editing entirely.\footnote{see \url{https://meta.wikimedia.org/wiki/Talk:IP_Editing:_Privacy\_Enhancement\_and\_Abuse\_Mitigation}} +% https://en.wikipedia.org/wiki/Wikipedia:Editors_should_be_logged-in_users_(failed_proposal) +% https://en.wikipedia.org/wiki/Wikipedia:Disabling_edits_by_unregistered_users_and_stricter_registration_requirement +% https://en.wikipedia.org/wiki/Wikipedia:IPs_are_human_too + +Second, the RCFilters interface indicates whether the editor has created a user page. User pages are Wikipedia's version of profile pages. Not having a user page is a social signal of newness because most committed users will create a user page early into their experience in Wikipedia \citep{ayers_how_2008}. The presence or absence of pages in Wikipedia is indicated with a subtle user interface clue: links to pages that do not exist are rendered in red, whereas links to pages that exist are blue. For example, Figure \ref{fig:rcfilters} shows the user ``Mashlova'' whose name is shown in red and would be identified as a newcomer. +% \citet{broughton_wikipedia_2008} highlights that vandal fighters pay special attention to unregistered editors and accounts without ``user talk pages,'' who are probably new.\footnote{The RCfilter interface also displays whether a user has a ``user talk page.'' A user talk page is a second page which is used for one-to-one messaging \citep{narayan_all_2019}. The absence of a user talk page means that a user has never been sent a message on Wikipedia. Although both user pages and user talk pages are signals of newness, a red link to a user talk page will turn blue when a vandal is warned. 
As a result, this may be a less reliable signal of newness compared to a red user page link and is not included in this analysis.} +% Wikipedia users rely on the presence or absence of user pages to identify low quality content. +%For example, \citet{matthews_} explains, ``QUOTE.'' +De Laat cited the absence of a user page as a second example of an indicator of vandalism that will result in overprofiling \citep{de_laat_profiling_2016}. +We measure whether an editor's user page exists at the time of a given contribution by matching the titles of user pages against the editor's username and checking if the creation of the user page was prior to the edit in question. We only include registered editors in our analysis of overprofiling based on user pages. + +% Paragraph summarizing how ores was trained and routing people to halfak's preprint. + +% briefly describe the release of the feature and what it takes to turn it on. +%Prior to the development and release of RCFilters, tools with features such as algorithmic flagging or filtering by user characteristics were available in special interfaces such as huggle. None of the above + +\begin{table} +\centering +\footnotesize +\begin{subtable}{0.47\linewidth} +% latex table generated in R 4.0.4 by xtable 1.8-4 package +% +\begin{tabular}{llrr} + Threshold & Edit type & N. & Prop. \\ + \hline +Maybe dam. & Not reverted & 12,403,717 & 0.87 \\ + Maybe dam. & Rev. controversial & 69,395 & 0.00 \\ + Maybe dam. & Rev. not cont. & 1,757,866 & 0.12 \\ + Maybe dam. & \textbf{Total} & 14,230,978 & 1.00 \\ + \hline +Likely dam. & Not reverted & 1,254,219 & 0.55 \\ + Likely dam. & Rev. controversial & 31,652 & 0.01 \\ + Likely dam. & Rev. not cont. & 1,009,108 & 0.44 \\ + Likely dam. & \textbf{Total} & 2,294,979 & 1.00 \\ + \hline +V. likely dam. & Not reverted & 58,474 & 0.15 \\ + V. likely dam. & Rev. controversial & 12,545 & 0.03 \\ + V. likely dam. & Rev. not cont. & 323,762 & 0.82 \\ + V. likely dam. 
& \textbf{Total} & 394,781 & 1.00 \\ + \hline +\end{tabular} + +\caption{Counts and proportions of edits by whether an edit was reverted or controversially reverted in the neighborhood of each threshold.}\label{tab:edit.stats} +\end{subtable} +\qquad +\begin{subtable}{0.47\linewidth} +% latex table generated in R 4.0.4 by xtable 1.8-4 package +% +\begin{tabular}{llrr} + Threshold & Editor type & N. & Prop. \\ + \hline +Maybe dam. & Reg. No User Page & 4,006,466 & 0.28 \\ + Maybe dam. & Reg. User Page & 3,797,451 & 0.27 \\ + Maybe dam. & Unregistered & 6,415,271 & 0.45 \\ + Maybe dam. & \textbf{Total} & 14,219,188 & 1.00 \\ + \hline +Likely dam. & Reg. No User Page & 281,964 & 0.12 \\ + Likely dam. & Reg. User Page & 26,459 & 0.01 \\ + Likely dam. & Unregistered & 1,982,985 & 0.87 \\ + Likely dam. & \textbf{Total} & 2,291,408 & 1.00 \\ + \hline +V. likely dam. & Reg. No User Page & 21,630 & 0.05 \\ + V. likely dam. & Reg. User Page & 687 & 0.00 \\ + V. likely dam. & Unregistered & 371,499 & 0.94 \\ + V. likely dam. & \textbf{Total} & 393,816 & 1.00 \\ + \hline +\end{tabular} + +\caption{Counts and proportions of edits by whether an editor was registered or had a user page in the neighborhood of each threshold.}\label{tab:editor.stats} +\end{subtable} +\caption{Summary statistics from our full dataset. \label{tab:summary.stats}} +\end{table} + +\section{Analytic plan} +\label{sec:analytic} + +%As a robustness check against threats to assumption (2) we conduct ``placebo tests'' by running our analysis at artificial cutoffs not equal to the real thresholds. We present results of this robustness check in the supplementary material. + +%(see \cite{chancellor_thyghgapp:_2016} for an example of evading content moderation through lexical variation in social media) we do not think this will be wide-spread or successful on Wikipedia. 
+ +% \newcounter{equationcnt} +% \newcounter{figuretmp} +% \setcounter{figuretmp}{\thefigure} +% \setcounter{figure}{0} + +Our analysis comprises Bayesian logistic regression models in two parallel analyses. +The first analysis treats our dichotomous measure of whether edits are reverted as an outcome. This begins with an ``adoption check'' (§\ref{sec:adoption}) that describes the causal effects of flagging on reverts in general. The adoption check is a prerequisite to answering our research questions. The rest of the first analysis (§\ref{sec:results-rq1}) answers RQ1 by comparing the effect of RCFilters on edits by overprofiled users to its effect on other editors. +Our second analysis is very similar but uses controversial reverts as the outcome, and analyzes only reverted edits to model the probability that a revert is controversial. It begins by answering RQ2 (§\ref{sec:results-rq2}) in an analysis similar to the adoption check but with controversial sanctions as an outcome and with a dataset limited to overprofiled users. The rest of the second analysis (§\ref{sec:results-rq2}) answers RQ3 and is similar to RQ1 but with controversial reverts as the outcome in place of reverts. + +% We then fit models predicting the likelihood of first-order and controversial sanctions for subsets of revisions by Registered and Non-registered contributors, and contributors with and without profile pages. +Although our models use different sets of edits and outcomes, they all have the same logistic regression structure shown in Equation \ref{eq:model}. 
+ +\begin{align} + \mathrm{log}\left(\frac{P\left(Y_r\right)}{1-P\left(Y_r\right)}\right) &= + \alpha_{1}\left(score_{r} - c_{jw}\right) + + \tau_j \mathbf{1} \left[score_{r} > c_{jw}\right] \nonumber \\ + &+ \alpha_{2}\left(score_{r}-c_{jw}\right) \mathbf{1} \left[score_{r} > c_{jw}\right] + \alpha_w +\label{eq:model} +\end{align} + +\noindent Our goal is to estimate $\tau_j$ which is the causal effect of being flagged at level $j$, where $j \in \{1,2,3\}$ corresponds to labels of ``maybe damaging,'' ``likely damaging'' and ``very likely damaging.'' For each cutoff on each wiki, we select revisions whose ORES scores are within a $\pm0.03$ window of the cutoff $(c_{jw})$. +Following established approaches to RDD, we fit ``kink'' models that allow for a change in slope at the discontinuity \citep{lee_regression_2010, litschig_impact_2013}. The slope before the discontinuity is $\alpha_1$ and the change in slope is $\alpha_2$. The indicator function is represented by $\mathbf{1}$. Our models include fixed effects for wiki ($\alpha_w)$ to account for differences in the rates of sanctioning between wikis. + +% Our models incorporate all three RCFilters thresholds, following the example of \citet{litschig_impact_2013}. +%For each RCFilters threshold $j \in +%\{1,2,3\}$, we seek to estimate ($\tau_j$) the causal effect of being flagged on our outcomes. +%level $j$ where $j \in \{1,2,3\}$ corresponding to labels of ``maybe damaging'', ``likely damaging'' and ``very likely damaging''. + +%Equation \eqref{eq:rdd_reverted} shows our specification for our models (the only differences between our models are the dependent variables, $Y$ and the type of editor whose edits are modeled.) 
+ +%\begin{small} +% \begin{equation*} +% \begin{split} +% P(Y_{rw}) & = \left[ \tau_1 \mathbf{1} [score_{r} > c_{1w}] + \alpha_{10}(score_{r} - c_1) + \alpha_{11}\left(score_{r}-c_{1w}\right) \mathbf{1} [score_{r} > c_{1w}]\right]\mathbf{1_{1p}} \\ +% & + \left[ \tau_2\mathbf{1}[score_{r} > c_{2w}] + \alpha_{20}(score_{r} - c_2) + \alpha_{21}\left(score_{r}-c_{2w}\right)\mathbf{1}[score_{r} > c_{2w}]\right]\mathbf{1_{2p}} \\ +% & + \left[ \tau_3\mathbf{1}[score_{r} > c_{3w}] + \alpha_{30}(score_{r} - c_3) + \alpha_{31}\left(score_{r}-c_{3w}\right)\mathbf{1}[score_{r} > c_{3w}]\right]\mathbf{1_{3p}} \\ +% & + \sum_{j=1}^3B_j\mathbf{1}[seg_{j-1} < score +% \le seg_{j}]\mathbf{1}_{jp} + \alpha_w + \mu_{rw} +% \end{split} +% \end{equation*} +% \begin{equation*} +% \begin{split} +% \mathbf{1_{jp}} & = \mathbf{1}[c_{wj}(1-p) < score_{rw} < c_{jw}(1+p)] +% j=1,2,3; & ~~p=0.05 +% \end{split} +% \end{equation*} + +% We conduct placebo tests to + +We use Bayesian inference to estimate our models for two reasons. First, virtually all edits above the ``very likely damaging'' level are reverted in some of the wikis we analyze. The presence of near-perfect ``separation'' creates estimation problems for classical numerical approaches \citep{allison_convergence_2004}. Preferred solutions to this problem in non-Bayesian frameworks include penalized likelihood methods that introduce bias. Our Bayesian approach uses weakly-informative priors that are conservative but avoid the problem of separation. +% This leads to a conservative analysis less likely to result in false discovery. +The second reason we use Bayesian inference is that it makes it easy to compare estimates across models. +% Our hypotheses compare effects of flagging between different types of editors. Testing them in a classical framework can be done by fitting a joint model including all editor types and conducting a Wald test. 
In a Bayesian framework, we can sample parameter estimates from the posterior distribution and test our hypotheses using statistical tests for differences between these samples \cite{morey_fallacy_2016}. +Prior work at CSCW by \citet{gan_gender_2018} used a similar rationale for adopting Bayesian logistic regression. +In Bayesian analysis, fitted models take the form of posterior distributions constituting a probability distribution of model coefficients conditional on our model, data and priors. We consider a hypothesis supported if it is consistent with at least 95\% of posterior draws. In other words, we accept a given hypothesis if our parameter estimate has the predicted sign and the 95\% credible interval does not contain 0. This is the Bayesian analog to testing a hypothesis with $\alpha=0.05$. +We fit our models using the rstanarm package (version 2.19.3) and the default priors that are provided for reference in the supplementary material. + + +%To check that our models fit the data and that observed discontinuities are not spurious, these plots also present probabilities of reversion estimated directly from our data within bins around different values of x. + +\section{Adoption Check} +\label{sec:adoption} + +% TODO move down +% RCFilters algorithmic filtering features are not enabled by default and must be enabled in user preferences. Therefore, we will present a preliminary analysis that shows that these tools were adopted by demonstrating an overall causal effect of flagging on sanctioning after presenting our methods. First we will describe our other measures. + +Before presenting results from hypothesis tests associated with our research questions, we first establish that RCFilters was adopted by Wikipedia moderators and that it had an effect on sanctioning behavior. This establishes a baseline necessary to answer RQ1 regarding the differential effects of RCFilters between overprofiled users and others. 
Null effects in RQ1 might simply reflect that the system was not used. A successful adoption check rules out this possibility and sets up a credible null hypothesis test for RQ1. +% null effects for hypotheses associated with our research questions are due +% , prior to our study, little was known of the extent of RCFilters usage beyond anecdotal reports from Wikis, chatrooms, in-person conversations, and mailing lists. As a result, it is not obvious that RCFilters will have the sort of causal effects on sanctioning that would allow us to answer our research questions. + +% we look for evidence that flagging has a causal effect on sanctioning over all types of editors. +% using model following equation \ref{eq:rdd_reverted}. +% Observing discontinuous increases in the probability of reversion at a given thresholds constitutes evidence that flags in RCFilters have a causal effect on moderation actions on Wikipedia. Specifically, +We test the hypothesis that flagging increases the probability that an edit is reverted to demonstrate that RCFilters flags are being used by Wikipedia moderators. Our estimates for $\tau_j$---as described in §\ref{sec:analytic}---should be positive if Wikipedia moderators are using flags in RCFilters to review potentially damaging edits. + +We find strong evidence that RCFilters was adopted and impacted sanctioning. Figure \ref{fig:adoption.me} visualizes this evidence: a marginal effects plot that illustrates our models' predicted likelihood of reverts across different ORES scores in the neighborhood of the thresholds. In each such plot, the $x$-axis shows the distance from the threshold such that discontinuities at 0 represent the effect of being flagged. 
The plots show modeled values for the English language edition of Wikipedia but are representative of relationships across all wikis.\footnote{Because intercepts are the only part of our model that depends on the wiki, slopes and the discontinuities caused by algorithmic flagging represent our inference over all our data.} +Figure \ref{fig:adoption.me} shows discontinuous increases in the likelihood of reversion at the ``maybe damaging'' and ``likely damaging'' thresholds in the left and center panels. +We find the greatest effect at the ``maybe damaging'' threshold ($\tau_1 = 1.23$, $[1.19;\allowbreak 1.28]$).\footnote{All $\tau$ parameter estimates are reported as log-odds ratios. The bracket notation indicates the 95\% credible interval. In other words, the most likely value of the parameter is $1.23$, but there is a 95\% probability that the parameter lies in the interval $[1.19;\allowbreak 1.28]$.} +The effect at the ``very likely damaging'' threshold shown in the right-most panel is smaller ($\tau_3 = 0.41$, $[0.35;\allowbreak 0.46]$). + +\begin{figure} + \centering +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\maxwidth]{figures/knitr-adoption_me_plot-1} + +\end{knitrout} +\caption{Marginal effects plot showing the model-predicted relationship between ORES score and the probability that an edit will be reverted around the cutoffs for all contributors with 95\% credible intervals.\label{fig:adoption.me}} +\end{figure} + +%(in the case of \textbf{RQ1}) or that a revert is controversial (for \textbf{RQ2}; \textbf{RQ3}) in the neighborhood of thresholds that trigger flags. + +The impacts of the ``maybe damaging'' and ``likely damaging'' flags on the likelihood of sanctioning are enormous. Figure \ref{fig:adoption.me} shows that the likelihood of a revert for an edit just below the ``maybe damaging'' threshold is between 5.5\% and 5.8\%, indicating that reverts of unflagged edits are relatively rare. 
Being flagged +with the ``maybe damaging'' flag causes a dramatic increase in the reversion probability to between 16.8\% and 17.7\% for edits just above the threshold. +%Flagging an edit at the ``maybe damaging'' level increases the odds it will be reverted +The effect of algorithmic flags at the ``likely damaging'' level is even more stark. We estimate that edits just below the ``likely damaging'' threshold are likely to be reverted between 24.3\% and 25.8\% of the time, whereas similar edits just above the threshold are reverted between 46.1\% and 48.7\% of the time. Being flagged at the ``very likely damaging'' threshold causes an increase in reversion probability from between 72.1\% and 73.5\% to +between 79.5\% and 81\%. + +\section{Results} +\subsection{RQ1: Effect of Flagging on Sanctioning} +\label{sec:results-rq1} + + + +% I have so much data and these marginal posteriors are so normal that there isn't much point in showing the intervals +% \begin{subfigure}[t]{0.75\textwidth} +% \centering +% Our adoption check establishes a baseline for answering our request by establishing that users whose edits are flagged by RCfilter are sanctioning at a higher rate. +In our first research question (RQ1), we seek to understand how the increase in sanctioning caused by flagging +% shown in our adoption check in §\ref{sec:adoption} +affects discrimination against overprofiled users. If algorithmic flagging reduces overprofiling, as some computer scientists have argued \citep{kleinberg_human_2018}, the effect of flagging will be more scrutiny on users who are more likely to be given a pass. If algorithms simply reproduce discrimination, we will find no difference. 
+Results for hypothesis tests answering this question are shown in Figure \ref{fig:h1.regplot}, which visualizes the point estimates and credible intervals for differences in the causal effects of flagging on reverts between unregistered and registered contributors and between contributors with and without user pages. Values greater than 0 indicate that our estimated effect for the other users is greater than that for the overprofiled group. + +% Overall, we find that the odds of an . +\begin{figure} + \centering +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\textwidth]{figures/knitr-h1_unreg_me_plot-1} + +\end{knitrout} + \caption{Results for RQ1 comparing unregistered and registered contributors are displayed in a marginal effects plot showing the model predicted relationship with 95\% credible intervals between ORES scores and reverts around the thresholds that trigger flags. \label{fig:h1.me}} +\end{figure} + + + +% Figure \ref{fig:h1.me} shows marginal effects plots representing the relationship between ORES score and sanctioning in the neighborhood of the thresholds for English Wikipedia for these models. +In support of the idea that algorithmic flagging can reduce overprofiling bias, we find that the overall effect of flagging is to increase demographic parity between registered and unregistered editors. Aggregating our posteriors shows that the average effect over the three thresholds is greater for registered editors than for unregistered editors +%($\tau^{\mathrm{Reg}}_{1} + \tau^{\mathrm{Reg}}_{2} + \tau^{\mathrm{Reg}}_{3} - \tau^{\mathrm{Unreg}}_{1} - \tau^{\mathrm{Unreg}}_{2} - \tau^{\mathrm{Unreg}}_{3}) = +($\frac{1}{3}\sum_{j=1}^3\left(\tau^{\mathrm{Reg}}_j - \tau^{\mathrm{Unreg}}_j\right) = +0.45~[0.16;\allowbreak 0.6]$). 
+The effect of flagging on reverts of registered editors is greater than the effect for unregistered editors at both the ``maybe damaging'' threshold ($\tau^{\mathrm{Reg}}_1 - \tau^{\mathrm{Unreg}}_1 = 0.8~[0.71;\allowbreak 0.89]$) and the ``likely damaging'' threshold ($\tau^{\mathrm{Reg}}_2 - \tau^{\mathrm{Unreg}}_2 = 0.78~[0.58;\allowbreak 0.97]$). +% NOTE: this is in a footnote above -mako +% As required by our logistic regression framework, we use odds ratios when comparing causal effects between groups of contributors. +For an action by an unregistered contributor near to the ``maybe damaging'' threshold, being flagged increases the odds of being reverted by a factor of between 1.45 and 1.6 times. This is significantly less than the increase of between +3.16 and 3.68 times for registered contributors. + +However, at the ``very likely damaging'' threshold we find that the effects of flagging are stronger for unregistered editors than for registered editors ($\tau^{\mathrm{Reg}}_3 - \tau^{\mathrm{Unreg}}_3 = -0.17~[-0.33;\allowbreak -0.01]$). Being flagged increases the odds that an action is reverted by a factor of between 1.43 and 1.62 times for an unregistered editor and by a factor of between 1.11 and 1.49 times for registered contributors. However, as Table \ref{tab:summary.stats} shows, a far greater number of actions receive scores near to lower thresholds. Thus, we focus on the lower thresholds in the following discussion. + +Figure \ref{fig:h1.me} lets us interpret our models by making it possible to visually compare the effects of being flagged between overprofiled and underprofiled editors at a given threshold because the $y$-axes in each row span an identical range. +The top-left panel shows that our models' linear prediction of the probability of sanctioning for unregistered contributors at the ``maybe damaging'' threshold jumps by between 4.8 and 6.7 +percentage points, from 13.5\% to 19.2\% on average. 
For registered editors, shown in the top-right of Figure \ref{fig:h1.me}, we estimate a jump of between 9.1 and 10.3 percentage points, from 4.6\% to 14.3\% on average. This is between + 3.3 and 4.6 percentage points greater than the jump for unregistered editors. +% round(non.anon.md.proto.above$linpred.lower,2) and round(non.anon.md.proto.above$linpred.upper,2). +For edits that ORES scores near the ``maybe damaging'' threshold, an unflagged unregistered contributor has about the same odds of being sanctioned as a flagged registered contributor. + +% proto.reverted.CI.str(anon.md.proto.below, digits.1=2,digits.2=3) to proto.reverted.CI.str(anon.md.proto.above, digits.1=2,digits.2=3). + +% Flagging increases the likelihood that an edit by a registered editor is f +% percentage points, +% So being flagged as ``maybe damaging'' for registered editors causes a for registered than for unregistered editors. +% The top-left plot in the figure shows how our models' linear predictions of how the probability of sanctioning for unregistered contributors at the ``maybe damaging'' threshold jumps between round((anon.md.proto.above$linpred.lower - anon.md.proto.below$linpred.upper)*100,2) and round((anon.md.proto.above$linpred.upper - anon.md.proto.below$linpred.lower)*100,2) percentage points, from format.percent(round(anon.md.proto.below$linpred,3)) to format.percent(round(anon.md.proto.above$linpred,3)) on average. + +\begin{figure} +\centering + +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=0.7\linewidth]{figures/knitr-regplot_H1_anon-1} + +\end{knitrout} +\caption{Results for RQ1 showing point estimates and 95\% credible intervals for differences in the causal effect of flagging on sanctioning between overprofiled contributors and others. A value greater than 0 indicates that our estimates of the effect for underprofiled contributors are greater than those for overprofiled contributors. 
+} % Our analysis of registration status shows that the effects of flagging for registered editors are greater than for unregistered at both the ``maybe damaging'' and the ``likely damaging'' thresholds and the difference over both thresholds is also positive. For editors without user profiles, on the other hand, flagging increases sanctioning but does so to a greater extent for contributors without a user profile than for contributors with one. +\label{fig:h1.regplot} +\end{figure} + +The bottom row of Figure \ref{fig:h1.me} shows that the change in sanctioning probability at the ``likely damaging'' threshold is +between 9.5 and 15.2 percentage +points greater for registered editors than for unregistered editors. +For unregistered contributors, shown in the bottom-left of Figure \ref{fig:h1.me}, being flagged as ``likely damaging'' increases the probability of revert between 15 and 18.6 percentage points, from 33.5\% to 50.2\% on average. +But for registered editors, shown in the bottom-right of Figure \ref{fig:h1.me}, we detect an even bigger jump of between 23.7 and 34.6 percentage points, from 15.5\% to 44.5\% on average. +For actions that ORES scores near the ``likely damaging'' threshold, unflagged actions by unregistered editors are far more likely to be reverted. Once flagged, actions by registered and unregistered editors are reverted at relatively similar rates. + +% proto.reverted.CI.str(anon.ld.proto.below,format.percent=T,between=T) to proto.reverted.CI.str(anon.ld.proto.above,format.percent=T,between=T), a jump of +% percentage points on average. + +% proto.reverted.CI.str(non.anon.ld.proto.below,format.percent=T,between=T) to proto.reverted.CI.str(non.anon.ld.proto.above,format.percent=T,between=T), an average increase of format.percent(non.anon.ld.proto.above$linpred - non.anon.ld.proto.below$linpred) percentage points. 
+ + + + +%percentage points, from proto.reverted.CI.str(anon.ld.proto.below, digits.1=2,digits.2=3) to proto.reverted.CI.str(anon.ld.proto.above, digits.1=2,digits.2=3). + +% Our models predict that an edit by a prototypical unregistered contributor with an ORES scores very near the ``maybe damaging'' threshold will jump from a likelihood of being reverted immediately below the cutoff to a proto.reverted.CI.str(anon.md.proto.above, digits.1=2,digits.2=3) likelihood just above. +% When a contributor is registered, we see an even bigger jump from +% Our findings for the ``likely damaging'' threshold are substantively similar. We find a $round(exp(mean(h1.tau.2.non.anon)),2)$-factor +% $get.CI.str(h1.tau.2.non.anon,transform.f=exp,format.percent=F)$ increase in the odds of reversion for actions by registered contributors which is greater than the $round(exp(mean(h1.tau.2.anon)),2)$-factor +% $get.CI.str(h1.tau.2.anon,transform.f=exp,format.percent=F)$ increase in odds for actions by unregistered contributors. + + + +% The top left panel Figure X shows that an edit to English Wikipedia by a registered contributors will be reverted jumps XX\% (proto.reverted.CI.str(non.anon.ld.proto.below, digits.1=2,digits.2=3) to proto.reverted.CI.str(non.anon.ld.proto.above, digits.1=2,digits.2=3) at the threshold). +% The panel in the top right suggests an smaller increase for unregistered contributors of X\% (from proto.reverted.CI.str(anon.ld.proto.below, digits.1=2,digits.2=3) to proto.reverted.CI.str(anon.ld.proto.above, digits.1=2,digits.2=3)). +% Our model incorporates uncertainty not captured by these point estimates of prototypical revisions. We estimate that the difference in the jump between registered and unregistered users of between X\% and Y\%. 
+ +% Since we detect that edits by Non-IP editors are more sensitive to flagging at both thresholds, we also observe a greater overall effect for Non-IP editors $(\tau^{\mathrm{Non IP}}=round(mean(h1.tau.non.anon),2)$ $(\mathrm{CI}=get.CI.str(h1.tau.non.anon,format.percent=F)$) than for IP editors $(\tau^{\mathrm{IP}}=round(mean(h1.tau.anon),2)$ $(\mathrm{CI}=get.CI.str(h1.tau.anon,format.percent=F)$. + +These results show that flagging causes an increase in a decision system's demographic parity concerning registration status. Actions by unregistered contributors that fall just above the cutoffs are much more likely to be reverted due to RCFilters---but the gap between actions by registered and unregistered contributors is much smaller when RCFilters has flagged an edit as ``maybe damaging'' or ``likely damaging.'' +In this way, our analysis suggests that algorithmic flagging can reduce overprofiling bias. +% This provides strong evidence that algorithms can reduce over-profiling bias. + +% Overall, we find that the odds of an . +\begin{figure} + \centering +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=\textwidth]{figures/knitr-h1_userpage_me_plot-1} + +\end{knitrout} + \caption{Results for RQ1 comparing contributors with and without user pages. + Each panel shows a marginal effects plot with 95\% credible intervals of the modeled relationship between ORES scores and reverts around the thresholds that trigger flags. \label{fig:h1.me.up}} +\end{figure} + +Surprisingly, our results for our second measure of over-profiling in Wikipedia suggest a dynamic that is opposite in sign to the differences we observe between registered and unregistered editors at the +``maybe damaging'' threshold ($\tau^{\mathrm{NoUP}}_1 - \tau^{\mathrm{UP}}_1 = -0.68~[-0.95;\allowbreak -0.41]$). 
At the ``likely damaging'' ($\tau^{\mathrm{NoUP}}_2 - \tau^{\mathrm{UP}}_2 = -0.05~[-1.61;\allowbreak 1.39]$) and the ``very likely damaging'' ($\tau^{\mathrm{NoUP}}_3 - \tau^{\mathrm{UP}}_3 = 0.46~[-0.1;\allowbreak 1.03]$) thresholds, we do not detect differences in effect size between contributors with and without user pages. +At the ``maybe damaging'' threshold, we find that flagging increases the odds that an editor without a user page is reverted by between 3.47 and 4.06 times. This is significantly more than the increase of +between 1.47 and 2.46 times for contributors with user pages. + +As above, we interpret these odds ratios using marginal effects plots shown in Figure \ref{fig:h1.me.up}. The top-left plot in the figure shows our models' linear predictions of the probability of reverting for contributors without user pages near to the ``maybe damaging'' threshold. For these editors, being flagged as ``maybe damaging'' increases the chances of sanctioning by between 11.4 and 13.8 +percentage points, from 5.6\% to 18.1\% on average. +In the top-right of Figure \ref{fig:h1.me.up}, we see a jump of between 2.2 and 4.8 percentage points, from 4\% to 7.4\% on average for editors that have created user pages. This is between + 8.4 and 9.7 percentage points less than the jump for contributors without user pages. +% round(non.anon.md.proto.above$linpred.lower,2) and round(non.anon.md.proto.above$linpred.upper,2). + +% We find that flagging has a greater has effect for users with profile pages than for over-profiled newcomers without them. These results are shown in the right panel of Figure \ref{fig:h1.me.up}. 
+% At the ``maybe damaging'' threshold, our models suggest that an edit by a prototypical over-profiled newcomer without a profile page will see a jump from proto.reverted.CI.str(no.up.md.proto.below, digits.1=2,digits.2=3,between=T,format.percent=T) to proto.reverted.CI.str(no.up.md.proto.above, digits.1=2,digits.2=3,between=T,format.percent=T) +% in the likelihood being reverted at the cutoff. We estimate that otherwise similar revisions by more established contributors with profile pages will jump from between up.md.proto.below$linpred.lower and up.md.proto.below$linpred.upper to between up.md.proto.above$linpred.lower and up.md.proto.above$linpred.upper. + +% proto.reverted.CI.str(up.md.proto.below, digits.1=2,digits.2=3) to proto.reverted.CI.str(up.md.proto.above, digits.1=2,digits.2=3). + +% Our models suggest that the odds of revert at the ``maybe damaging'' level are $round(exp(mean(h1.tau.1.no.user.page)),2)$ $get.CI.str(h1.tau.1.no.user.page,transform.f=exp,format.percent=F)$ times higher for less-profiled contributors with profile pages and $round(exp(mean(h1.tau.1.user.page)),2)$ $get.CI.str(h1.tau.1.user.page,transform.f=exp,format.percent=F)$ for those without. +% These results are opposite in sign to the results for registered and unregistered users. +% The chances that an action by a contributor without a profile page is sanctioned increase in probability from proto.reverted.CI.str(no.up.ld.proto.below, digits.1=2,digits.2=3) to proto.reverted.CI.str(no.up.ld.proto.above, digits.1=2,digits.2=3) and for contributors that do have profile pages we find an increase from proto.reverted.CI.str(up.ld.proto.below, digits.1=2,digits.2=3) to proto.reverted.CI.str(up.ld.proto.above, digits.1=2,digits.2=3). 
These changes correspond to odds ratios of $round(exp(mean(h1.tau.2.no.user.page)),2)$ $get.CI.str(h1.tau.2.no.user.page,transform.f=exp,format.percent=F)$ and $round(exp(mean(h1.tau.2.user.page)),2)$ $get.CI.str(h1.tau.2.user.page,transform.f=exp,format.percent=F)$ respectively, which we cannot statistically distinguish. + +% NOTE: i think this is not necessary. it's just a recap of what we've said and will say again in the discussion -mako +% While we found strong evidence that flagging decreases over-profiling bias due to registration status, we found evidence possibly contradicting this from our analysis of editors with and without user page profiles on Wikipedia. We believe that our inability to detect a change in sanctioning of contributors with User pages at the ``likely damaging'' threshold is likely because sanctioned edits by such editors are relatively scarce, leading to high uncertainty. These results from our analysis of over-profiling suggest that algorithms may not reduce over-profiling bias for some certain of social signals. We further reflect on the inconsistency between our findings for over-profiling based on registration status and User pages in our Discussion (§\ref{sec:discussion}). + +% Similarly, we observe no overall statistical difference between edits by editors that have user pages $(\tau^{\mathrm{No~u.p.}}=round(mean(h1.tau.user.page),2)$ $(\mathrm{CI}=get.CI.str(h1.tau.user.page,format.percent=F))$ and edits by those that do not $(\tau^{\mathrm{IP}}=round(exp(mean(h1.tau.no.user.page)),2)$ $mathrm{CI}=get.CI.str(h1.tau.no.user.page,transform.f=exp,format.percent=F))$. 
+ + +\subsection{RQ2: Effect of flagging on controversial sanctioning} +\label{sec:results-rq2} + +% We tweak H2 so it's now about IP editors and editors without user pages + + +% NOTE: note sure if this is important -mako +% In articulating the rationale for \textbf{RQ2}, we drew from dual-process theories from behavioral economics to hypothesize that flagging can lead to less fair outcomes in the form of increased chances of a controversial sanction for over-profiled editors. We also pointed to arguments that algorithmic flagging can act as a carrier of formal rationality to hypothesis the converse---that flagging could decrease controversial sanctions for over-profiled users and increase fairness. +Consistent with the idea that algorithmic flagging can support fairness, we find that having an ORES score cross the ``likely damaging'' or ``very likely damaging'' thresholds decreases the chances that a revert will be controversial for unregistered editors. +These results are visualized in Figure \ref{fig:h2.regplot.anon}. We have less confidence in the effect at the ``maybe damaging'' threshold because our 95\% credible interval includes 0 ($\tau^{\mathrm{Unreg}}_{1}=-0.07;\allowbreak \mathrm{CI}=[-0.16;\allowbreak 0.02]$). + +\begin{figure} + \centering +\begin{subfigure}[t]{\textwidth} + \centering +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=0.7\linewidth]{figures/knitr-regplot_controversial_anon-1} + +\end{knitrout} +\caption{Parameter estimates and 95\% credible intervals for the effects of flagging on whether reverts are controversial for unregistered editors. \label{fig:h2.regplot.anon}} +\end{subfigure} +~ +\begin{subfigure}[b]{\textwidth} +\centering + +\includegraphics[width=1\linewidth]{figures/knitr-me_plot_H2_anon-1} + +\caption[RQ2. me plot anon]{Marginal effects plots with 95\% credible intervals for models predicting whether a revert is controversial, for unregistered editors. 
\label{fig:h2.me.anon}} +\end{subfigure} +\caption[RQ2. plot anon]{Results for RQ2: flagging causes a small but detectable decrease in the likelihood that an action by an unregistered contributor receives a controversial sanction.} +\end{figure} + +\begin{figure} + \centering +\begin{subfigure}[t]{\textwidth} + \centering +\begin{knitrout} +\definecolor{shadecolor}{rgb}{0.969, 0.969, 0.969}\color{fgcolor} +\includegraphics[width=0.7\linewidth]{figures/knitr-regplot_controversial_no_user_page-1} + +\end{knitrout} +\caption{Parameter estimates and 95\% credible intervals for effects of flagging on whether reverts are controversial for editors without user pages.} +\label{fig:h2.regplot.up} +\end{subfigure} +~ +\begin{subfigure}[b]{\textwidth} +\centering + +\includegraphics[width=\textwidth]{figures/knitr-me_plot_H2_no_user_page-1} + +\caption[RQ2. me plot up]{Marginal effects plots with 95\% credible intervals for models predicting whether a revert is controversial, for contributors without user pages.} +\label{fig:h2.me.up} +\end{subfigure} +\caption{Results for RQ2 comparing contributors with user pages to those without show no detectable effect of flagging on controversial sanctioning.} +\end{figure} + +We estimate that being flagged at the ``likely damaging'' level results in a change in the odds that a sanction is controversial by a factor between 0.85 and 0.97. Figure \ref{fig:h2.me.anon} shows the modeled relationship between ORES scores and the probability of a controversial sanction in the neighborhood of the thresholds for English Wikipedia. On the left plot, we see that being flagged changes an unregistered contributor's likelihood of a controversial revert from a possible increase of +0.27 percentage points to a possible decrease of + 0.55 percentage points, a change from +3.08\% to +2.81\% +on average. 
+ +% , an average decrease from +% between round(h2.anon.md.proto.above$linpred.lower - h2.anon.md.proto.below$linpred.upper,2) +% and round(h2.anon.md.proto.above$linpred.upper - h2.anon.md.proto.below$linpred.lower,2) percentage points, from round(h2.anon.md.proto.below$linpred,3)*100\% to round(h2.anon.md.proto.above$linpred,3)*100\% on average. + +% proto.reverted.CI.str(h2.anon.md.proto.above,digits.1=2,digits.2=3,between=T,format.percent=T) or + +We observe a similar effect of flagging at the ``very likely damaging'' threshold ($\tau^{\mathrm{Unreg}}_{2}=-0.13;\mathrm{CI}=[-0.24;\allowbreak -0.04]$): the odds that a revert is controversial change by a factor of between 0.79 and 0.97. On the right side of Figure \ref{fig:h2.me.anon}, we find that being flagged decreases the probability that a sanction to an action by an unregistered editor is controversial by between 0.11 and +0.89 percentage points, a change +from 3.33\% to +2.92\% +on average. + + +% format.percent(h2.anon.ld.proto.below$linpred.lower,1) and format.percent(h2.anon.ld.proto.below$linpred.upper,1) to between format.percent(h2.anon.ld.proto.above$linpred.lower,1) +% and format.percent(h2.anon.ld.proto.above$linpred.upper,1), an average decrease of format.percent(h2.anon.ld.proto.below$linpred - h2.anon.ld.proto.above$linpred,1) percentage points. + +% By summing our posteriors for both threshold parameters, we find algorithmic flagging has a negative effect across both thresholds overall ($\tau^{\mathrm{Unreg}}_{1} + \tau^{\mathrm{Unreg}}_{2}=round(mean(h2.tau.anon),2)$; $\mathrm{CI}=get.CI.str(h2.tau.anon,format.percent=F)$). 
+ +However, we did not detect effects of flagging when the reverted editor lacks a user page at the ``maybe damaging'' ($\tau^{\mathrm{NoUP}}_{1}=0.01;\mathrm{CI}=[-0.07;\allowbreak 0.08]$), ``likely damaging'' ($\tau^{\mathrm{NoUP}}_{2}=-0.01;\mathrm{CI}=[-0.15;\allowbreak 0.14]$), or ``very likely damaging'' ($\tau^{\mathrm{NoUP}}_{3}=-0.12;\mathrm{CI}=[-0.35;\allowbreak 0.11]$) thresholds. +Our results for RQ2 for unregistered editors show that flagging decreases the rate of controversial sanctions. Although controversial sanctions do not precisely correspond to false-positive sanctions, we take this finding as evidence that flagging decreases the false positive rate of the decision system. +We address the inconsistencies between our results for unregistered editors and editors without user pages in our discussion (§\ref{sec:discussion}). + +%Because the credible interval contains 1, we are uncertain that there is a decrease at the ``maybe damaging'' threshold. + +% Figure \ref{fig:h2.regplot} summarizes our parameter estimates. +%Similarly, being flagged at the ``likely damaging'' level decreases the odds that a revert is controversial by a factor of $round(exp(mean(h2.tau.2.anon)),2)$ $get.CI.str(h2.tau.2.anon,transform.f=exp)$. +% When an ORES score to an edit by a user without a User page is just below the ``maybe damaging'' level, the probability that a revert is controversial is proto.reverted.CI.str(h2.no.user.page.md.proto.below,digits.1=2,digits.2=3) and proto.reverted.CI.str(h2.no.user.page.md.proto.above,digits.1=2,digits.2=3), a change in odds of factor of $round(exp(mean(h2.tau.1.no.user.page)),2)$ $get.CI.str(h2.tau.1.no.user.page,transform.f=exp)$. +%Similarly, being flagged at the ``likely damaging'' level changes the odds that a revert is controversial by a factor of $round(exp(mean(h2.tau.2.no.user.page)),2)$ ($get.CI.str(h2.tau.2.no.user.page,transform.f=exp)$). 
+% Just before the ``likely damaging'' threshold a sanction on English Wikipedia has a proto.reverted.CI.str(h2.no.user.page.ld.proto.below) probability of being recieving a meta-sanctioned by a third party, for flagged edits this is proto.reverted.CI.str(h2.no.user.page.ld.proto.below). +% We do not detect a statistically roundicant effect of algorithmic flagging +%over both thresholds ($\tau^{\mathrm{RQ2}}=round(mean(h2.tau.no.user.page),2)$ $get.CI.str(h2.tau.no.user.page,format.percent=F)$). + + +\subsection{RQ3: Social signals and effects of flagging on controversial sanctioning } +\label{sec:results-rq3} + +% Should we comment on power here? +To answer RQ3, we largely replicate the analysis conducted for RQ1 with the dependent variable used in RQ2. Results shown in Figure \ref{fig:h3.reg.plot} provide weak evidence that a decrease in controversial sanctioning may be greater for +registered than for unregistered contributors at the ``maybe damaging'' ($\tau^{\mathrm{Reg}}_1 - \tau^{\mathrm{Unreg}}_1 = 0.04$ $[-0.06;\allowbreak 0.14]$), ``likely damaging'' ($\tau^{\mathrm{Reg}}_2 - \tau^{\mathrm{Unreg}}_2 = 0.07$ $[-0.05;\allowbreak 0.2]$), +and ``very likely damaging'' ($\tau^{\mathrm{Reg}}_3 - \tau^{\mathrm{Unreg}}_3 = 0.02$ $[-0.23;\allowbreak 0.27]$) thresholds. +However, our evidence weakly suggests that the effect for contributors with user profiles is greater than that for those without at the ``maybe damaging'' threshold +($\tau^{\mathrm{UP}}_1 - \tau^{\mathrm{NoUP}}_1 = 0.05$ $[-0.08;\allowbreak 0.17]$) but the opposite seems true at the ``likely damaging'' ($\tau^{\mathrm{UP}}_2 - \tau^{\mathrm{NoUP}}_2 = -0.26$ $[-0.79;\allowbreak 0.26]$) and ``very likely damaging'' ($\tau^{\mathrm{UP}}_3 - \tau^{\mathrm{NoUP}}_3 = -0.16$ $[-0.9;\allowbreak 0.56]$) thresholds. None of these estimates are statistically significant at the 95\% level. 
+ +\begin{figure} +\centering +% I have so much data and these marginal posteriors are so normal that there isn't much point in showing the intervals +% \begin{subfigure}[t]{0.49\textwidth} +% \centering + +\includegraphics[width=0.7\linewidth]{figures/knitr-regplot_H3_anon-1} + +\caption{Results for RQ3 showing the difference in our parameter estimates between overprofiled editors and others with 95\% credible intervals. Values greater than 0 would indicate that the effect for underprofiled editors is greater than that for overprofiled editors.} +\label{fig:h3.reg.plot} +\end{figure} + + + +% \begin{figure}[t] +% \centering +% <>= +% make.comparison.me.plot(mod.anon.controversial.me.data.df, +% mod.anon.controversial.bins.df, +% 'IP', +% mod.non.anon.controversial.me.data.df, +% mod.non.anon.controversial.bins.df, +% 'Not IP', +% mod.no.user.page.controversial.me.data.df, +% mod.no.user.page.controversial.bins.df, +% "No user page", +% mod.user.page.controversial.me.data.df, +% mod.user.page.controversial.bins.df, +% "User page", +% digits=3, +% used.thresholds=c("Maybe damaging", "Likely damaging") +% ) + + +% @ + +% \caption{Marginal effects plot for models predicting whether a revert is controversial} +% \label{fig:me.controversial.comp} +% \end{figure} + +\section{Threats to Validity} +\label{sec:threats} +Our results are subject to a range of threats to validity that pertain to our ability to make causal claims, rule out alternative explanations, and establish the generalizability of our findings. First, there are several threats to our ability to draw causal inferences that are common to RDDs. +% We test our hypotheses using a regression discontinuity design (RDD) for causal estimation of the effect of flagging an action on sanctioning (for \textbf{RQ1}) and controversial sanctioning (for \textbf{RQ2} and \textbf{RQ3}). 
+Formally, RDDs model an outcome $Y$ as a function of a continuous ``forcing variable'' $Z$, other covariates, and a cutoff $c$ such that $Z>c$ determines treatment assignment. In principle, treatment assignment conditional on $Z$ is ``as good as random'' under two assumptions: (1) that agents have at most limited control over $Z>c$, and (2) that the relationship between $Y$ and $Z$ is smooth \citep{lee_regression_2010}. +%In social computing, \citet{narayan_all_2019} and \citet{hill_hidden_2020} use within-subjects designs similar to RDDs to analyze the consequences of policy and design interventions for online communities. Both studies use time as a forcing variable which threatens validity as the timing of intervention may be influenced by unobserved factors in violation of assumption (1). +Although the assumptions required for causal inference are fundamentally unverifiable, we believe that our RDD provides relatively strong evidence of causal relationships between flagging and sanctioning. + +Our treatment, being flagged in RCFilters, is an ideal candidate for an RDD from the perspective of assumption (1) because editors are unlikely to have much control over the scores that their edits receive. Although attempts to evade sanction by specially crafting edits to evade algorithmic detection are hypothetically possible, the authors of ORES and RCFilters believe they are unrealistic and very unlikely to be widespread. +Assumption (2) would be violated if any unobserved treatments affect our outcomes at discrete levels of ORES scores. This is certainly possible because ORES makes scores available via a public API. Indeed, we are aware of bots that automatically revert edits triggered by the ``very likely damaging'' threshold on some of the Wikipedia language editions in our sample and therefore have more reason to doubt results at this threshold. 
Despite this threat, our conclusions regarding how algorithmic flagging shapes fairness are substantively similar whether we consider this threshold or not. +%To mitigate this threat, we exclude reverts by bots and at the ``very likely damaging'' threshold. +Although we identified one anti-vandalism tool---a system called Huggle discussed in §\ref{sec:discussion}---that collects ORES damaging scores, it uses ORES scores as one feature in its own algorithmic model and, by default, presents predictions from this model to users as a list of edits sorted in order of likelihood of vandalism. Given these facts, we believe that it is unlikely that Huggle users will drive discontinuities in the relationship between ORES scores and our outcomes. + +A limitation of RDD analysis is that it estimates effects for observations in the neighborhood of the cutoff and results may not generalize far away from the cutoff. Compared with most RDD analysis, ours has the advantage of multiple different thresholds. Although our results for the ``likely damaging'' and ``maybe damaging'' thresholds are substantively similar, causal effects may diverge more at operating points we have not considered. Future work on algorithmic bias using RDD should consider that results may depend on the choice of operating points used as RDD cutoffs. + +An additional threat to validity is raised by the extent to which the ORES models encode biases concerning editors who are unregistered or without profile pages. +% It might be that our result that flagging appears to improve fairness for unregistered editors depends on a lack of model bias against such editors. 
+To assess this threat, we analyzed the bias of ORES models for each wiki that had deployed the system on December 19\textsuperscript{th} 2020 using their human-labeled training data according to the \textit{conditional calibration} approach to evaluating model bias \citep{mitchell_prediction-based_2020}.\footnote{We chose conditional calibration as our fairness metric because it does not depend on the choice of threshold. This simplifies the analysis of a decision system with multiple thresholds.} In our case, this involves comparing the rate of damaging edits predicted by the model to the true rates for each type of editor. +%, equality of opportunity, and balance in the negative class that have been used in previous research to analyze algorithmic bias \cite{kleinberg_inherent_2016, mitchell_prediction-based_2020}. +We find that ORES exhibits bias against both unregistered editors and editors without user pages but that the extent of bias against unregistered editors is much greater. These findings are opposite in sign to what we would expect if model bias were driving our results. +% This gives us confidence that model bias is not driving our results and suggests that if ORES were improved to reduce bias this could improve the fairness of Wikipedia moderation. +We present detailed results from this analysis in our online supplement. + +%Our analysis provides strengths that an experiment would not including ecological validity and non-intervention. Furthermore, the limitations stemming from regression discontinuity assumptions are relatively minor compared to those required for a causal interpretation of our comparison of editors with different social signals. + +Our study design is also limited in that we cannot present causal evidence of the impact of social signals. Although RCFilters's algorithmic flags are distributed in a quasi-experimental way, overprofiled status is not. 
+% We theorize that social signals are causing moderators to make some sanctioning actions instead of others but our evidence only allows us to compare the relationship between the effects of flagging on types of overprofiled and others editors in a way that captures correlations. +There are a range of possible systematic differences between overprofiled users and others that might be driving our results for RQ1 and RQ3. +% Finally, our results cannot rule out plausible alternative explanations for our findings related to systematic differences between contributors with or without social signals. +For example, if damaging edits by contributors who are unregistered or lack user pages are more difficult for ORES to detect, that might drive our findings of a decrease in overprofiling for RQ1. % as sanctioning would be less driven by algorithmic flagging for such editors. +% Such a scenario seems to suggest that over to other contributors, +Although we believe that this particular threat is unlikely because it would require that overprofiled contributors be systematically more sophisticated than others---something our experience with ORES suggests is unlikely---we cannot rule out either the specific threat or a range of other possibilities. +A promising direction for future work might involve experiments or quasi-experiments that can jointly vary social signals and algorithmic flagging. + + +Additionally, system designers will likely want to know how overall rates of sanctioning and controversial sanctions change before and after a system such as RCFilters is launched. Unfortunately, our analysis cannot answer this question directly. +% Instead it looks at a single system to compare actions that were flagged with ones that were not. 
+In preliminary work, we attempted to draw a statistical comparison between Wikipedia governance before and after the introduction of ORES but high temporal variation in sanctioning behaviors made this type of aggregate change difficult to measure. Future studies should organize with communities to conduct planned and principled field experiments to study the causal effects of introducing such systems in online communities using the model being pioneered by \citet{matias_civilservant_2018}. + +Finally, a set of largely unanswerable threats involves questions of generalizability across our measures and empirical contexts. +Although our theory of interactions between algorithmic flags and social signals is general, and although we study RCFilters across 23 distinct communities, languages, and cultures, we study a single moderator tool on one platform. +We cannot claim that our findings generalize beyond the specific pool of communities that we study. +% We cannot claim that our setting is representative of other Wikipedia communities that did not launch RCFilters. +Additionally, we have considered only a small subset of possible social signals that may be used in online community moderation. +Clearly, we also cannot claim that our settings are representative of moderation in online communities in general. +% We analyze the broadest possible sample in an effort to improve generalizability beyond English Wikipedia alone. Wikipedia language communities adopted ORES according to their perceived needs and their ability to label training data. +Like most other empirical studies in social computing, we must sadly leave these questions for further research. + + + +% \subsection{Alternative Explanations} + + +% maybe newcomers with user pages are more suspect? 
+ +\section{Discussion} +\label{sec:discussion} + +% \subsection{Flagging, overprofiling, and sanctioning} + +In the broadest strokes, our work is potentially good news for advocates of algorithmic flagging in social computing systems. It provides some evidence supporting the idea that algorithmic flagging can reduce discrimination in the form of overprofiling bias and that it can increase fairness. Our adoption check (§\ref{sec:adoption}) provides strong evidence that RCFilters drives behavior and our answer to RQ1 (§\ref{sec:results-rq1}) suggests that flagging can level the playing field by increasing decision system demographic parity between unregistered and registered Wikipedia editors. Flagged edits by these contributors are reverted at similar rates, but unflagged edits of comparable quality by registered editors are reverted relatively infrequently. +More good news comes in the form of our answer to RQ2 (§\ref{sec:results-rq2}) that suggests that flagging is associated with a decrease in controversial sanctions among some overprofiled users and provides evidence that algorithmic flagging systems can help moderators more accurately issue sanctions. + + +% In \textbf{RQ2} we asked how algorithmic flagging might change how fairly overprofiled editors are treated by moderators in terms of sanctions against moderators for violating meta-norms. We consider two competing hypotheses about how algorithmic flags would affect fairness of norm enforcement reflected by sanctions against meta-norm violations. +% Dual-process theories of behavioral economics might suggest that algorithmic flags might act as salient signals cuing Wikipedia moderators to issue sanctions in violation of meta-norms \cite{bordalo_salience_2012}. Such an application of dual-process theories to the situation of Wikipedia moderation intentionally takes a narrow perspective focusing on interaction between a human and computer interface \cite{frey_designing_2019}. 
Taking a broader institutional view that emphasizes not the cognitive biases that might lead moderators to issue sanctions irrationally but the multiple levels of bureaucracy and norms that structure moderation work instead suggested that algorithmic triage systems might function as a ``carrier of formal rationality'' by helping moderators find and revert misbehavior in compliance with meta-norms \cite{frey_designing_2019, lindebaum_insights_2019}. An increase in sanctions compliant with meta-norms naturally decreases the proportion of sanctions that violate meta-norms. This model predicted and we found that flagging decreases the incidence of controversial sanctions for IP editors. We take this as evidence that flags can help moderators more accurately issue sanctions. + +When it comes to the details, however, the picture that emerges from our results is much more contingent and mixed. Our analysis used two different measures of overprofiling in Wikipedia but the pattern of our results diverged substantially between the two. The optimistic story about the effects of algorithmic flagging on overprofiled users only describes our results for unregistered Wikipedia users. Our evidence on overprofiled users without user pages is much weaker and points, in part, in the direction of algorithmic flagging increasing discrimination. Why do these results diverge? What do these divergent results mean for theory? + +One possible explanation is that editors without user pages are, quite simply, not particularly overprofiled. Of the two social signals we consider, registration status attracts far more attention from academics and community members in discussions of Wikipedia vandalism \citep[e.g.,][]{hill_hidden_2020}. +% Sources such as \citet{broughton_wikipedia_2008} who suggest that red links are signals useful to Wikipedia moderators are more than a decade old and may not reflect widespread practices on Wikipedia today.
+Our analysis for RQ2, where we did not detect changes in controversial sanctions for editors without user pages, is also consistent with the notion that contributors without user pages may not be overprofiled. +If algorithmic flagging systems help moderators more accurately issue sanctions by reducing overprofiling, then flagging would not decrease controversial sanctioning for editors that are not overprofiled. +However, this alone does not explain why the effect for editors without profile pages was larger than for editors with them. + +Our results might be explained if model bias against contributors without user pages means that the set of flagged edits from these users are less damaging than flagged edits by contributors who have profile pages. +As discussed in §\ref{sec:threats} and documented in our online supplement, ORES models are sometimes biased against contributors without user pages, but they are even more biased against anonymous contributors. +Our results make sense if the overprofiling of anonymous editors outweighs model bias against them, but the reverse is true for editors without user pages. + +% This might be enough to drive our results assuming that moderators do not substantially overprofile editors without User pages. Then flagged edits by both kinds of users might be inspected by moderators at similar rates and model bias may lead moderators to more harshly sanction flagged edits by contributors without user pages. While flagged edits by contributors with profile pages are even more likely to be damaging than similarly flagged edits by those without profile pages, +% this difference will be even greater between edits by registered and unregistered editors. + +It is also plausible that our mixed results are evidence that algorithmic flags will substitute for some social signals used in overprofiling while reinforcing others. Our study analyzes only two of many possible social signals that online community moderators might use. 
A better understanding of which signals drive sanctioning misbehavior can help explain if and when algorithmic triage systems can increase fairness. +Our results suggest that algorithmic flags can substitute for some social signals and reduce overprofiling in online community moderation. Our results also suggest that they might reinforce social signals, make overprofiling worse, or introduce new forms of unfairness through encoded bias. +Unfortunately, outcomes resulting from myriad factors acting at once +% including overprofiling, social signals, meta-norms, and algorithmic bias, +are likely contingent on details of sociotechnical arrangements and difficult to know \textit{ex ante}. + +Although RQ2 suggests that algorithmic flagging can increase fairness for overprofiled contributors, our null results for RQ3 mean that we could not detect a difference in this effect between overprofiled editors and others. +Uncertainty in our models for RQ3 is high enough that parameter values consistent with a substantive average effect that is either positive or negative are plausible. +A null effect for RQ3 might also be explained if meta-norms and improved information are more important to controversial sanctioning than bias introduced by algorithmic flags or social signals acting as cues. + +% New approaches to measuring normative and meta-normative compliance in online communities may reflect a promising area for future work. +% This points to methodological limitations in our use of controversial reverts as a measure of fairness. There are only Ultimately, controversial reverts just too rare---especially for registered contributors with User pages---for us to be confident in our estimates. +% Part of this problem is also theoretical, for we attempted to apply coarse psychological frameworks of dual-process models to the wild and complex domain of meta-norm enforcement on Wikipedia where many unobserved factors may affect what sanctions are controversial. 
+ + + + +% We ask in \textbf{RQ1} if algorithmic triage systems can improve fairness by reducing the over-profiling of contributors displaying social signals associated with misbehavior. We propose that moderators pay attention to the unflagged actions of over-profiled contributors, but mainly pay attention to actions by underprofiled contributors when they are flagged. As a result, we hypothesized that algorithmic flagging will have a greater effect on sanctioning for underprofiled contributors than for over-profiled ones. Our analysis of over-profiling based on registration status supports this conjecture, but our analysis based on user profile creation does not. +% We interpret our findings for \textbf{RQ1} as evidence that algorithmic flags can improve treatment of over-profiled users by substituting for social signals in routing moderator attention. At least in the case of registration status, we found that +% \subsection{Flagging and controversial sanctioning} +% \subsection{Over-profiling and controversial sanctioning} +% We found evidence in \textbf{RQ1} suggesting that algorithmic flagging can reduce over-profiling and conjectured in \textbf{RQ2} that algorithmic flags might act as salient signals that nudging moderators to make controversial sanctions. In \textbf{RQ3} we considered if such changes in controversial sanctioning affect over-profiled contributors more or less than underprofiled contributors. +% As we found in \textbf{RQ2} that algorithmic flagging instead \emph{decreases} controversial sanctioning in the case of unregistered editors, we believe that dual-process models may have little predictive power in institutionalized settings like Wikipedia moderation. + +% \subsection{Design implications} +% \label{sec:design.implications} +% Halfak says to cite de laat in this paragraph. + +Our work has several important implications for designers of algorithmic flagging systems and sociotechnical systems. 
+Scholars of human computer interaction, science and technology studies, and the law have all called for analyses of algorithmic fairness to move beyond biases inherent in algorithms to consider the systemic and downstream effects of algorithms in use \citep{selbst_fairness_2019, stevenson_assessing_2017, zhu_value-sensitive_2018}. +% We use decision system fairness metrics as tools for evaluating how an algorithmic system shapes the fair treatment of contributors to an online encyclopedia. +Ultimately, we recommend that operators of algorithmic flagging systems should continuously evaluate decision system fairness metrics and seek to improve them according to their values. In that the ORES model is, itself, biased against overprofiled users, our results suggest that evaluating the fairness of model predictions is only one piece of understanding how an algorithmic system shapes fairness in contexts such as online community moderation. + +Future work should rigorously construct and critique decision system fairness criteria in terms of their consequences. +The algorithmic fairness literature often treats algorithmic predictions as equivalent to final decisions. +Our work shows that sociotechnical decision systems with humans in the loop face distinctive and contextually sensitive epistemic, ontological, and ethical questions about how decision system fairness should be defined or measured \citep{kleinberg_human_2018, selbst_fairness_2019}. + +Decision system fairness is particularly important in open production communities such as Wikipedia because of the trade-offs between quality control and the essential tasks of supporting newcomers and encouraging contribution \citep{halfaker_rise_2013, morgan_tea_2013}. +Past work has shown that increased quality control efforts correspond to a decrease in newcomer engagement and has hypothesized that one mechanism is increased scrutiny of newcomers \citep{halfaker_rise_2013, teblunthuis_revisiting_2018}.
Similarly, although blocking anonymous edits to wikis has been shown to cause a decrease in reverted edits, it also leads to a decrease in positive contributions \citep{hill_hidden_2020}. While it may be intuitive to think about edits that get sanctioned as obvious vandalism, many of the edits flagged at the ``maybe damaging'' threshold are authored by well-meaning newcomers \citep{halfaker_rise_2013}. There's a potentially high cost to sanctioning these low quality but well-intentioned contributions. We believe that our results point to the benefit of tracking changes in the rate of sanctions to sensitive groups of community members in order to assure that such well-meaning contributors are not being driven away. + +There are also lessons to be learned from the impressive degree to which RCFilters shapes behavior. +Although the choice of operating points in algorithmic systems is often framed as purely about trading off precision and recall, our work demonstrates that these choices can have a range of other important consequences. +Our disparate findings at the ``very likely damaging'' threshold for overprofiling based on registration status reveal that an algorithmic tool might improve fairness at a given operating point but decrease it at another. +Although thresholds allowed us to explore the effects of flagging on sanctioning behavior, this arbitrary flagging of actions applied by RCFilters brought disproportionate attention to contributions just above the thresholds compared to contributions just below. Designers should think about whether using thresholds to trigger flagging in moderation interfaces is a fair practice at all. Our results show that this leads to sanctioning behavior that is, like the thresholds, arbitrary. + +What types of designs might support quality control support models that scrutinize contributions in proportion to the likelihood that the contributions deserve to be sanctioned?
We see some inspiration in Huggle, a counter-vandalism tool for Wikipedia which sorts actions by the likelihood that they are damaging.\footnote{See discussion in \citet{halfaker_snuggle:_2014}.} Huggle users are encouraged to review the highest likelihood edits first and only move onto lower likelihood edits once those reviews are complete. Such a user experience might increase efficiency and fairness by better concentrating moderator attention wherever it can have the greatest benefits. + +% TODO: Nate rephrases this the way that he is thinking about it. +% NOTE: this doesn't feel like implication for designs. it's kinda a limitation? or maybe a generalizability thing? or general discussion? it's a good paragraph but i don't quite see where it belongs -mako +% Our analysis was informed by considerable prior work documenting Wikipedian practices and institutions that allowed us to pose our research questions and make sense of our results. In particular, knowledge of the over-profiling of unregistered Wikipedia contributors led us to posit conditions where algorithmic triage can improve fairness. Understanding meta-norms on Wikipedia helped us hypothesize why flagging might decrease controversial sanctions for over-profiled contributors. System designers working in less well understood contexts will need to build a baseline understanding of relevant social signals, meta-norms, and institutions involved in content moderation in order to know what questions to ask about fairness. + + +\section{Conclusion} + +As algorithmic flagging becomes more integrated into online community moderation, it is important to understand its effects and consequences on overprofiling and fairness. +We use a regression discontinuity analysis of the use of RCFilters to find and sanction misbehavior by volunteers on Wikipedia to consider how algorithmic flagging and social signals interact.
+We find that by drawing moderator attention to misbehavior by registered participants, algorithmic flagging can reduce overprofiling in certain contexts. +% Unflagged edits by registered Wikipedia contributors are sanctioned much less often than edits by unregistered contributors, but flagging edits causes their edits to be reverted at more similar rates. +We also find that algorithmic flagging can support fairness by decreasing controversial sanctions of unregistered contributors. Our results also suggest that the same system may have much less effect, and might even increase discrimination, for other types of overprofiled users. + +Studies of machine learning in high-stakes settings like employment, education, and criminal justice trace how algorithms can encode discriminatory patterns in human behavior but might also improve fairness compared with human biases. Although the stakes are much lower, such questions are also pertinent to the use of machine predictions for online community moderation. We find that tools for predictive governance in a sociotechnical system can reduce overprofiling but their effects are also difficult to anticipate. % While the stakes in online moderation are very different, online communities provide a real-world setting where similar social and psychological processes may be at work. + +Although our analysis of overprofiling based on registration status supports a rosy account of algorithmic flagging, our analysis of overprofiling based on user pages does not. While contributors without user pages may be less overprofiled compared to unregistered contributors, our results also +% , a social signal that Wikipedia moderators associate with newcomer status +suggest that the interaction between algorithmic flagging and social signals is complex and contingent. 
+We suggest a need for future work that describes the kinds of social signals that are used in practice and explains how different types of information may be used alongside algorithmic flags. Finally, we present a methodological approach that we hope future studies of algorithmic tools in real-world sociotechnical systems might build upon to establish the causal effects of algorithmic systems without experimental intervention. + + +% critiques of algorithmic fairness or discrimination +% machine learning practitioners pursue methods for building algorithms Field studies of such systems that are deployed and used in the wild, as we do in this study of the RCFilters/ORES system on Wikipedia. Our research design based on regression discontinuity causal + +% Based on the logic of ``overprofiling,'' statistical discrimination, and salient signals, we proposed that Wikipedia moderators would rely more on algorithmic flags to guide them to discover damaging edits by users that are not already over-profiled. Instead we found little difference in the effect of flagging on the likelihood of reversion between registered and IP editors. + +% What explains this surprising result? One possibility is that Wikipedians do not actually ``over-profile'' IP editors at all. If Wikipedians already scrutinize edits by IP editors and by registered users equally, then we would not expect to find a difference in how flagging effects reversion in ways associated with editor type. This explanation is dubious since Wikipedians are thought to be highly suspicious of anonymous editors. + +% However, if the availability of an algorithmic flag obviates the need for statistical discrimination against ``over-profiled'' editors, then Wikipedians may use only signals from the algorithmic flagging system instead of using algorithmic flags alongside social signals. 
This explanation is encouraging for it suggests that introducing algorithmic predictions into governance systems can reduce statistical discrimination. + +\section*{Acknowledgements} + +We are grateful to the anonymous CSCW reviewers and associate chairs for their keen insights and feedback. We would also like to thank the Wikimedia Foundation for its support, members of the WMF analytics team including Andrew Otto, Luca Toscano, and Joal Allemandou for help with data access and computing infrastructure, and members of the WMF research team including Jonathan Morgan for feedback early in project development. +Thanks also go to members of the Community Data Science Collective who provided multiple rounds of feedback and contributed to copyediting including Kaylea Champion, Charles Kiene, Stefania Druga, Sohyeon Hwang, Jeremy Foote, and Aaron Shaw. +We also thank the WMF staff and volunteers who developed the systems we analyze including Roan Kattouw, the main developer of RCFilters, and the developers of ORES including Amir Sarabadani and Andy Craze. Special thanks to Amanda TeBlunthuis. Finally we owe an extra special thanks to the Wikipedia contributors whose digital traces we analyze. +Portions of this work were facilitated through the use of advanced computational, storage, and networking infrastructure provided by the Hyak supercomputer system at the University of Washington. Financial support for this work came from the Wikimedia Foundation, from the National Science Foundation graduate research fellowship program \#2016220885, and from the University of Washington.
+ + +\section*{Data Access} +A replication dataset including ORES scores, thresholds, and our sample of Wikipedia revisions, along with all of our code has been placed in the Harvard Dataverse archive and is available at the following URL: \url{https://doi.org/10.7910/DVN/E0RYJ4} + +\setcounter{biburlnumpenalty}{9001} +\printbibliography[title = {References}, heading=secbib] + + +% LOCAL_WORDS: decile +% : an increases in odds by a factor of $round(exp(mean(tau.2)),2)$ $get.CI.str(tau.2,transform.f=exp)$. +% We believe that we do not observe any increase in the likelihood of sanctioning at the ``very likely damaging'' level because actions flagged as ``very likely damaging'' are also flagged as ``likely damaging'' in the RCFilters' default configuration. +% % and because most revisions flagged as ``very likely damaging'' +% As a result, the marginal impact of being flagged as ``very likely damaging'' on visibility is likely very small. +% Moreover, edits flagged as ``very likely damaging'' are often so egregious that they will be reverted by bots before a human moderator can review them. +% We believe this is likely the case because only the ``likely damaging'' and ``very likely damaging'' levels but not the ``maybe damaging'' level are enabled by default. So if the ``very likely damaging'' filters or flags are visible then probably so will ``likely damaging'' flags and filters. +% We believe this is likely because filtering at the ``very likely damaging'' threshold alone when patrolling recent changes on English Wikipedia was not very useful as few edits cross this threshold, and those that do are frequently reverted by bots before a human editor can perform the revert. +% Racing the bots to revert obvious damage seems like less useful and rewarding work compared to enabling the ``maybe damaging'' threshold to surface more ambiguous edits requiring human judgment to review. 
+% NOTE: totally redundant -mako +% From this analysis we conclude that the RCFilters system powered by ORES was put in to use by Wikipedians and has a detectable influence on which actions are sanctioned. +% The large discontinuous increases in sanctioning we observe have important implications for design of sociotechnical systems that use algorithmic predictions to guide human attention which we discuss in §\ref{sec:design.implications}. We did not detect flagging effects at the ``very likely damaging'' threshold and therefore we exclude this threshold from our subsequent hypothesis tests. diff --git a/dissertations/nathante_uw_2021/articlequality.bib b/dissertations/nathante_uw_2021/articlequality.bib new file mode 100644 index 0000000..415fcab --- /dev/null +++ b/dissertations/nathante_uw_2021/articlequality.bib @@ -0,0 +1,1047 @@ + +@inproceedings{adler_content-driven_2007, + title = {A Content-Driven Reputation System for the {{Wikipedia}}}, + booktitle = {Proceedings of the 16th {{International Conference}} on {{World Wide Web}}}, + author = {Adler, B. Thomas and {de Alfaro}, Luca}, + year = {2007}, + series = {{{WWW}} '07}, + pages = {261--270}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {We present a content-driven reputation system for Wikipedia authors. In our system, authors gain reputation when the edits they perform to Wikipedia articles are preserved by subsequent authors, and they lose reputation when their edits are rolled back or undone in short order. Thus, author reputation is computed solely on the basis of content evolution; user-to-user comments or ratings are not used. The author reputation we compute could be used to flag new contributions from low-reputation authors, or it could be used to allow only authors with high reputation to contribute to controversial or critical pages. A reputation system for the Wikipedia could also provide an incentive for high-quality contributions.
We have implemented the proposed system, and we have used it to analyze the entire Italian and French Wikipedias, consisting of a total of 691,551 pages and 5,587,523 revisions. Our results show that our notion of reputation has good predictive value: changes performed by low-reputation authors have a significantly larger than average probability of having poor quality, as judged by human observers, and of being later undone, as measured by our algorithms.}, + isbn = {978-1-59593-654-7} +} + +@inproceedings{anderka_breakdown_2012, + title = {A Breakdown of Quality Flaws in {{Wikipedia}}}, + booktitle = {Proceedings of the {{2Nd Joint WICOW}}/{{AIRWeb Workshop}} on {{Web Quality}}}, + author = {Anderka, Maik and Stein, Benno}, + year = {2012}, + series = {{{WebQuality}} '12}, + pages = {11--18}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {The online encyclopedia Wikipedia is a successful example of the increasing popularity of user generated content on the Web. Despite its success, Wikipedia is often criticized for containing low-quality information, which is mainly attributed to its core policy of being open for editing by everyone. The identification of low-quality information is an important task since Wikipedia has become the primary source of knowledge for a huge number of people around the world. Previous research on quality assessment in Wikipedia either investigates only small samples of articles, or else focuses on single quality aspects, like accuracy or formality. This paper targets the investigation of quality flaws, and presents the first complete breakdown of Wikipedia's quality flaw structure. We conduct an extensive exploratory analysis, which reveals (1) the quality flaws that actually exist, (2) the distribution of flaws in Wikipedia, and (3) the extent of flawed content.
An important finding is that more than one in four English Wikipedia articles contains at least one quality flaw, 70\% of which concern article verifiability.}, + isbn = {978-1-4503-1237-0}, + file = {/home/nathante/Zotero/storage/TLV8BC38/Anderka_Stein_2012_A breakdown of quality flaws in Wikipedia.pdf} +} + +@inproceedings{anderka_predicting_2012, + title = {Predicting Quality Flaws in User-Generated Content: The Case of {{Wikipedia}}}, + shorttitle = {Predicting Quality Flaws in User-Generated Content}, + booktitle = {Proceedings of the 35th {{International ACM SIGIR Conference}} on {{Research}} and {{Development}} in {{Information Retrieval}}}, + author = {Anderka, Maik and Stein, Benno and Lipka, Nedim}, + year = {2012}, + series = {{{SIGIR}} '12}, + pages = {981--990}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {The detection and improvement of low-quality information is a key concern in Web applications that are based on user-generated content; a popular example is the online encyclopedia Wikipedia. Existing research on quality assessment of user-generated content deals with the classification as to whether the content is high-quality or low-quality. This paper goes one step further: it targets the prediction of quality flaws, this way providing specific indications in which respects low-quality content needs improvement. The prediction is based on user-defined cleanup tags, which are commonly used in many Web applications to tag content that has some shortcomings. We apply this approach to the English Wikipedia, which is the largest and most popular user-generated knowledge source on the Web. We present an automatic mining approach to identify the existing cleanup tags, which provides us with a training corpus of labeled Wikipedia articles. 
We argue that common binary or multiclass classification approaches are ineffective for the prediction of quality flaws and hence cast quality flaw prediction as a one-class classification problem. We develop a quality flaw model and employ a dedicated machine learning approach to predict Wikipedia's most important quality flaws. Since in the Wikipedia setting the acquisition of significant test data is intricate, we analyze the effects of a biased sample selection. In this regard we illustrate the classifier effectiveness as a function of the flaw distribution in order to cope with the unknown (real-world) flaw-specific class imbalances. The flaw prediction performance is evaluated with 10,000 Wikipedia articles that have been tagged with the ten most frequent quality flaws: provided test data with little noise, four flaws can be detected with a precision close to 1.}, + isbn = {978-1-4503-1472-5}, + file = {/home/nathante/Zotero/storage/BQDLM6XK/Anderka et al_2012_Predicting quality flaws in user-generated content.pdf} +} + +@inproceedings{anderka_towards_2011, + title = {Towards {{Automatic Quality Assurance}} in {{Wikipedia}}}, + booktitle = {Proceedings of the 20th {{International Conference Companion}} on {{World Wide Web}}}, + author = {Anderka, Maik and Stein, Benno and Lipka, Nedim}, + year = {2011}, + series = {{{WWW}} '11}, + pages = {5--6}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Featured articles in Wikipedia stand for high information quality, and it has been found interesting to researchers to analyze whether and how they can be distinguished from "ordinary" articles. Here we point out that article discrimination falls far short of writer support or automatic quality assurance: Featured articles are not identified, but are made. 
Following this motto we compile a comprehensive list of information quality flaws in Wikipedia, model them according to the latest state of the art, and devise one-class classification technology for their identification.}, + isbn = {978-1-4503-0637-9}, + file = {/home/nathante/Zotero/storage/D4757WKM/Anderka et al_2011_Towards Automatic Quality Assurance in Wikipedia.pdf} +} + +@inproceedings{antelio_qualitocracy_2012, + title = {Qualitocracy: {{A}} Data Quality Collaborative Framework Applied to Citizen Science}, + shorttitle = {Qualitocracy}, + author = {Antelio, Marcio and Esteves, Maria Gilda P. and Schneider, Daniel and de Souza, Jano Moreira}, + year = {2012}, + month = oct, + pages = {931--936}, + publisher = {{IEEE}}, + isbn = {978-1-4673-1714-6 978-1-4673-1713-9 978-1-4673-1712-2}, + file = {/home/nathante/Zotero/storage/IURBKUZP/Antelio et al_2012_Qualitocracy.pdf} +} + +@inproceedings{arazy_determinants_2010, + title = {Determinants of Wikipedia Quality: The Roles of Global and Local Contribution Inequality}, + shorttitle = {Determinants of Wikipedia Quality}, + booktitle = {Proceedings of the 2010 {{ACM}} Conference on {{Computer}} Supported Cooperative Work}, + author = {Arazy, Ofer and Nov, Oded}, + year = {2010}, + month = feb, + series = {{{CSCW}} '10}, + pages = {233--236}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {The success of Wikipedia and the relative high quality of its articles seem to contradict conventional wisdom. Recent studies have begun shedding light on the processes contributing to Wikipedia's success, highlighting the role of coordination and contribution inequality. In this study, we expand on these works in two ways. First, we make a distinction between global (Wikipedia-wide) and local (article-specific) inequality and investigate both constructs. 
Second, we explore both direct and indirect effects of these inequalities, exposing the intricate relationships between global inequality, local inequality, coordination, and article quality. We tested our hypotheses on a sample of a Wikipedia articles using structural equation modeling and found that global inequality exerts significant positive impact on article quality, while the effect of local inequality is indirect and is mediated by coordination}, + isbn = {978-1-60558-795-0}, + keywords = {contribution inequality,coordination,global inequality,information quality,local inequality,wikipedia}, + file = {/home/nathante/Zotero/storage/WDJ4APS7/Arazy_Nov_2010_Determinants of wikipedia quality.pdf} +} + +@article{arazy_evolutionary_2019, + title = {The Evolutionary Trajectories of Peer-Produced Artifacts: {{Group}} Composition, the Trajectories' Exploration, and the Quality of Artifacts}, + shorttitle = {The Evolutionary Trajectories of Peer-Produced Artifacts}, + author = {Arazy, Ofer and Lindberg, Aron and Rezaei, Mostafa and Samorani, Michele}, + year = {2019}, + month = dec, + journal = {MIS Quarterly}, + abstract = {Members of an online community peer-produce digital artifacts by negotiating different perspectives and personal knowledge bases. These negotiations are manifested in the temporal evolution of the peer-produced artifact. In this study we conceptualize the evolution of a digital artifact as a trajectory in a feature space. Our theoretical frame suggests that through negotiations contributors' actions "pull" the trajectory and shape its movement in the feature space. We hypothesize that the type of contributors that work on a focal article influences the extent to which that article's trajectory explores alternative positions within that space, and that the trajectory's exploration is, in turn, associated with the artifact's quality. 
To test these hypotheses, we analyzed the trajectories of wiki articles drawn from two peer-production communities: Wikipedia and Wikia, tracking the evolution of 242 paired articles for over a decade during which the articles went through 536,745 revisions. We found that the contributors who are the most likely to increase the trajectory's exploration are those that (a) return to work on the focal artifact and (b) are unregistered members in the broader online community. Further, our results show that the trajectory's exploration has a curvilinear association with article quality, indicating that exploration contributes positively to quality, but that the effect is reversed when exploration exceeds a certain level. The insights derived from this study highlight the value of an artifact-centric approach to increasing our understanding of the dynamics underlying peer-production.}, + keywords = {peer production,wikia}, + file = {/home/nathante/Zotero/storage/ZGMAGR5H/Arazy et al_2019_The evolutionary trajectories of peer-produced artifacts.pdf} +} + +@article{asthana_few_2018, + title = {With {{Few Eyes}}, {{All Hoaxes Are Deep}}}, + author = {Asthana, Sumit and Halfaker, Aaron}, + year = {2018}, + month = nov, + journal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + number = {CSCW}, + pages = {21:1--21:18}, + issn = {2573-0142}, + abstract = {Quality control is critical to open production communities like Wikipedia. Wikipedia editors enact border quality control with edits (counter-vandalism) and new article creations (new page patrolling) shortly after they are saved. In this paper, we describe a long-standing set of inefficiencies that have plagued new page patrolling by drawing a contrast to the more efficient, distributed processes for counter-vandalism. 
Further, to address this issue, we demonstrate an effective automated topic model based on a labeling strategy that leverages a folksonomy developed by subject specific working groups in Wikipedia (WikiProject tags) and a flexible ontology (WikiProjects Directory) to arrive at a hierarchical and uniform label set. We are able to attain very high fitness measures (macro ROC-AUC: 95.2\%, macro PR-AUC: 74.5\%) and real-time performance using word2vec-based features. Finally, we present a proposal for how incorporating this model into current tools will shift the dynamics of new article review positively.}, + file = {/home/nathante/Zotero/storage/EM6Z9WPQ/Asthana and Halfaker - 2018 - With Few Eyes, All Hoaxes Are Deep.pdf} +} + +@book{ayers_how_2008, + title = {How {{Wikipedia}} Works and How You Can Be a Part of It}, + author = {Ayers, Phoebe and Matthews, Charles and Yates, Ben}, + year = {2008}, + publisher = {{No Starch Press}}, + address = {{San Francisco, CA}}, + abstract = {"In How Wikipedia Works, you'll learn the skills required to use and contribute to the world's largest reference work - like what constitutes good writing and research and how to work with images and templates."--Jacket.}, + isbn = {978-1-59327-227-2}, + langid = {english} +} + +@techreport{band_wikipedias_2013, + type = {{{SSRN Scholarly Paper}}}, + title = {Wikipedia's {{Economic Value}}}, + author = {Band, Jonathan and Gerafi, Jonathan}, + year = {2013}, + month = oct, + address = {{Rochester, NY}}, + institution = {{Social Science Research Network}}, + abstract = {In the copyright policy debate, proponents of strong copyright protection tend to be dismissive of the quality of freely available content. In response to counter-examples such as open access scholarly publications and advertising-supported business models (e.g., newspaper websites and the over-the-air television broadcasts viewed by 50 million Americans), the strong copyright proponents center their attack on amateur content. 
In this narrative, YouTube is for cat videos and Wikipedia is a wildly unreliable source of information.}, + langid = {english}, + keywords = {Jonathan Band,Jonathan Gerafi,SSRN,Wikipedia's Economic Value}, + file = {/home/nathante/Zotero/storage/4Z3W8LKV/Band_Gerafi_2013_Wikipedia's Economic Value.pdf;/home/nathante/Zotero/storage/KDSXLL2E/papers.html} +} + +@inproceedings{biancani_measuring_2014, + title = {Measuring the {{Quality}} of {{Edits}} to {{Wikipedia}}}, + booktitle = {Proceedings of {{The International Symposium}} on {{Open Collaboration}}}, + author = {Biancani, Susan}, + year = {2014}, + series = {{{OpenSym}} '14}, + pages = {33:1--33:3}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Wikipedia is unique among reference works both in its scale and in the openness of its editing interface. The question of how it can achieve and maintain high-quality encyclopedic articles is an area of active research. In order to address this question, researchers need to build consensus around a sensible metric to assess the quality of contributions to articles. This measure must not only reflect an intuitive concept of "quality," but must also be scalable and run efficiently. 
Building on prior work in this area, this paper uses human raters through Amazon Mechanical Turk to validate an efficient, automated quality metric.}, + isbn = {978-1-4503-3016-9}, + file = {/home/nathante/Zotero/storage/WHG7AUHK/Biancani_2014_Measuring the Quality of Edits to Wikipedia.pdf} +} + +@inproceedings{blumenstock_size_2008, + title = {Size Matters: Word Count as a Measure of Quality on Wikipedia}, + shorttitle = {Size Matters}, + booktitle = {Proceeding of the 17th International Conference on {{World Wide Web}} - {{WWW}} '08}, + author = {Blumenstock, Joshua E.}, + year = {2008}, + pages = {1095}, + publisher = {{ACM Press}}, + address = {{Beijing, China}}, + abstract = {Wikipedia, ``the free encyclopedia'', now contains over two million English articles, and is widely regarded as a high-quality, authoritative encyclopedia. Some Wikipedia articles, however, are of questionable quality, and it is not always apparent to the visitor which articles are good and which are bad. We propose a simple metric \textendash{} word count \textendash{} for measuring article quality. 
In spite of its striking simplicity, we show that this metric significantly outperforms the more complex methods described in related work.}, + isbn = {978-1-60558-085-2}, + langid = {english}, + file = {/home/nathante/Zotero/storage/I8L8VT29/Blumenstock_2008_Size matters.pdf} +} + +@article{burkner_brms_2017, + title = {Brms: {{An R Package}} for {{Bayesian Multilevel Models Using Stan}}}, + shorttitle = {Brms}, + author = {B{\"u}rkner, Paul-Christian}, + year = {2017}, + month = aug, + journal = {Journal of Statistical Software}, + volume = {80}, + number = {1}, + pages = {1--28}, + issn = {1548-7660}, + copyright = {Copyright (c) 2017 Paul-Christian B\"urkner}, + langid = {english}, + keywords = {Bayesian inference,MCMC,multilevel model,ordinal data,R,Stan}, + file = {/home/nathante/Zotero/storage/XDUCKTG7/Bürkner_2017_brms.pdf;/home/nathante/Zotero/storage/LJXX4II6/v080i01.html} +} + +@article{burkner_ordinal_2019, + title = {Ordinal {{Regression Models}} in {{Psychology}}: {{A Tutorial}}}, + shorttitle = {Ordinal {{Regression Models}} in {{Psychology}}}, + author = {B{\"u}rkner, Paul-Christian and Vuorre, Matti}, + year = {2019}, + month = mar, + journal = {Advances in Methods and Practices in Psychological Science}, + volume = {2}, + number = {1}, + pages = {77--101}, + publisher = {{SAGE Publications Inc}}, + issn = {2515-2459}, + abstract = {Ordinal variables, although extremely common in psychology, are almost exclusively analyzed with statistical models that falsely assume them to be metric. This practice can lead to distorted effect-size estimates, inflated error rates, and other problems. We argue for the application of ordinal models that make appropriate assumptions about the variables under study. In this Tutorial, we first explain the three major classes of ordinal models: the cumulative, sequential, and adjacent-category models. 
We then show how to fit ordinal models in a fully Bayesian framework with the R package brms, using data sets on opinions about stem-cell research and time courses of marriage. The appendices provide detailed mathematical derivations of the models and a discussion of censored ordinal models. Compared with metric models, ordinal models provide better theoretical interpretation and numerical inference from ordinal data, and we recommend their widespread adoption in psychology.}, + langid = {english}, + keywords = {brms,Likert items,open data,open materials,ordinal models,R}, + file = {/home/nathante/Zotero/storage/TQJGFWGD/Bürkner_Vuorre_2019_Ordinal Regression Models in Psychology.pdf} +} + +@article{cardoso_learning_2007, + title = {Learning to {{Classify Ordinal Data}}: {{The Data Replication Method}}}, + author = {Cardoso, Jaime S. and {Pinto da Costa}, Joaquim F.}, + year = {2007}, + journal = {Journal of Machine Learning Research}, + volume = {8}, + pages = {37}, + abstract = {Classification of ordinal data is one of the most important tasks of relation learning. This paper introduces a new machine learning paradigm specifically intended for classification problems where the classes have a natural order. The technique reduces the problem of classifying ordered classes to the standard two-class problem. The introduced method is then mapped into support vector machines and neural networks. Generalization bounds of the proposed ordinal classifier are also provided. An experimental study with artificial and real data sets, including an application to gene expression analysis, verifies the usefulness of the proposed approach.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/FKNYFLDN/Cardoso et al. 
- Learning to Classify Ordinal Data The Data Replic.pdf} +} + +@phdthesis{champion_production_2019, + type = {Master of {{Arts Thesis}}}, + title = {Production Misalignment: A Threat to Public Knowledge}, + shorttitle = {Production Misalignment}, + author = {Champion, Kaylea}, + year = {2019}, + address = {{Seattle, Washington}}, + abstract = {On Wikipedia, when a high-interest topic is poorly-covered\textemdash either it is incomplete or contains inaccurate information\textemdash public knowledge is threatened. Contributors on Wikipedia are volunteers: they're not assigned to track consumer demand, and they choose their own tasks. When contributor interest doesn't align with consumer interest, the result is termed ``underproduction''\textemdash some widely consumed materials are low quality. Past research has found competing explanations for what motivates volunteers to work on particular articles, including attempts to solve their own problems and supporting project goals. I theorize that social rewards explain task selection for moderate to high levels of experience, although this trend attenuates at the highest level of experience. Using a detailed longitudinal dataset, I find support for this theory in three ways. First, that although they are a minority of contributors, persistent contributors drive what gets produced. 
Second, as contributors persist, they are less likely to contribute to underproduced materials, but this trend flattens over time as predicted. Third, this pattern is weaker among contributors who do not create accounts.}, + copyright = {CC BY}, + langid = {american}, + school = {University of Washington}, + annotation = {Accepted: 2020-02-04T19:25:32Z}, + file = {/home/nathante/Zotero/storage/WQWDRFAW/45156.html} +} + +@article{champion_underproduction_2021, + title = {Underproduction: {{An}} Approach for Measuring Risk in Open Source Software}, + author = {Champion, Kaylea and Hill, Benjamin Mako}, + year = {2021}, + month = feb, + journal = {IEEE International Conference on Software Analysis, Evolution and Reengineering}, + eprint = {2103.00352}, + eprinttype = {arxiv}, + primaryclass = {cs.SE}, + abstract = {The widespread adoption of Free/Libre and Open Source Software (FLOSS) means that the ongoing maintenance of many widely used software components relies on the collaborative effort of volunteers who set their own priorities and choose their own tasks. We argue that this has created a new form of risk that we call 'underproduction' which occurs when the supply of software engineering labor becomes out of alignment with the demand of people who rely on the software produced. We present a conceptual framework for identifying relative underproduction in software as well as a statistical method for applying our framework to a comprehensive dataset from the Debian GNU/Linux distribution that includes 21,902 source packages and the full history of 461,656 bugs. We draw on this application to present two experiments: (1) a demonstration of how our technique can be used to identify at-risk software packages in a large FLOSS repository and (2) a validation of these results using an alternate indicator of package risk. 
Our analysis demonstrates both the utility of our approach and reveals the existence of widespread underproduction in a range of widely-installed software components in Debian.}, + archiveprefix = {arXiv} +} + +@book{chang_inventing_2004, + title = {Inventing Temperature.}, + author = {Chang, Hasok}, + year = {2004}, + publisher = {{OUP}}, + address = {{Oxford}}, + isbn = {978-0-19-517127-3}, + langid = {english}, + annotation = {OCLC: 538097673} +} + +@inproceedings{dang_quality_2016, + title = {Quality {{Assessment}} of {{Wikipedia Articles Without Feature Engineering}}}, + booktitle = {Proceedings of the 16th {{ACM}}/{{IEEE-CS}} on {{Joint Conference}} on {{Digital Libraries}}}, + author = {Dang, Quang Vinh and Ignat, Claudia-Lavinia}, + year = {2016}, + series = {{{JCDL}} '16}, + pages = {27--30}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {As Wikipedia became the largest human knowledge repository, quality measurement of its articles received a lot of attention during the last decade. Most research efforts focused on classification of Wikipedia articles quality by using a different feature set. However, so far, no ``golden feature set'' was proposed. In this paper, we present a novel approach for classifying Wikipedia articles by analysing their content rather than by considering a feature set. 
Our approach uses recent techniques in natural language processing and deep learning, and achieved a comparable result with the state-of-the-art.}, + isbn = {978-1-4503-4229-2}, + file = {/home/nathante/Zotero/storage/KVMYP7YQ/Dang_Ignat_2016_Quality Assessment of Wikipedia Articles Without Feature Engineering.pdf} +} + +@inproceedings{druck_learning_2008, + title = {Learning to {{Predict}} the {{Quality}} of {{Contributions}} to {{Wikipedia}}}, + booktitle = {{{WikiAI}}}, + author = {Druck, Gregory and Miklau, Gerome and McCallum, Andrew}, + year = {2008}, + pages = {6}, + abstract = {Although some have argued that Wikipedia's open edit policy is one of the primary reasons for its success, it also raises concerns about quality \textemdash{} vandalism, bias, and errors can be problems. Despite these challenges, Wikipedia articles are often (perhaps surprisingly) of high quality, which many attribute to both the dedicated Wikipedia community and ``good Samaritan'' users. As Wikipedia continues to grow, however, it becomes more difficult for these users to keep up with the increasing number of articles and edits. This motivates the development of tools to assist users in creating and maintaining quality. In this paper, we propose metrics that quantify the quality of contributions to Wikipedia through implicit feedback from the community. We then learn discriminative probabilistic models that predict the quality of a new edit using features of the changes made, the author of the edit, and the article being edited. Through estimating parameters for these models, we also gain an understanding of factors that influence quality. We advocate using edit quality predictions and information gleaned from model analysis not to place restrictions on editing, but to instead alert users to potential quality problems, and to facilitate the development of additional incentives for contributors. We evaluate the edit quality prediction models on the Spanish Wikipedia. 
Experiments demonstrate that the models perform better when given access to content-based features of the edit, rather than only features of contributing user. This suggests that a user-based solution to the Wikipedia quality problem may not be sufficient.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/IVJQI75V/Druck et al. - Learning to Predict the Quality of Contributions t.pdf} +} + +@article{ford_beyond_2018, + title = {Beyond Notification: {{Filling}} Gaps in Peer Production Projects}, + shorttitle = {Beyond Notification}, + author = {Ford, Heather and Pensa, Iolanda and Devouard, Florence and Pucciarelli, Marta and Botturi, Luca}, + year = {2018}, + month = oct, + journal = {New Media \& Society}, + volume = {20}, + number = {10}, + pages = {3799--3817}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {In order to counter systemic bias in peer production projects like Wikipedia, a variety of strategies have been used to fill gaps and improve the completeness of the archive. We test a number of these strategies in a project aimed at improving articles relating to South Africa's primary school curriculum and find that many of the predominant strategies are insufficient for filling Wikipedia's gaps. Notifications that alert users to the existence of gaps including incomplete or missing articles, in particular, are found to be ineffective at improving articles. Only through the process of trust-building and the development of negotiated boundary objects, potential allies (institutional academics in this case) can be enrolled in the task of editing the encyclopaedia. 
Rather than a simple process of enrolment via notification, this project demonstrated the principles of negotiation required for engaging with new editor groups in the long-term project of filling Wikipedia's gaps.}, + langid = {english}, + keywords = {Boundary objects,expertise,participation,systemic bias,Wikipedia}, + file = {/home/nathante/Zotero/storage/4QJNHJFS/Ford et al_2018_Beyond notification.pdf} +} + +@inproceedings{forte_why_2005, + ids = {forte_why_nodate-1}, + title = {Why {{Do People Write}} for {{Wikipedia}}? {{Incentives}} to {{Contribute}} to {{Open-Content Publishing}}}, + booktitle = {Proceedings of {{GROUP}}}, + author = {Forte, Andrea and Bruckman, Amy}, + year = {2005}, + pages = {6}, + abstract = {When people learn that we have spoken to individuals who spend up to 30 hours a week volunteering their time to research and write for an open-content encyclopedia, we often hear the same question: ``Why do they do it?'' The fact that this encyclopedia does not provide bylines to credit authors for their hard work makes the scenario still less fathomable. Two rounds of interviews with 22 volunteer encyclopedia writers in the fall of 2004 and spring of 2005 revealed that, in some respects, the incentive system that motivates contributions to the open-content encyclopedia Wikipedia resembles that of the scientific community. Like scientists, contributors to Wikipedia seek to collaboratively identify and publish true facts about the world. Research on the sociology of science provides a useful touchstone for considering the incentive systems embedded in the technology and culture of online communities of collaborative authorship. In this paper we describe some of our findings in the context of Latour and Woolgar's seminal work on the incentive systems that motivate publishing scientists. 
We suggest that minimizing reliance on ``hard coded,'' stratified user privileges and providing indicators of engagement in desirable activities can help support the growth of incentive economies in online communities.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/G97IG7J2/Forte and Bruckman - Why Do People Write for Wikipedia Incentives to C.pdf;/home/nathante/Zotero/storage/Z84K5QDA/Forte and Bruckman - Why Do People Write for Wikipedia Incentives to C.pdf} +} + +@incollection{goodhart_problems_1984, + title = {Problems of {{Monetary Management}}: {{The UK Experience}}}, + shorttitle = {Problems of {{Monetary Management}}}, + booktitle = {Monetary {{Theory}} and {{Practice}}: {{The UK Experience}}}, + author = {Goodhart, C. A. E.}, + editor = {Goodhart, C. A. E.}, + year = {1984}, + pages = {91--121}, + publisher = {{Macmillan Education UK}}, + address = {{London}}, + abstract = {In 1971 the monetary authorities in the UK adopted a new approach to monetary management, a change of policy announced and described in several papers on competition and credit control. The subsequent experience of trying to operate this revised system has, however, been troublesome and at times unhappy. The purpose here is to examine certain aspects of recent monetary developments in order to illustrate a number of more general analytical themes which may have relevance among several countries.}, + isbn = {978-1-349-17295-5}, + langid = {english} +} + +@inproceedings{gorbatai_exploring_2011-1, + title = {Exploring {{Underproduction}} in {{Wikipedia}}}, + booktitle = {Proceedings of the 7th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Gorbatai, Andreea D.}, + year = {2011}, + series = {{{WikiSym}} '11}, + pages = {205--206}, + abstract = {Researchers have used Wikipedia data to identify a wide range of antecedents to success in collective production. 
But we have not yet inquired whether collective production creates those public goods which bring most value-add from a social perspective. In this poster I explore two key circumstances in which collective production can fail to respond to social need: when goods fail to attain high quality despite (1) high demand or (2) explicit designation by producers as highly important. In the context of Wikipedia, I propose first to examine articles that remain low quality, or underproduced, despite the fact they are viewed often; and second, to examine articles that remain low quality despite the fact that they were identified as important by Wikipedia contributors. This research highlights the fact that collective production needs to be examined not only by itself but also in the context of a market for goods in order to ascertain the benefits of this production form. The final version of this study will integrate data on underproduced articles with data on knowledge categories to uncover systematic patterns of underproduction at the category level and predict which categories are most in need of quality improvement. 
Additionally I will use in-depth qualitative methods to examine the mechanisms through which underproduction occurs in select knowledge categories to distill practical recommendations for collective production improvement.}, + isbn = {978-1-4503-0909-7}, + keywords = {collective production,social goods,underproduction} +} + +@inproceedings{halfaker_interpolating_2017, + title = {Interpolating {{Quality Dynamics}} in {{Wikipedia}} and {{Demonstrating}} the {{Keilana Effect}}}, + booktitle = {Proceedings of the 13th {{International Symposium}} on {{Open Collaboration}}}, + author = {Halfaker, Aaron}, + year = {2017}, + month = aug, + series = {{{OpenSym}} '17}, + pages = {1--9}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {For open, volunteer generated content like Wikipedia, quality is a prominent concern. To measure Wikipedia's quality, researchers have historically relied on expert evaluation or assessments of article quality by Wikipedians themselves. While both of these methods have proven effective for answering many questions about Wikipedia's quality and processes, they are both problematic: expert evaluation is expensive and Wikipedian quality assessments are sporadic and unpredictable. Studies that explore Wikipedia's quality level or the processes that result in quality improvements have only examined small snapshots of Wikipedia and often rely on complex propensity models to deal with the unpredictable nature of Wikipedians' own assessments. In this paper, I describe a method for measuring article quality in Wikipedia historically and at a finer granularity than was previously possible. 
I use this method to demonstrate an important coverage dynamic in Wikipedia (specifically, articles about women scientists) and offer this method, dataset, and open API to the research community studying Wikipedia quality dynamics.}, + isbn = {978-1-4503-5187-4}, + keywords = {Dataset,Interpolation,Methods,Modeling,Predictive,Quality,Wikipedia}, + file = {/home/nathante/Zotero/storage/5Q7YRJ92/Halfaker_2017_Interpolating Quality Dynamics in Wikipedia and Demonstrating the Keilana Effect.pdf} +} + +@inproceedings{halfaker_jury_2009, + title = {A {{Jury}} of {{Your Peers}}: {{Quality}}, {{Experience}} and {{Ownership}} in {{Wikipedia}}}, + shorttitle = {A {{Jury}} of {{Your Peers}}}, + booktitle = {Proceedings of the 5th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Halfaker, Aaron and Kittur, Aniket and Kraut, Robert and Riedl, John}, + year = {2009}, + series = {{{WikiSym}} '09}, + pages = {15:1--15:10}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Wikipedia is a highly successful example of what mass collaboration in an informal peer review system can accomplish. In this paper, we examine the role that the quality of the contributions, the experience of the contributors and the ownership of the content play in the decisions over which contributions become part of Wikipedia and which ones are rejected by the community. We introduce and justify a versatile metric for automatically measuring the quality of a contribution. We find little evidence that experience helps contributors avoid rejection. In fact, as they gain experience, contributors are even more likely to have their work rejected. We also find strong evidence of ownership behaviors in practice despite the fact that ownership of content is discouraged within Wikipedia.}, + isbn = {978-1-60558-730-1}, + keywords = {experience,ownership,peer,peer review,quality,wikipedia,WikiWork}, + file = {/home/nathante/Zotero/storage/3D95RK5T/Halfaker et al. 
- 2009 - A Jury of Your Peers Quality, Experience and Owne.pdf;/home/nathante/Zotero/storage/4VTKXZIS/Halfaker et al. - 2009 - A Jury of Your Peers Quality, Experience and Owne.pdf;/home/nathante/Zotero/storage/R84D69QJ/Halfaker et al. - 2009 - A jury of your peers quality, experience and owne.pdf} +} + +@article{halfaker_ores_2020, + title = {{{ORES}}: {{Lowering Barriers}} with {{Participatory Machine Learning}} in {{Wikipedia}}}, + author = {Halfaker, Aaron and Geiger, R Stuart}, + year = {2020}, + month = oct, + volume = {4}, + number = {148}, + pages = {37}, + langid = {english}, + file = {/home/nathante/Zotero/storage/SYIM8B62/Halfaker_Geiger_2020_ORES.pdf} +} + +@book{hastie_elements_2018, + title = {The {{Elements}} of Statistical Learning: Data Mining, Inference, and Prediction}, + shorttitle = {The {{Elements}} of Statistical Learning}, + author = {Hastie, Trevor and Friedman, Jerome and Tibshirani, Robert}, + year = {2018}, + publisher = {{Springer}}, + address = {{New York}}, + isbn = {978-0-387-84857-0}, + langid = {english}, + annotation = {OCLC: 1085863671} +} + +@inproceedings{hecht_tower_2010, + title = {The {{Tower}} of {{Babel}} Meets {{Web}} 2.0: User-Generated Content and Its Applications in a Multilingual Context}, + shorttitle = {The Tower of Babel Meets Web 2.0}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Hecht, Brent and Gergle, Darren}, + year = {2010}, + month = apr, + series = {{{CHI}} '10}, + pages = {291--300}, + publisher = {{Association for Computing Machinery}}, + address = {{Atlanta, Georgia, USA}}, + abstract = {This study explores language's fragmenting effect on user-generated content by examining the diversity of knowledge representations across 25 different Wikipedia language editions. This diversity is measured at two levels: the concepts that are included in each edition and the ways in which these concepts are described. 
We demonstrate that the diversity present is greater than has been presumed in the literature and has a significant influence on applications that use Wikipedia as a source of world knowledge. We close by explicating how knowledge diversity can be beneficially leveraged to create "culturally-aware applications" and "hyperlingual applications".}, + isbn = {978-1-60558-929-9}, + langid = {english}, + file = {/home/nathante/Zotero/storage/7AVHUTYF/Hecht_Gergle_2010_The Tower of Babel meets Web 2.pdf} +} + +@inproceedings{javanmardi_user_2009, + ids = {�}, + title = {User Contribution and Trust in {{Wikipedia}}}, + booktitle = {Proceedings of the 5th {{International Conference}} on {{Collaborative Computing}}: {{Networking}}, {{Applications}} and {{Worksharing}} ({{CollaborateCom}} '09)}, + author = {Javanmardi, S. and Ganjisaffar, Y. and Lopes, C. and Baldi, P.}, + year = {2009}, + publisher = {{ITSC / IEEE}}, + address = {{New York, NY}}, + abstract = {Wikipedia, one of the top ten most visited websites, is commonly viewed as the largest online reference for encyclopedic knowledge. Because of its open editing model -allowing anyone to enter and edit content- Wikipedia's overall quality has often been questioned as a source of reliable information. Lack of study of the open editing model of Wikipedia and its effectiveness has resulted in a new generation of wikis that restrict contributions to registered users only, using their real names. In this paper, we present an empirical study of user contributions to Wikipedia. We statistically analyze contributions by both anonymous and registered users. The results show that submissions of anonymous and registered users in Wikipedia suggest a power law behavior. About 80\% of the revisions are submitted by less than 7\% of the users, most of whom are registered users. 
To further refine the analyses, we use the Wiki Trust Model (WTM), a user reputation model developed in our previous work to assign a reputation value to each user. As expected, the results show that registered users contribute higher quality content and therefore are assigned higher reputation values. However, a significant number of anonymous users also contribute high-quality content. We provide further evidence that regardless of a user's attribution, registered or anonymous, high reputation users are the dominant contributors that actively edit Wikipedia articles in order to remove vandalism or poor quality content.}, + keywords = {encyclopaedias,encyclopedic knowledge,user contribution,user interfaces,Web sites,Wiki trust model,Wikipedia}, + file = {/home/nathante/Zotero/storage/M98L5F63/Javanmardi et al_2009_User contribution and trust in Wikipedia.pdf} +} + +@book{jemielniak_common_2014, + ids = {jemielniak_common_2014-1}, + title = {Common {{Knowledge}}?: {{An Ethnography}} of {{Wikipedia}}}, + shorttitle = {Common {{Knowledge}}?}, + author = {Jemielniak, Dariusz}, + year = {2014}, + month = may, + publisher = {{Stanford University Press}}, + abstract = {With an emphasis on peer\textendash produced content and collaboration, Wikipedia exemplifies a departure from traditional management and organizational models. This iconic "project" has been variously characterized as a hive mind and an information revolution, attracting millions of new users even as it has been denigrated as anarchic and plagued by misinformation. Have Wikipedia's structure and inner workings promoted its astonishing growth and enduring public relevance? In Common Knowledge?, Dariusz Jemielniak draws on his academic expertise and years of active participation within the Wikipedia community to take readers inside the site, illuminating how it functions and deconstructing its distinctive organization. 
Against a backdrop of misconceptions about its governance, authenticity, and accessibility, Jemielniak delivers the first ethnography of Wikipedia, revealing that it is not entirely at the mercy of the public: instead, it balances open access and power with a unique bureaucracy that takes a page from traditional organizational forms. Along the way, Jemielniak incorporates fascinating cases that highlight the tug of war among the participants as they forge ahead in this pioneering environment.}, + isbn = {978-0-8047-9120-5}, + langid = {english}, + keywords = {Business \& Economics / General,Business \& Economics / Organizational Behavior,Electronic encyclopedias -- Social aspects.,Organizational sociology.,Social Science / Anthropology / Cultural,Social Science / Anthropology / Cultural \& Social,Wikipedia.}, + file = {/home/nathante/Zotero/storage/LS85JVJB/Jemielniak_2014_Common knowledge.pdf;/home/nathante/Zotero/storage/WN97JGCI/reader.html} +} + +@inproceedings{kittur_harnessing_2008, + ids = {kittur_harnessing_2008-1}, + title = {Harnessing the {{Wisdom}} of {{Crowds}} in {{Wikipedia}}: {{Quality Through Coordination}}}, + shorttitle = {Harnessing the {{Wisdom}} of {{Crowds}} in {{Wikipedia}}}, + booktitle = {Proceedings of the 2008 {{ACM Conference}} on {{Computer Supported Cooperative Work}}}, + author = {Kittur, Aniket and Kraut, Robert E.}, + year = {2008}, + series = {{{CSCW}} '08}, + pages = {37--46}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Wikipedia's success is often attributed to the large numbers of contributors who improve the accuracy, completeness and clarity of articles while reducing bias. However, because of the coordination needed to write an article collaboratively, adding contributors is costly. We examined how the number of editors in Wikipedia and the coordination methods they use affect article quality. 
We distinguish between explicit coordination, in which editors plan the article through communication, and implicit coordination, in which a subset of editors structure the work by doing the majority of it. Adding more editors to an article improved article quality only when they used appropriate coordination techniques and was harmful when they did not. Implicit coordination through concentrating the work was more helpful when many editors contributed, but explicit coordination through communication was not. Both types of coordination improved quality more when an article was in a formative stage. These results demonstrate the critical importance of coordination in effectively harnessing the "wisdom of the crowd" in online production environments.}, + isbn = {978-1-60558-007-4}, + keywords = {collaboration,Collaboration,collective intelligence,coordination,distributed cognition,quality of content,social computing,social interaction,wiki,Wiki,wikipedia,Wikipedia}, + file = {/home/nathante/Zotero/storage/AAKBSS79/Kittur_Kraut_2008_Harnessing the wisdom of crowds in wikipedia.pdf;/home/nathante/Zotero/storage/J7X48SKE/Kittur and Kraut - 2008 - Harnessing the wisdom of crowds in wikipedia qual.pdf} +} + +@article{kleinberg_inherent_2016, + title = {Inherent {{Trade-Offs}} in the {{Fair Determination}} of {{Risk Scores}}}, + author = {Kleinberg, Jon and Mullainathan, Sendhil and Raghavan, Manish}, + year = {2016}, + month = sep, + journal = {arXiv:1609.05807 [cs, stat]}, + eprint = {1609.05807}, + eprinttype = {arxiv}, + primaryclass = {cs, stat}, + abstract = {Recent discussion in the public sphere about algorithmic classification has involved tension between competing notions of what it means for a probabilistic classification to be fair to different groups. 
We formalize three fairness conditions that lie at the heart of these debates, and we prove that except in highly constrained special cases, there is no method that can satisfy these three conditions simultaneously. Moreover, even satisfying all three conditions approximately requires that the data lie in an approximate version of one of the constrained special cases identified by our theorem. These results suggest some of the ways in which key notions of fairness are incompatible with each other, and hence provide a framework for thinking about the trade-offs between them.}, + archiveprefix = {arXiv}, + keywords = {_tablet_modified,Computer Science - Computers and Society,Computer Science - Machine Learning,Statistics - Machine Learning}, + file = {/home/nathante/ownCloud/Papers/Kleinberg et al. - 2016 - Inherent Trade-Offs in the Fair Determination of R.pdf;/home/nathante/Zotero/storage/XXQIPXY2/1609.html} +} + +@article{kocielnik_reciprocity_2018, + ids = {kocielnik_reciprocity_2018-1}, + title = {Reciprocity and {{Donation}}: {{How Article Topic}}, {{Quality}} and {{Dwell Time Predict Banner Donation}} on {{Wikipedia}}}, + shorttitle = {Reciprocity and {{Donation}}}, + author = {Kocielnik, Rafal and Keyes, Os and Morgan, Jonathan T. and Taraborelli, Dario and McDonald, David W. and Hsieh, Gary}, + year = {2018}, + month = nov, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {2}, + number = {CSCW}, + pages = {1--20}, + issn = {25730142}, + langid = {english}, + file = {/home/nathante/Zotero/storage/XIWVF6VN/Kocielnik et al. - 2018 - Reciprocity and Donation How Article Topic, Quali.pdf;/home/nathante/Zotero/storage/YCF2G6ZA/Kocielnik et al. 
- 2018 - Reciprocity and Donation How Article Topic, Quali.pdf} +} + +@inproceedings{lemmerich_why_2019, + title = {Why the {{World Reads Wikipedia}}: {{Beyond English Speakers}}}, + shorttitle = {Why the {{World Reads Wikipedia}}}, + booktitle = {Proceedings of the {{Twelfth ACM International Conference}} on {{Web Search}} and {{Data Mining}}}, + author = {Lemmerich, Florian and {S{\'a}ez-Trumper}, Diego and West, Robert and Zia, Leila}, + year = {2019}, + series = {{{WSDM}} '19}, + pages = {618--626}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {As one of the Web's primary multilingual knowledge sources, Wikipedia is read by millions of people across the globe every day. Despite this global readership, little is known about why users read Wikipedia's various language editions. To bridge this gap, we conduct a comparative study by combining a large-scale survey of Wikipedia readers across 14 language editions with a log-based analysis of user activity. We proceed in three steps. First, we analyze the survey results to compare the prevalence of Wikipedia use cases across languages, discovering commonalities, but also substantial differences, among Wikipedia languages with respect to their usage. Second, we match survey responses to the respondents' traces in Wikipedia's server logs to characterize behavioral patterns associated with specific use cases, finding that distinctive patterns consistently mark certain use cases across language editions. Third, we show that certain Wikipedia use cases are more common in countries with certain socio-economic characteristics; e.g., in-depth reading of Wikipedia articles is substantially more common in countries with a low Human Development Index. 
These findings advance our understanding of reader motivations and behaviors across Wikipedia languages and have implications for Wikipedia editors and developers of Wikipedia and other Web technologies.}, + isbn = {978-1-4503-5940-5}, + keywords = {cross-cultural analysis,log analysis,motivation,multi-language,survey,wikipedia}, + file = {/home/nathante/Zotero/storage/HY6T3E2I/Lemmerich et al_2019_Why the World Reads Wikipedia.pdf} +} + +@article{lewoniewski_relative_2017, + title = {Relative {{Quality}} and {{Popularity Evaluation}} of {{Multilingual Wikipedia Articles}}}, + author = {Lewoniewski, W{\l}odzimierz and W{\k{e}}cel, Krzysztof and Abramowicz, Witold}, + year = {2017}, + month = dec, + journal = {Informatics}, + volume = {4}, + number = {4}, + pages = {43}, + publisher = {{Multidisciplinary Digital Publishing Institute}}, + abstract = {Despite the fact that Wikipedia is often criticized for its poor quality, it continues to be one of the most popular knowledge bases in the world. Articles in this free encyclopedia on various topics can be created and edited in about 300 different language versions independently. Our research has showed that in language sensitive topics, the quality of information can be relatively better in the relevant language versions. However, in most cases, it is difficult for the Wikipedia readers to determine the language affiliation of the described subject. Additionally, each language edition of Wikipedia can have own rules in the manual assessing of the content's quality. There are also differences in grading schemes between language versions: some use a 6\textendash 8 grade system to assess articles, and some are limited to 2\textendash 3. This makes automatic quality comparison of articles between various languages a challenging task, particularly if we take into account a large number of unassessed articles; some of the Wikipedia language editions have over 99\% of articles without a quality grade. 
The paper presents the results of a relative quality and popularity assessment of over 28 million articles in 44 selected language versions. Comparative analysis of the quality and the popularity of articles in popular topics was also conducted. Additionally, the correlation between quality and popularity of Wikipedia articles of selected topics in various languages was investigated. The proposed method allows us to find articles with information of better quality that can be used to automatically enrich other language editions of Wikipedia.}, + copyright = {http://creativecommons.org/licenses/by/3.0/}, + langid = {english}, + keywords = {DBpedia,information quality,Wikipedia,WikiRank}, + file = {/home/nathante/Zotero/storage/FLWMK7U5/Lewoniewski et al_2017_Relative Quality and Popularity Evaluation of Multilingual Wikipedia Articles.pdf;/home/nathante/Zotero/storage/JQJTEH6S/htm.html} +} + +@article{lewoniewski_relative_2017-2, + title = {Relative {{Quality}} and {{Popularity Evaluation}} of {{Multilingual Wikipedia Articles}}}, + author = {Lewoniewski, W{\l}odzimierz and W{\k{e}}cel, Krzysztof and Abramowicz, Witold}, + year = {2017}, + month = dec, + journal = {Informatics}, + volume = {4}, + number = {4}, + pages = {43}, + abstract = {Despite the fact that Wikipedia is often criticized for its poor quality, it continues to be one of the most popular knowledge bases in the world. Articles in this free encyclopedia on various topics can be created and edited in about 300 different language versions independently. Our research has showed that in language sensitive topics, the quality of information can be relatively better in the relevant language versions. However, in most cases, it is difficult for the Wikipedia readers to determine the language affiliation of the described subject. Additionally, each language edition of Wikipedia can have own rules in the manual assessing of the content's quality. 
There are also differences in grading schemes between language versions: some use a 6\textendash 8 grade system to assess articles, and some are limited to 2\textendash 3. This makes automatic quality comparison of articles between various languages a challenging task, particularly if we take into account a large number of unassessed articles; some of the Wikipedia language editions have over 99\% of articles without a quality grade. The paper presents the results of a relative quality and popularity assessment of over 28 million articles in 44 selected language versions. Comparative analysis of the quality and the popularity of articles in popular topics was also conducted. Additionally, the correlation between quality and popularity of Wikipedia articles of selected topics in various languages was investigated. The proposed method allows us to find articles with information of better quality that can be used to automatically enrich other language editions of Wikipedia.}, + copyright = {http://creativecommons.org/licenses/by/3.0/}, + langid = {english}, + file = {/home/nathante/Zotero/storage/589NUW97/Lewoniewski et al. - 2017 - Relative Quality and Popularity Evaluation of Mult.pdf;/home/nathante/Zotero/storage/RCNA3RIS/Lewoniewski et al. 
- 2017 - Relative Quality and Popularity Evaluation of Mult.pdf;/home/nathante/Zotero/storage/A447QV7Z/43.html} +} + +@article{lukyanenko_iq_2014, + title = {The {{IQ}} of the {{Crowd}}: {{Understanding}} and {{Improving Information Quality}} in {{Structured User-Generated Content}}}, + shorttitle = {The {{IQ}} of the {{Crowd}}}, + author = {Lukyanenko, Roman and Parsons, Jeffrey and Wiersma, Yolanda F.}, + year = {2014}, + month = dec, + journal = {Information Systems Research}, + volume = {25}, + number = {4}, + pages = {669--689}, + issn = {1047-7047, 1526-5536}, + langid = {english}, + file = {/home/nathante/Zotero/storage/BNME7RBJ/Lukyanenko et al_2014_The IQ of the Crowd.pdf} +} + +@inproceedings{matias_civilservant_2018, + title = {Civilservant: Community-Led Experiments in Platform Governance}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Matias, J. Nathan and Mou, Merry}, + year = {2018}, + series = {{{CHI}} '18}, + pages = {9:1--9:13}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {As online platforms monitor and intervene in the daily lives of billions of people, platforms are being used to govern enduring social problems. Field experiments could inform wise uses of this power if tensions between democratic values and experimentation could be resolved. In this paper, we introduce CivilServant, a novel experimentation infrastructure that online communities and their moderators use to evaluate policies and replicate each others' findings. We situate CivilServant in the political history of policy experiments and present design considerations for community participation, ethics, and replication. Based on two case studies of community-led experiments and public debriefings on the reddit platform, we share findings on community deliberation about experiment results. 
We also report on uses of evidence, finding that experiments informed moderator practices, community policies, and replications by communities and platforms. We discuss the implications of these findings for evaluating platform governance in an open, democratic, experimenting society.}, + isbn = {978-1-4503-5620-6}, + keywords = {action research,ethics,field experiments,governance,moderation,platforms,policy evaluation,randomized trials}, + file = {/home/nathante/Zotero/storage/3ULGKV83/Matias_Mou_2018_Civilservant.pdf} +} + +@book{mcelreath_statistical_2018, + title = {Statistical {{Rethinking}}}, + author = {McElreath, Richard}, + year = {2018}, + abstract = {Statistical Rethinking: A Bayesian Course with Examples in R and Stan builds readers' knowledge of and confidence in statistical modeling. Reflecting the need for even minor programming in today's model-based statistics, the book pushes readers to perform step-by-step calculations that are usually automated. This unique computational approach ensures that readers understand enough of the details to make reasonable choices and interpretations in their own modeling work. The text presents generalized linear multilevel models from a Bayesian perspective, relying on a simple logical interpretation of Bayesian probability and maximum entropy. It covers from the basics of regression to multilevel models. The author also discusses measurement error, missing data, and Gaussian process models for spatial and network autocorrelation. By using complete R code examples throughout, this book provides a practical foundation for performing statistical inference. Designed for both PhD students and seasoned professionals in the natural and social sciences, it prepares them for more advanced or specialized statistical modeling. Web Resource The book is accompanied by an R package (rethinking) that is available on the author's website and GitHub. 
The two core functions (map and map2stan) of this package allow a variety of statistical models to be constructed from standard model formulas.}, + langid = {english}, + annotation = {OCLC: 1107423386} +} + +@incollection{menking_people_2019, + title = {People {{Who Can Take It}}: {{How Women Wikipedians Negotiate}} and {{Navigate Safety}}}, + shorttitle = {People {{Who Can Take It}}}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Menking, Amanda and Erickson, Ingrid and Pratt, Wanda}, + year = {2019}, + month = may, + pages = {1--14}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Wikipedia is one of the most successful online communities in history, yet it struggles to attract and retain women editors-a phenomenon known as the gender gap. We investigate this gap by focusing on the voices of experienced women Wikipedians. In this interview-based study (N=25), we identify a core theme among these voices: safety. We reveal how our participants perceive safety within their community, how they manage their safety both conceptually and physically, and how they act on this understanding to create safe spaces on and off Wikipedia. Our analysis shows Wikipedia functions as both a multidimensional and porous space encompassing a spectrum of safety. Navigating this space requires these women to employ sophisticated tactics related to identity management, boundary management, and emotion work. 
We conclude with a set of provocations to spur the design of future online environments that encourage equity, inclusivity, and safety for historically marginalized users.}, + isbn = {978-1-4503-5970-2}, + keywords = {gender gap,online communities,participation,safe spaces,safety,wikipedia}, + file = {/home/nathante/Zotero/storage/YAQL3MGV/Menking et al_2019_People Who Can Take It.pdf} +} + +@article{mesgari_sum_2015, + title = {``{{The}} Sum of All Human Knowledge'': {{A}} Systematic Review of Scholarly Research on the Content of {{Wikipedia}}}, + shorttitle = {``{{The}} Sum of All Human Knowledge''}, + author = {Mesgari, Mostafa and Okoli, Chitu and Mehdi, Mohamad and Nielsen, Finn {\AA}rup and Lanam{\"a}ki, Arto}, + year = {2015}, + journal = {Journal of the Association for Information Science and Technology}, + volume = {66}, + number = {2}, + pages = {219--245}, + issn = {2330-1643}, + abstract = {Wikipedia may be the best-developed attempt thus far to gather all human knowledge in one place. Its accomplishments in this regard have made it a point of inquiry for researchers from different fields of knowledge. A decade of research has thrown light on many aspects of the Wikipedia community, its processes, and its content. However, due to the variety of fields inquiring about Wikipedia and the limited synthesis of the extensive research, there is little consensus on many aspects of Wikipedia's content as an encyclopedic collection of human knowledge. This study addresses the issue by systematically reviewing 110 peer-reviewed publications on Wikipedia content, summarizing the current findings, and highlighting the major research trends. Two major streams of research are identified: the quality of Wikipedia content (including comprehensiveness, currency, readability, and reliability) and the size of Wikipedia. Moreover, we present the key research trends in terms of the domains of inquiry, research design, data source, and data gathering methods. 
This review synthesizes scholarly understanding of Wikipedia content and paves the way for future studies.}, + copyright = {\textcopyright{} 2014 ASIS\&T}, + langid = {english}, + keywords = {encyclopedias,quality,reliability}, + annotation = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/asi.23172}, + file = {/home/nathante/Zotero/storage/9MGZIS9V/Mesgari et al_2015_“The sum of all human knowledge”.pdf;/home/nathante/Zotero/storage/H5F2PUN5/asi.html} +} + +@article{michlmayr_quality_2003, + title = {Quality and the {{Reliance}} on {{Individuals}} in {{Free Software Projects}}}, + author = {Michlmayr, Martin and Hill, Benjamin Mako}, + year = {2003}, + journal = {3rd Workshop on Open Source Software Engineering, ICSE}, + abstract = {It has been suggested that the superior quality of many Free Software projects in comparison to their proprietary counterparts is in part due to the Free Software community's extensive source code peer-review process. While many argue that software is best developed by individuals or small teams, the process of debugging is highly parallelizable. This ``one and many'' model describes a template employed by many Free Software projects. However, reliance on a single developer or maintainer creates a single point of failure that raises a number of serious quality and reliability concerns \textendash{} especially when considered in the context of the volunteer-based nature of most Free Software projects. 
This paper will investigate the nature of problems raised by this model within the Debian Project and will explore several possible strategies aimed at removing or de-emphasizing the reliance on individual developers.} +} + +@article{miquel-ribe_wikipedia_2018, + title = {Wikipedia {{Culture Gap}}: {{Quantifying Content Imbalances Across}} 40 {{Language Editions}}}, + shorttitle = {Wikipedia {{Culture Gap}}}, + author = {{Miquel-Rib{\'e}}, Marc and Laniado, David}, + year = {2018}, + journal = {Frontiers in Physics}, + volume = {6}, + issn = {2296-424X}, + abstract = {The online encyclopedia Wikipedia is the largest general information repository created through collaborative efforts from all over the globe. Despite the project's goal being to achieve the sum of human knowledge, there are strong content imbalances across the language editions. In order to quantify and investigate these imbalances, we study the impact of cultural context in 40 language editions. To this purpose, we developed a computational method to identify articles that can be related to the editors' cultural context associated to each Wikipedia language edition. We employed a combination of strategies taking into account geolocated articles, specific keywords and categories, as well as links between articles. We verified the method's quality with manual assessment and found an average precision of 0.92 and an average recall of 0.95. The results show that about a quarter of each Wikipedia language edition is dedicated to represent the corresponding cultural context. Although a considerable part of this content was created during the first years of the project, its creation is sustained over time. An analysis of cross-language coverage of this content shows that most of it is unique in its original language, and reveals special links between cultural contexts; at the same time, it highlights gaps where the encyclopaedia could extend its content. 
The approach and findings presented in this study can help to foster participation and inter-cultural enrichment of Wikipedias. The datasets produced are made available for further research.}, + langid = {english}, + keywords = {Big Data.,content imbalance,cross-cultural studies,Cultural Diversity,Data Collection,Data Mining,Digital Humanities,online communities,Wikipedia}, + file = {/home/nathante/Zotero/storage/WJSZZBVF/Miquel-Ribé_Laniado_2018_Wikipedia Culture Gap.pdf} +} + +@misc{noauthor_ordinal_nodate, + title = {Ordinal {{Regression}}}, + howpublished = {https://betanalpha.github.io/assets/case\_studies/ordinal\_regression.html}, + file = {/home/nathante/Zotero/storage/5CLVS2WM/ordinal_regression.html} +} + +@article{pedregosa_scikit-learn_2011, + title = {Scikit-Learn: {{Machine Learning}} in {{Python}}}, + shorttitle = {Scikit-Learn}, + author = {Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, {\'E}douard}, + year = {2011}, + journal = {Journal of Machine Learning Research}, + volume = {12}, + number = {85}, + pages = {2825--2830}, + abstract = {Scikit-learn is a Python module integrating a wide range of state-of-the-art machine learning algorithms for medium-scale supervised and unsupervised problems. This package focuses on bringing machine learning to non-specialists using a general-purpose high-level language. Emphasis is put on ease of use, performance, documentation, and API consistency. It has minimal dependencies and is distributed under the simplified BSD license, encouraging its use in both academic and commercial settings. 
Source code, binaries, and documentation can be downloaded from http://scikit-learn.sourceforge.net.}, + file = {/home/nathante/Zotero/storage/AWW8RZYB/Pedregosa et al_2011_Scikit-learn.pdf} +} + +@book{phoebe_ayers_how_2008, + title = {How {{Wikipedia Works}}}, + author = {Ayers, Phoebe and Matthews, Charles and Yates, Ben}, + year = {2008}, + publisher = {{No Starch Press}}, + abstract = {"We cover Wikipedia from soup to nuts: for readers trying to understand what's in Wikipedia, how and why it got there, and how to analyze the quality of the content you might find on the site; for current and future editors, from basic editing techniques and wikisyntax to not-so-basic information on complicated syntax, referencing and researching content, and editing collaboratively and harmoniously; and finally for anyone interested in how Wikipedia's vibrant and complicated community comes together to produce content, resolve disputes, and keep the site running. Finally, we touch on the wider world of Wikipedias in other languages, other Wikimedia projects, and the Wikimedia Foundation itself. We close with appendices about reusing Wikipedia content according to the terms of the GFDL license, and thoughts on using Wikipedia in a classroom setting. "Throughout, we provide community consensus viewpoints and our own thoughts on a common-sense approach to using and participating in Wikipedia, and a selection of carefully-chosen links to the thousands of pages of documentation, help and Wikipedia-space pages that we discuss -- not to mention a sprinkling of humor. In every discussion, we try to provide a sense of the community that supports and is at the heart of the Wikipedia project and mission." 
-- Phoebe Ayers,}, + collaborator = {{Phoebe Ayers; Charles Matthews; Ben Yates}}, + copyright = {Copyright (C) 2008 by Phoebe Ayers, Charles Matthews, and Ben Yates Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and with the Back-Cover Texts being "How Wikipedia Works", by Phoebe Ayers, Charles Matthews, and Ben Yates, published by No Starch Press. A copy of the license is included in the section entitled "GNU Free Documentation License".}, + langid = {english}, + keywords = {documentation,encyclopedias,Mediawiki,Social media,User-generated content,Wikimedia,Wikipedia,Wikipedia--Handbooks; manuals; etc.}, + file = {/home/nathante/Zotero/storage/MB2AZG45/HowWikipediaWorks%2FHowWikipediaWorks.epub} +} + +@inproceedings{raman_classifying_2020, + title = {Classifying {{Wikipedia Article Quality With Revision History Networks}}}, + booktitle = {Proceedings of the 16th {{International Symposium}} on {{Open Collaboration}}}, + author = {Raman, Narun and Sauerberg, Nathaniel and Fisher, Jonah and Narayan, Sneha}, + year = {2020}, + month = aug, + series = {{{OpenSym}} 2020}, + pages = {1--7}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {We present a novel model for classifying the quality of Wikipedia articles based on structural properties of a network representation of the article's revision history. We create revision history networks (an adaptation of Keegan et. al's article trajectory networks [7]), where nodes correspond to individual editors of an article, and edges join the authors of consecutive revisions. 
Using descriptive statistics generated from these networks, along with general properties like the number of edits and article size, we predict which of six quality classes (Start, Stub, C-Class, B-Class, Good, Featured) articles belong to, attaining a classification accuracy of 49.35\% on a stratified sample of articles. These results suggest that structures of collaboration underlying the creation of articles, and not just the content of the article, should be considered for accurate quality classification.}, + isbn = {978-1-4503-8779-8}, + keywords = {article quality,classification,collaboration,network analysis,quantitative methods,Wikipedia}, + file = {/home/nathante/Zotero/storage/3ZJCZI6W/Raman et al_2020_Classifying Wikipedia Article Quality With Revision History Networks.pdf} +} + +@article{reavley_quality_2012, + title = {Quality of Information Sources about Mental Disorders: A Comparison of {{Wikipedia}} with Centrally Controlled Web and Printed Sources}, + shorttitle = {Quality of Information Sources about Mental Disorders}, + author = {Reavley, N. J. and Mackinnon, A. J. and Morgan, A. J. and {Alvarez-Jimenez}, M. and Hetrick, S. E. and Killackey, E. and Nelson, B. and Purcell, R. and Yap, M. B. H. and Jorm, A. F.}, + year = {2012}, + month = aug, + journal = {Psychological Medicine}, + volume = {42}, + number = {8}, + pages = {1753--1762}, + issn = {1469-8978, 0033-2917}, + abstract = {Background Although mental health information on the internet is often of poor quality, relatively little is known about the quality of websites, such as Wikipedia, that involve participatory information sharing. The aim of this paper was to explore the quality of user-contributed mental health-related information on Wikipedia and compare this with centrally controlled information sources. 
Method Content on 10 mental health-related topics was extracted from 14 frequently accessed websites (including Wikipedia) providing information about depression and schizophrenia, Encyclopaedia Britannica, and a psychiatry textbook. The content was rated by experts according to the following criteria: accuracy, up-to-dateness, breadth of coverage, referencing and readability. Results Ratings varied significantly between resources according to topic. Across all topics, Wikipedia was the most highly rated in all domains except readability. Conclusions The quality of information on depression and schizophrenia on Wikipedia is generally as good as, or better than, that provided by centrally controlled websites, Encyclopaedia Britannica and a psychiatry textbook.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/PPKE7WIA/Reavley et al_2012_Quality of information sources about mental disorders.pdf;/home/nathante/Zotero/storage/YMZK3KM8/595CEE672BB7C503101FAF5A9E303673.html} +} + +@inproceedings{recht_imagenet_2019, + title = {Do {{ImageNet Classifiers Generalize}} to {{ImageNet}}?}, + booktitle = {International {{Conference}} on {{Machine Learning}}}, + author = {Recht, Benjamin and Roelofs, Rebecca and Schmidt, Ludwig and Shankar, Vaishaal}, + year = {2019}, + month = may, + pages = {5389--5400}, + publisher = {{PMLR}}, + issn = {2640-3498}, + abstract = {We build new test sets for the CIFAR-10 and ImageNet datasets. 
Both benchmarks have been the focus of intense research for almost a decade, raising the danger of overfitting to excessively re-used ...}, + langid = {english}, + file = {/home/nathante/Zotero/storage/D7JPAXNZ/Recht et al_2019_Do ImageNet Classifiers Generalize to ImageNet.pdf;/home/nathante/Zotero/storage/HH6SE7TA/recht19a.html} +} + +@article{redi_taxonomy_2021, + title = {A {{Taxonomy}} of {{Knowledge Gaps}} for {{Wikimedia Projects}} ({{Second Draft}})}, + author = {Redi, Miriam and Gerlach, Martin and Johnson, Isaac and Morgan, Jonathan and Zia, Leila}, + year = {2021}, + month = jan, + journal = {arXiv:2008.12314 [cs]}, + eprint = {2008.12314}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {In January 2019, prompted by the Wikimedia Movement's 2030 strategic direction, the Research team at the Wikimedia Foundation identified the need to develop a knowledge gaps index -- a composite index to support the decision makers across the Wikimedia movement by providing: a framework to encourage structured and targeted brainstorming discussions; data on the state of the knowledge gaps across the Wikimedia projects that can inform decision making and assist with measuring the long term impact of large scale initiatives in the Movement. After its first release in July 2020, the Research team has developed the second complete draft of a taxonomy of knowledge gaps for the Wikimedia projects, as the first step towards building the knowledge gap index. We studied more than 250 references by scholars, researchers, practitioners, community members and affiliates -- exposing evidence of knowledge gaps in readership, contributorship, and content of Wikimedia projects. We elaborated the findings and compiled the taxonomy of knowledge gaps in this paper, where we describe, group and classify knowledge gaps into a structured framework. 
The taxonomy that you will learn more about in the rest of this work will serve as a basis to operationalize and quantify knowledge equity, one of the two 2030 strategic directions, through the knowledge gaps index.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computers and Society}, + file = {/home/nathante/Zotero/storage/TIFWV8J6/Redi et al. - 2021 - A Taxonomy of Knowledge Gaps for Wikimedia Project.pdf} +} + +@article{sarkar_stre_2019, + title = {{{StRE}}: {{Self Attentive Edit Quality Prediction}} in {{Wikipedia}}}, + shorttitle = {{{StRE}}}, + author = {Sarkar, Soumya and Reddy, Bhanu Prakash and Sikdar, Sandipan and Mukherjee, Animesh}, + year = {2019}, + month = jun, + journal = {arXiv:1906.04678 [cs]}, + eprint = {1906.04678}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Wikipedia can easily be justified as a behemoth, considering the sheer volume of content that is added or removed every minute to its several projects. This creates an immense scope, in the field of natural language processing towards developing automated tools for content moderation and review. In this paper we propose Self Attentive Revision Encoder (StRE) which leverages orthographic similarity of lexical units toward predicting the quality of new edits. In contrast to existing propositions which primarily employ features like page reputation, editor activity or rule based heuristics, we utilize the textual content of the edits which, we believe contains superior signatures of their quality. More specifically, we deploy deep encoders to generate representations of the edits from its text content, which we then leverage to infer quality. We further contribute a novel dataset containing 21M revisions across 32K Wikipedia pages and demonstrate that StRE outperforms existing methods by a significant margin at least 17\% and at most 103\%. 
Our pretrained model achieves such result after retraining on a set as small as 20\% of the edits in a wikipage. This, to the best of our knowledge, is also the first attempt towards employing deep language models to the enormous domain of automated content moderation and review in Wikipedia.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Neural and Evolutionary Computing,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/3BAHCLC7/Sarkar et al_2019_StRE.pdf;/home/nathante/Zotero/storage/DSMFT5CS/1906.html} +} + +@inproceedings{schmidt_article_2019, + title = {Article Quality Classification on {{Wikipedia}}: Introducing Document Embeddings and Content Features}, + shorttitle = {Article Quality Classification on {{Wikipedia}}}, + booktitle = {Proceedings of the 15th {{International Symposium}} on {{Open Collaboration}}}, + author = {Schmidt, Manuel and Zangerle, Eva}, + year = {2019}, + month = aug, + series = {{{OpenSym}} '19}, + pages = {1--8}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {The quality of articles on the Wikipedia platform is vital for its success. Currently, the assessment of quality is performed manually by the Wikipedia community, where editors classify articles into pre-defined quality classes. However, this approach is hardly scalable and hence, approaches for the automatic classification have been investigated. In this paper, we extend this previous line of research on article quality classification by extending the set of features with novel content and edit features (e.g., document embeddings of articles). We propose a classification approach utilizing gradient boosted trees based on this novel, extended set of features extracted from Wikipedia articles. 
Based on an established dataset containing Wikipedia articles and quality classes, we show that our approach is able to substantially outperform previous approaches (also including recent deep learning methods). Furthermore, we shed light on the contribution of individual features and show that the proposed features indeed capture the quality of an article well.}, + isbn = {978-1-4503-6319-8}, + keywords = {classification,collaborative information systems,gradient boosted trees,information quality,Wikipedia}, + file = {/home/nathante/Zotero/storage/N8QRISAN/Schmidt_Zangerle_2019_Article quality classification on Wikipedia.pdf} +} + +@inproceedings{sheppard_quality_2011, + title = {Quality Is a {{Verb}}: {{The Operationalization}} of {{Data Quality}} in a {{Citizen Science Community}}}, + shorttitle = {Quality Is a {{Verb}}}, + booktitle = {Proceedings of the 7th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Sheppard, S. Andrew and Terveen, Loren}, + year = {2011}, + series = {{{WikiSym}} '11}, + pages = {29--38}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Citizen science is becoming more valuable as a potential source of environmental data. Involving citizens in data collection has the added educational benefits of increased scientific awareness and local ownership of environmental concerns. However, a common concern among domain experts is the presumed lower quality of data submitted by volunteers. In this paper, we explore data quality assurance practices in River Watch, a community-based monitoring program in the Red River basin. We investigate how the participants in River Watch understand and prioritize data quality concerns. We found that data quality in River Watch is primarily maintained through universal adherence to standard operating procedures, but there remain areas where technological intervention may help. 
We also found that rigorous data quality assurance practices appear to enhance rather than hinder the educational goals of the program. We draw implications for the design of quality assurance mechanisms for River Watch and other citizen science projects.}, + isbn = {978-1-4503-0909-7}, + file = {/home/nathante/Zotero/storage/AW9CJY5B/Sheppard_Terveen_2011_Quality is a Verb.pdf} +} + +@article{shi_wisdom_2019, + ids = {shi_wisdom_2019-1}, + title = {The Wisdom of Polarized Crowds}, + author = {Shi, Feng and Teplitskiy, Misha and Duede, Eamon and Evans, James A.}, + year = {2019}, + month = apr, + journal = {Nature Human Behaviour}, + volume = {3}, + number = {4}, + pages = {329--336}, + publisher = {{Nature Publishing Group}}, + issn = {2397-3374}, + abstract = {As political polarization in the United States continues to rise, the question of whether polarized individuals can fruitfully cooperate becomes pressing. Although diverse perspectives typically lead to superior team performance on complex tasks, strong political perspectives have been associated with conflict, misinformation and a reluctance to engage with people and ideas beyond one's echo chamber. Here, we explore the effect of ideological composition on team performance by analysing millions of edits to Wikipedia's political, social issues and science articles. We measure editors' online ideological preferences by how much they contribute to conservative versus liberal articles. Editor surveys suggest that online contributions associate with offline political party affiliation and ideological self-identity. Our analysis reveals that polarized teams consisting of a balanced set of ideologically diverse editors produce articles of a higher quality than homogeneous teams. The effect is most clearly seen in Wikipedia's political articles, but also in social issues and even science articles. 
Analysis of article `talk pages' reveals that ideologically polarized teams engage in longer, more constructive, competitive and substantively focused but linguistically diverse debates than teams of ideological moderates. More intense use of Wikipedia policies by ideologically diverse teams suggests institutional design principles to help unleash the power of polarization.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature Limited}, + langid = {english}, + file = {/home/nathante/Zotero/storage/SUP5HZ6U/Shi et al_2019_The wisdom of polarized crowds.pdf;/home/nathante/Zotero/storage/YT9TVD7R/Shi et al_2019_The wisdom of polarized crowds.pdf;/home/nathante/Zotero/storage/D8DIA97B/s41562-019-0541-6.html;/home/nathante/Zotero/storage/JB34TKTT/s41562-019-0541-6.html} +} + +@article{strathern_improving_1997, + title = {`{{Improving}} Ratings': Audit in the {{British University}} System}, + shorttitle = {`{{Improving}} Ratings'}, + author = {Strathern, Marilyn}, + year = {1997}, + month = jul, + journal = {European Review}, + volume = {5}, + number = {3}, + pages = {305--321}, + publisher = {{Cambridge University Press}}, + issn = {1474-0575, 1062-7987}, + abstract = {This paper gives an anthropological comment on what has been called the `audit explosion', the proliferation of procedures for evaluating performance. In higher education the subject of audit (in this sense) is not so much the education of the students as the institutional provision for their education. British universities, as institutions, are increasingly subject to national scrutiny for teaching, research and administrative competence. In the wake of this scrutiny comes a new cultural apparatus of expectations and technologies. While the metaphor of financial auditing points to the important values of accountability, audit does more than monitor\textemdash it has a life of its own that jeopardizes the life it audits. 
The runaway character of assessment practices is analysed in terms of cultural practice. Higher education is intimately bound up with the origins of such practices, and is not just the latter day target of them. \textcopyright{} 1997 by John Wiley \& Sons, Ltd.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/8BHAJ9RN/Strathern_1997_‘Improving ratings’.pdf;/home/nathante/Zotero/storage/SWKTUUPH/FC2EE640C0C44E3DB87C29FB666E9AAB.html} +} + +@inproceedings{teblunthuis_dwelling_2019, + ids = {teblunthuis_dwelling_2019-1}, + title = {Dwelling on {{Wikipedia}}: {{Investigating}} Time Spent by Global Encyclopedia Readers}, + booktitle = {{{OpenSym}} '19, {{The}} 15th {{International Symposium}} on {{Open Collaboration}}}, + author = {TeBlunthuis, Nathan and Bayer, Tilman and Vasileva, Olga}, + year = {2019}, + month = aug, + pages = {14}, + address = {{Sk\"ovde, Sweden}}, + langid = {english}, + file = {/home/nathante/Zotero/storage/5NGFRX7L/TeBlunthuis et al_2019_Dwelling on Wikipedia.pdf;/home/nathante/Zotero/storage/JR9UCSKW/TeBlunthuis et al. - Dwelling on Wikipedia Investigating time spent by.pdf} +} + +@article{teblunthuis_effects_2021, + ids = {teblunthuis_effects_2020}, + title = {Effects of {{Algorithmic Flagging}} on {{Fairness}}: {{Quasi-experimental Evidence}} from {{Wikipedia}}}, + shorttitle = {Effects of {{Algorithmic Flagging}} on {{Fairness}}}, + author = {TeBlunthuis, Nathan and Hill, Benjamin Mako and Halfaker, Aaron}, + year = {2021}, + month = apr, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {5}, + number = {CSCW1}, + eprint = {2006.03121}, + eprinttype = {arxiv}, + pages = {56:1--56:27}, + abstract = {Online community moderators often rely on social signals such as whether or not a user has an account or a profile page as clues that users may cause problems. Reliance on these clues can lead to ``overprofiling'' bias when moderators focus on these signals but overlook the misbehavior of others. 
We propose that algorithmic flagging systems deployed to improve the efficiency of moderation work can also make moderation actions more fair to these users by reducing reliance on social signals and making norm violations by everyone else more visible. We analyze moderator behavior in Wikipedia as mediated by RCFilters, a system which displays social signals and algorithmic flags, and estimate the causal effect of being flagged on moderator actions. We show that algorithmically flagged edits are reverted more often, especially those by established editors with positive social signals, and that flagging decreases the likelihood that moderation actions will be undone. Our results suggest that algorithmic flagging systems can lead to increased fairness in some contexts but that the relationship is complex and contingent.}, + archiveprefix = {arXiv}, + keywords = {ai,causal inference,community norms,Computer Science - Computers and Society,Computer Science - Human-Computer Interaction,Computer Science - Machine Learning,Computer Science - Social and Information Networks,fairness,K.4.3,machine learning,moderation,online communities,peer production,sociotechnical systems,wikipedia}, + file = {/home/nathante/Zotero/storage/9LEWQEUJ/TeBlunthuis et al_2020_The effects of algorithmic flagging on fairness.pdf;/home/nathante/Zotero/storage/DYFEYFUT/TeBlunthuis et al_2021_Effects of Algorithmic Flagging on Fairness.pdf;/home/nathante/Zotero/storage/EQV69NYF/2006.html} +} + +@inproceedings{tran_are_2020, + title = {Are Anonymity-Seekers Just like Everybody Else? 
{{An}} Analysis of Contributions to {{Wikipedia}} from {{Tor}}}, + shorttitle = {Are Anonymity-Seekers Just like Everybody Else?}, + booktitle = {2020 {{IEEE Symposium}} on {{Security}} and {{Privacy}} ({{SP}})}, + author = {Tran, Chau and Champion, Kaylea and Forte, Andrea and Hill, Benjamin Mako and Greenstadt, Rachel}, + year = {2020}, + volume = {1}, + pages = {974--990}, + publisher = {{IEEE Computer Society}}, + address = {{San Francisco, California}}, + abstract = {User-generated content sites routinely block contributions from users of privacy-enhancing proxies like Tor because of a perception that proxies are a source of vandalism, spam, and abuse. Although these blocks might be effective, collateral damage in the form of unrealized valuable contributions from anonymity seekers is invisible. One of the largest and most important user-generated content sites, Wikipedia, has attempted to block contributions from Tor users since as early as 2005. We demonstrate that these blocks have been imperfect and that thousands of attempts to edit on Wikipedia through Tor have been successful. We draw upon several data sources and analytical techniques to measure and describe the history of Tor editing on Wikipedia over time and to compare contributions from Tor users to those from other groups of Wikipedia users. Our analysis suggests that although Tor users who slip through Wikipedia's ban contribute content that is more likely to be reverted and to revert others, their contributions are otherwise similar in quality to those from other unregistered participants and to the initial contributions of registered users.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/RGAM25XB/1j2LfZYlubC.html} +} + +@article{tripodi_ms_2021, + title = {Ms. {{Categorized}}: {{Gender}}, Notability, and Inequality on {{Wikipedia}}}, + shorttitle = {Ms. 
{{Categorized}}}, + author = {Tripodi, Francesca}, + year = {2021}, + month = jun, + journal = {New Media \& Society}, + pages = {14614448211023772}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Gender is one of the most pervasive and insidious forms of inequality. For example, English-language Wikipedia contains more than 1.5 million biographies about notable writers, inventors, and academics, but less than 19\% of these biographies are about women. To try and improve these statistics, activists host ``edit-a-thons'' to increase the visibility of notable women. While this strategy helps create several biographies previously inexistent, it fails to address a more inconspicuous form of gender exclusion. Drawing on ethnographic observations, interviews, and quantitative analysis of web-scraped metadata, this article demonstrates that biographies about women who meet Wikipedia's criteria for inclusion are more frequently considered non-notable and nominated for deletion compared to men's biographies. 
This disproportionate rate is another dimension of gender inequality previously unexplored by social scientists and provides broader insights into how women's achievements are (under)valued.}, + langid = {english}, + keywords = {Articles for Deletion,gender gap,gender inequality,metadata,Wikipedia}, + file = {/home/nathante/Zotero/storage/QAXNPJ72/Tripodi_2021_Ms.pdf} +} + +@article{van_der_velden_decentering_2013, + title = {Decentering {{Design}}: {{Wikipedia}} and {{Indigenous Knowledge}}}, + shorttitle = {Decentering {{Design}}}, + author = {{van der Velden}, Maja}, + year = {2013}, + month = mar, + journal = {International Journal of Human\textendash Computer Interaction}, + volume = {29}, + number = {4}, + pages = {308--316}, + publisher = {{Taylor \& Francis}}, + issn = {1044-7318}, + abstract = {This article is a reflection on the case of Wikipedia, the largest online reference site with 23 million articles, with 365 million readers, and without a page called Indigenous knowledge. A Postcolonial Computing lens, extended with the notion of decentering, is used to find out what happened with Indigenous knowledge in Wikipedia. Wikipedia's ordering technologies, such as policies and templates, play a central role in producing knowledge. 
Two designs, developed with and for Indigenous communities, are introduced to explore if another Wikipedia's design is possible.}, + annotation = {\_eprint: https://doi.org/10.1080/10447318.2013.765768}, + file = {/home/nathante/Zotero/storage/IU8S7FRL/van der Velden_2013_Decentering Design.pdf;/home/nathante/Zotero/storage/SIRXEIGA/10447318.2013.html} +} + +@article{vehtari_practical_2017, + ids = {vehtari_practical_2017-1}, + title = {Practical {{Bayesian}} Model Evaluation Using Leave-One-out Cross-Validation and {{WAIC}}}, + author = {Vehtari, Aki and Gelman, Andrew and Gabry, Jonah}, + year = {2017}, + month = sep, + journal = {Statistics and Computing}, + volume = {27}, + number = {5}, + eprint = {1507.04544}, + eprinttype = {arxiv}, + pages = {1413--1432}, + issn = {0960-3174, 1573-1375}, + abstract = {Leave-one-out cross-validation (LOO) and the widely applicable information criterion (WAIC) are methods for estimating pointwise out-of-sample prediction accuracy from a fitted Bayesian model using the log-likelihood evaluated at the posterior simulations of the parameter values. LOO and WAIC have various advantages over simpler estimates of predictive error such as AIC and DIC but are less used in practice because they involve additional computational steps. Here we lay out fast and stable computations for LOO and WAIC that can be performed using existing simulation draws. We introduce an efficient computation of LOO using Pareto-smoothed importance sampling (PSIS), a new procedure for regularizing importance weights. Although WAIC is asymptotically equal to LOO, we demonstrate that PSIS-LOO is more robust in the finite case with weak priors or influential observations. As a byproduct of our calculations, we also obtain approximate standard errors for estimated predictive errors and for comparing of predictive errors between two models. 
We implement the computations in an R package called 'loo' and demonstrate using models fit with the Bayesian inference package Stan.}, + archiveprefix = {arXiv}, + keywords = {Statistics - Computation,Statistics - Methodology}, + file = {/home/nathante/Zotero/storage/M5H8F7EZ/Vehtari et al_2017_Practical Bayesian model evaluation using leave-one-out cross-validation and.pdf;/home/nathante/Zotero/storage/PHFMKLFX/Vehtari et al. - 2017 - Practical Bayesian model evaluation using leave-on.pdf;/home/nathante/Zotero/storage/AW3FBQRP/1507.html;/home/nathante/Zotero/storage/LKATPX25/1507.html} +} + +@book{venables_modern_2002, + title = {Modern Applied Statistics with {{S}}}, + author = {Venables, W. N and Ripley, Brian D}, + year = {2002}, + publisher = {{Springer}}, + address = {{New York}}, + abstract = {S is a powerful environment for the statistical and graphical analysis of data. It provides the tools to implement many statistical ideas that have been made possible by the widespread availability of workstations having good graphics and computational capabilities. This book is a guide to using S environments to perform statistical analyses and provides both an introduction to the use of S and a course in modern statistical methods. Implementations of S are available commercially in S-PLUS(R) workstations and as the Open Source R for a wide range of computer systems. The aim of this book is to show how to use S as a powerful and graphical data analysis system. Readers are assumed to have a basic grounding in statistics, and so the book is intended for would-be users of S-PLUS or R and both students and researchers using statistics. Throughout, the emphasis is on presenting practical problems and full analyses of real data sets. 
Many of the methods discussed are state of the art approaches to topics such as linear, nonlinear and smooth regression models, tree-based methods, multivariate analysis, pattern recognition, survival analysis, time series and spatial statistics. Throughout modern techniques such as robust methods, non-parametric smoothing and bootstrapping are used where appropriate. This fourth edition is intended for users of S-PLUS 6.0 or R 1.5.0 or later. A substantial change from the third edition is updating for the current versions of S-PLUS and adding coverage of R. The introductory material has been rewritten to emphasis the import, export and manipulation of data. Increased computational power allows even more computer-intensive methods to be used, and methods such as GLMMs.}, + isbn = {9780387954578 9786610189373 9781280189371}, + langid = {english}, + annotation = {OCLC: 1058013209} +} + +@article{volsky_quality_2012, + ids = {volsky_quality_2012-1}, + title = {Quality of {{Internet}} Information in Pediatric Otolaryngology: {{A}} Comparison of Three Most Referenced Websites}, + shorttitle = {Quality of {{Internet}} Information in Pediatric Otolaryngology}, + author = {Volsky, Peter G. and Baldassari, Cristina M. and Mushti, Sirisha and Derkay, Craig S.}, + year = {2012}, + month = sep, + journal = {International Journal of Pediatric Otorhinolaryngology}, + volume = {76}, + number = {9}, + pages = {1312--1316}, + issn = {0165-5876}, + abstract = {Objective Patients commonly refer to Internet health-related information. To date, no quantitative comparison of the accuracy and readability of common diagnoses in Pediatric Otolaryngology exist. Study aims: (1) identify the three most frequently referenced Internet sources; (2) compare the content accuracy and (3) ascertain user-friendliness of each site; (4) inform practitioners and patients of the quality of available information. 
Methods Twenty-four diagnoses in pediatric otolaryngology were entered in Google and the top five URLs for each were ranked. Articles were accessed for each topic in the three most frequently referenced sites. Standard rubrics were developed to include proprietary scores for content, errors, navigability, and validated metrics of readability. Results Wikipedia, eMedicine, and NLM/NIH MedlinePlus were the most referenced sources. For content accuracy, eMedicine scored highest (84\%; p {$<$} 0.05) over MedlinePlus (49\%) and Wikipedia (46\%). The highest incidence of errors and omissions per article was found in Wikipedia (0.98 {$\pm$} 0.19), twice more than eMedicine (0.42 {$\pm$} 0.19; p {$<$} 0.05). Errors were similar between MedlinePlus and both eMedicine and Wikipedia. On ratings for user interface, which incorporated Flesch\textendash Kinkaid Reading Level and Flesch Reading Ease, MedlinePlus was the most user-friendly (4.3 {$\pm$} 0.29). This was nearly twice that of eMedicine (2.4 {$\pm$} 0.26) and slightly greater than Wikipedia (3.7 {$\pm$} 0.3). All differences were significant (p {$<$} 0.05). There were 7 topics for which articles were not available on MedlinePlus. Conclusions Knowledge of the quality of available information on the Internet improves pediatric otolaryngologists' ability to counsel parents. The top web search results for pediatric otolaryngology diagnoses are Wikipedia, MedlinePlus, and eMedicine. Online information varies in quality, with a 46\textendash 84\% concordance with current textbooks. eMedicine has the most accurate, comprehensive content and fewest errors, but is more challenging to read and navigate. 
Both Wikipedia and MedlinePlus have lower content accuracy and more errors, however MedlinePlus is simplest of all to read, at a 9th Grade level.}, + file = {/home/nathante/Zotero/storage/KQ3G6CNY/Volsky et al_2012_Quality of Internet information in pediatric otolaryngology.pdf;/home/nathante/Zotero/storage/UMX6FM8I/S0165587612003369.html} +} + +@article{warncke-wang_misalignment_2015-1, + title = {Misalignment {{Between Supply}} and {{Demand}} of {{Quality Content}} in {{Peer Production Communities}}}, + author = {{Warncke-Wang}, Morten and Ranjan, Vivek and Terveen, Loren and Hecht, Brent}, + year = {2015}, + month = apr, + journal = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {9}, + number = {1}, + issn = {2334-0770}, + copyright = {Copyright (c)}, + langid = {english}, + keywords = {Wikipedia}, + file = {/home/nathante/Zotero/storage/EUK8XAYT/Warncke-Wang et al_2015_Misalignment Between Supply and Demand of Quality Content in Peer Production.pdf} +} + +@inproceedings{warncke-wang_success_2015, + ids = {warncke-wang_success_2015-1}, + title = {The {{Success}} and {{Failure}} of {{Quality Improvement Projects}} in {{Peer Production Communities}}}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {{Warncke-Wang}, Morten and Ayukaev, Vladislav R. and Hecht, Brent and Terveen, Loren G.}, + year = {2015}, + month = feb, + series = {{{CSCW}} '15}, + pages = {743--756}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Peer production communities have been proven to be successful at creating valuable artefacts, with Wikipedia as a prime example. However, a number of studies have shown that work in these communities tends to be of uneven quality and certain content areas receive more attention than others. 
In this paper, we examine the efficacy of a range of targeted strategies to increase the quality of under-attended content areas in peer production communities. Mining data from five quality improvement projects in the English Wikipedia, the largest peer production community in the world, we show that certain types of strategies (e.g. creating artefacts from scratch) have better quality outcomes than others (e.g. improving existing artefacts), even if both are done by a similar cohort of participants. We discuss the implications of our findings for Wikipedia as well as other peer production communities.}, + isbn = {978-1-4503-2922-4}, + keywords = {peer production,quality modelling,user-generated content,wikipedia}, + file = {/home/nathante/Zotero/storage/7RKRZ5J9/Warncke-Wang et al_2015_The Success and Failure of Quality Improvement Projects in Peer Production.pdf;/home/nathante/Zotero/storage/XXZ6US6B/Warncke-Wang et al_2015_The Success and Failure of Quality Improvement Projects in Peer Production.pdf} +} + +@inproceedings{warncke-wang_tell_2013, + title = {Tell Me More: An Actionable Quality Model for {{Wikipedia}}}, + shorttitle = {Tell Me More}, + booktitle = {Proceedings of the 9th {{International Symposium}} on {{Open Collaboration}}}, + author = {{Warncke-Wang}, Morten and Cosley, Dan and Riedl, John}, + year = {2013}, + month = aug, + series = {{{WikiSym}} '13}, + pages = {1--10}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {In this paper we address the problem of developing actionable quality models for Wikipedia, models whose features directly suggest strategies for improving the quality of a given article. We first survey the literature in order to understand the notion of article quality in the context of Wikipedia and existing approaches to automatically assess article quality. 
We then develop classification models with varying combinations of more or less actionable features, and find that a model that only contains clearly actionable features delivers solid performance. Lastly we discuss the implications of these results in terms of how they can help improve the quality of articles across Wikipedia.}, + isbn = {978-1-4503-1852-5}, + keywords = {classification,flaw detection,information quality,machine learning,modelling,Wikipedia}, + file = {/home/nathante/Zotero/storage/E3GPHFKF/Warncke-Wang et al_2013_Tell me more.pdf} +} + +@inproceedings{west_drawing_2012, + title = {Drawing a Data-Driven Portrait of {{Wikipedia}} Editors}, + booktitle = {Proceedings of the {{Eighth Annual International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {West, Robert and Weber, Ingmar and Castillo, Carlos}, + year = {2012}, + month = aug, + series = {{{WikiSym}} '12}, + pages = {1--10}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {While there has been a substantial amount of research into the editorial and organizational processes within Wikipedia, little is known about how Wikipedia editors (Wikipedians) relate to the online world in general. We attempt to shed light on this issue by using aggregated log data from Yahoo!'s browser toolbar in order to analyze Wikipedians' editing behavior in the context of their online lives beyond Wikipedia. We broadly characterize editors by investigating how their online behavior differs from that of other users; e.g., we find that Wikipedia editors search more, read more news, play more games, and, perhaps surprisingly, are more immersed in popular culture. Then we inspect how editors' general interests relate to the articles to which they contribute; e.g., we confirm the intuition that editors are more familiar with their active domains than average users. 
Finally, we analyze the data from a temporal perspective; e.g., we demonstrate that a user's interest in the edited topic peaks immediately before the edit. Our results are relevant as they illuminate novel aspects of what has become many Web users' prevalent source of information.}, + isbn = {978-1-4503-1605-7}, + keywords = {editors,expertise,web usage,Wikipedia}, + file = {/home/nathante/Zotero/storage/HS7786WY/West et al_2012_Drawing a data-driven portrait of Wikipedia editors.pdf} +} + +@inproceedings{wilkinson_cooperation_2007-1, + title = {Cooperation and Quality in {{Wikipedia}}}, + booktitle = {Proceedings of the 2007 {{International Symposium}} on {{Wikis}}}, + author = {Wilkinson, Dennis M. and Huberman, Bernardo A.}, + year = {2007}, + series = {{{WikiSym}} '07}, + pages = {157--164}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {The rise of the Internet has enabled collaboration and cooperation on an unprecedentedly large scale. The online encyclopedia Wikipedia, which presently comprises 7.2 million articles created by 7.04 million distinct editors, provides a consummate example. We examined all 50 million edits made to the 1.5 million English-language Wikipedia articles and found that the high-quality articles are distinguished by a marked increase in number of edits, number of editors, and intensity of cooperative behavior, as compared to other articles of similar visibility and age. This is significant because in other domains, fruitful cooperation has proven to be difficult to sustain as the size of the collaboration increases. Furthermore, in spite of the vagaries of human behavior, we show that Wikipedia articles accrete edits according to a simple stochastic mechanism in which edits beget edits. 
Topics of high interest or relevance are thus naturally brought to the forefront of quality.}, + isbn = {978-1-59593-861-9}, + keywords = {collaborative authoring,cooperation,groupware,Wikipedia}, + file = {/home/nathante/Zotero/storage/BA4AU4F9/Wilkinson and Huberman - 2007 - Cooperation and Quality in Wikipedia.pdf;/home/nathante/Zotero/storage/WSPRZK54/Wilkinson_and_Huberman-2007-Cooperation_and_quality_wikipedia.pdf} +} + +@misc{yeates_re_2020, + title = {Re: [{{Wiki-research-l}}] {{How}} to Quantifying "Effort" or "Time Spent" Put into Articles?}, + shorttitle = {Reply on {{Wiki-research-l}}}, + author = {Yeates, Stuart}, + year = {2020}, + month = oct +} + +@article{zhang_crowd_2017, + title = {Crowd {{Development}}: {{The Interplay}} between {{Crowd Evaluation}} and {{Collaborative Dynamics}} in {{Wikipedia}}}, + shorttitle = {Crowd {{Development}}}, + author = {Zhang, Ark Fangzhou and Livneh, Danielle and Budak, Ceren and Robert, Lionel P. and Romero, Daniel M.}, + year = {2017}, + month = dec, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {1}, + number = {CSCW}, + pages = {1--21}, + issn = {2573-0142}, + langid = {english}, + file = {/home/nathante/Zotero/storage/3J2SN8YD/Zhang et al. - 2017 - Crowd Development The Interplay between Crowd Eva.pdf} +} + +@inproceedings{zhang_history-based_2018, + title = {History-{{Based Article Quality Assessment}} on {{Wikipedia}}}, + booktitle = {2018 {{IEEE International Conference}} on {{Big Data}} and {{Smart Computing}} ({{BigComp}})}, + author = {Zhang, Shiyue and Hu, Zheng and Zhang, Chunhong and Yu, Ke}, + year = {2018}, + month = jan, + pages = {1--8}, + issn = {2375-9356}, + abstract = {Wikipedia is widely considered as the biggest encyclopedia on Internet. Quality assessment of articles on Wikipedia has been studied for years. Conventional methods addressed this task by feature engineering and statistical machine learning algorithms. 
However, manually defined features are difficult to represent the long edit history of an article. Recently, researchers proposed an end-to-end neural model which used a Recurrent Neural Network(RNN) to learn the representation automatically. Although RNN showed its power in modeling edit history, the end-to-end method is time and resource consuming. In this paper, we propose a new history-based method to represent an article. We also take advantage of an RNN to handle the long edit history, but we do not abandon feature engineering. We still represent each revision of an article by manually defined features. This combination of deep neural model and feature engineering enables our model to be both simple and effective. Experiments demonstrate our model has better or comparable performance than previous works, and has the potential to work as a real-time service. Plus, we extend our model to do quality prediction.}, + keywords = {Electronic publishing,Encyclopedias,Feature extraction,History,Information Quality,Internet,LSTM,Quality assessment,Wikipedia}, + file = {/home/nathante/Zotero/storage/JVIN5RGA/Zhang et al_2018_History-Based Article Quality Assessment on Wikipedia.pdf;/home/nathante/Zotero/storage/XDSP7EI9/8367090.html} +} + + diff --git a/dissertations/nathante_uw_2021/cdsc-memoir.sty b/dissertations/nathante_uw_2021/cdsc-memoir.sty new file mode 100644 index 0000000..5e8162d --- /dev/null +++ b/dissertations/nathante_uw_2021/cdsc-memoir.sty @@ -0,0 +1,209 @@ +% Some article styles and page layout tweaks for the LaTeX Memoir +% class. 
+% Defines page styles cdsc-page, cdsc-page-git, cdsc-page-overleaf, cdsc-page-memo and chapter styles cdsc-article, cdsc-memo. +% Copyright 2009-2018 Benjamin Mako Hill +% Copyright 2008-2009 Kieran Healy + +% Distributed as free software under the GNU GPL v3 + +% This file was originally based on one by Kieran Healy +% available here: http://github.com/kjhealy/latex-custom-kjh/ + +%%% Custom styles for headers and footers +%%% Basic: cdsc-page shows only the page number, in small caps, in the right-hand header slot + +\makepagestyle{cdsc-page} +%\makeevenfoot{cdsc-page}{\thepage}{}{} +%\makeoddfoot{cdsc-page}{}{}{\thepage} +%\makeheadrule{cdsc-page}{\textwidth}{\normalrulethickness} +\newcommand{\@cdscmarks}{% + \let\@mkboth\markboth + \def\chaptermark##1{% + \markboth{% + \ifnum \c@secnumdepth >\m@ne + \if@mainmatter + \thechapter. \ % + \fi + \fi + ##1}{}} + \def\sectionmark##1{% + \markright{##1}} +} +\makepsmarks{cdsc-page}{\@cdscmarks} +\makepsmarks{cdsc-page}{} % NOTE(review): this second call appears to replace the \@cdscmarks setting just above -- confirm intended +\makeevenhead{cdsc-page}{}{}{\scshape\thepage} +\makeoddhead{cdsc-page}{}{}{\scshape\thepage} + +%%% version control info in footers; requires vc package (\VCRevision and \VCDateTEX are not defined in this file) +% Make the style for vc-git revision control headers and footers +\makepagestyle{cdsc-page-git} +\newcommand{\@gitmarks}{% + \let\@mkboth\markboth + \def\chaptermark##1{% + \markboth{% + \ifnum \c@secnumdepth >\m@ne + \if@mainmatter + \thechapter. \ % + \fi + \fi + ##1}{}} + \def\sectionmark##1{% + \markright{##1}} +} +\makepsmarks{cdsc-page-git}{\@gitmarks} +\makeevenhead{cdsc-page-git}{}{}{\scshape\thepage} +\makeoddhead{cdsc-page-git}{}{}{\scshape\thepage} +\makeevenfoot{cdsc-page-git}{}{\texttt{\footnotesize{\textcolor{BrickRed}{git revision \VCRevision\ on \VCDateTEX}}}}{} +\makeoddfoot{cdsc-page-git}{}{\texttt{\footnotesize \textcolor{BrickRed}{git revision \VCRevision\ on \VCDateTEX}}}{} + +%%% print a datestamp from ShareLaTeX (centre footer shows "Buildstamp/Version:" followed by \pdfdate) +\makepagestyle{cdsc-page-overleaf} +\newcommand{\@slmarks}{% + \let\@mkboth\markboth + \def\chaptermark##1{% + \markboth{% + \ifnum \c@secnumdepth >\m@ne + \if@mainmatter + \thechapter.
\ % + \fi + \fi + ##1}{}} + \def\sectionmark##1{% + \markright{##1}} +} +\makepsmarks{cdsc-page-overleaf}{\@slmarks} +\makeevenhead{cdsc-page-overleaf}{}{}{\scshape\thepage} +\makeoddhead{cdsc-page-overleaf}{}{}{\scshape\thepage} +\makeevenfoot{cdsc-page-overleaf}{}{\texttt{\footnotesize{\textcolor{BrickRed}{Buildstamp/Version:~\pdfdate}}}}{} +\makeoddfoot{cdsc-page-overleaf}{}{\texttt{\footnotesize{\textcolor{BrickRed}{Buildstamp/Version:~\pdfdate}}}}{} + +%% Create a command to make a note at the top of the first page describing the +%% publication status of the paper: \published{<text>} stores <text> in \puB, which \maketitlehooka typesets in \footnotesize. +\newcommand{\published}[1]{% + \gdef\puB{#1}} + \newcommand{\puB}{} + \renewcommand{\maketitlehooka}{% + \par\noindent\footnotesize \puB} + +\makepagestyle{cdsc-page-memo} % memo page style: empty headers, centred "page/last page" footer +\makeevenhead{cdsc-page-memo}{}{}{} +\makeoddhead{cdsc-page-memo}{}{}{} +\makeevenfoot{cdsc-page-memo}{}{\scshape \thepage/\pageref{LastPage}}{} +\makeoddfoot{cdsc-page-memo}{}{\scshape \thepage/\pageref{LastPage}}{} + +\usepackage{lastpage} % supplies the LastPage label referenced by the cdsc-page-memo footers +\usepackage{datetime} % NOTE(review): presumably the source of \pdfdate used in the cdsc-page-overleaf footers -- confirm + +% blank footnote +% Use \symbolfootnote[0]{Footnote text} for a blank footnote. +% Useful for initial acknowledgment note.
+\long\def\symbolfootnote[#1]#2{\begingroup% +\def\thefootnote{\fnsymbol{footnote}}\footnote[#1]{#2}\endgroup} + +% put a period after the section numbers +\setsecnumformat{\csname the#1\endcsname.\enspace} + +% set fonts to garamond (ugm) and helvetica (phv) +\renewcommand{\rmdefault}{ugm} +\renewcommand{\sfdefault}{phv} + +% material shared between the two modes (the cdsc-article and cdsc-memo chapter styles) + +\setsubsecheadstyle{\normalsize\itshape} +\setaftersubsubsecskip{-1em} +\setsubsubsecheadstyle{\small\bfseries} +\renewcommand{\printchaptername}{} +\renewcommand{\chapternamenum}{} +\renewcommand{\chapnumfont}{\chaptitlefont} +\renewcommand{\printchapternum}{\chapnumfont \thechapter\space} +\renewcommand{\afterchapternum}{} +\renewcommand{\printchaptername}{\secheadstyle} % NOTE(review): overrides the empty \printchaptername set earlier in this stanza -- confirm intended +\renewcommand{\cftchapterfont}{\normalfont} +\renewcommand{\cftchapterpagefont}{\normalfont\scshape} +\renewcommand{\cftchapterpresnum}{\scshape} +\captiontitlefont{\small} + +% turn off chapter numbering (stanza is currently commented out) +% \counterwithout{section}{chapter} +% \counterwithout{figure}{chapter} +% \counterwithout{table}{chapter} + +% suppress chapter numbers (stanza is currently commented out) +% \maxsecnumdepth{chapter} +% \setsecnumdepth{chapter} + +% for numbered sections and subsections: +% (a) comment out the above stanza; (b) uncomment the one below +% \maxsecnumdepth{subsection} +% \setsecnumdepth{subsection} + +% set name of bibliography to 'references' +\renewcommand{\bibname}{References} + +% >> cdsc-article: flush-left italic title, run-in abstract, cdsc-page page style << +\makechapterstyle{cdsc-article}{ + + % section heading style + \setsecheadstyle{\large\scshape} + + % reduce skip after section heading + \setaftersecskip{1.7ex} + + % Title flush left + \pretitle{\flushleft\LARGE \itshape} + \posttitle{\par\vskip 0.5em} + \preauthor{\flushleft \large \lineskip 1em} + \postauthor{\par\lineskip 1em} + \predate{\flushleft\footnotesize\vspace{0.65em}} + \postdate{\par\vskip 1em} + + % 'abstract' title, bigger skip from title + \renewcommand{\abstractname}{Abstract:} + \renewcommand{\abstractnamefont}{\normalfont\small\bfseries} +
\renewcommand{\abstracttextfont}{\normalfont\small} + \setlength{\absparindent}{0em} + \setlength{\abstitleskip}{-1.5em} + \abstractrunin + + % this is the default page style for chapters + \pagestyle{cdsc-page} + +} + +% >> cdsc-memo: one-line uppercase title/author, run-in abstract, cdsc-page-memo page style << +\makechapterstyle{cdsc-memo}{ + + % section heading style + \setsecheadstyle{\large\sffamily\bfseries\MakeUppercase} + + % reduce skip after section heading + \setaftersecskip{1pt} + \setbeforesecskip{-1em} + \setaftersubsecskip{1pt} + \setbeforesubsecskip{-1em} + % \setaftersubsubsecskip{1pt} + % \setbeforesubsubsecskip{-1em} + + + % compact title (uppercase title, \hfill, uppercase author); \abstractname carries font switches only, no label text + % \renewcommand{\maketitle}{\{\preauthor \theauthor\} \hfill \thetitle} + \renewcommand{\maketitle}{ + {\Large\sffamily\bfseries\MakeUppercase\thetitle} \hfill + {\Large\sffamily\MakeUppercase\theauthor} + \vskip 0.7em} + \renewcommand{\abstractname}{\normalfont\scriptsize\noindent} + \renewcommand{\abstracttextfont}{\normalfont\scriptsize} + \abstractrunin + + % set name of bibliography to 'references' + \renewcommand{\bibname}{References} + + \parindent 0pt + + % this is the default page style for chapters + \pagestyle{cdsc-page-memo} + +} + +\endinput + diff --git a/dissertations/nathante_uw_2021/ch1_intro.bib b/dissertations/nathante_uw_2021/ch1_intro.bib new file mode 100644 index 0000000..ae9f196 --- /dev/null +++ b/dissertations/nathante_uw_2021/ch1_intro.bib @@ -0,0 +1,1698 @@ + +@book{aldrich_organizations_2006, + title = {Organizations {{Evolving}}}, + author = {Aldrich, H.E.
and Ruef, M.}, + date = {2006}, + edition = {2}, + publisher = {{SAGE Publications}}, + location = {{Thousand Oaks, CA}}, + isbn = {978-1-4129-1047-7} +} + +@inproceedings{arazy_functional_2015, + title = {Functional Roles and Career Paths in {{Wikipedia}}}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Arazy, Ofer and Ortega, Felipe and Nov, Oded and Yeo, Lisa and Balila, Adam}, + date = {2015}, + series = {{{CSCW}} '15}, + pages = {1092--1105}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {An understanding of participation dynamics within online production communities requires an examination of the roles assumed by participants. Recent studies have established that the organizational structure of such communities is not flat; rather, participants can take on a variety of well-defined functional roles. What is the nature of functional roles? How have they evolved? And how do participants assume these functions? Prior studies focused primarily on participants' activities, rather than functional roles. Further, extant conceptualizations of role transitions in production communities, such as the Reader to Leader framework, emphasize a single dimension: organizational power, overlooking distinctions between functions. In contrast, in this paper we empirically study the nature and structure of functional roles within Wikipedia, seeking to validate existing theoretical frameworks. The analysis sheds new light on the nature of functional roles, revealing the intricate "career paths" resulting from participants' role transitions.}, + isbn = {978-1-4503-2922-4}, + file = {/home/nathante/Zotero/storage/ZRNAAPUH/Arazy et al. 
- 2015 - Functional roles and career paths in Wikipedia.pdf} +} + +@inproceedings{arazy_how_2017, + ids = {arazy2017and}, + title = {On the "How" and "Why" of Emergent Role Behaviors in {{Wikipedia}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}} - {{CSCW}} '17}, + author = {Arazy, Ofer and Lifshitz-Assaf, Hila and Nov, Oded and Daxenberger, Johannes and Balestra, Martina and Cheshire, Coye}, + date = {2017}, + pages = {2039--2051}, + publisher = {{ACM Press}}, + location = {{Portland, Oregon, USA}}, + abstract = {Research on peer-production suggests that as participants choose what actions to perform, prototypical activity patterns emerge. Recent work characterized these patterns and demonstrated that informal emergent roles are highly stable. Nonetheless, we know little about the ways in which contributors take on and shed emergent roles. The objectives of this study are to: (a) delineate the temporal dynamics of participants’ emergent role taking behaviors, and (b) identify the motivations driving role-transition behaviors. Our study links motivation to role-transition behaviors within Wikipedia. Our first sample covered eleven years and 222,119 contributors, and was used to identify four categories of temporal role-taking behaviors, that differ in their mobility between emergent roles and across Wikipedia articles. Our second examination linked the motivations of 175 new participants to their subsequent role-taking activity over 14 months. Together, the two analyses reveal that role-taking categories can be distinguished based on participants’ motivational orientation (intrinsic/extrinsic and self/others-oriented).}, + eventtitle = {The 2017 {{ACM Conference}}}, + isbn = {978-1-4503-4335-0}, + langid = {english}, + file = {/home/nathante/Zotero/storage/ZJ25SYGV/Arazy et al.
- 2017 - On the How and Why of Emergent Role Behaviors .pdf} +} + +@article{arazy_turbulent_2016, + title = {Turbulent {{Stability}} of {{Emergent Roles}}: {{The Dualistic Nature}} of {{Self-Organizing Knowledge Coproduction}}}, + shorttitle = {Turbulent {{Stability}} of {{Emergent Roles}}}, + author = {Arazy, Ofer and Daxenberger, Johannes and Lifshitz-Assaf, Hila and Nov, Oded and Gurevych, Iryna}, + date = {2016-12}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {27}, + number = {4}, + pages = {792--812}, + issn = {1047-7047, 1526-5536}, + langid = {english}, + file = {/home/nathante/Zotero/storage/GJBJ39Q9/Arazy et al. - 2016 - Turbulent Stability of Emergent Roles The Dualist.pdf} +} + +@article{armstrong_competitive_1980, + ids = {armstrong_competitive_1980-1}, + title = {Competitive {{Exclusion}}}, + author = {Armstrong, Robert A. and McGehee, Richard}, + date = {1980-02-01}, + journaltitle = {The American Naturalist}, + shortjournal = {The American Naturalist}, + volume = {115}, + number = {2}, + pages = {151--170}, + publisher = {{The University of Chicago Press}}, + issn = {0003-0147}, + abstract = {Recent developments in the mathematical theory of competitive exclusion are discussed and placed in historical perspective. The models which have been used in theoretical investigations of competitive exclusion are classified into two groups: those in which the resources regenerate according to an algebraic relationship (abiotic resource models), and those in which resource regeneration is governed by differential equations (biotic resource models). We then propose a mathematical framework for considering problems of competitive exclusion, and provide examples in which n competitors can coexist on k {$<$} n resources (both biotic and abiotic). These systems persist because of internally generated cyclic behavior. 
We conclude that the competitive exclusion principle applies in general only to coexistence at fixed densities.}, + file = {/home/nathante/Zotero/storage/WY46EPM3/Nat - 2021 - Competitive Exclusion.pdf;/home/nathante/Zotero/storage/6RRFPS4Z/283553.html} +} + +@article{astley_two_1985, + title = {The {{Two Ecologies}}: {{Population}} and {{Community Perspectives}} on {{Organizational Evolution}}}, + shorttitle = {The {{Two Ecologies}}}, + author = {Astley, W. Graham}, + date = {1985}, + journaltitle = {Administrative Science Quarterly}, + volume = {30}, + number = {2}, + eprint = {2393106}, + eprinttype = {jstor}, + pages = {224--241}, + issn = {0001-8392}, + abstract = {This paper distinguishes between two ecological perspectives on organizational evolution: population ecology and community ecology. The perspectives adopt different levels of analysis and produce contrasting views of the characteristic mode and tempo of organizational evolution. Population ecology limits investigation to evolutionary change unfolding within established populations, emphasizing factors that homogenize organizational forms and maintain population stability. Population ecology thus fails to explain how populations originate in the first place or how evolutionary change occurs through the proliferation of heterogeneous organizational types. 
Community ecology overcomes these limitations: it focuses on the rise and fall of populations as basic units of evolutionary change, simultaneously explaining forces that produce homogeneity and stability within populations and heterogeneity between them.}, + file = {/home/nathante/Zotero/storage/4Q76BREE/Astley - 1985 - The Two Ecologies Population and Community Perspe.pdf} +} + +@inproceedings{balestra_investigating_2017, + title = {Investigating the {{Motivational Paths}} of {{Peer Production Newcomers}}}, + booktitle = {Proceedings of the 2017 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Balestra, Martina and Cheshire, Coye and Arazy, Ofer and Nov, Oded}, + date = {2017}, + series = {{{CHI}} '17}, + pages = {6381--6385}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Maintaining participation beyond the initial period of engagement is critical for peer production systems. Theory suggests that an increase in motivation is expected with contributors' movement from the community periphery to the core. Less is known, however, about how specific motivations change over time. We fill this gap by focusing on individual motivational paths in the formative periods of engagement, exploring which motivations change and how. We collected data on various instrumental and non-instrumental motivations at two points in study participants' Wikipedia career: when they started editing and again after six months. We found that non-instrumental motivations (including collective and intrinsic motives) decreased significantly over time, in contrast with socially-driven motivations such as norm-oriented motives which did not change and social motives which increased marginally. The findings offer new insights into newcomers' evolving motivations, with implications for designing and managing peer-production systems.}, + isbn = {978-1-4503-4655-9}, + file = {/home/nathante/Zotero/storage/2E3UFPMA/Balestra et al.
- 2017 - Investigating the Motivational Paths of Peer Produ.pdf} +} + +@article{barnett_competition_1987, + title = {Competition and Mutualism among Early Telephone Companies}, + author = {Barnett, William P. and Carroll, Glenn R.}, + date = {1987}, + journaltitle = {Administrative Science Quarterly}, + volume = {32}, + number = {3}, + eprint = {2392912}, + eprinttype = {jstor}, + pages = {400--421}, + issn = {0001-8392}, + abstract = {In an exploratory study of the early telephone industry, we search for evidence of competition and mutualism between legally autonomous companies. Neighboring companies are found to have both types of interdependencies, although their exact nature depends on organizational form. Companies in separate geographical locations are found to be competitive with each other, regardless of organizational form. The two prevalent organizational forms in the industry at this time each apparently flourished in distinct niches and were symbiotically related. The findings are interpreted within a community ecology framework.} +} + +@article{baronchelli_emergence_2018, + title = {The Emergence of Consensus: A Primer}, + shorttitle = {The Emergence of Consensus}, + author = {Baronchelli, Andrea}, + date = {2018-02-01}, + journaltitle = {Open Science}, + volume = {5}, + number = {2}, + pages = {172189}, + issn = {2054-5703}, + abstract = {The origin of population-scale coordination has puzzled philosophers and scientists for centuries. Recently, game theory, evolutionary approaches and complex systems science have provided quantitative insights on the mechanisms of social consensus. However, the literature is vast and widely scattered across fields, making it hard for the single researcher to navigate it. This short review aims to provide a compact overview of the main dimensions over which the debate has unfolded and to discuss some representative examples. 
It focuses on those situations in which consensus emerges ‘spontaneously’ in the absence of centralized institutions and covers topics that include the macroscopic consequences of the different microscopic rules of behavioural contagion, the role of social networks and the mechanisms that prevent the formation of a consensus or alter it after it has emerged. Special attention is devoted to the recent wave of experiments on the emergence of consensus in social systems.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/BCQ4892J/Baronchelli - 2018 - The emergence of consensus a primer.pdf;/home/nathante/Zotero/storage/WPXC9FJ7/172189.html} +} + +@incollection{baum_ecological_2006, + title = {Ecological Approaches to Organizations}, + booktitle = {Sage {{Handbook}} for {{Organization Studies}}}, + author = {Baum, Joel A. C. and Shipilov, Andrew V.}, + date = {2006}, + pages = {55--110}, + publisher = {{Sage}}, + location = {{Rochester, NY}}, + abstract = {Our goal is to assess and consolidate the current state-of-the-art in organizational ecology. To accomplish this we review major theoretical statements, empirical studies, and arguments that are now being made. Although we attempt to survey ecological approaches to organizations comprehensively, because ecological research now constitutes a very large body of work, and because other extensive reviews are available (Aldrich \& Wiedenmayer, 1993; Barnett \& Carroll, 1995; Baum, 1996; Baum \& Amburgey, 2002; Baum \& Rao, 2004; Carroll, Dobrev \& Swaminathan, 2002; Galunic \& Weeks 2002; Rao, 2002; Singh \& Lumsden, 1990), we emphasize recent work that challenges and extends established theory and highlight new and emerging directions for future research that appear promising. 
Our appraisal focuses on two main themes - demographic processes and ecological processes.}, + file = {/home/nathante/Zotero/storage/EGQC2W5I/Baum and Shipilov - 2006 - Ecological approaches to organizations.pdf;/home/nathante/Zotero/storage/38MBRGMQ/papers.html} +} + +@incollection{benkler_peer_2015, + title = {Peer Production: {{A}} Form of Collective Intelligence}, + booktitle = {Handbook of {{Collective Intelligence}}}, + author = {Benkler, Yochai and Shaw, Aaron and Hill, Benjamin Mako}, + editor = {Malone, Thomas W. and Bernstein, Michael S.}, + date = {2015}, + pages = {175--204}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-02981-0}, + langid = {english}, + file = {/home/nathante/Zotero/storage/SKULU2E6/Benkler et al. - 2015 - Peer production A form of collective intelligence.pdf} +} + +@book{benkler_wealth_2006, + title = {The Wealth of Networks: {{How}} Social Production Transforms Markets and Freedom}, + author = {Benkler, Yochai}, + date = {2006}, + publisher = {{Yale University Press}}, + location = {{New Haven, CT}}, + pagetotal = {528}, + keywords = {bookReview,Economics,FOSS,foundations of social computing,import,Innovation,Legal Studies,peer production} +} + +@book{bimber_collective_2012, + ids = {bimber_collective_2012-1}, + title = {Collective Action in Organizations: {{Interaction}} and Engagement in an Era of Technological Change}, + shorttitle = {Collective Action in Organizations}, + author = {Bimber, Bruce A. and Flanagin, Andrew J. and Stohl, Cynthia}, + date = {2012}, + publisher = {{Cambridge University Press}}, + location = {{New York, NY}}, + abstract = {"This book explores how people participate in public life through organizations. The authors examine The American Legion, AARP, and MoveOn, and show surprising similarities across these three organizations"--Provided by publisher.
"This book offers a new theory of collective action for the age of digital media, attesting to the continued relevance of formal organizations in a time when digital media can make it seem that organizations are outdated. The authors examine the dynamics of membership in three distinctive organizations: The American Legion, AARP, and MoveOn. They develop the theory of Collective Action Space to demonstrate the important dimensions of membership and use survey and interview data to explore commonalities across the organizations, each of which exhibits four 'participatory styles.' The book shows that predictors of participation vary greatly across participatory styles, and rather little across organizations. The book wrestles with a crucial feature of contemporary collective action, wherein technology does not necessarily make people participate more, but people consistently use technology when they participate. The result is a theoretically rich and empirically fresh portrait of collective action, organization, and technology"--Provided by publisher.}, + isbn = {978-0-521-19172-2}, + langid = {english}, + pagetotal = {224} +} + +@inproceedings{bryant_becoming_2005, + title = {Becoming {{Wikipedian}}: Transformation of Participation in a Collaborative Online Encyclopedia}, + shorttitle = {Becoming {{Wikipedian}}}, + booktitle = {Proceedings of the 2005 {{International ACM SIGGROUP Conference}} on {{Supporting Group Work}}}, + author = {Bryant, Susan L. and Forte, Andrea and Bruckman, Amy}, + date = {2005}, + series = {{{GROUP}} '05}, + pages = {1--10}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Traditional activities change in surprising ways when computer-mediated communication becomes a component of the activity system.
In this descriptive study, we leverage two perspectives on social activity to understand the experiences of individuals who became active collaborators in Wikipedia, a prolific, cooperatively-authored online encyclopedia. Legitimate peripheral participation provides a lens for understanding participation in a community as an adaptable process that evolves over time. We use ideas from activity theory as a framework to describe our results. Finally, we describe how activity on the Wikipedia stands in striking contrast to traditional publishing and suggests a new paradigm for collaborative systems.}, + isbn = {1-59593-223-2}, + keywords = {activity theory,community,legitimate peripheral participation,qualitative,Wiki,wikipedia}, + file = {/home/nathante/Zotero/storage/VJXQFTDD/Bryant et al. - 2005 - Becoming Wikipedian transformation of participati.pdf} +} + +@article{burgelman_intraorganizational_1991, + title = {Intraorganizational {{Ecology}} of {{Strategy Making}} and {{Organizational Adaptation}}: {{Theory}} and {{Field Research}}}, + shorttitle = {Intraorganizational {{Ecology}} of {{Strategy Making}} and {{Organizational Adaptation}}}, + author = {Burgelman, Robert A.}, + date = {1991-08-01}, + journaltitle = {Organization Science}, + volume = {2}, + number = {3}, + pages = {239--262}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {This paper presents an intraorganizational ecological perspective on strategy making, and examines how internal selection may combine with external selection to explain organizational change and survival. The perspective serves to illuminate data from a field study of the evolution of Intel Corporation's corporate strategy. The data, in turn, are used to refine and deepen the conceptual framework. Relationships between induced and autonomous strategic processes and four modes of organizational adaptation are discussed. 
Apparent paradoxes associated with structural inertia and strategic reorientation arguments are elucidated and several new propositions derived. The paper proposes that consistently successful organizations are characterized by top managements who spend efforts on building the induced and autonomous strategic processes, as well as concerning themselves with the content of strategy; that such organizations simultaneously exercise induced and autonomous processes; and that successful reorientations in organizations are likely to have been preceded by internal experimentation and selection processes effected through the autonomous process.}, + keywords = {corporate strategy,evolutionary management,organizational ecology,selection and adaptation} +} + +@inbook{burgess_computational_2018, + ids = {foote_computational_2017}, + title = {A Computational Analysis of Social Media Scholarship}, + booktitle = {The {{SAGE Handbook}} of {{Social Media}}}, + author = {Foote, Jeremy and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2018}, + pages = {111--134}, + publisher = {{SAGE Publications Ltd}}, + location = {{1 Oliver's Yard,~55 City Road~London~EC1Y 1SP}}, + abstract = {Data from social media platforms and online communities have fueled the growth of computational social science. In this chapter, we use computational analysis to characterize the state of research on social media and demonstrate the utility of such methods. First, we discuss how to obtain datasets from the APIs published by many social media platforms. Then, we perform some of the most widely used computational analyses on a dataset of social media scholarship we extract from the Scopus bibliographic database’s API. We apply three methods: network analysis, topic modeling using latent Dirichlet allocation, and statistical prediction using machine learning. For each technique, we explain the method and demonstrate how it can be used to draw insights from our dataset. 
Our analyses reveal overlapping scholarly communities studying social media. We find that early social media research applied social network analysis and quantitative methods, but the most cited and influential work has come from marketing and medical research. We also find that publication venue and, to a lesser degree, textual features of papers explain the largest variation in incoming citations. We conclude with some consideration of the limitations of computational research and future directions.}, + bookauthor = {Burgess, Jean and Marwick, Alice and Poell, Thomas}, + isbn = {978-1-4129-6229-2 978-1-4739-8406-6}, + langid = {english}, + file = {/home/nathante/Zotero/storage/W8C4ULRU/Foote et al. - 2018 - A Computational Analysis of Social Media Scholarsh.pdf} +} + +@article{butler_attraction-selection-attrition_2014, + title = {An Attraction-Selection-Attrition Theory of Online Community Size and Resilience}, + author = {Butler, Brian S. and Bateman, Patrick J. and Gray, Peter H. and Diamant, E. Ilana}, + date = {2014-09}, + journaltitle = {MIS Q.}, + volume = {38}, + number = {3}, + pages = {699--728}, + issn = {0276-7783}, + abstract = {Online discussion communities play an important role in the development of relationships and the transfer of knowledge within and across organizations. Their underlying technologies enhance these processes by providing infrastructures through which group-based communication can occur. Community administrators often make decisions about technologies with the goal of enhancing the user experience, but the impact of such decisions on how a community develops must also be considered. To shed light on this complex and under-researched phenomenon, we offer a model of key latent constructs influenced by technology choices and possible causal paths by which they have dynamic effects on communities. 
Two important community characteristics that can be impacted are community size (number of members) and community resilience (membership that is willing to remain involved with the community in spite of variability and change in the topics discussed). To model community development, we build on attraction-selection-attrition (ASA) theory, introducing two new concepts: participation costs (how much time and effort are required to engage with content provided in a community) and topic consistency cues (how strongly a community signals that topics that may appear in the future will be consistent with what it has hosted in the past). We use the proposed ASA theory of online communities (OCASA) to develop a simulation model of community size and resilience that affirms some conventional wisdom and also has novel and counterintuitive implications. Analysis of the model leads to testable new propositions about the causal paths by which technology choices affect the emergence of community size and community resilience, and associated implications for community sustainability.}, + file = {/home/nathante/Zotero/storage/292C8XTF/Butler et al. - 2014 - An Attraction-selection-attrition Theory of Online.pdf} +} + +@article{butler_cross-purposes_2011, + title = {The Cross-Purposes of Cross-Posting: Boundary Reshaping Behavior in Online Discussion Communities}, + shorttitle = {The Cross-Purposes of Cross-Posting}, + author = {Butler, Brian S. and Wang, Xiaoqing}, + date = {2011-09-15}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {23}, + pages = {993--1010}, + issn = {1047-7047}, + abstract = {Increasingly, online discussion communities are used to support activities ranging from software development to political campaigns. 
An important feature of an online discussion community is its content boundaries, which are individual perceptions of what materials and discussions are part of the community and what are not, and how that community is related to others within a larger system. Yet in spite of its importance, many community infrastructures allow individual participants to reshape content boundaries by simultaneously associating their contributions with multiple online discussion communities. This reshaping behavior is a controversial aspect of the creation and management of many types of online discussion communities. On one hand, many communities explicitly discourage boundary reshaping behaviors in their frequently asked questions or terms-of-use document. On the other hand, community infrastructures continue to allow such reshaping behaviors. To explain this controversy, we theorize how the extent of boundary reshaping in an online discussion community has simultaneously positive and negative effects on its member dynamics and responsiveness. We test predictions about the conflicting effects of reshaping behaviors with 60 months of longitudinal data from 140 USENET newsgroups, focusing on cross-posting activities as a form of reshaping behavior. Empirical results are consistent with the proposed hypotheses that reshaping behaviors within a discussion community affect member dynamics and community responsiveness in both positive and negative ways. 
Taken together, the findings highlight the boundary-related design challenges faced by managers seeking to support ongoing activity within online discussion communities.}, + issue = {3-part-2}, + file = {/home/nathante/Zotero/storage/MHIHVXMA/Butler and Wang - 2012 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/ZDTPFJP3/Butler and Wang - 2011 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/5XCPFJS9/isre.1110.html} +} + +@article{butler_membership_2001, + title = {Membership Size, Communication Activity, and Sustainability: {{A}} Resource-Based Model of Online Social Structures}, + shorttitle = {Membership {{Size}}, {{Communication Activity}}, and {{Sustainability}}}, + author = {Butler, Brian S.}, + date = {2001}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {12}, + number = {4}, + eprint = {23011457}, + eprinttype = {jstor}, + pages = {346--362}, + issn = {1047-7047}, + abstract = {As telecommunication networks become more common, there is an increasing interest in the factors underlying the development of online social structures. It has been proposed that these structures are new forms of organizing which are not subject to the same constraints as traditional social structures. However, from anecdotal evidence and case studies it is difficult to evaluate whether online social structures are subject to the same problems as traditional social structures. Drawing from prior studies of traditional social structures and empirical analyses of longitudinal data from a sample of Internet-based groups, this exploratory work considers the role of size and communication activity in sustainable online social structures. A resource-based theory of sustainable social structures is presented. Members contribute time, energy, and other resources, enabling a social structure to provide benefits for individuals. 
These benefits, which include information, influence, and social support, are the basis for a social structure's ability to attract and retain members. This model focuses on the system of opposing forces that link membership size as a component of resource availability and communication activity as an aspect of benefit provision to the sustainability of an online social structure. Analyses of data from a random sample of e-mail-based Internet social structures (listservs) indicate that communication activity and size have both positive and negative effects on a structure's sustainability. These results suggest that while the use of networked communication technologies may alter the form of communication, balancing the opposing impacts of membership size and communication activity in order to maintain resource availability and provide benefits for current members remains a fundamental problem underlying the development of sustainable online social structures.}, + file = {/home/nathante/Zotero/storage/4ENNLMAH/Butler - 2001 - Membership Size, Communication Activity, and Susta.pdf;/home/nathante/Zotero/storage/U7AUNAZT/Butler-2001-ISR-Membership_size_communication_activitiy_sustainability.pdf} +} + +@article{carroll_concentration_1985, + title = {Concentration and Specialization: {{Dynamics}} of Niche Width in Populations of Organizations}, + shorttitle = {Concentration and {{Specialization}}}, + author = {Carroll, Glenn R.}, + date = {1985-05-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {90}, + number = {6}, + pages = {1262--1283}, + issn = {0002-9602}, + abstract = {This paper departs from the common practice of focusing on large, generalist organizations and shows that new organizational insights are obtained by adopting a broader, ecological perspective. The newspaper publishing industry is examined as an illustration. 
The ecological focus shows that many small, specialized organizations operate successfully in this industry, despite apparently high levels of local concentration. A resource-partitioning model is advanced to explain the interorganizational relationships between generalist and specialist organizations. Statistical tests of the model using historical data on 2,808 American local newspaper organizations show the merit of using the ecological perspective for analyzing industries.}, + file = {/home/nathante/Zotero/storage/G38AK5SZ/Carroll - 1985 - Concentration and specialization Dynamics of nich.pdf;/home/nathante/Zotero/storage/8PG3QCP3/228210.html} +} + +@article{carroll_why_2000, + title = {Why the Microbrewery Movement? {{Organizational}} Dynamics of Resource Partitioning in the {{U}}.{{S}}. Brewing Industry}, + shorttitle = {Why the {{Microbrewery Movement}}?}, + author = {Carroll, Glenn R. and Swaminathan, Anand}, + date = {2000}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {106}, + number = {3}, + eprint = {10.1086/318962}, + eprinttype = {jstor}, + pages = {715--762}, + issn = {0002-9602}, + abstract = {The number of small specialty brewers in the U.S. beer brewing industry has increased dramatically in recent decades, even as the market for beer became increasingly dominated by mass‐production brewing companies. Using the resource‐partitioning model of organizational ecology, this article shows that these two apparently contradictory trends are fundamentally interrelated. Hypotheses developed here refine the way scale competition among generalist organizations is modeled and improve the theoretical development of the sociological bases for the appeal of specialist organizations' products, especially those related to organizational identity. Evidence drawn from qualitative and quantitative research provides strong support for the theory. 
The article offers a brief discussion of the theoretical and substantive issues involved in application of the model to other industries and to other cultures.}, + file = {/home/nathante/Zotero/storage/X2ITSCRL/Carroll and Swaminathan - 2000 - Why the microbrewery movement Organizational dyna.pdf} +} + +@article{chandrasekharan_you_2017, + ids = {chandrasekharan_you_2017-1}, + title = {You Can't Stay Here: {{The}} Efficacy of Reddit's 2015 Ban Examined through Hate Speech}, + shorttitle = {You Can't Stay Here}, + author = {Chandrasekharan, Eshwar and Pavalanathan, Umashanthi and Srinivasan, Anirudh and Glynn, Adam and Eisenstein, Jacob and Gilbert, Eric}, + date = {2017-12}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {31:1--31:22}, + issn = {2573-0142}, + abstract = {In 2015, Reddit closed several subreddits—foremost among them r/fatpeoplehate and r/CoonTown—due to violations of Reddit's anti-harassment policy. However, the effectiveness of banning as a moderation approach remains unclear: banning might diminish hateful behavior, or it may relocate such behavior to different parts of the site. We study the ban of r/fatpeoplehate and r/CoonTown in terms of its effect on both participating users and affected subreddits. Working from over 100M Reddit posts and comments, we generate hate speech lexicons to examine variations in hate speech usage via causal inference methods. We find that the ban worked for Reddit. More accounts than expected discontinued using the site; those that stayed drastically decreased their hate speech usage—by at least 80\%. Though many subreddits saw an influx of r/fatpeoplehate and r/CoonTown "migrants," those subreddits saw no significant changes in hate speech usage. In other words, other subreddits did not inherit the problem. 
We conclude by reflecting on the apparent success of the ban, discussing implications for online moderation, Reddit and internet communities more broadly.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/5Z8CCRM2/Chandrasekharan et al. - 2017 - You Can'T Stay Here The Efficacy of Reddit's 2015.pdf} +} + +@article{datta_identifying_2017, + title = {Identifying {{Misaligned Inter-Group Links}} and {{Communities}}}, + author = {Datta, Srayan and Phelan, Chanda and Adar, Eytan}, + date = {2017-12-06}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {37:1--37:23}, + abstract = {Many social media systems explicitly connect individuals (e.g., Facebook or Twitter); as a result, they are the targets of most research on social networks. However, many systems do not emphasize or support explicit linking between people (e.g., Wikipedia or Reddit), and even fewer explicitly link communities. Instead, network analysis is performed through inference on implicit connections, such as co-authorship or text similarity. Depending on how inference is done and what data drove it, different networks may emerge. While correlated structures often indicate stability, in this work we demonstrate that differences, or misalignment, between inferred networks also capture interesting behavioral patterns. For example, high-text but low-author similarity often reveals communities "at war" with each other over an issue or high-author but low-text similarity can suggest community fragmentation. Because we are able to model edge direction, we also find that asymmetry in degree (in-versus-out) co-occurs with marginalized identities (subreddits related to women, people of color, LGBTQ, etc.). In this work, we provide algorithms that can identify misaligned links, network structures and communities. 
We then apply these techniques to Reddit to demonstrate how these algorithms can be used to decipher inter-group dynamics in social media.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/52FT8LT8/Datta et al. - 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf;/home/nathante/Zotero/storage/WKCJHV6R/Datta et al. - 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf} +} + +@article{dimaggio_iron_1983, + title = {The {{Iron Cage Revisited}}: {{Institutional Isomorphism}} and {{Collective Rationality}} in {{Organizational Fields}}}, + shorttitle = {The {{Iron Cage Revisited}}}, + author = {DiMaggio, Paul J. and Powell, Walter W.}, + date = {1983}, + journaltitle = {American Sociological Review}, + volume = {48}, + number = {2}, + eprint = {2095101}, + eprinttype = {jstor}, + pages = {147--160}, + issn = {0003-1224}, + abstract = {[What makes organizations so similar? We contend that the engine of rationalization and bureaucratization has moved from the competitive marketplace to the state and the professions. Once a set of organizations emerges as a field, a paradox arises: rational actors make their organizations increasingly similar as they try to change them. We describe three isomorphic processes--coercive, mimetic, and normative--leading to this outcome. We then specify hypotheses about the impact of resource centralization and dependency, goal ambiguity and technical uncertainty, and professionalization and structuration on isomorphic change. 
Finally, we suggest implications for theories of organizations and social change.]}, + keywords = {Organization Behavior,Sociology}, + file = {/home/nathante/Zotero/storage/9A5PXKRT/DiMaggio and Powell - 1983 - The iron cage revisited Institutional isomorphism.pdf;/home/nathante/Zotero/storage/AQWAT6RA/2095101.html} +} + +@article{dimmick_theory_1984, + title = {The {{Theory}} of the {{Niche}}: {{Quantifying Competition Among Media Industries}}}, + shorttitle = {The {{Theory}} of the {{Niche}}}, + author = {Dimmick, John and Rothenbuhler, Eric}, + date = {1984-03-01}, + journaltitle = {Journal of Communication}, + volume = {34}, + number = {1}, + pages = {103--119}, + issn = {1460-2466}, + langid = {english}, + file = {/home/nathante/Zotero/storage/GDM85NW7/Dimmick and Rothenbuhler - 1984 - The Theory of the Niche Quantifying Competition A.pdf;/home/nathante/Zotero/storage/3RUMQPRP/abstract.html} +} + +@article{dobrev_dynamics_2001, + title = {Dynamics of Niche Width and Resource Partitioning}, + author = {Dobrev, Stanislav~D. and Kim, Tai‐Young and Hannan, Michael~T.}, + date = {2001}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {106}, + number = {5}, + eprint = {10.1086/320821}, + eprinttype = {jstor}, + pages = {1299--1337}, + issn = {0002-9602}, + abstract = {This article examines the effects of crowding in a market center on rates of change in organizational niche width and on organizational mortality. It proposes that, although firms with wide niches benefit from risk spreading and economies of scale, they are simultaneously exposed to intense competition. An analysis of organizational dynamics in automobile manufacturing firms in France, Germany, and Great Britain shows that competitive pressure not only increases the hazard of disbanding but also prompts organizational transformations that give rise to processes of resource partitioning. 
Emphasizing the content/process distinction in conceptualizing organizational change, the article finds that the process effect of changes in niche width and position increases mortality hazards. We discuss our findings in light of the processes investigated by the ecological theories of density dependence, resource partitioning, and structural inertia, and point to the theoretical links that help to integrate these theories.}, + file = {/home/nathante/Zotero/storage/7HQIXSCS/Dobrev et al. - 2001 - Dynamics of niche width and resource partitioning.pdf} +} + +@article{dobrev_shifting_2003, + ids = {dobrev_shifting_2003-1}, + title = {Shifting {{Gears}}, {{Shifting Niches}}: {{Organizational Inertia}} and {{Change}} in the {{Evolution}} of the {{U}}.{{S}}. {{Automobile Industry}}, 1885-1981}, + shorttitle = {Shifting {{Gears}}, {{Shifting Niches}}}, + author = {Dobrev, Stanislav D. and Kim, Tai-Young and Carroll, Glenn R.}, + date = {2003}, + journaltitle = {Organization Science}, + volume = {14}, + number = {3}, + eprint = {4135136}, + eprinttype = {jstor}, + pages = {264--282}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {We examine how experiential learning affects organizational change and its consequences on firm mortality. We develop hypotheses about the interactions of experiences with a specific type of organizational change on the one hand, and environmental stability, organizational size, and organizational niche width on the other hand. Our findings draw from analysis of the U.S. automobile industry between 1885 and 1981 and support the general prediction that "process" effects of change in the organizational core elevate the hazard of failure. 
We also find that a dynamic interpretation of organizational environments as comprised of other organizations helps to explicate the interplay between organization and environmental forces that shape the occurrence and outcome of transformation.}, + file = {/home/nathante/Zotero/storage/TJUKWSQJ/Dobrev et al_2003_Shifting Gears, Shifting Niches.pdf} +} + +@unpublished{foote_agent-based_2018, + title = {An {{Agent-Based Model}} of {{Online Community Joining}}}, + author = {Foote, Jeremy}, + date = {2018-07}, + editora = {TeBlunthuis, Nathan and Hill, Benjamin Mako and Shaw, Aaron}, + editoratype = {collaborator}, + eventtitle = {International {{Conference}} on {{Computational Social Science}} ({{IC2S2}})}, + venue = {{Evanston, IL}} +} + +@inproceedings{foote_behavior_2018, + title = {The Behavior and Network Position of Peer Production Founders}, + booktitle = {{{iConference}} 2018: {{Transforming Digital Worlds}}}, + author = {Foote, Jeremy and Contractor, Noshir}, + editor = {Chowdhury, Gobinda and McLeod, Julie and Gillet, Val and Willett, Peter}, + date = {2018}, + series = {Lecture {{Notes}} in {{Computer Science}}}, + pages = {99--106}, + publisher = {{Springer}}, + abstract = {Online peer production projects, such as Wikipedia and open-source software, have become important producers of cultural and technological goods. While much research has been done on the way that large existing projects work, little is known about how projects get started or who starts them. Nor is it clear how much influence founders have on the future trajectory of a community. We measure the behavior and social networks of 60,959 users on Wikia.com over a two month period. We compare the activity, local network positions, and global network positions of future founders and non-founders. We then explore the relationship between these measures and the relative growth of a founder’s wikis. 
We suggest hypotheses for future research based on this exploratory analysis.}, + isbn = {978-3-319-78105-1}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6I8T7IER/Foote and Contractor - 2018 - The Behavior and Network Position of Peer Producti.pdf;/home/nathante/Zotero/storage/QW9VAHSU/10.html} +} + +@thesis{foote_formation_2019, + type = {phdthesis}, + title = {The Formation and Growth of Collaborative Online Organizations}, + author = {Foote, Jeremy}, + date = {2019}, + institution = {{Northwestern University}}, + location = {{Evanston, IL}}, + abstract = {Explore millions of resources from scholarly journals, books, newspapers, videos and more, on the ProQuest Platform.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/FATUNJ49/2.html} +} + +@dataset{foote_replication_2017, + title = {Replication Data for: {{Starting}} Online Communities: Motivations and Goals of Wiki Founders}, + shorttitle = {Replication {{Data}} For}, + author = {Foote, Jeremy and Gergle, Darren and Shaw, Aaron}, + date = {2017-05-12}, + journaltitle = {Harvard Dataverse}, + abstract = {Anonymized survey data from our CHI 2017 Note: Starting Online Communities: Motivations and Goals of Wiki Founders}, + langid = {english} +} + +@inproceedings{foote_starting_2017, + title = {Starting Online Communities: Motivations and Goals of Wiki Founders}, + shorttitle = {Starting {{Online Communities}}}, + booktitle = {Proceedings of the 2017 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '17)}, + author = {Foote, Jeremy and Gergle, Darren and Shaw, Aaron}, + date = {2017}, + pages = {6376--6380}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Why do people start new online communities? Previous research has studied what helps communities to grow and what motivates contributors, but the reasons that people create new communities in the first place remain unclear. 
We present the results of a survey of over 300 founders of new communities on the online wiki hosting site Wikia.com. We analyze the motivations and goals of wiki creators, finding that founders have diverse reasons for starting wikis and diverse ways of defining their success. Many founders see their communities as occupying narrow topics, and neither seek nor expect a large group of contributors. We also find that founders with differing goals approach community building differently. We argue that community platform designers can create interfaces that support the diverse goals of founders more effectively.}, + isbn = {978-1-4503-4655-9}, + keywords = {peer production,survey,wikis}, + file = {/home/nathante/Zotero/storage/BWAIBPUK/Foote et al. - 2017 - Starting Online Communities Motivations and Goals.pdf} +} + +@article{freeman_liability_1983, + title = {The {{Liability}} of {{Newness}}: {{Age Dependence}} in {{Organizational Death Rates}}}, + shorttitle = {The {{Liability}} of {{Newness}}}, + author = {Freeman, John and Carroll, Glenn R. and Hannan, Michael T.}, + date = {1983}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {48}, + number = {5}, + eprint = {2094928}, + eprinttype = {jstor}, + pages = {692--710}, + issn = {0003-1224}, + abstract = {Age dependence in organizational death rates is studied using data on three populations of organizations: national labor unions, semiconductor electronics manufacturers, and newspaper publishing companies. There is a liability of newness in each of these populations but it differs depending on whether death occurs through dissolution or by absorption through merger. Liabilities of smallness and bigness are also identified but controlling for them does not eliminate age dependence.}, + file = {/home/nathante/Zotero/storage/CT32HPF2/Freeman et al. 
- 1983 - The Liability of Newness Age Dependence in Organi.pdf} +} + +@article{fulk_connective_1996, + title = {Connective and Communal Public Goods in Interactive Communication Systems}, + author = {Fulk, Janet and Flanagin, Andrew J. and Kalman, Michael E. and Monge, Peter R. and Ryan, Timothy}, + date = {1996}, + journaltitle = {Communication Theory}, + volume = {6}, + number = {1}, + pages = {60--87}, + issn = {1468-2885}, + abstract = {This paper extends theories of public goods to interactive communication systems. Two key public communication goods are identified. Connectivity provides point-to-point communication, and communality links members through commonly held information, such as that often found in databases. These extensions are important, we argue, because communication public goods operate differently from traditional material public goods. These differences have important implications for costs, benefits, and the realization of a critical mass of users that is necessary for realization of the good. We also explore multifunctional goods that combine various features and hybrid goods that link private goods to public ones. We examine the applicability of two key assumptions of public goods theory to interactive communication systems. First, jointness of supply specifies that consumption of a public good does not diminish its availability to others. Second, impossibility of exclusion stipulates that all members of the public have access to the good. We conclude with suggestions for further theoretical development.}, + langid = {english}, + keywords = {maintaining public goods}, + file = {/home/nathante/Zotero/storage/ZJVU4TGW/Fulk et al. - 1996 - Connective and communal public goods in interactiv.pdf;/home/nathante/Zotero/storage/8J5CPWLV/4259000.html} +} + +@article{gan_gender_2018, + title = {Gender, Feedback, and Learners' Decisions to Share Their Creative Computing Projects}, + author = {Gan, Emilia F. 
and Hill, Benjamin Mako and Dasgupta, Sayamindu}, + date = {2018-11}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {2}, + pages = {54:1--54:23}, + abstract = {Although informal online learning communities are made possible by users' decisions to share their creations, participation by females and other marginalized groups remains stubbornly low in technical communities. Using descriptive statistics and a unique dataset of shared and unshared projects from over 1.1 million users of Scratch—a collaborative programming community for young people—we show that while girls share less initially, this trend flips among experienced users. Using Bayesian regression analyses, we show that this relationship can largely be attributed to differences in the way boys and girls participate. We also find that while prior positive feedback is correlated with increased sharing among inexperienced users, this effect also reverses with experience or with the addition of controls. Our findings provide a description of the dynamics behind online learners' decisions to share, open new research questions, and point to several lessons for system designers.}, + issue = {CSCW}, + langid = {english}, + keywords = {broadening participation,computer mediated communication,creative learning,gender differences,online communities,scratch,social computing and social navigation,social learning}, + file = {/home/nathante/Zotero/storage/II3Z28KL/Gan et al. 
- 2018 - Gender, feedback, and learners' decisions to share.pdf} +} + +@article{graeff_battle_2014, + title = {The Battle for ‘{{Trayvon Martin}}’: {{Mapping}} a Media Controversy Online and off-Line}, + shorttitle = {The Battle for ‘{{Trayvon Martin}}’}, + author = {Graeff, Erhardt and Stempeck, Matt and Zuckerman, Ethan}, + date = {2014-01}, + journaltitle = {First Monday}, + volume = {19}, + number = {2}, + issn = {13960466}, + langid = {english}, + keywords = {controversy mapping,media cloud,networked gatekeeping,political networks,quantitative media analysis}, + file = {/home/nathante/Zotero/storage/EXNM66WB/Graeff et al. - 2014 - The battle for ‘Trayvon Martin’ Mapping a media c.pdf;/home/nathante/Zotero/storage/BW5KPRPA/4947.html;/home/nathante/Zotero/storage/T7J9BSVG/3821.html} +} + +@thesis{graves_open_2013, + type = {Thesis}, + ids = {graves_open_2013-1}, + title = {Open Source Software Development as a Complex System}, + author = {Graves, John David Nicholas}, + date = {2013}, + institution = {{Auckland University of Technology}}, + abstract = {Open Source Software Development is an approach to software development involving open, public exposure of the source code of a computer program under development (hence, ‘open source’). Each open source program is shared online as a project in a source code repository. The so-called ‘open source community’ is the system which coordinates the work of software developers on the code in the repositories. This research explored the growth dynamics of this system, first by launching open source projects and then via simulation. Following (Barabasi \& Albert, 1999) and a biodiversity model (Hubbell, 2001), simulations of a complex system driven by preferential attachment, where popular projects attract more developers and grow (subject to some attrition), provided a systematic explanation for the lack of growth typical of single-developer projects. 
In this multi-methodological study, the lack of growth in the research projects empirically demonstrated the need for a theoretical understanding of open source project initiation and growth while the subsequent simulation results showed how the pattern of no growth (one developer) projects could be explained by a simple model.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/PZVK297T/Graves - 2013 - Open source software development as a complex syst.pdf;/home/nathante/Zotero/storage/TDXFC3JV/5729.html} +} + +@article{halfaker_rise_2013, + title = {The Rise and Decline of an Open Collaboration System: How {{Wikipedia}}'s Reaction to Popularity Is Causing Its Decline}, + shorttitle = {The {{Rise}} and {{Decline}} of an {{Open Collaboration System}}}, + author = {Halfaker, Aaron and Geiger, R. Stuart and Morgan, Jonathan T. and Riedl, John}, + date = {2013-05-01}, + journaltitle = {American Behavioral Scientist}, + shortjournal = {American Behavioral Scientist}, + volume = {57}, + number = {5}, + pages = {664--688}, + issn = {0002-7642}, + abstract = {Open collaboration systems, such as Wikipedia, need to maintain a pool of volunteer contributors to remain relevant. Wikipedia was created through a tremendous number of contributions by millions of contributors. However, recent research has shown that the number of active contributors in Wikipedia has been declining steadily for years and suggests that a sharp decline in the retention of newcomers is the cause. This article presents data that show how several changes the Wikipedia community made to manage quality and consistency in the face of a massive growth in participation have ironically crippled the very growth they were designed to manage. Specifically, the restrictiveness of the encyclopedia’s primary quality control mechanism and the algorithmic tools used to reject contributions are implicated as key causes of decreased newcomer retention. 
Furthermore, the community’s formal mechanisms for norm articulation are shown to have calcified against changes—especially changes proposed by newer editors.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/7B7AFK58/Halfaker et al. - 2013 - The rise and decline of an open collaboration syst.pdf;/home/nathante/Zotero/storage/Y9676KNV/The Rise and Decline of an Open Collaboration Syst.pdf} +} + +@book{hannan_concepts_2019, + title = {Concepts and Categories: Foundations for Sociological and Cultural Analysis}, + shorttitle = {Concepts and Categories}, + author = {Hannan, Michael T}, + date = {2019}, + abstract = {Why do people like books, music, or movies that adhere consistently to genre conventions? Why is it hard for politicians to take positions that cross ideological boundaries? Why do we have dramatically different expectations of companies that are categorized as social media platforms as opposed to news media sites? The answers to these questions require an understanding of how people use basic concepts in their everyday lives to give meaning to objects, other people, and social situations and actions. In this book, a team of sociologists presents a groundbreaking model of concepts and categorization that can guide sociological and cultural analysis of a wide variety of social situations. Drawing on research in various fields, including cognitive science, computational linguistics, and psychology, the book develops an innovative view of concepts. It argues that concepts have meanings that are probabilistic rather than sharp, occupying fuzzy, overlapping positions in a "conceptual space." Measurements of distances in this space reveal our mental representations of categories. Using this model, important yet commonplace phenomena such as our routine buying decisions can be quantified in terms of the cognitive distance between concepts. 
Concepts and Categories provides an essential set of formal theoretical tools and illustrates their application using an eclectic set of methodologies, from micro-level controlled experiments to macro-level language processing. It illuminates how explicit attention to concepts and categories can give us a new understanding of everyday situations and interactions.}, + isbn = {978-0-231-19272-9}, + langid = {english}, + annotation = {OCLC: 1083703599} +} + +@book{hannan_logics_2007, + ids = {hannan_logics_2012}, + title = {Logics of Organization Theory: Audiences, Codes, and Ecologies}, + shorttitle = {Logics of Organization Theory}, + author = {Hannan, Michael T and Pólos, László and Carroll, Glenn}, + date = {2007}, + publisher = {{Princeton University Press}}, + location = {{Princeton, N.J.}}, + abstract = {"Building theories of organizations is challenging: theories are partial and "folk" categories are fuzzy. The commonly used tools--first-order logic and its foundational set theory--are ill-suited for handling these complications. Here, three leading authorities rethink organization theory. Logics of Organization Theory sets forth and applies a new language for theory building based on a nonmonotonic logic and fuzzy set theory. In doing so, not only does it mark a major advance in organizational theory, but it also draws lessons for theory building elsewhere in the social sciences. Organizational research typically analyzes organizations in categories such as "bank," "hospital," or "university." These categories have been treated as crisp analytical constructs designed by researchers. But sociologists increasingly view categories as constructed by audiences. This book builds on cognitive psychology and anthropology to develop an audience-based theory of organizational categories. It applies this framework and the new language of theory building to organizational ecology. 
It reconstructs and integrates four central theory fragments, and in so doing reveals unexpected connections and new insights."--Publisher description.}, + isbn = {978-1-4008-4301-5}, + langid = {english}, + annotation = {OCLC: 646517503} +} + +@book{hannan_organizational_1989, + title = {Organizational Ecology}, + author = {Hannan, Michael T. and Freeman, John}, + date = {1989}, + edition = {1}, + publisher = {{Harvard University Press}}, + location = {{Cambridge, MA}} +} + +@article{hannan_population_1977, + title = {The Population Ecology of Organizations}, + author = {Hannan, Michael T. and Freeman, John}, + date = {1977}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {82}, + number = {5}, + eprint = {2777807}, + eprinttype = {jstor}, + pages = {929--964}, + issn = {0002-9602}, + abstract = {A population ecology perspective on organization-environment relations is proposed as an alternative to the dominant adaptation perspective. The strength of inertial pressures on organizational structure suggests the application of models that depend on competition and selection in populations of organizations. Several such models as well as issues that arise in attempts to apply them to the organization-environment problem are discussed.}, + file = {/home/nathante/Zotero/storage/TVD48Q77/Hannan and Freeman - 1977 - The Population Ecology of Organizations.pdf} +} + +@article{hannan_structural_1984, + title = {Structural Inertia and Organizational Change}, + author = {Hannan, Michael T. 
and Freeman, John}, + date = {1984-04}, + journaltitle = {American Sociological Review}, + volume = {49}, + number = {2}, + eprint = {2095567}, + eprinttype = {jstor}, + pages = {149--164}, + issn = {00031224}, + file = {/home/nathante/Zotero/storage/DRMDTJYH/Hannan and Freeman - 1984 - Structural inertia and organizational change.pdf} +} + +@article{haveman_follow_1993, + title = {Follow the {{Leader}}: {{Mimetic Isomorphism}} and {{Entry Into New Markets}}}, + shorttitle = {Follow the {{Leader}}}, + author = {Haveman, Heather A.}, + date = {1993}, + journaltitle = {Administrative Science Quarterly}, + volume = {38}, + number = {4}, + eprint = {2393338}, + eprinttype = {jstor}, + pages = {593--627}, + publisher = {{[Sage Publications, Inc., Johnson Graduate School of Management, Cornell University]}}, + issn = {0001-8392}, + abstract = {This paper combines organizational ecology and neoinstitutional theory to explain the process of diversification, specifically, how the structure of markets affects rates of market entry. I extend the density-dependence model of competition and legitimation, which has been used to study organizational founding and failure, to the process of organizational change through entry into new markets. I argue that the number of organizations operating in a particular market will have an inverted-U-shaped relationship with the rate of entry into that market. I also examine propositions, drawn from neoinstitutional theory, that organizations will follow similar and successful organizations into new markets. I assess the link between entry into new markets and (1) the number of organizations operating in those markets similar to a potential entrant and (2) the number of successful organizations in those markets. I also explore whether these two mimetic processes act in concert by examining whether successful potential entrants to a market are influenced by the presence of other successful organizations. 
I test these hypotheses on a population of savings and loan associations. I find that these firms imitate large and profitable organizations, but I find only limited evidence of imitation of similarly sized organizations, as large organizations copy the actions of other large organizations.}, + file = {/home/nathante/Zotero/storage/UDA8NLIN/Haveman_1993_Follow the Leader.pdf} +} + +@unpublished{healy_ecology_2003, + type = {Working Paper}, + title = {The Ecology of Open-Source Software Development}, + author = {Healy, Kieran and Schussman, Alan}, + date = {2003}, + abstract = {Open Source Software (OSS) is an innovative method of developing software applications that has been very successful over the past eight to ten years. A number of theories have emerged to explain its success, mainly from economics and law. We analyze a very large sample of OSS projects and find striking patterns in the overall structure of the development community. The distribution of projects on a range of activity measures is spectacularly skewed, with only a relatively tiny number of projects showing evidence of the strong collaborative activity which is supposed to characterize OSS. Our findings are consistent with prior, smaller-scale empirical research. We argue that these findings pose problems for the dominant accounts of OSS. We suggest that the gulf between active and inactive projects may be explained by social-structural features of the community which have received little attention in the existing literature. 
We suggest some hypotheses that might better predict the observed ecology of projects.}, + howpublished = {Working Paper}, + keywords = {Do Not Cite,FOSS}, + file = {/home/nathante/Zotero/storage/6VRGKZI6/Healy and Schussman - 2003 - The ecology of open-source software development.pdf} +} + +@incollection{hill_almost_2013, + title = {Almost {{Wikipedia}}: {{What}} Eight Early Online Collaborative Encyclopedia Projects Reveal about the Mechanisms of Collective Action.}, + booktitle = {Essays on Volunteer Mobilization in Peer Production}, + author = {Hill, Benjamin Mako}, + date = {2013}, + publisher = {{Massachusetts Institute of Technology}}, + location = {{Cambridge, Massachusetts}}, + annotation = {PhD Dissertation} +} + +@inproceedings{hill_consider_2014, + title = {Consider the Redirect: A Missing Dimension of {{Wikipedia}} Research}, + shorttitle = {Consider the {{Redirect}}}, + booktitle = {Proceedings of {{The International Symposium}} on {{Open Collaboration}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + date = {2014}, + series = {{{OpenSym}} '14}, + pages = {28:1--28:4}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Redirects are special pages in wikis that silently transport visitors to other pages. Although redirects make up a majority of all article pages in English Wikipedia, they have attracted very little attention and are rarely taken into account by researchers. This note describes redirects and illustrates why they play an important role in shaping activity in Wikipedia. We also present a novel longitudinal dataset of redirects for English Wikipedia and the software used to produce it. 
Using this dataset, we revisit several important published findings about Wikipedia to show that accounting for redirects can have important effects on research.}, + isbn = {978-1-4503-3016-9}, + file = {/home/nathante/Zotero/storage/QBK2TIWQ/Hill and Shaw - 2014 - Consider the Redirect A Missing Dimension of Wiki.pdf} +} + +@book{hill_debian_2005, + title = {Debian {{GNU}}/{{Linux}} 3.1 {{Bible}}}, + author = {Hill, Benjamin Mako}, + date = {2005}, + publisher = {{Wiley Pub}}, + location = {{Indianapolis, Ind}}, + editora = {Harris, David B}, + editoratype = {collaborator}, + keywords = {FOSS} +} + +@software{hill_mediawiki_2018, + title = {Mediawiki Dump Tools}, + author = {Hill, Benjamin Mako and TeBlunthuis, Nathan}, + date = {2018-09-03}, + version = {a4e60a9f} +} + +@book{hill_official_2008, + title = {Official {{Ubuntu}} Book}, + author = {Hill, Benjamin Mako and Burger, Corey and Jesse, Jonathan and Bacon, Jono}, + date = {2008}, + edition = {3}, + publisher = {{Prentice Hall}}, + isbn = {0-13-713668-4}, + keywords = {FOSS} +} + +@inproceedings{hill_page_2015, + title = {Page Protection: Another Missing Dimension of {{Wikipedia}} Research}, + shorttitle = {Page {{Protection}}}, + booktitle = {Proceedings of the 11th {{International Symposium}} on {{Open Collaboration}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + date = {2015}, + series = {{{OpenSym}} '15}, + pages = {15:1--15:4}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Page protection is a feature of wiki software that allows administrators to restrict contributions to particular pages. For example, pages are frequently protected so that they can only be edited by administrators. Page protection affects tens of thousands of pages in English Wikipedia and renders many of Wikipedia's most visible pages uneditable by the vast majority of visitors. That said, page protection has attracted very little attention and is rarely taken into account by researchers. 
This note describes page protection and illustrates why it plays an important role in shaping user behavior on wikis. We also present a new longitudinal dataset of page protection events for English Wikipedia, the software used to produce it, and results from tests that support both the validity of the dataset and the impact of page protection on patterns of editing.}, + isbn = {978-1-4503-3666-6}, + keywords = {page protection,wikipedia}, + file = {/home/nathante/Zotero/storage/VH9BNJVA/Hill and Shaw - 2015 - Page Protection Another Missing Dimension of Wiki.pdf} +} + +@incollection{hill_studying_2019, + title = {Studying Populations of Online Communities}, + booktitle = {The {{Oxford Handbook}} of {{Networked Communication}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + editor = {Foucault Welles, Brooke and González-Bailón, Sandra}, + date = {2019-09}, + pages = {173--193}, + publisher = {{Oxford University Press}}, + location = {{Oxford, UK}}, + abstract = {While the large majority of published research on online communities consists of analyses conducted entirely within individual communities, this chapter argues for a population-based approach, in which researchers study groups of similar communities. For example, although there have been thousands of papers published about Wikipedia, a population-based approach might compare all wikis on a particular topic. Using examples from published empirical studies, the chapter describes five key benefits of this approach. First, it argues that population-level research increases the generalizability of findings. Next, it describes four processes and dynamics that are only possible to study using populations: community-level variables, information diffusion processes across communities, ecological dynamics, and multilevel community processes. 
The chapter concludes with a discussion of a series of limitations and challenges.}, + isbn = {978-0-19-046051-8}, + langid = {english}, + file = {/home/nathante/Zotero/storage/39ZWGGYN/Hill and Shaw - 2019 - Studying Populations of Online Communities.pdf;/home/nathante/Zotero/storage/BTB3AQGV/oxfordhb-9780190460518-e-8.html} +} + +@incollection{hill_whither_2018, + title = {Whither Peer Production}, + booktitle = {Decentralizing the {{Commons}}}, + author = {Hill, Benjamin Mako}, + editor = {Hassan, Samer and De Filippi, Primavera}, + date = {2018}, + publisher = {{Institute for Network Culture}}, + location = {{Amsterdam, The Netherlands}}, + annotation = {Forthcoming} +} + +@article{hill_wikipedia_2013, + title = {The {{Wikipedia}} Gender Gap Revisited: Characterizing Survey Response Bias with Propensity Score Estimation}, + shorttitle = {The {{Wikipedia Gender Gap Revisited}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + date = {2013-06-26}, + journaltitle = {PLoS ONE}, + shortjournal = {PLoS ONE}, + volume = {8}, + number = {6}, + pages = {e65782}, + abstract = {Opt-in surveys are the most widespread method used to study participation in online communities, but produce biased results in the absence of adjustments for non-response. A 2008 survey conducted by the Wikimedia Foundation and United Nations University at Maastricht is the source of a frequently cited statistic that less than 13\% of Wikipedia contributors are female. However, the same study suggested that only 39.9\% of Wikipedia readers in the US were female – a finding contradicted by a representative survey of American adults by the Pew Research Center conducted less than two months later. 
Combining these two datasets through an application and extension of a propensity score estimation technique used to model survey non-response bias, we construct revised estimates, contingent on explicit assumptions, for several of the Wikimedia Foundation and United Nations University at Maastricht claims about Wikipedia editors. We estimate that the proportion of female US adult editors was 27.5\% higher than the original study reported (22.7\%, versus 17.8\%), and that the total proportion of female editors was 26.8\% higher (16.1\%, versus 12.7\%).}, + keywords = {Internet,Language,Online encyclopedias,Schools,Survey research,Surveys,United States,Universities}, + file = {/home/nathante/Zotero/storage/WWED7HE2/Hill and Shaw - 2013 - The Wikipedia Gender Gap Revisited Characterizing.pdf;/home/nathante/Zotero/storage/BGLYPWPW/article.html} +} + +@article{hofman_prediction_2017, + title = {Prediction and Explanation in Social Systems}, + author = {Hofman, Jake M. and Sharma, Amit and Watts, Duncan J.}, + date = {2017}, + journaltitle = {Science}, + volume = {355}, + number = {6324}, + eprint = {28154051}, + eprinttype = {pmid}, + pages = {486--488}, + issn = {0036-8075, 1095-9203}, + abstract = {Historically, social scientists have sought out explanations of human and social phenomena that provide interpretable causal mechanisms, while often ignoring their predictive accuracy. We argue that the increasingly computational nature of social science is beginning to reverse this traditional bias against prediction; however, it has also highlighted three important issues that require resolution. First, current practices for evaluating predictions must be better standardized. Second, theoretical limits to predictive accuracy in complex social systems must be better characterized, thereby setting expectations for what can be predicted or explained. 
Third, predictive accuracy and interpretability must be recognized as complements, not substitutes, when evaluating explanations. Resolving these three issues will lead to better, more replicable, and more useful social science.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/ISWU5DEQ/Hofman et al. - 2017 - Prediction and explanation in social systems.pdf;/home/nathante/Zotero/storage/TSUJV7Y3/486.html} +} + +@article{hwang_why_2021, + title = {Why Do {{People Participate}} in {{Small Online Communities}}?}, + author = {Hwang, Sohyeon and Foote, Jeremy D.}, + date = {2021-10-18}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {5}, + pages = {462:1--462:25}, + abstract = {Many benefits of online communities---such as obtaining new information, opportunities, and social connections---increase with size. Thus, a "successful" online community often evokes an image of hundreds of thousands of users, and practitioners and researchers alike have sought to devise methods to achieve growth and thereby, success. On the other hand, small online communities exist in droves and many persist in their smallness over time. Turning to the highly popular discussion website Reddit, which is made up of hundreds of thousands of communities, we conducted a qualitative interview study examining how and why people participate in these persistently small communities, in order to understand why these communities exist when popular approaches would assume them to be failures. 
Drawing from twenty interviews, this paper makes several contributions: we describe how small communities provide unique informational and interactional spaces for participants, who are drawn by the hyperspecific aspects of the community; we find that small communities do not promote strong dyadic interpersonal relationships but rather promote group-based identity; and we highlight how participation in small communities is part of a broader, ongoing strategy to curate participants' online experience. We argue that online communities can be seen as nested niches: parts of an embedded, complex, symbiotic socio-informational ecosystem. We suggest ways that social computing research could benefit from more deliberate considerations of interdependence between diverse scales of online community sizes.}, + issue = {CSCW2}, + keywords = {Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks,motivations,online communities,participation}, + file = {/home/nathante/Zotero/storage/H4FXQNBH/Hwang and Foote - 2021 - Why do people participate in small online communit.pdf;/home/nathante/Zotero/storage/JLPLB63F/Hwang and Foote - 2021 - Why do People Participate in Small Online Communit.pdf;/home/nathante/Zotero/storage/UQYVIDWS/Hwang and Foote - 2021 - Why do people participate in small online communit.pdf} +} + +@article{jarvenpaa_communication_1998, + ids = {jarvenpaa_communication_1998-1}, + title = {Communication and Trust in Global Virtual Teams}, + author = {Jarvenpaa, Sirkka L. and Leidner, Dorothy E.}, + date = {1998-06-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + volume = {3}, + number = {4}, + pages = {0--0}, + issn = {1083-6101}, + abstract = {This paper explores the challenges of creating and maintaining trust in a global virtual team whose members transcend time, space, and culture. 
The challenges are highlighted by integrating recent literature on work teams, computer-mediated communication groups, cross-cultural communication, and interpersonal and organizational trust. To explore these challenges empirically, we report on a series of descriptive case studies on global virtual teams whose members were separated by location and culture, were challenged by a common collaborative project, and for whom the only economically and practically viable communication medium was asynchronous and synchronous computer-mediated communication. The results suggest that global virtual teams may experience a form of ‘swift’ trust but such trust appears to be very fragile and temporal. The study raises a number of issues to be explored and debated by future research. Pragmatically, the study describes communication behaviors that might facilitate trust in global virtual teams.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/CULRNXBT/abstract.html;/home/nathante/Zotero/storage/VMME55NA/4584374.html} +} + +@article{jiang_moderation_2019, + title = {Moderation Challenges in Voice-Based Online Communities on {{Discord}}}, + author = {Jiang, Jialun "Aaron" and Kiene, Charles and Middler, Skyler and Brubaker, Jed R. and Fiesler, Casey}, + date = {2019}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + series = {{{CSCW}} '19}, + volume = {3}, + pages = {23}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/KLXHSBLN/Jiang et al. - 2019 - Moderation Challenges in Voice-based Online Commun.pdf;/home/nathante/Zotero/storage/ZLSXRJ5J/Jiang et al. 
- 2019 - Moderation challenges in voice-based online commun.pdf} +} + +@inproceedings{keegan_analyzing_2016, + title = {Analyzing {{Organizational Routines}} in {{Online Knowledge Collaborations}}: {{A Case}} for {{Sequence Analysis}} in {{CSCW}}}, + shorttitle = {Analyzing {{Organizational Routines}} in {{Online Knowledge Collaborations}}}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer-Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Keegan, Brian and Lev, Shakked and Arazy, Ofer}, + date = {2016}, + series = {{{CSCW}} '16}, + pages = {1065--1079}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Research into socio-technical systems like Wikipedia has overlooked important structural patterns in the coordination of distributed work. This paper argues for a conceptual reorientation towards sequences as a fundamental unit of analysis for understanding work routines in online knowledge collaboration. We outline a research agenda for researchers in computer-supported cooperative work (CSCW) to understand the relationships, patterns, antecedents, and consequences of sequential behavior using methods already developed in fields like bio-informatics. Using a data set of 37,515 revisions from 16,616 unique editors to 96 Wikipedia articles as a case study, we analyze the prevalence and significance of different sequences of editing patterns. We illustrate the mixed method potential of sequence approaches by interpreting the frequent patterns as general classes of behavioral motifs. We conclude by discussing the methodological opportunities for using sequence analysis for expanding existing approaches to analyzing and theorizing about co-production routines in online knowledge collaboration.}, + isbn = {978-1-4503-3592-8}, + file = {/home/nathante/Zotero/storage/9AK33B8M/Keegan et al. 
- 2016 - Analyzing Organizational Routines in Online Knowle.pdf} +} + +@article{kiene_managing_2018, + title = {Managing Organizational Culture in Online Group Mergers}, + author = {Kiene, Charles and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2018}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + pages = {89:1--89:21}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/NV8YEK8W/Kiene et al. - 2018 - Managing organizational culture in online group me.pdf} +} + +@inproceedings{kiene_surviving_2016, + title = {Surviving an “{{Eternal September}}”: {{How}} an Online Community Managed a Surge of Newcomers}, + shorttitle = {Surviving an "{{Eternal September}}"}, + booktitle = {Proceedings of the 2016 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Kiene, Charles and Monroy-Hernández, Andrés and Hill, Benjamin Mako}, + date = {2016}, + pages = {1152--1156}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {We present a qualitative analysis of interviews with participants in the NoSleep community within Reddit where millions of fans and writers of horror fiction congregate. We explore how the community handled a massive, sudden, and sustained increase in new members. Although existing theory and stories like Usenet's infamous "Eternal September" suggest that large influxes of newcomers can hurt online communities, our interviews suggest that NoSleep survived without major incident. We propose that three features of NoSleep allowed it to manage the rapid influx of newcomers gracefully: (1) an active and well-coordinated group of administrators, (2) a shared sense of community which facilitated community moderation, and (3) technological systems that mitigated norm violations. 
We also point to several important trade-offs and limitations.}, + isbn = {978-1-4503-3362-7}, + keywords = {newcomers,norms and governance,online communities,peer production,qualitative methods}, + file = {/home/nathante/Zotero/storage/2YPT6BUL/Kiene et al. - 2016 - Surviving an Eternal September How an Online Co.pdf;/home/nathante/Zotero/storage/S9JX8XE5/Kiene et al. - 2016 - Surviving an “Eternal September” How an online co.pdf} +} + +@article{kiene_technological_2019, + title = {Technological Frames and User Innovation: Exploring Technological Change in Community Moderation Teams}, + shorttitle = {Technological Frames and User Innovation}, + author = {Kiene, Charles and Jiang, Jialun "Aaron" and Hill, Benjamin Mako}, + date = {2019-11-07}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {3}, + pages = {44:1--44:23}, + abstract = {Management of technological change in organizations is one of the most enduring topics in the literature on computer-supported cooperative work. The successful navigation of technological change is both more challenging and more critical in online communities that are entirely mediated by technology than it is in traditional organizations. This paper presents an analysis of 14 in-depth interviews with moderators of subcommunities of one technological platform (Reddit) that added communities on a new technological platform (Discord). Moderation teams experienced several problems related to moderating content at scale as well as a disconnect between the affordances of Discord and their assumptions based on their experiences on Reddit. We found that moderation teams used Discord's API to create scripts and bots that augmented Discord to make the platform work more like tools on Reddit. These tools were particularly important in communities struggling with scale. 
Our findings suggest that increasingly widespread end user programming allow users of social computing systems to innovate and deploy solutions to unanticipated design problems by transforming new technological platforms to align with their past expectations.}, + issue = {CSCW}, + keywords = {API,bots,chat,computer-mediated communication,discord,moderation,online communities,reddit,social computing,technological change}, + file = {/home/nathante/Zotero/storage/E2PDCY58/Kiene et al. - 2019 - Technological frames and user innovation explorin.pdf;/home/nathante/Zotero/storage/U7M6IZY4/Kiene et al. - 2019 - Technological Frames and User Innovation Explorin.pdf} +} + +@book{kraut_building_2012, + title = {Building Successful Online Communities: {{Evidence-based}} Social Design}, + author = {Kraut, Robert E. and Resnick, Paul and Kiesler, Sara}, + date = {2012}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + abstract = {Uses insights from social science, psychology, and economics to offer advice on planning and managing an online community.}, + isbn = {978-0-262-29831-5}, + langid = {english}, + keywords = {design,foundations of social computing}, + file = {/home/nathante/Zotero/storage/B4XSKAVW/04-kraut10-Newcomers-current.pdf;/home/nathante/Zotero/storage/CX4KDC3G/01-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/IJCEWA6L/06-Resnick10-Startup-current.pdf;/home/nathante/Zotero/storage/JEWAVXHG/02-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/RIM4D9KS/05-kiesler10-Regulation-current.pdf;/home/nathante/Zotero/storage/S6Z28BBS/03-Ren10-Commitment-current.pdf} +} + +@book{kropotkin_mutual_2012, + title = {Mutual Aid: A Factor of Evolution}, + shorttitle = {Mutual {{Aid}}}, + author = {Kropotkin, Peter}, + date = {2012-05-02}, + origdate = {1902}, + publisher = {{Courier Corporation}}, + abstract = {In this cornerstone of modern liberal social theory, Peter Kropotkin states that the most effective human and animal communities are 
essentially cooperative, rather than competitive. Kropotkin based this classic on his observations of natural phenomena and history, forming a work of stunning and well-reasoned scholarship. Essential to the understanding of human evolution as well as social organization, it offers a powerful counterpoint to the tenets of Social Darwinism. It also cites persuasive evidence of human nature's innate compatibility with anarchist society. "Kropotkin's basic argument is correct," noted evolutionary biologist Stephen Jay Gould. "Struggle does occur in many modes, and some lead to cooperation among members of a species as the best pathway to advantage for individuals." Anthropologist Ashley Montagu declared that "Mutual Aid will never be any more out of date than will the Declaration of Independence. New facts may increasingly become available, but we can already see that they will serve largely to support Kropotkin's conclusion that 'in the ethical progress of man, mutual support—not mutual struggle—has had the leading part.'" Physician and author Alex Comfort asserted that "Kropotkin profoundly influenced human biology by his theory of Mutual Aid. . . . He was one of the first systematic students of animal communities, and may be regarded as the founder of modern social ecology."}, + isbn = {978-0-486-12153-6}, + langid = {english}, + pagetotal = {338} +} + +@article{kubiszewski_production_2010, + title = {The Production and Allocation of Information as a Good That Is Enhanced with Increased Use}, + author = {Kubiszewski, Ida and Farley, Joshua and Costanza, Robert}, + date = {2010-04-01}, + journaltitle = {Ecological Economics}, + shortjournal = {Ecological Economics}, + series = {Special {{Section}} - {{Payments}} for {{Environmental Services}}: {{Reconciling Theory}} and {{Practice}}}, + volume = {69}, + number = {6}, + pages = {1344--1354}, + issn = {0921-8009}, + abstract = {Information has some unique characteristics. 
Unlike most other goods and services, it is neither rival (use by one prevents use by others) nor non-rival (use by one does not affect use by others), but is enhanced with increased use, or ‘additive’. Therefore a unique allocation system for both the production and consumption of information is needed. Under the current market-based allocation system, production of information is often limited through the exclusive rights produced by patents and copyrights. This limits scientists' ability to share and build on each other's knowledge. We break the problem down into three separate questions: (1) do markets generate the type of information most important for modern society? (2) are markets the most appropriate institution for producing that information? and (3) once information is produced, are markets the most effective way of maximizing the social value of that information? We conclude that systematic market failures make it unlikely that markets will generate the most important types of information, while the unique characteristics of information reduce the cost-effectiveness of markets in generating information and in maximizing its social value. We then discuss alternative methods that do not have these shortcomings, and that would lead to greater overall economic efficiency, social justice and ecological sustainability. 
These methods include monetary prizes, publicly funded research from which the produced information is released into the public domain, and status driven incentive structures like those in academia and the “open-source” community.}, + langid = {english}, + keywords = {Allocation,Anti-rival,Copyrights,Information,Intellectual property rights,Knowledge,Market failure,Patents}, + file = {/home/nathante/Zotero/storage/DX84YZM7/S092180091000039X.html} +} + +@book{kuhn_structure_1970, + ids = {kuhn_structure_2015}, + title = {The Structure of Scientific Revolutions}, + author = {Kuhn, Thomas S}, + date = {1970}, + publisher = {{University of Chicago Press}}, + langid = {english}, + annotation = {OCLC: 959412835} +} + +@inproceedings{lam_wp:clubhouse?:_2011, + title = {{{WP}}:{{Clubhouse}}?: {{An Exploration}} of {{Wikipedia}}'s {{Gender Imbalance}}}, + shorttitle = {{{WP}}}, + booktitle = {Proceedings of the 7th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Lam, Shyong (Tony) K. and Uduwage, Anuradha and Dong, Zhenhua and Sen, Shilad and Musicant, David R. and Terveen, Loren and Riedl, John}, + date = {2011}, + series = {{{WikiSym}} '11}, + pages = {1--10}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Wikipedia has rapidly become an invaluable destination for millions of information-seeking users. However, media reports suggest an important challenge: only a small fraction of Wikipedia's legion of volunteer editors are female. In the current work, we present a scientific exploration of the gender imbalance in the English Wikipedia's population of editors. We look at the nature of the imbalance itself, its effects on the quality of the encyclopedia, and several conflict-related factors that may be contributing to the gender gap. Our findings confirm the presence of a large gender gap among editors and a corresponding gender-oriented disparity in the content of Wikipedia's articles. 
Further, we find evidence hinting at a culture that may be resistant to female participation.}, + isbn = {978-1-4503-0909-7}, + file = {/home/nathante/Zotero/storage/EUWCPP57/Lam et al. - 2011 - WPClubhouse An Exploration of Wikipedia's Gende.pdf;/home/nathante/Zotero/storage/KR457VCD/p1-lam.pdf} +} + +@inproceedings{lampe_motivations_2010, + title = {Motivations to Participate in Online Communities}, + booktitle = {Proceedings of the 28th International Conference on {{Human}} Factors in Computing Systems}, + author = {Lampe, Cliff and Wash, Rick and Velasquez, Alcides and Ozkaya, Elif}, + date = {2010}, + pages = {1927--1936}, + publisher = {{ACM}}, + location = {{Atlanta, Georgia, USA}}, + abstract = {A consistent theoretical and practical challenge in the design of socio-technical systems is that of motivating users to participate in and contribute to them. This study examines the case of Everything2.com users from the theoretical perspectives of Uses and Gratifications and Organizational Commitment to compare individual versus organizational motivations in user participation. We find evidence that users may continue to participate in a site for different reasons than those that led them to the site. Feelings of belonging to a site are important for both anonymous and registered users across different types of uses. Long-term users felt more dissatisfied with the site than anonymous users. Social and cognitive factors seem to be more important than issues of usability in predicting contribution to the site.}, + isbn = {978-1-60558-929-9}, + file = {/home/nathante/Zotero/storage/7NIQDKFR/Lampe et al. 
- 2010 - Motivations to participate in online communities.pdf} +} + +@article{lazer_network_2007, + title = {The {{Network Structure}} of {{Exploration}} and {{Exploitation}}}, + author = {Lazer, David and Friedman, Allan}, + date = {2007-12-01}, + journaltitle = {Administrative Science Quarterly}, + shortjournal = {Administrative Science Quarterly}, + volume = {52}, + number = {4}, + pages = {667--694}, + issn = {0001-8392}, + abstract = {Whether as team members brainstorming or cultures experimenting with new technologies, problem solvers communicate and share ideas. This paper examines how the structure of communication networks among actors can affect system-level performance. We present an agent-based computer simulation model of information sharing in which the less successful emulate the more successful. Results suggest that when agents are dealing with a complex problem, the more efficient the network at disseminating information, the better the short-run but the lower the long-run performance of the system. The dynamic underlying this result is that an inefficient network maintains diversity in the system and is thus better for exploration than an efficient network, supporting a more thorough search for solutions in the long run. For intermediate time frames, there is an inverted-U relationship between connectedness and performance, in which both poorly and well-connected systems perform badly, and moderately connected systems perform best. 
This curvilinear relationship between connectivity and group performance can be seen in several diverse instances of organizational and social behavior.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/CQCKNER7/Lazer and Friedman - 2007 - The Network Structure of Exploration and Exploitat.pdf} +} + +@inproceedings{luo_causal_2014, + title = {Causal {{Inference}} in {{Social Media Using Convergent Cross Mapping}}}, + booktitle = {2014 {{IEEE Joint Intelligence}} and {{Security Informatics Conference}}}, + author = {Luo, C. and Zheng, X. and Zeng, D.}, + date = {2014-09}, + pages = {260--263}, + abstract = {Revealing underlying causal structure in social media is critical to understanding how users interact, on which a lot of security intelligence applications can be built. Existing causal inference methods for social media usually rely on limited explicit causal context, pre-assume certain user interaction model, or neglect the nonlinear nature of social interaction, which could lead to bias estimations of causality. Inspired from recent advance in causality detection in complex ecosystems, we propose to take advantage of a novel nonlinear state space reconstruction based approach, namely Convergent Cross Mapping, to perform causal inference in social media. Experimental results on real world social media datasets show the effectiveness of the proposed method in causal inference and user behavior prediction in social media.}, + eventtitle = {2014 {{IEEE Joint Intelligence}} and {{Security Informatics Conference}}}, + file = {/home/nathante/Zotero/storage/PQJPPNVK/Luo et al. 
- 2014 - Causal Inference in Social Media Using Convergent .pdf;/home/nathante/Zotero/storage/YEGDGLZH/6975587.html} +} + +@book{margetts_political_2015, + title = {Political Turbulence: {{How}} Social Media Shape Collective Action}, + shorttitle = {Political {{Turbulence}}}, + author = {Margetts, Helen and John, Peter and Hale, Scott and Yasseri, Taha}, + date = {2015-11-24}, + publisher = {{Princeton University Press}}, + location = {{Princeton, NJ}}, + abstract = {As people spend increasing proportions of their daily lives using social media, such as Twitter and Facebook, they are being invited to support myriad political causes by sharing, liking, endorsing, or downloading. Chain reactions caused by these tiny acts of participation form a growing part of collective action today, from neighborhood campaigns to global political movements. Political Turbulence reveals that, in fact, most attempts at collective action online do not succeed, but some give rise to huge mobilizations--even revolutions. Drawing on large-scale data generated from the Internet and real-world events, this book shows how mobilizations that succeed are unpredictable, unstable, and often unsustainable. To better understand this unruly new force in the political world, the authors use experiments that test how social media influence citizens deciding whether or not to participate. They show how different personality types react to social influences and identify which types of people are willing to participate at an early stage in a mobilization when there are few supporters or signals of viability. The authors argue that pluralism is the model of democracy that is emerging in the social media age--not the ordered, organized vision of early pluralists, but a chaotic, turbulent form of politics. 
This book demonstrates how data science and experimentation with social data can provide a methodological toolkit for understanding, shaping, and perhaps even predicting the outcomes of this democratic turbulence.}, + isbn = {978-0-691-15922-5}, + langid = {english}, + pagetotal = {304}, + file = {/home/nathante/Zotero/storage/EF6XBIQ7/Margetts et al. - 2015 - Political Turbulence How Social Media Shape Colle.pdf;/home/nathante/Zotero/storage/JEHM4KWG/Political Turbulence_ How Social Media Sha - Helen Margetts.azw3} +} + +@article{margolin_normative_2012, + title = {Normative {{Influences}} on {{Network Structure}} in the {{Evolution}} of the {{Children}}’s {{Rights NGO Network}}, 1977-2004}, + shorttitle = {Normative {{Influences}} on {{Network Structure}} in the {{Evolution}} of the {{Children}}’s {{Rights NGO Network}}, 1977-2004}, + author = {Margolin, Drew B. and Shen, Cuihua and Lee, Seungyoon and Weber, Matthew S. and Fulk, Janet and Monge, Peter}, + date = {2012-10-23}, + journaltitle = {Communication Research}, + abstract = {This study examines the impact of legitimacy on the dynamics of interorganizational networks within the nongovernmental organizations’ children’s rights communi...}, + langid = {english}, + keywords = {codification,community ecology,evolution,network evolution,NGOs,norms,SIENA}, + file = {/home/nathante/Zotero/storage/295X7HRD/Margolin et al_2012_Normative Influences on Network Structure in the Evolution of the Children’s.pdf;/home/nathante/Zotero/storage/T494X64A/0093650212463731.html} +} + +@book{matei_structural_2017, + title = {Structural Differentiation in Social Media: Adhocracy, Entropy, and the "1 \% Effect"}, + shorttitle = {Structural Differentiation in Social Media}, + author = {Matei, Sorin A and Britt, Brian C}, + date = {2017}, + series = {Lecture {{Notes}} in {{Social Networks}}}, + publisher = {{Springer}}, + abstract = {This book explores community dynamics within social media.
Using Wikipedia as an example, the volume explores communities that rely upon commons-based peer production. Fundamental theoretical principles spanning such domains as organizational configurations, leadership roles, and social evolutionary theory are developed. In the context of Wikipedia, these theories explain how a functional elite of highly productive editors has emerged and why they are responsible for a majority of the content. It explains how the elite shapes the project and how this group tends to become stable and increasingly influential over time. Wikipedia has developed a new and resilient social hierarchy, an adhocracy, which combines features of traditional and new, online, social organizations. The book presents a set of practical approaches for using these theories in real-world practice. This work fundamentally changes the way we think about social media leadership and evolution, emphasizing the crucial contributions of leadership, of elite social roles, and of group global structure to the overall success and stability of large social media projects. Written in an accessible and direct style, the book will be of interest to academics as well as professionals with an interest in social media and commons-based peer production processes.}, + isbn = {978-3-319-64425-7}, + langid = {english} +} + +@inproceedings{mcmahon_substantial_2017, + title = {The Substantial Interdependence of {{Wikipedia}} and {{Google}}: {{A}} Case Study on the Relationship between Peer Production Communities and Information Technologies}, + shorttitle = {The {{Substantial Interdependence}} of {{Wikipedia}} and {{Google}}}, + booktitle = {International {{AAAI Conference}} on {{Web}} and {{Social Media}} ({{ICWSM}} 2017)}, + author = {McMahon, Connor and Johnson, Isaac L. and Hecht, Brent J.}, + date = {2017}, + pages = {142--151}, + publisher = {{AAAI}}, + location = {{Palo Alto, California}}, + file = {/home/nathante/Zotero/storage/6TX35RFQ/McMahon et al. 
- 2017 - The substantial interdependence of Wikipedia and G.pdf} +} + +@article{mcpherson_ecology_1983, + title = {An Ecology of Affiliation}, + author = {McPherson, J. Miller}, + date = {1983}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {48}, + number = {4}, + eprint = {2117719}, + eprinttype = {jstor}, + pages = {519--532}, + issn = {0003-1224}, + abstract = {This paper develops an ecological model of the competition of social organizations for members. The concept of the ecological niche is quantified explicitly in a way which ties together geography, time, and the social composition of organizations. A differential equation model analogous to the Lotka-Volterra competition equations in biology captures the dynamics of the system. This dynamic model is related to the niche concept in a novel way, which produces an easily understood and powerful picture of the static and dynamic structure of the community. This new perspective provides a theoretical link between the aggregate macrostructural theory of Blau (1977a,b) and the microstructural dynamics of organizational demography (Pfeffer, 1983). The model is tested with data on organizations from a midwestern city.}, + file = {/home/nathante/Zotero/storage/WIDCF8XB/McPherson - 1983 - An ecology of affiliation.pdf} +} + +@article{mcpherson_evolution_1991, + title = {Evolution on a {{Dancing Landscape}}: {{Organizations}} and {{Networks}} in {{Dynamic Blau Space}}}, + shorttitle = {Evolution on a {{Dancing Landscape}}}, + author = {McPherson, J. Miller and Ranger-Moore, James R.}, + date = {1991-09}, + journaltitle = {Social Forces}, + shortjournal = {Social Forces}, + volume = {70}, + number = {1}, + pages = {19--43}, + issn = {00377732}, + abstract = {This article develops and tests an evolutionary model of the growth, decline, and demographic dynamics of voluntary organizations. 
The model demonstrates a strong analogy between the adaptive landscape of Sewall Wright (1931) and the exploitation surfaces generated by a model of member selection and retention for voluntary associations. The article connects the processes of membership recruitment and loss to the social networks connecting individuals. The model generates dynamic hypotheses about the time path of organizations in sociodemographic dimensions. A key idea in this model is that membership selection processes at the individual level produce adaptation in communities of organizations. The article concludes with an empirical example and some discussion of the implications of the model for a variety of research literatures.}, + file = {/home/nathante/Zotero/storage/HVQWNZE6/McPherson and Ranger-Moore - 1991 - Evolution on a Dancing Landscape Organizations an.pdf} +} + +@article{mcpherson_testing_1996, + title = {Testing a {{Dynamic Model}} of {{Social Composition}}: {{Diversity}} and {{Change}} in {{Voluntary Groups}}}, + shorttitle = {Testing a {{Dynamic Model}} of {{Social Composition}}}, + author = {McPherson, J. Miller and Rotolo, Thomas}, + date = {1996}, + journaltitle = {American Sociological Review}, + volume = {61}, + number = {2}, + eprint = {2096330}, + eprinttype = {jstor}, + pages = {179--202}, + issn = {0003-1224}, + abstract = {[We test a dynamic model of the social composition of voluntary groups. The model is based on the idea that sociodemographic variables define social niches in which voluntary groups grow and decline, share and compete, and change or remain static. The flow of individuals through such groups depends on the competition of other groups for their time and other resources. We build a dynamic model of this process and show how this model can account for changes in the social composition and the social heterogeneity of voluntary groups. 
We use life history data on the group affiliations of 1,050 individuals from 1974 to 1989 to test hypotheses about the diversity of education among group members and about the mean level of education of the members. Our data strongly support the hypotheses.]}, + file = {/home/nathante/Zotero/storage/KCQZTDG3/McPherson and Rotolo - 1996 - Testing a Dynamic Model of Social Composition Div.pdf} +} + +@inproceedings{menking_people_2019, + title = {People Who Can Take It: How Women {{Wikipedians}} Negotiate and Navigate Safety}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Menking, Amanda and Erickson, Ingrid and Pratt, Wanda}, + date = {2019-05}, + series = {{{CHI}} '19}, + pages = {472:1--472:14}, + publisher = {{Association for Computing Machinery}}, + location = {{Glasgow, Scotland, UK}}, + abstract = {Wikipedia is one of the most successful online communities in history, yet it struggles to attract and retain women editors-a phenomenon known as the gender gap. We investigate this gap by focusing on the voices of experienced women Wikipedians. In this interview-based study (N=25), we identify a core theme among these voices: safety. We reveal how our participants perceive safety within their community, how they manage their safety both conceptually and physically, and how they act on this understanding to create safe spaces on and off Wikipedia. Our analysis shows Wikipedia functions as both a multidimensional and porous space encompassing a spectrum of safety. Navigating this space requires these women to employ sophisticated tactics related to identity management, boundary management, and emotion work. 
We conclude with a set of provocations to spur the design of future online environments that encourage equity, inclusivity, and safety for historically marginalized users.}, + isbn = {978-1-4503-5970-2}, + langid = {english}, + keywords = {gender gap,online communities,participation,safe spaces,safety,wikipedia}, + file = {/home/nathante/Zotero/storage/AIBWULEC/Menking et al_2019_People Who Can Take It.pdf;/home/nathante/Zotero/storage/QEPWUCE5/Menking et al. - 2019 - How women Wikipedians negotiate and navigate safety.pdf} +} + +@incollection{merton_sociological_1968, + title = {On {{Sociological Theories}} of the {{Middle Range}}}, + booktitle = {Social {{Theory}} and {{Social Structure}}}, + author = {Merton, Robert}, + date = {1968}, + publisher = {{The Free Press}}, + location = {{New York, NY}}, + file = {/home/nathante/Zotero/storage/LR9B4LLM/02.29_merton_middle_range.pdf} +} + +@article{minkoff_interorganizational_1995, + title = {Interorganizational Influences on the Founding of African American Organizations, 1955–1985}, + author = {Minkoff, Debra C.}, + date = {1995-03-01}, + journaltitle = {Sociological Forum}, + shortjournal = {Sociol Forum}, + volume = {10}, + number = {1}, + pages = {51--79}, + issn = {1573-7861}, + abstract = {This paper examines the relationship between traditions of social action and patterns of organizational development, using data on the formation of national African American protest, advocacy, and service organizations between 1955 and 1985. Following research in organizational ecology, Poisson regression is used to examine the association between organizational density and organizational formation across strategic forms. The results provide some support for the idea that interorganizational influences are important in shaping the contours of the African American social movement industry. 
Outside funding, internal organizational capacities and protest levels also play a significant role.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/BFLEJ2X4/Minkoff_1995_Interorganizational influences on the founding of african american.pdf} +} + +@dataset{narayan_replication_2017, + title = {Replication Data for: {{The Wikipedia Adventure}}: Field Evaluation of an Interactive Tutorial for New Users}, + shorttitle = {Replication {{Data}} For}, + author = {Narayan, Sneha and Orlowitz, Jake and Morgan, Jonathan T. and Shaw, Aaron D. and Hill, Benjamin Mako}, + date = {2017-06-07}, + journaltitle = {Harvard Dataverse}, + abstract = {This dataset contains the data and code necessary to replicate work in the following paper: Narayan, Sneha, Jake Orlowitz, Jonathan Morgan, Benjamin Mako Hill, and Aaron Shaw. 2017. “The Wikipedia Adventure: Field Evaluation of an Interactive Tutorial for New Users.” in Proceedings of the 20th ACM Conference on Computer-Supported Cooperative Work \& Social Computing (CSCW '17). New York, New York: ACM Press. http://dx.doi.org/10.1145/2998181.2998307 The published paper contains two studies. Study 1 is a descriptive analysis of a survey of Wikipedia editors who played a gamified tutorial. Study 2 is a field experiment that evaluated the same the tutorial. These data are the data used in the field experiment described in Study 2. Description of Files This dataset contains the following files beyond this README: twa.RData — An RData file that includes all variables used in Study 2. twa\_analysis.R — A GNU R script that includes all the code used to generate the tables and plots related to Study 2 in the paper. The RData file contains one variable (d) which is an R dataframe (i.e., table) that includes the following columns: userid (integer): The unique numerical ID representing each user on in our sample. These are 8-digit integers and describe public accounts on Wikipedia. 
sample.date (date string): The day the user was recruited to the study. Dates are formatted in “YYYY-MM-DD” format. In the case of invitees, it is the date their invitation was sent. For users in the control group, these is the date that they would have been invited to the study. edits.all (integer): The total number of edits made by the user on Wikipedia in the 180 days after they joined the study. Edits to user's user pages, user talk pages and subpages are ignored. edits.ns0 (integer): The total number of edits made by user to article pages on Wikipedia in the 180 days after they joined the study. edits.talk (integer): The total number of edits made by user to talk pages on Wikipedia in the 180 days after they joined the study. Edits to a user's user page, user talk page and subpages are ignored. treat (logical): TRUE if the user was invited, FALSE if the user was in control group. play (logical): TRUE if the user played the game. FALSE if the user did not. All users in control are listed as FALSE because any user who had not been invited to the game but played was removed. twa.level (integer): Takes a value 0 of if the user has not played the game. Ranges from 1 to 7 for those who did, indicating the highest level they reached in the game. quality.score (float). This is the average word persistence (over a 6 revision window) over all edits made by this userid. Our measure of word persistence (persistent word revision per word) is a measure of edit quality developed by Halfaker et al. that tracks how long words in an edit persist after subsequent revisions are made to the wiki-page. For more information on how word persistence is calculated, see the following paper: Halfaker, Aaron, Aniket Kittur, Robert Kraut, and John Riedl. 2009. “A Jury of Your Peers: Quality, Experience and Ownership in Wikipedia.” In Proceedings of the 5th International Symposium on Wikis and Open Collaboration (OpenSym '09), 1–10. New York, New York: ACM Press. 
doi:10.1145/1641309.1641332. Or this page: https://meta.wikimedia.org/wiki/Research:Content\_persistence How we created twa.RData The files twa.RData combines datasets drawn from three places: A dataset created by Wikimedia Foundation staff that tracked the details of the experiment and how far people got in the game. The variables userid, sample.date, treat, play, and twa.level were all generated in a dataset created by WMF staff when The Wikipedia Adventure was deployed. All users in the sample created their accounts within 2 days before the date they were entered into the study. None of them had received a Teahouse invitation, a Level 4 user warning, or been blocked from editing at the time that they entered the study. Additionally, all users made at least one edit after the day they were invited. Users were sorted randomly into treatment and control groups, based on which they either received or did not receive an invite to play The Wikipedia Adventure. Edit and text persistence data drawn from public XML dumps created on May 21st, 2015. We used publicly available XML dumps to generate the outcome variables, namely edits.all, edits.ns0, edits.talk and quality.score. We first extracted all edits made by users in our sample during the six month period since they joined the study, excluding edits made to user pages or user talk pages using. We parsed the XML dumps using the Python based wikiq and MediaWikiUtilities software online at: http://projects.mako.cc/source/?p=mediawiki\_dump\_tools https://github.com/mediawiki-utilities/python-mediawiki-utilities We obtained the XML dumps from: https://dumps.wikimedia.org/enwiki/ A list of edits made by users in our study that were subsequently deleted, created on August 3rd, 2015. The WMF staff created a dataset that listed all the edits made by users in our study that were deleted before August 3rd, 2015. 
We made the decision to include these edits in our counts, so as to measure the total level of participation undertaken by each editor. If a user in our study made article or talk page edits that were subsequently deleted, we would use the deleted edit logs to identify them, and increment the variables edits.all, edits.ns0, and edits.talk as appropriate. We decided that all edits drawn from the deleted edit logs would be defined to have an edit persistence score of 0, since they were deleted from Wikipedia. We “manually” merged these datasets together. Contact Us For more details about the dataset, please see our paper. If you notice any bugs or issues with these data or code, please contact Sneha Narayan (snehanarayan@u.northwestern.edu) or the other authors of this paper.}, + langid = {english} +} + +@inproceedings{narayan_wikipedia_2017, + title = {The {{Wikipedia Adventure}}: Field Evaluation of an Interactive Tutorial for New Users}, + shorttitle = {The {{Wikipedia Adventure}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Narayan, Sneha and Orlowitz, Jake and Morgan, Jonathan and Hill, Benjamin Mako and Shaw, Aaron}, + date = {2017}, + series = {{{CSCW}} '17}, + pages = {1785--1799}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Integrating new users into a community with complex norms presents a challenge for peer production projects like Wikipedia. We present The Wikipedia Adventure (TWA): an interactive tutorial that offers a structured and gamified introduction to Wikipedia. In addition to describing the design of the system, we present two empirical evaluations. First, we report on a survey of users, who responded very positively to the tutorial. Second, we report results from a large-scale invitation-based field experiment that tests whether using TWA increased newcomers' subsequent contributions to Wikipedia. 
We find no effect of either using the tutorial or of being invited to do so over a period of 180 days. We conclude that TWA produces a positive socialization experience for those who choose to use it, but that it does not alter patterns of newcomer activity. We reflect on the implications of these mixed results for the evaluation of similar social computing systems.}, + isbn = {978-1-4503-4335-0}, + file = {/home/nathante/Zotero/storage/3ZFPBYSH/p1785-narayan.pdf} +} + +@inreference{noauthor_digg_2021, + title = {Digg}, + booktitle = {Wikipedia}, + date = {2021-08-26T10:31:32Z}, + abstract = {Digg is an American news aggregator with a curated front page, aiming to select stories specifically for the Internet audience such as science, trending political issues, and viral Internet issues. It was launched in its current form on July 31, 2012, with support for sharing content to other social platforms such as Twitter and Facebook. It formerly had been a popular social news website, allowing people to vote web content up or down, called digging and burying, respectively. In 2012, Quantcast estimated Digg's monthly U.S. unique visits at 3.8 million. Digg's popularity prompted the creation of similar sites such as Reddit. In July 2008, the former company took part in advanced acquisition talks with Google for a reported \$200 million price tag, but the deal ultimately fell through.
After a controversial 2010 redesign and the departure of co-founders Jay Adelson and Kevin Rose, in July 2012 Digg was sold in three parts: the Digg brand, website, and technology were sold to Betaworks for an estimated \$500,000; 15 staff were transferred to The Washington Post's "SocialCode" for a reported \$12 million; and a suite of patents was sold to LinkedIn for about \$4 million. Digg was purchased by BuySellAds, an advertising company, for an undisclosed amount in April 2018.}, + langid = {english}, + annotation = {Page Version ID: 1040737272}, + file = {/home/nathante/Zotero/storage/CBTI7R5J/index.html} +} + +@article{olzak_ecology_2001, + title = {The Ecology of Tactical Overlap}, + author = {Olzak, Susan and Uhrig, S. C. Noah}, + date = {2001-10}, + journaltitle = {American Sociological Review}, + volume = {66}, + number = {5}, + eprint = {3088954}, + eprinttype = {jstor}, + pages = {694}, + issn = {00031224}, + keywords = {uses overlap for density}, + file = {/home/nathante/Zotero/storage/23WSU752/3088954.pdf} +} + +@article{park_human_1936, + title = {Human {{Ecology}}}, + author = {Park, Robert Ezra}, + date = {1936-07-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {42}, + number = {1}, + pages = {1--15}, + issn = {0002-9602}, + abstract = {Human ecology is an attempt to apply to the interrelations of human beings a type of analysis previously applied to the interrelations of plants and animals. The term "symbiosis" describes a type of social relationship that is biotic rather than cultural. This biotic social order comes into existence and is maintained by competition. In plant and animal societies competition is unrestricted by an institutional or moral order. Human society is a consequence and effect of this limitation of the symbiotic social order by the cultural.
Different social sciences are concerned with the forms which this limitation of the natural or ecological social order assumes on (1) the economic, (2) the political, and (3) the moral level.}, + file = {/home/nathante/Zotero/storage/CBVGR8RU/Park - 1936 - Human Ecology.pdf;/home/nathante/Zotero/storage/UKMY6VUE/217327.html} +} + +@book{peters_speaking_1999, + title = {Speaking into the Air: A History of the Idea of Communication}, + shorttitle = {Speaking into the Air}, + author = {Peters, John Durham}, + date = {1999}, + publisher = {{The University of Chicago press}}, + location = {{Chicago; London}}, + isbn = {978-0-226-66277-0}, + langid = {english} +} + +@article{pikovsky_reconstruction_2016, + title = {Reconstruction of a Neural Network from a Time Series of Firing Rates}, + author = {Pikovsky, A.}, + date = {2016-06-20}, + journaltitle = {Physical Review E}, + shortjournal = {Phys. Rev. E}, + volume = {93}, + number = {6}, + pages = {062313}, + abstract = {Randomly coupled neural fields demonstrate irregular variation of firing rates, if the coupling is strong enough, as has been shown by [Phys. Rev. Lett. 61, 259 (1988)]. We present a method for reconstruction of the coupling matrix from a time series of irregular firing rates. The approach is based on the particular property of the nonlinearity in the coupling, as the latter is determined by a sigmoidal gain function. 
We demonstrate that for a large enough data set and a small measurement noise, the method gives an accurate estimation of the coupling matrix and of other parameters of the system, including the gain function.}, + file = {/home/nathante/Zotero/storage/HJJ6V4F9/Pikovsky - 2016 - Reconstruction of a neural network from a time ser.pdf;/home/nathante/Zotero/storage/QFCBD7F5/PhysRevE.93.html} +} + +@article{pontikes_ecology_2014, + title = {An {{Ecology}} of {{Social Categories}}}, + author = {Pontikes, Elizabeth and Hannan, Michael}, + date = {2014}, + journaltitle = {Sociological Science}, + volume = {1}, + pages = {311--343}, + issn = {23306696}, + abstract = {This article proposes that meaningful social classification emerges from an ecological dynamic that operates in two planes: feature space and label space. It takes a dynamic view of classification, allowing objects’ movements in both spaces to change the meaning of social categories. The first part of the theory argues that agents assign labels to objects based on perceptions of their similarities to existing members of a category. The second part of the theory shows that an object’s perceived similarity to members of other categories reduces its typicality in a focal category. This means that for categories with a high degree of overlap with other categories in label space (lenient categories), the link between feature-based similarities and labeling weakens. The findings suggest that social classification will likely evolve to contain both constraining and lenient categories. 
The theory implies that this process is self-reinforcing, so that constraining categories become more constraining, whereas lenient categories become more lenient.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/XPRTHWKT/Pontikes and Hannan - 2014 - An Ecology of Social Categories.pdf} +} + +@article{popielarz_edge_1995, + title = {On the {{Edge}} or {{In Between}}: {{Niche Position}}, {{Niche Overlap}}, and the {{Duration}} of {{Voluntary Association Memberships}}}, + shorttitle = {On the {{Edge}} or {{In Between}}}, + author = {Popielarz, Pamela A. and McPherson, J. Miller}, + date = {1995-11-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {101}, + number = {3}, + pages = {698--720}, + issn = {0002-9602}, + abstract = {This paper aims to explain a major barrier to societal integration: the remarkable homogeneity of voluntary associations. The explanation derives from an ecological theory of voluntary affiliation that asserts that organizations compete for members in a property space defined by the sociodemographic characteristics of members. Voluntary organizations lose fastest those members who are either atypical of the group (the niche edge hypothesis) or subject to competition from other groups (the niche overlap hypothesis). The authors analyze an event-history data set, generated by the life-history calendar approach, of 2,813 voluntary association membership spells.
The results, which strongly support both the niche edge and niche overlap hypotheses, substantiate the competitive ecological model of group structure.}, + file = {/home/nathante/Zotero/storage/6FLG9VFY/Popielarz and McPherson - 1995 - On the Edge or In Between Niche Position, Niche O.pdf;/home/nathante/Zotero/storage/B82LWTGA/230757.html} +} + +@book{rankin_official_2009, + title = {The Official {{Ubuntu}} Server Book}, + author = {Rankin, Kyle and Hill, Benjamin Mako}, + date = {2009}, + publisher = {{Prentice Hall}}, + location = {{Upper Saddle River, NJ}}, + isbn = {978-0-13-702118-5}, + langid = {english}, + annotation = {OCLC: 1001929364} +} + +@incollection{resnick_starting_2012, + title = {Starting New Online Communities}, + booktitle = {Building Successful Online Communities: {{Evidence-based}} Social Design}, + author = {Resnick, Paul and Konstan, Joseph and Chen, Yan and Kraut, Robert E}, + date = {2012}, + pages = {231--280}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-29831-5}, + file = {/home/nathante/Zotero/storage/GFUVQWNN/06-Resnick10-Startup-current.pdf} +} + +@inproceedings{roberts_structural_2013, + title = {The {{Structural Topic Model}} and {{Applied Social Science}}}, + booktitle = {2013 {{Workshop}} on {{Topic Models}}: {{Computation}}, {{Application}}, and {{Evaluation}}.}, + author = {Roberts, Margaret E and Tingley, Dustin and Stewart, Brandon M and Airoldi, Edoardo M}, + date = {2013}, + pages = {4}, + abstract = {We develop the Structural Topic Model which provides a general way to incorporate corpus structure or document metadata into the standard topic model. Document-level covariates enter the model through a simple generalized linear model framework in the prior distributions controlling either topical prevalence or topical content. 
We demonstrate the model’s use in two applied problems: the analysis of open-ended responses in a survey experiment about immigration policy, and understanding differing media coverage of China’s rise.}, + eventtitle = {Advances in {{Neural Information Processing Systems}}}, + langid = {english}, + file = {/home/nathante/Zotero/storage/3RKHWAPT/Roberts et al. - The Structural Topic Model and Applied Social Scie.pdf} +} + +@article{ruef_emergence_2000, + title = {The {{Emergence}} of {{Organizational Forms}}: {{A Community Ecology Approach}}}, + shorttitle = {The {{Emergence}} of {{Organizational Forms}}}, + author = {Ruef, Martin}, + date = {2000-11-01}, + journaltitle = {American Journal of Sociology}, + volume = {106}, + number = {3}, + pages = {658--714}, + publisher = {{The University of Chicago Press}}, + issn = {0002-9602}, + abstract = {This article introduces a new ecological approach to the study of form emergence based on the notion of an organizational community—a bounded set of forms with related identities. Applying the approach to 48 organizational forms in the health care sector, this study suggests that the development of novel forms is affected by the positioning of their identities with respect to existing form identities in the community, by the aggregate density and size of organizations matching those existing identities, and by the amount of attention directed at identity attributes by sector participants. Findings show that the process of form emergence is subject to population‐dependent effects akin to those noted previously for organizational entries within established populations. 
The aggregate density and size of organizations with similar identities increase the probability of form emergence to a point (cross‐form legitimation), but highly saturated regions of the identity space tend to be uninviting to new forms (cross‐form competition).}, + file = {/home/nathante/Zotero/storage/X6KXYEI5/Ruef - 2000 - The Emergence of Organizational Forms A Community.pdf;/home/nathante/Zotero/storage/NHGAJDIR/318963.html} +} + +@book{schelling_micromotives_1978, + title = {Micromotives and Macrobehavior}, + author = {Schelling, Thomas C.}, + date = {1978}, + publisher = {{WW Norton \& Company}}, + file = {/home/nathante/Zotero/storage/EQX3VVB9/Schelling - Micromotives and Macrobehavior.pdf} +} + +@article{schoener_resource_1974, + title = {Resource {{Partitioning}} in {{Ecological Communities}}}, + author = {Schoener, Thomas W.}, + date = {1974}, + journaltitle = {Science}, + volume = {185}, + number = {4145}, + eprint = {1738612}, + eprinttype = {jstor}, + pages = {27--39}, + issn = {0036-8075}, + file = {/home/nathante/Zotero/storage/R86IDGJN/1738612.pdf;/home/nathante/Zotero/storage/U4UCJ2BT/Schoener - 1974 - Resource Partitioning in Ecological Communities.pdf} +} + +@book{schweik_internet_2012, + title = {Internet Success: {{A}} Study of Open-Source Software Commons}, + shorttitle = {Internet Success}, + author = {Schweik, Charles M. 
and English, Robert C.}, + date = {2012}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-01725-1}, + pagetotal = {351} +} + +@article{shah_motivation_2006, + title = {Motivation, Governance, and the Viability of Hybrid Forms in Open Source Software Development}, + author = {Shah, Sonali K.}, + date = {2006-07-01}, + journaltitle = {Management Science}, + volume = {52}, + number = {7}, + pages = {1000--1014}, + abstract = {Open source software projects rely on the voluntary efforts of thousands of software developers, yet we know little about why developers choose to participate in this collective development process. This paper inductively derives a framework for understanding participation from the perspective of the individual software developer based on data from two software communities with different governance structures. In both communities, a need for software-related improvements drives initial participation. The majority of participants leave the community once their needs are met, however, a small subset remains involved. For this set of developers, motives evolve over time and participation becomes a hobby. These hobbyists are critical to the long-term viability of the software code: They take on tasks that might otherwise go undone and work to maintain the simplicity and modularity of the code. Governance structures affect this evolution of motives. Implications for firms interested in implementing hybrid strategies designed to combine the advantages of open source software development with proprietary ownership and control are discussed.}, + keywords = {FOSS,Management,To Read}, + file = {/home/nathante/Zotero/storage/9FVVZ6B3/mnsc.1060.pdf;/home/nathante/Zotero/storage/T3DTX9AQ/Shah - 2006 - Motivation, Governance, and the Viability of Hybri.pdf;/home/nathante/Zotero/storage/WAISB3HF/1000.html} +} + +@article{shaw_laboratories_2014, + title = {Laboratories of Oligarchy? 
{{How}} the Iron Law Extends to Peer Production}, + shorttitle = {Laboratories of {{Oligarchy}}?}, + author = {Shaw, Aaron and Hill, Benjamin Mako}, + date = {2014}, + journaltitle = {Journal of Communication}, + shortjournal = {J Commun}, + volume = {64}, + number = {2}, + pages = {215--238}, + issn = {1460-2466}, + abstract = {Peer production projects like Wikipedia have inspired voluntary associations, collectives, social movements, and scholars to embrace open online collaboration as a model of democratic organization. However, many peer production projects exhibit entrenched leadership and deep inequalities, suggesting that they may not fulfill democratic ideals. Instead, peer production projects may conform to Robert Michels' “iron law of oligarchy,” which proposes that democratic membership organizations become increasingly oligarchic as they grow. Using exhaustive data of internal processes from a sample of 683 wikis, we construct empirical measures of participation and test for increases in oligarchy associated with growth in wikis' contributor bases. In contrast to previous studies, we find support for Michels' iron law and conclude that peer production entails oligarchic organizational forms.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/GIII687R/Shaw and Hill - 2014 - Laboratories of oligarchy How the iron law extend.pdf;/home/nathante/Zotero/storage/W3846GC6/full.html} +} + +@article{shi_wisdom_2019, + title = {The Wisdom of Polarized Crowds}, + author = {Shi, Feng and Teplitskiy, Misha and Duede, Eamon and Evans, James A.}, + date = {2019-04}, + journaltitle = {Nature Human Behaviour}, + volume = {3}, + number = {4}, + pages = {329}, + issn = {2397-3374}, + abstract = {This article explores the effect of ideological polarization on team performance. 
By analysing millions of edits to Wikipedia, the authors reveal that politically diverse editor teams produce higher-quality articles than homogeneous or moderate teams, and they identify the mechanisms responsible for producing these superior articles.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/5AJIP7BF/Shi et al_2019_The wisdom of polarized crowds.pdf;/home/nathante/Zotero/storage/E7S9VG4I/Shi et al. - 2019 - The wisdom of polarized crowds.pdf;/home/nathante/Zotero/storage/YVYHDNGP/Shi et al_2019_The wisdom of polarized crowds.pdf;/home/nathante/Zotero/storage/BPKFC376/s41562-019-0541-6.html;/home/nathante/Zotero/storage/PTAPHWSK/s41562-019-0541-6.html;/home/nathante/Zotero/storage/RLZLXT6Y/s41562-019-0541-6.html} +} + +@book{shirky_here_2008, + title = {Here Comes Everybody : {{The}} Power of Organizing without Organizations}, + author = {Shirky, Clay.}, + date = {2008}, + publisher = {{Penguin Press}}, + location = {{New York, NY}}, + abstract = {An examination of how the rapid spread of new forms of social interaction enabled by technology is changing the way humans form groups and exist within them, with profound long-term economic and social effects--for good and for ill. Our age's new technologies of social networking are evolving, and evolving us, into new groups doing new things in new ways, and old and new groups alike doing the old things better and more easily. Hierarchical structures that exist to manage the work of groups are seeing their raisons d'\^etre swiftly eroded by the rising tide. Business models are being destroyed, transformed, born at dizzying speeds, and the larger social impact is profound. Clay Shirky is one of our wisest observers of the transformational power of the new forms of tech-enabled social interaction, and this is his reckoning with the ramifications of all this on what we do and who we are.--From publisher description. 
Discusses and uses examples of how digital networks transform the ability of humans to gather and cooperate with one another.}, + isbn = {978-1-59420-153-0}, + langid = {english}, + keywords = {FOSS,Media Studies}, + file = {/home/nathante/Zotero/storage/DHBTQ79D/shirky-2008.pdf} +} + +@article{siggelkow_temporarily_2003, + title = {Temporarily {{Divide}} to {{Conquer}}: {{Centralized}}, {{Decentralized}}, and {{Reintegrated Organizational Approaches}} to {{Exploration}} and {{Adaptation}}}, + shorttitle = {Temporarily {{Divide}} to {{Conquer}}}, + author = {Siggelkow, Nicolaj and Levinthal, Daniel A.}, + date = {2003-12-01}, + journaltitle = {Organization Science}, + volume = {14}, + number = {6}, + pages = {650--669}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {To create a competitive advantage, firms need to find activity configurations that are not only internally consistent, but also appropriate given the firm's current environment. This challenge is particularly acute after firms have experienced an environmental change that has shifted the existing competitive landscape and created new, high-performing sets of activity choices. How should firms organize to explore and search such an altered performance landscape? While it has been noted that adaptive entities need to maintain a balance of exploration and exploitation, little is known about how different organizational structures moderate this balance. With the help of an agent-based simulation model, we study the value of three different organizational structures: a centralized organization, in which decisions are made only at the level of the firm as a whole; a decentralized organization, in which decisions are made independently in two divisions; and a temporarily decentralized firm, which starts out with a decentralized structure and later reintegrates. 
We find that if interactions among a firm's activities are pervasive, neither the centralized nor the permanently decentralized organizational structure leads to high performance. In this case, temporary decentralization—an organizational structure that has not found much attention in the literature—yields the highest long-term performance. This organizational structure allows the firm both to avoid low-performing activity configurations and to eventually coordinate across its divisions. Thus, even if the decision problem a firm faces is not fully decomposable, a temporary bifurcation can lead to a higher long-term performance outcome. Initial decentralized exploration is, however, costly in the short run, as compared to centralized exploration. As a result, a tradeoff exists between the short-term costs of decentralized exploration and the long-term benefits of reaching higher performance. As interactions across and within divisions increase, the optimal length of decentralized exploration tends to grow. Paralleling our first result, we further show that even if a decision problem is decomposable, that is, can be perfectly modularized, it can be beneficial to create a temporary decision allocation that creates "unnecessary" interdependencies across the subsystems. This benefit arises in particular when the modules are complex by themselves. In both cases, an initial phase of exploration, enabled by an appropriate organizational structure, followed by refinement and coordination, enabled by a different structure, leads to high performance. 
To illustrate our general model, we focus on incumbent firms' responses to the Internet and discuss implications for the product design process.}, + keywords = {Activity Systems,Agent-Based Simulations,E-Commerce,Organizational Adaptation,Organizational Design}, + file = {/home/nathante/Zotero/storage/BFSDUBNA/Siggelkow_Levinthal_2003_Temporarily Divide to Conquer.pdf} +} + +@book{singer_applied_2003, + title = {Applied Longitudinal Data Analysis: {{Modeling}} Change and Event Occurrence}, + shorttitle = {Applied {{Longitudinal Data Analysis}}}, + author = {Singer, Judith D. and Willett, John B.}, + date = {2003}, + publisher = {{Oxford University Press}}, + location = {{New York, NY}}, + isbn = {0-19-515296-4} +} + +@article{soule_competition_2008, + title = {Competition and Resource Partitioning in Three Social Movement Industries}, + author = {Soule, Sarah A. and King, Brayden G.}, + date = {2008-05}, + journaltitle = {The American Journal of Sociology}, + volume = {113}, + number = {6}, + eprint = {25145846}, + eprinttype = {jstor}, + pages = {1568--1610}, + issn = {00029602}, + abstract = {Drawing hypotheses from resource mobilization and resource partitioning theories (RMT and RPT), this article examines how inter-organizational competition and social movement industry (SMI) concentration affect the level of tactical and goal specialization of protest organizations associated with the peace, women's, and environmental movements. Additionally, the article examines how specialization affects the survival of these organizations. By and large, the findings are commensurate with the expectations of RMT and RPT. Results indicate that interorganizational competition leads to more specialized tactical and goal repertoires. Concentration in the SMI also leads to specialization, but this is only true for less established organizations. 
Results also indicate that tactical and goal specialization decrease organizational survival, unless the industry is highly concentrated.}, + file = {/home/nathante/Zotero/storage/TG4RWD3T/Soule and King - 2008 - Competition and Resource Partitioning in Three Soc.pdf} +} + +@article{sugihara_detecting_2012, + title = {Detecting {{Causality}} in {{Complex Ecosystems}}}, + author = {Sugihara, George and May, Robert and Ye, Hao and Hsieh, Chih-hao and Deyle, Ethan and Fogarty, Michael and Munch, Stephan}, + date = {2012-09-20}, + journaltitle = {Science}, + eprint = {22997134}, + eprinttype = {pmid}, + pages = {1227079}, + issn = {0036-8075, 1095-9203}, + abstract = {Identifying causal networks is important for effective policy and management recommendations on climate, epidemiology, financial regulation, and much else. Here, we introduce a method, based on nonlinear state space reconstruction, that can distinguish causality from correlation. It extends to nonseparable weakly connected dynamic systems (cases not covered by the current Granger causality paradigm). The approach is illustrated both by simple models (where, in contrast to the real world, we know the underlying equations/relations and so can check the validity of our method) and by application to real ecological systems, including the controversial sardine-anchovy-temperature problem.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/88Z9BXNQ/Sugihara et al. - 2012 - Detecting Causality in Complex Ecosystems.pdf;/home/nathante/Zotero/storage/IXJEHNSL/tab-pdf.html} +} + +@inproceedings{suh_singularity_2009, + title = {The Singularity Is Not near: Slowing Growth of {{Wikipedia}}}, + shorttitle = {The {{Singularity}} Is {{Not Near}}}, + booktitle = {Proceedings of the 5th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Suh, Bongwon and Convertino, Gregorio and Chi, Ed H. 
and Pirolli, Peter}, + date = {2009}, + series = {{{WikiSym}} '09}, + pages = {1--10}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Prior research on Wikipedia has characterized the growth in content and editors as being fundamentally exponential in nature, extrapolating current trends into the future. We show that recent editing activity suggests that Wikipedia growth has slowed, and perhaps plateaued, indicating that it may have come against its limits to growth. We measure growth, population shifts, and patterns of editor and administrator activities, contrasting these against past results where possible. Both the rate of page growth and editor growth has declined. As growth has declined, there are indicators of increased coordination and overhead costs, exclusion of newcomers, and resistance to new edits. We discuss some possible explanations for these new developments in Wikipedia including decreased opportunities for sharing existing knowledge and increased bureaucratic stress on the socio-technical system itself.}, + isbn = {978-1-60558-730-1}, + file = {/home/nathante/Zotero/storage/WTEMKAUC/Suh et al. - 2009 - The singularity is not near slowing growth of Wik.pdf} +} + +@inproceedings{tan_all_2015, + title = {All Who Wander: {{On}} the Prevalence and Characteristics of Multi-Community Engagement}, + shorttitle = {All Who Wander}, + booktitle = {Proceedings of the 24th {{International Conference}} on {{World Wide Web}}}, + author = {Tan, Chenhao and Lee, Lillian}, + date = {2015}, + series = {{{WWW}} '15}, + pages = {1056--1066}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + location = {{Republic and Canton of Geneva, Switzerland}}, + abstract = {Although analyzing user behavior within individual communities is an active and rich research domain, people usually interact with multiple communities both on- and off-line. How do users act in such multi-community environments? 
Although there are a host of intriguing aspects to this question, it has received much less attention in the research community in comparison to the intra-community case. In this paper, we examine three aspects of multi-community engagement: the sequence of communities that users post to, the language that users employ in those communities, and the feedback that users receive, using longitudinal posting behavior on Reddit as our main data source, and DBLP for auxiliary experiments. We also demonstrate the effectiveness of features drawn from these aspects in predicting users' future level of activity. One might expect that a user's trajectory mimics the "settling-down" process in real life: an initial exploration of sub-communities before settling down into a few niches. However, we find that the users in our data continually post in new communities; moreover, as time goes on, they post increasingly evenly among a more diverse set of smaller communities. Interestingly, it seems that users that eventually leave the community are "destined" to do so from the very beginning, in the sense of showing significantly different "wandering" patterns very early on in their trajectories; this finding has potentially important design implications for community maintainers. Our multi-community perspective also allows us to investigate the "situation vs. 
personality" debate from language usage across different communities.}, + isbn = {978-1-4503-3469-3}, + keywords = {DBLP,language,lifecycle,multiple communities,reddit}, + file = {/home/nathante/Zotero/storage/8GL2XQG3/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf;/home/nathante/Zotero/storage/J3RVCH26/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf} +} + +@inproceedings{tan_tracing_2018, + title = {Tracing Community Genealogy: How New Communities Emerge from the Old}, + shorttitle = {Tracing {{Community Genealogy}}}, + booktitle = {Proceedings of the {{Twelfth International Conference}} on {{Web}} and {{Social Media}} ({{ICWSM}} '18)}, + author = {Tan, Chenhao}, + date = {2018}, + pages = {395--404}, + publisher = {{AAAI}}, + location = {{Palo Alto, California}}, + abstract = {The process by which new communities emerge is a central research issue in the social sciences. While a growing body of research analyzes the formation of a single community by examining social networks between individuals, we introduce a novel community-centered perspective. We highlight the fact that the context in which a new community emerges contains numerous existing communities. 
We reveal the emerging process of communities by tracing their early members’ previous community memberships.}, + file = {/home/nathante/Zotero/storage/QEAEMFYR/Tan - 2018 - Tracing Community Genealogy How New Communities E.pdf} +} + +@inproceedings{teblunthuis_density_2017, + title = {Density Dependence without Resource Partitioning: Population Ecology on {{Change}}.Org}, + shorttitle = {Density {{Dependence Without Resource Partitioning}}}, + booktitle = {Companion of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2017}, + series = {{{CSCW}} '17 {{Companion}}}, + pages = {323--326}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {E-petitioning is a prominent form of Internet-based collective action. We apply theories from organizational population ecology to investigate whether similar petitions compete for signatures. We use latent Dirichlet allocation (LDA) topic modeling to identify topical niches. Using these niches, we test two theories from population ecology on 442,109 Change.org petitions. First, we find evidence for density dependence, an inverse-U-shaped relationship between the density of a petition's niche and the number of signatures the petition obtains. This suggests e-petitioning is competitive and that e-petitions draw on overlapping resource pools. Second, although resource partitioning theory predicts that topically specialized petitions will obtain more signatures in concentrated populations, we find no evidence of this. This suggests that specialists struggle to avoid competition with generalists.}, + isbn = {978-1-4503-4688-7}, + file = {/home/nathante/Zotero/storage/54585RCP/TeBlunthuis et al. 
- 2017 - Density dependence without resource partitioning .pdf} +} + +@thesis{teblunthuis_density_2017-1, + type = {Master of Arts Thesis}, + ids = {teblunthuis_density_2017-2,teblunthuis_density_2018}, + title = {Density Dependence without Resource Partitioning on an Online Petitioning Platform}, + author = {TeBlunthuis, Nathan}, + date = {2017}, + institution = {{University of Washington}}, + location = {{Seattle, Washington}}, + abstract = {Online petitions are a collective action tactic that leverages digital affordances in pursuit of discursive opportunities. Prior efforts to explain why some petitions are more successful than others emphasize signer motivations, petition framing, social media, or resources from movement organizations. We advance a key insight of organizational ecology: population-level variables like density and concentration also constrain success. We use latent Dirichlet allocation (LDA) topic models to measure overlap density and frame specialization. We then model how ecological dynamics affect petition signature counts. We observe density dependence: a curvilinear relationship between overlap density and success. We anticipated resource partitioning: specialists enjoy competitive advantages under concentration, but we find no evidence for it. We discuss boundary conditions for ecological dynamics commonly found in organizational fields induced by the distinctive scope of e-tactic platforms. 
Platforms may produce concentration without advantages for specialists by lowering entry costs for generalists and specialists alike.}, + langid = {american}, + file = {/home/nathante/Zotero/storage/XFELN2Z6/TeBlunthuis - 2018 - Density dependence without resource partitioning o.pdf} +} + +@inproceedings{teblunthuis_revisiting_2018, + title = {Revisiting "{{The}} Rise and Decline" in a Population of Peer Production Projects}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2018}, + pages = {355:1--355:7}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Do patterns of growth and stabilization found in large peer production systems such as Wikipedia occur in other communities? This study assesses the generalizability of Halfaker et al.'s influential 2013 paper on "The Rise and Decline of an Open Collaboration System." We replicate its tests of several theories related to newcomer retention and norm entrenchment using a dataset of hundreds of active peer production wikis from Wikia. We reproduce the subset of the findings from Halfaker and colleagues that we are able to test, comparing both the estimated signs and magnitudes of our models. Our results support the external validity of Halfaker et al.'s claims that quality control systems may limit the growth of peer production communities by deterring new contributors and that norms tend to become entrenched over time.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/7YEVSVQM/TeBlunthuis et al. 
- 2018 - Revisiting The Rise and Decline in a Population .pdf} +} + +@inproceedings{thornton_tagging_2012, + title = {Tagging Wikipedia: Collaboratively Creating a Category System}, + shorttitle = {Tagging {{Wikipedia}}}, + booktitle = {Proceedings of the 17th {{ACM International Conference}} on {{Supporting Group Work}}}, + author = {Thornton, Katherine and McDonald, David W.}, + date = {2012}, + series = {{{GROUP}} '12}, + pages = {219--228}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Category systems have traditionally been created by small committees of people who had authority over the system they were designing. With the rise of large-scale social media systems, category schemes are being created by groups with differing perspectives, values, and expectations for how categories will be used. Prior studies of social tagging and folksonomy focused on the application and evolution of the collective category scheme, but struggled to uncover some of the collective rationale undergirding the decision-making processes in those schemes. In this paper, we qualitatively analyze the early discussions among editors of Wikipedia about the design and creation of its category system. We highlight three themes that dominated the discussion: hierarchy, scope and navigation, and relate these themes to their more formal roots in the information science literature. We distill out four styles of collaboration with regard to category systems that apply broadly to social tagging and other folksonomies. 
We conclude the paper with implications for collaborative tools and category systems as applied to large-scale collaborative systems.}, + isbn = {978-1-4503-1486-2}, + keywords = {categorization,information organization,wikipedia}, + file = {/home/nathante/Zotero/storage/CCDWH5LG/Thornton and McDonald - 2012 - Tagging Wikipedia Collaboratively Creating a Cate.pdf;/home/nathante/Zotero/storage/JCMW5EKV/Thornton and McDonald - 2012 - Tagging Wikipedia Collaboratively Creating a Cate.pdf} +} + +@article{tripodi_ms_2021, + title = {Ms. {{Categorized}}: {{Gender}}, Notability, and Inequality on {{Wikipedia}}}, + shorttitle = {Ms. {{Categorized}}}, + author = {Tripodi, Francesca}, + date = {2021-06-27}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {14614448211023772}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Gender is one of the most pervasive and insidious forms of inequality. For example, English-language Wikipedia contains more than 1.5 million biographies about notable writers, inventors, and academics, but less than 19\% of these biographies are about women. To try and improve these statistics, activists host “edit-a-thons” to increase the visibility of notable women. While this strategy helps create several biographies previously inexistent, it fails to address a more inconspicuous form of gender exclusion. Drawing on ethnographic observations, interviews, and quantitative analysis of web-scraped metadata, this article demonstrates that biographies about women who meet Wikipedia’s criteria for inclusion are more frequently considered non-notable and nominated for deletion compared to men’s biographies. 
This disproportionate rate is another dimension of gender inequality previously unexplored by social scientists and provides broader insights into how women’s achievements are (under)valued.}, + langid = {english}, + keywords = {Articles for Deletion,gender gap,gender inequality,metadata,Wikipedia}, + file = {/home/nathante/Zotero/storage/IBR95ZNY/Tripodi_2021_Ms.pdf;/home/nathante/Zotero/storage/UMW2VMF9/Tripodi - 2021 - Ms. Categorized Gender, notability, and inequalit.pdf} +} + +@article{van_de_ven_explaining_1995, + title = {Explaining {{Development}} and {{Change}} in {{Organizations}}}, + author = {Van de Ven, Andrew H. and Poole, Marshall Scott}, + date = {1995-07-01}, + journaltitle = {Academy of Management Review}, + shortjournal = {ACAD MANAGE REV}, + volume = {20}, + number = {3}, + pages = {510--540}, + issn = {0363-7425, 1930-3807}, + abstract = {This article introduces four basic theories that may serve as building blocks for explaining processes of change in organizations: life cycle, teleology, dialectics, and evolution. These four theories represent different sequences of change events that are driven by different conceptual motors and operate at different organizational levels. 
This article identifies the circumstances when each theory applies and proposes how interplay among the theories produces a wide variety of more complex theories of change and development in organizational life.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/APD9T5KZ/258786.pdf;/home/nathante/Zotero/storage/FBX2F2XQ/510.html} +} + +@book{verhoef_community_2010, + title = {Community Ecology: Processes, Models, and Applications}, + shorttitle = {Community Ecology}, + author = {Verhoef, Herman A and Morin, Peter J}, + date = {2010}, + publisher = {{Oxford University Press}}, + location = {{Oxford}}, + isbn = {978-0-19-922897-3 978-0-19-922898-0}, + langid = {english}, + annotation = {OCLC: 876676566} +} + +@inproceedings{vincent_examining_2018, + title = {Examining {{Wikipedia}} with a Broader Lens: {{Quantifying}} the Value of {{Wikipedia}}'s Relationships with Other Large-Scale Online Communities}, + shorttitle = {Examining {{Wikipedia With}} a {{Broader Lens}}}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Vincent, Nicholas and Johnson, Isaac and Hecht, Brent}, + date = {2018}, + series = {{{CHI}} '18}, + pages = {566:1--566:13}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {The extensive Wikipedia literature has largely considered Wikipedia in isolation, outside of the context of its broader Internet ecosystem. Very recent research has demonstrated the significance of this limitation, identifying critical relationships between Google and Wikipedia that are highly relevant to many areas of Wikipedia-based research and practice. This paper extends this recent research beyond search engines to examine Wikipedia's relationships with large-scale online communities, Stack Overflow and Reddit in particular. We find evidence of consequential, albeit unidirectional relationships. 
Wikipedia provides substantial value to both communities, with Wikipedia content increasing visitation, engagement, and revenue, but we find little evidence that these websites contribute to Wikipedia in return. Overall, these findings highlight important connections between Wikipedia and its broader ecosystem that should be considered by researchers studying Wikipedia. Critically, our results also emphasize the key role that volunteer-created Wikipedia content plays in improving other websites, even contributing to revenue generation.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/8YF9QUFS/Vincent et al. - 2018 - Examining Wikipedia With a Broader Lens Quantifyi.pdf;/home/nathante/Zotero/storage/FHXYQSZK/Vincent et al. - 2018 - Examining Wikipedia With a Broader Lens Quantifyi.pdf} +} + +@article{wang_data_2016, + title = {Data Based Identification and Prediction of Nonlinear and Complex Dynamical Systems}, + author = {Wang, Wen-Xu and Lai, Ying-Cheng and Grebogi, Celso}, + date = {2016-07-12}, + journaltitle = {Physics Reports}, + shortjournal = {Physics Reports}, + series = {Data Based Identification and Prediction of Nonlinear and Complex Dynamical Systems}, + volume = {644}, + pages = {1--76}, + issn = {0370-1573}, + abstract = {The problem of reconstructing nonlinear and complex dynamical systems from measured data or time series is central to many scientific disciplines including physical, biological, computer, and social sciences, as well as engineering and economics. The classic approach to phase-space reconstruction through the methodology of delay-coordinate embedding has been practiced for more than three decades, but the paradigm is effective mostly for low-dimensional dynamical systems. Often, the methodology yields only a topological correspondence of the original system. 
There are situations in various fields of science and engineering where the systems of interest are complex and high dimensional with many interacting components. A complex system typically exhibits a rich variety of collective dynamics, and it is of great interest to be able to detect, classify, understand, predict, and control the dynamics using data that are becoming increasingly accessible due to the advances of modern information technology. To accomplish these goals, especially prediction and control, an accurate reconstruction of the original system is required. Nonlinear and complex systems identification aims at inferring, from data, the mathematical equations that govern the dynamical evolution and the complex interaction patterns, or topology, among the various components of the system. With successful reconstruction of the system equations and the connecting topology, it may be possible to address challenging and significant problems such as identification of causal relations among the interacting components and detection of hidden nodes. The “inverse” problem thus presents a grand challenge, requiring new paradigms beyond the traditional delay-coordinate embedding methodology. The past fifteen years have witnessed rapid development of contemporary complex graph theory with broad applications in interdisciplinary science and engineering. The combination of graph, information, and nonlinear dynamical systems theories with tools from statistical physics, optimization, engineering control, applied mathematics, and scientific computing enables the development of a number of paradigms to address the problem of nonlinear and complex systems reconstruction. In this Review, we describe the recent advances in this forefront and rapidly evolving field, with a focus on compressive sensing based methods. 
In particular, compressive sensing is a paradigm developed in recent years in applied mathematics, electrical engineering, and nonlinear physics to reconstruct sparse signals using only limited data. It has broad applications ranging from image compression/reconstruction to the analysis of large-scale sensor networks, and it has become a powerful technique to obtain high-fidelity signals for applications where sufficient observations are not available. We will describe in detail how compressive sensing can be exploited to address a diverse array of problems in data based reconstruction of nonlinear and complex networked systems. The problems include identification of chaotic systems and prediction of catastrophic bifurcations, forecasting future attractors of time-varying nonlinear systems, reconstruction of complex networks with oscillatory and evolutionary game dynamics, detection of hidden nodes, identification of chaotic elements in neuronal networks, reconstruction of complex geospatial networks and nodal positioning, and reconstruction of complex spreading networks with binary data.. A number of alternative methods, such as those based on system response to external driving, synchronization, and noise-induced dynamical correlation, will also be discussed. Due to the high relevance of network reconstruction to biological sciences, a special section is devoted to a brief survey of the current methods to infer biological networks. Finally, a number of open problems including control and controllability of complex nonlinear dynamical networks are discussed. The methods outlined in this Review are principled on various concepts in complexity science and engineering such as phase transitions, bifurcations, stabilities, and robustness. 
The methodologies have the potential to significantly improve our ability to understand a variety of complex dynamical systems ranging from gene regulatory systems to social networks toward the ultimate goal of controlling such systems.}, + file = {/home/nathante/Zotero/storage/UUYAPUUB/Wang et al. - 2016 - Data based identification and prediction of nonlin.pdf;/home/nathante/Zotero/storage/PWJCA6NU/S037015731630134X.html} +} + +@article{wang_impact_2012, + ids = {wang_impact_2013}, + title = {The Impact of Membership Overlap on Growth: {{An}} Ecological Competition View of Online Groups}, + shorttitle = {The Impact of Membership Overlap on Growth}, + author = {Wang, Xiaoqing and Butler, Brian S. and Ren, Yuqing}, + date = {2012-06-15}, + journaltitle = {Organization Science}, + shortjournal = {Organization Science}, + volume = {24}, + number = {2}, + pages = {414--431}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {The dominant narrative of the Internet has been one of unconstrained growth, abundance, and plenitude. It is in this context that new forms of organizing, such as online groups, have emerged. However, the same factors that underlie the utopian narrative of Internet life also give rise to numerous online groups, many of which fail to attract participants or to provide significant value. This suggests that despite the potential transformative nature of modern information technology, issues of scarcity, competition, and context may remain critical to the performance and functioning of online groups. In this paper, we draw from organizational ecology theories to develop an ecological view of online groups to explain how overlapping membership among online groups causes intergroup competition for member attention and affects a group's ability to grow. Hypotheses regarding the effects of group size, age, and membership overlap on growth are proposed and tested with data from a 64-month, longitudinal sample of 240 online discussion groups. 
The analysis shows that sharing members with other groups reduced future growth rates, suggesting that membership overlap puts competitive pressure on online groups. Our results also suggest that, compared with smaller and younger groups, larger and older groups experience greater difficulty in growing their membership. In addition, larger groups were more vulnerable to competitive pressure than smaller groups: larger groups experienced greater difficulty in growing their membership than smaller groups as competition intensified. Overall, our findings show how an abundance of opportunities afforded by technologies can create scarcity in user time and effort, which increases competitive pressure on online groups. Our ecological view extends organizational ecology theory to new organizational forms online and highlights the importance of studying the competitive environment of online groups.}, + file = {/home/nathante/Zotero/storage/3WI37Y9S/Wang et al. - 2013 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/D7GAZURV/Wang et al. - 2012 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/EQSW25XD/Wang et al. - 2012 - The impact of membership overlap on growth An eco.pdf;/home/nathante/Zotero/storage/8QDPVTSM/orsc.1120.html;/home/nathante/Zotero/storage/IK6SB3L8/orsc.1120.html} +} + +@article{williamson_economics_1981, + title = {The Economics of Organization: {{The}} Transaction Cost Approach}, + author = {Williamson, Oliver E.}, + date = {1981-11}, + journaltitle = {The American Journal of Sociology}, + volume = {87}, + number = {3}, + eprint = {2778934}, + eprinttype = {jstor}, + pages = {548--577}, + issn = {00029602}, + abstract = {The transaction cost approach to the study of economic organization regards the transaction as the basic unit of analysis and holds that an understanding of transaction cost economizing is central to the study of organizations. 
Applications of this approach require that transactions be dimensionalized and that alternative governance structures be described. Economizing is accomplished by assigning transactions to governance structures in a discriminating way. The approach applies both to the determination of efficient boundaries, as between firms and markets, and to the organization of internal transactions, including the design of employment relations. The approach is compared and contrasted with selected parts of the organization theory literature.}, + keywords = {Economics,Sociology}, + file = {/home/nathante/Zotero/storage/JHWPCT8H/Williamson - 1981 - The economics of organization The transaction cos.pdf} +} + +@book{worster_natures_1994, + title = {Nature's Economy: A History of Ecological Ideas}, + shorttitle = {Nature's Economy}, + author = {Worster, Donald}, + date = {1994}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge; New York, NY, USA}}, + abstract = {Nature's Economy is a wide-ranging investigation of ecology's past. It traces the origins of the concept, discusses the thinkers who have shaped it, and shows how it in turn has shaped the modern perception of our place in nature.}, + isbn = {978-1-107-26680-3}, + langid = {english}, + annotation = {OCLC: 855524849}, + file = {/home/nathante/Zotero/storage/E2XXC7KJ/(Studies in Environment and History) Worster D.-Nature's Economy_ A History of Ecological Ideas-Cambridge University Press (1994).djvu} +} + +@article{ye_distinguishing_2015, + title = {Distinguishing Time-Delayed Causal Interactions Using Convergent Cross Mapping}, + author = {Ye, Hao and Deyle, Ethan R. and Gilarranz, Luis J. and Sugihara, George}, + date = {2015-10-05}, + journaltitle = {Scientific Reports}, + volume = {5}, + pages = {14750}, + issn = {2045-2322}, + abstract = {An important problem across many scientific fields is the identification of causal effects from observational data alone. 
Recent methods (convergent cross mapping, CCM) have made substantial progress on this problem by applying the idea of nonlinear attractor reconstruction to time series data. Here, we expand upon the technique of CCM by explicitly considering time lags. Applying this extended method to representative examples (model simulations, a laboratory predator-prey experiment, temperature and greenhouse gas reconstructions from the Vostok ice core, and long-term ecological time series collected in the Southern California Bight), we demonstrate the ability to identify different time-delayed interactions, distinguish between synchrony induced by strong unidirectional-forcing and true bidirectional causality, and resolve transitive causal chains.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/V7Z48B5L/Ye et al. - 2015 - Distinguishing time-delayed causal interactions us.pdf;/home/nathante/Zotero/storage/ZQPFWK7T/srep14750.html} +} + +@article{yu_out_2017, + title = {Out {{With The Old}}, {{In With The New}}?: {{Unpacking Member Turnover}} in {{Online Production Groups}}}, + shorttitle = {Out {{With The Old}}, {{In With The New}}?}, + author = {Yu, Bowen and Wang, Xinyi and Lin, Allen Yilun and Ren, Yuqing and Terveen, Loren and Zhu, Haiyi}, + date = {2017-12-06}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {1}, + pages = {1--19}, + issn = {25730142}, + issue = {CSCW}, + langid = {english}, + file = {/home/nathante/Zotero/storage/Z8R9ZKUE/Yu et al. - 2017 - Out With The Old, In With The New Unpacking Memb.pdf} +} + +@inproceedings{zhu_impact_2014, + title = {The Impact of Membership Overlap on the Survival of Online Communities}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Kraut, Robert E. 
and Kittur, Aniket}, + date = {2014-04-26}, + series = {{{CHI}} '14}, + pages = {281--290}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {If the people belong to multiple online communities, their joint membership can influence the survival of each of the communities to which they belong. Communities with many joint memberships may struggle to get enough of their members' time and attention, but find it easy to import best practices from other communities. In this paper, we study the effects of membership overlap on the survival of online communities. By analyzing the historical data of 5673 Wikia communities, we find that higher levels of membership overlap are positively associated with higher survival rates of online communities. Furthermore, we find that it is beneficial for young communities to have shared members who play a central role in other mature communities. Our contributions are two-fold. Theoretically, by examining the impact of membership overlap on the survival of online communities we identified an important mechanism underlying the success of online communities. Practically, our findings may guide community creators on how to effectively manage their members, and tool designers on how to support this task.}, + isbn = {978-1-4503-2473-1}, + keywords = {membership overlap,online communities,survival analysis}, + file = {/home/nathante/Zotero/storage/GV2D7ZKS/Zhu et al. - 2014 - The Impact of Membership Overlap on the Survival o.pdf;/home/nathante/Zotero/storage/IY4RTSGD/Zhu et al. - 2014 - The impact of membership overlap on the survival o.pdf;/home/nathante/Zotero/storage/JZE5JGAZ/Zhu et al. 
- 2014 - The impact of membership overlap on the survival o.pdf} +} + +@inproceedings{zhu_selecting_2014, + title = {Selecting an Effective Niche: {{An}} Ecological View of the Success of Online Communities}, + shorttitle = {Selecting an Effective Niche}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Chen, Jilin and Matthews, Tara and Pal, Aditya and Badenes, Hernan and Kraut, Robert E.}, + date = {2014}, + series = {{{CHI}} '14}, + pages = {301--310}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Online communities serve various important functions, but many fail to thrive. Research on community success has traditionally focused on internal factors. In contrast, we take an ecological view to understand how the success of a community is influenced by other communities. We measured a community's relationship with other communities - its "niche" - through four dimensions: topic overlap, shared members, content linking, and shared offline organizational affiliation. We used a mixed-method approach, combining the quantitative analysis of 9495 online enterprise communities and interviews with community members. Our results show that too little or too much overlap in topic with other communities causes a community's activity to suffer. We also show that this main result is moderated in predictable ways by whether the community shares members with, links to content in, or shares an organizational affiliation with other communities. These findings provide new insight on community success, guiding online community designers on how to effectively position their community in relation to others.}, + isbn = {978-1-4503-2473-1}, + venue = {Toronto, Ontario, Canada}, + keywords = {online communities,success,topic overlap,workplace}, + file = {/home/nathante/Zotero/storage/FNS9RSWC/Zhu et al. 
- 2014 - Selecting an Effective Niche An Ecological View o.pdf;/home/nathante/Zotero/storage/KIHWVKUQ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf;/home/nathante/Zotero/storage/RFMX2CBJ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf} +} + + diff --git a/dissertations/nathante_uw_2021/ch1_intro.tex b/dissertations/nathante_uw_2021/ch1_intro.tex new file mode 100644 index 0000000..5a82e11 --- /dev/null +++ b/dissertations/nathante_uw_2021/ch1_intro.tex @@ -0,0 +1,282 @@ +% \maketitle + +Would Wikipedia be one of the most visited websites in the world if other online collaborative encyclopedia projects had been more established when it was founded? Or was Wikipedia helped by the fact that its predecessors had engaged and trained hundreds of its future contributors? Do new discussion communities on Reddit compete with existing communities for contributors? Is the evolving world of online communities better understood as a competitive struggle for resources or as symbiotic relationships that support a web of interdependent communities? +How does the environment of existing online communities shape the growth, performance, and impact of new groups? + +Answering these questions requires an \textit{ecological understanding} of online communities that accounts for the complex dynamic interactions between communities and their environments. +Prior studies of the growth, survival, and success of online communities have focused almost exclusively on communities' internal features \citep{kraut_building_2012} and have largely neglected environmental factors \citep[e.g.,][]{halfaker_rise_2013, kraut_building_2012, schweik_internet_2012, shaw_laboratories_2014, teblunthuis_revisiting_2018}. +Analyses from this ``focal organization perspective'' \citep{hannan_organizational_1989} typically account for only a small amount of variation in communities' growth, longevity, and performance. 
Ecology provides a compelling alternative theoretical approach. In biology and organization studies, ecological approaches have shown that success is largely---and sometimes overwhelmingly---a function of what other groups are doing \citep{hannan_organizational_1989, worster_natures_1994}. + +Ecology is a scientific approach to understanding how interdependence between individuals, collectives, and environments shapes the world \citep{worster_natures_1994}. +%Ecology grew from roots in 18\textsuperscript{th} century naturalism into a science of interrelationships between organisms, between species, and between organisms and the environment \citeh{worster_natures_1994}. +Although first developed to understand biological ecosystems, ecology's theories and methods influenced the development of human ecology, and later of organizational ecology \citep{hannan_organizational_1989, mcpherson_ecology_1983, park_human_1936}. Organizational ecology is a vast field in social science that explains the success, failure, and evolution of newspapers, microbreweries, social movements, and voluntary organizations \citep{carroll_concentration_1985, carroll_why_2000, mcpherson_ecology_1983, soule_competition_2008}. +Ecology can provide practical solutions to problems in complex systems like effective wildlife management, pest control, and sustainable utilization of renewable resources. In organization science, it provides compelling explanations for industrial life-cycles, organizational specialization, and patterns of collaborative partnerships. + +Recent research in social computing on interdependence between online communities suggests that ecological analyses can provide not only novel scientific understandings but also viable community management strategies \citep{chandrasekharan_you_2017, kiene_managing_2018, tan_tracing_2018, teblunthuis_density_2017, wang_impact_2013, vincent_examining_2018, zhu_impact_2014}.
For example \citet{chandrasekharan_you_2017} found evidence that banning hateful communities on Reddit decreased hate speech in related communities. +Community outcomes such as growth and survival depend on membership overlaps between communities \citep{wang_impact_2013, zhu_impact_2014}, but the nature of the resulting relationships remains unclear. \citet{wang_impact_2013} found that participant overlaps between Usenet groups were associated with \emph{competition} and decreased participation in both communities. However, \citet{zhu_impact_2014} found evidence that membership overlap between wikis is associated with \emph{mutualism} and benefits for both communities. Such contradictory findings point to the need for deeper, more precise theories of how ecological dynamics play out in online communities. +% In this sense, ecological approaches sit at the edge of established knowledge in field of social computing. +% That said, bare novelty itself is not a compelling motivation for a research program. + + +Online communities are a dynamic, growing, and increasingly important form of organization that enable collaboration on public goods in contrast to the private goods production most studied in organizational ecology \citep{benkler_peer_2015}. +Through peer production, the Wikipedia community has produced the largest collaborative effort and most important reference work in human history. +Free/libre open source software (FLOSS) communities have produced tens of billions of dollars worth of software made freely available online \citep{benkler_peer_2015}. +Other online communities like subreddits provide information, social support, and entertainment to millions of people. 
+Ecological research into online communities may enable us to understand \emph{why} and \emph{how}, of the millions of attempts to build communities, only a tiny percentage manage to mobilize participants and to sustain collaboration \citep{schweik_internet_2012, hill_studying_2019, shirky_here_2008}. +However, online communities are vastly different from the organizations organizational ecology was developed to study. +Classical hypotheses in organizational ecology are built on a system of interlinked assumptions that were informed by background knowledge of 20\textsuperscript{th} century organizations. +I argue that past applications of organizational ecology to online communities have not anticipated how this change in context could lead to changes in theoretical predictions. + +%Much less isi undertsoo +Therefore, I do not pick up organizational ecology as an authoritative model or set of laws capable of explaining the growth and decline of online communities. +Instead, I draw some ideas directly from mathematical ecology, a subfield of applied mathematics, to better understand the foundational assumptions of an ecological perspective. +On this foundation, I see this project as building an empirical basis for an ecological theory of online communities that starts by inferring competitive and mutualistic relationships between online communities. +% Studying these relationships is the place to start both methodologically and conceptually. +Once ecological dynamics between communities are demonstrated to have measurable relationships with the growth and performance of online communities, we can more fully explain their origins and consequences. + +My empirical studies are framed in terms of how the ecological approach provides new insights and ways of studying interdependent online communities.
+However, these studies' methodological designs and empirical results also contribute to organizational ecology by expanding its application beyond the scope of its founding assumptions. +The developers of organizational ecology developed strong intuitions about when organizations will complement or compete with one another based on claims from prior organization theory including that organizations compete over resources, are shaped into established forms by homogenizing pressures, are defined by strong boundaries, and lack capacities for rational adaptation. +Because they typically lacked sufficient longitudinal data to infer when organizations are competitors or mutualists, they have rarely tested these assumptions directly, but rather test theoretical predictions about outcomes like organizational formation, survival, and change \citep{hannan_organizational_1989, baum_ecological_2006}. +The literature on interdependence between online communities is relatively young and provides less background knowledge that can inform such assumptions, but data from online communities enables a stronger empirical basis for understanding relationships between groups. + +Although the time series models in Chapters 2 and 4 depend on fewer assumptions about when competition or mutualism occur compared to the most influential frameworks of organizational ecology, Chapter 2's key finding, that mutualism is more common than competition among online groups with highly overlapping users, radically departs from organizational ecology which has found that both firms and voluntary organizations with highly overlapping resources typically compete \citep{mcpherson_ecology_1983, hannan_organizational_1989}. Although I initially planned to continue developing model-based approaches to explaining the performance of online communities, to find widespread mutualism was surprising and demanded qualitative validation and explanation in terms of the experiences of online community members. 
Therefore, Chapter 3 reports on an interview-based study of members of highly overlapping online communities. It concludes that ``no community can do everything'' because groups of overlapping communities are characterized by high degrees of specialization. Each community seems to provide a different set of benefits. As Chapter 3 discusses, this is consistent with ecological theory which suggests that highly specialized groups with overlapping memberships are unlikely to compete and that groups provide complementary benefits that can ``spill over'' and drive mutualistic dynamics. + +Knowledge from interviewees includes invaluable cases of mutualism, grounded descriptions of relationships between overlapping online communities, a strong sense that Chapter 2's models are right about the ubiquity of mutualism, and clues about the importance of specialization. However, the interviewees did not provide much to explain processes by which systems of specialized mutualistic overlapping communities develop. In Chapter 4, I draw from strands of organizational ecology that use evolutionary theory as a foundation for processes of change. In addition, the models from Chapter 2 are effectively the most simple time series models that might be used to infer ecological interactions. They depend on many assumptions that are probably unrealistic in the setting of online communities. Therefore, Chapter 4 adopts non-linear time series models developed by mathematical ecologists to study nonlinear dynamics. These models are important to Chapter 4's study design for investigating change processes and also compel us to conceptualize competition and mutualism interactions that are not static and fixed, but that vary over time. + +% This doesn't quite work yet. Need to develop the insights for organizational ecology. 
+In sum, online communities are a kind of organization, at least in the sense that organizations are ``constructed as tools for specific kinds of collective action'' \citep{hannan_organizational_1989}. Even when online communities are constructed to facilitate communication with strangers on the internet about a topic, this facilitation depends on the sustained contributions of members to keep the conversation going and on structures for regulating behavior to maintain a suitable conversation space \citep{kraut_building_2012}. Online communities bear other similarities to organizations including their use of formalized roles, rules, and procedures and their use of boundaries defining the scope of activity \citep{foote_formation_2019}. + +That said, online communities are distinctive in that they are public-good producing voluntary groups constructed through computer-mediated communication. +Features of online communities depart in important ways from the types of organizations that classical organizational ecology has studied the most. Online communities (1) are dependent on volunteer participation, (2) allow participation at very low levels of granularity, (3) are weakly bounded, and (4) face different potential sources of inertia. +The remainder of this chapter discusses the methodological and theoretical implications of these interrelated features for ecological analysis. +% Some of these features are often said to be made possible by the digital media through which online communities are constructed. +% Drawing on ecological theory and findings from the empirical chapters, I will suggest a role for ecological dynamics in the processes that give rise to and stabilize these structures. + +% + +% Online communities (1) produce public rather than private goods and (2) online communities are online.
+\section{Online Communities as Voluntary Organizations} + +Ecological theories conceive of dynamics among individuals that share resources needed for production and survival to explain change in the size and composition of groups over time. +Organizational ecology explains macro-level social change in economies and industries in an ``evolutionary'' style through mechanisms of the selection and adaptation of firms in a changing resource environment \citep{ven_explaining_1995, hannan_organizational_1989}. +Each organization's survival depends on its \emph{niche} in the resource environment. The notion of a niche is central, if sometimes slippery, and aims to capture the position of an organization in an abstract, high-dimensional resource space \citep{hannan_organizational_1989}. +Organizational ecology was developed mainly to study commercial firms whose survival ultimately depends on their potential to offer returns on investment. +Profitability of these firms typically hinged on expansion to control greater quantities of resources, provide economies of scale and create the potential for monopolistic rents \citep{hannan_organizational_1989}. +Niches for such organizations are often defined in terms of established categories of organizational forms \citep{carroll_why_2000}, technological production factors \citep{dobrev_dynamics_2001}, or economic outputs \citep{dobrev_shifting_2003}. + +How should we define niches for online communities? +They use the low-cost communication systems of the Internet to coordinate voluntary production of public information goods like encyclopedias, FLOSS programs, and cultural artifacts \citep{benkler_wealth_2006}.
+An online community might produce something damaging to the broader society, such as computer viruses or misinformation, but the types of online communities considered here produce public goods defined as \emph{non-excludible} (in principle, an individual cannot be excluded from utilizing them) and \emph{non-rival} (utilization does not diminish the good's value). +Therefore, the survival of online communities depends not on capacities to generate revenues and capture profits, but on the consistent participation of volunteer members who have heterogeneous motivations for contributing to a public good \citep{lampe_motivations_2010, shah_motivation_2006}. + +Dependence on volunteer members is something online communities have in common with voluntary organizations like social clubs, churches, or fraternal organizations \citep{bimber_collective_2012}. +Voluntary organizations have been studied in organizational ecology by J. Miller McPherson and collaborators who investigate overlapping niches defined by organizational members and associated demographic patterns \citep{mcpherson_evolution_1991, popielarz_edge_1995, mcpherson_ecology_1983, mcpherson_testing_1996}. +For example, \citet{popielarz_edge_1995} locate voluntary organizations' niches in ``Blau Space'' corresponding to the distribution of their members' demographic characteristics and explain how voluntary organizations tended to become racially or educationally homogeneous in terms of competitive dynamics over members' time and attention \citep{popielarz_edge_1995}. Similar to McPherson, ecological studies of online communities, including the present work, have defined niches of online communities in terms of their participants \citep{wang_impact_2012, zhu_impact_2014}. + +However, membership is not the only plausible way to define an online community's niche. As a consequence of their nature as public-good producing voluntary organizations, their survival does not depend on expansion. 
Although influential models of the growth of online communities have assumed that motivations to participate in online communities increase as communities grow \citep{butler_membership_2001, kraut_building_2012}, recent surveys and interviews find that large and small communities provide different sorts of benefits \citep{hwang_why_2021, foote_starting_2017}. As Chapter 3 finds, larger communities provide steady streams of content and larger potential audiences, but are less capable of providing tight-knit socialization or specialized information. + +This kind of size-dependent specialization resembles ``niche-width'' arguments in organizational ecology. For example, \citet{carroll_concentration_1985} seeks to explain the coexistence of large and small organizations within an industry by proposing that generalists, who have wide niches, under-perform in certain areas of the resource space. +Smaller organizations can exploit this under-performance by specializing in these areas. +However, as \citet{dobrev_dynamics_2001} argue, specialist organizations can grow large in certain circumstances and then organizational size can be uncorrelated with niche width. This is the case with online communities. +For example, the subreddit \texttt{r/prequelmemes} is dedicated to making and sharing memes only about the Star Wars prequel films and is the largest Star Wars related community on Reddit. +Therefore, it is important to recognize that memberships may not capture all the relevant dimensions of an online community's niche. +Indeed, Chapter 3 finds at least three dimensions of specialization in terms of the benefits that members obtain from online communities. +These are (1) access to the largest possible audience, (2) socialization in a homophilous community and (3) ability to find specialized content or information. + +% TODO note on the using topic overlaps in Chapter 4. +Still, for the purposes of the studies in Chapters 2 and 4, membership overlaps provide a number of advantages.
+The benefits of participation may not be easily observed, so measuring online community niches in terms of participation, which is observable, is empirically tractable. +Furthermore, findings in Chapter 3 suggest that community leaders do not normally seek to appropriate private value from their communities. If so, then it seems more likely that ecological dynamics that shape the growth and survival of online groups will have more to do with participation, the main rival resource on which online communities depend. +Finally, studies in organizational ecology have set out to test models that depend on linear or curvilinear relationships between niche overlap and competitive pressures, and this required stronger assumptions around the measurement of niche width than those needed here \citep{carroll_concentration_1985, dobrev_shifting_2003}. +Chapters 2 and 4 use membership overlaps to identify clusters of highly related communities while time-series models are used to infer competition and mutualism. These models bear their own assumptions, but the threat to scientific validity moves from the task of measurement to the task of statistical inference. Chapter 5 discusses how expanding definitions of an online community's niche to account for additional dimensions of specialization will be important for future work. + +\section{Openness Allows Dividing Time into Little Chunks} + +Although following McPherson's use of membership-based niches makes sense because online communities depend on voluntary contributions to produce public goods, a second key feature of online communities departs from the voluntary organizations in McPherson's studies. This is that online communities provide opportunities for ``tiny acts of participation'' like signing a petition, fixing a typo on Wikipedia, or ``liking'' a post. 
+When individuals can act in small granular ways they can easily participate in many online communities in rapid succession \citep{benkler_wealth_2006, margetts_political_2015, tan_all_2015}. +% What are the implications of this change for ecological interactions among online communities? +By contrast, McPherson assumes that organizations conduct their activities in face-to-face in-person meetings and theorizes that constraints of time and space strongly limit the number of organizations to which an individual can belong \citep{mcpherson_ecology_1983}. +After work and other obligations, it seems unlikely that many people would have time to belong to very many voluntary organizations at once, so participation in an organization is highly rival and overlaps in membership are tightly coupled with competition. + +Chapter 2 and prior ecological studies of online community participation follow this intuition by considering membership to be a rival resource, and assuming that online communities with overlapping users are those likely to have significant ecological interactions \citep{butler_membership_2001,wang_impact_2012}. However Chapter 2 avoids assuming that these interactions will be competitive and instead finds that mutualism among highly related online communities is about 4 times as common as competition and in Chapter 3 interviewees described how these related communities have specialized roles. Together, these findings suggest that the growth and survival of a sufficiently established community is not often limited by competition over membership. +Why is membership overlap so strongly associated with competition in the context of in-person voluntary organizations but highly overlapping online communities are often mutualists? + +% On the other hand, online groups also rely on \textit{nonrival} resources. 
They can even produce connective and communal public goods like opportunities to communicate or collections of information \cite{fulk_connective_1996} which can be ``antirival'' when their usefulness increases as a result of others using them \cite{kubiszewski_production_2010, weber_political_2000}. For example, the usefulness of a communication network increases as more people join it \cite{fulk_connective_1996, katz_network_1985}. Similarly, the usefulness of an information good can increase as more people come to know, refer to, and depend upon it \cite{kubiszewski_production_2010, weber_political_2000}. +% % as when +% %Awareness that an online group provides an audience can motivate participation \cite{zhang_group_2011}. +% If multiple online groups help build the same connective or communal public goods, they may form mutualistic interactions where contributions to one group may ``spill over'' and motivate participation in mutualist groups \cite{zhu_impact_2014}. +% Ecological approaches seek to understand how different types of resources will limit or promote growth. + +% TODO cite aaron swartz + +Online communities ``transcend time and space'' using asynchronous and low-cost telecommunications \citep{jarvenpaa_communication_1998, peters_speaking_1999}. Although individuals are fundamentally constrained in their available time and energy, they can finely divide their time over many communities. \citet{margetts_political_2015} suggest this less ``lumpy'' form of participation helps enable online collective action. +Similarly, the fine-grained division of individuals' activities across communities is closely related to the success of online communities having ``open'' organizations with minimal barriers to participation. \citeauthor{benkler_wealth_2006} claims that the fact that information is non-rival is central to how online communities successfully peer-produce public information goods. 
This characteristic of information goods also enables open organizational structures so that peer-production projects can incorporate contributions from peripheral contributors \citep{benkler_wealth_2006, bryant_becoming_2005}. +Together, these factors allow levels of participation that are even more unequal than those found in other voluntary organizations. For example, while ``the top 20\% of volunteering individuals contributed 50\% of the time volunteered in the USA'' in 2016, the top 1\% of Wikipedia editors accounted for 77\% of the effort put into editing Wikipedia \citep{matei_structural_2017}. +% Such inequalities in the degrees of participation in an online community have often been conceptualized as a division between ``core'' and ``peripheral'' members. + +% TODO add citations +When people can spread their time across many open communities, this also shapes the nature of membership in a community and the boundaries between communities. +Organizational ecology was developed with the relatively impermeable boundaries of commercial organizations in mind \citep{hannan_organizational_1989}. +This is a second reason why \citeauthor{mcpherson_ecology_1983}'s studies of voluntary organizations provide a good model for studying the ecology of online communities. +While commercial firms have relatively strong boundaries around internal activities and control over much of their employees' time, voluntary organizations open up more of their activities to outsiders in order to attract participants. +As noted above, \citeauthor{mcpherson_ecology_1983} assumes that voluntary organizations with overlapping niches will compete. +However, mathematical ecology shows that niche overlaps do not necessarily imply competition in complex systems involving multiple organizations or resource dimensions because factors other than the overlapping resources can limit growth \citep{armstrong_competitive_1980}. 
+% and more often because of internal limitations of the community's ability to provide benefits to its membership. + + +% TODO cite some stuff about +Finally, by modeling community size as the ``tiny act of participation'' of commenting in a given week, the analysis of ecological dynamics in Chapters 2 and 4 might be predisposed to find mutualism. Although quantifying time spent on contributions might not be possible in the case of Reddit (how would we count the time someone spends creating art to share with an online community?), it is possible that a study of participation intensity might find weaker mutualism and stronger competition if small contributions from peripheral members are less rival than larger contributions from core members. +On the other hand, if these contributions take the form of non-rival information goods, then communities will be unlikely to compete over them (an artist is likely to share their effortful creations with all communities from which they desire an audience). +The findings of Chapters 2 and 3 both suggest that part of why subreddits with overlapping memberships can provide complementary benefits and form mutualistic ecological relationships is that membership in multiple online communities is relatively inexpensive. If subreddits became closed organizations, perhaps by introducing pricey membership fees, one would expect stronger competition over membership. In this way, openness appears to provide conditions less conducive to competition and more conducive to mutualism. + +% Stuff about organizational boundaries here + +\section{How Should Online Communities be Divided into Organizational Forms?} + +Related to openness and the predominance of mutualism is Chapter 3's finding of extensive specialization among online communities that have similar topics and similar members. One rarely observes more than one active subreddit with similar topics that is not differentiated in some significant way, often in size, rules or topic. 
Groups of related online communities thus depart from the organizational forms studied in organizational ecology in ways that trouble the specific strands of organizational ecology used by prior research on online communities. + +% As described in Chapter 2, early studies of competition and mutualism online communities adopted density dependence theory, perhaps because it is the most influential theory in organizational ecology. Population ecology is a set of theories and models for analyzing competition and mutualism among a set of groups that are assumed to be very similar to one another. Community ecology on the other hand studies relationships between groups without assuming they are similar. + +Chapter 2 defines its approach as community ecology because it focuses on relationships between different online communities. +This may surprise readers of the organizational ecology literature in sociology, which defines community ecology as the study of interactions between populations of organizations, but I argue it is reasonable given the heterogeneity of overlapping online communities. +I will also note that studies in Communication have applied the community ecology approach to study competition and mutualism between telecommunication companies \citep{dimmick_theory_1984, barnett_competition_1987} or networks of organizational relationships \citep{dimmick_theory_1984, margolin_normative_2012}. However, such studies are a small minority in the literature. + +\citet{aldrich_organizations_2006}, \citet{hannan_organizational_1989}, and \citet{astley_two_1985} all consider community ecology as having a distinct level of analysis from population ecology. +They use levels of abstraction analogous to those used in biological ecology where a population is a set of individual organisms of the same species and a community is a set of interacting populations. 
+For these organizational ecologists, a population is a set of organizations having the same \emph{organizational form} and a community corresponds to an \emph{organizational field} of related organizational forms. + +The identification of an organizational form is of central importance. +Both organizational and mathematical ecologists are aware that population ecology models like density dependence depend on the assumption that the population under study is homogeneous in the sense all members of the population are equally subject to the same intra-population mutualistic and competitive forces. +Organizational ecologists have justified these assumptions by carefully demarcating different types of organizations into organizational forms theorizing that discrete boundaries around organizational forms are constructed by homogenizing features like efficient ways to bundle transactions \citep{williamson_economics_1981}, external regulatory frameworks, or other mechanisms of institutional isomorphism \citep{dimaggio_iron_1983,hannan_organizational_1989}. +Still, the definition of organizational forms in organizational ecology often amounts to accepting an established categorization. The fascinating question of how the processes by which such categorizations are socially constructed are related to the ecological dynamics within and between organizational forms has driven much work by Hannan and his collaborators in recent years \citep{pontikes_ecology_2014, hannan_logics_2007, hannan_concepts_2019}. 
+ +Although McPherson's series of papers on the ecology of voluntary organizations may best be described as a community ecology analysis of categories of voluntary organizations like ``sports'' or ``youth serving'' organizations, at times he resists analogizing organizations as biological populations: +\blockquote[\cite{mcpherson_ecology_1983}]{A population of organizations, then, is not a set of discrete creatures who must mate with each other to reproduce, but a froth of bubbles, constantly sharing or exchanging members, growing and dying, and being absorbed and segmented in response to changing conditions}. +In this instance as well as others, McPherson's papers sometimes slip from discussing ecological dynamics among different organizational forms, which is measured in the data, and between different organizations, which is not. In the above quote, McPherson clearly has a dynamic ecosystem of differentiated organizations in mind. Perhaps the set of ``sports'' organizations contains too much heterogeneity to constitute an organizational form. + +Later organizational ecologists studied diversity within an organizational population by appealing to a distinction between ``core'' features which define the organizational form and are mostly stable over time and ``peripheral'' features which are allowed to vary \citep{hannan_organizational_1989}. Organizational ecologists have studied how variation and specialization of peripheral features shapes competition within an organizational form. For example, \citet{dobrev_shifting_2003} studies how degrees of overlap among automotive firms' technological niches, measured as engine horsepower, changed over time and affected organizational survival. Similarly, Chapter 2 and prior ecological studies of online communities measure user overlap density to quantify how much a community's members participate in other communities \citep{zhu_impact_2014, zhu_selecting_2014, wang_impact_2012}. 
Chapter 4 takes this a step further by studying how dynamically shifting niches are related to competitive and mutualistic interactions. + +Organizational forms of online communities might be defined according to the platform hosting them. +Indeed, prior ecological studies of online communities have done exactly this and treated sets of communities sharing a platform like Usenet or Wikia as a population. +However, technological boundaries around platforms may not ensure sufficient homogeneity to justify treating these sets of communities as an organizational form. +One finds enormous diversity in the topics and purposes of communities upon exploring a platform like Reddit, Facebook Groups, or Wikia. +Chapter 3 finds that, even when topics and memberships are very similar, online communities are specialized in other dimensions. Although a platform clearly provides a set of common technological affordances, many platforms are flexible enough to allow a great deal of diversity in scopes, rules, and communities can greatly expand available affordances by using auxiliary technologies like bots \citep{kiene_technological_2019}. +It is thus questionable that overlapping features of online communities like memberships or topics are ``peripheral'' while the use of a platform is ``core'' and therefore it is difficult to identify populations of online communities \emph{a priori}. + +% Although Chapter 2 is framed as introducing a novel community ecology approach to social computing that is complementary to population ecology, these theoretical arguments suggest that defining may not be very useful when applied to online communities. + +When categorizations of organizations of interest are not well-understood, \citet{hannan_organizational_1989} recommend using numerical clustering to find divisions of organizational forms. +The quantitative analyses in Chapters 2 and 4 are all based on a clustering algorithm that groups subreddits with similar kinds of users. 
+I define these as ``ecological communities'' in a way that is consistent with the sense of Aldrich and Ruef, although they are interested in competition and mutualism between organizational forms. +However, as Chapter 3 demonstrates, this results not in clusters of online communities having similar forms, but in groups of subreddits whose topics are related but whose forms vary along dimensions of scope, size, and internal structures like rules. +Population ecology is designed to study the mutualistic and competitive processes among members of an organizational form. +Community ecology is designed to study mutualism and competition between populations of organizations having different forms. +Neither theory seems to fit exactly with subreddits, but Chapter 2 can be understood as advancing a community ecology analysis of organizational forms assumed to have a single member organization. If this seems overly nuanced, one can simply adopt the framing of Chapter 2 and ignore matters of organizational forms and fields and treat community ecology as a relational framework and population ecology as an environmental framework. + +% as a study of ecological interactions within clusters of online communities. + +% In contrast to Hannan and Freeman's approach, the inspiration for prior ecological research in online communities \citep{wang_impact_2012, zhu_impact_2014}, + +% describes a \emph{selection} process in which many online communities are created but fail to sustain participation if they do not find a sufficient niche. + +\section{Inertia and Adaptation} + +Organizational ecologists have tended to emphasize selection processes because organizational cores appear to change relatively little. The external homogenizing forces described above and internal factors like culture and routines that are difficult to change lead to ``structural inertia.'' Structural inertia limits an organization's ability to rationally adapt to a changing environment. 
Organizations typically lack sufficient information about their environments and the ability to coordinate change with sufficient precision in order to rationally adapt, especially when it comes to change in the ``core'' aspects of an organization \citep{hannan_structural_1984}. However, they also experience exceptional transformational periods that accompany an increased risk of failure \citep{aldrich_organizations_2006}. +If organizations are adaptive, then a teleological or functionalist explanation of organizational change may be better than an ecological one \citep{ven_explaining_1995} and theories of change in organizational fields should be based on Lamarckian adaptation-based evolution instead of Darwinian selection. + +Whether online communities can adapt has important consequences for design interventions aimed at improving the quality or safety of online spaces. Adaptive online communities may adopt new tools for moderation or quality control or implement policy changes to address newly uncovered problems. But online communities having substantial structural inertia will struggle to adapt, problems that go unaddressed will contribute to communities' declines, and solutions will largely emerge through the construction of new communities. A selection-based change process may be slower than an adaptive one because it will be limited by rates of community formation and decline. + +% Three possible types of explanations: leadership, membership composition, routinization! +Prior research into online communities suggests a relatively high degree of structural inertia, at least when it comes to policy \citep{teblunthuis_revisiting_2018, halfaker_rise_2013}, but the origins of this inertia are not obvious. +One explanation looks to the composition of contributors to an online community and sees social barriers to diverse newcomers as limiting capacities for change \citep{lam_wp:clubhouse?:_2011, tripodi_ms_2021,menking_people_2019}. 
+Another explanation is the entrenchment of oligarchical leadership \citep{shaw_laboratories_2014}, who may be conservative and resist change. +Yet in classical organizations, leaders often seek purposeful adaptation, but are foiled by internal sources of inertia like organizational culture, internal patronage networks, conflicts among stakeholders, and routines \citep{hannan_structural_1984, ven_explaining_1995}. Some of these inertial forces appear to have analogs in online communities such as the stability of emergent roles \citep{arazy_how_2017, arazy_functional_2015}, routines \citep{keegan_analyzing_2016}, and internal conflict that may stabilize policy \citep{shi_wisdom_2019}. + +Chapter 4 explores the relationship between ecological dynamics and adaptive processes in online communities by relaxing assumptions of the model in Chapter 2 to allow ecological interactions between online communities to vary over time. This allows us to explain that mutualism is more common than competition in Chapter 2 because periods of mutualistic interaction last longer than periods of competitive interaction. Finding that competitive and mutualistic dynamics in online communities are not static, but dynamic and vary over time sets up hypothesis tests about how online communities might adapt to avoid competition or increase mutualism. While I find evidence that communities increase their specialization by decreasing their user and topic overlaps in competitive conditions, I do not find that this decreases competition and increases mutualism. +This suggests that variations in competitive and mutualistic dynamics are driven by exogenous events and that, at least when it comes to positioning themselves with respect to one another, successful online communities have ``selected an effective niche'' \citep{zhu_selecting_2014}. 
+As discussed further in Chapter 5, the evidence from Chapter 4 does not support strong claims about whether mutualism is common because of adaptation or selection. Future work should seek to demonstrate the selection process in action. + + +%Given that they vary over time, if online communities act rationally to position themselves relative to each other in ways that optimize their mutualism, we might find temporal correlations between a community's changes in topic and membership overlap and its competitive and mutualistic relationships. + +%If not, + +\section{Conclusion: Contributions to Organizational Ecology} + +Organizational ecology began by asking ``Why are there so many kinds of organizations?'' \citep{hannan_organizational_1989, hannan_population_1977}. It provides a conceptual model of how people build systems of interdependent social structures within organizational fields, and a vast and rich literature that was initially developed to study firms in long-running commercial industries. Although \citet{hannan_organizational_1989} account for the demography of industrial unions in their theory, these unions had key characteristics in common with firms, including strong boundaries, pursuit of monopoly, and dependence on institutional legitimacy. In general, they had their ideological and historical origins in the age of bureaucratic rationalism \citep{hannan_organizational_1989}. Theories of organizational ecology have been widely applied to organizations in other contexts, most importantly voluntary organizations and social movements \citep{mcpherson_ecology_1983, soule_competition_2008, minkoff_interorganizational_1995, olzak_ecology_2001}. + +The best work of this kind meaningfully adapts organizational ecology to the new context. For example, \citet{soule_competition_2008} link organizational ecology to the resource mobilization theory of social movement organizations. 
Such works use organizational ecology as a ``theory of the middle range'' that is empirically grounded but has sufficient generality to bridge across multiple domains. However, organizational ecology is not a mature paradigm like thermodynamics where models can be treated as ``scientific laws'' and expected to make accurate predictions about new contexts without any conceptual modification \citep{kuhn_structure_1970}. +As discussed above, some basic concepts of the theory, like that of the organizational form, are difficult to apply to online communities. +When virtually all organizations in an organizational field are highly distinctive and no established system for categorization can be found, the concept of ``organizational form'' breaks down and so may the usefulness of distinguishing between the ``population'' and ``community'' levels of analysis. + +Despite these ontological concerns, as I argue in Chapter 2, density dependence theory's environmental perspective is still useful because the relationship between user overlap density and growth or survival seems to reflect the hospitality of an environment. +However, one must keep in mind that tests of density dependence theory in online communities have provided evidence in the form of weak correlations derived from observational data. +I suggest that a project to synthesize foundational concepts from organizational ecology with new empirically supported ideas about the interdependence between online communities will be a more effective strategy. + +The most important empirical finding, that mutualism is widespread, is supported by quantitative-qualitative triangulation. Using statistical methods, I have found that mutualism is much more common than competition among subreddits with highly overlapping users. 
Based on interviews with members of these subreddits, I have found that this widespread mutualism is consistent with their intuitions and I have surfaced a plausible explanation for it in how individuals seek multiple benefits from online communities and that communities with similar topics and overlapping users specialize in providing different types of benefits. + +Online communities provide granular longitudinal data of individual behaviors in overlapping groups that make it possible to effectively model and test such propositions. +Studies in organizational ecology have generally been limited to one organizational form or organizational field at a time. +This has made it difficult to test hypotheses about the scope conditions for ecological dynamics or their consequences. +The time series analysis strategies advanced in Chapters 2 and 4 make it possible to study ecological interactions on a much larger scale, and to justify statements about what kinds of relationships are typical and to model antecedents and consequences of these relationships. +It is important to recognize the limits of prior theories and quantitative tools. When results are puzzling or dead-ends are reached, talking to community members is likely to yield insights that open the way toward a solution. The project of this dissertation is to begin reconstructing organizational ecology in the relatively theory-poor but data-rich context of online communities. + +% I reconstruct organizational ecology +% project. +% infer a large number of competitive and mutualistic relationships groups instead of depending on an elaborate theoretical foundation. +% Chapter 2 uses this method to deconstruct theories like density dependence that were built upon assumptions of when organizations will be competitors or mutualists by inferring these relationships directly from the data. 
This begins the +% This widespread mutualism among online communities with overlapping members radically contrasts with the competition found among offline voluntary organizations and follows from important ways that online communities differ from classical organizations. +% The ``openness'' of online communities in conjunction with the use of digital media decrease the rivalrousness of membership, and therefore the potential for competition over members. +% That online communities exist to provide public benefits to their members and audiences and provide different types of benefits at different sizes means that they do not in general seek to increase their sizes. +% Unlike commercial firms, online communities do not have strong incentives to compete with each other. +% Many reasons suggest that overlapping online communities will be mutualists and few reasons are apparent for why multiple communities providing equivalent benefits would exist and compete. +% Yet, observing that mutualism is common does not explain the different roles of community founders, managers, and platform design in how systems of overlapping mutualistic online communities are organized. + +% Organizational ecology provides evolutionary modes of explanation for organizational change based on adaptation or selection processes \citep{ven_explaining_1995}. +% Early organizational ecology made strong assumptions that organizational cores change relatively little because of ``structural inertia'' introduced by the external homogenizing forces described above and also by internal factors like culture and routines that are difficult to change \citep{hannan_structural_1984}. +% Most organizations typically have neither sufficient information about their environments nor the ability to coordinate change with sufficient precision in order to rationally adapt, especially when it comes to change in the ``core'' aspects of an organization \citep{hannan_organizational_1989}. 
+% This model suggested that change in organizational forms was likely to be driven by organizational death and replacement instead of adaptation. + +% Paragaph below copied to chapter 4. +% Online communities also appear to have significant inertia that may come from multiple causes discussed above, but it is also conceivable that mutualism can emerge through an adaptive process that their openness makes possible \citep{mcpherson_testing_1996}. +% Suppose an individual chooses to participate in a community when they have the greatest expectation of finding a type of benefit. +% Through their participation, they can make the community a better place to find this type of benefit by contributing to the supply of resources their own content, attention, and efforts and by rewarding those who provide their benefits with thanks, votes and other signals of approval. +% When many individuals act in this way, their actions may collectively reinforce the ability of the community to provide the benefits in a process resembling the Schelling model of segregation \citep{schelling_micromotives_1978}. +% When communities overlap, large degrees of specialization may emerge through such a feedback loop \citep{mcpherson_testing_1996}. + +% Chapter 4 deepens the exploration of ecological dynamics by relaxing assumptions of the model in Chapter 2 in order to find out how ecological interactions between online communities vary over time and the roles of adaptation and selection in changing ecological dynamics. I test the hypothesis that online communities can rationally adapt to avoid competition or increase mutualism through a time series analysis. The first step is to demonstrate that competitive and mutualistic dynamics in online communities are not static, but dynamic. They vary over time. 
Therefore, if online communities act rationally to position themselves relative to each other in ways that optimize their mutualism, we might find temporal correlations between a communities changes topic and membership overlap and its competitive and mutualistic relationships. +% However, I observe that changes in online community topics are not correlated with decreases in competition or increases in mutualism. +% This suggests that the emergence of mutualism is driven not by adaptation, but by selection. +% Chapter 5 discusses future directions to investigate the micro-level dynamics of this process and other open research questions. + +% % Cite exit and voice below +% These findings lend additional support to the notion that changing online communities is difficult. Creating new communities that provide complementary benefits may be an alternative solution when existing communities are lacking. +% However, the set of new benefits likely needs to be significantly different from the set of benefits provided by incumbent communities. +% Although the results of Chapters 2 and 4 find mutualism is less common than competition, they also show that competition happens. +% Moreover, they look at communities that have survived for long enough that their niches are measurable and therefore competition faced by the smallest communities that never take off is unobserved. +% If the greater prevalence of mutualism is driven by a selection process, this is likely because new communities that face competition are exceedingly unlikely to survive. + +% As an example, consider attempts to reform Wikipedia to be more inclusive through changing sourcing and notability policy. +% These attempts encounter strong structural inertia resulting from entrenched norms and policies and capacities of opponents to stonewall debate and block changes. +% In this way, activists for a more inclusive Wikipedia have struggled to exercise voice. 
+% Wikipedia's openness and creative commons licensing make it possible for other encyclopedias to reuse its content. +% Yet, should activists choose to exit and start an alternative to Wikipedia with different policies, this new project will be unlikely to replace Wikipedia if the differentiating factors are limited to different policies and better coverage in a few areas. +% Still, those seeking a more inclusive knowledge production community or the specific types of knowledge it provides may find these benefits in a new specialized community. +% However, Wikipedia will almost certainly continue to draw a larger audience and pool of contributors. + +% Organizational ecology's virtues stem from its defining conceptual move: to explain the success or survival of individual organizations in terms of their relationships with other organizations. +% By adopting an intermediate level of analysis seeking to explain the largest rise of analytically tractable +% with ambitions reaching far below explaining macro-historical changes like the rise of capitalism, organizational ecology dynamics of large-scale social changes like the rise of newspapers or M-form organizational forms, without appealing to overarching macro-historical forces that are difficult to measure and may be necessarily undetermined given available evidence. + + +% +% something about institutionalization? + +% We will address these shortcomings by first engaging deeply with ecological research in both biology and organization science from which we will borrow concepts and methods. In the context of online communities, we will define an ecological \emph{population} as the set of communities that share a set of \emph{resources}. 
In the context of online community research, resources include the labor and intellects of participants, content that they appropriate and produce, as well as the technological and social systems that communities develop to structure themselves like norms, rules, and technologies \cite{butler_membership_2001}. In an ecological model, a community must find a \emph{niche}---i.e., a set of resources that it can utilize comparatively better than other communities---in order to survive. + +% Why study online communities from an ecological perspective? + +% bibliography here +% \setcounter{biburlnumpenalty}{9001} +% \printbibliography[title = {References}, heading=secbib] diff --git a/dissertations/nathante_uw_2021/ch2_identifying.tex b/dissertations/nathante_uw_2021/ch2_identifying.tex new file mode 100644 index 0000000..1c1409c --- /dev/null +++ b/dissertations/nathante_uw_2021/ch2_identifying.tex @@ -0,0 +1,933 @@ +% +%% This is file `sample-authordraft.tex', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% samples.dtx (with options: `authordraft') +%% +%% IMPORTANT NOTICE: +%% +%% For the copyright see the source file. +%% +%% Any modified versions of this file must be renamed +%% with new filenames distinct from sample-authordraft.tex. +%% +%% For distribution of the original source see the terms +%% for copying and modification in the file samples.dtx. +%% +%% This generated file may be distributed as long as the +%% original source files, as listed above, are part of the +%% same distribution. (The sources need not necessarily be +%% in the same archive or directory.) +%% +%% The first command in your LaTeX source must be the \documentclass command. +% \documentclass[sigconf,authordraft]{acmart} + + +%%%% As of March 2017, [siggraph] is no longer used. Please use sigconf (above) for SIGGRAPH conferences. + +%%%% As of May 2020, [sigchi] and [sigchi-a] are no longer used. Please use sigconf (above) for SIGCHI conferences. 
+ +%%%% Proceedings format for SIGPLAN conferences +% \documentclass[sigplan, anonymous, authordraft]{acmart} + +%%%% Proceedings format for conferences using one-column small layout +%\documentclass[acmsmall,authordraft]{acmart} + +% NOTE that a single column version is required for submission and peer review. This can be done by changing the \doucmentclass[...]{acmart} in this template to +% \documentclass[sigconf,review=True]{acmart} +\chapterprecishere{ +% Most explanations of changes in online group size focus on internal factors like social structures or design decisions. +% do not make the , and render critical questions like “which other groups are a given group's strongest competitors or mutualists?” unanswerable. +% TODO: Polish abstract +% Online groups interact with each other as people, content and ideas flow among them. +We introduce a method for inferring competitive and mutualistic interactions between online groups from time series participation data based on the theoretical framework of community ecology. Platforms often host multiple online groups with highly overlapping topics and members. How can researchers and designers understand how interactions between related groups affect measures of group health? Inspired by population ecology, prior social computing research has studied competition and mutualism among related groups by correlating group size with degrees of overlap in content and membership. The resulting body of evidence is puzzling as overlaps seem sometimes to help and other times to hurt. We suggest that this confusion results from aggregating intergroup relationships into an overall environmental effect instead of focusing on networks of competition and mutualism among groups as our approach does. We compare population and community ecology analyses of online community growth by analyzing clusters of subreddits with high user overlap but varying degrees of competition and mutualism. 
+} + +%% +%% The code below is generated by the tool at http://dl.acm.org/ccs.cfm. +%% Please copy and paste the code instead of the example below. +%% +% \begin{CCSXML} +% +% +% 10010520.10010553.10010562 +% Computer systems organization~Embedded systems +% 500 +% +% +% 10010520.10010575.10010755 +% Computer systems organization~Redundancy +% 300 +% +% +% 10010520.10010553.10010554 +% Computer systems organization~Robotics +% 100 +% +% +% 10003033.10003083.10003095 +% Networks~Network reliability +% 100 +% +% +% \end{CCSXML} + +% \ccsdesc[500]{Computer systems organization~Embedded systems} +% \ccsdesc[300]{Computer systems organization~Redundancy} +% \ccsdesc{Computer systems organization~Robotics} +% \ccsdesc[100]{Networks~Network reliability} + +%% +%% Keywords. The author(s) should pick words that accurately describe +%% the work being presented. Separate the keywords with commas. +% \keywords{datasets, neural networks, gaze detection, text tagging} + +%% A "teaser" image appears between the author and affiliation +%% information and the body of the document, and typically spans the +%% page. + +% \begin{teaserfigure} +% \includegraphics[width=\textwidth]{sampleteaser} +% \caption{Seattle Mariners at Spring Training, 2010.} +% \Description{Enjoying the baseball game from the third-base +% seats. Ichiro Suzuki preparing to bat.} +% \label{fig:teaser} + +% \end{teaserfigure} + +%% +%% This command processes the author and affiliation and title +%% information and builds the first part of the formatted document. + + +% \fontsize{12pt}{24pt} +% \selectfont + +%% We're going for a "known puzzle" + "clarifying confusion" framing +%% Rememver to frame aronud the depvar + +%% TODO: rewrite with a new outline +%% Introduction, Related Work, Materials & Methods, Results, Discussion, Conclusions +%% Put research question in the introduction. +%% Put hypotheses in Related Work. 
+%% Consider Hypothesizing that mutualism will be more common than competition because subreddits in these clusters are specialized. +%% Cut unneeded ecological terms +%% Define needed ecological terms + +\section{Introduction} +\label{sec:intro} + +% Why we need an ecological approach +%Online groups are important places where people collaborate to produce information sources, engage in discussions and participate in culture. +Although the fact is frequently ignored in social computing scholarship, online groups do not exist in isolation.\footnote{We use the term ``online group'' instead of ``online community'' to help avoid confusion with our term ``community ecology'' which plays an important conceptual and analytic role in our paper.} Indeed, although studying interdependence between online groups is difficult and complex \citep{hill_studying_2019}, research in social computing has sought to quantify how online groups share users or topics \citep{datta_identifying_2017, del_tredici_semantic_2018, tan_all_2015, hessel_science_2016}, and how such interactions relate to outcomes like the emergence of new groups \citep{tan_tracing_2018}, contributions to peer-produced knowledge \citep{vincent_examining_2018}, and the spread of hate speech \citep{chandrasekharan_you_2017}. Although this work has demonstrated that intergroup interactions matter, very little intergroup research has tackled questions of group success---i.e., why some online groups succeed in maintaining active and long-lived participation while most do not. +%\citep{kraut_role_2014, resnick_starting_2012}. % commented out since there was no response +Can intergroup relationships +% competition or mutualism between online groups +explain whether online groups will grow or decline? +% NOTE: I guess you've added the footnote above to address the reviewer concern. It's important but (a) I think it's too early in the manuscript to bring this in and (b) it should be in a footnote. 
-mako +% I moved it below by the RQ. + +%a growing body of social computing research shows that online groups, such as wikis, discussion forums and mailing lists spawn new groups and wage conflicts against, compete with and help each other citep{datta_identifying_2017, tan_tracing_2018, wang_impact_2012, zhu_impact_2014}. + +% individual chances of success while mutualistic dynamics increase them. + +% How do relationships between groups shape their chances of success? + +% What's wrong with previous ecological approaches +% Should we introduce ecological theory in the introduction at all? + +Studies in social computing have drawn from organizational ecology to answer this question \citep{wang_impact_2012, zhu_impact_2014, resnick_starting_2012, zhu_selecting_2014}. Inspired by the ecological study of biological systems, organizational ecology is an influential body of theory in sociology that studies competition and mutualism among human organizations. +% , ranging from commercial industries to social movements \citep{hannan_population_1977, baum_ecological_2006}. +% NOTE: There's a jump between this sentence and the last one. I think we might need to signal, somehow, that orgecol is not puzzling or the results in soccomp are puzzling in regards to them. I've changed puzzling below to inconsistent but we should make it clear what it's inconsistent with. -mako +Although ecological studies of firms and social movements have developed a clear and established body of theory with strong empirical support \citep{baum_ecological_2006}, similar studies of online groups have yielded inconsistent results that differ both from one context to another and from theoretical predictions. For example, wikis whose memberships overlap with other wikis survived longer \citep{zhu_selecting_2014}, but Usenet groups with overlapping memberships failed more quickly \citep{wang_impact_2012}. + +% NOTE: I'm not sure conflation is the right term here. 
I've reworked this paragraph below -mako +% I think you nailed it. -- nate +We argue that these confusing results are the result of a conflation of concepts and measures from two distinct strands of theory in organizational ecology: \emph{population ecology} and \emph{community ecology}. Both define competition as a form of interdependence that \emph{decreases} growth and mutualism as one that \emph{increases} growth. However, population ecology focuses on modeling how overlapping resources among groups affect their subsequent growth, decline, or survival \citep{astley_two_1985, baum_ecological_2006, dobrev_dynamics_2001}. It does not attempt to directly study competitive and mutualistic interactions. On the other hand, community ecology recognizes that groups often exist within ``ecological communities,'' or clusters of highly related entities, and provides an approach for inferring competitive and mutualistic interactions among these. Although the stated goal of ecological research in social computing has been to understand how groups influence each other's ability to sustain participation, ecological research in social computing has relied exclusively on concepts and measures from population ecology. This paper seeks to explain the puzzling set of findings in ecological social computing research by introducing community ecology. + +%These strands have different concepts of ecological dynamics, different levels of analysis and make distinct theoretical predictions \citep{astley_two_1985}. +% despite the fact that doing so is vital to + + + +% Our contributions to CSCW are theoretical, methodological, and empirical. 
+ +% Our theoretical contribution, articulated in §\ref{sec:community_ecology}, + +% We then demonstrate both approaches by investigating our research question: +% \textit{(\textbf{RQ}) How does community ecology's view of competition and mutualism in online groups compare to that of population ecology?} + +% Our overarching goal is to introduce community ecology as a theoretical and methodological framework for understanding how the relationships between specific online groups shape their growth or decline. + +We do so in a three-part empirical study using a dataset drawn from the 10,000 communities on Reddit with the most contributors to analyze 641 clusters of online groups with overlapping participants. +In Study A, we conduct the most important type of population ecology analysis, a test of what is called density dependence theory, and find support for the theory. +%This suggests that competition is strongest when user overlap is high and mutualism is weakest when overlap is low. +This analysis suggests that high degrees of user overlap are associated with competition. +%VAR models are widely used in biological ecology to make inferences about competitive or mutualistic interactions between species. +In Study B, we introduce our method for community ecology analysis that infers networks of competitive and mutualistic interactions by using clustering analysis and vector autoregression (VAR) models of group size over time \citep{sims_macroeconomics_1980, canova_var_2007, ives_estimating_2003}. We illustrate the method in four case studies and present a large-scale computational analysis showing that mutualistic interactions are far more common than competitive ones. +Finally, in Study C, we bring Study A and Study B together to compare population ecology and community ecology by extending the density dependence model from Study A with a variable accounting for competition and mutualism. 
While we find that adding this variable does not help predict growth, including ecological interactions in our VAR models improves time series forecasting. + +% importance of accounting for mutualistic and competitive interactions in predicting the growth of online groups. We + +% While models including , . + +We discuss how these findings illuminate the differences between population ecology and community ecology and show how the two perspectives are complementary. +While Study A suggests that competition is strongest when user overlap is high, Study B finds widespread mutualism among groups with overlapping membership. +Although these findings might seem contradictory, they reflect how population ecology studies overlapping resources related to favorable or unfavorable environmental conditions, while community ecology studies competitive and mutualistic interactions playing out in local networks of specific groups. By demonstrating that mutualistic and competitive interactions within clusters of highly related groups are important---and by describing how to measure them---this paper lays the groundwork for future research to investigate and design for interdependence between online groups that supports their growth and success. + +%we demonstrate that interactions are important and how to inferred and are useful for time series forecasts of + +% and inform design + +% by understanding + +%lays the groundwork for future research toward design + +% understanding how different forms of + + + + + +% To answer this question, We validate our approach by showing in §\ref{sec:res.forecasting} that + +% % NOTE: Is it (1) the top 1000? It would be nice to summarize the comprehensiveness here. (2) I'm ambivalent about the word "network" here. -mako +% We make four specific empirical contributions: Reddit in §\ref{sec:res.characterizing} and . 
+ +% and provide an explanation for why previous ecological research in social computing has led to confusing and inconsistent results. + + + +% NOTE: Is the sentence below correct? I guess so (at least indirectly) but I haven't read the new discussion. -mako New discussion isn't written yet, but right now that explanation is in the background section. :) -N + +% NOTE: cut this last sentence? -mako - I think this last sentence will be a more accurate reflection of the discussion. -N +% We + +% We + +% We make a theoretical contribution by introducing the community ecology perspective that We also make a methodological contribution by providing a method for inferring these relationships from time-series data on group sizes + +% Where prior approaches aggregate individual relationships between groups, our approach makes it possible to answer critical questions like ``which are a given online group's mutualists or competitors?'' + +% In the process, our theoretical work brings clarity to a confusing set of empirical results in prior research. + +%Discussing this seemingly contrasting finding motivates future investigations into how competitive or mutualistic ecological communities form and why some environments for online groups are competitive or mutualistic. + +% This method builds on a popular approach in biology that provides robust inferences about networks of ecological relationships. , analysis of stability, forecasts of future participation, and can scale to analyze systems of dozens of related communities. We apply this approach to four datasets. + +% We validate our method using simulated data to show that it can identify a full range of ecological relationships and conduct a series of three case studies of groups hosted on the platform Reddit in \textsection \ref{sec:case.studies}. 
Although limited, these case studies make a third contribution in the form of empirical findings that suggest that specific patterns of relationships vary substantially across networks of groups and that mutualism appears to be much more common than competition. + +\section{Related Work} +\label{sec:related.work} + +% One sentence on "timeliness." Find citations (Chowdry, Benkler, +Online groups are important sites for social support \citep{de_choudhury_mental_2014}, entertainment \citep{ducheneaut_alone_2006}, information sharing \citep{benkler_wealth_2006}, and political mobilization of disinformation campaigns and protest movements \citep{choudhury_social_2016, benkler_social_2013, krafft_disinformation_2020}. +% knowledge of the ecosystem of online groups is important for advancing social science and informing future designs to support and manage online groups. +Although an online group's ability to achieve its goals depends on attracting and retaining contributors, few develop a sizable group of participants \citep{benkler_wealth_2006, dimaggio_social_2001, johnson_emergence_2014, koh_encouraging_2007, kraut_role_2014}. Many attempts to explain the success and growth of online groups look to properties of individual groups like characteristics of founders \citep{kraut_role_2014}, language use \citep{danescu-niculescu-mizil_no_2013}, turnover \citep{dabbish_fresh_2012}, and designs for regulating behavior \citep{halfaker_rise_2013, teblunthuis_revisiting_2018}. + +Recent research suggests that interdependence among online groups is also important to explain success and failure \citep{cunha_are_2019, kairam_life_2012, tan_all_2015, tan_tracing_2018}. +For example, banning hate subreddits reduced hate speech in related subreddits \citep{chandrasekharan_you_2017}. In a very different context, there is evidence that Reddit and Stack Overflow receive substantial benefits from activity on Wikipedia \citep{vincent_examining_2018}. 
+% ; and editors make valuable and qualitatively different contributions across different languages of Wikipedia \cite{hale_cross-language_2015}. In addition, growth trajectories of online groups initially about similar topics can diverge \cite{zhang_understanding_2021}. +Our work contributes to this literature by providing a new conceptual lens and statistical method for studying competition and mutualism between online groups. + +% , which theorizes how online groups depend on distinct types of resources. +% As we discuss in §\ref{sec:rdp}, the nature of these resources makes possible conditions for mutualism or competition. In §\ref{sec:ecology_background}, we explain how prior ecological studies of online groups extended RDT to consider how overlapping resources between communities can drive competition and mutualism and propose our first hypothesis which replicates part of these studies in Reddit, our empirical context. Finally, in §\ref{sec:community_ecology}, we draw anew from biology and organizational ecology to present our community ecology approach and propose hypotheses to validate its usefulness for predicting the growth of online groups. + +\subsection{Online Groups Depend on Resources} +\label{sec:rdp} + +Like prior ecological research in social computing and information systems, we build on resource dependence theory (RDT) \citep{butler_membership_2001, wang_impact_2012}. +\citet{butler_membership_2001} introduces +RDT to argue that growth in online groups is driven by positive feedback as participants contribute resources such as content, information, attention, or social interactions, which motivate further contributions by subsequent participants. That said, online groups do not grow forever and RDT explains that growth is self-limiting because costs of participation increase in larger groups \citep{butler_membership_2001, butler_attraction-selection-attrition_2014}. 
+ + +% While growth far from the only criteria of success for an online group, much social computing research follows RDT by seeking to support groups' growth and survival through the attraction or retention of members \cite{koh_encouraging_2007, kraut_role_2014, cunha_are_2019}. + +% For example, explanations of Wikipedia's transition from growth to decline structures for quality assurance in a growing project that constituted barriers to newcomer participation \cite{halfaker_rise_2013, teblunthuis_revisiting_2018} spawned significant interest in designs for increasing newcomer retention that have met with limited success \citep[e.g.][]{halfaker_snuggle:_2014, morgan_tea_2013, narayan_wikipedia_2017}. Social structures like leadership, organizational practices, network structure, and design decisions can lower costs and increase benefits of participation \cite{butler_membership_2001, kraut_role_2014, tsugawa_impact_2019}. + + +%TODO: incorporate the below citations to "demonstrate that this is of importance to the social computing audience"" Also cite Charlie's paper about cross-platform interdependence + +%We review this foundational work in §\ref{sec:resource_dep} and then narrow our focus to prior ecological studies and other empirical work about interdependence between online groups in §\ref{sec:ecology_background}. Then, in §\ref{sec:community_ecology} we review sociological research developing community ecology theory and apply it to online groups. + +% It also builds closely on two bodies of ecological theory: first, explanations from population ecology that describe entities as sharing resources in environments and second, explanations from community ecology that theorize networks of specific community relationships. +% In our background we introduce the first two bodies of related work in sections \ref{sec:resource_dep} and \ref{sec:ecology_background}. + + % Frame around the dependent variable: + + % Explaining participation is important because + % 1. 
It's a longstanding concern of the field + % 2. Online Groups are important to society + % models + % ranging from entertainment, information exchange, social interaction, to the collaborative production of knowledge and organization of collective action + + +% This positive feedback between the value of prior contributions and the motivation for future contributions drives community growth. +% Think about the implications of our findings for the rival vs nonrival resources that could be in play. + +% Maybe try to deepen the discussion of resource competition, or maybe its better to avoid getting dragged into this. + +Ecological approaches recognize that interrelated online groups may share resources with one another in ways that constrain their growth and survival. \textit{Rival} resources like participants' time, attention, and efforts raise the possibility of competition because they become unavailable to others when used by one group \citep{benkler_wealth_2006, kubiszewski_production_2010, ostrom_public_1977,romer_endogenous_1990}. RDT suggests that declines in online participation can be explained in terms of competition over important rival resources \citep{wang_impact_2012}. +% Online participation in general has opportunity costs and may compete with alternatives like sleep, entertainment, or work \cite{becker_theory_1965, butler_attraction-selection-attrition_2014}. +% So online groups that provide similar benefits may be the most likely competitors because once someone has obtained satisfying benefits from one group they may go offline or switch to another activity instead of seeking similar benefits from competitor groups.\footnote{Economists refer to these as ``substitutes.' } + +% providing the same benefits at lesser costs might be a compelling alternative. +% If different online groups can substitute for participation in one another and participation is rival this will lead to competition between the communities and decrease participation in both. 
+% Public goods are nonrival because their usefulness is not diminished when others use them. + +On the other hand, online groups also rely on \textit{nonrival} resources. They can even produce connective and communal public goods like opportunities to communicate or collections of information \citep{fulk_connective_1996} which can be ``antirival'' when their usefulness increases as a result of others using them \citep{kubiszewski_production_2010, weber_political_2000}. For example, the usefulness of a communication network increases as more people join it \citep{fulk_connective_1996, katz_network_1985}. Similarly, the usefulness of an information good can increase as more people come to know, refer to, and depend upon it \citep{kubiszewski_production_2010, weber_political_2000}. +% as when +%Awareness that an online group provides an audience can motivate participation \cite{zhang_group_2011}. +If multiple online groups help build the same connective or communal public goods, they may form mutualistic interactions where contributions to one group may ``spill over'' and motivate participation in mutualist groups \citep{zhu_impact_2014}. +Ecological approaches seek to understand how different types of resources will limit or promote growth. +% as was demonstrated when Chinese government blocked the Chinese language edition of Wikipedia, unblocked contributors decreased their participation +% + + +%As a result, researchers, designers, and managers of online communities often set aside thorny questions of interdependence between online communities. +%While extensions of the resource dependence framework recognize the importance of exit from online communities \cite{butler_attraction-selection-attrition_2014}, they do not say where people go when they leave. 
% Before turning to our theory of community ecology, we note differences between ecological theory and analysis in organization and biological science from other uses of the term ecology in HCI and social computing. +% The term ``ecology'' often connotes interconnectedness, complexity, growth, and nature, and also crises of resource sustainability, loss, and extinction \cite{worster_natures_1994, blevis_ecological_2015}. Most references technologists make to ``ecology'' +% For example Nardi and O'Day invoke the ecological metaphor in describing their vision for individuals to cultivate intentional and localized relationships with technology \cite{nardi_information_2000, bowker_bonnie_2001}. +% This continues a long-running intellectual exchange between social and biological sciences. Economic thought was strongly influenced by Darwinian evolution and ecologists in biology were influenced by economic models to understand and solve problems in forestry and conservation \cite{kropotkin_mutual_2012, worster_natures_1994}. Once modern ecological science was developed it was not long before it was applied to understand human societies \cite[e.g.][]{park_human_1936, hawley_human_1986}. Because theories of organizational ecology were crafted to address particular concerns in organization science and are laden with assumptions appropriate to traditional firms with fixed and durable boundaries, our ecological approach also draws from biology. + +% TODO This section needs a number of new concrete examples. Revisit the ecological literature as well. Also perhaps add some examples from the interview paper (which we'll cite and anonymize). +\subsection{Population Ecology, Density Dependence and Overlapping Resources} +\label{sec:ecology_background} + +% Our theoretical approach draws from ecology. 
+While this paper focuses on the ecological study of online groups, other social computing and HCI scholars have used the term ``ecology'' (and related concepts like ``ecosystem'' and ``environment'') to denote an assemblage of sites, devices, or platforms \citep{nardi_information_1999,wang_coming_2015}. We use the term more narrowly to refer to conceptual and mathematical models of ecological dynamics. +In particular, our work builds on a tradition rooted in \textit{organizational ecology}. First developed in the late 1970s by sociologists studying interactions between firms, organizational ecology was inspired by, and has drawn closely from, ecological studies in biology \citep{hannan_population_1977}. + +Because online groups bear similarities to traditional organizations, organizational ecology provides a compelling theoretical framework for understanding interdependence among online groups. It has inspired at least three high-quality empirical studies of how resources shared by online groups shape their growth, decline, or survival \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. +These studies draw from the \textit{population ecology} strand of organizational ecology +%, while we introduce \textit{community ecology} as an alternative. +that studies ecological dynamics within a population of groups. In organizational ecology, populations have been defined as sets of organizations sharing an organizational industry or business model \citep{hannan_organizational_1989}. In social computing, populations have been defined as online groups sharing a given social media platform \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. + +While population ecology involves several distinct theoretical propositions, \textit{density dependence theory} (DDT) is perhaps the most prominent and is the subject of all three prior ecological studies of online groups \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. 
DDT models competitive or mutualistic forces in a population of groups as a function of \textit{density} which, in the earliest and most influential studies of DDT, is simply the size of the population. In this way, DDT assumes that every group in the population is facing the same competitive and mutualistic pressures \citep{aldrich_organizations_2006}. +However, online groups sharing a platform have diverse topics \citep{kairam_life_2012}, norms \citep{chandrasekharan_internets_2018, fiesler_reddit_2018}, and user bases \citep{tan_all_2015}. Because groups sharing few resources are unlikely to be strongly interdependent, ecological studies of online groups have modeled density dependence based on the concept of \emph{overlap density} \citep{baum_ecological_2006, dobrev_dynamics_2001, wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. Rather than the number of groups that exist in a population, overlap density measures the extent to which one group's members or topics overlap with those of all other groups. Overlap density thus characterizes a group's \emph{niche} or local \emph{resource environment} defined by its distinctive topic and membership. + + +%Unlike \citet{datta_identifying_2017}, we do not divide user frequency by the number of subreddits where the user appears because we do not wish to assume that users who comment in many subreddits are less ecologically important. + +%Overlap density is thus not a property of a population of groups, but a property of the resource environment a particular group faces. + + +% While foundational studies of density dependence in organizational research measu +% red density and growth at the population level, ecological studies of online groups .\footnote{Although it is less common in organizational research, overlap density has also been used by some organizational ecologists \cite[e.g.][]{dobrev_dynamics_2001}.} +% Are this paragraph and the next one necessary or just confusing? 
+DDT proposes a model for the growth of organizational populations that has a similar structure to the RDT model of \citet{butler_membership_2001} for the growth of online groups. +In DDT, mutualism is the engine of positive feedback driving population growth. Organizational ecologists show how successful organizations in an emerging industry develop nonrival resources like the legitimacy of a business model or industrial know-how that attract new organizations to enter the market \citep{carroll_density_1989,hannan_organizational_1989}. Similarly, a population of online groups, such as those sharing a platform, may grow in size as their platform gains in popularity, as established groups spin off new ones, and as useful knowledge develops that can be shared between groups \citep{tan_tracing_2018, zhu_impact_2014}. + + +% TODO add a footnote to show the analytical equivalence between the models and connection to Malthus. +In RDT, growth of online groups is self-limiting because of the challenges in managing large groups \citep{butler_membership_2001}. In DDT, competition among population members over rival resources limits growth \citep{hannan_organizational_1989}. DDT thus proposes a trade-off in which low density reflects limited opportunities for mutualistic contributions of nonrival resources like legitimacy, connectivity, and knowledge, but high density reflects competition over rival resources. +Therefore, DDT predicts that the relationship between density and positive outcomes like growth or survival is $\cap$-shaped (inverse-U-shaped) \citep{baum_ecological_2006, carroll_density_1989}. + +% Save the potential conflict between RDT and DDT for the discussion +% An individual online group's growth may be limited by the ability of their social structures to scale to include more members \citep{butler_membership_2001} or due to competition with other groups over members \citep{hannan_organizational_1989}. 
+ +%In a homogenous population or in cases where litt +%Population ecologists have used a number of definitions of population, but they often refer to sets of organizations having the same organizational form or business model. + +%This is because many environments present a trade-off between mutualism and competition: mutualistic forces are stronger when density is low and competitive forces are stronger when density is higher. The intuition is that low-density environments reflect poor environmental conditions for success---if conditions were good then they would attract more growing communities hence be more dense. On the other hand, high-density environments are thought to become crowded and competitive \citepp{hannan_organizational_1989}. + +Tests of DDT in populations of online groups yield inconsistent results. In \citet{wang_impact_2012}, user overlap in Usenet newsgroups is associated with decreasing numbers of participants. Similarly, \citet{teblunthuis_population_2020} find that topical overlaps between online petitions are negatively associated with participation. By contrast, \citet{zhu_impact_2014} find that membership overlap is positively associated with increasing survival of new Wikia wikis. Only \citet{zhu_selecting_2014} find support for the $\cap$-shaped relationship predicted by DDT in an enterprise social media platform. + +In Study A, we provide a test of DDT using data from Reddit. 
The classical logic of DDT appears reasonable in the context of Reddit because low overlap density is likely to reflect an impoverished environment lacking in non-rival resources like skills and knowledge of experienced users, while a group with high overlap is likely to face competition over its members \citep{zhu_selecting_2014, zhu_impact_2014}: +\textit{(\textbf{H1}) The relationship between overlap density and the growth of online groups is $\cap$-shaped (inverse-U-shaped).} +% such as the + +%DDT sees competition and mutualism as environmental properties of an online group's niche. + +DDT proposes that very high levels of density will decrease growth because of increasing forces of competition within a niche. However, to conclude that groups with the greatest membership overlap are likely competitors would be to commit a well-known statistical fallacy +% (the term ecological fallacy does not refer to theories of population or community ecology, but rather to ``ecological correlations,'' meaning correlations involving aggregates) +\citep{piantadosi_ecological_1988, robinson_ecological_1950}. +The density of a group's environment suggests that it faces competition or mutualism, but it does not tell us which overlapping communities are competitors and which are mutualists. +% DDT therefore relates resource overlaps to the growth of online groups, yet stops short of inferring competitive or mutualistic interactions among them. It does not provide a way of learning when and why groups are mutualists or competitors and this limits its ability to inform designs that take these interactions into account. +Community ecology overcomes this limitation of DDT. + +\subsection{Introducing Community Ecology \label{sec:community_ecology}} + +Perhaps the most natural way to understand the distinction between population ecology and community ecology is in where they believe ecological dynamics like competition and mutualism play out \citep{astley_two_1985}. 
While population ecology locates competition and mutualism within an environmental niche, community ecology locates competition and mutualism in networks of interdependent groups called \emph{ecological communities} \citep{aldrich_organizations_2006}. In organizational ecology, this can mean studying interactions between different organizational populations \citep[e.g.][]{sorensen_recruitment-based_2004, mcpherson_ecology_1983}, or networks of interactions between organizations \citep[e.g.][]{powell_network_2005, margolin_normative_2012}. +%Doing so makes visible the distinctive roles that particular groups play. +While varying conceptions of community ecology are found in the organizational ecology literature \citep{freeman_community_2006}, the approach we describe is identical in structure to that taken by \citet{aldrich_organizations_2006} and \citet{hawley_human_1986}. + +Community ecology focuses on \emph{ecological interactions} \citep{aldrich_organizations_2006}. +%In organizational ecology, these interactions are referred to as ``commensal relationships.'' However, biologists use the term ``commensal'' quite differently to mean an unreciprocated mutualistic interaction in which one species provides benefits to another while being unaffected by it. While for the most part, we draw our conceptions and terminology from organizational ecology rather than biology, the use of the term ``commensalism'' in organizational ecology can be confusing. We therefore adopt the term ``ecological interaction.'' +Ecological interactions can be mutualistic when one group has a positive influence on the second such that growth in the first group leads to growth in the second. They can also be competitive if one group has a negative effect on the second such that growth in the first group leads to decline in the second. Ecological interactions can be reciprocated if mutualism (or competition) from one group to another group is returned in kind. 
An ecological interaction can also be mutualistic in one direction and competitive in the other. The competitive or mutualistic interactions in an ecological community are quantified by the \emph{community matrix}, a central analytical object in community ecology in both biology and organization science \citep{verhoef_community_2010, novak_characterizing_2016, aldrich_organizations_2006}. + +In Study B, we demonstrate community ecology by inferring networks of ecological interactions in ecological communities on Reddit. Because our understanding of community ecology theory does not suggest hypotheses about what we will find, we conduct an exploratory data analysis to determine whether mutualism or competition among subreddits is more common on Reddit and present case studies illustrating the types of ecological communities we identify. + +%So a commensal relationship exists between each pair of groups in an ecological community. + +% There are six possible ecological interactions as described in Table \ref{tab:interaction.types}. Note that they can be reciprocal (as in full mutualism and competition) or not (as in partial mutualism and competition). In our framework ``predation'' is an interaction that is positive in one direction but negative in the other. It is also possible that growth or decline in the first group has no effect on the second group, and visa-versa, a situation termed ``neutrality.'' + + +% \begin{table} +% \caption{The five possible ecological interactions between two online groups. Values in the column ``i $\rightarrow$ j'' represent the sign of $\phi_{i,j}$ group i's effect on group j. 
Based on table 11.1 from \citet{aldrich_organizations_2006}.} +% \centering +% \begin{tabular}{c|c|c} +% i $\rightarrow$ j ($\phi_{i,j}$)& i $\rightarrow$ j ($\phi_{i,j}$) & Interaction type \\ \hline +% $+$ & $+$ & Full mutualism \\ +% $+$ & $\cdot$ & Partial mutualism \\ +% $+$ & $-$ & Predation \\ +% $-$ & $\cdot$ & Partial competition \\ +% $-$ & $-$ & Full competition \\ +% $\cdot$ & $\cdot$ & Neutrality +% \end{tabular} +% \label{tab:interaction.types} +% \end{table} + +% by conceiving of community ecology as the study of relationships between different groups. + +% Relationships studied in community ecology are defined by how they , but they are also important because networks of relationships +%and give rise to higher-order properties like stability. + +%Our community ecology approach instead focus on relationships between communities from overlap density approaches to focuses on relationships between communities as a step toward solving the puzzle. + +%Consider the example of how \citet{zhu_impact_2014} find membership overlap is associated with increasing survival of new Wikia wikis, but in \citepos{wang_impact_2012} study of Usenet groups user overlaps are associated with decreasing group sizes. + +% Consider cutting this since we don't look at any other factors + +%study period, and they found a stronger relationship when overlapping members were from more established groups. Perhaps the growth Wikia wikis was limited by knowledge of how to build a Wiki which was provided by more experienced users and user overlaps were correlated with access to such knowledge. While + + +% What's the point of these three paragraphs? +\subsection{Predicting Growth} + +In Study C we build upon our analyses from Study A and Study B by testing whether community ecology can explain the growth and decline of online groups in ways that population ecology can not. 
We do this by analyzing in two different ways whether accounting for ecological interactions helps predict future group sizes. +% We expect it to do so because resource overlaps as modeled by DDT may be a poor proxy for the degree to which a group's environment is competitive or mutualistic. +In general, competition for overlapping resources will have no effect on group growth if something besides the overlapping resource limits growth \citep{verhoef_community_2010}. For example, two wikis might share a large number of contributors (they have high user overlap), but their growth might be limited by a lack of core contributors who perform important administrative tasks like policy making and software administration \citep{zhu_impact_2014}. Community ecology relaxes the assumption that competition and mutualism are caused by user overlap density and instead seeks to infer these relationships from data. We test the importance of this conceptual shift for predicting growth by testing two hypotheses. The first uses a model comparison approach to test if adding a measure of ecological interactions to the density dependence model in Study A improves prediction of growth: \textit{(\textbf{H2}) A model with ecological interactions and density dependence predicts growth in online groups better than density dependence alone.} + +Support for H2 may be a relatively low bar for assessing whether ecological interactions are important factors shaping the growth of online groups because of confounding moderator or mediator variables related to the occurrence of ecological interactions. +% For example, suppose mutualistic interactions were correlated with declining ecological communities. 
+Therefore, we also use a time series forecasting approach to test whether modeling ecological interactions is useful for making time series forecasts of participation in online groups: +%We seek to demonstrate in whether including commensal relationships in time series forecasting models improves forecasting performance. +\textit{(\textbf{H3}) The addition of ecological interactions to a baseline time series model improves the forecasting performance.} +While this does not directly compare population ecology and community ecology, it validates that ecological interactions are important. + +%With commensalism, we can seek to explain the puzzling results of resource overlap studies by exploring our second research question:\noindent \textbf{RQ2: How are degrees of user overlap and types of commensal relationships related?} + +% This paragraph isn't helping very much +% Ecological dynamics play out through the network of such relationships over time as represented by the \emph{community matrix}, $\Phi$. + + +% Analysis of the community matrix can reveal indirect relationships between groups and properties of an ecological community like stability \cite{ives_estimating_2003}. +%Seeing interdependence between online groups through a community ecology-based network of dynamical relationships can make visible special roles that particular groups play in an ecological community through their many mutualistic or competitive relationships. + +% Next we take a first methodological step toward answering questions like these by adapting vector autoregression models from biology and macroeconomics as an approach to inferring community matrices. We then apply our approach in three case studies of related groups hosted on Reddit to reveal three qualitatively different ecological communities. 
+ +%% SOME BIKERACK RAISING MORE ISSUES WITH THE NICHE OVERLAP APPROACH + +% study online groups additionally shifts from an analogy of online communities as individual members of a biological species to online communities as species themselves and seeking to understand functional relationships between different online groups. +% Yet a closer examination of the analogy to density-dependence in organizational or biological populations reveals conceptual awkwardness. At issue is the referent of the term ``niche.'' Should we use ``niche'' to refer to a set of resources that an online community can utilize? This is what ``niche'' means in both overlap density and in our version of community ecology. + +% Social exposure is also important, but we don't deal with that in this . The idea here is that the cost-benefit structure depends on alternatives which can lower costs or . +%VAR analysis can quantify the stability of the system and affords exploration of counterfactual forecasts to simulate hypothetical interventions \citep{ives_estimating_2003}. + + +\section{Materials \& Methods} +\label{sec:methods} + + + +% The presentation of our materials and methods is organized as follows: First we introduce the methods and measures for Study A, beginning with +% \emph{user overlap} %(§\ref{sec:mes.overlap}) +% which is aggregated into \emph{overlap density} %(§\ref{sec:mes.density}) +% to predict subreddit \textit{growth} %(§\ref{sec:mes.growth}) +% in a loglinear regression model. Then, for Study B, we present +% our clustering procedure for identifying ecological communities % (§\ref{sec:clustering}) +% on which we fit VAR models % (§\ref{sec:var}) +% predicting \emph{group size}. % (§\ref{sec:mes.group.size}). 
+% To explore the types of ecological communities found on Reddit, we derive two measures from these models for each cluster: \emph{average ecological interaction} +%(§\ref{sec:mes.avg.mut}) +% which quantifies the degree of competition and mutualism in the ecological community and \emph{ecological interaction strength} %(§\ref{sec:mes.abs.int}) % which quantifies its overall intensity of ecological interactions. Next, we draw competition-mutualism networks in example ecological communities based on interpreting the VAR models using impulse response functions (IRFs) %(§\ref{sec:mes.irf}). +% Then, in Study C, we test H2 to compare community ecology and density dependence theory by adding \emph{subreddit average mutualism} %(§\ref{sec:mes.sub.mut}) +% to the model from Study A. Finally, we test H3 by evaluating whether including ecological interactions in the VAR models improves time series forecasting. % (§\ref{sec:mes.forecasting}). + +\subsection{Data} + +Our data are drawn from the publicly available Pushshift archive of Reddit submissions and comments which we obtained from December 5\textsuperscript{th} 2005 to April 13\textsuperscript{th} 2020 +\citep{baumgartner_pushshift_2020}. Within this dataset, we limit our analysis to submissions and comments from the 10,000 subreddits with the highest number of comments. There are 702 subreddits larger than the smallest subreddit included in our dataset having a majority of submissions marked ``NSFW,'' which typically indicates pornographic material. As others have done in large-scale studies of Reddit \citep[e.g.,][]{datta_identifying_2017}, we exclude these subreddits to avoid asking members of our research team to inspect clusters including pornography. The top 10,000 subreddits provide a sufficiently large number of ecological communities for our statistical analysis. 
+ +\subsection{Study A: Density Dependence Theory} % and Community Ecology} +\label{methods:density} + + +\subsubsection{User overlap \nopunct} \label{sec:mes.overlap} + $o_{i,j}$ quantifies the degree to which two subreddits ($i$ and $j$) share users. + %From it we construct clusters of related groups in §\ref{sec:clustering} and quantify overlap density in §\ref{sec:mes.density}. +\citet{zhu_impact_2014} and \citet{wang_impact_2012} both measure user overlap between two groups by counting the number of users contributing to both groups at least once and exclude users who appear in more than 10 groups. In our preliminary analysis, we found that this measure led to similarity measures and clusters with poor face validity. These issues may have stemmed from how Reddit users often peripherally participate in many groups while participating heavily in few \citep{tan_all_2015, hamilton_loyalty_2017, zhang_community_2017}. Therefore, our measure of user overlap follows \citet{datta_identifying_2017} by using the number of comments each user makes in each pair of groups. + +To measure user overlap between subreddits, we first build user frequency vectors by counting the number of times each user comments in each subreddit. We prevent giving undue weight to subreddits with higher overall activity levels by normalizing the comment counts for each subreddit by the maximum number of comments by a single author in the subreddit: + +\begin{equation} + f_{u,j} = \frac{n_{u,j}}{\max_{v\in\mathrm{J}}n_{v,j}} \label{eq:user.frequency} +\end{equation} + +\noindent where $n_{u,j}$, the user frequency, is the number of times that user $u$ authors a comment in subreddit $j$. + +This results in a user frequency vector $F_j$ for each subreddit that is sparse and high-dimensional, having one element for each user account that comments in any subreddit in our dataset. 
+% In the course of developing our clustering analysis described in §\ref{sec:clustering}, we found that following an approach analogous to latent semantic analysis (LSA) improved the quality of our clusters. +Next, we use latent semantic analysis (LSA) to reduce the dimensionality of the user frequency vectors. +LSA is based on the singular value decomposition and is common in natural language processing and information retrieval. LSA preserves subreddit similarities while removing noise and dealing with sparsity \citep{dumais_latent_2004}: + +\begin{align} + \mathbf{F} &= \mathbf{U \Sigma V}^T \nonumber \\ + \widetilde{F_{j}} &= \mathbf{U_k}^TF_j \label{eq:user.frequency.svd} +\end{align} + +\noindent $\mathbf{F}$ is the matrix where columns are author frequency vectors $F_j$ and $\mathbf{U \Sigma V}^T$ is its singular value decomposition. Truncating the singular value decomposition to use only the first $k$ left-singular vectors gives $\mathbf{U_k}$. Left-multiplying a subreddit's author frequency vector by $\mathbf{U_k}^T$ transforms the high-dimensional author frequencies into $\widetilde{F_j}$, their approximation in the $k$-dimensional space. +% We choose $k=600$ in the course of our grid search for a good clustering described below in §\ref{sec:clustering}. + +%clustering with a high silhouette coefficient. + +We then obtain our measure of \textit{user overlap} by taking the cosine similarities between the resulting vectors for a pair of subreddits: +\begin{equation} + o_{i,j} = \frac{\widetilde{F_{j}} \cdot \widetilde{F_{i}}} {\norm{\widetilde{F_i}} \norm{\widetilde{F_j}}} \label{eq:user.overlap} +\end{equation} + +\noindent where $\norm{\widetilde{F_i}} = \sqrt{\sum_{x=1}^k \widetilde{f_{x,i}}^2}$ is the euclidean norm of the transformed user frequencies for subreddit $i$. 
+ + + + +%We use the following methods and measures in our tests of our hypothesis that the relationship between user overlap density the growth of online groups is $\cap$-shaped (H1) and our hypothesis that accounting for ecological interactions will help explain growth beyond overlap density (H2): + +% We measure \emph{overlap density} and \emph{growth} to and . To test \textit{\textbf{H2}}, we add the overall influence of ecological interactions on a subreddit + +\subsubsection{Growth\nopunct}\label{sec:mes.growth} is the dependent variable in our density dependence model testing H1 and is also used in our test of H2 as part of Study C. Growth is measured as the change in the (log-transformed) size of a subreddit over the final 24 weeks of our data, from November 4\textsuperscript{th} 2019 to April 13\textsuperscript{th} 2020. + +\subsubsection{Overlap density\nopunct} \label{sec:mes.density} $d_i$ is the normalized average user overlap for a given subreddit. It is the independent variable in our density dependence model testing H1: + +\begin{align} + d^*_{i} &= \frac{1}{\left|S\right|-1} \sum_{j\in S;j\ne i} o_{i,j} \nonumber \\ + d_{i} &= \frac{d_i^*}{\max_j d_j^*} \label{eq:user.overlap.density} +\end{align} + +\noindent where $S$ is the set of groups in our dataset. + +\subsubsection{Regression model for H1} \label{sec:reg.H1} +To test H1, we fit Model 1 % in Equation \ref{eq:M1} +which has first and second-order terms for overlap density to allow for a curvilinear relationship between \emph{overlap density} and \emph{growth}. +\begin{align} +\mathrm{Model~1} & & Y_i = B_0 + B_1 d_{i} + B_2 d^2_{i} \label{eq:M1} +\end{align} +\noindent where $Y_i$ is the growth of subreddit $i$ and $d_i$ is its overlap density. + + +\subsection{Study B: Introducing Community Ecology} + + +%Here we review the prior work on which we build our methodological approach to inferring competitive and mutualistic relationships between online groups. 
%\textsection \ref{sec:inferring} describes our own methodological contributions. + +\subsubsection{Clustering to identify ecological communities} +\label{sec:clustering} +Analyzing networks of ecological interactions is the key difference between community ecology and population ecology. +% In Study A we set out to survey the types of ecological communities found on Reddit to provide a comparison with a large-scale population ecology analysis. +% in \ref{sec:clustering} +%Here, we use a heuristic approach based on clustering algorithms to find ecological communities of online groups that all have high user overlap. +To identify ecological communities of related subreddits, we use a clustering procedure based on the user overlap measure described above in §\ref{sec:mes.overlap}. +We selected a clustering model using grid search to obtain a high silhouette coefficient \citep{rousseeuw_silhouettes_1987}. The silhouette coefficient captures the degree to which a clustering creates groups of subreddits with high within-cluster similarity. +% relative to similarity with subreddits in other clusters. + +Our description of our measure for user overlap in §\ref{sec:mes.overlap} does not explain how we choose the number of LSA dimensions $k$. +To do so, we ran the affinity propagation \citep{frey_clustering_2007}, HDBSCAN \citep{mcinnes_hdbscan_2017} and \textit{k}-means clustering algorithms and selected the algorithm, hyperparameters, and LSA dimensions $k$ that resulted in the clustering with a high silhouette coefficient having fewer than 5,000 isolated subreddits, and at least 50 clusters. We limit the number of isolated subreddits because some choices of hyperparameters for the HDBSCAN algorithm could improve the silhouette coefficient, but at the cost of greatly increasing numbers of isolated subreddits. Choosing a relatively high limit to the number of isolates helps ensure that our clusters contain highly related communities. 
We chose an HDBSCAN clustering with 731 clusters, 4964 isolated subreddits, $k=600$ LSA dimensions, and a silhouette score of 0.48. +We exclude the isolated subreddits from our analysis. More details about our clustering selection process are found in the online supplement. + + +%In order to test H2 and answer RQ1, we estimate the community matrix of commensal relationships between selected communities of online groups. +We evaluate the external validity of the chosen clustering using the purity evaluation criterion \citep{manning_introduction_2018} +% : +% \begin{equation}45 +% \mathrm{Purity}=\frac{1}{N}\sum_{m\in M}\max_{d\in D}{|m \cap d|} +% \end{equation} +% \noindent Where $N$ is the number of clusters $M$, $D$ are ``true'' classes to which subreddits might belong and $max_{d\in D}|m \cap d|$ is the greatest number of subreddits in cluster $m$ that belong to the same class $d$. +To do so, an undergraduate research assistant examined a random sample of 100 clusters including 744 subreddits. By visiting the subreddits and using her own judgment, the assistant flagged subreddits that did not seem like a good fit for their assigned cluster. Using these labels and excluding 25 subreddits that have been deleted, made private, or banned, we calculated the purity of our clustering as 0.92. This means that we believe that 92\% of subreddits belong to their assigned cluster. +% Note that although we clustered subreddits based on user overlap, we obtain a high purity score based on a subjective evaluation of the subreddits' contents. + +%\subsection{Inferring Mutualistic and Competitive Interactions} + +% We find f(N.clusters) clusters and f(N.isolates) isolated subreddits. The median cluster has median.cluster.size subreddits and the largest cluster has + + +\subsubsection{Group size\nopunct} \label{sec:mes.group.size} is the dependent variable of the models we use to infer ecological interactions. 
Measured as the number of distinct commenting users in a subreddit each week, group size quantifies the number of people who participate in a subreddit over time. Typical of social media participation data, group size is highly skewed. Therefore, we transform it by adding 1 and taking the natural logarithm. + + +% The following three paragraphs probably belong in the methods section, but I'm trying to satisfy the reviewers. +\subsubsection{Inferring ecological interactions using Vector Auto Regression} +\label{sec:var} + +The community matrix $\mathbf{\Phi}$ of ecological interactions can be inferred from time series data using vector autoregression models (VAR models). VAR models are a workhorse in biological ecology because VAR(1) models (i.e., VAR models with a single autoregressive term) have a close relationship with the Gompertz model of population growth which is widely used in ecology \citep{ives_estimating_2003}. Even in the presence of unmodeled nonlinearities, VAR(1) models can reliably identify competition or mutualism in empirically realistic scenarios \citep{certain_how_2018}. VAR models have also been widely adopted in the social sciences, particularly in political science and in macroeconomics \citep{box-steffensmeier_time_2014}. + +% \citet{sims_macroeconomics_1980} advocated VAR modeling in macroeconomics to address a problem in the field as an alternative to structural equation modeling (SEM), which required detailed specification of a large number of theoretical assumptions to identify. +%similar to structural equation models but require fewer theoretical assumptions but are +%VAR models are flexible enough to model a wide range of systems so long as sufficiently long time-series data are available \citep{sims_macroeconomics_1980}. +VAR(1) models can be intuitively understood as a generalization of auto-regressive AR(1) models in time series analysis. 
But while AR(1) models predict the state of a single time series as a function of its previous value, VAR(1) models simultaneously predict multiple time series as a function of the values of every other variable in the system \citep{canova_var_2007, ives_estimating_2003}: + +\begin{equation}\label{eq:var1} +Y_t = B_0 + B_1t + \sum_{k \in K}A_k x_{k,t} + \sum_{j \in M}\Phi_{j} y_{j,t-1} + \epsilon_t +\end{equation} + +\noindent where $Y_t$ is a vector containing the sizes of a set of online groups ($M$) at time $t$. $B_0$ is the vector of intercept terms and $B_1$ is the vector of linear time trends ($b_{1,j}$) for each community ($j$). $\Phi_{j}$ represents the influence of $y_{j,t-1}$, the size of the $j^{\mathrm{th}}$ online group at time $t-1$ on $Y_t$. $\Phi_{j}$ is a column of $\mathbf{\Phi}$, a matrix of coefficients in which the diagonal elements correspond to intrinsic growth rates (marginal to the trend) for each online group and the off-diagonal elements are intergroup influences, and $\epsilon_t$ is the vector of error terms. + +Additional time-dependent predictors ($x_{k,t}$) can be included in the vectors $X_{k}$ with coefficients $A_k$. Because subreddits are created at different times, growth trends must begin only after the subreddit is created. We use $X_{k}$ to introduce a counter-trend during the period prior to the creation of subreddits so that each group's growth trend begins in the period the group is created. For each group $j$ created at time $t^0_j$ we fill $X_{j}$ with the sequence $[1,2,3,\ldots\ ,t^0_j-1,0,0,0,\ldots\ ]$. In other words, $X_{j}$ adds a counter-trend only during the period prior to the first comment in subreddit $j$. We fix the elements $a_{j,i}$ of $A_j$ equal to 0 unless $i=j$, so the counter trend only influences subreddit $j$. This effectively sets $a_{j,j}$ approximately equal to $-b_{1,j}$. 
+ +We fit VAR(1) models using ordinary least squares as implemented in the \texttt{vars} \texttt{R} package to predict the group size each week over the history of each subreddit prior to November 4\textsuperscript{th} 2019 \citep{pfaff_var_2008}. We hold out 24 weeks of data for forecast evaluation and fit our models on the remainder. To ensure that sufficient data is available for fitting the models, we exclude 946 subreddits and 89 clusters having fewer than 156 weeks of activity. + +% where the cluster data lacks the necessary degrees of freedom to fit the model because the length of the training time series is less than the size of cluster plus 2. + + +% We hold out the weeks from fit.date to to.date for evaluation. % Some of the clusters were too large or had too low levels of activity We include only We include a vector of intercept terms (to account for different equilibrium community sizes) and a vector of trends (to account for long-run endogenous growth) because we found that including these terms greatly improved the fit of our models to the data. Our VAR(1) models have this form in vector notation: + +%$$ Y_t = \Mu + \Phi_1 Y_{t-1} + \ldots + \Phi_p Y_{t-p} + \epsilon_t $$ +% TODO: avoid mixing matrix and vector notation. + +\subsubsection{Characterizing ecological communities} +\label{sec:characterizing.ecological.communities} + +In Study B, we interpret the community matrix $\mathbf{\Phi}$ as a directed network of ecological interactions, a \emph{competition-mutualism network} \citep{ives_estimating_2003}. Although the elements of $\mathbf{\Phi}$ correspond to direct associations between group sizes \citep{novak_characterizing_2016}, ecological interactions can also be indirect. 
Consider 3 one-directional interactions between three groups ($a$, $b$, $c$) such that growth in $a$ predicts decreased growth in $b$ ($\phi_{a,b} < 0$), growth in $b$ predicts decreased growth in $c$ ($\phi_{b,c} < 0$), but $a$ and $c$ do not directly interact ($\phi_{a,c} \approx 0$). + +This does not necessarily mean that groups $a$ and $c$ are independent. Rather, an exogenous increase in $a$ predicts a decrease in $b$ and thereby an eventual increase in $c$. Such indirect relationships are analyzed by using impulse response functions (IRFs) to interpret a VAR model \citep{box-steffensmeier_time_2014}. In large VAR models containing many groups, the great number of parameters can mean that few specific elements of $\mathbf{\Phi}$ will be statistically significant, even as many weak direct relationships can combine into statistically significant IRFs \citep{canova_var_2007}. + +\subsubsection{Average ecological interaction\nopunct} \label{sec:mes.avg.mut} $\overline{m}$ measures the extent to which an overall ecological community is mutualistic or competitive by taking the mean point estimate of the off-diagonal coefficients of $\mathbf{\Phi}$: + +\begin{equation}\label{eq:average.interaction} +\overline{m} = \frac{1}{\left|M\right| - 1} \sum_{i\in M} \sum_{j\in M;j\ne i} \phi_{i,j} +\end{equation} + +\noindent if $\overline{m} > 0$ then mutualistic interactions within the ecological community are stronger than competitive ones, and if $\overline{m} < 0$ then competitive interactions are stronger than mutualistic ones. 
+ +\subsubsection{Ecological interaction strength\nopunct} \label{sec:mes.abs.int} $\kappa$ quantifies the overall strength of ecological interactions in an ecological community as the mean absolute value of the point estimates of the off-diagonal coefficients of $\mathbf{\Phi}$: + +\begin{equation}\label{eq:average.absolute.interaction} +\kappa = \frac{1}{\left|M\right| - 1} \sum_{i\in M} \sum_{j\in M;j\ne i} \left| \phi_{i,j} \right| +\end{equation} + +\noindent where $\left| \phi_{i,j} \right|$ is the absolute value of the coefficient $\phi_{i,j}$. + +Ecological communities of subreddits with overlapping users vary in both the overall strength of ecological interactions and in the overall degree of mutualism and competition between member groups. If an ecological community's average ecological interaction is positive, we say the ecological community is mutualistic. If it is negative, we say the ecological community is competitive. The average ecological interaction can be close to 0 in two ways. First, the ecological interaction strength can simply be low. Alternatively, the ecological community can have a mixture of competitive and mutualistic interactions that cancel one another out when averaged. % Such an ecological community can have high ecological interaction strength. + +\subsubsection{Impulse response functions\nopunct}\label{sec:mes.irf} (IRFs) of our VAR(1) models correspond to our visualizations of example competition-mutualism networks in §\ref{sec:case.studies}. An IRF predicts how much each group's size would change in response to a sudden increase in the size of each other group \citep{verhoef_community_2010}: + +\begin{equation} + \mathbf{\Theta_t} = \mathbf{\Theta_{t-1}}\mathbf{\Phi}, t = 1,2,... \label{eq:irf} +\end{equation} + +\noindent where $\mathbf{\Theta_t}$ is the impulse response function at time $t$. $\mathbf{\Theta_0}$ is an $M$-by-$M$ identity matrix so our impulses represent a log-unit increase of 1 to each group. 
$\mathbf{\Theta_t}$ is a matrix with elements $\theta^t_{i,j}$ corresponding to the response of group $j$ to the impulse of group $i$. We draw an edge $i \rightarrow j$ in the competition-mutualism network if the 95\% CI of $\theta^t_{i,j}$ does not include zero at any time $10 \geq t > 0$. If $\theta^t_{i,j} >0 $, the edge indicates mutualism and if $\theta^t_{i,j} < 0$ the edge indicates competition.\footnote{In higher-order VAR($p$) models that use $p>1$ past observations as predictors, $\theta^t_{i,j}$ can be less than 0 for some $t_a$ and greater than 0 for some $t_b$. However, this is not possible in the VAR(1) models we use.} We compute the IRFs with bootstrapped confidence intervals (CI) based on 1,000 samples using the \texttt{vars} \texttt{R} package. + + +% The community matrix $\Phi$ is interpretable as a network of commensal relationships \citep{ives_estimating_2003}. While the coefficients of $\mathbf{\Phi}$ correspond to direct associations between group sizes \cite{novak_characterizing_2016}, commensal relationships can also be indirect. Consider relationships between three groups (A, B, C) such that A partially competes with B and B partially competes with C but A and C have no direct relationship. A VAR(1) model inferring these relationships will have negative coefficients for $\phi_{AB}$ and $\phi_{BC}$ but $\phi_{AC}$ will be nearly zero. + +% TODO plot the examples on figure 1. + +%The central prediction of density dependence theory is that there will be a curviliear, inverse-U-shaped ($\cap$-shaped) relationship between overlap density and growth. 
+ +\subsection{Study C: Predicting growth} + +\subsubsection{Average subreddit mutualism\nopunct}\label{sec:mes.sub.mut} $m_j$ is the independent variable for our test of H2 and measures the average influence of other subreddits in the ecological community on a given subreddit $j$, which we calculate by taking the mean of off-diagonal elements of row $j$ of the community matrix: + +\begin{equation}\label{eq:average.subreddit.mutualism} +m_j = \frac{1}{\left|M\right|-1}\sum_{i\in M;i\ne j} \phi_{i,j} +\end{equation} + +\noindent where $M$ is the set of subreddits in the ecological community and $\left|M\right|$ is the number of subreddits in $M$. We use the mean instead of the sum because different ecological communities have different numbers of subreddits. + +\subsubsection{Regression models for H2} We test H2 by using likelihood ratio tests to compare Model 1 % (above in \ref{sec:reg.H1}) +and Model 2 % in Equation \ref{eq:M2} +which adds \emph{average subreddit mutualism} ($m_i$) as a predictor. We also fit Model 3 % in Equation \ref{eq:M3} +which we compare to Model 2 to test if overlap density explains variation that average subreddit mutualism does not. + +\begin{align} +\mathrm{Model~2} & & Y_i &= B_0 + B_1 d_{i} + B_2 d^2_{i} + B_3 m_i \label{eq:M2} \\ +\mathrm{Model~3} & & Y_i &= B_0 + B_3 m_i \label{eq:M3} +\end{align} +\noindent where $Y_i$ is the growth of subreddit $i$, $d_i$ is its overlap density, $m_i$ is its average subreddit mutualism, and $B_0$, $B_1$, $B_2$, and $B_3$ are regression coefficients. + +\subsubsection{Forecasting growth using ecological interactions} +\label{sec:mes.forecasting} +To test H3, we evaluate whether modeling ecological interactions improves time series forecasting of future participation in online groups by comparing the model in Equation \ref{eq:var1} to a baseline model with off-diagonal elements of $\mathbf{\Phi}$ fixed to 0. This baseline model is equivalent to our VAR model, but excludes ecological interactions. 
+ +We use two forecasting metrics with differing assumptions: root-mean-square-error (RMSE) and the continuous ranked probability score (CRPS). RMSE is commonly used, non-parametric, and intuitive, but does not take differing scales of the predicted variable or forecast uncertainty into account. Thus, in our setting it may place excessive weight on the forecasts of larger subreddits where errors may have greater magnitude simply because the absolute magnitude of the variance is greater. By rewarding forecasts where the true value has high probability under the predictive distribution, the CRPS accounts for variance in the data and rewards forecasts for both accuracy and precision and is thus a ``proper scoring rule'' for evaluating probabilistic forecasts \citep{gneiting_strictly_2007}. Our CRPS calculations assume that the predictive forecast distribution for each community is normal with standard deviations given by the 68.2\% forecast confidence interval. We calculate CRPS using the \texttt{scoringRules} \texttt{R} package \citep{jordan_evaluating_2019}. + +\section{Results} +\label{sec:results} + +% The organization of our results follows that of our methods. We begin with Study A % (§\ref{sec:res:studyA}) +% in which we find, as predicted by H1, that the relationship between overlap density and growth is $\cap$-shaped relationship. Then, in Study B,% (§\ref{sec:res.characterizing}) +% we explore a typology of ecological communities along two dimensions: (1) the degree to which a community is mutualistic or competitive, and (2) the overall strength of ecological interactions between the communities member groups. In the N.clusters ecological communities analyzed in our VAR(1) analysis, we find that mutualistic relationships are much more common than competitive ones. Our case studies % (§\ref{sec:case.studies}) +% illustrate the typology using 4 example ecological communities. 
Finally, in Study C, we do not find support for H2 %in §\ref{sec:res.likelihood.ratio.test} +% as adding average subreddit mutualism to the density dependence model does not improve growth prediction. But we do find, in support of H3, that ecological interactions improve forecasting performance in our time series models. + + + +\begin{figure*} + \centering + +\includegraphics[width=\linewidth]{figures/knitr-fig_densityxgrowth-1} + +\caption{Relationship between density and growth. A 2D histogram of subreddits with overlap density (log-transformed) on the X-axis and the change in the logarithm of the number of distinct commenting users on the Y-axis. The black line shows the marginal effect of overlap density on growth as predicted by Model 2. The gray region shows the 95\% confidence interval of the marginal effect. \label{fig:density}} +\end{figure*} + +% In §\ref{sec:ecology_background} we presented H1 before RQ1 but we report results for H1 in the same section as H2 since they refer to the same regression model. + +%We first present high-level findings that demonstrate advantages of our community ecology approach upon the overlap density approach. We find that accounting for commensal relationships in time-series models increases forecasting accuracy; that including subreddit average commensalism explains additional variation in subreddit over overlap density; and we compare the conclusions drawn density dependence analysis based on the correlation of overlap density and growth can lead about the ecological environment than our analysis modeling commensal relationships between groups. 
Finally, we examine the distribution of \emph{average commensalism} and \emph{average absolute commensalism} to illuminate a typology of ecological communities which we illustrate through + +\subsection{Study A: Density Dependence Theory} +\label{sec:res:studyA} + +%As discussed in §\ref{sec:ecology_background}, population ecology approaches in social computing propose that the relationship between overlap-density and growth/survival outcomes reflect an environment that may be competitive, mutualistic, or a mixture of both \citep{wang_impact_2012,zhu_impact_2014}. +We test the classical prediction of density dependence theory as formulated in H1 using Model 1 % (Equation \ref{eq:M1} in §\ref{methods:density}) +which has first- and second-order terms for the effect of overlap density on growth. As described in §\ref{sec:ecology_background}, H1 hypothesizes that overlap density will have a curvilinear $\cap$-shaped (inverse-U-shaped) relationship with growth indicated by a positive first-order regression coefficient and a negative second-order coefficient. + +\begin{table} + \centering + +% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu +% Date and time: Thu, Jul 29, 2021 - 05:22:21 PM +\begin{tabular}{@{\extracolsep{5pt}}lccc} +\\[-1.8ex]\hline +\hline \\[-1.8ex] + & Model 1 & Model 2 & Model 3 \\ + Overlap density & 1.50$^{*}$ (0.26) & 1.50$^{*}$ (0.26) & \\ + Overlap density$^2$ & $-$2.08$^{*}$ (0.41) & $-$2.09$^{*}$ (0.41) & \\ + Average subreddit mutualism & & 0.12 (0.26) & 0.11 (0.26) \\ + Constant & $-$0.23$^{*}$ (0.03) & $-$0.23$^{*}$ (0.04) & $-$0.04$^{*}$ (0.01) \\ + \hline \\[-1.8ex] +Log Likelihood & $-$4970 & $-$4970 & $-$4986 \\ +Observations & 4,090 & 4,090 & 4,090 \\ +\hline +\hline \\[-1.8ex] +\textit{Note:} & \multicolumn{3}{r}{$^*$p$<0.01$} \\ +\end{tabular} + +\caption{Loglinear regression predicting subreddit growth as a function of overlap density. 
The model supports the prediction of density dependence theory of a $\cap$-shaped relationship between overlap density and growth. \label{tab:density}} +\end{table} + + +As predicted, we observe a $\cap$-shaped relationship between overlap density and growth. Figure \ref{fig:density} plots the marginal effects of overlap density on growth for the median subreddit laid over the data on which the model is fit. Table \ref{tab:density} shows regression coefficients for Models 1--3. For about half of subreddits, increasing overlap density is associated with higher growth rates. The point where increasing density ceases to predict increasing growth and begins to predict decreasing growth is at the 49\textsuperscript{th} percentile. +Prototypical subreddits at this overlap density grew slightly (95\% CI: [0.001, 0.06]). Yet subreddits at the lower and upper extremes of overlap density slightly declined on average. Typical groups at the 20\textsuperscript{th} percentile of overlap density decline by 1.1 members (95\% CI: [$-$1.15, $-$1.1]) and typical groups at the 80\textsuperscript{th} percentile decline by 1.2 members (95\% CI: [$-$1.28, $-$1.1]). +While we find support for the classical theoretical prediction of a curvilinear ($\cap$-shaped) relationship between overlap density and growth, this does not imply that relationships between highly overlapping communities are more competitive. +% Instead our results below % in §\ref{sec:res.characterizing} +% show that relationships in ecological communities of subreddits with high user overlaps are typically mutualistic. 
+ + +\subsection{Study B: Introducing Community Ecology} +\label{sec:res.characterizing} + + + + +% describe the figure and the main takeaway +% As described in §\ref{sec:characterizing.ecological.communities}, an ecological community can have positive or negative average ecological interaction §\ref{sec:mes.avg.mut} indicating if it is competitive or mutualistic and ecological interaction strength §\ref{sec:mes.abs.int} provides a way to distinguish ecological communities with a mixture of competitive and mutualistic interactions from those where ecological interactions are weak. + +Figure \ref{fig:commense.x.abs.commense} visualizes the distribution of average ecological interaction and ecological interaction strength over the 641 ecological communities we identify. +We observe ecological communities characterized by strong forms of both mutualism and competition, others having mixtures of the two, and some with few significant ecological interactions. Mutualism is more common than competition, with the mean community having an average ecological interaction of 0.03 ($t=14.5$, $p<0.001$). We find that 524 clusters (81.7\%) are mutualistic. Not only are most ecological communities mutualistic, but more mutualistic ecological communities have greater ecological interaction strength (Spearman's $\rho=0.58$, $p<0.001$). +% Note that due to our clustering procedure, our analysis examines ecological interactions among subreddits with relatively high degrees of user overlap. +Therefore, our community ecology analysis suggests that among groups with similar users, mutualistic ecological interactions are more common than competitive ones. + +\begin{figure} + +\includegraphics[width=\linewidth]{figures/knitr-plot_commense_x_abs_commense-1} + +\caption{Two-dimensional histogram showing ecological communities on Reddit in our typology. 
The X-axis shows the overall degree of mutualism or competition in clusters of subreddits with high user overlap based on the average ecological interaction. The Y-axis shows the ecological interaction strength representing the overall magnitude of competition or mutualism.} +\label{fig:commense.x.abs.commense} +\end{figure} + + + +\subsubsection{Example ecological communities} +\label{sec:case.studies} + +We present four case studies to illustrate our typology of ecological communities of online groups. Figure \ref{fig:commense.x.abs.commense} shows that we find clusters of subreddits characterized by mutualism, competition, a mixture of mutualism and competition, and few ecological relationships at all. We select one case from each of these four types using our measures of average ecological interaction (§\ref{sec:mes.avg.mut}) and ecological interaction strength (§\ref{sec:mes.abs.int}). To allow for more interesting network structures, we draw our cases from the 367 large clusters having at least five subreddits. + +\input{resources/network-figures.tex} + +Figure \ref{fig:networks} presents visualizations of competition-mutualism networks representing statistically significant impulse response functions as described in §\ref{sec:mes.irf}. During our analysis, we also examined the terms of the vector autoregression parameter $\mathbf{\Phi}$, the impulse response functions, and model fits and forecasts, all of which are available in our online supplement. We also visited each subreddit in the clusters and read their sidebars and top posts to support our brief qualitative descriptions. + +\subsubsection{Mutualism among mental health subreddits} + +% TODO, cite somebody on mental health. +To find a case characterized by mutualism, we selected the top 37 large clusters with the greatest average ecological interaction. 
From these, we arbitrarily chose one interesting ecological community, the \textit{mental health} cluster, which includes 11 subreddits for supporting people in struggles with mental health, addiction, and surviving abuse. +Constitutive subreddits include those focused on specific mental health diagnoses like \texttt{r\Slash bpd} (borderline personality disorder) and \texttt{r\Slash cptsd} (complex post traumatic stress disorder) while others like \texttt{r\Slash survivorsofabuse} and \texttt{r\Slash adultsurvivors} +are support groups. + +The interactions among these subreddits are dense and primarily mutualistic as shown in Figure \ref{fig:mut.network}. There are a handful of competitive interactions like the reciprocal competition detected between \texttt{r\Slash codedependence} and \texttt{r\Slash bpd}. We also observe some interactions that are mutualistic in one direction and competitive in the other. For example, growth in \texttt{r\Slash addiction} predicts increasing growth in \texttt{r\Slash cptsd} even as growth in \texttt{r\Slash cptsd} predicts decreasing growth in \texttt{r\Slash addiction}. This suggests a pattern in which \texttt{r\Slash cptsd} siphons members from \texttt{r\Slash addiction}. That said, the density of mutualistic interactions shown in Figure \ref{fig:mut.network} suggests that different subreddits have complementary roles in this ecological community as people turn to different types of groups for help with interrelated problems. While attempting to explain why different online groups form mutualistic or competitive interactions is left to future research, the example of mental health subreddits shows how groups with related topics and overlapping participants can have mutualistic interactions where growth in one predicts growth in many of the rest. 
+ +\subsubsection{Competition among real estate and finance subreddits} + + +To find competitive clusters, we selected from the 36 large clusters with the lowest average ecological interaction an ecological community that we label \textit{finance}. Among the 6 subreddits in this cluster, \texttt{r\Slash realestateinvesting}, \texttt{r\Slash realestate} and \texttt{r\Slash commercialrealestate} all deal in different aspects of the real estate industry, while \texttt{r\Slash financialindependence} and \texttt{r\Slash fatfire} (the acronym ``fire'' means ``financial independence/retire early'') are focused on building wealth and becoming financially independent and \texttt{r\Slash financialplanning} is a general purpose subreddit for financial advice. + +In contrast to the mental health ecological community, the finance cluster has mostly competitive ties as visualized in Figure \ref{fig:comp.network}. The fact that even this cluster, among the most competitive in our data, contains a number of mutualistic ties reflects just how prevalent mutualism is among subreddits with high degrees of user overlap. That said, we detect three reciprocal competitive interactions among the three subreddits that focus on real estate. The edges from \texttt{r\Slash fatfire} to \texttt{r\Slash commercialrealestate} and \texttt{r\Slash financialindependence} are competitive as well. +Interestingly, all interactions between the general finance subreddits (\texttt{r\Slash financialplanning} and \texttt{r\Slash financialindependence}) and \texttt{r\Slash realestate} are mutualistic. +%Interestingly, are mutualistic. + +\subsubsection{Mixed interactions among timepiece subreddits} + +Next, we turn to an example of an ecological community with low average ecological interaction but high ecological interaction strength. +We first select the 36 %(10\%) +large clusters with the average ecological interaction closest to 0. 
To find an ecological community with a mixture of mutualism and competition, we select from the 15 clusters with the greatest ecological interaction strength from within this group and chose the \textit{timepiece} cluster containing 7 subreddits about watches. + +As shown in Figure \ref{fig:mixed.network}, the ecological community of timepiece subreddits is dense with ecological interactions (although not as dense as the mental health subreddits). We observe both reciprocated mutualistic interactions, like that between \texttt{r\Slash rolex} and \texttt{r\Slash gshock}, and competitive interactions like that between \texttt{r\Slash gshock} and \texttt{r\Slash seiko}. We also observe numerous unreciprocated competitive and mutualistic relationships like the mutualism between \texttt{r\Slash watchexchange} and \texttt{r\Slash watchcirclejerk}\footnote{The suffix is widely understood on Reddit to signify a jokey, meme, or satirical subreddit.} +and the competition between \texttt{r\Slash japanesewatches} and \texttt{r\Slash seiko}. +Though the average ecological interaction among these subreddits is near 0, our analysis reveals a complex ecological community with a mixture of competition and mutualism. + +\subsubsection{Sparse interactions among Call of Duty subreddits} + +To find a case where ecological interactions are weak, we return to the group of the 36 %(10\%) +large clusters with the average ecological interaction closest to 0 but select from the 15 clusters within this group with the lowest ecological interaction strength. From these, we chose the \textit{Call of Duty} cluster containing five groups about the popular military first-person shooter series of video games. + +% % more quotations +The Call of Duty ecological community is sparse, having only two significant ecological interactions among its 5 member groups. 
This ecological community includes subreddits about different editions of the series such as \texttt{r\Slash blackops3}, \texttt{r\Slash infinitewarfare} and \texttt{r\Slash wwii} as well as one about a popular spin-off zombie game \texttt{r\Slash codzombies} and the more general \texttt{r\Slash callofduty} subreddit. We find that growth in \texttt{r\Slash blackops3} or \texttt{r\Slash codzombies} predicts growth in \texttt{r\Slash infinitewarfare} and no other ecological interactions. + +The timepiece and Call of Duty ecological communities illustrate how subreddits with overlapping users can have relatively strong or weak forms of ecological interdependence. Although both clusters are characterized by high degrees of user overlap and low average ecological interaction, the timepiece cluster has a dense competition-mutualism network while the Call of Duty network is sparse. + +\subsection{Study C: Predicting Growth} +\label{sec:res.studyC} + +We now compare the environmental approach of population ecology with the relational approach of community ecology. +In Study B, we presented examples of diverse ecological communities among subreddits with overlapping members. However, the presence of this diversity does not mean that ecological interactions are related to the growth of online groups, the key outcome of previous ecological studies. We therefore hypothesized that ecological interactions will improve the predictive performance of a density dependence model in H2. + +\subsubsection{Ecological interactions do not improve growth prediction} +\label{sec:res.likelihood.ratio.test} + +To test H2, we compare Model 1, our density dependence model having first- and second-order terms for overlap density, with Model 2, which also includes average subreddit mutualism (§\ref{sec:mes.sub.mut}) as a predictor. We also examine Model 3, in which the only predictor is average subreddit mutualism. Table \ref{tab:density} shows regression coefficients for our models. 
+ +We do not observe a statistically significant association between average subreddit mutualism and growth ($B_3=0.12, SE=0.26$). +% We observe that average subreddit mutualism is positively associated with growth , which makes sense as subreddits with greater average subreddit mutualism benefit more from mutualism or are hurt less from competition. +Moreover, a likelihood ratio test comparing Model 1 and Model 2 does not support H2 as Model 2 does not predict subreddit growth better than Model 1 ($\chi^2 = 0.23$, $p>0.05$). +% Therefore, average subreddit mutualism does not help predict growth compared to the density dependence model alone. +Comparing Model 2 to Model 3 shows that overlap density explains variation that average subreddit mutualism does not ($\chi^2 = 33$, $p<0.001$). +%This suggests that the density of a subreddit's niche helps explain subreddit growth in important ways not captured by ecological interactions. +Overlap density helps explain a group's future growth, but the overall degree of mutualism or competition a group faces in its ecological community does not. +% In §\ref{sec:discussion}, we discuss how overlap density may only capture the hospitality of a group's environment and may be independent of mutualism and competition within its ecological community. + +\subsubsection{Forecasting accuracy} +\label{sec:res.forecasting} + +The likelihood ratio tests in §\ref{sec:res.likelihood.ratio.test} are limited because improvements in predictive performance (or lack thereof) may be due to unobserved factors predictive of growth that are correlated with average subreddit mutualism. We hypothesized in H3 that the intergroup dependencies in our VAR models can better forecast the size of subreddits compared to baseline time series models that do not account for ecological interactions. 
As described in §\ref{sec:mes.forecasting}, we test H3 by comparing two forecasting metrics: the root-mean-square-error (RMSE) and the continuous ranked probability score (CRPS). + +VAR models including ecological interactions have forecasting performance superior to the baseline model in terms of both RMSE and CRPS. We evaluate the 24-week forecast performance for all subreddits which were assigned to clusters. The RMSE under the baseline model (0.84) is greater than the RMSE of the VAR models (0.75) and the CRPS of the baseline model (72,853) is also greater than the CRPS of the VAR models (72,669). This reflects a substantive improvement in forecast accuracy robust to the choice of the forecasting metric. + +Our baseline model contains a constant term and a trend term for each group and therefore accounts for all time-invariant within-group variation. Because overlap density is a subreddit-level variable that does not vary over time, +we know that the improvement in forecasting performance comes from modeling ecological interactions in ways not captured by overlap density. + +\section{Threats to Validity} +\label{sec:limitations} +Our work is subject to several important threats to validity that we cannot fully address. First, we study ecological communities on only one platform hosting online groups and our results may not generalize to other platforms or time periods. +Additionally, while our community ecology approach assumes that ecological interactions drive dynamics in the size of groups over time and cause groups to grow or decline, drawing causal inference using our method would depend on several untestable assumptions. For example, our ability to infer causal relationships might be limited if groups we do not consider---including groups on other platforms---play a role in an ecological community. Regression estimates in Models 1-3 may be confounded by omitted variables and cannot support causal interpretation. 
+Therefore, we refrain from claiming that the relationships we infer are causal. + +The method we propose for identifying ecological interactions between online groups has limitations common to all time series analysis of observational data. +Potential omitted variables might also include additional time lags of group size. Although we chose to use VAR(1) models with only 1 time lag, we hope future work can improve upon our approach and model more complex dynamics with additional lags. +% Our results are offered as limited temporal associations consistent with inferred ecological interactions. +Like most other time series analysis, vector autoregression assumes that the error terms are stationary. This is difficult to evaluate empirically and may not be realistic \citep{canova_var_2007}. Future work might relax these assumptions using more complex models with time-varying parameters, state space models \citep{box-steffensmeier_time_2014}, nonlinear time series models \citep{cenci_regularized_2019, kantz_nonlinear_2003}, or stationarity-enforcing priors \citep{heaps_enforcing_2020}. Such approaches may require additional contextual knowledge and be difficult to scale to an analysis of hundreds of different ecological communities, but may prove fruitful in future work focusing on ecological communities of interest. Such models may also be useful in future work investigating how ecological interactions change over time. + +Additional threats to validity stem from our use of algorithmic clustering to identify ecological communities. +Organizational ecologists have rarely attempted to estimate the full community matrix for an entire population containing a large number of groups because of data and statistical limitations \citep[e.g.][]{ruef_emergence_2000, sorensen_recruitment-based_2004}. For instance, 100 million possible ecological interactions exist within a set of 10,000 communities. 
Attempting to infer them all raises considerable computational and statistical challenges. +% This makes it necessary to narrow the scope to the ecological communities of interest in ways appropriate to the research question. +We chose to use a clustering analysis to explore the typical ecological communities on a platform. + +% Yet, a + +While we choose clusters based on high degrees of user overlap and validate our clustering in terms of the silhouette coefficient and purity criteria, we might have obtained different results if we had clustered in a different way. Additionally, our efforts to obtain clusters with a high silhouette coefficient lead us to remove a large number of subreddits from our analysis. Thus, our results are not representative of Reddit overall, but only of those subreddits that were included in our analysis. Furthermore, clustering algorithms like the one we use may not have unique solutions and different initial conditions and hyperparameters might lead to different results. While these allow us to scale up our analysis, future work should use principled definitions of an ecological community based on qualitative contextual knowledge in focused studies of particular ecological communities. +% future investigations should also consider qualitative approaches to constructing ecological communities. +% Finally, our three cases studies are limited in that they can offer only a proof-of-concept analysis and an enticing hint at more comprehensive future analyses with more rigorously defined populations of online groups. +% Although we found varying results in the three ecological communities we selected, these case studies can provide little explanation for when one should expect to find different forms of commensalism in online groups. Our hope is that these initial results can point in new directions for research. +% % We looked at three different sets of related online groups and found three qualitatively different ecological communities. 
+% As is true in all case study research, there is little reason to expect findings from any one of our case studies to generalize to any specific other set of contexts. + +\section{Discussion} +\label{sec:discussion} + +To introduce community ecology and compare it to population ecology, we presented three studies. In Study A, we found support for H1 showing---as predicted by density dependence theory---that overlap density has an $\cap$-shaped association with subreddit growth. +Subreddits with moderate overlap density in our data declined less than subreddits with either very low or very high overlap density. +According to population ecology theory, this suggests that high-density environments are competitive and less conducive to growth than medium-density environments. + +%prevalence of mutualism among highly overlapping subreddits contrast with our results for + +Surprisingly, this contrasts with our results in Study B, where we studied the diversity of ecological communities using vector autoregression models of group size over time to infer networks of ecological interactions. +%surveyed clusters of highly overlapping groups on Reddit to. +We find ecological communities that are mutualistic or competitive, that mix the two, or that have few significant ecological interactions at all. Overall, however, ecological communities of subreddits are typically mutualistic and mutualistic interactions are stronger on average than competitive ones. Although we find evidence of density dependence, density-dependent competition does not necessarily reflect typical relationships in ecological communities of highly overlapping subreddits. + +%As discussed more below, our results are due to the fact that support for H1 does not necessarily mean that most relationships between subreddits with the greatest degrees of user overlap are competitive. 
+ +Our results in Study C show that the size of the other members of an ecological community improves time series forecasts of participation in online groups. However, average subreddit mutualism did not help predict growth. +This suggests that population ecology and community ecology offer complementary environmental and relational perspectives. +Population ecology's focus on environmental factors such as niche and overlap density is useful for predicting growth, but does not provide a way to study networks of mutualism and competition. +Community ecology unpacks density and provides insights about the specific relationships between groups. While modeling these interactions helps forecast participation levels in groups, the existence of these interactions may be independent of future growth. For example, if mutualistic relationships are common in declining ecological communities, that would explain our result for H2. + +% these interactions helps time series forecasting, but whether the interactions + +% While we advance community ecology as an alternative framework to population ecology, our results show that population ecology and community ecology are complementary perspectives. +% We tested H2 to find out whether including subreddit average mutualism improves the ability of a density dependence model to predict the size of a subreddit n.test weeks in the future and found that it did not. Therefore, + +% Yet in support of H3, including ecological interactions in the vector autoregression (VAR) models substantially improves their forecasting performance. + + +% Our findings in Study A and Study B may appear contradictory, their coincidence in our data points to ways in which population ecology and community ecology conceive of different kinds of ecological dynamics. + +The complementary nature of the two ecologies is seen in the coincidence of our findings in Study A and Study B. 
+Indeed, these results can help explain the puzzling set of empirical results about the relationship between overlap density and outcomes like growth, decline and survival \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. +Studies of density dependence theory in social computing measure the density of an online group's niche in terms of its overlap in participants or topics. +%Resource overlaps seem to reflect competitive forces in some circumstances but mutualistic ones in others. +Our analysis clearly shows that resource overlaps between two groups might have little to do with whether they are mutualists or competitors. Instead, overlaps may simply reflect the hospitality of the environment to groups with overlapping topics or user bases. +As a result, the differing environmental conditions of Wikis and Usenet groups might explain why user overlap was associated with the survival of wikis \citep{zhu_impact_2014} but with the decline of Usenet groups \citep{wang_impact_2012}. Wikia was a young and growing platform during \citepos{zhu_impact_2014} data collection period when the growth of groups may have been limited by knowledge of how to build a wiki, and this knowledge was provided by overlapping experienced users. +Usenet was in decline during \citepos{wang_impact_2012} study period and this may have produced competitive environmental conditions as users became more scarce. +%Users of groups with high overlap density may have greater commitment to the platform than to any particular group and competition over such users may become fierce when a platform goes into decline. + +% as users with comm + +% because + +% and \citeauthor{tan_all_2015} \cite{tan_all_2015} observe that accounts posting in fewer different groups are more likely to leave a platform. +% As \citeauthor{kraut_building_2012} \cite{kraut_building_2012} argue, commitment to subgroups can enhance commitment to a broader group. 
This suggests that On the other hand, members of a group with high overlap density may have little commitment to it in particular. + +% This suggests that commitment to a + +% We suggest that when commitment to the platform declines this may amplify competition as +% may present environmental conditions for strong competition over those members +% This suggests that +% Such groups may face greater challenges in sustaining participation when the platform goes into decline. + +The widespread mutualism found in Study B resonates with long-held understandings of ecological interactions in evolutionary theory \citep{kropotkin_mutual_2012}. Competition is unlikely to persist because it decreases survival. Because mutualism increases survival, it will be favored by natural selection \citep{armstrong_competitive_1980, axelrod_evolution_1981}. Similarly, competition can be avoided if groups adopt specialized roles in their ecological community, a dynamic known as resource partitioning in organizational ecology \citep{carroll_concentration_1985,menge_competition_1972,schoener_resource_1974}. Resource partitioning theory suggests that the competition among real estate subreddits observed in Figure \ref{fig:comp.network} may be due to a lack of specialization. If specialization does not emerge over time, such groups of competing subreddits may have decreased survival. By contrast, mental health support groups like those observed in Figure \ref{fig:comp.network} appear to have distinctive purposes or roles. Future work to test such mechanisms in ecological communities of online groups may reveal ways that online groups complement or cooperate with each other. + + +%Our results demonstrate population ecology's approach to competition and mutualism in a test of density dependence theory and provide an evaluation of community ecology's ability to predict subreddit growth. 
+ + +%Future work should directly test this hypothesis about the relationships between platform-based and subgroup-based commitment. + +% In general, competition over overlapping resources will have no effect on group growth if something besides the overlapping resource limits growth \cite{verhoef_community_2010}. For example, two wikis might share a large number of contributors (have high user overlap), but their growth might be limited by a lack of core contributors who perform important administrative tasks like policy making and software administration \cite{zhu_impact_2014}. Community ecology relaxes the assumption that competition and mutualism are caused by user overlap density and instead seeks to infer them from data. +% To illustrate our approach, we presented 4 example ecological communities found on Reddit §\ref{sec:case.studies}. +Within large platforms for online groups, the great number of ecological communities that can be studied should make it possible for future work to apply methods from network science to construct and test generalizable theories about the roles of different types of resources, design features of platforms, and governance institutions in these ecological interactions. Future work should also incorporate community ecology analysis in case studies of important topics such as ecological communities engaged in peer production, political mobilization, misinformation, or mental health support. + +Although we focused on online groups within a single platform, groups may use multiple platforms with distinctive affordances for different purposes \citep{fiesler_moving_2020, kiene_technological_2019}. Since the VAR method relies only on time series data to infer ecological interactions, it can be applied to study ecological communities spanning social media platforms. 
Community ecology can thus provide a bridge between quantitative studies of participation in online groups and theories of interconnected information ecologies \citep{nardi_information_1999}. While we focus on relationships between groups sharing a platform, one can apply our concepts and methods to understand how interdependent systems of technologies and users give rise to higher levels of social organization on social media platforms \citep{astley_two_1985, aldrich_organizations_2006}. + +\subsection{Implications for Design} + +% While Resnick et al.~\citep{resnick_starting_2012} +In the final chapter of their book on \textit{Building Successful Online Communities}, \citet{kraut_building_2012} advise managers of online groups to select an effective niche and beware of competition. However, these recommendations are based on little direct evidence from studies of online groups and offer almost no concrete steps that a designer or group should take based on either piece of advice. Although further research into ecological interactions is needed before design principles can be derived, we provide a framework for online group managers to think about ecological constraints on group size. +While intuition suggests that online group managers might seek out mutualistic relationships and avoid competitive ones, it is often not obvious whether another group with overlapping users is a competitor or mutualist. +Our method provides a way for group managers to know. + +Competitors have a negative impact on growth, but ecological theory suggests that specialization is an adaptive strategy in response to competition \citep{aldrich_organizations_2006, carroll_concentration_1985, kraut_building_2012, powell_network_2005}. +%For example, the growth of Wikipedia caused other online encyclopedia projects to shift their focus \cite{hill_almost_2013}. +Using our method, group managers might identify competitors limiting the growth of their groups. 
With the knowledge of this analysis in hand, they might be able to escape a competitive dynamic by specializing. +While competitive relationships are defined by how they decrease the size of groups, competition can also be important to the health of the broader ecological community. Exit to an alternative group can be an avenue for political change in response to grievances and poor governance \citep{hirschman_exit_1970, frey_emergence_2019}. The threat of competition with other groups may make expressions of voice more persuasive to moderators or platforms \citep{hirschman_exit_1970}. + +Groups looking to increase activity should desire to seek out mutualistic relationships, and we believe that designers of online platforms can help them do so. Features such as meta-groups, group search, recommendation engines, and practices like linking related groups may lower barriers between groups and support mutualism. However, it is not obvious to what extent particular features will support competition, mutualism, or both. Using our method, managers and designers can test features intended to support mutualism. + +\section{Conclusion} + +% Rewrite conclusion +While explanations for the rise or decline of online groups often look to internal mechanisms, understanding the role of interdependence between online groups is increasingly important. +While prior research has investigated competition and mutualism among online groups with overlapping users and topics using the population ecology framework \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}, this approach does not provide a way to infer competitive or mutualistic interactions among related groups. +We introduce the community ecology framework as a complementary perspective to population ecology. +% The two ecologies both seek to explain why online groups grow or survive, but they focus on different levels of analysis \cite{astley_two_1985}. 
+By inferring competition-mutualism networks directly from time-series data, our community ecology approach helps resolve the empirical tensions raised by prior ecological work in social computing and reveal that most interactions within clusters of subreddits with highly overlapping users are mutualistic. Our methods provide a foundation for future work investigating related online groups. +% \printbibliography[title={References},heading=secbib] + diff --git a/dissertations/nathante_uw_2021/ch4_competitive_exclusion.tex b/dissertations/nathante_uw_2021/ch4_competitive_exclusion.tex new file mode 100644 index 0000000..1c1409c --- /dev/null +++ b/dissertations/nathante_uw_2021/ch4_competitive_exclusion.tex @@ -0,0 +1,933 @@ +% +%% This is file `sample-authordraft.tex', +%% generated with the docstrip utility. +%% +%% The original source files were: +%% +%% samples.dtx (with options: `authordraft') +%% +%% IMPORTANT NOTICE: +%% +%% For the copyright see the source file. +%% +%% Any modified versions of this file must be renamed +%% with new filenames distinct from sample-authordraft.tex. +%% +%% For distribution of the original source see the terms +%% for copying and modification in the file samples.dtx. +%% +%% This generated file may be distributed as long as the +%% original source files, as listed above, are part of the +%% same distribution. (The sources need not necessarily be +%% in the same archive or directory.) +%% +%% The first command in your LaTeX source must be the \documentclass command. +% \documentclass[sigconf,authordraft]{acmart} + + +%%%% As of March 2017, [siggraph] is no longer used. Please use sigconf (above) for SIGGRAPH conferences. + +%%%% As of May 2020, [sigchi] and [sigchi-a] are no longer used. Please use sigconf (above) for SIGCHI conferences. 
+ +%%%% Proceedings format for SIGPLAN conferences +% \documentclass[sigplan, anonymous, authordraft]{acmart} + +%%%% Proceedings format for conferences using one-column small layout +%\documentclass[acmsmall,authordraft]{acmart} + +% NOTE that a single column version is required for submission and peer review. This can be done by changing the \documentclass[...]{acmart} in this template to +% \documentclass[sigconf,review=True]{acmart} +\chapterprecishere{ +% Most explanations of changes in online group size focus on internal factors like social structures or design decisions. +% do not make the , and render critical questions like “which other groups are a given group's strongest competitors or mutualists?” unanswerable. +% TODO: Polish abstract +% Online groups interact with each other as people, content and ideas flow among them. +We introduce a method for inferring competitive and mutualistic interactions between online groups from time series participation data based on the theoretical framework of community ecology. Platforms often host multiple online groups with highly overlapping topics and members. How can researchers and designers understand how interactions between related groups affect measures of group health? Inspired by population ecology, prior social computing research has studied competition and mutualism among related groups by correlating group size with degrees of overlap in content and membership. The resulting body of evidence is puzzling as overlaps seem sometimes to help and other times to hurt. We suggest that this confusion results from aggregating intergroup relationships into an overall environmental effect instead of focusing on networks of competition and mutualism among groups as our approach does. We compare population and community ecology analyses of online community growth by analyzing clusters of subreddits with high user overlap but varying degrees of competition and mutualism. 
+} + +%% +%% The code below is generated by the tool at http://dl.acm.org/ccs.cfm. +%% Please copy and paste the code instead of the example below. +%% +% \begin{CCSXML} +% +% +% 10010520.10010553.10010562 +% Computer systems organization~Embedded systems +% 500 +% +% +% 10010520.10010575.10010755 +% Computer systems organization~Redundancy +% 300 +% +% +% 10010520.10010553.10010554 +% Computer systems organization~Robotics +% 100 +% +% +% 10003033.10003083.10003095 +% Networks~Network reliability +% 100 +% +% +% \end{CCSXML} + +% \ccsdesc[500]{Computer systems organization~Embedded systems} +% \ccsdesc[300]{Computer systems organization~Redundancy} +% \ccsdesc{Computer systems organization~Robotics} +% \ccsdesc[100]{Networks~Network reliability} + +%% +%% Keywords. The author(s) should pick words that accurately describe +%% the work being presented. Separate the keywords with commas. +% \keywords{datasets, neural networks, gaze detection, text tagging} + +%% A "teaser" image appears between the author and affiliation +%% information and the body of the document, and typically spans the +%% page. + +% \begin{teaserfigure} +% \includegraphics[width=\textwidth]{sampleteaser} +% \caption{Seattle Mariners at Spring Training, 2010.} +% \Description{Enjoying the baseball game from the third-base +% seats. Ichiro Suzuki preparing to bat.} +% \label{fig:teaser} + +% \end{teaserfigure} + +%% +%% This command processes the author and affiliation and title +%% information and builds the first part of the formatted document. + + +% \fontsize{12pt}{24pt} +% \selectfont + +%% We're going for a "known puzzle" + "clarifying confusion" framing +%% Rememver to frame aronud the depvar + +%% TODO: rewrite with a new outline +%% Introduction, Related Work, Materials & Methods, Results, Discussion, Conclusions +%% Put research question in the introduction. +%% Put hypotheses in Related Work. 
+%% Consider Hypothesizing that mutualism will be more common than competition because subreddits in these clusters are specialized. +%% Cut unneeded ecological terms +%% Define needed ecological terms + +\section{Introduction} +\label{sec:intro} + +% Why we need an ecological approach +%Online groups are important places where people collaborate to produce information sources, engage in discussions and participate in culture. +Although the fact is frequently ignored in social computing scholarship, online groups do not exist in isolation.\footnote{We use the term ``online group'' instead of ``online community'' to help avoid confusion with our term ``community ecology'' which plays an important conceptual and analytic role in our paper.} Indeed, although studying interdependence between online groups is difficult and complex \citep{hill_studying_2019}, research in social computing has sought to quantify how online groups share users or topics \citep{datta_identifying_2017, del_tredici_semantic_2018, tan_all_2015, hessel_science_2016}, and how such interactions relate to outcomes like the emergence of new groups \citep{tan_tracing_2018}, contributions to peer-produced knowledge \citep{vincent_examining_2018}, and the spread of hate speech \citep{chandrasekharan_you_2017}. Although this work has demonstrated that intergroup interactions matter, very little intergroup research has tackled questions of group success---i.e., why some online groups succeed in maintaining active and long-lived participation while most do not. +%\citep{kraut_role_2014, resnick_starting_2012}. % commented out since there was no response +Can intergroup relationships +% competition or mutualism between online groups +explain whether online groups will grow or decline? +% NOTE: I guess you've added the footnote above to address the reviewer concern. It's important but (a) I think it's too early in the manuscript to bring this in and (b) it should be in a footnote. 
-mako +% I moved it below by the RQ. + +%a growing body of social computing research shows that online groups, such as wikis, discussion forums and mailing lists spawn new groups and wage conflicts against, compete with and help each other \citep{datta_identifying_2017, tan_tracing_2018, wang_impact_2012, zhu_impact_2014}. + +% individual chances of success while mutualistic dynamics increase them. + +% How do relationships between groups shape their chances of success? + +% What's wrong with previous ecological approaches +% Should we introduce ecological theory in the introduction at all? + +Studies in social computing have drawn from organizational ecology to answer this question \citep{wang_impact_2012, zhu_impact_2014, resnick_starting_2012, zhu_selecting_2014}. Inspired by the ecological study of biological systems, organizational ecology is an influential body of theory in sociology that studies competition and mutualism among human organizations. +% , ranging from commercial industries to social movements \citep{hannan_population_1977, baum_ecological_2006}. +% NOTE: There's a jump between this sentence and the last one. I think we might need to signal, somehow, that orgecol is not puzzling or the results in soccomp are puzzling in regards to them. I've changed puzzling below to inconsistent but we should make it clear what it's inconsistent with. -mako +Although ecological studies of firms and social movements have developed a clear and established body of theory with strong empirical support \citep{baum_ecological_2006}, similar studies of online groups have yielded inconsistent results that differ both from one context to another and from theoretical predictions. For example, wikis whose memberships overlap with other wikis survived longer \citep{zhu_selecting_2014}, but Usenet groups with overlapping memberships failed more quickly \citep{wang_impact_2012}. + +% NOTE: I'm not sure conflation is the right term here. 
I've reworked this paragraph below -mako +% I think you nailed it. -- nate +We argue that these confusing results stem from a conflation of concepts and measures from two distinct strands of theory in organizational ecology: \emph{population ecology} and \emph{community ecology}. Both define competition as a form of interdependence that \emph{decreases} growth and mutualism as one that \emph{increases} growth. However, population ecology focuses on modeling how overlapping resources among groups affect their subsequent growth, decline, or survival \citep{astley_two_1985, baum_ecological_2006, dobrev_dynamics_2001}. It does not attempt to directly study competitive and mutualistic interactions. On the other hand, community ecology recognizes that groups often exist within ``ecological communities,'' or clusters of highly related entities, and provides an approach for inferring competitive and mutualistic interactions among these. Although the stated goal of ecological research in social computing has been to understand how groups influence each other's ability to sustain participation, ecological research in social computing has relied exclusively on concepts and measures from population ecology. This paper seeks to explain the puzzling set of findings in ecological social computing research by introducing community ecology. + +%These strands have different concepts of ecological dynamics, different levels of analysis and make distinct theoretical predictions \citep{astley_two_1985}. +% despite the fact that doing so is vital to + + + +% Our contributions to CSCW are theoretical, methodological, and empirical. 
+ +% Our theoretical contribution, articulated in §\ref{sec:community_ecology}, + +% We then demonstrate both approaches by investigating our research question: +% \textit{(\textbf{RQ}) How does community ecology's view of competition and mutualism in online groups compare to that of population ecology?} + +% Our overarching goal is to introduce community ecology as a theoretical and methodological framework for understanding how the relationships between specific online groups shape their growth or decline. + +We do so in a three-part empirical study using a dataset drawn from the 10,000 communities on Reddit with the most contributors to analyze 641 clusters of online groups with overlapping participants. +In Study A, we conduct the most important type of population ecology analysis, a test of what is called density dependence theory, and find support for the theory. +%This suggests that competition is strongest when user overlap is high and mutualism is weakest when overlap is low. +This analysis suggests that high degrees of user overlap are associated with competition. +%VAR models are widely used in biological ecology to make inferences about competitive or mutualistic interactions between species. +In Study B, we introduce our method for community ecology analysis that infers networks of competitive and mutualistic interactions by using clustering analysis and vector autoregression (VAR) models of group size over time \citep{sims_macroeconomics_1980, canova_var_2007, ives_estimating_2003}. We illustrate the method in four case studies and present a large-scale computational analysis showing that mutualistic interactions are far more common than competitive ones. +Finally, in Study C, we bring Study A and Study B together to compare population ecology and community ecology by extending the density dependence model from Study A with a variable accounting for competition and mutualism. 
While we find that adding this variable does not help predict growth, including ecological interactions in our VAR models improves time series forecasting. + +% importance of accounting for mutualistic and competitive interactions in predicting the growth of online groups. We + +% While models including , . + +We discuss how these findings illuminate the differences between population ecology and community ecology and show how the two perspectives are complementary. +While Study A suggests that competition is strongest when user overlap is high, Study B finds widespread mutualism among groups with overlapping membership. +Although these findings might seem contradictory, they reflect how population ecology studies overlapping resources related to favorable or unfavorable environmental conditions, while community ecology studies competitive and mutualistic interactions playing out in local networks of specific groups. By demonstrating that mutualistic and competitive interactions within clusters of highly related groups are important---and by describing how to measure them---this paper lays the groundwork for future research to investigate and design for interdependence between online groups that supports their growth and success. + +%we demonstrate that interactions are important and how to inferred and are useful for time series forecasts of + +% and inform design + +% by understanding + +%lays the groundwork for future research toward design + +% understanding how different forms of + + + + + +% To answer this question, We validate our approach by showing in §\ref{sec:res.forecasting} that + +% % NOTE: Is it (1) the top 1000? It would be nice to summarize the comprehensiveness here. (2) I'm ambivalent about the word "network" here. -mako +% We make four specific empirical contributions: Reddit in §\ref{sec:res.characterizing} and . 
+ +% and provide an explanation for why previous ecological research in social computing has led to confusing and inconsistent results. + + + +% NOTE: Is the sentence below correct? I guess so (at least indirectly) but I haven't read the new discussion. -mako New discussion isn't written yet, but right now that explanation is in the background section. :) -N + +% NOTE: cut this last sentence? -mako - I think this last sentence will be a more accurate reflection of the discussion. -N +% We + +% We + +% We make a theoretical contribution by introducing the community ecology perspective that We also make a methodological contribution by providing a method for inferring these relationships from time-series data on group sizes + +% Where prior approaches aggregate individual relationships between groups, our approach makes it possible to answer critical questions like ``which are a given online group's mutualists or competitors?'' + +% In the process, our theoretical work brings clarity to a confusing set of empirical results in prior research. + +%Discussing this seemingly contrasting finding motivates future investigations into how competitive or mutualistic ecological communities form and why some environments for online groups are competitive or mutualistic. + +% This method builds on a popular approach in biology that provides robust inferences about networks of ecological relationships. , analysis of stability, forecasts of future participation, and can scale to analyze systems of dozens of related communities. We apply this approach to four datasets. + +% We validate our method using simulated data to show that it can identify a full range of ecological relationships and conduct a series of three case studies of groups hosted on the platform Reddit in \textsection \ref{sec:case.studies}. 
Although limited, these case studies make a third contribution in the form of empirical findings that suggest that specific patterns of relationships vary substantially across networks of groups and that mutualism appears to be much more common than competition. + +\section{Related Work} +\label{sec:related.work} + +% One sentence on "timeliness." Find citations (Chowdry, Benkler, +Online groups are important sites for social support \citep{de_choudhury_mental_2014}, entertainment \citep{ducheneaut_alone_2006}, information sharing \citep{benkler_wealth_2006}, and political mobilization of disinformation campaigns and protest movements \citep{choudhury_social_2016, benkler_social_2013, krafft_disinformation_2020}. +% knowledge of the ecosystem of online groups is important for advancing social science and informing future designs to support and manage online groups. +Although an online group's ability to achieve its goals depends on attracting and retaining contributors, few develop a sizable group of participants \citep{benkler_wealth_2006, dimaggio_social_2001, johnson_emergence_2014, koh_encouraging_2007, kraut_role_2014}. Many attempts to explain the success and growth of online groups look to properties of individual groups like characteristics of founders \citep{kraut_role_2014}, language use \citep{danescu-niculescu-mizil_no_2013}, turnover \citep{dabbish_fresh_2012}, and designs for regulating behavior \citep{halfaker_rise_2013, teblunthuis_revisiting_2018}. + +Recent research suggests that interdependence among online groups is also important to explain success and failure \citep{cunha_are_2019, kairam_life_2012, tan_all_2015, tan_tracing_2018}. +For example, banning hate subreddits reduced hate speech in related subreddits \citep{chandrasekharan_you_2017}. In a very different context, there is evidence that Reddit and Stack Overflow receive substantial benefits from activity on Wikipedia \citep{vincent_examining_2018}. 
+% ; and editors make valuable and qualitatively different contributions across different languages of Wikipedia \cite{hale_cross-language_2015}. In addition, growth trajectories of online groups initially about similar topics can diverge \cite{zhang_understanding_2021}. +Our work contributes to this literature by providing a new conceptual lens and statistical method for studying competition and mutualism between online groups. + +% , which theorizes how online groups depend on distinct types of resources. +% As we discuss in §\ref{sec:rdp}, the nature of these resources makes possible conditions for mutualism or competition. In §\ref{sec:ecology_background}, we explain how prior ecological studies of online groups extended RDT to consider how overlapping resources between communities can drive competition and mutualism and propose our first hypothesis which replicates part of these studies in Reddit, our empirical context. Finally, in §\ref{sec:community_ecology}, we draw anew from biology and organizational ecology to present our community ecology approach and propose hypotheses to validate its usefulness for predicting the growth of online groups. + +\subsection{Online Groups Depend on Resources} +\label{sec:rdp} + +Like prior ecological research in social computing and information systems, we build on resource dependence theory (RDT) \citep{butler_membership_2001, wang_impact_2012}. +\citet{butler_membership_2001} introduces +RDT to argue that growth in online groups is driven by positive feedback as participants contribute resources such as content, information, attention, or social interactions, which motivate further contributions by subsequent participants. That said, online groups do not grow forever and RDT explains that growth is self-limiting because costs of participation increase in larger groups \citep{butler_membership_2001, butler_attraction-selection-attrition_2014}. 
+ + +% While growth far from the only criteria of success for an online group, much social computing research follows RDT by seeking to support groups' growth and survival through the attraction or retention of members \cite{koh_encouraging_2007, kraut_role_2014, cunha_are_2019}. + +% For example, explanations of Wikipedia's transition from growth to decline structures for quality assurance in a growing project that constituted barriers to newcomer participation \cite{halfaker_rise_2013, teblunthuis_revisiting_2018} spawned significant interest in designs for increasing newcomer retention that have met with limited success \citep[e.g.][]{halfaker_snuggle:_2014, morgan_tea_2013, narayan_wikipedia_2017}. Social structures like leadership, organizational practices, network structure, and design decisions can lower costs and increase benefits of participation \cite{butler_membership_2001, kraut_role_2014, tsugawa_impact_2019}. + + +%TODO: incorporate the below citations to "demonstrate that this is of importance to the social computing audience"" Also cite Charlie's paper about cross-platform interdependence + +%We review this foundational work in §\ref{sec:resource_dep} and then narrow our focus to prior ecological studies and other empirical work about interdependence between online groups in §\ref{sec:ecology_background}. Then, in §\ref{sec:community_ecology} we review sociological research developing community ecology theory and apply it to online groups. + +% It also builds closely on two bodies of ecological theory: first, explanations from population ecology that describe entities as sharing resources in environments and second, explanations from community ecology that theorize networks of specific community relationships. +% In our background we introduce the first two bodies of related work in sections \ref{sec:resource_dep} and \ref{sec:ecology_background}. + + % Frame around the dependent variable: + + % Explaining participation is important because + % 1. 
It's a longstanding concern of the field + % 2. Online Groups are important to society + % models + % ranging from entertainment, information exchange, social interaction, to the collaborative production of knowledge and organization of collective action + + +% This positive feedback between the value of prior contributions and the motivation for future contributions drives community growth. +% Think about the implications of our findings for the rival vs nonrival resources that could be in play. + +% Maybe try to deepen the discussion of resource competition, or maybe its better to avoid getting dragged into this. + +Ecological approaches recognize that interrelated online groups may share resources with one another in ways that constrain their growth and survival. \textit{Rival} resources like participants' time, attention, and efforts raise the possibility of competition because they become unavailable to others when used by one group \citep{benkler_wealth_2006, kubiszewski_production_2010, ostrom_public_1977,romer_endogenous_1990}. RDT suggests that declines in online participation can be explained in terms of competition over important rival resources \citep{wang_impact_2012}. +% Online participation in general has opportunity costs and may compete with alternatives like sleep, entertainment, or work \cite{becker_theory_1965, butler_attraction-selection-attrition_2014}. +% So online groups that provide similar benefits may be the most likely competitors because once someone has obtained satisfying benefits from one group they may go offline or switch to another activity instead of seeking similar benefits from competitor groups.\footnote{Economists refer to these as ``substitutes.' } + +% providing the same benefits at lesser costs might be a compelling alternative. +% If different online groups can substitute for participation in one another and participation is rival this will lead to competition between the communities and decrease participation in both. 
+% Public goods are nonrival because their usefulness is not diminished when others use them. + +On the other hand, online groups also rely on \textit{nonrival} resources. They can even produce connective and communal public goods like opportunities to communicate or collections of information \citep{fulk_connective_1996} which can be ``antirival'' when their usefulness increases as a result of others using them \citep{kubiszewski_production_2010, weber_political_2000}. For example, the usefulness of a communication network increases as more people join it \citep{fulk_connective_1996, katz_network_1985}. Similarly, the usefulness of an information good can increase as more people come to know, refer to, and depend upon it \citep{kubiszewski_production_2010, weber_political_2000}. +% as when +%Awareness that an online group provides an audience can motivate participation \cite{zhang_group_2011}. +If multiple online groups help build the same connective or communal public goods, they may form mutualistic interactions where contributions to one group may ``spill over'' and motivate participation in mutualist groups \citep{zhu_impact_2014}. +Ecological approaches seek to understand how different types of resources will limit or promote growth. +% as was demonstrated when Chinese government blocked the Chinese language edition of Wikipedia, unblocked contributors decreased their participation +% + + +%As a result, researchers, designers, and managers of online communities often set aside thorny questions of interdependence between online communities. +%While extensions of the resource dependence framework recognize the importance of exit from online communities \cite{butler_attraction-selection-attrition_2014}, they do not say where people go when they leave. 
% Before turning to our theory of community ecology, we note differences between ecological theory and analysis in organization and biological science from other uses of the term ecology in HCI and social computing. +% The term ``ecology'' often connotes interconnectedness, complexity, growth, and nature, and also crises of resource sustainability, loss, and extinction \cite{worster_natures_1994, blevis_ecological_2015}. Most references technologists make to ``ecology'' +% For example Nardi and O'Day invoke the ecological metaphor in describing their vision for individuals to cultivate intentional and localized relationships with technology \cite{nardi_information_2000, bowker_bonnie_2001}. +% This continues a long-running intellectual exchange between social and biological sciences. Economic thought was strongly influenced by Darwinian evolution and ecologists in biology were influenced by economic models to understand and solve problems in forestry and conservation \cite{kropotkin_mutual_2012, worster_natures_1994}. Once modern ecological science was developed it was not long before it was applied to understand human societies \cite[e.g.][]{park_human_1936, hawley_human_1986}. Because theories of organizational ecology were crafted to address particular concerns in organization science and are laden with assumptions appropriate to traditional firms with fixed and durable boundaries, our ecological approach also draws from biology. + +% TODO This section needs a number of new concrete examples. Revisit the ecological literature as well. Also perhaps add some examples from the interview paper (which we'll cite and anonymize). +\subsection{Population Ecology, Density Dependence and Overlapping Resources} +\label{sec:ecology_background} + +% Our theoretical approach draws from ecology. 
+While this paper focuses on the ecological study of online groups, other social computing and HCI scholars have used the term ``ecology'' (and related concepts like ``ecosystem'' and ``environment'') to denote an assemblage of sites, devices, or platforms \citep{nardi_information_1999,wang_coming_2015}. We use the term more narrowly to refer to conceptual and mathematical models of ecological dynamics. +In particular, our work builds on a tradition rooted in \textit{organizational ecology}. First developed in the late 1970s by sociologists studying interactions between firms, organizational ecology was inspired by, and has drawn closely from, ecological studies in biology \citep{hannan_population_1977}. + +Because online groups bear similarities to traditional organizations, organizational ecology provides a compelling theoretical framework for understanding interdependence among online groups. It has inspired at least three high-quality empirical studies of how resources shared by online groups shape their growth, decline, or survival \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. +These studies draw from the \textit{population ecology} strand of organizational ecology +%, while we introduce \textit{community ecology} as an alternative. +that studies ecological dynamics within a population of groups. In organizational ecology, populations have been defined as sets of organizations sharing an organizational industry or business model \citep{hannan_organizational_1989}. In social computing, populations have been defined as online groups sharing a given social media platform \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. + +While population ecology involves several distinct theoretical propositions, \textit{density dependence theory} (DDT) is perhaps the most prominent and is the subject of all three prior ecological studies of online groups \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. 
DDT models competitive or mutualistic forces in a population of groups as a function of \textit{density} which, in the earliest and most influential studies of DDT, is simply the size of the population. In this way, DDT assumes that every group in the population is facing the same competitive and mutualistic pressures \citep{aldrich_organizations_2006}. +However, online groups sharing a platform have diverse topics \citep{kairam_life_2012}, norms \citep{chandrasekharan_internets_2018, fiesler_reddit_2018}, and user bases \citep{tan_all_2015}. Because groups sharing few resources are unlikely to be strongly interdependent, ecological studies of online groups have modeled density dependence based on the concept of \emph{overlap density} \citep{baum_ecological_2006, dobrev_dynamics_2001, wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. Rather than the number of groups that exist in a population, overlap density measures the extent to which one group's members or topics overlap with those of all other groups. Overlap density thus characterizes a group's \emph{niche} or local \emph{resource environment} defined by its distinctive topic and membership. + + +%Unlike \citet{datta_identifying_2017}, we do not divide user frequency by the number of subreddits where the user appears because we do not wish to assume that users who comment in many subreddits are less ecologically important. + +%Overlap density is thus not a property of a population of groups, but a property of the resource environment a particular group faces. + + +% While foundational studies of density dependence in organizational research measu +% red density and growth at the population level, ecological studies of online groups .\footnote{Although it is less common in organizational research, overlap density has also been used by some organizational ecologists \cite[e.g.][]{dobrev_dynamics_2001}.} +% Are this paragraph and the next one necessary or just confusing? 
+DDT proposes a model for the growth of organizational populations that has a similar structure to the RDT model of \citet{butler_membership_2001} for the growth of online groups. +In DDT, mutualism is the engine of positive feedback driving population growth. Organizational ecologists show how successful organizations in an emerging industry develop nonrival resources like the legitimacy of a business model or industrial know-how that attract new organizations to enter the market \citep{carroll_density_1989,hannan_organizational_1989}. Similarly, a population of online groups, such as those sharing a platform, may grow in size as their platform gains in popularity, as established groups spin off new ones, and as useful knowledge develops that can be shared between groups \citep{tan_tracing_2018, zhu_impact_2014}. + + +% TODO add a footnote to show the analytical equivalence between the models and connection to Malthus. +In RDT, growth of online groups is self-limiting because of the challenges in managing large groups \citep{butler_membership_2001}. In DDT, competition among population members over rival resources limits growth \citep{hannan_organizational_1989}. DDT thus proposes a trade-off in which low density reflects limited opportunities for mutualistic contributions of nonrival resources like legitimacy, connectivity, and knowledge, but high density reflects competition over rival resources. +Therefore, DDT predicts that the relationship between density and positive outcomes like growth or survival is $\cap$-shaped (inverse-U-shaped) \citep{baum_ecological_2006, carroll_density_1989}. + +% Save the potential conflict between RDT and DDT for the discussion +% An individual online group's growth may be limited by the ability of their social structures to scale to include more members \citep{butler_membership_2001} or due to competition with other groups over members \citep{hannan_organizational_1989}. 
+ +%In a homogenous population or in cases where litt +%Population ecologists have used a number of definitions of population, but they often refer to sets of organizations having the same organizational form or business model. + +%This is because many environments present a trade-off between mutualism and competition: mutualistic forces are stronger when density is low and competitive forces are stronger when density is higher. The intuition is that low-density environments reflect poor environmental conditions for success---if conditions were good then they would attract more growing communities hence be more dense. On the other hand, high-density environments are thought to become crowded and competitive \citepp{hannan_organizational_1989}. + +Tests of DDT in populations of online groups yield inconsistent results. In \citet{wang_impact_2012}, user overlap in Usenet newsgroups is associated with decreasing numbers of participants. Similarly, \citet{teblunthuis_population_2020} find that topical overlaps between online petitions are negatively associated with participation. By contrast, \citet{zhu_impact_2014} find that membership overlap is positively associated with increasing survival of new Wikia wikis. Only \citet{zhu_selecting_2014} find support for the $\cap$-shaped relationship predicted by DDT in an enterprise social media platform. + +In Study A, we provide a test of DDT using data from Reddit. 
The classical logic of DDT appears reasonable in the context of Reddit because low overlap density is likely to reflect an impoverished environment lacking in non-rival resources like skills and knowledge of experienced users, while a group with high overlap is likely to face competition over its members \citep{zhu_selecting_2014, zhu_impact_2014}: +\textit{(\textbf{H1}) The relationship between overlap density and the growth of online groups is $\cap$-shaped (inverse-U-shaped).} +% such as the + +%DDT sees competition and mutualism as environmental properties of an online group's niche. + +DDT proposes that very high levels of density will decrease growth because of increasing forces of competition within a niche. However, to conclude that groups with the greatest membership overlap are likely competitors would be to commit a well-known statistical fallacy +% (the term ecological fallacy does not refer to theories of population or community ecology, but rather to ``ecological correlations,'' meaning correlations involving aggregates) +\citep{piantadosi_ecological_1988, robinson_ecological_1950}. +The density of a group's environment suggests that it faces competition or mutualism, but it does not tell us which overlapping communities are competitors and which are mutualists. +% DDT therefore relates resource overlaps to the growth of online groups, yet stops short of inferring competitive or mutualistic interactions among them. It does not provide a way of learning when and why groups are mutualists or competitors and this limits its ability to inform designs that take these interactions into account. +Community ecology overcomes this limitation of DDT. + +\subsection{Introducing Community Ecology \label{sec:community_ecology}} + +Perhaps the most natural way to understand the distinction between population ecology and community ecology is in where they believe ecological dynamics like competition and mutualism play out \citep{astley_two_1985}. 
While population ecology locates competition and mutualism within an environmental niche, community ecology locates competition and mutualism in networks of interdependent groups called \emph{ecological communities} \citep{aldrich_organizations_2006}. In organizational ecology, this can mean studying interactions between different organizational populations \citep[e.g.][]{sorensen_recruitment-based_2004, mcpherson_ecology_1983}, or networks of interactions between organizations \citep[e.g.][]{powell_network_2005, margolin_normative_2012}. +%Doing so makes visible the distinctive roles that particular groups play. +While varying conceptions of community ecology are found in the organizational ecology literature \citep{freeman_community_2006}, the approach we describe is identical in structure to that taken by \citet{aldrich_organizations_2006} and \citet{hawley_human_1986}. + +Community ecology focuses on \emph{ecological interactions} \citep{aldrich_organizations_2006}. +%In organizational ecology, these interactions are referred to as ``commensal relationships.'' However, biologists use the term ``commensal'' quite differently to mean an unreciprocated mutualistic interaction in which one species provides benefits to another while being unaffected by it. While for the most part, we draw our conceptions and terminology from organizational ecology rather than biology, the use of the term ``commensalism'' in organizational ecology can be confusing. We therefore adopt the term ``ecological interaction.'' +Ecological interactions can be mutualistic when one group has a positive influence on the second such that growth in the first group leads to growth in the second. They can also be competitive if one group has a negative effect on the second such that growth in the first group leads to decline in the second. Ecological interactions can be reciprocated if mutualism (or competition) from one group to another group is returned in kind. 
An ecological interaction can also be mutualistic in one direction and competitive in the other. The competitive or mutualistic interactions in an ecological community are quantified by the \emph{community matrix}, a central analytical object in community ecology in both biology and organization science \citep{verhoef_community_2010, novak_characterizing_2016, aldrich_organizations_2006}. + +In Study B, we demonstrate community ecology by inferring networks of ecological interactions in ecological communities on Reddit. Because our understanding of community ecology theory does not suggest hypotheses about what we will find, we conduct an exploratory data analysis to determine whether mutualism or competition among subreddits is more common on Reddit and present case studies illustrating the types of ecological communities we identify. + +%So a commensal relationship exists between each pair of groups in an ecological community. + +% There are six possible ecological interactions as described in Table \ref{tab:interaction.types}. Note that they can be reciprocal (as in full mutualism and competition) or not (as in partial mutualism and competition). In our framework ``predation'' is an interaction that is positive in one direction but negative in the other. It is also possible that growth or decline in the first group has no effect on the second group, and visa-versa, a situation termed ``neutrality.'' + + +% \begin{table} +% \caption{The five possible ecological interactions between two online groups. Values in the column ``i $\rightarrow$ j'' represent the sign of $\phi_{i,j}$ group i's effect on group j. 
Based on table 11.1 from \citet{aldrich_organizations_2006}.} +% \centering +% \begin{tabular}{c|c|c} +% i $\rightarrow$ j ($\phi_{i,j}$)& i $\rightarrow$ j ($\phi_{i,j}$) & Interaction type \\ \hline +% $+$ & $+$ & Full mutualism \\ +% $+$ & $\cdot$ & Partial mutualism \\ +% $+$ & $-$ & Predation \\ +% $-$ & $\cdot$ & Partial competition \\ +% $-$ & $-$ & Full competition \\ +% $\cdot$ & $\cdot$ & Neutrality +% \end{tabular} +% \label{tab:interaction.types} +% \end{table} + +% by conceiving of community ecology as the study of relationships between different groups. + +% Relationships studied in community ecology are defined by how they , but they are also important because networks of relationships +%and give rise to higher-order properties like stability. + +%Our community ecology approach instead focus on relationships between communities from overlap density approaches to focuses on relationships between communities as a step toward solving the puzzle. + +%Consider the example of how \citet{zhu_impact_2014} find membership overlap is associated with increasing survival of new Wikia wikis, but in \citepos{wang_impact_2012} study of Usenet groups user overlaps are associated with decreasing group sizes. + +% Consider cutting this since we don't look at any other factors + +%study period, and they found a stronger relationship when overlapping members were from more established groups. Perhaps the growth Wikia wikis was limited by knowledge of how to build a Wiki which was provided by more experienced users and user overlaps were correlated with access to such knowledge. While + + +% What's the point of these three paragraphs? +\subsection{Predicting Growth} + +In Study C we build upon our analyses from Study A and Study B by testing whether community ecology can explain the growth and decline of online groups in ways that population ecology cannot. 
We do this by analyzing in two different ways whether accounting for ecological interactions helps predict future group sizes. +% We expect it to do so because resource overlaps as modeled by DDT may be a poor proxy for the degree to which a group's environment is competitive or mutualistic. +In general, competition for overlapping resources will have no effect on group growth if something besides the overlapping resource limits growth \citep{verhoef_community_2010}. For example, two wikis might share a large number of contributors (they have high user overlap), but their growth might be limited by a lack of core contributors who perform important administrative tasks like policy making and software administration \citep{zhu_impact_2014}. Community ecology relaxes the assumption that competition and mutualism are caused by user overlap density and instead seeks to infer these relationships from data. We test the importance of this conceptual shift for predicting growth by testing two hypotheses. The first uses a model comparison approach to test if adding a measure of ecological interactions to the density dependence model in Study A improves prediction of growth: \textit{(\textbf{H2}) A model with ecological interactions and density dependence predicts growth in online groups better than density dependence alone.} + +Support for H2 may be a relatively low bar for assessing whether ecological interactions are important factors shaping the growth of online groups because of confounding moderator or mediator variables related to the occurrence of ecological interactions. +% For example, suppose mutualistic interactions were correlated with declining ecological communities. 
+Therefore, we also use a time series forecasting approach to test whether modeling ecological interactions is useful for making time series forecasts of participation in online groups: +%We seek to demonstrate in whether including commensal relationships in time series forecasting models improves forecasting performance. +\textit{(\textbf{H3}) The addition of ecological interactions to a baseline time series model improves the forecasting performance.} +While this does not directly compare population ecology and community ecology, it validates that ecological interactions are important. + +%With commensalism, we can seek to explain the puzzling results of resource overlap studies by exploring our second research question:\noindent \textbf{RQ2: How are degrees of user overlap and types of commensal relationships related?} + +% This paragraph isn't helping very much +% Ecological dynamics play out through the network of such relationships over time as represented by the \emph{community matrix}, $\Phi$. + + +% Analysis of the community matrix can reveal indirect relationships between groups and properties of an ecological community like stability \cite{ives_estimating_2003}. +%Seeing interdependence between online groups through a community ecology-based network of dynamical relationships can make visible special roles that particular groups play in an ecological community through their many mutualistic or competitive relationships. + +% Next we take a first methodological step toward answering questions like these by adapting vector autoregression models from biology and macroeconomics as an approach to inferring community matrices. We then apply our approach in three case studies of related groups hosted on Reddit to reveal three qualitatively different ecological communities. 
+ +%% SOME BIKERACK RAISING MORE ISSUES WITH THE NICHE OVERLAP APPROACH + +% study online groups additionally shifts from an analogy of online communities as individual members of a biological species to online communities as species themselves and seeking to understand functional relationships between different online groups. +% Yet a closer examination of the analogy to density-dependence in organizational or biological populations reveals conceptual awkwardness. At issue is the referent of the term ``niche.'' Should we use ``niche'' to refer to a set of resources that an online community can utilize? This is what ``niche'' means in both overlap density and in our version of community ecology. + +% Social exposure is also important, but we don't deal with that in this . The idea here is that the cost-benefit structure depends on alternatives which can lower costs or . +%VAR analysis can quantify the stability of the system and affords exploration of counterfactual forecasts to simulate hypothetical interventions \citep{ives_estimating_2003}. + + +\section{Materials \& Methods} +\label{sec:methods} + + + +% The presentation of our materials and methods is organized as follows: First we introduce the methods and measures for Study A, beginning with +% \emph{user overlap} %(§\ref{sec:mes.overlap}) +% which is aggregated into \emph{overlap density} %(§\ref{sec:mes.density}) +% to predict subreddit \textit{growth} %(§\ref{sec:mes.growth}) +% in a loglinear regression model. Then, for Study B, we present +% our clustering procedure for identifying ecological communities % (§\ref{sec:clustering}) +% on which we fit VAR models % (§\ref{sec:var}) +% predicting \emph{group size}. % (§\ref{sec:mes.group.size}). 
+% To explore the types of ecological communities found on Reddit, we derive two measures from these models for each cluster: \emph{average ecological interaction} +%(§\ref{sec:mes.avg.mut}) +% which quantifies the degree of competition and mutualism in the ecological community and \emph{ecological interaction strength} %(§\ref{sec:mes.abs.int}) % which quantifies its overall intensity of ecological interactions. Next, we draw competition-mutualism networks in example ecological communities based on interpreting the VAR models using impulse response functions (IRFs) %(§\ref{sec:mes.irf}). +% Then, in Study C, we test H2 to compare community ecology and density dependence theory by adding \emph{subreddit average mutualism} %(§\ref{sec:mes.sub.mut}) +% to the model from Study A. Finally, we test H3 by evaluating whether including ecological interactions in the VAR models improves time series forecasting. % (§\ref{sec:mes.forecasting}). + +\subsection{Data} + +Our data are drawn from the publicly available Pushshift archive of Reddit submissions and comments which we obtained from December 5\textsuperscript{th} 2005 to April 13\textsuperscript{th} 2020 +\citep{baumgartner_pushshift_2020}. Within this dataset, we limit our analysis to submissions and comments from the 10,000 subreddits with the highest number of comments. There are 702 subreddits larger than the smallest subreddit included in our dataset having a majority of submissions marked ``NSFW,'' which typically indicates pornographic material. As others have done in large-scale studies of Reddit \citep[e.g.,][]{datta_identifying_2017}, we exclude these subreddits to avoid asking members of our research team to inspect clusters including pornography. The top 10,000 subreddits provide a sufficiently large number of ecological communities for our statistical analysis. 
+ +\subsection{Study A: Density Dependence Theory} % and Community Ecology} +\label{methods:density} + + +\subsubsection{User overlap \nopunct} \label{sec:mes.overlap} + $o_{i,j}$ quantifies the degree to which two subreddits ($i$ and $j$) share users. + %From it we construct clusters of related groups in §\ref{sec:clustering} and quantify overlap density in §\ref{sec:mes.density}. +\citet{zhu_impact_2014} and \citet{wang_impact_2012} both measure user overlap between two groups by counting the number of users contributing to both groups at least once and exclude users who appear in more than 10 groups. In our preliminary analysis, we found that this measure led to similarity measures and clusters with poor face validity. These issues may have stemmed from how Reddit users often peripherally participate in many groups while participating heavily in few \citep{tan_all_2015, hamilton_loyalty_2017, zhang_community_2017}. Therefore, our measure of user overlap follows \citet{datta_identifying_2017} by using the number of comments each user makes in each pair of groups. + +To measure user overlap between subreddits, we first build user frequency vectors by counting the number of times each user comments in each subreddit. We prevent giving undue weight to subreddits with higher overall activity levels by normalizing the comment counts for each subreddit by the maximum number of comments by a single author in the subreddit: + +\begin{equation} + f_{u,j} = \frac{n_{u,j}}{\max_{v} n_{v,j}} \label{eq:user.frequency} +\end{equation} + +\noindent where $f_{u,j}$ is the user frequency, $n_{u,j}$ is the number of times that user $u$ authors a comment in subreddit $j$, and the maximum is taken over all users $v$ who comment in subreddit $j$. + +This results in a user frequency vector $F_j$ for each subreddit that is sparse and high-dimensional, having one element for each user account that comments in any subreddit in our dataset. 
+% In the course of developing our clustering analysis described in §\ref{sec:clustering}, we found that following an approach analogous to latent semantic analysis (LSA) improved the quality of our clusters. +Next, we use LSA to reduce the dimensionality of the user frequency vectors. +LSA is based on the singular value decomposition and is common in natural language processing and information retrieval. LSA preserves subreddit similarities while removing noise and dealing with sparsity \citep{dumais_latent_2004}: + +\begin{align} + \mathbf{F} &= \mathbf{U \Sigma V}^T \nonumber \\ + \widetilde{F_{j}} &= \mathbf{U_k}^TF_j \label{eq:user.frequency.svd} +\end{align} + +\noindent $\mathbf{F}$ is the matrix where columns are author frequency vectors $F_j$ and $\mathbf{U \Sigma V}^T$ is its singular value decomposition. Truncating the singular value decomposition to use only the first $k$ left-singular vectors gives $\mathbf{U_k}$. Left-multiplying a subreddit's author frequency vector by $\mathbf{U_k}^T$ transforms the high-dimensional author frequencies into $\widetilde{F_j}$, their approximation in the $k$-dimensional space. +% We choose $k=600$ in the course of our grid search for a good clustering described below in §\ref{sec:clustering}. + +%clustering with a high silhouette coefficient. + +We then obtain our measure of \textit{user overlap} by taking the cosine similarities between the resulting vectors for a pair of subreddits: +\begin{equation} + o_{i,j} = \frac{\widetilde{F_{j}} \cdot \widetilde{F_{i}}} {\norm{\widetilde{F_i}} \norm{\widetilde{F_j}}} \label{eq:user.overlap} +\end{equation} + +\noindent where $\norm{\widetilde{F_i}} = \sqrt{\sum_{x=1}^k \widetilde{f_{x,i}}^2}$ is the Euclidean norm of the transformed user frequencies for subreddit $i$. 
+ + + + +%We use the following methods and measures in our tests of our hypothesis that the relationship between user overlap density the growth of online groups is $\cap$-shaped (H1) and our hypothesis that accounting for ecological interactions will help explain growth beyond overlap density (H2): + +% We measure \emph{overlap density} and \emph{growth} to and . To test \textit{\textbf{H2}}, we add the overall influence of ecological interactions on a subreddit + +\subsubsection{Growth\nopunct}\label{sec:mes.growth} is the dependent variable in our density dependence model testing H1 and is also used in our test of H2 as part of Study C. Growth is measured as the change in the (log-transformed) size of a subreddit over the final 24 weeks of our data, from November 4\textsuperscript{th} 2019 to April 13\textsuperscript{th} 2020. + +\subsubsection{Overlap density\nopunct} \label{sec:mes.density} $d_i$ is the normalized average user overlap for a given subreddit. It is the independent variable in our density dependence model testing H1: + +\begin{align}\label{eq:user.overlap.density} + d^*_{i} &= \frac{1}{\left|S\right|-1} \sum_{j\in S;j\ne i} o_{i,j} \nonumber \\ + d_{i} &= \frac{d_i^*}{\max_j d_j^*} +\end{align} + +\noindent where $S$ is the set of groups in our dataset. + +\subsubsection{Regression model for H1} \label{sec:reg.H1} +To test H1, we fit Model 1 % in Equation \ref{eq:M1} +which has first- and second-order terms for overlap density to allow for a curvilinear relationship between \emph{overlap density} and \emph{growth}. +\begin{align} +\mathrm{Model~1} & & Y_i = B_0 + B_1 d_{i} + B_2 d^2_{i} \label{eq:M1} +\end{align} +\noindent where $Y_i$ is the growth of subreddit $i$ and $d_i$ is its overlap density. + + +\subsection{Study B: Introducing Community Ecology} + + +%Here we review the prior work on which we build our methodological approach to inferring competitive and mutualistic relationships between online groups. 
%\textsection \ref{sec:inferring} describes our own methodological contributions. + +\subsubsection{Clustering to identify ecological communities} +\label{sec:clustering} +Analyzing networks of ecological interactions is the key difference between community ecology and population ecology. +% In Study A we set out to survey the types of ecological communities found on Reddit to provide a comparison with a large-scale population ecology analysis. +% in \ref{sec:clustering} +%Here, we use a heuristic approach based on clustering algorithms to find ecological communities of online groups that all have high user overlap. +To identify ecological communities of related subreddits, we use a clustering procedure based on the user overlap measure described above in §\ref{sec:mes.overlap}. +We selected a clustering model using grid search to obtain a high silhouette coefficient \citep{rousseeuw_silhouettes_1987}. The silhouette coefficient captures the degree to which a clustering creates groups of subreddits with high within-cluster similarity. +% relative to similarity with subreddits in other clusters. + +Our description of our measure for user overlap in §\ref{sec:mes.overlap} does not explain how we choose the number of LSA dimensions $k$. +To do so, we ran the affinity propagation \citep{frey_clustering_2007}, HDBSCAN \citep{mcinnes_hdbscan_2017} and \textit{k}-means clustering algorithms and selected the algorithm, hyperparameters, and LSA dimensions $k$ that resulted in the clustering with a high silhouette coefficient having fewer than 5,000 isolated subreddits, and at least 50 clusters. We limit the number of isolated subreddits because some choices of hyperparameters for the HDBSCAN algorithm could improve the silhouette coefficient, but at the cost of greatly increasing numbers of isolated subreddits. Choosing a relatively high limit to the number of isolates helps ensure that our clusters contain highly related communities. 
We chose an HDBSCAN clustering with 731 clusters, 4964 isolated subreddits, $k=600$ LSA dimensions, and a silhouette score of 0.48. +We exclude the isolated subreddits from our analysis. More details about our clustering selection process are found in the online supplement. + + +%In order to test H2 and answer RQ1, we estimate the community matrix of commensal relationships between selected communities of online groups. +We evaluate the external validity of the chosen clustering using the purity evaluation criterion \citep{manning_introduction_2018} +% : +% \begin{equation}45 +% \mathrm{Purity}=\frac{1}{N}\sum_{m\in M}\max_{d\in D}{|m \cap d|} +% \end{equation} +% \noindent Where $N$ is the number of clusters $M$, $D$ are ``true'' classes to which subreddits might belong and $max_{d\in D}|m \cap d|$ is the greatest number of subreddits in cluster $m$ that belong to the same class $d$. +To do so, an undergraduate research assistant examined a random sample of 100 clusters including 744 subreddits. By visiting the subreddits and using her own judgment, the assistant flagged subreddits that did not seem like a good fit for their assigned cluster. Using these labels and excluding 25 subreddits that have been deleted, made private, or banned, we calculated the purity of our clustering as 0.92. This means that we believe that 92\% of subreddits belong to their assigned cluster. +% Note that although we clustered subreddits based on user overlap, we obtain a high purity score based on a subjective evaluation of the subreddits' contents. + +%\subsection{Inferring Mutualistic and Competitive Interactions} + +% We find f(N.clusters) clusters and f(N.isolates) isolated subreddits. The median cluster has median.cluster.size subreddits and the largest cluster has + + +\subsubsection{Group size\nopunct} \label{sec:mes.group.size} is the dependent variable of the models we use to infer ecological interactions. 
Measured as the number of distinct commenting users in a subreddit each week, group size quantifies the number of people who participate in a subreddit over time. Typical of social media participation data, group size is highly skewed. Therefore, we transform it by adding 1 and taking the natural logarithm. + + +% The following three paragraphs probably belong in the methods section, but I'm trying to satisfy the reviewers. +\subsubsection{Inferring ecological interactions using Vector Auto Regression} +\label{sec:var} + +The community matrix $\mathbf{\Phi}$ of ecological interactions can be inferred from time series data using vector autoregression models (VAR models). VAR models are a workhorse in biological ecology because VAR(1) models (i.e., VAR models with a single autoregressive term) have a close relationship with the Gompertz model of population growth which is widely used in ecology \citep{ives_estimating_2003}. Even in the presence of unmodeled nonlinearities, VAR(1) models can reliably identify competition or mutualism in empirically realistic scenarios \citep{certain_how_2018}. VAR models have also been widely adopted in the social sciences, particularly in political science and in macroeconomics \citep{box-steffensmeier_time_2014}. + +% \citet{sims_macroeconomics_1980} advocated VAR modeling in macroeconomics to address a problem in the field as an alternative to structural equation modeling (SEM), which required detailed specification of a large number of theoretical assumptions to identify. +%similar to structural equation models but require fewer theoretical assumptions but are +%VAR models are flexible enough to model a wide range of systems so long as sufficiently long time-series data are available \citep{sims_macroeconomics_1980}. +VAR(1) models can be intuitively understood as a generalization of auto-regressive AR(1) models in time series analysis. 
But while AR(1) models predict the state of a single time series as a function of its previous value, VAR(1) models simultaneously predict multiple time series as a function of the values of every other variable in the system \citep{canova_var_2007, ives_estimating_2003}: + +\begin{equation}\label{eq:var1} +Y_t = B_0 + B_1t + \sum_{k \in K}A_k x_{k,t} + \sum_{j \in M}\Phi_{j} y_{j,t-1} + \epsilon_t +\end{equation} + +\noindent where $Y_t$ is a vector containing the sizes of a set of online groups ($M$) at time $t$. $B_0$ is the vector of intercept terms and $B_1$ is the vector of linear time trends ($b_{1,j}$) for each community ($j$). $\Phi_{j}$ represents the influence of $y_{j,t-1}$, the size of the $j^{\mathrm{th}}$ online group at time $t-1$ on $Y_t$. $\Phi_{j}$ is a column of $\mathbf{\Phi}$, a matrix of coefficients in which the diagonal elements correspond to intrinsic growth rates (marginal to the trend) for each online group and the off-diagonal elements are intergroup influences, and $\epsilon_t$ is the vector of error terms. + +Additional time-dependent predictors ($x_{k,t}$) can be included in the vectors $X_{k}$ with coefficients $A_k$. Because subreddits are created at different times, growth trends must begin only after the subreddit is created. We use $X_{k}$ to introduce a counter-trend during the period prior to the creation of subreddits so that each group's growth trend begins in the period the group is created. For each group $j$ created at time $t^0_j$ we fill $X_{j}$ with the sequence $[1,2,3,\ldots\ ,t^0_j-1,0,0,0,\ldots\ ]$. In other words, $X_{j}$ adds a counter-trend only during the period prior to the first comment in subreddit $j$. We fix the elements $a_{j,i}$ of $A_j$ equal to 0 unless $i=j$, so the counter trend only influences subreddit $j$. This effectively sets $a_{j,j}$ approximately equal to $-b_{1,j}$. 
+ +We fit VAR(1) models using ordinary least squares as implemented in the \texttt{vars} \texttt{R} package to predict the group size each week over the history of each subreddit prior to November 4\textsuperscript{th} 2019 \citep{pfaff_var_2008}. We hold out 24 weeks of data for forecast evaluation and fit our models on the remainder. To ensure that sufficient data is available for fitting the models, we exclude 946 subreddits and 89 clusters having fewer than 156 weeks of activity. + +% where the cluster data lacks the necessary degrees of freedom to fit the model because the length of the training time series is less than the size of cluster plus 2. + + +% We hold out the weeks from fit.date to to.date for evalution. % Some of the clusters were too large or had too low levels of activity We include only We include a vector of intercept terms (to account for different equilibrium community sizes) and a vector of trends (to account for long-run endogenous growth) because we found that including these terms greatly improved the fit of our models to the data. Our VAR(1) models have this form in vector notation: + +%$$ Y_t = \Mu + \Phi_1 Y_{t-1} + \ldots + \Phi_p Y_{t-p} + \epsilon_t $$ +% TODO: avoid mixing matrix and vector notation. + +\subsubsection{Characterizing ecological communities} +\label{sec:characterizing.ecological.communities} + +In Study B, we interpret the community matrix $\mathbf{\Phi}$ as a directed network of ecological interactions, a \emph{competition-mutualism network} \citep{ives_estimating_2003}. Although the elements of $\mathbf{\Phi}$ correspond to direct associations between group sizes \citep{novak_characterizing_2016}, ecological interactions can also be indirect. 
Consider 3 one-directional interactions between three groups ($a$, $b$, $c$) such that growth in $a$ predicts decreased growth in $b$ ($\phi_{a,b} < 0$), growth in $b$ predicts decreased growth in $c$ ($\phi_{b,c} < 0$), but $a$ and $c$ do not directly interact ($\phi_{a,c} \approx 0$). + +This does not necessarily mean that groups $a$ and $c$ are independent. Rather, an exogenous increase in $a$ predicts a decrease in $b$ and thereby an eventual increase in $c$. Such indirect relationships are analyzed by using impulse response functions (IRFs) to interpret a VAR model \citep{box-steffensmeier_time_2014}. In large VAR models containing many groups, the great number of parameters can mean that few specific elements of $\mathbf{\Phi}$ will be statistically significant, even as many weak direct relationships can combine into statistically significant IRFs \citep{canova_var_2007}. + +\subsubsection{Average ecological interaction\nopunct} \label{sec:mes.avg.mut} $\overline{m}$ measures the extent to which an overall ecological community is mutualistic or competitive by taking the mean point estimate of the off-diagonal coefficients of $\mathbf{\Phi}$: + +\begin{equation}\label{eq:average.interaction} +\overline{m} = \frac{1}{\left|M\right| - 1} \sum_{i\in M} \sum_{j\in M;j\ne i} \phi_{i,j} +\end{equation} + +\noindent If $\overline{m} > 0$ then mutualistic interactions within the ecological community are stronger than competitive ones, and if $\overline{m} < 0$ then competitive interactions are stronger than mutualistic ones. 
+ +\subsubsection{Ecological interaction strength\nopunct} \label{sec:mes.abs.int} $\kappa$ quantifies the overall strength of ecological interactions in an ecological community as the mean absolute value of the point estimates of the off-diagonal coefficients of $\mathbf{\Phi}$: + +\begin{equation}\label{eq:average.absolute.interaction} +\kappa = \frac{1}{\left|M\right| - 1} \sum_{i\in M} \sum_{j\in M;j\ne i} \left| \phi_{i,j} \right| +\end{equation} + +\noindent where $\left| \phi_{i,j} \right|$ is the absolute value of the coefficient $\phi_{i,j}$. + +Ecological communities of subreddits with overlapping users vary in both the overall strength of ecological interactions and in the overall degree of mutualism and competition between member groups. If an ecological community's average ecological interaction is positive, we say the ecological community is mutualistic. If it is negative, we say the ecological community is competitive. The average ecological interaction can be close to 0 in two ways. First, the ecological interaction strength can simply be low. Alternatively, the ecological community can have a mixture of competitive and mutualistic interactions that cancel one another out when averaged. % Such an ecological community can have high ecological interaction strength. + +\subsubsection{Impulse response functions\nopunct}\label{sec:mes.irf} (IRFs) of our VAR(1) models correspond to our visualizations of example competition-mutualism networks in §\ref{sec:case.studies}. An IRF predicts how much each group's size would change in response to a sudden increase in the size of each other group \citep{verhoef_community_2010}: + +\begin{equation} + \mathbf{\Theta_t} = \mathbf{\Theta_{t-1}}\mathbf{\Phi}, \quad t = 1, 2, \ldots \label{eq:irf} +\end{equation} + +\noindent where $\mathbf{\Theta_t}$ is the impulse response function at time $t$. $\mathbf{\Theta_0}$ is an $\left|M\right|$-by-$\left|M\right|$ identity matrix so our impulses represent a log-unit increase of 1 to each group. 
$\mathbf{\Theta_t}$ is a matrix with elements $\theta^t_{i,j}$ corresponding to the response of group $j$ to the impulse of group $i$. We draw an edge $i \rightarrow j$ in the competition-mutualism network if the 95\% CI of $\theta^t_{i,j}$ does not include zero at any time $10 \geq t > 0$. If $\theta^t_{i,j} >0 $, the edge indicates mutualism and if $\theta^t_{i,j} < 0$ the edge indicates competition.\footnote{In higher-order VAR($p$) models that use $p>1$ past observations as predictors $\theta^t_{i,j}$ can be less than 0 for some $t_a$ and greater than 0 for some $t_b$. However, this is not possible in the VAR(1) models we use.} We compute the IRFs with bootstrapped confidence intervals (CI) based on 1,000 samples using the \texttt{vars} \texttt{R} package. + + +% The community matrix $\Phi$ is interpretable as a network of commensal relationships \citep{ives_estimating_2003}. While the coefficients of $\mathbf{\Phi}$ correspond to direct associations between group sizes \cite{novak_characterizing_2016}, commensal relationships can also be indirect. Consider relationships between three groups (A, B, C) such that A partially competes with B and B partially competes with C but A and C have no direct relationship. A VAR(1) model inferring these relationships will have negative coefficients for $\phi_{AB}$ and $\phi_{BC}$ but $\phi_{AC}$ will be nearly zero. + +% TODO plot the examples on figure 1. + +%The central prediction of density dependence theory is that there will be a curviliear, inverse-U-shaped ($\cap$-shaped) relationship between overlap density and growth. 
+ +\subsection{Study C: Predicting growth} + +\subsubsection{Average subreddit mutualism\nopunct}\label{sec:mes.sub.mut} $m_j$ is the independent variable for our test of H2 and measures the average influence of other subreddits in the ecological community on a given subreddit $j$, which we calculate by taking the mean of off-diagonal elements of row $j$ of the community matrix: + +\begin{equation}\label{eq:average.subreddit.mutualism} +m_j = \frac{1}{\left|M\right|-1}\sum_{i\in M;i\ne j} \phi_{i,j} +\end{equation} + +\noindent where $M$ is the set of subreddits in the ecological community and $\left|M\right|$ is the number of subreddits in $M$. We use the mean instead of the sum because different ecological communities have different numbers of subreddits. + +\subsubsection{Regression models for H2} We test H2 by using likelihood ratio tests to compare Model 1 % (above in \ref{sec:reg.H1}) +and Model 2 % in Equation \ref{eq:M2} +which adds \emph{average subreddit mutualism} ($m_i$) as a predictor. We also fit Model 3 % in Equation \ref{eq:M3} +which we compare to Model 2 to test if overlap density explains variation that average subreddit mutualism does not. + +\begin{align} +\mathrm{Model~2} & & Y_i &= B_0 + B_1 d_{i} + B_2 d^2_{i} + B_3 m_i \label{eq:M2} \\ +\mathrm{Model~3} & & Y_i &= B_0 + B_3 m_i \label{eq:M3} +\end{align} +\noindent where $Y_i$ is the growth of subreddit $i$, $d_i$ is its overlap density, $m_i$ is its average subreddit mutualism, and $B_0$, $B_1$, $B_2$, and $B_3$ are regression coefficients. + +\subsubsection{Forecasting growth using ecological interactions} +\label{sec:mes.forecasting} +To test H3, we evaluate whether modeling ecological interactions improves time series forecasting of future participation in online groups by comparing the model in Equation \ref{eq:var1} to a baseline model with off-diagonal elements of $\mathbf{\Phi}$ fixed to 0. This baseline model is equivalent to our VAR model, but excludes ecological interactions. 
+ +We use two forecasting metrics with differing assumptions: root-mean-square-error (RMSE) and the continuous ranked probability score (CRPS). RMSE is commonly used, non-parametric, and intuitive, but does not take differing scales of the predicted variable or forecast uncertainty into account. Thus, in our setting it may place excessive weight on the forecasts of larger subreddits where errors may have greater magnitude simply because the absolute magnitude of the variance is greater. By rewarding forecasts where the true value has high probability under the predictive distribution, the CRPS accounts for variance in the data and rewards forecasts for both accuracy and precision and is thus a ``proper scoring rule'' for evaluating probabilistic forecasts \citep{gneiting_strictly_2007}. Our CRPS calculations assume that the predictive forecast distribution for each community is normal with standard deviations given by the 68.2\% forecast confidence interval. We calculate CRPS using the \texttt{scoringRules} \texttt{R} package \citep{jordan_evaluating_2019}. + +\section{Results} +\label{sec:results} + +% The organization of our results follows that of our methods. We begin with Study A % (§\ref{sec:res:studyA}) +% in which we find, as predicted by H1, that the relationship between overlap density and growth is $\cap$-shaped relationship. Then, in Study B,% (§\ref{sec:res.characterizing}) +% we explore a typology of ecological communities along two dimensions: (1) the degree to which a community is mutualistic or competitive, and (2) the overall strength of ecological interactions between the communities member groups. In the N.clusters ecological communities analyzed in our VAR(1) analysis, we find that mutualistic relationships are much more common than competitive ones. Our case studies % (§\ref{sec:case.studies}) +% illustrate the typology using 4 example ecological communities. 
Finally, in Study C, we do not find support for H2 %in §\ref{sec:res.likelihood.ratio.test} +% as adding average subreddit mutualism to the density dependence model does not improve growth prediction. But we do find, in support of H3, that ecological interactions improve forecasting performance in our time series models. + + + +\begin{figure*} + \centering + +\includegraphics[width=\linewidth]{figures/knitr-fig_densityxgrowth-1} + +\caption{Relationship between density and growth. A 2D histogram of subreddits with overlap density (log-transformed) on the X-axis and the change in the logarithm of the number of distinct commenting users on the Y-axis. The black line shows the marginal effect of overlap density on growth as predicted by Model 2. The gray region shows the 95\% confidence interval of the marginal effect. \label{fig:density}} +\end{figure*} + +% In §\ref{sec:ecology_background} we presented H1 before RQ1 but we report results for H1 in the same section as H2 since they refer to the same regression model. + +%We first present high-level findings that demonstrate advantages of our community ecology approach upon the overlap density approach. We find that accounting for commensal relationships in time-series models increases forecasting accuracy; that including subreddit average commensalism explains additional variation in subreddit over overlap density; and we compare the conclusions drawn density dependence analysis based on the correlation of overlap density and growth can lead about the ecological environment than our analysis modeling commensal relationships between groups. 
Finally, we examine the distribution of \emph{average commensalism} and \emph{average absolute commensalism} to illuminate a typology of ecological communities which we illustrate through + +\subsection{Study A: Density Dependence Theory} +\label{sec:res:studyA} + +%As discussed in §\ref{sec:ecology_background}, population ecology approaches in social computing propose that the relationship between overlap-density and growth/survival outcomes reflect an environment that may be competitive, mutualistic, or a mixture of both \citep{wang_impact_2012,zhu_impact_2014}. +We test the classical prediction of density dependence theory as formulated in H1 using Model 1 % (Equation \ref{eq:M1} in §\ref{methods:density}) +which has first- and second-order terms for the effect of overlap density on growth. As described in §\ref{sec:ecology_background}, H1 hypothesizes that overlap density will have a curvilinear $\cap$-shaped (inverse-U-shaped) relationship with growth indicated by a positive first-order regression coefficient and a negative second-order coefficient. + +\begin{table} + \centering + +% Table created by stargazer v.5.2.2 by Marek Hlavac, Harvard University. E-mail: hlavac at fas.harvard.edu +% Date and time: Thu, Jul 29, 2021 - 05:22:21 PM +\begin{tabular}{@{\extracolsep{5pt}}lccc} +\\[-1.8ex]\hline +\hline \\[-1.8ex] + & Model 1 & Model 2 & Model 3 \\ + Overlap density & 1.50$^{*}$ (0.26) & 1.50$^{*}$ (0.26) & \\ + Overlap density$^2$ & $-$2.08$^{*}$ (0.41) & $-$2.09$^{*}$ (0.41) & \\ + Average subreddit mutualism & & 0.12 (0.26) & 0.11 (0.26) \\ + Constant & $-$0.23$^{*}$ (0.03) & $-$0.23$^{*}$ (0.04) & $-$0.04$^{*}$ (0.01) \\ + \hline \\[-1.8ex] +Log Likelihood & $-$4970 & $-$4970 & $-$4986 \\ +Observations & 4,090 & 4,090 & 4,090 \\ +\hline +\hline \\[-1.8ex] +\textit{Note:} & \multicolumn{3}{r}{$^*$p$<0.01$} \\ +\end{tabular} + +\caption{Loglinear regression predicting subreddit growth as a function of overlap density. 
The model supports the prediction of density dependence theory of a $\cap$-shaped relationship between overlap density and growth. \label{tab:density}} +\end{table} + + +As predicted, we observe a $\cap$-shaped relationship between overlap density and growth. Figure \ref{fig:density} plots the marginal effects of overlap density on growth for the median subreddit laid over the data on which the model is fit. Table \ref{tab:density} shows regression coefficients for Models 1--3. For about half of subreddits, increasing overlap density is associated with higher growth rates. The point where increasing density ceases to predict increasing growth and begins to predict decreasing growth is at the 49\textsuperscript{th} percentile. +Prototypical subreddits at this overlap density grew slightly (95\% CI:[0.001,0.06]). Yet subreddits at the lower and upper extremes of overlap density slightly declined on average. Typical groups at the 20\textsuperscript{th} percentile of overlap density decline by 1.1 members (95\% CI:[-1.1,-1.15]) and typical groups at the 80\textsuperscript{th} percentile decline by 1.2 members (95\% CI:[-1.1,-1.28]). +While we find support for the classical theoretical prediction of a curvilinear ($\cap$-shaped) relationship between overlap density and growth, this does not imply that relationships between highly overlapping communities are more competitive. +% Instead our results below % in §\ref{sec:res.characterizing} +% show that relationships in ecological communities of subreddits with high user overlaps are typically mutualistic. 
+ + +\subsection{Study B: Introducing Community Ecology} +\label{sec:res.characterizing} + + + + +% describe the figure and the main takeaway +% As described in §\ref{sec:characterizing.ecological.communities}, an ecological community can have positive or negative average ecological interaction §\ref{sec:mes.avg.mut} indicating if it is competitive or mutualistic and ecological interaction strength §\ref{sec:mes.abs.int} provides a way to distinguish ecological communities with a mixture of competitive and mutualistic interactions from those where ecological interactions are weak. + +Figure \ref{fig:commense.x.abs.commense} visualizes the distribution of average ecological interaction and ecological interaction strength over the 641 ecological communities we identify. +We observe ecological communities characterized by strong forms of both mutualism and competition, others having mixtures of the two, and some with few significant ecological interactions. Mutualism is more common than competition, with the mean community having an average ecological interaction of 0.03 ($t=14.5$, $p<0.001$). We find that 524 clusters (81.7\%) are mutualistic. Not only are most ecological communities mutualistic, but more mutualistic ecological communities have greater ecological interaction strength (Spearman's $\rho=0.58$, $p<0.001$). +% Note that due to our clustering procedure, our analysis examines ecological interactions among subreddits with relatively high degrees of user overlap. +Therefore, our community ecology analysis suggests that among groups with similar users, mutualistic ecological interactions are more common than competitive ones. + +\begin{figure} + +\includegraphics[width=\linewidth]{figures/knitr-plot_commense_x_abs_commense-1} + +\caption{Two-dimensional histogram showing ecological communities on Reddit in our typology. 
The X-axis shows the overall degree of mutualism or competition in clusters of subreddits with high user overlap based on the average ecological interaction. The Y-axis shows the ecological interaction strength representing the overall magnitude of competition or mutualism.} +\label{fig:commense.x.abs.commense} +\end{figure} + + + +\subsubsection{Example ecological communities} +\label{sec:case.studies} + +We present four case studies to illustrate our typology of ecological communities of online groups. Figure \ref{fig:commense.x.abs.commense} shows that we find clusters of subreddits characterized by mutualism, competition, a mixture of mutualism and competition, and few ecological relationships at all. We select one case from each of these four types using our measures of average ecological interaction (§\ref{sec:mes.avg.mut}) and ecological interaction strength (§\ref{sec:mes.abs.int}). To allow for more interesting network structures, we draw our cases from the 367 large clusters having at least five subreddits. + +\input{resources/network-figures.tex} + +Figure \ref{fig:networks} presents visualizations of competition-mutualism networks representing statistically significant impulse response functions as described in §\ref{sec:mes.irf}. During our analysis, we also examined the terms of the vector autoregression parameter $\mathbf{\Phi}$, the impulse response functions, and model fits and forecasts, all of which are available in our online supplement. We also visited each subreddit in the clusters and read their sidebars and top posts to support our brief qualitative descriptions. + +\subsubsection{Mutualism among mental health subreddits} + +% TODO, cite somebody on mental health. +To find a case characterized by mutualism, we selected the top 37 large clusters with the greatest average ecological interaction. 
From these, we arbitrarily chose one interesting ecological community, the \textit{mental health} cluster, which includes 11 subreddits for supporting people in struggles with mental health, addiction, and surviving abuse. +Constitutive subreddits include those focused on specific mental health diagnoses like \texttt{r\Slash bpd} (borderline personality disorder) and \texttt{r\Slash cptsd} (complex post-traumatic stress disorder) while others like \texttt{r\Slash survivorsofabuse} and \texttt{r\Slash adultsurvivors} +are support groups. + +The interactions among these subreddits are dense and primarily mutualistic as shown in Figure \ref{fig:mut.network}. There are a handful of competitive interactions like the reciprocal competition detected between \texttt{r\Slash codedependence} and \texttt{r\Slash bpd}. We also observe some interactions that are mutualistic in one direction and competitive in the other. For example, growth in \texttt{r\Slash addiction} predicts increasing growth in \texttt{r\Slash cptsd} even as that growth in \texttt{r\Slash cptsd} predicts decreasing growth in \texttt{r\Slash addiction}. This suggests a pattern in which \texttt{r\Slash cptsd} siphons members from \texttt{r\Slash addiction}. That said, the density of mutualistic interactions shown in Figure \ref{fig:mut.network} suggests that different subreddits have complementary roles in this ecological community as people turn to different types of groups for help with interrelated problems. While attempting to explain why different online groups form mutualistic or competitive interactions is left to future research, the example of mental health subreddits shows how groups with related topics and overlapping participants can have mutualistic interactions where growth in one predicts growth in many of the rest. 
+ +\subsubsection{Competition among real estate and finance subreddits} + + +To find competitive clusters, we selected from the 36 large clusters with the lowest average ecological interaction an ecological community that we label \textit{finance}. Among the 6 subreddits in this cluster, \texttt{r\Slash realestateinvesting}, \texttt{r\Slash realestate} and \texttt{r\Slash commercialrealestate} all deal in different aspects of the real estate industry, while \texttt{r\Slash financialindependence} and \texttt{r\Slash fatfire} (the acronym ``fire'' means ``financial independence/retire early'') are focused on building wealth and becoming financially independent and \texttt{r\Slash financialplanning} is a general purpose subreddit for financial advice. + +In contrast to the mental health ecological community, the finance cluster has mostly competitive ties as visualized in Figure \ref{fig:comp.network}. The fact that even this cluster, among the most competitive in our data, contains a number of mutualistic ties reflects just how prevalent mutualism is among subreddits with high degrees of user overlap. That said, we detect three reciprocal competitive interactions among the three subreddits that focus on real estate. The edges from \texttt{r\Slash fatfire} to \texttt{r\Slash commercialrealestate} and \texttt{r\Slash financialindependence} are competitive as well. +Interestingly, all interactions between the general finance subreddits (\texttt{r\Slash financialplanning} and \texttt{r\Slash financialindependence}) and \texttt{r\Slash realestate} are mutualistic. +%Interestingly, are mutualistic. + +\subsubsection{Mixed interactions among timepiece subreddits} + +Next, we turn to an example of an ecological community with low average ecological interaction but high ecological interaction strength. +We first select the 36 %(10\%) +large clusters with the average ecological interaction closest to 0. 
To find an ecological community with a mixture of mutualism and competition, we select from the 15 clusters with the greatest ecological interaction strength from within this group and chose the \textit{timepiece} cluster containing 7 subreddits about watches. + +As shown in Figure \ref{fig:mixed.network}, the ecological community of timepiece subreddits is dense with ecological interactions (although not as dense as the mental health subreddits). We observe both reciprocated mutualistic interactions, like that between \texttt{r\Slash rolex} and \texttt{r\Slash gshock}, and competitive interactions like that between \texttt{r\Slash gshock} and \texttt{r\Slash seiko}. We also observe numerous unreciprocated competitive and mutualistic relationships like the mutualism between \texttt{r\Slash watchexchange} and \texttt{r\Slash watchcirclejerk}\footnote{The suffix is widely understood on Reddit to signify a jokey, meme, or satirical subreddit.} +and the competition between \texttt{r\Slash japanesewatches} and \texttt{r\Slash seiko}. +Though the average ecological interaction among these subreddits is near 0, our analysis reveals a complex ecological community with a mixture of competition and mutualism. + +\subsubsection{Sparse interactions among Call of Duty subreddits} + +To find a case where ecological interactions are weak, we return to the group of the 36 %(10\%) +large clusters with the average ecological interaction closest to 0 but select from the 15 clusters within this group with the lowest ecological interaction strength. From these, we chose the \textit{Call of Duty} cluster containing five groups about the popular military first-person shooter series of video games. + +% % more quotations +The Call of Duty ecological community is sparse, having only two significant ecological interactions among its 5 member groups. 
This ecological community includes subreddits about different editions of the series such as \texttt{r\Slash blackops3}, \texttt{r\Slash infinitewarfare} and \texttt{r\Slash wwii} as well as one about a popular spin-off zombie game \texttt{r\Slash codzombies} and the more general \texttt{r\Slash callofduty} subreddit. We find that growth in \texttt{r\Slash blackops3} or \texttt{r\Slash codzombies} predicts growth in \texttt{r\Slash infinitewarfare} and no other ecological interactions. + +The timepiece and Call of Duty ecological communities illustrate how subreddits with overlapping users can have relatively strong or weak forms of ecological interdependence. Although both clusters are characterized by high degrees of user overlap and low average ecological interaction, the timepiece cluster has a dense competition-mutualism network while the Call of Duty network is sparse. + +\subsection{Study C: Predicting Growth} +\label{sec:res.studyC} + +We now compare the environmental approach of population ecology with the relational approach of community ecology. +In Study B, we presented examples of diverse ecological communities among subreddits with overlapping members. However, the presence of this diversity does not mean that ecological interactions are related to the growth of online groups, the key outcome of previous ecological studies. We therefore hypothesized that ecological interactions will improve the predictive performance of a density dependence model in H2. + +\subsubsection{Ecological interactions do not improve growth prediction} +\label{sec:res.likelihood.ratio.test} + +To test H2, we compare Model 1, our density dependence model having first- and second-order terms for overlap density, with Model 2, which also includes average subreddit mutualism (§\ref{sec:mes.sub.mut}) as a predictor. We also examine Model 3, in which the only predictor is average subreddit mutualism. Table \ref{tab:density} shows regression coefficients for our models. 
+ +We do not observe a statistically significant association between average subreddit mutualism and growth ($B_3=0.12, SE=0.26$). +% We observe that average subreddit mutualism is positively associated with growth , which makes sense as subreddits with greater average subreddit mutualism benefit more from mutualism or are hurt less from competition. +Moreover, a likelihood ratio test comparing Model 1 and Model 2 does not support H2 as Model 2 does not predict subreddit growth better than Model 1 ($\chi^2 = 0.23$, $p>0.05$). +% Therefore, average subreddit mutualism does not help predict growth compared to the density dependence model alone. +Comparing Model 2 to Model 3 shows that overlap density explains variation that average subreddit mutualism does not ($\chi^2 = 33$, $p<0.001$). +%This suggests that the density of a subreddit's niche helps explain subreddit growth in important ways not captured by ecological interactions. +Overlap density helps explain a group's future growth, but the overall degree of mutualism or competition a group faces in its ecological community does not. +% In §\ref{sec:discussion}, we discuss how overlap density may only capture the hospitality of a group's environment and may be independent of mutualism and competition within its ecological community. + +\subsubsection{Forecasting accuracy} +\label{sec:res.forecasting} + +The likelihood ratio tests in §\ref{sec:res.likelihood.ratio.test} are limited because improvements in predictive performance (or lack thereof) may be due to unobserved factors predictive of growth that are correlated with average subreddit mutualism. We hypothesized in H3 that the intergroup dependencies in our VAR models can better forecast the size of subreddits compared to baseline time series models that do not account for ecological interactions. 
As described in §\ref{sec:mes.forecasting}, we test H3 by comparing two forecasting metrics: the root-mean-square-error (RMSE) and the continuous ranked probability score (CRPS). + +VAR models including ecological interactions have forecasting performance superior to the baseline model in terms of both RMSE and CRPS. We evaluate the 24-week forecast performance for all subreddits which were assigned to clusters. The RMSE under the baseline model (0.84) is greater than the RMSE of the VAR models (0.75) and the CRPS of the baseline model (72,853) is also greater than the CRPS of the VAR models (72,669). This reflects a substantive improvement in forecast accuracy robust to the choice of the forecasting metric. + +Our baseline model contains a constant term and a trend term for each group and therefore accounts for all time-invariant within-group variation. Because overlap density is a subreddit-level variable that does not vary over time, +we know that the improvement in forecasting performance comes from modeling ecological interactions in ways not captured by overlap density. + +\section{Threats to Validity} +\label{sec:limitations} +Our work is subject to several important threats to validity that we cannot fully address. First, we study ecological communities on only one platform hosting online groups and our results may not generalize to other platforms or time periods. +Additionally, while our community ecology approach assumes that ecological interactions drive dynamics in the size of groups over time and cause groups to grow or decline, drawing causal inference using our method would depend on several untestable assumptions. For example, our ability to infer causal relationships might be limited if groups we do not consider---including groups on other platforms---play a role in an ecological community. Regression estimates in Models 1-3 may be confounded by omitted variables and cannot support causal interpretation. 
+Therefore, we refrain from claiming that the relationships we infer are causal. + +The method we propose for identifying ecological interactions between online groups has limitations common to all time series analysis of observational data. +Potential omitted variables might also include additional time lags of group size. Although we chose to use VAR(1) models with only 1 time lag, we hope future work can improve upon our approach and model more complex dynamics with additional lags. +% Our results are offered as limited temporal associations consistent with inferred ecological interactions. +Like most other time series analysis, vector autoregression assumes that the error terms are stationary. This is difficult to evaluate empirically and may not be realistic \citep{canova_var_2007}. Future work might relax these assumptions using more complex models with time-varying parameters, state space models \citep{box-steffensmeier_time_2014}, nonlinear time series models \citep{cenci_regularized_2019, kantz_nonlinear_2003}, or stationarity-enforcing priors \citep{heaps_enforcing_2020}. Such approaches may require additional contextual knowledge and be difficult to scale to an analysis of hundreds of different ecological communities, but may prove fruitful in future work focusing on ecological communities of interest. Such models may also be useful in future work investigating how ecological interactions change over time. + +Additional threats to validity stem from our use of algorithmic clustering to identify ecological communities. +Organizational ecologists have rarely attempted to estimate the full community matrix for an entire population containing a large number of groups because of data and statistical limitations \citep[e.g.][]{ruef_emergence_2000, sorensen_recruitment-based_2004}. For instance, 100 million possible ecological interactions exist within a set of 10,000 communities. 
Attempting to infer them all raises considerable computational and statistical challenges. +% This makes it necessary to narrow the scope to the ecological communities of interest in ways appropriate to the research question. +We chose to use a clustering analysis to explore the typical ecological communities on a platform. + +% Yet, a + +While we choose clusters based on high degrees of user overlap and validate our clustering in terms of the silhouette coefficient and purity criteria, we might have obtained different results if we had clustered in a different way. Additionally, our efforts to obtain clusters with a high silhouette coefficient lead us to remove a large number of subreddits from our analysis. Thus, our results are not representative of Reddit overall, but only of those subreddits that were included in our analysis. Furthermore, clustering algorithms like the one we use may not have unique solutions and different initial conditions and hyperparameters might lead to different results. While these allow us to scale up our analysis, future work should use principled definitions of an ecological community based on qualitative contextual knowledge in focused studies of particular ecological communities. +% future investigations should also consider qualitative approaches to constructing ecological communities. +% Finally, our three cases studies are limited in that they can offer only a proof-of-concept analysis and an enticing hint at more comprehensive future analyses with more rigorously defined populations of online groups. +% Although we found varying results in the three ecological communities we selected, these case studies can provide little explanation for when one should expect to find different forms of commensalism in online groups. Our hope is that these initial results can point in new directions for research. +% % We looked at three different sets of related online groups and found three qualitatively different ecological communities. 
+% As is true in all case study research, there is little reason to expect findings from any one of our case studies to generalize to any specific other set of contexts. + +\section{Discussion} +\label{sec:discussion} + +To introduce community ecology and compare it to population ecology, we presented three studies. In Study A, we found support for H1 showing---as predicted by density dependence theory---that overlap density has an $\cap$-shaped association with subreddit growth. +Subreddits with moderate overlap density in our data declined less than subreddits with either very low or very high overlap density. +According to population ecology theory, this suggests that high-density environments are competitive and less conducive to growth than medium-density environments. + +%prevalence of mutualism among highly overlapping subreddits contrast with our results for + +Surprisingly, this contrasts with our results in Study B, where we studied the diversity of ecological communities using vector autoregression models of group size over time to infer networks of ecological interactions. +%surveyed clusters of highly overlapping groups on Reddit to. +We find ecological communities that are mutualistic or competitive, that mix the two, or that have few significant ecological interactions at all. Overall, however, ecological communities of subreddits are typically mutualistic and mutualistic interactions are stronger on average than competitive ones. Although we find evidence of density dependence, density-dependent competition does not necessarily reflect typical relationships in ecological communities of highly overlapping subreddits. + +%As discussed more below, our results are due to the fact that support for H1 does not necessarily mean that most relationships between subreddits with the greatest degrees of user overlap are competitive. 
+ +Our results in Study C show that the size of the other members of an ecological community improves time series forecasts of participation in online groups. However, average subreddit mutualism did not help predict growth. +This suggests that population ecology and community ecology offer complementary environmental and relational perspectives. +Population ecology's focus on environmental factors such as niche and overlap density is useful for predicting growth, but does not provide a way to study networks of mutualism and competition. +Community ecology unpacks density and provides insights about the specific relationships between groups. While modeling these interactions helps forecast participation levels in groups, the existence of these interactions may be independent of future growth. For example, if mutualistic relationships are common in declining ecological communities, that would explain our result for H2. + +% these interactions helps time series forecasting, but whether the interactions + +% While we advance community ecology as an alternative framework to population ecology, our results show that population ecology and community ecology are complementary perspectives. +% We tested H2 to find out whether including subreddit average mutualism improves the ability of a density dependence model to predict the size of a subreddit n.test weeks in the future and found that it did not. Therefore, + +% Yet in support of H3, including ecological interactions in the vector autoregression (VAR) models substantially improves their forecasting performance. + + +% Our findings in Study A and Study B may appear contradictory, their coincidence in our data points to ways in which population ecology and community ecology conceive of different kinds of ecological dynamics. + +The complementary nature of the two ecologies is seen in the coincidence of our findings in Study A and Study B. 
+Indeed, these results can help explain the puzzling set of empirical results about the relationship between overlap density and outcomes like growth, decline and survival \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. +Studies of density dependence theory in social computing measure the density of an online group's niche in terms of its overlap in participants or topics. +%Resource overlaps seem to reflect competitive forces in some circumstances but mutualistic ones in others. +Our analysis clearly shows that resource overlaps between two groups might have little to do with whether they are mutualists or competitors. Instead, overlaps may simply reflect the hospitality of the environment to groups with overlapping topics or user bases. +As a result, the differing environmental conditions of Wikis and Usenet groups might explain why user overlap was associated with the survival of wikis \citep{zhu_impact_2014} but with the decline of Usenet groups \citep{wang_impact_2012}. Wikia was a young and growing platform during \citepos{zhu_impact_2014} data collection period when the growth of groups may have been limited by knowledge of how to build a wiki, and this knowledge was provided by overlapping experienced users. +Usenet was in decline during \citepos{wang_impact_2012} study period and this may have produced competitive environmental conditions as users became more scarce. +%Users of groups with high overlap density may have greater commitment to the platform than to any particular group and competition over such users may become fierce when a platform goes into decline. + +% as users with comm + +% because + +% and \citeauthor{tan_all_2015} \cite{tan_all_2015} observe that accounts posting in fewer different groups are more likely to leave a platform. +% As \citeauthor{kraut_building_2012} \cite{kraut_building_2012} argue, commitment to subgroups can enhance commitment to a broader group. 
This suggests that On the other hand, members of a group with high overlap density may have little commitment to it in particular. + +% This suggests that commitment to a + +% We suggest that when commitment to the platform declines this may amplify competition as +% may present environmental conditions for strong competition over those members +% This suggests that +% Such groups may face greater challenges in sustaining participation when the platform goes into decline. + +The widespread mutualism found in Study B resonates with long-held understandings of ecological interactions in evolutionary theory \citep{kropotkin_mutual_2012}. Competition is unlikely to persist because it decreases survival. Because mutualism increases survival, it will be favored by natural selection \citep{armstrong_competitive_1980, axelrod_evolution_1981}. Similarly, competition can be avoided if groups adopt specialized roles in their ecological community, a dynamic known as resource partitioning in organizational ecology \citep{carroll_concentration_1985,menge_competition_1972,schoener_resource_1974}. Resource partitioning theory suggests that the competition among real estate subreddits observed in Figure \ref{fig:comp.network} may be due to a lack of specialization. If specialization does not emerge over time, such groups of competing subreddits may have decreased survival. By contrast, mental health support groups like those observed in Figure \ref{fig:mut.network} appear to have distinctive purposes or roles. Future work to test such mechanisms in ecological communities of online groups may reveal ways that online groups complement or cooperate with each other. + + +%Our results demonstrate population ecology's approach to competition and mutualism in a test of density dependence theory and provide an evaluation of community ecology's ability to predict subreddit growth. 
+ + +%Future work should directly test this hypothesis about the relationships between platform-based and subgroup-based commitment. + +% In general, competition over overlapping resources will have no effect on group growth if something besides the overlapping resource limits growth \cite{verhoef_community_2010}. For example, two wikis might share a large number of contributors (have high user overlap), but their growth might be limited by a lack of core contributors who perform important administrative tasks like policy making and software administration \cite{zhu_impact_2014}. Community ecology relaxes the assumption that competition and mutualism are caused by user overlap density and instead seeks to infer them from data. +% To illustrate our approach, we presented 4 example ecological communities found on Reddit §\ref{sec:case.studies}. +Within large platforms for online groups, the great number of ecological communities that can be studied should make it possible for future work to apply methods from network science to construct and test generalizable theories about the roles of different types of resources, design features of platforms, and governance institutions in these ecological interactions. Future work should also incorporate community ecology analysis in case studies of important topics such as ecological communities engaged in peer production, political mobilization, misinformation, or mental health support. + +Although we focused on online groups within a single platform, groups may use multiple platforms with distinctive affordances for different purposes \citep{fiesler_moving_2020, kiene_technological_2019}. Since the VAR method relies only on time series data to infer ecological interactions, it can be applied to study ecological communities spanning social media platforms. 
Community ecology can thus provide a bridge between quantitative studies of participation in online groups and theories of interconnected information ecologies \citep{nardi_information_1999}. While we focus on relationships between groups sharing a platform, one can apply our concepts and methods to understand how interdependent systems of technologies and users give rise to higher levels of social organization on social media platforms \citep{astley_two_1985, aldrich_organizations_2006}. + +\subsection{Implications for Design} + +% While Resnick et al.~\citep{resnick_starting_2012} +In the final chapter of their book on \textit{Building Successful Online Communities}, \citet{kraut_building_2012} advise managers of online groups to select an effective niche and beware of competition. However, these recommendations are based on little direct evidence from studies of online groups and offer almost no concrete steps that a designer or group should take based on either piece of advice. Although further research into ecological interactions is needed before design principles can be derived, we provide a framework for online group managers to think about ecological constraints on group size. +While intuition suggests that online group managers might seek out mutualistic relationships and avoid competitive ones, it is often not obvious whether another group with overlapping users is a competitor or mutualist. +Our method provides a way for group managers to know. + +Competitors have a negative impact on growth, but ecological theory suggests that specialization is an adaptive strategy in response to competition \citep{aldrich_organizations_2006, carroll_concentration_1985, kraut_building_2012, powell_network_2005}. +%For example, the growth of Wikipedia caused other online encyclopedia projects to shift their focus \cite{hill_almost_2013}. +Using our method, group managers might identify competitors limiting the growth of their groups. 
With the knowledge of this analysis in hand, they might be able to escape a competitive dynamic by specializing. +While competitive relationships are defined by how they decrease the size of groups, competition can also be important to the health of the broader ecological community. Exit to an alternative group can be an avenue for political change in response to grievances and poor governance \citep{hirschman_exit_1970, frey_emergence_2019}. The threat of competition with other groups may make expressions of voice more persuasive to moderators or platforms \citep{hirschman_exit_1970}. + +Groups looking to increase activity should desire to seek out mutualistic relationships, and we believe that designers of online platforms can help them do so. Features such as meta-groups, group search, recommendation engines, and practices like linking related groups may lower barriers between groups and support mutualism. However, it is not obvious to what extent particular features will support competition, mutualism, or both. Using our method, managers and designers can test features intended to support mutualism. + +\section{Conclusion} + +% Rewrite conclusion +While explanations for the rise or decline of online groups often look to internal mechanisms, understanding the role of interdependence between online groups is increasingly important. +While prior research has investigated competition and mutualism among online groups with overlapping users and topics using the population ecology framework \citep{wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}, this approach does not provide a way to infer competitive or mutualistic interactions among related groups. +We introduce the community ecology framework as a complementary perspective to population ecology. +% The two ecologies both seek to explain why online groups grow or survive, but they focus on different levels of analysis \cite{astley_two_1985}. 
+By inferring competition-mutualism networks directly from time-series data, our community ecology approach helps resolve the empirical tensions raised by prior ecological work in social computing and reveal that most interactions within clusters of subreddits with highly overlapping users are mutualistic. Our methods provide a foundation for future work investigating related online groups. +% \printbibliography[title={References},heading=secbib] + diff --git a/dissertations/nathante_uw_2021/ch5_conclusion.tex b/dissertations/nathante_uw_2021/ch5_conclusion.tex new file mode 100644 index 0000000..a40563c --- /dev/null +++ b/dissertations/nathante_uw_2021/ch5_conclusion.tex @@ -0,0 +1,302 @@ +% \documentclass[12pt]{memoir} + +% \usepackage{cdsc-memoir} +% % there are two chapter styles: cdsc-article and cdsc-memo +% % memo assumes that you remove the "\\" and the email address from the +% % \author field below as well as that you will comment out the +% % \published tag +% \chapterstyle{cdsc-article} + +% \usepackage[utf8]{inputenc} +% \usepackage{wrapfig} +% \usepackage[T1]{fontenc} +% \usepackage{textcomp} +% \usepackage[garamond]{mathdesign} + +% \usepackage[letterpaper,left=1in,right=1in,top=1in,bottom=1in]{geometry} + +% % packages i use in essentially every document +% \usepackage{graphicx} +% \usepackage{enumerate} + +% % packages i use in many documents but leave off by default +% % \usepackage{amsmath, amsthm, amssymb} +% % \usepackage{dcolumn} +% % \usepackage{endfloat} + +% % import and customize urls +% \usepackage[usenames,dvipsnames]{color} +% \usepackage[breaklinks]{hyperref} + +% \hypersetup{colorlinks=true, linkcolor=Black, citecolor=Black, filecolor=Blue, +% urlcolor=Blue, unicode=true} + +% % list of footnote symbols for \thanks{} +% \makeatletter +% \renewcommand*{\@fnsymbol}[1]{\ensuremath{\ifcase#1\or *\or \dagger\or \ddagger\or +% \mathsection\or \mathparagraph\or \|\or **\or \dagger\dagger +% \or \ddagger\ddagger \else\@ctrerr\fi}} +% 
\makeatother +% \newcommand*\samethanks[1][\value{footnote}]{\footnotemark[#1]} + +% % add bibliographic stuff +% \usepackage[american]{babel} +% \usepackage{csquotes} +% \usepackage[natbib=true, style=apa, backend=biber]{biblatex} +% \addbibresource{ch5_conclusion.bib} +% \DeclareLanguageMapping{american}{american-apa} + +% \defbibheading{secbib}[\bibname]{% +% \section*{#1}% +% \markboth{#1}{#1}% +% \baselineskip 14.2pt% +% \prebibhook} + +% \def\citepos#1{\citeauthor{#1}'s (\citeyear{#1})} +% \def\citespos#1{\citeauthor{#1}' (\citeyear{#1})} + +% % memoir function to take out of the space out of the whitespace lists +% \firmlists + +% % LATEX NOTE: these lines will import vc stuff after running `make vc` which +% % will add version control information to the bottom of each page. This can be +% % useful for keeping track of which version of a document somebody has: +% % \input{vc} +% % \pagestyle{cdsc-page-git} + +% % LATEX NOTE: this alternative line will just input a timestamp at the +% % build process, useful for Overleaf +% % \pagestyle{cdsc-page-overleaf} + +% \begin{document} + +% \setlength{\parskip}{4.5pt} +% % LATEX NOTE: Ideal linespacing is usually said to be between 120-140% the +% % typeface size. So, for 12pt (default in this document, we're looking for +% % somewhere between a 14.4-17.4pt \baselineskip. Single; 1.5 lines; and Double +% % in MSWord are equivalent to ~117%, 175%, and 233%. 
+ +% \baselineskip 16pt + +% \title{Future Directions in the Ecology of Online Communities} +% \author{Nathan TeBlunthuis\\ +% \href{nathante@uw.edu}{nathante@uw.edu}} +% \date{September 1, 2021} + +% \maketitle + +% % LocalWords: + +% % TO CONCLUSION: + +% \section{} + +Chapter 1 says that ``the project of this dissertation is to begin reconstructing organizational ecology in the relatively theory-poor but data-rich context of online communities.'' By focusing on understanding the relationships between related online communities in ecological terms of competition and mutualism and in the emic language of members of overlapping communities, the preceding work seeks to build an empirical foundation to build new ecological theory. It has found qualitative and quantitative evidence that overlapping online communities often fill distinctive niches by providing complementary benefits to their users. Competitive dynamics also occur, and can be strong, but do not last as long. Although competition and mutualism play a role in their growth and survival, communities may not adapt to promote mutualism and avoid competition. Rather it seems likely that the ``principle of competitive exclusion'' takes hold in some other way, perhaps through a selection process in which communities normally must provide complementary benefits to existing ones in order to take off. + +Of course, these claims are limited by the empirical tools that were used to support them. +Inferences about competition and mutualism are based upon time series models with fundamentally untestable assumptions. +By fitting a far greater number of models than I could carefully specify I have taken an unabashed ``big data'' approach. +To make confident claims about any particular competitive or mutualistic relationship between two subreddits I would have to conduct a relatively exacting model selection and comparison procedure based on additional contextual knowledge of the communities' histories. 
+The large scale of this analysis supports the general findings enumerated above assuming that any model misspecifications have not introduced errors in a systematic and misleading way. +The fact that both the linear and nonlinear time series analyses and the active community members all seem to agree that mutualism is more common than competition provides some reassurance. It seems quite unlikely that all three will mislead in similar ways. + +The reconstruction project is still beginning, but at this stage we can propose preliminary answers to some key theoretical questions: (1) How do people construct systems of overlapping online communities? (2) What types of ``resources'' are most important for mediating ecological interactions? (3) How do ecological interactions relate to broader dynamics such as the growth of a platform or the popularity of a broader topic? and (4) How do barriers between different platforms affect cross-platform ecological relationships? + +Question (1) is fundamental to an ecological explanation for the development of online communities. A preliminary answer is that \emph{people construct systems of overlapping online communities as new online communities find distinctive niches in the neighborhood of existing communities relatively early in their development}. Chapter 4 finds evidence that systems of overlapping online communities are not constructed through an adaptation process and suggests a selection process as an alternative. It should be noted that selection and adaptation are not mutually exclusive and the systems of overlapping communities may develop through a hybrid process. Chapter 3 suggests that a large majority of active online communities each have a distinctive ecological niche. It seems likely that successful online communities often quickly find a niche early in their development.
+ +Prior research and Chapter 2 both see users and topics as related to rival or non-rival resources that make competition and mutualism between online communities possible. However, Chapter 2 finds that user and topic overlap densities are very weakly correlated with online community growth suggesting that user and content overlaps are not very close analogs for the kinds of resource overlaps considered by organizational ecologists, such as the technological range of a firm's outputs \citep{dobrev_shifting_2003}. Based on findings from Chapter 3, a preliminary answer to question (2) is that \emph{online communities' ecological niches are a product of content categories, audiences, and social capital}. These dimensions of an online community's niche might be difficult to precisely measure, but they can be described in theory. + +Content categories are socially constructed classes such as ``memes,'' ``Q\&A,'' ``news,'' ``commentary,'' ``art,'' ``documentation'' and ``discussion.'' Online communities often specialize in a subset of possible content categories. +Specialization in a set of content categories might be achieved formally through rules or definitions of topical scope or informally, through the community's size, or the preferences and behaviors of its members. +The empirical work so far considers topics measured through semantic similarity or language models. +Content categories are likely to be correlated with such measures, but the measures are unlikely to faithfully capture important aspects of content categories like differences in medium, genre and form. + +The notion of social capital and audience as distinct aspects of a niche disentangles the concept of ``user.'' Social capital refers to the benefits that come from interpersonal interaction and sense-making with a homophilous or tight-knit community \citep{ackerman_sharing_2013}. 
Measures of group size or user overlap may be correlated with social capital, but they do little to distinguish a user who comments as a member of a crowd-like audience from a user who seeks social bonds and interactions with fellow members of their identity group or enthusiasts in their hobby. + +Question (3) is an important part of an ecological explanation for the rise and decline of platforms in terms of the communities they host. A preliminary answer is that \emph{ecological interactions and the rise or decline of a topical area drive one another in a feedback process}. Chapter 2 suggests that growing platforms may be more likely to have mutualistic dynamics as they have an increasing number of potential niches for online communities of varying sizes and scopes. At the same time, mutualistic interactions among overlapping communities are likely to drive the rise of a platform as mutualists enrich niches in their neighborhoods. In a similar way, competition and the decline of a topical area might reinforce each other if out-migration of users interested in the topic induces competition over the remaining users and this accelerates the communities' declines, prompting further out-migration. + +Question (4) considers the ecological consequences of how different social media platforms divide related online communities such as the Wikis and subreddits about the same topic. A preliminary answer is that \emph{barriers between platforms limit both mutualistic and competitive dynamics} because of how they limit the sharing of users or content across platforms. However, when non-rival resources such as information and community building know-how are transferable across platforms, communities on platforms designed to provide different types of benefits are likely to be mutualists.
+For example, subreddits and Wikis about similar topics are probably mutualists because wikis are designed primarily for developing and sharing encyclopedic information and subreddits often focus on socialization and discussion. + +Now I will sketch several possible directions for near-future work in this research program. Some of these potential projects seek to develop more complete answers for the key theoretical questions and others will bridge ecological analysis to specific practical problems. +My hope is that empirical support and theoretical development will soon be sufficiently advanced to inform the design of present and future online community ecosystems and to understand the successes and limitations of peer production. + +\section{Ecological Relationships Between Platforms} + +A significant limitation of my empirical studies has been that they focus only on interactions among communities within a single large platform. However, online communities often overlap across platforms \citep{kiene_technological_2019} and cross-platform interactions are likely to be important \citep{vincent_examining_2018}. For example, Reddit's growth enormously increased in 2010 when users of rival site Digg.com migrated \emph{en masse}, suggesting that during this period subreddits and Digg sections were in competition \citep{noauthor_digg_2021}. In chapter 6 of \emph{Building Successful Online Communities}, \citet{resnick_starting_2012} recommend that new online communities ``carve out a useful and defendable (sic) niche in the ecology of competing communities.'' They base this recommendation upon virtually no evidence taken from studies of online communities or organizational ecology but rather by following intuitions drawn from economics and assuming that online communities may find themselves in ``winner-take-all'' situations.
Although they recommend specialization as a strategy for avoiding competition, they also suggest ``lock-in'' features like having different user interfaces and making it so identities cannot be shared between communities. + +At issue is how \citet{resnick_starting_2012} attempt to simultaneously adopt the perspectives of two different types of actors whose interests are often unaligned. +Commercial platforms need to generate private revenues and seem to better fit the classical models of organizational ecology that have niche overlaps as highly correlated with competition. +A commercial platform may find mutualism between cross-platform communities a nuisance and may find the ``lock-in'' features unequivocally beneficial. +However, building a successful online community is not the same as building a platform that hosts online communities. +My ecological studies of relationships between communities suggest that mutualism is widespread among actually existing online communities within a platform. +In my conversations with members of overlapping communities, I learned that they often benefit from overlapping communities on different platforms. +Therefore it seems likely that communities on commercial platforms that are both sufficiently ``open'' and sufficiently differentiated will also be mutualistic, even if the platforms compete with each other over revenues. If so, this points to the promise of designs that support resource sharing across such platforms. + +Knowledge about inter-platform ecological dynamics is only beginning to be created. +\citet{nagaraj_how_2021} have found that open source knowledge projects like OpenStreetMap are hurt by competition with proprietary alternatives. +Cross-platform studies of digital traces face difficulties because it is not generally possible to associate user accounts on different platforms.
+However, the time-series models I have used only depend on finding related communities and therefore enable studying ecological interactions without tracking users across platforms. +I am developing a new dataset of related subreddits, Fandom.com wikis, and Wikipedia articles to investigate ecological interactions between related communities on different platforms. + +\section{Selecting Niche Width} + +Choosing a scope is an important design decision for organizations and for online communities. As I found in Chapter 3, broad and narrow scopes are associated with trade-offs in the types of benefits that a community can provide. The choice of scope, or the choice of how a community will specialize, may also have implications for the community's short and long run survival. According to theories of organizational ecology, the choice of scope may affect a community's competitive and mutualistic dynamics and its ability to weather changes in a turbulent environment. + +Resource partitioning theory, discussed briefly in sections of Chapters 2, 3 and 4, provides a framework for understanding how specialization relates to competition. It proposes that larger generalists can coexist with specialists because large generalists are not optimally efficient at all of their activities, leaving opportunities for specialists to out-compete them in narrow niches \citep{carroll_concentration_1985}. Findings from Chapter 2 suggest that one prediction of resource partitioning theory seems to obtain in groups of overlapping online communities. This is that they often have a ``main'' community which is a large generalist and people participate in the specialist communities in order to obtain distinctive benefits not easily obtained in the main community \citep{baum_ecological_2006}. 
+ +A related theory fragment of organizational ecology, niche width theory \citep{dobrev_dynamics_2001, freeman_niche_1983}, proposes that specialists are less able to survive during periods of rapid change. Large generalists may have advantages in changing environments because their diversity of interests, which spreads out risk, their experience transferring knowledge between different parts of their organization, and their slack resources can all help them absorb negative outcomes \citep{dobrev_shifting_2003}. +As discussed in Chapter 4, online communities may inhabit unstable environments where sudden events, ongoing trends, and abrupt policy changes can all affect participation \citep{ratkiewicz_characterizing_2010}. + +An example illustrates how environmental change can threaten the success of specialists. During the Trump administration, a number of anti-Trump subreddits were organized around specific controversies (e.g., \texttt{r\Slash the\_mueller, r\Slash marchagainsttrump, r\Slash keep\_track, r\Slash russialago}). +\texttt{r\Slash the\_mueller} was a subreddit about the Special Counsel's investigation into Russian election interference. +% As shown in \ref{fig:the.meuller}, +The number of posts in these subreddits declined following the end of the investigation. However, this subreddit has survived by successfully adapting and now has several posts a day critical of Trump but not specifically about the Mueller investigation. Yet a similar subreddit, \texttt{r\Slash russialago}, has declined to a much lower activity level (a few posts a week) but remains focused on Russian interference. By comparison, the number of posts in the generalist (but still left-leaning) \texttt{r\Slash politics} has remained relatively stable. + Niche width theory would predict that shifting to more general types of anti-Trump content may expose \texttt{r\Slash the\_mueller} to greater competition with other political subreddits.
However, if it had not adapted it might have little reason to exist after the end of Mueller's investigation. + +Theories of online community specialization can be made empirically testable with better quantification of the ways that overlapping communities are different from one another. These include features of content like choice of medium (text, images, video, links), content sources (what websites are they linking to?), types of participants with varying roles and styles of participation, and structures like policies, size and moderation. Niche width theory additionally requires measuring environmental changes that may threaten the survival of communities. Observable events corresponding to interesting environmental variation may include crisis events, elections and the release or cancellation of entertainment products. Comparing the growth, performance, and ecological dynamics of overlapping communities during times of high or low change can test these theories and point toward design principles for online community scoping that account for the trade-offs in different types of specialization. + + +% Other studies in organizational ecology, and in biological ecology more generally, resource partitioning refers to how different groups specialize to minimize niche overlaps and avoid competition. + +\section{Ecological Implications for Production and Performance} + +So far, the ecology of online communities has focused on understanding competition and mutualism among overlapping online communities. An important limitation of this work has been to conceptualize competition and mutualism as dynamics related to the growth of online communities. This follows biologists and organizational ecologists, but not all online communities have to grow in order to provide their intended benefits \citep{foote_starting_2017}.
An important step forward for this research program will be to relate interdependence between online communities to outcomes besides growth that may be more directly connected to the value of the public goods that communities produce. + +Quantifying the value of public information goods produced by online communities is a major methodological and theoretical challenge. Much of the field of economics depends on the assumption that the utility of a good can be measured by its price. Price is a valuable measure of value in economic theory because it is set by market mechanisms that align supply and demand. Online communities are thought to be able to produce public goods because they can lower transaction costs \citep{benkler_coases_2002}. Negotiating a price in these settings is simply not worth it. A price will reintroduce transaction costs and undercut the pro-social motivations people have for contributing. + +Of course, this does not mean the public goods online communities produce are worthless. Estimates of the cost of replacing Wikipedia by paying editors a market rate placed its value between 6 and 10 billion dollars in 2013 \citep{band_wikipedias_2013}. However, without a price mechanism, supply and demand may become ``misaligned.'' The quality of Wikipedia articles is uneven and the most popular content is often not the highest quality \citep{warncke-wang_misalignment_2015, gorbatai_exploring_2011}. +In classical economic theories, goods will be produced to meet the demand, but in peer production the size of an audience seems only weakly related to the level of production. +Explaining when online communities will produce high quality public goods like Wikipedia articles \citep{arazy_evolutionary_2019,arazy_determinants_2010,asthana_few_2018} or open source software \citep{champion_underproduction_2021} is thus important to understanding the successes and failures of peer production.
+ +% Critical mass theory can potentially explain how supply and demand can be linked in public goods production and can also be synthesized with ecology \citep{marwell_critical_1993}. The central mechanism of the theory the notion of a ``production function,'' which maps a quantity of contributor input to a level of good produced. The theory proposes that the shape of the production function is determined by the collective action problem that a group must overcome to produce the good and determines the level of the good that will be produced by rational actors. Some prior research applies this theory to online communities, but does not operationalize its central propositions related to production functions \citep{solomon_critical_2014}. + +Critical mass theory offers to explain the conditions for successful collective action in public goods production and can also be synthesized with ecology \citep{marwell_critical_1993}. Many CSCW systems appear to require a critical mass of users to start or sustain their usefulness \citep{ackerman_intellectual_2000}. The most important device in the theory is the \emph{production function}, which maps an individual's contributions to the value they get from contributing. The theory proposes that the shape of the production function is determined by the collective action problem that a group faces in producing the good. If a production function is \emph{accelerating} (\emph{decelerating}) then a contribution increases (decreases) the payoff of the next contribution. +The rational actors in a group each have their own production function and together these determine the level of the good that they will produce. Some prior research applies this theory to Wikipedia, but does not attempt to measure the value of contributions or operationalize the theory's propositions about the relationship between production functions and collective action \citep{raban_empirical_2010, solomon_critical_2014}.
Analyzing critical mass theory in the context of communal public goods production can also be an important theoretical contribution to communication theory \citep{fulk_connective_1996}. + +To illustrate, consider a hypothetical example of the construction of an online community for building a collaborative knowledge base, such as Wikidata. +This can be cast as a collective action problem because the project can provide a wide range of benefits to a potentially large group of people, but no individual can provide the full range of benefits alone \citep{marwell_critical_1993, fulk_connective_1996}. +Say a single individual, the community's founder who is an expert engineer and researcher, attempts to bootstrap the community by providing an initial design and implementation for the novel system, a small number of entries and by making efforts to publicize the community. +The founder hopes that others will join and contribute to constructing a valuable resource. + +During this period in the community's development, the \emph{critical mass} consists of just the founder, who is motivated and capable of making initial contributions in the hopes that others will see these contributions and subsequently make their own. The founder has a large and unique set of resources enabling them to pay the \emph{start-up costs} involved in founding the community when no one else would. After these start-up costs are paid, others can make much more granular contributions like adding entries to the knowledge base. The founder hopes that others will perceive expected benefits from contributing that exceed the costs of contributing. +% This might not happen and if time goes by and noone else contributes, the founder, all alone and discouraged, might conclude that it is not longer work making their own contributions.
+In theoretical terms, the founder hopes that the others' production functions are accelerating and paying the start-up costs will move the others' production functions into a favorable region where they will contribute. + +% If some time goes by and noone else contributes, the founder, all alone and discouraged, might conclude that it is no longer worth making their own contributions. Now the community has failed to hold on to a critical mass and becomes inactive. But say that the founder's early contributions have been useful to somone else (member 2) who chooses to make their own contributions because they expect to benefit from the ``warm glow'' of reciprocity, through social interaction with the founder, or from the future contributions that their own contributions might attract. + +Ecology has important implications for critical mass theory because important aspects of the collective action problem that influence the production function are related to the composition of the group and prior work suggests that individuals with varying experiences are important to online community growth \citep{kairam_life_2012}. +Heterogeneous groups are thought to be conducive to collective action because they are more likely to contain individuals who can contribute different things like start-up costs or rare pieces of information \citep{fulk_connective_1996}. +% This makes it easier to form a critical mass of individuals who can make start up contributions \citep{marwell_critical_1993}. + +Returning to the example of a collaborative knowledge base, it is important to recognize that many contributions will involve \emph{articulation work} activities like documenting, answering questions, naming, and interpreting that are required to make the knowledge base work in practice \citep{schmidt_taking_1992, suchman_supporting_1996}.
+Even though contributions of articulation work might not directly add new features or data to the knowledge base, they can be important to accelerating community members' production functions. +A heterogeneous community may be more likely to include members who are skilled at articulation work that benefits other members. +On the other hand, if different subgroups of a large community have sufficiently different application areas, some articulation work might be specific to each subgroup. +For example, biologists might make and document biology-specific norms for the collaborative knowledge base, but this would not be useful to physicists. +Thus individuals' production functions might depend most strongly on the other members of their subgroup when subgroup-specific articulation work is a limiting factor. + +% At the same time, the utility of a collaborative knowledge overall often depends on linking to knowledge outside of one's domain of expertiseso the . + + + + + + +% elaborate on what a collective action problem is. +% develop the example of a distributed database and why it might be hard to do collective production of it at different stages or phases +% at different phases of developement the distributed database the critical mass needed to maintain the collective action dynamic is differnet in composition or in form. +% Ecology is related to critical mass theory because important aspects of the collective action problem that influence the production function are related to the composition of the group. Heterogenous groups are thought to be condusive to production because they are more likely to contain individuals who will contribute very much and therby make it easier to form a ``critical mass'' of individuals who can overcome the start-up costs common in collective action problems \citep{marwell_critical_1993}.
For example, a start-up cost for an open source database might take the form of an initial design for a novel system that can only be provided by expert engineers or researchers. But once the initial system is developed, additional features, bug fixes, and documentation can be added by a much broader group of developers who wish to use the system in their applications. Therefore, open source community's ability to biuld a valuable system depends on including both database experts and application developers. Prior work suggests that individual with varying experiences are important to online community growth \citep{T}. + +I am starting work to find out how production functions help explain when online communities achieve critical mass and produce quality outputs and if relationships among communities influence the shape of production functions in ways that make collective action easier or more difficult in different conditions. +Measuring production functions requires the ability to precisely quantify the quality or value of individual contributions. +As a step in this direction, I have developed an improved measurement of Wikipedia article quality in research accepted for publication and included in Appendix A. +Prior article quality measures have been based on machine learning models that do not provide a continuous measure amenable to statistical analysis and that were miscalibrated for units of analysis like articles or projects. +Research using these measures has got around these problems by adopting an assumption that article quality levels on Wikipedia are ``evenly spaced'' from one another. +I use a method that relaxes this assumption, provides evidence that it is unfounded, and improves the accuracy of the models. + +I have also done some methodological work on the ``demand side'' to understand how audiences use Wikipedia content. Most prior work has been limited to measuring page views. 
In Appendix B, I study the amount of time spent reading articles by Wikipedia visitors and find that readers in the Global South remain on pages for longer, especially in the last page view in a session. +Although the measure used in that study may not be available for use in the future, this work has prepared me for the time when better reading time data is available. +It will be interesting to see if the audience for an article relates to critical mass dynamics. + +\section{Ecology and the Diffusion of Technologies for Community Governance} + +Future ecological research can also look at the role of ecological dynamics in the emergence and diffusion of novel artifacts, technologies, information and ideas. Overlapping technology use in particular is a potential mechanism for specialization and mutualism. I have previously suggested that sharing a host platform may not be sufficient for defining an organizational form because communities have considerable flexibility in making their own rules and configuring their own custom technology. If sufficiently strong patterns are found in the sets of rules or technologies that communities adopt, these might justify treating communities sharing such structures as organizational forms or at least a potentially important kind of niche overlap. + +When online communities share technologies, this can create important forms of interdependence and collaborative innovation on tools is potentially an important type of mutualism. +For example \citet{chandrasekharan_crossmod:_2019} developed a system called ``Cross Mod'' for subreddits to collaborate on customizable machine learning models for monitoring misbehavior. +Smaller communities pooling data about rule violations can potentially build more accurate models than single communities can. +Technologies like Cross Mod allow communities to select which other communities they wish to import data from and therefore are most useful when communities are institutionally compatible. 
+This suggests that sharing governance technologies may be a good proxy for an organizational form. + +However, as I found in Appendix C, my study of algorithmic flagging tools on Wikipedia, machine learning tools for predicting misbehavior may reproduce the biases of community moderators. +They can also improve the fairness of moderator judgments if moderators use the models instead of other biased social signals to find potential misbehavior. +Additional risks may arise when algorithmic tools are shared by overlapping communities. +The learned norms and standards of behavior from one community may not be appropriate in other communities. +If shared flagging algorithms can more easily implement norms that are more widely held, the diffusion of an algorithm that makes regulating behavior easier and more predictable might mediate the diffusion of the norm. + +The method I developed for the study in Appendix C provides a way to assess the consequences of a machine learning classifier without intervening in a community. +Future work at the intersection of ecology and online community governance might use this method in a study of the relationships between the performance of algorithms for enforcing different rules, the diffusion of the rules, and the growth and survival of communities having the rules. + + +\section{Microfoundations for Ecological Macrodynamics} + +% Good chance this micro-macro stuff heads to the conclusion. Let's keep trying to make it work for now. These 3 paragraphs seem like a good argument for a study that links individual behavior or user flows to competition/mutualism or density. + +Predominant approaches in HCI and social computing and popular conceptions of social media platforms most often emphasize the role of managers of platforms in building online communities. +However, platforms have only a limited control over the ways that users build communities.
+Furthermore, platforms struggle to maintain participants who may migrate to competing platforms. +Communities and their organizers can engage in collective action to protest platforms' governance and design decisions \citep{matias_going_2016}. +Online communities also form intermediate structures over which platforms have limited influence such as the widespread clusters of highly overlapping communities I identify in Chapter 2. +An important goal of the ecology of online communities is to understand how patterns of action within individual communities are co-constitutive with the cultures and institutions of platforms. +%Overlapping online communities exist because individuals participate in them, but individuals cannot participate in communities that don't exist. +This goal faces a key type of puzzle in social science: to account for how ``micro-level'' individual actors give rise to ``macro-level'' organizations, institutions, online communities, and cultures even as individuals are situated within these very structures. + + +% This was because taking up inter-organizational dependence as an object of study raised a similar micro-macro puzzle. +Micro-macro puzzles are not only found in the constitution of individual persons and the social structures they inhabit. +Organizational ecology takes up a different kind of micro-macro puzzle at the level of reciprocal dependence between organizations and the organizational fields or industries they comprise. +The performance of an individual organization depends on ecological dynamics in its organizational field, but the organization itself contributes to these very dynamics. +Initial work in organizational ecology avoided this reciprocal causation by minimizing the action of individual organizations. +Structural inertia constrained the agency of organizational actors, and external institutions, competition, and legitimacy constrained organizational performance. 
+ +At first, organizational ecologists did not deny that factors internal to organizations matter to organizational performance. +Yet they argued that \emph{ceteris paribus}, the chances of an organization's survival depend on environmental conditions and on mutualistic and competitive pressures \citep{hannan_organizational_1989}. +Later on, organizational ecology began accounting for rational adaptation and failure of individual organizations \citep{baum_ecological_2006}. +Recently, organizational ecologists have incorporated the role of human cognition and social learning into their conceptualizations \citep{hannan_concepts_2019}, but as far as I am aware, empirical analyses have not stretched all the way from individual persons to inter-organizational dynamics. + +Online communities provide a distinctive opportunity to connect individual behaviors to outcomes at the community and ecological levels thanks to the finely grained behavioral data that made possible the analyses in Chapters 2 and 4. +However, all of the measures used in these projects have aggregated the behavior of many individuals into measures of overlap or group size. +I have not shown how the ways that individuals navigate among overlapping online communities give rise to the ecological dynamics I find. +Aware of this limitation, I initially proposed constructing an agent-based model to theorize the micro-mechanisms of ecological dynamics. +Along the way, I found that talking to individuals provided a more valuable micro-level account of how and why people participate in overlapping online communities. + +These interviews surfaced a conceptual model of a process by which new communities in a topical area spin off specialists. +An important direction for future research will be to operationalize and test this model with data. 
+This future work should look for inspiration from measures of individual behavior introduced in recent research in HCI and social computing \citep{tan_tracing_2018, tan_all_2015, zhang_understanding_2021}. +Specifically, \citet{tan_tracing_2018} provide a method to associate newly created subreddits with prior subreddits whose users join the new subreddit and measure the language use of individuals to characterize their similarity to the other members of the community. Also, \citet{waller_generalists_2019} quantify users of online communities as generalists and specialists based on their activity styles using embedding methods. + +\section{Focused Case Studies} + +% find a better rationale than this? +% Why haven't we done this already? (b/c not as scientific?) +Finally, in order for ecological research in online communities to be useful to publics and practitioners, it will be important to conduct focused case studies of practical and popular interest. +Studies of the ecology of political communities, communities trying to make sense of the pandemic, ``meme stock'' and cryptocurrency communities, and pop culture fandom communities are all promising candidates. +A future project should investigate one or more cases in a mixed-methods study combining carefully constructed time series models for inferring ecological relationships and qualitative data in the form of grounded narrative accounts or interviews. + +In conclusion, my research set out to understand interdependence among online communities through the lens of organizational ecology. +It has questioned how well foundational assumptions of organizational ecology apply to online communities and set out to validate basic assumptions like when online communities will form competitive or mutualistic relationships. +It has provided new methods for studying competition and mutualism among online communities and shown that mutualistic relationships are more common than competitive ones because they last longer. 
+Although the question of how groups of mutualistic online communities are constructed remains open, selection process theories provide a starting point for future investigation. +Many applications of ecological theories and methods to important questions about the emergence, performance, and design of online communities are promising. +% As I continue my work, I am releasing well-documented code and datasets to support this future work and I hope, other research yet unimagined. + +% bibliography here +\setcounter{biburlnumpenalty}{9001} +\printbibliography[title = {References}, heading=secbib] + + +% \end{document} % + +% LocalWords: +%%% reftex-default-bibliography: ("ch5_conclusion.bib") diff --git a/dissertations/nathante_uw_2021/copyright_page.pdf b/dissertations/nathante_uw_2021/copyright_page.pdf new file mode 100644 index 0000000..f706efb Binary files /dev/null and b/dissertations/nathante_uw_2021/copyright_page.pdf differ diff --git a/dissertations/nathante_uw_2021/diss_ecology_of_online_communities.pdf b/dissertations/nathante_uw_2021/diss_ecology_of_online_communities.pdf new file mode 100644 index 0000000..1958cb0 Binary files /dev/null and b/dissertations/nathante_uw_2021/diss_ecology_of_online_communities.pdf differ diff --git a/dissertations/nathante_uw_2021/ecological_models.bib b/dissertations/nathante_uw_2021/ecological_models.bib new file mode 100644 index 0000000..8b9c2be --- /dev/null +++ b/dissertations/nathante_uw_2021/ecological_models.bib @@ -0,0 +1,2119 @@ + +@book{aldrich_organizations_2006, + title = {Organizations {{Evolving}}}, + author = {Aldrich, H.E. and Ruef, M.}, + date = {2006}, + edition = {2}, + publisher = {{SAGE Publications}}, + location = {{Thousand Oaks, CA}}, + isbn = {978-1-4129-1047-7} +} + +@article{armstrong_competitive_1980, + ids = {armstrong_competitive_1980-1}, + title = {Competitive {{Exclusion}}}, + author = {Armstrong, Robert A. 
and McGehee, Richard}, + date = {1980-02-01}, + journaltitle = {The American Naturalist}, + shortjournal = {The American Naturalist}, + volume = {115}, + number = {2}, + pages = {151--170}, + publisher = {{The University of Chicago Press}}, + issn = {0003-0147}, + abstract = {Recent developments in the mathematical theory of competitive exclusion are discussed and placed in historical perspective. The models which have been used in theoretical investigations of competitive exclusion are classified into two groups: those in which the resources regenerate according to an algebraic relationship (abiotic resource models), and those in which resource regeneration is governed by differential equations (biotic resource models). We then propose a mathematical framework for considering problems of competitive exclusion, and provide examples in which n competitors can coexist on k {$<$} n resources (both biotic and abiotic). These systems persist because of internally generated cyclic behavior. We conclude that the competitive exclusion principle applies in general only to coexistence at fixed densities.}, + file = {/home/nathante/Zotero/storage/WY46EPM3/Nat - 2021 - Competitive Exclusion.pdf;/home/nathante/Zotero/storage/6RRFPS4Z/283553.html} +} + +@article{astley_two_1985, + title = {The {{Two Ecologies}}: {{Population}} and {{Community Perspectives}} on {{Organizational Evolution}}}, + shorttitle = {The {{Two Ecologies}}}, + author = {Astley, W. Graham}, + date = {1985}, + journaltitle = {Administrative Science Quarterly}, + volume = {30}, + number = {2}, + eprint = {2393106}, + eprinttype = {jstor}, + pages = {224--241}, + issn = {0001-8392}, + abstract = {This paper distinguishes between two ecological perspectives on organizational evolution: population ecology and community ecology. The perspectives adopt different levels of analysis and produce contrasting views of the characteristic mode and tempo of organizational evolution. 
Population ecology limits investigation to evolutionary change unfolding within established populations, emphasizing factors that homogenize organizational forms and maintain population stability. Population ecology thus fails to explain how populations originate in the first place or how evolutionary change occurs through the proliferation of heterogeneous organizational types. Community ecology overcomes these limitations: it focuses on the rise and fall of populations as basic units of evolutionary change, simultaneously explaining forces that produce homogeneity and stability within populations and heterogeneity between them.}, + file = {/home/nathante/Zotero/storage/4Q76BREE/Astley - 1985 - The Two Ecologies Population and Community Perspe.pdf} +} + +@article{axelrod_evolution_1981, + title = {The Evolution of Cooperation}, + author = {Axelrod, R. and Hamilton, W. D.}, + date = {1981-03-27}, + journaltitle = {Science}, + volume = {211}, + number = {4489}, + pages = {1390--1396}, + issn = {0036-8075, 1095-9203}, + abstract = {Cooperation in organisms, whether bacteria or primates, has been a difficulty for evolutionary theory since Darwin. On the assumption that interactions between pairs of individuals occur on a probabilistic basis, a model is developed based on the concept of an evolutionarily stable strategy in the context of the Prisoner's Dilemma game. Deductions from the model, and the results of a computer tournament show how cooperation based on reciprocity can get started in an asocial world, can thrive while interacting with a wide range of other strategies, and can resist invasion once fully established. 
Potential applications include specific aspects of territoriality, mating, and disease.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/5W7KPW9P/1390.html} +} + +@article{banbura_large_2010, + title = {Large {{Bayesian}} Vector Auto Regressions}, + author = {Bańbura, Marta and Giannone, Domenico and Reichlin, Lucrezia}, + date = {2010}, + journaltitle = {Journal of Applied Econometrics}, + volume = {25}, + number = {1}, + pages = {71--92}, + issn = {1099-1255}, + abstract = {This paper shows that vector auto regression (VAR) with Bayesian shrinkage is an appropriate tool for large dynamic models. We build on the results of De Mol and co-workers (2008) and show that, when the degree of shrinkage is set in relation to the cross-sectional dimension, the forecasting performance of small monetary VARs can be improved by adding additional macroeconomic variables and sectoral information. In addition, we show that large VARs with shrinkage produce credible impulse responses and are suitable for structural analysis. Copyright © 2009 John Wiley \& Sons, Ltd.}, + langid = {english}, + annotation = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/jae.1137}, + file = {/home/nathante/Zotero/storage/BJPRR8SM/Bańbura et al_2010_Large Bayesian vector auto regressions.pdf;/home/nathante/Zotero/storage/8WJXYLQS/jae.html} +} + +@article{barnett_competition_1987, + title = {Competition and Mutualism among Early Telephone Companies}, + author = {Barnett, William P. and Carroll, Glenn R.}, + date = {1987}, + journaltitle = {Administrative Science Quarterly}, + volume = {32}, + number = {3}, + eprint = {2392912}, + eprinttype = {jstor}, + pages = {400--421}, + issn = {0001-8392}, + abstract = {In an exploratory study of the early telephone industry, we search for evidence of competition and mutualism between legally autonomous companies. 
Neighboring companies are found to have both types of interdependencies, although their exact nature depends on organizational form. Companies in separate geographical locations are found to be competitive with each other, regardless of organizational form. The two prevalent organizational forms in the industry at this time each apparently flourished in distinct niches and were symbiotically related. The findings are interpreted within a community ecology framework.} +} + +@incollection{baum_ecological_2006, + title = {Ecological Approaches to Organizations}, + booktitle = {Sage {{Handbook}} for {{Organization Studies}}}, + author = {Baum, Joel A. C. and Shipilov, Andrew V.}, + date = {2006}, + pages = {55--110}, + publisher = {{Sage}}, + location = {{Rochester, NY}}, + abstract = {Our goal is to assess and consolidate the current state-of-the-art in organizational ecology. To accomplish this we review major theoretical statements, empirical studies, and arguments that are now being made. Although we attempt to survey ecological approaches to organizations comprehensively, because ecological research now constitutes a very large body of work, and because other extensive reviews are available (Aldrich \& Wiedenmayer, 1993; Barnett \& Carroll, 1995; Baum, 1996; Baum \& Amburgey, 2002; Baum \& Rao, 2004; Carroll, Dobrev \& Swaminathan, 2002; Galunic \& Weeks 2002; Rao, 2002; Singh \& Lumsden, 1990), we emphasize recent work that challenges and extends established theory and highlight new and emerging directions for future research that appear promising. 
Our appraisal focuses on two main themes - demographic processes and ecological processes.}, + file = {/home/nathante/Zotero/storage/EGQC2W5I/Baum and Shipilov - 2006 - Ecological approaches to organizations.pdf;/home/nathante/Zotero/storage/38MBRGMQ/papers.html} +} + +@article{baumgartner_pushshift_2020, + title = {The {{Pushshift Reddit}} Dataset}, + author = {Baumgartner, Jason and Zannettou, Savvas and Keegan, Brian and Squire, Megan and Blackburn, Jeremy}, + date = {2020-05-26}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + shortjournal = {ICWSM}, + volume = {14}, + pages = {830--839}, + issn = {2334-0770}, + langid = {english}, + keywords = {pushift,reddit}, + file = {/home/nathante/Zotero/storage/DHRFJ58I/Baumgartner et al. - 2020 - The Pushshift Reddit Dataset.pdf;/home/nathante/Zotero/storage/G5E8SQFN/Baumgartner et al_2020_The Pushshift Reddit Dataset.pdf;/home/nathante/Zotero/storage/A8X5UY9R/2001.html;/home/nathante/Zotero/storage/B9FRQR94/7347.html} +} + +@article{becker_theory_1965, + title = {A {{Theory}} of the {{Allocation}} of {{Time}}}, + author = {Becker, Gary S.}, + date = {1965-09}, + journaltitle = {The Economic Journal}, + shortjournal = {The Economic Journal}, + volume = {75}, + number = {299}, + pages = {493}, + issn = {00130133}, + langid = {english}, + file = {/home/nathante/Zotero/storage/82WK59JA/Becker - 1965 - A Theory of the Allocation of Time.pdf} +} + +@report{benkler_social_2013, + type = {SSRN Scholarly Paper}, + title = {Social {{Mobilization}} and the {{Networked Public Sphere}}: {{Mapping}} the {{SOPA}}-{{PIPA Debate}}}, + shorttitle = {Social {{Mobilization}} and the {{Networked Public Sphere}}}, + author = {Benkler, Yochai and Roberts, Hal and Faris, Robert and Solow-Niederman, Alicia and Etling, Bruce}, + date = {2013}, + number = {ID 2295953}, + institution = {{Social Science Research Network}}, + location = {{Rochester, NY}}, + abstract = {This paper uses a new set of 
online research tools to develop a detailed study of the public debate over proposed legislation in the United States designed to give prosecutors and copyright holders new tools to pursue suspected online copyright violations. For this study, we compiled, mapped, and analyzed a set of 9,757 stories relevant to the COICA-SOPA-PIPA debate from September 2010 through the end of January 2012 using Media Cloud, an open source tool created at the Berkman Center to allow quantitative analysis of a large number of online media sources. This study applies a mixed-methods approach by combining text and link analysis with human coding and informal interviews to map the evolution of the controversy over time and to analyze the mobilization, roles, and interactions of various actors. This novel, data-driven perspective on the dynamics of the networked public sphere supports an optimistic view of the potential for networked democratic participation, and offers a view of a vibrant, diverse, and decentralized networked public sphere that exhibited broad participation, leveraged topical expertise, and focused public sentiment to shape national public policy. We find that the fourth estate function was fulfilled by a network of small-scale commercial tech media, standing non-media NGOs, and individuals, whose work was then amplified by traditional media. Mobilization was effective, and involved substantial experimentation and rapid development. We observe the rise to public awareness of an agenda originating in the networked public sphere and its framing in the teeth of substantial sums of money spent to shape the mass media narrative in favor of the legislation. Moreover, we witness what we call an attention backbone, in which more trafficked sites amplify less-visible individual voices on specific subjects. Some aspects of the events suggest that they may be particularly susceptible to these kinds of democratic features, and may not be generalizable. 
Nonetheless, the data suggest that, at least in this case, the networked public sphere enabled a dynamic public discourse that involved both individual and organizational participants and offered substantive discussion of complex issues contributing to affirmative political action. Find more information about the paper, including raw data available for download and an interactive visualization of the maps included in this paper, on the Berkman Center website.}, + file = {/home/nathante/Zotero/storage/P9M6MASA/Benkler et al. - 2013 - Social Mobilization and the Networked Public Spher.pdf} +} + +@book{benkler_wealth_2006, + title = {The Wealth of Networks: {{How}} Social Production Transforms Markets and Freedom}, + author = {Benkler, Yochai}, + date = {2006}, + publisher = {{Yale University Press}}, + location = {{New Haven, CT}}, + pagetotal = {528}, + keywords = {bookReview,Economics,FOSS,foundations of social computing,import,Innovation,Legal Studies,peer production} +} + +@article{blei_latent_2003, + title = {Latent Dirichlet Allocation}, + author = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.}, + date = {2003}, + journaltitle = {The Journal of Machine Learning Research}, + volume = {3}, + pages = {993--1022}, + file = {/home/nathante/Zotero/storage/2K3E7TJH/Blei et al. 
- 2003 - Latent dirichlet allocation.pdf} +} + +@inproceedings{blevis_ecological_2015, + title = {Ecological {{Perspectives}} in {{HCI}}: {{Promise}}, {{Problems}}, and {{Potential}}}, + shorttitle = {Ecological {{Perspectives}} in {{HCI}}}, + booktitle = {Proceedings of the 33rd {{Annual ACM Conference Extended Abstracts}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Blevis, Eli and Bødker, Susanne and Flach, John and Forlizzi, Jodi and Jung, Heekyoung and Kaptelinin, Victor and Nardi, Bonnie and Rizzo, Antonio}, + date = {2015-04-18}, + series = {{{CHI EA}} '15}, + pages = {2401--2404}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {The aim of the workshop is to provide a forum for researchers and practitioners to discuss the present and future of ecological perspectives in HCI. The participants will reflect on the current uses and interpretations of "ecology" and related concepts in the field. The workshop will assess the potential of ecological perspectives in HCI for supporting rich and meaningful analysis, as well as innovative design, of interactive technologies in real-life contexts.}, + isbn = {978-1-4503-3146-3}, + keywords = {affordances,artifact ecologies,ecological psychology,ecology,habitat,information ecologies,social ecology,sustainability} +} + +@article{bowker_bonnie_2001, + title = {Bonnie {{Nardi}} and {{Vicki O}}'{{Day}}, {{Information Ecologies}}: {{Using Technology}} with {{Heart}}}, + shorttitle = {Bonnie {{Nardi}} and {{Vicki O}}'{{Day}}, {{Information Ecologies}}}, + author = {Bowker, Geoffrey C.}, + date = {2001-03}, + journaltitle = {Computer Supported Cooperative Work (CSCW)}, + shortjournal = {Computer Supported Cooperative Work (CSCW)}, + volume = {10}, + number = {1}, + pages = {143--145}, + issn = {0925-9724, 1573-7551}, + langid = {english} +} + +@book{box-steffensmeier_time_2014, + title = {Time Series Analysis for the Social Sciences}, + author = 
{Box-Steffensmeier, Janet M}, + date = {2014}, + abstract = {"Time-series, or longitudinal, data are ubiquitous in the social sciences. Unfortunately, analysts often treat the time-series properties of their data as a nuisance rather than a substantively meaningful dynamic process to be modeled and interpreted. Time-Series Analysis for Social Sciences provides accessible, up-to-date instruction and examples of the core methods in time-series econometrics. Janet M. Box-Steffensmeier, John R. Freeman, Jon C. Pevehouse, and Matthew P. Hitt cover a wide range of topics including ARIMA models, time-series regression, unit-root diagnosis, vector autoregressive models, error-correction models, intervention models, fractional integration, ARCH models, structural breaks, and forecasting. This book is aimed at researchers and graduate students who have taken at least one course in multivariate regression. Examples are drawn from several areas of social science, including political behavior, elections, international conflict, criminology, and comparative political economy"--}, + isbn = {978-0-521-87116-7 978-0-521-69155-0}, + langid = {english}, + annotation = {OCLC: 879601718} +} + +@article{brandt_bayesian_2012, + title = {A {{Bayesian Poisson Vector Autoregression Model}}}, + author = {Brandt, Patrick T. and Sandler, Todd}, + date = {2012}, + journaltitle = {Political Analysis}, + shortjournal = {Polit. anal.}, + volume = {20}, + number = {3}, + pages = {292--315}, + issn = {1047-1987, 1476-4989}, + abstract = {Multivariate count models are rare in political science, despite the presence of many count time series. This article develops a new Bayesian Poisson vector autoregression (BaP-VAR) model that can characterize endogenous dynamic counts with no restrictions on the contemporaneous correlations. Impulse responses, decomposition of the forecast errors, and dynamic multiplier methods for the effects of exogenous covariate shocks are illustrated for the model. 
Two full illustrations of the model, its interpretations, and results are presented. The first example is a dynamic model that reanalyzes the patterns and predictors of superpower rivalry events. The second example applies the model to analyze the dynamics of transnational terrorist targeting decisions between 1968 and 2008. The latter example’s results have direct implications for contemporary policy about terrorists’ targeting that are both novel and innovative in the study of terrorism.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/FXWYBXR7/Brandt and Sandler - 2012 - A Bayesian Poisson Vector Autoregression Model.pdf} +} + +@article{butler_attraction-selection-attrition_2014, + title = {An Attraction-Selection-Attrition Theory of Online Community Size and Resilience}, + author = {Butler, Brian S. and Bateman, Patrick J. and Gray, Peter H. and Diamant, E. Ilana}, + date = {2014-09}, + journaltitle = {MIS Q.}, + volume = {38}, + number = {3}, + pages = {699--728}, + issn = {0276-7783}, + abstract = {Online discussion communities play an important role in the development of relationships and the transfer of knowledge within and across organizations. Their underlying technologies enhance these processes by providing infrastructures through which group-based communication can occur. Community administrators often make decisions about technologies with the goal of enhancing the user experience, but the impact of such decisions on how a community develops must also be considered. To shed light on this complex and under-researched phenomenon, we offer a model of key latent constructs influenced by technology choices and possible causal paths by which they have dynamic effects on communities. Two important community characteristics that can be impacted are community size (number of members) and community resilience (membership that is willing to remain involved with the community in spite of variability and change in the topics discussed). 
To model community development, we build on attraction-selection-attrition (ASA) theory, introducing two new concepts: participation costs (how much time and effort are required to engage with content provided in a community) and topic consistency cues (how strongly a community signals that topics that may appear in the future will be consistent with what it has hosted in the past). We use the proposed ASA theory of online communities (OCASA) to develop a simulation model of community size and resilience that affirms some conventional wisdom and also has novel and counterintuitive implications. Analysis of the model leads to testable new propositions about the causal paths by which technology choices affect the emergence of community size and community resilience, and associated implications for community sustainability.}, + file = {/home/nathante/Zotero/storage/292C8XTF/Butler et al. - 2014 - An Attraction-selection-attrition Theory of Online.pdf} +} + +@article{butler_cross-purposes_2011, + title = {The Cross-Purposes of Cross-Posting: Boundary Reshaping Behavior in Online Discussion Communities}, + shorttitle = {The Cross-Purposes of Cross-Posting}, + author = {Butler, Brian S. and Wang, Xiaoqing}, + date = {2011-09-15}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {23}, + pages = {993--1010}, + issn = {1047-7047}, + abstract = {Increasingly, online discussion communities are used to support activities ranging from software development to political campaigns. An important feature of an online discussion community is its content boundaries, which are individual perceptions of what materials and discussions are part of the community and what are not, and how that community is related to others within a larger system. 
Yet in spite of its importance, many community infrastructures allow individual participants to reshape content boundaries by simultaneously associating their contributions with multiple online discussion communities. This reshaping behavior is a controversial aspect of the creation and management of many types of online discussion communities. On one hand, many communities explicitly discourage boundary reshaping behaviors in their frequently asked questions or terms-of-use document. On the other hand, community infrastructures continue to allow such reshaping behaviors. To explain this controversy, we theorize how the extent of boundary reshaping in an online discussion community has simultaneously positive and negative effects on its member dynamics and responsiveness. We test predictions about the conflicting effects of reshaping behaviors with 60 months of longitudinal data from 140 USENET newsgroups, focusing on cross-posting activities as a form of reshaping behavior. Empirical results are consistent with the proposed hypotheses that reshaping behaviors within a discussion community affect member dynamics and community responsiveness in both positive and negative ways. 
Taken together, the findings highlight the boundary-related design challenges faced by managers seeking to support ongoing activity within online discussion communities.}, + issue = {3-part-2}, + file = {/home/nathante/Zotero/storage/MHIHVXMA/Butler and Wang - 2012 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/ZDTPFJP3/Butler and Wang - 2011 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/5XCPFJS9/isre.1110.html} +} + +@article{butler_membership_2001, + title = {Membership Size, Communication Activity, and Sustainability: {{A}} Resource-Based Model of Online Social Structures}, + shorttitle = {Membership {{Size}}, {{Communication Activity}}, and {{Sustainability}}}, + author = {Butler, Brian S.}, + date = {2001}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {12}, + number = {4}, + eprint = {23011457}, + eprinttype = {jstor}, + pages = {346--362}, + issn = {1047-7047}, + abstract = {As telecommunication networks become more common, there is an increasing interest in the factors underlying the development of online social structures. It has been proposed that these structures are new forms of organizing which are not subject to the same constraints as traditional social structures. However, from anecdotal evidence and case studies it is difficult to evaluate whether online social structures are subject to the same problems as traditional social structures. Drawing from prior studies of traditional social structures and empirical analyses of longitudinal data from a sample of Internet-based groups, this exploratory work considers the role of size and communication activity in sustainable online social structures. A resource-based theory of sustainable social structures is presented. Members contribute time, energy, and other resources, enabling a social structure to provide benefits for individuals. 
These benefits, which include information, influence, and social support, are the basis for a social structure's ability to attract and retain members. This model focuses on the system of opposing forces that link membership size as a component of resource availability and communication activity as an aspect of benefit provision to the sustainability of an online social structure. Analyses of data from a random sample of e-mail-based Internet social structures (listservs) indicate that communication activity and size have both positive and negative effects on a structure's sustainability. These results suggest that while the use of networked communication technologies may alter the form of communication, balancing the opposing impacts of membership size and communication activity in order to maintain resource availability and provide benefits for current members remains a fundamental problem underlying the development of sustainable online social structures.}, + file = {/home/nathante/Zotero/storage/4ENNLMAH/Butler - 2001 - Membership Size, Communication Activity, and Susta.pdf;/home/nathante/Zotero/storage/U7AUNAZT/Butler-2001-ISR-Membership_size_communication_activitiy_sustainability.pdf} +} + +@incollection{canova_bayesian_2007, + title = {Bayesian {{VARs}}}, + booktitle = {Methods for {{Applied Macroeconomic Research}}}, + author = {Canova, Fabio}, + date = {2007}, + eprint = {j.ctvcm4hrv.13}, + eprinttype = {jstor}, + pages = {373--417}, + publisher = {{Princeton University Press}}, + abstract = {We saw in chapter 4 that VAR models can be used to characterize any vector of time series under a minimal set of conditions. We have also seen that, since VARs are reduced-form models, identification restrictions, motivated by economic theory, are needed to conduct meaningful policy analyses. Reduced-form VARs are also typically unsuitable for out-of-sample forecasting. 
To reasonably approximate the Wold representation, it is in fact necessary to have a VAR with long lags. A generous parametrization means that unrestricted VARs are not operational alternatives to either standard macroeconometric models, where insignificant coefficients are purged out of the specification, or}, + isbn = {978-0-691-11504-7}, + file = {/home/nathante/Zotero/storage/PGQG5UX9/Canova - 2007 - Bayesian VARs.pdf} +} + +@incollection{canova_var_2007, + title = {{{VAR Models}}}, + booktitle = {Methods for {{Applied Macroeconomic Research}}}, + author = {Canova, Fabio}, + date = {2007}, + eprint = {j.ctvcm4hrv.7}, + eprinttype = {jstor}, + pages = {111--164}, + publisher = {{Princeton University Press}}, + abstract = {This chapter describes a set of techniques which stand apart from those considered in the next three chapters, in the sense that economic theory is only minimally used in the inferential process. VAR models, pioneered by Chris Sims about 25 years ago, have acquired a permanent place in the toolkit of applied macroeconomists, both to summarize the information contained in the data and to conduct certain types of policy experiments. VAR models are well-suited to the first purpose: the Wold theorem ensures that any vector of time series has a VAR representation under mild regularity conditions and this makes them}, + isbn = {978-0-691-11504-7}, + file = {/home/nathante/Zotero/storage/ZQYCMAPQ/Canova - 2007 - VAR Models.pdf} +} + +@article{carpenter_stan:_2016, + title = {Stan: {{A}} Probabilistic Programming Language}, + shorttitle = {Stan}, + author = {Carpenter, Bob and Gelman, Andrew and Hoffman, Matt and Lee, Daniel and Goodrich, Ben and Betancourt, Michael and Brubaker, Michael A. 
and Guo, Jiqiang and Li, Peter and Riddell, Allen}, + date = {2016}, + journaltitle = {Journal of Statistical Software}, + volume = {20}, + number = {2}, + pages = {1--37}, + file = {/home/nathante/Zotero/storage/2L4LAHJ2/Stan - Probabilistic Programming Language.pdf} +} + +@article{carroll_concentration_1985, + title = {Concentration and Specialization: {{Dynamics}} of Niche Width in Populations of Organizations}, + shorttitle = {Concentration and {{Specialization}}}, + author = {Carroll, Glenn R.}, + date = {1985-05-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {90}, + number = {6}, + pages = {1262--1283}, + issn = {0002-9602}, + abstract = {This paper departs from the common practice of focusing on large, generalist organizations and shows that new organizational insights are obtained by adopting a broader, ecological perspective. The newspaper publishing industry is examined as an illustration. The ecological focus shows that many small, specialized organizations operate successfully in this industry, despite apparently high levels of local concentration. A resource-partitioning model is advanced to explain the interorganizational relationships between generalist and specialist organizations. Statistical tests of the model using historical data on 2,808 American local newspaper organizations show the merit of using the ecological perspective for analyzing industries.}, + file = {/home/nathante/Zotero/storage/G38AK5SZ/Carroll - 1985 - Concentration and specialization Dynamics of nich.pdf;/home/nathante/Zotero/storage/8PG3QCP3/228210.html} +} + +@article{carroll_density_1989, + title = {Density Dependence in the Evolution of Populations of Newspaper Organizations}, + author = {Carroll, Glenn R.
and Hannan, Michael T.}, + date = {1989-08}, + journaltitle = {American Sociological Review}, + volume = {54}, + number = {4}, + eprint = {2095875}, + eprinttype = {jstor}, + pages = {524}, + issn = {00031224}, + file = {/home/nathante/Zotero/storage/TCCRW99U/DensitiyDependenceInNewspaperOrg_Carroll_Hannan_1989.pdf} +} + +@article{cenci_regularized_2019, + title = {Regularized {{S}}-Map for Inference and Forecasting with Noisy Ecological Time Series}, + author = {Cenci, Simone and Sugihara, George and Saavedra, Serguei}, + date = {2019}, + journaltitle = {Methods in Ecology and Evolution}, + volume = {10}, + number = {5}, + pages = {650--660}, + issn = {2041-210X}, + abstract = {It is well known that fluctuations of species abundances observed in ecological time series emerge from an interplay between deterministic nonlinear dynamics and stochastic forces. Importantly, nonlinearity and stochasticity introduce significant challenges to the analysis of ecological time series, such as the inference of the effect of species interactions on community dynamics and forecasting of species abundances. Local linear fits with state-space-dependent kernel functions, known as S-maps, provide an efficient method to infer Jacobian coefficients (a proxy for the local effect of species interactions) and to make reliable forecasts from nonlinear time series. Yet, while it has been shown that the S-map outperforms existing methods for nonparametric inference and forecasting, the methodology is sensitive to process noise. To overcome this limitation, we integrate the S-map with different regularization schemes. To validate our approach, we test our methodology against different levels of noise and nonlinearity using three standard population dynamics models. 
We show that an appropriate choice of the regularization scheme, alongside an accurate choice of the kernel functions, can significantly improve the in-sample inference of Jacobian coefficients and the out-of-sample forecast of species abundances in the presence of process noise. We further validate our methodology using two empirical time series of marine microbial communities. Our results illustrate that the regularized S-map is an efficient method for nonparametric inference and forecasting from noisy, nonlinear, ecological time series. Yet, attention must be paid on the regularization scheme and the structure of the kernel for whether inference or forecasting is the ultimate goal of a research study.}, + langid = {english}, + keywords = {_tablet,nonlinear time series,out-of-sample forecast,parameter inference,process noise,regularization,S-map}, + annotation = {\_eprint: https://besjournals.onlinelibrary.wiley.com/doi/pdf/10.1111/2041-210X.13150}, + file = {/home/nathante/Zotero/storage/J9VXK8CH/Cenci et al_2019_Regularized S-map for inference and forecasting with noisy ecological time.pdf;/home/nathante/Zotero/storage/WTD6LD6D/2041-210X.html} +} + +@article{certain_how_2018, + title = {How Do {{MAR}}(1) Models Cope with Hidden Nonlinearities in Ecological Dynamics?}, + author = {Certain, Grégoire and Barraquand, Frédéric and Gårdmark, Anna}, + date = {2018-09-01}, + journaltitle = {Methods in Ecology and Evolution}, + shortjournal = {Methods in Ecology and Evolution}, + volume = {9}, + number = {9}, + pages = {1975--1995}, + issn = {2041-210X}, + abstract = {Multivariate autoregressive (MAR) models are an increasingly popular technique to infer interaction strengths between species in a community and to predict the community response to environmental change.
The most commonly employed MAR(1) models, with one time lag, can be viewed either as multispecies competition models with Gompertz density dependence or, more generally, as a linear approximation of more complex, nonlinear dynamics around stable equilibria. This latter interpretation allows for broader applicability, but may come at a cost in terms of interpretation of estimates and reliability of both short- and long-term predictions. We investigate what these costs might be by fitting MAR(1) models to simulated 2-species competition, consumer-resource and host--parasitoid systems, as well as a larger food web influenced by the environment. We review how MAR(1) coefficients can be interpreted and evaluate how reliable are estimates of interaction strength, rank, or sign; accuracy of short-term forecasts; as well as the ability of MAR(1) models to predict the long-term responses of communities submitted to environmental change such as PRESS perturbations. The net effects of species j on species i are usually (90\%-95\%) well recovered in terms of sign or rank, with the notable exception of overcompensatory dynamics. In actual values, net effects of species j on species i are not well recovered when the underlying dynamics are nonlinear. MAR(1) models are better at making short-term qualitative forecasts (next point going up or down) than at predicting long-term responses to environmental perturbations, which can be severely over- as well as underestimated. We conclude that when applying MAR(1) models to ecological data, inferences on net effects among species should be limited to signs, or the Gompertz assumption should be tested and discussed. This particular assumption on density-dependence (log-linearity) is also required for unbiased long-term predictions.
Overall, we think that MAR(1) models are highly useful tools to resolve and characterize community dynamics, but we recommend to use them in conjunction with alternative, nonlinear models resembling the ecological context in order to improve their interpretation in specific applications.}, + file = {/home/nathante/Zotero/storage/PHIQKS3T/2041-210X.html} +} + +@article{chandrasekharan_internets_2018, + title = {The Internet's Hidden Rules: {{An}} Empirical Study of Reddit Norm Violations at Micro, Meso, and Macro Scales}, + shorttitle = {The {{Internet}}'s {{Hidden Rules}}}, + author = {Chandrasekharan, Eshwar and Samory, Mattia and Jhaver, Shagun and Charvat, Hunter and Bruckman, Amy and Lampe, Cliff and Eisenstein, Jacob and Gilbert, Eric}, + date = {2018}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + pages = {32:1--32:25}, + issn = {2573-0142}, + abstract = {Norms are central to how online communities are governed. Yet, norms are also emergent, arise from interaction, and can vary significantly between communities---making them challenging to study at scale. In this paper, we study community norms on Reddit in a large-scale, empirical manner. Via 2.8M comments removed by moderators of 100 top subreddits over 10 months, we use both computational and qualitative methods to identify three types of norms: macro norms that are universal to most parts of Reddit; meso norms that are shared across certain groups of subreddits; and micro norms that are specific to individual, relatively unique subreddits. Given the size of Reddit's user base---and the wide range of topics covered by different subreddits---we argue this represents the first large-scale census of the norms in broader internet culture. In other words, these findings shed light on what Reddit values, and how widely-held those values are. 
We conclude by discussing implications for the design of new and existing online communities.}, + issue = {CSCW}, + keywords = {community norms,mixed methods.,moderation,online communities}, + file = {/home/nathante/Zotero/storage/2CA9ZVFB/Chandrasekharan et al. - 2018 - The Internet's Hidden Rules An Empirical Study of.pdf;/home/nathante/Zotero/storage/HUP7XT5H/Chandrasekharan et al_2018_The Internet's Hidden Rules.pdf} +} + +@article{chandrasekharan_you_2017, + ids = {chandrasekharan_you_2017-1}, + title = {You Can't Stay Here: {{The}} Efficacy of Reddit's 2015 Ban Examined through Hate Speech}, + shorttitle = {You Can't Stay Here}, + author = {Chandrasekharan, Eshwar and Pavalanathan, Umashanthi and Srinivasan, Anirudh and Glynn, Adam and Eisenstein, Jacob and Gilbert, Eric}, + date = {2017-12}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {31:1--31:22}, + issn = {2573-0142}, + abstract = {In 2015, Reddit closed several subreddits-foremost among them r/fatpeoplehate and r/CoonTown-due to violations of Reddit's anti-harassment policy. However, the effectiveness of banning as a moderation approach remains unclear: banning might diminish hateful behavior, or it may relocate such behavior to different parts of the site. We study the ban of r/fatpeoplehate and r/CoonTown in terms of its effect on both participating users and affected subreddits. Working from over 100M Reddit posts and comments, we generate hate speech lexicons to examine variations in hate speech usage via causal inference methods. We find that the ban worked for Reddit. More accounts than expected discontinued using the site; those that stayed drastically decreased their hate speech usage-by at least 80\%. Though many subreddits saw an influx of r/fatpeoplehate and r/CoonTown "migrants," those subreddits saw no significant changes in hate speech usage. In other words, other subreddits did not inherit the problem. 
We conclude by reflecting on the apparent success of the ban, discussing implications for online moderation, Reddit and internet communities more broadly.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/5Z8CCRM2/Chandrasekharan et al. - 2017 - You Can'T Stay Here The Efficacy of Reddit's 2015.pdf} +} + +@book{charmaz_constructing_2015, + ids = {charmaz_constructing_2014}, + title = {Constructing Grounded Theory: {{A}} Practical Guide through Qualitative Analysis}, + shorttitle = {Constructing {{Grounded Theory}}}, + author = {Charmaz, Kathy}, + date = {2015}, + edition = {2}, + publisher = {{SAGE}}, + location = {{Thousand Oaks, California}}, + isbn = {0-7619-7352-4} +} + +@article{chen_impact_2019, + title = {The {{Impact}} of {{Media Censorship}}: 1984 or {{Brave New World}}?}, + shorttitle = {The {{Impact}} of {{Media Censorship}}}, + author = {Chen, Yuyu and Yang, David Y.}, + date = {2019-06}, + journaltitle = {American Economic Review}, + volume = {109}, + number = {6}, + pages = {2294--2332}, + issn = {0002-8282}, + abstract = {Media censorship is a hallmark of authoritarian regimes. We conduct a field experiment in China to measure the effects of providing citizens with access to an uncensored internet. We track subjects' media consumption, beliefs regarding the media, economic beliefs, political attitudes, and behaviors over 18 months. We find four main results: (i) free access alone does not induce subjects to acquire politically sensitive information; (ii) temporary encouragement leads to a persistent increase in acquisition, indicating that demand is not permanently low; (iii) acquisition brings broad, substantial, and persistent changes to knowledge, beliefs, attitudes, and intended behaviors; and (iv) social transmission of information is statistically significant but small in magnitude. 
We calibrate a simple model to show that the combination of low demand for uncensored information and the moderate social transmission means China's censorship apparatus may remain robust to a large number of citizens receiving access to an uncensored internet.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/DI644H6E/Chen and Yang - 2019 - The Impact of Media Censorship 1984 or Brave New .pdf;/home/nathante/Zotero/storage/FZC97WCG/Chen and Yang - 2019 - The Impact of Media Censorship 1984 or Brave New .pdf;/home/nathante/Zotero/storage/95EW4R3G/articles.html} +} + +@inproceedings{choudhury_social_2016, + title = {Social {{Media Participation}} in an {{Activist Movement}} for {{Racial Equality}}}, + booktitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Choudhury, Munmun De and Jhaver, Shagun and Sugar, Benjamin and Weber, Ingmar}, + date = {2016-03-31}, + abstract = {From the Arab Spring to the Occupy Movement, social media has been instrumental in driving and supporting socio-political movements throughout the world. In this paper, we present one of the first social media investigations of an activist movement around racial discrimination and police violence, known as “Black Lives Matter”. Considering Twitter as a sensor for the broader community’s perception of the events related to the movement, we study participation over time, the geographical differences in this participation, and its relationship to protests that unfolded on the ground. We find evidence for continued participation across four temporally separated events related to the movement, with notable changes in engagement and language over time. We also find that participants from regions of historically high rates of black victimization due to police violence tend to express greater negativity and make more references to loss of life. 
Finally, we observe that social media attributes of affect, behavior and language can predict future protest participation on the ground. We discuss the role of social media in enabling collective action around this unique movement and how social media platforms may help understand perceptions on a socially contested and sensitive issue like race.}, + eventtitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + langid = {english}, + file = {/home/nathante/Zotero/storage/FF8RRANF/De Choudhury et al_2016_Social Media Participation in an Activist Movement for Racial Equality.pdf;/home/nathante/Zotero/storage/PIRFXX7F/Choudhury et al_2016_Social Media Participation in an Activist Movement for Racial Equality.pdf;/home/nathante/Zotero/storage/ZBVVGIXA/De Choudhury et al_2016_Social Media Participation in an Activist Movement for Racial Equality.pdf;/home/nathante/Zotero/storage/6XM9W7ZH/13168.html;/home/nathante/Zotero/storage/HIXCPVI3/13168.html;/home/nathante/Zotero/storage/SF7VFSH4/13168.html} +} + +@article{copland_reddit_2020, + title = {Reddit Quarantined: Can Changing Platform Affordances Reduce Hateful Material Online?}, + shorttitle = {Reddit Quarantined}, + author = {Copland, Simon}, + date = {2020-10-21}, + journaltitle = {Internet Policy Review}, + volume = {9}, + number = {4}, + publisher = {{Berlin: Alexander von Humboldt Institute for Internet and Society}}, + issn = {2197-6775}, + abstract = {Can we reduce hateful material online through changing platform affordances? 
Studying Reddit’s quarantine function, this paper argues the results of this approach are mixed.}, + file = {/home/nathante/Zotero/storage/KY4RZWR4/Copland_2020_Reddit quarantined.pdf;/home/nathante/Zotero/storage/SZWA55IE/Copland_2020_Reddit quarantined.pdf;/home/nathante/Zotero/storage/9KXC37K7/225653.html;/home/nathante/Zotero/storage/M6NKY3K2/reddit-quarantined-can-changing-platform-affordances-reduce-hateful-material.html} +} + +@inproceedings{cunha_are_2019, + ids = {cunha_are_2019-1,cunha_are_2019-2}, + title = {Are All Successful Communities Alike? {{Characterizing}} and Predicting the Success of Online Communities}, + shorttitle = {Are All Successful Communities Alike?}, + booktitle = {The {{World Wide Web Conference}}}, + author = {Cunha, Tiago and Jurgens, David and Tan, Chenhao and Romero, Daniel}, + date = {2019-05-13}, + series = {{{WWW}} '19}, + pages = {318--328}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {The proliferation of online communities has created exciting opportunities to study the mechanisms that explain group success. While a growing body of research investigates community success through a single measure - typically, the number of members - we argue that there are multiple ways of measuring success. Here, we present a systematic study to understand the relations between these success definitions and test how well they can be predicted based on community properties and behaviors from the earliest period of a community's lifetime. We identify four success measures that are desirable for most communities: (i) growth in the number of members; (ii) retention of members; (iii) long term survival of the community; and (iv) volume of activities within the community. Surprisingly, we find that our measures do not exhibit very high correlations, suggesting that they capture different types of success. 
Additionally, we find that different success measures are predicted by different attributes of online communities, suggesting that success can be achieved through different behaviors. Our work sheds light on the basic understanding on what success represents in online communities and what predicts it. Our results suggest that success is multi-faceted and cannot be measured nor predicted by a single measurement. This insight has practical implications for the creation of new online communities and the design of platforms that facilitate such communities.}, + isbn = {978-1-4503-6674-8}, + keywords = {Group Dynamics,Online Communities,Reddit,Success}, + file = {/home/nathante/Zotero/storage/CGBFCUGX/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/IYW3WKHV/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/PFS6682S/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/SMX88EL3/Cunha et al. - 2019 - Are All Successful Communities Alike Characterizi.pdf} +} + +@inproceedings{dabbish_fresh_2012, + ids = {dabbish_fresh_2012-1}, + title = {Fresh Faces in the Crowd: Turnover, Identity, and Commitment in Online Groups}, + shorttitle = {Fresh Faces in the Crowd}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work}}}, + author = {Dabbish, Laura and Farzan, Rosta and Kraut, Robert and Postmes, Tom}, + date = {2012-02-11}, + series = {{{CSCW}} '12}, + pages = {245--248}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Turnover is commonplace in many online groups because of low barriers of entry and exit. In offline settings, turnover can have a negative impact because of reduced attachment to the group as an entity. However, in an online setting, turnover in terms of changes in the visible membership of a group may have a very different impact. 
Online only a limited amount of information about members and their activities is observable; in particular, it is easier to see the behavior of the subset of members who are active than the potentially larger set who are not. In this paper, we describe an experiment examining the influence of visible membership turnover on commitment to an online group. Our results suggest that increased turnover in an online group may increase social presence, creating perceptions of liveness, in turn leading to increased levels of participation in the group. However, this result holds primarily for groups with a common identity, suggesting that attention to behavior of others may be stronger when people share an identity with those others. Our results extend understandings of attachment in an online setting as well as theory about social tuning.}, + isbn = {978-1-4503-1086-4}, + keywords = {attachment,commitment,identity.,online groups,turnover}, + file = {/home/nathante/Zotero/storage/3IQQP4JM/Dabbish et al. - 2012 - Fresh faces in the crowd turnover, identity, and .pdf;/home/nathante/Zotero/storage/GEVF3A53/Dabbish et al. - 2012 - Fresh faces in the crowd turnover, identity, and .pdf} +} + +@inproceedings{danescu-niculescu-mizil_no_2013, + ids = {danescu-niculescu-mizil_no_2013-1}, + title = {No Country for Old Members: User Lifecycle and Linguistic Change in Online Communities}, + shorttitle = {No Country for Old Members}, + booktitle = {Proceedings of the 22nd International Conference on {{World Wide Web}} - {{WWW}} '13}, + author = {Danescu-Niculescu-Mizil, Cristian and West, Robert and Jurafsky, Dan and Leskovec, Jure and Potts, Christopher}, + date = {2013}, + pages = {307--318}, + publisher = {{ACM Press}}, + location = {{Rio de Janeiro, Brazil}}, + abstract = {Vibrant online communities are in constant flux. As members join and depart, the interactional norms evolve, stimulating further changes to the membership and its social dynamics. 
Linguistic change—in the sense of innovation that becomes accepted as the norm—is essential to this dynamic process: it both facilitates individual expression and fosters the emergence of a collective identity. We propose a framework for tracking linguistic change as it happens and for understanding how specific users react to these evolving norms. By applying this framework to two large online communities we show that users follow a determined two-stage lifecycle with respect to their susceptibility to linguistic change: a linguistically innovative learning phase in which users adopt the language of the community followed by a conservative phase in which users stop changing and the evolving community norms pass them by.}, + eventtitle = {The 22nd International Conference}, + isbn = {978-1-4503-2035-1}, + langid = {english}, + venue = {Rio de Janeiro, Brazil}, + file = {/home/nathante/Zotero/storage/L532IPRV/Danescu-Niculescu-Mizil et al. - 2013 - No Country for Old Members User Lifecycle and Lin.pdf;/home/nathante/Zotero/storage/LWECW2QM/Danescu-Niculescu-Mizil et al. - 2013 - No country for old members user lifecycle and lin.pdf} +} + +@article{datta_extracting_2019, + title = {Extracting {{Inter}}-{{Community Conflicts}} in {{Reddit}}}, + author = {Datta, Srayan and Adar, Eytan}, + date = {2019-07-06}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {13}, + pages = {146--157}, + issn = {2334-0770}, + abstract = {Anti-social behaviors in social media can happen both at user and community levels. While a great deal of attention is on the individual as an ‘aggressor,’ the banning of entire Reddit subcommunities (i.e., subreddits) demonstrates that this is a multi-layer concern. Existing research on inter-community conflict has largely focused on specific subcommunities or ideological opponents. However, antagonistic behaviors may be more pervasive and integrate into the broader network. 
In this work, we study the landscape of conflicts among subreddits by deriving higher-level (community) behaviors from the way individuals are sanctioned and rewarded. By constructing a conflict network, we characterize different patterns in subreddit-to-subreddit conflicts as well as communities of ‘co-targeted’ subreddits. The dynamics of these interactions also reveals a shift in conflict focus over time.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6IA9VN8K/Datta_Adar_2019_Extracting Inter-Community Conflicts in Reddit.pdf;/home/nathante/Zotero/storage/F3MHZ7Z6/3217.html} +} + +@article{datta_identifying_2017, + title = {Identifying {{Misaligned Inter}}-{{Group Links}} and {{Communities}}}, + author = {Datta, Srayan and Phelan, Chanda and Adar, Eytan}, + date = {2017-12-06}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {37:1--37:23}, + abstract = {Many social media systems explicitly connect individuals (e.g., Facebook or Twitter); as a result, they are the targets of most research on social networks. However, many systems do not emphasize or support explicit linking between people (e.g., Wikipedia or Reddit), and even fewer explicitly link communities. Instead, network analysis is performed through inference on implicit connections, such as co-authorship or text similarity. Depending on how inference is done and what data drove it, different networks may emerge. While correlated structures often indicate stability, in this work we demonstrate that differences, or misalignment, between inferred networks also capture interesting behavioral patterns. For example, high-text but low-author similarity often reveals communities "at war" with each other over an issue or high-author but low-text similarity can suggest community fragmentation.
Because we are able to model edge direction, we also find that asymmetry in degree (in-versus-out) co-occurs with marginalized identities (subreddits related to women, people of color, LGBTQ, etc.). In this work, we provide algorithms that can identify misaligned links, network structures and communities. We then apply these techniques to Reddit to demonstrate how these algorithms can be used to decipher inter-group dynamics in social media.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/52FT8LT8/Datta et al. - 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf;/home/nathante/Zotero/storage/WKCJHV6R/Datta et al. - 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf} +} + +@article{de_choudhury_mental_2014, + title = {Mental Health Discourse on Reddit: Self-Disclosure, Social Support, and Anonymity}, + shorttitle = {Mental Health Discourse on Reddit}, + author = {De Choudhury, Munmun and De, Sushovan}, + date = {2014-05-16}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + shortjournal = {ICWSM}, + volume = {8}, + number = {1}, + pages = {71--80}, + issn = {2334-0770}, + issue = {1}, + langid = {english}, + keywords = {disinhibition}, + file = {/home/nathante/Zotero/storage/KNC2AQLL/Choudhury and De - 2014 - Mental Health Discourse on reddit Self-Disclosure.pdf;/home/nathante/Zotero/storage/GE2HKJ48/14526.html} +} + +@online{del_tredici_semantic_2018, + title = {Semantic {{Variation}} in {{Online Communities}} of {{Practice}}}, + author = {Del Tredici, Marco and Fernández, Raquel}, + date = {2018-06-15}, + eprint = {1806.05847}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {We introduce a framework for quantifying semantic variation of common words in Communities of Practice and in sets of topic-related communities. 
We show that while some meaning shifts are shared across related communities, others are community-specific, and therefore independent from the discussed topic. We propose such findings as evidence in favour of sociolinguistic theories of socially-driven semantic variation. Results are evaluated using an independent language modelling task. Furthermore, we investigate extralinguistic features and show that factors such as prominence and dissemination of words are related to semantic variation.}, + archiveprefix = {arXiv}, + langid = {english}, + file = {/home/nathante/Zotero/storage/E62NF57M/Del Tredici and Fernández - 2018 - Semantic Variation in Online Communities of Practi.pdf} +} + +@article{dimaggio_social_2001, + title = {Social Implications of the {{Internet}}}, + author = {DiMaggio, Paul and Hargittai, Eszter and Neuman, W. Russell and Robinson, John P.}, + date = {2001-08}, + journaltitle = {Annual Review of Sociology}, + volume = {27}, + number = {1}, + pages = {307--336}, + abstract = {The Internet is a critically important research site for sociologists testing theories of technology diffusion and media effects, particularly because it is a medium uniquely capable of integrating modes of communication and forms of content. Current research tends to focus on the Internet's implications in five domains: 1) inequality (the “digital divide”); 2) community and social capital; 3) political participation; 4) organizations and other economic institutions; and 5) cultural participation and cultural diversity. A recurrent theme across domains is that the Internet tends to complement rather than displace existing media and patterns of behavior. Thus in each domain, utopian claims and dystopic warnings based on extrapolations from technical possibilities have given way to more nuanced and circumscribed understandings of how Internet use adapts to existing patterns, permits certain innovations, and reinforces particular kinds of change. 
Moreover, in each domain the ultimate social implications of this new technology depend on economic, legal, and policy decisions that are shaping the Internet as it becomes institutionalized. Sociologists need to study the Internet more actively and, particularly, to synthesize research findings on individual user behavior with macroscopic analyses of institutional and political-economic factors that constrain that behavior.}, + file = {/home/nathante/Zotero/storage/DQUKUVBM/DiMaggio et al. - 2001 - Social implications of the internet.pdf} +} + +@article{dobrev_dynamics_2001, + title = {Dynamics of Niche Width and Resource Partitioning}, + author = {Dobrev, Stanislav D. and Kim, Tai‐Young and Hannan, Michael T.}, + date = {2001}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {106}, + number = {5}, + eprint = {10.1086/320821}, + eprinttype = {jstor}, + pages = {1299--1337}, + issn = {0002-9602}, + abstract = {This article examines the effects of crowding in a market center on rates of change in organizational niche width and on organizational mortality. It proposes that, although firms with wide niches benefit from risk spreading and economies of scale, they are simultaneously exposed to intense competition. An analysis of organizational dynamics in automobile manufacturing firms in France, Germany, and Great Britain shows that competitive pressure not only increases the hazard of disbanding but also prompts organizational transformations that give rise to processes of resource partitioning. Emphasizing the content/process distinction in conceptualizing organizational change, the article finds that the process effect of changes in niche width and position increases mortality hazards. 
We discuss our findings in light of the processes investigated by the ecological theories of density dependence, resource partitioning, and structural inertia, and point to the theoretical links that help to integrate these theories.}, + file = {/home/nathante/Zotero/storage/7HQIXSCS/Dobrev et al. - 2001 - Dynamics of niche width and resource partitioning.pdf} +} + +@inproceedings{ducheneaut_alone_2006, + title = {"{{Alone}} Together?": Exploring the Social Dynamics of Massively Multiplayer Online Games}, + shorttitle = {"{{Alone Together}}?}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Ducheneaut, Nicolas and Yee, Nicholas and Nickell, Eric and Moore, Robert J.}, + date = {2006}, + series = {{{CHI}} '06}, + pages = {407--416}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Massively Multiplayer Online Games (MMOGs) routinely attract millions of players but little empirical data is available to assess their players' social experiences. In this paper, we use longitudinal data collected directly from the game to examine play and grouping patterns in one of the largest MMOGs: World of Warcraft. Our observations show that the prevalence and extent of social activities in MMOGs might have been previously over-estimated, and that gaming communities face important challenges affecting their cohesion and eventual longevity. We discuss the implications of our findings for the design of future games and other online social spaces.}, + isbn = {978-1-59593-372-0}, + keywords = {activity metrics,massively multiplayer online games,MUDs,Online Communities,quantitative,social dynamics}, + file = {/home/nathante/Zotero/storage/SXB825WY/Ducheneaut et al. - 2006 - Alone together exploring the social dynamics o.pdf;/home/nathante/Zotero/storage/ZMRDXAKE/Ducheneaut et al. 
- 2006 - Alone Together Exploring the Social Dynamics o.pdf} +} + +@article{dumais_latent_2004, + title = {Latent Semantic Analysis}, + author = {Dumais, Susan T.}, + date = {2004}, + journaltitle = {Annual Review of Information Science and Technology}, + volume = {38}, + number = {1}, + pages = {188--230}, + issn = {1550-8382}, + langid = {english}, + annotation = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/aris.1440380105}, + file = {/home/nathante/Zotero/storage/FJRA3VVC/Dumais_2004_Latent semantic analysis.pdf;/home/nathante/Zotero/storage/Y6WQY5QL/aris.html} +} + +@article{fiesler_moving_2020, + ids = {fiesler_moving_2020-1,fiesler_moving_2020-2}, + title = {Moving {{Across Lands}}: {{Online Platform Migration}} in {{Fandom Communities}}}, + shorttitle = {Moving {{Across Lands}}}, + author = {Fiesler, Casey and Dym, Brianna}, + date = {2020-05-28}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {4}, + pages = {042:1--042:25}, + abstract = {When online platforms rise and fall, sometimes communities fade away, and sometimes they pack their bags and relocate to a new home. To explore the causes and effects of online community migration, we examine transformative fandom, a longstanding, technology-agnostic community surrounding the creation, sharing, and discussion of creative works based on existing media. For over three decades, community members have left and joined many different online spaces, from Usenet to Tumblr to platforms of their own design. Through analysis of 28 in-depth interviews and 1,886 survey responses from fandom participants, we traced these migrations, the reasons behind them, and their impact on the community. Our findings highlight catalysts for migration that provide insights into factors that contribute to success and failure of platforms, including issues surrounding policy, design, and community. 
Further insights into the disruptive consequences of migrations (such as social fragmentation and lost content) suggest ways that platforms might both support commitment and better support migration when it occurs.}, + issue = {CSCW1}, + file = {/home/nathante/Zotero/storage/ER8P5AJ2/Fiesler_Dym_2020_Moving Across Lands.pdf;/home/nathante/Zotero/storage/JHDILSYU/Fiesler and Dym - 2020 - Moving Across Lands Online Platform Migration in .pdf} +} + +@inproceedings{fiesler_reddit_2018, + title = {Reddit Rules! {{Characterizing}} an Ecosystem of Governance.}, + booktitle = {Proceedings of the {{AAAI International Conference}} on {{Web}} and {{Social Media}}}, + author = {Fiesler, Casey and Jiang, Jialun "Aaron" and McCann, Joshua and Frye, Kyle and Brubaker, Jed R.}, + date = {2018}, + pages = {72--81}, + publisher = {{AAAI}}, + location = {{Stanford, CA}}, + eventtitle = {{{ICWSM}}}, + file = {/home/nathante/Zotero/storage/34TYXTGB/Fiesler - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/G9VFI2L7/Fiesler et al. - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/KT7KNG3J/Fiesler et al. - 2018 - Reddit rules! Characterizing an ecosystem of gover.pdf} +} + +@online{foote_how_2020, + title = {How Individual Behaviors Drive Inequality in Online Community Sizes: An Agent-Based Simulation}, + shorttitle = {How Individual Behaviors Drive Inequality in Online Community Sizes}, + author = {Foote, Jeremy and TeBlunthuis, Nathan and Hill, Benjamin Mako and Shaw, Aaron}, + date = {2020-06-04}, + eprint = {2006.03119}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Why are online community sizes so extremely unequal? Most answers to this question have pointed to general mathematical processes drawn from physics like cumulative advantage. These explanations provide little insight into specific social dynamics or decisions that individuals make when joining and leaving communities. 
In addition, explanations in terms of cumulative advantage do not draw from the enormous body of social computing research that studies individual behavior. Our work bridges this divide by testing whether two influential social mechanisms used to explain community joining can also explain the distribution of community sizes. Using agent-based simulations, we evaluate how well individual-level processes of social exposure and decisions based on individual expected benefits reproduce empirical community size data from Reddit. Our simulations contribute to social computing theory by providing evidence that both processes together---but neither alone---generate realistic distributions of community sizes. Our results also illustrate the potential value of agent-based simulation to online community researchers to both evaluate and bridge individual and group-level theories.}, + archiveprefix = {arXiv}, + file = {/home/nathante/Zotero/storage/PMZDH4B2/Foote et al_2020_How individual behaviors drive inequality in online community sizes.pdf;/home/nathante/Zotero/storage/D57HFTGF/2006.html} +} + +@article{freeman_community_2006, + ids = {freeman_community_2006-1}, + title = {Community Ecology and the Sociology of Organizations}, + author = {Freeman, John H. and Audia, Pino G.}, + date = {2006}, + journaltitle = {Annual Review of Sociology}, + shortjournal = {Annual Review of Sociology}, + volume = {32}, + eprint = {29737735}, + eprinttype = {jstor}, + pages = {145--169}, + issn = {0360-0572}, + abstract = {Research on organizations is increasingly informed by analysis of community context. Community can be conceptualized as sets of relations between organizational forms or as places where organizations are located in resource space or in geography. In both modes, organizations operate interdependently with social institutions and with other units of social structure. 
Because such relationships channel flows of resources, opportunities are granted or withheld from social actors depending in part on their organization connections. Such considerations encourage analyses of organizations in ways that spread the relevance of results beyond organizationally defined research problem areas.}, + file = {/home/nathante/Zotero/storage/UT6RXR39/Freeman_Audia_2006_Community Ecology and the Sociology of Organizations.pdf} +} + +@article{freeman_liability_1983, + title = {The {{Liability}} of {{Newness}}: {{Age Dependence}} in {{Organizational Death Rates}}}, + shorttitle = {The {{Liability}} of {{Newness}}}, + author = {Freeman, John and Carroll, Glenn R. and Hannan, Michael T.}, + date = {1983}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {48}, + number = {5}, + eprint = {2094928}, + eprinttype = {jstor}, + pages = {692--710}, + issn = {0003-1224}, + abstract = {Age dependence in organizational death rates is studied using data on three populations of organizations: national labor unions, semiconductor electronics manufacturers, and newspaper publishing companies. There is a liability of newness in each of these populations but it differs depending on whether death occurs through dissolution or by absorption through merger. Liabilities of smallness and bigness are also identified but controlling for them does not eliminate age dependence.}, + file = {/home/nathante/Zotero/storage/CT32HPF2/Freeman et al. - 1983 - The Liability of Newness Age Dependence in Organi.pdf} +} + +@article{frey_clustering_2007, + title = {Clustering by {{Passing Messages Between Data Points}}}, + author = {Frey, Brendan J. 
and Dueck, Delbert}, + date = {2007-02-16}, + journaltitle = {Science}, + volume = {315}, + number = {5814}, + eprint = {17218491}, + eprinttype = {pmid}, + pages = {972--976}, + publisher = {{American Association for the Advancement of Science}}, + issn = {0036-8075, 1095-9203}, + abstract = {Clustering data by identifying a subset of representative examples is important for processing sensory signals and detecting patterns in data. Such “exemplars” can be found by randomly choosing an initial subset of data points and then iteratively refining it, but this works well only if that initial choice is close to a good solution. We devised a method called “affinity propagation,” which takes as input measures of similarity between pairs of data points. Real-valued messages are exchanged between data points until a high-quality set of exemplars and corresponding clusters gradually emerges. We used affinity propagation to cluster images of faces, detect genes in microarray data, identify representative sentences in this manuscript, and identify cities that are efficiently accessed by airline travel. Affinity propagation found clusters with much lower error than other methods, and it did so in less than one-hundredth the amount of time. 
An algorithm that exchanges messages about the similarity of pairs of data points speeds identification of representative examples in a complex data set, such as genes in DNA data.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/PVGJU5KN/Frey_Dueck_2007_Clustering by Passing Messages Between Data Points.pdf;/home/nathante/Zotero/storage/ERM5BMQT/972.html} +} + +@article{frey_emergence_2019, + title = {Emergence of Integrated Institutions in a Large Population of Self-Governing Communities}, + author = {Frey, Seth and Sumner, Robert W.}, + date = {2019-07-11}, + journaltitle = {PLOS ONE}, + shortjournal = {PLOS ONE}, + volume = {14}, + number = {7}, + pages = {e0216335}, + publisher = {{Public Library of Science}}, + issn = {1932-6203}, + abstract = {Most aspects of our lives are governed by large, highly developed institutions that integrate several governance tasks under one authority structure. But theorists differ as to the mechanisms that drive the development of such concentrated governance systems from rudimentary beginnings. Is the emergence of integrated governance schemes a symptom of consolidation of authority by small status groups? Or does integration occur because a complex institution has more potential responses to a complex environment? Here we examine the emergence of complex governance regimes in 5,000 sovereign, resource-constrained, self-governing online communities, ranging in scale from one to thousands of users. Each community begins with no community members and no governance infrastructure. As communities grow, they are subject to selection pressures that keep better managed servers better populated. We identify predictors of community success and test the hypothesis that governance complexity can enhance community fitness. We find that what predicts success depends on size: changes in complexity predict increased success with larger population servers. 
Specifically, governance rules in a large successful community are more numerous and broader in scope. They also tend to rely more on rules that concentrate power in administrators, and on rules that manage bad behavior and limited server resources. Overall, this work is consistent with theories that formal integrated governance systems emerge to organize collective responses to interdependent resource management problems, especially as factors such as population size exacerbate those problems.}, + langid = {english}, + keywords = {Community ecology,Computer software,Forests,Games,Internet,Online encyclopedias,Political theory,Resource management,Social psychology,Video games}, + file = {/home/nathante/Zotero/storage/AXDJPNKE/Frey and Sumner - 2019 - Emergence of integrated institutions in a large po.pdf;/home/nathante/Zotero/storage/DA5HAVLH/Frey_Sumner_2019_Emergence of integrated institutions in a large population of self-governing.pdf;/home/nathante/Zotero/storage/Q3FI9DBS/Frey and Sumner - 2019 - Emergence of integrated institutions in a large po.pdf;/home/nathante/Zotero/storage/4B26ZMHH/article.html;/home/nathante/Zotero/storage/4CRK5UUM/article.html;/home/nathante/Zotero/storage/8XFADRSX/article.html} +} + +@article{fulk_connective_1996, + title = {Connective and Communal Public Goods in Interactive Communication Systems}, + author = {Fulk, Janet and Flanagin, Andrew J. and Kalman, Michael E. and Monge, Peter R. and Ryan, Timothy}, + date = {1996}, + journaltitle = {Communication Theory}, + volume = {6}, + number = {1}, + pages = {60--87}, + issn = {1468-2885}, + abstract = {This paper extends theories of public goods to interactive communication systems. Two key public communication goods are identified. Connectivity provides point-to-point communication, and communality links members through commonly held information, such as that often found in databases. 
These extensions are important, we argue, because communication public goods operate differently from traditional material public goods. These differences have important implications for costs, benefits, and the realization of a critical mass of users that is necessary for realization of the good. We also explore multifunctional goods that combine various features and hybrid goods that link private goods to public ones. We examine the applicability of two key assumptions of public goods theory to interactive communication systems. First, jointness of supply specifies that consumption of a public good does not diminish its availability to others. Second, impossibility of exclusion stipulates that all members of the public have access to the good. We conclude with suggestions for further theoretical development.}, + langid = {english}, + keywords = {maintaining public goods}, + file = {/home/nathante/Zotero/storage/ZJVU4TGW/Fulk et al. - 1996 - Connective and communal public goods in interactiv.pdf;/home/nathante/Zotero/storage/8J5CPWLV/4259000.html} +} + +@inproceedings{geiger_using_2013, + title = {Using Edit Sessions to Measure Participation in {{Wikipedia}}}, + booktitle = {Proceedings of the 2013 Conference on {{Computer}} Supported Cooperative Work}, + author = {Geiger, R. 
Stuart and Halfaker, Aaron}, + date = {2013}, + pages = {861--870}, + publisher = {{ACM}}, + keywords = {activity,labor,labor-hours,peer production,quantitative methods,sessions,wikipedia,work,work practices}, + file = {/home/nathante/Zotero/storage/6EZ7WJ4T/Geiger and Halfaker - 2013 - Using edit sessions to measure participation in Wi.pdf;/home/nathante/Zotero/storage/9Z6ATSSC/cscw-sessions.pdf;/home/nathante/Zotero/storage/MFHWZS8R/Geiger and Halfaker - 2013 - Using Edit Sessions to Measure Participation in Wi.pdf;/home/nathante/Zotero/storage/VZ7BVKRW/citation.html} +} + +@article{gelman_why_2012, + title = {Why We (Usually) Don't Have to Worry about Multiple Comparisons}, + author = {Gelman, Andrew and Hill, Jennifer and Yajima, Masanao}, + date = {2012-04-01}, + journaltitle = {Journal of Research on Educational Effectiveness}, + volume = {5}, + number = {2}, + pages = {189--211}, + publisher = {{Routledge}}, + issn = {1934-5747}, + abstract = {Applied researchers often find themselves making statistical inferences in settings that would seem to require multiple comparisons adjustments. We challenge the Type I error paradigm that underlies these corrections. Moreover we posit that the problem of multiple comparisons can disappear entirely when viewed from a hierarchical Bayesian perspective. We propose building multilevel models in the settings where multiple comparisons arise. Multilevel models perform partial pooling (shifting estimates toward each other), whereas classical procedures typically keep the centers of intervals stationary, adjusting for multiple comparisons by making the intervals wider (or, equivalently, adjusting the p values corresponding to intervals of fixed width). 
Thus, multilevel models address the multiple comparisons problem and also yield more efficient estimates, especially in settings with low group-level variation, which is where multiple comparisons are a particular concern.}, + keywords = {Bayesian inference,hierarchical modeling,multiple comparisons,statistical significance,Type S error}, + annotation = {\_eprint: https://doi.org/10.1080/19345747.2011.618213} +} + +@article{gneiting_strictly_2007, + title = {Strictly {{Proper Scoring Rules}}, {{Prediction}}, and {{Estimation}}}, + author = {Gneiting, Tilmann and Raftery, Adrian E.}, + date = {2007-03-01}, + journaltitle = {Journal of the American Statistical Association}, + volume = {102}, + number = {477}, + pages = {359--378}, + publisher = {{Taylor \& Francis}}, + issn = {0162-1459}, + abstract = {Scoring rules assess the quality of probabilistic forecasts, by assigning a numerical score based on the predictive distribution and on the event or value that materializes. A scoring rule is proper if the forecaster maximizes the expected score for an observation drawn from the distribution F if he or she issues the probabilistic forecast F, rather than G ≠ F. It is strictly proper if the maximum is unique. In prediction problems, proper scoring rules encourage the forecaster to make careful assessments and to be honest. In estimation problems, strictly proper scoring rules provide attractive loss and utility functions that can be tailored to the problem at hand. This article reviews and develops the theory of proper scoring rules on general probability spaces, and proposes and discusses examples thereof. Proper scoring rules derive from convex functions and relate to information measures, entropy functions, and Bregman divergences. In the case of categorical variables, we prove a rigorous version of the Savage representation. 
Examples of scoring rules for probabilistic forecasts in the form of predictive densities include the logarithmic, spherical, pseudospherical, and quadratic scores. The continuous ranked probability score applies to probabilistic forecasts that take the form of predictive cumulative distribution functions. It generalizes the absolute error and forms a special case of a new and very general type of score, the energy score. Like many other scoring rules, the energy score admits a kernel representation in terms of negative definite functions, with links to inequalities of Hoeffding type, in both univariate and multivariate settings. Proper scoring rules for quantile and interval forecasts are also discussed. We relate proper scoring rules to Bayes factors and to cross-validation, and propose a novel form of cross-validation known as random-fold cross-validation. A case study on probabilistic weather forecasts in the North American Pacific Northwest illustrates the importance of propriety. We note optimum score approaches to point and quantile estimation, and propose the intuitively appealing interval score as a utility function in interval estimation that addresses width as well as coverage.}, + keywords = {Bayes factor,Bregman divergence,Brier score,Coherent,Continuous ranked probability score,Cross-validation,Entropy,Kernel score,Loss function,Minimum contrast estimation,Negative definite function,Prediction interval,Predictive distribution,Quantile forecast,Scoring rule,Skill score,Strictly proper,Utility function}, + annotation = {\_eprint: https://doi.org/10.1198/016214506000001437}, + file = {/home/nathante/Zotero/storage/ZDL34XF9/Gneiting_Raftery_2007_Strictly Proper Scoring Rules, Prediction, and Estimation.pdf;/home/nathante/Zotero/storage/6DRGUBQI/016214506000001437.html} +} + +@article{graeff_battle_2014, + title = {The Battle for ‘{{Trayvon Martin}}’: {{Mapping}} a Media Controversy Online and off-Line}, + shorttitle = {The Battle for ‘{{Trayvon 
Martin}}’}, + author = {Graeff, Erhardt and Stempeck, Matt and Zuckerman, Ethan}, + date = {2014-01}, + journaltitle = {First Monday}, + volume = {19}, + number = {2}, + issn = {13960466}, + langid = {english}, + keywords = {controversy mapping,media cloud,networked gatekeeping,political networks,quantitative media analysis}, + file = {/home/nathante/Zotero/storage/EXNM66WB/Graeff et al. - 2014 - The battle for ‘Trayvon Martin’ Mapping a media c.pdf;/home/nathante/Zotero/storage/BW5KPRPA/4947.html;/home/nathante/Zotero/storage/T7J9BSVG/3821.html} +} + +@article{gu_competition_2007, + ids = {gu_competition_2007-1}, + title = {Competition among Virtual Communities and User Valuation: {{The}} Case of Investing-Related Communities}, + shorttitle = {Competition {{Among Virtual Communities}} and {{User Valuation}}}, + author = {Gu, Bin and Konana, Prabhudev and Rajagopalan, Balaji and Chen, Hsuan-Wei Michelle}, + date = {2007}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {18}, + number = {1}, + eprint = {23211832}, + eprinttype = {jstor}, + pages = {68--85}, + issn = {1047-7047}, + abstract = {Virtual communities are a significant source of information for consumers and businesses. This research examines how users value virtual communities and how virtual communities differ in their value propositions. In particular, this research examines the nature of trade-offs between information quantity and quality, and explores the sources of positive and negative externalities in virtual communities. The analyses are based on more than 500,000 postings collected from three large virtual investing-related communities (VICs) for 14 different stocks over a period of four years. The findings suggest that the VICs engage in differentiated competition as they face trade-offs between information quantity and quality. This differentiation among VICs, in turn, attracts users with different characteristics. 
We find both positive and negative externalities at work in virtual communities. We propose and validate that the key factor that determines the direction of network externalities is posting quality. The contributions of the study include the extension of our understanding of the virtual community evaluation by users, the exposition of competition between virtual communities, the role of network externalities in virtual communities, and the development of an algorithmic methodology to evaluate the quality (noise or signal) of textual data. The insights from the study provide useful guidance for design and management of VICs.}, + file = {/home/nathante/Zotero/storage/ACBCQ93N/Gu et al. - 2007 - Competition Among Virtual Communities and User Val.pdf;/home/nathante/Zotero/storage/KJZXB8P6/Gu et al. - 2007 - Competition Among Virtual Communities and User Val.pdf} +} + +@inproceedings{hale_cross-language_2015, + title = {Cross-Language {{Wikipedia Editing}} of {{Okinawa}}, {{Japan}}}, + booktitle = {Proceedings of the 33rd {{Annual ACM Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Hale, Scott A.}, + date = {2015}, + series = {{{CHI}} '15}, + pages = {183--192}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {This article analyzes users who edit Wikipedia articles about Okinawa, Japan, in English and Japanese. It finds these users are among the most active and dedicated users in their primary languages, where they make many large, high-quality edits. However, when these users edit in their non-primary languages, they tend to make edits of a different type that are overall smaller in size and more often restricted to the narrow set of articles that exist in both languages. 
Design changes to motivate wider contributions from users in their non-primary languages and to encourage multilingual users to transfer more information across language divides are presented.}, + isbn = {978-1-4503-3145-6}, + file = {/home/nathante/Zotero/storage/WL3BSR4A/Hale - 2015 - Cross-language Wikipedia Editing of Okinawa, Japan.pdf} +} + +@article{halfaker_rise_2013, + title = {The Rise and Decline of an Open Collaboration System: How {{Wikipedia}}'s Reaction to Popularity Is Causing Its Decline}, + shorttitle = {The {{Rise}} and {{Decline}} of an {{Open Collaboration System}}}, + author = {Halfaker, Aaron and Geiger, R. Stuart and Morgan, Jonathan T. and Riedl, John}, + date = {2013-05-01}, + journaltitle = {American Behavioral Scientist}, + shortjournal = {American Behavioral Scientist}, + volume = {57}, + number = {5}, + pages = {664--688}, + issn = {0002-7642}, + abstract = {Open collaboration systems, such as Wikipedia, need to maintain a pool of volunteer contributors to remain relevant. Wikipedia was created through a tremendous number of contributions by millions of contributors. However, recent research has shown that the number of active contributors in Wikipedia has been declining steadily for years and suggests that a sharp decline in the retention of newcomers is the cause. This article presents data that show how several changes the Wikipedia community made to manage quality and consistency in the face of a massive growth in participation have ironically crippled the very growth they were designed to manage. Specifically, the restrictiveness of the encyclopedia’s primary quality control mechanism and the algorithmic tools used to reject contributions are implicated as key causes of decreased newcomer retention. 
Furthermore, the community’s formal mechanisms for norm articulation are shown to have calcified against changes—especially changes proposed by newer editors.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/7B7AFK58/Halfaker et al. - 2013 - The rise and decline of an open collaboration syst.pdf;/home/nathante/Zotero/storage/Y9676KNV/The Rise and Decline of an Open Collaboration Syst.pdf} +} + +@online{hamilton_loyalty_2017, + title = {Loyalty in Online Communities}, + author = {Hamilton, William L. and Zhang, Justine and Danescu-Niculescu-Mizil, Cristian and Jurafsky, Dan and Leskovec, Jure}, + date = {2017-05-24}, + eprint = {1703.03386}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Loyalty is an essential component of multi-community engagement. When users have the choice to engage with a variety of different communities, they often become loyal to just one, focusing on that community at the expense of others. However, it is unclear how loyalty is manifested in user behavior, or whether loyalty is encouraged by certain community characteristics. In this paper we operationalize loyalty as a user-community relation: users loyal to a community consistently prefer it over all others; loyal communities retain their loyal users over time. By exploring this relation using a large dataset of discussion communities from Reddit, we reveal that loyalty is manifested in remarkably consistent behaviors across a wide spectrum of communities. Loyal users employ language that signals collective identity and engage with more esoteric, less popular content, indicating they may play a curational role in surfacing new material. Loyal communities have denser user-user interaction networks and lower rates of triadic closure, suggesting that community-level loyalty is associated with more cohesive interactions and less fragmentation into subgroups. We exploit these general patterns to predict future rates of loyalty. 
Our results show that a user's propensity to become loyal is apparent from their first interactions with a community, suggesting that some users are intrinsically loyal from the very beginning.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computation and Language,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/HQQUMCBD/Hamilton et al_2017_Loyalty in Online Communities.pdf;/home/nathante/Zotero/storage/5RE84JF9/1703.html} +} + +@book{hannan_organizational_1989, + title = {Organizational Ecology}, + author = {Hannan, Michael T. and Freeman, John}, + date = {1989}, + edition = {1}, + publisher = {{Harvard University Press}}, + location = {{Cambridge, MA}} +} + +@article{hannan_population_1977, + title = {The Population Ecology of Organizations}, + author = {Hannan, Michael T. and Freeman, John}, + date = {1977}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {82}, + number = {5}, + eprint = {2777807}, + eprinttype = {jstor}, + pages = {929--964}, + issn = {0002-9602}, + abstract = {A population ecology perspective on organization-environment relations is proposed as an alternative to the dominant adaptation perspective. The strength of inertial pressures on organizational structure suggests the application of models that depend on competition and selection in populations of organizations. 
Several such models as well as issues that arise in attempts to apply them to the organization-environment problem are discussed.}, + file = {/home/nathante/Zotero/storage/TVD48Q77/Hannan and Freeman - 1977 - The Population Ecology of Organizations.pdf} +} + +@book{hawley_human_1986, + title = {Human Ecology: A Theoretical Essay}, + shorttitle = {Human Ecology}, + author = {Hawley, Amos Henry}, + date = {1986}, + publisher = {{University of Chicago Press}}, + location = {{Chicago; London}}, + isbn = {978-0-226-31983-4 978-0-226-31984-1}, + langid = {english}, + annotation = {OCLC: 993363851} +} + +@online{heaps_enforcing_2020, + title = {Enforcing Stationarity through the Prior in Vector Autoregressions}, + author = {Heaps, Sarah E.}, + date = {2020-04-20}, + eprint = {2004.09455}, + eprinttype = {arxiv}, + primaryclass = {stat}, + abstract = {Stationarity is a very common assumption in time series analysis. A vector autoregressive (VAR) process is stationary if and only if the roots of its characteristic equation lie outside the unit circle, constraining the autoregressive coefficient matrices to lie in the stationary region. However, the stationary region has a highly complex geometry which impedes specification of a prior distribution. In this work, an unconstrained reparameterisation of a stationary VAR model is presented. The new parameters are based on partial autocorrelation matrices, which are interpretable, and can be transformed bijectively to the space of unconstrained square matrices. This transformation preserves various structural forms of the partial autocorrelation matrices and readily facilitates specification of a prior. Properties of this prior are described along with an important special case which is exchangeable with respect to the order of the elements in the observation vector. Posterior inference and computation are described and implemented using Hamiltonian Monte Carlo via Stan. 
The prior and inferential procedures are illustrated with an application to a macroeconomic time series which highlights the benefits of enforcing stationarity.}, + archiveprefix = {arXiv}, + version = {1}, + keywords = {_tablet,VAR}, + file = {/home/nathante/Zotero/storage/VNW4X7ZM/Heaps_2020_Enforcing stationarity through the prior in vector autoregressions.pdf;/home/nathante/Zotero/storage/AKKHZYXS/2004.html} +} + +@inproceedings{hessel_science_2016, + ids = {hessel_science_2016-1}, + title = {Science, Askscience, and Badscience: On the Coexistence of Highly Related Communities}, + shorttitle = {Science, Askscience, and Badscience}, + booktitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Hessel, Jack and Tan, Chenhao and Lee, Lillian}, + date = {2016-03-31}, + eprint = {1612.07487}, + eprinttype = {arxiv}, + pages = {11}, + abstract = {When large social-media platforms allow users to easily form and self-organize into interest groups, highly related communities can arise. For example, the Reddit site hosts not just a group called food, but also HealthyFood, foodhacks, foodporn, and cooking, among others. Are these highly related communities created for similar classes of reasons (e.g., to focus on a subtopic, to create a place for allegedly more “high-minded” discourse, etc.)? How do users allocate attention between such close alternatives when they are available or emerge over time? Are there different types of relations between close alternatives such as sharing many users vs. a new community drawing away members of an older one vs. a splinter group failing to cohere into a viable separate community? We investigate the interactions between highly related communities using data from reddit.com consisting of 975M posts and comments spanning an 8-year period. We identify a set of typical affixes that users adopt to create highly related communities and build a taxonomy of affixes. 
One interesting finding regarding users’ behavior is: after a newer community is created, for several types of highly-related community pairs, users that engage in a newer community tend to be more active in their original community than users that do not explore, even when controlling for previous level of engagement.}, + archiveprefix = {arXiv}, + eventtitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + langid = {english}, + keywords = {Computer Science - Social and Information Networks,Physics - Physics and Society}, + file = {/home/nathante/Zotero/storage/2W6YBUBD/Hessel et al_2016_Science, AskScience, and BadScience.pdf;/home/nathante/Zotero/storage/4FLLXNV9/Hessel et al. - 2016 - Science, AskScience, and BadScience On the Coexis.pdf;/home/nathante/Zotero/storage/WS6TW26Q/Hessel et al. - 2016 - Science, AskScience, and BadScience On the Coexis.pdf;/home/nathante/Zotero/storage/3NHVFA3U/1612.html;/home/nathante/Zotero/storage/DXX4CJ7T/14739.html;/home/nathante/Zotero/storage/YSX2WN2J/13106.html} +} + +@incollection{hill_almost_2013, + title = {Almost {{Wikipedia}}: {{What}} Eight Early Online Collaborative Encyclopedia Projects Reveal about the Mechanisms of Collective Action.}, + booktitle = {Essays on Volunteer Mobilization in Peer Production}, + author = {Hill, Benjamin Mako}, + date = {2013}, + publisher = {{Massachusetts Institute of Technology}}, + location = {{Cambridge, Massachusetts}}, + annotation = {PhD Dissertation} +} + +@incollection{hill_studying_2019, + title = {Studying Populations of Online Communities}, + booktitle = {The {{Oxford Handbook}} of {{Networked Communication}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + editor = {Foucault Welles, Brooke and González-Bailón, Sandra}, + date = {2019-09}, + pages = {173--193}, + publisher = {{Oxford University Press}}, + location = {{Oxford, UK}}, + abstract = {While the large majority of published research on online communities consists of analyses 
conducted entirely within individual communities, this chapter argues for a population-based approach, in which researchers study groups of similar communities. For example, although there have been thousands of papers published about Wikipedia, a population-based approach might compare all wikis on a particular topic. Using examples from published empirical studies, the chapter describes five key benefits of this approach. First, it argues that population-level research increases the generalizability of findings. Next, it describes four processes and dynamics that are only possible to study using populations: community-level variables, information diffusion processes across communities, ecological dynamics, and multilevel community processes. The chapter concludes with a discussion of a series of limitations and challenges.}, + isbn = {978-0-19-046051-8}, + langid = {english}, + file = {/home/nathante/Zotero/storage/39ZWGGYN/Hill and Shaw - 2019 - Studying Populations of Online Communities.pdf;/home/nathante/Zotero/storage/BTB3AQGV/oxfordhb-9780190460518-e-8.html} +} + +@article{hirsch_sacrifice_1990-1, + title = {Sacrifice for the {{Cause}}: {{Group Processes}}, {{Recruitment}}, and {{Commitment}} in a {{Student Social Movement}}}, + shorttitle = {Sacrifice for the {{Cause}}}, + author = {Hirsch, Eric L.}, + date = {1990}, + journaltitle = {American Sociological Review}, + volume = {55}, + number = {2}, + eprint = {2095630}, + eprinttype = {jstor}, + pages = {243--254}, + issn = {0003-1224}, + abstract = {[Recruitment and commitment in protest movements are best explained by analyzing group-level political processes such as consciousness-raising, collective empowerment, polarization, and collective decision-making. Such processes increase protesters' political solidarity--their commitment to the cause and their belief in the non-institutional tactics that further that cause. 
Other frameworks, such as the rational choice and collective behavior approaches, are less adequate in accounting for recruitment and commitment. Rational choice perspectives neglect group processes by suggesting that decisions about whether to join or stay at a protest are based largely on isolated individual cost/benefit calculations. The collective behavior view that protests are spawned by confused and insecure individuals in situations of social unrest cannot be reconciled with the fact that most protests originate among close-knit groups of politically committed activists using carefully planned strategies and tactics. These conclusions are based on a study of the 1985 Columbia University divestment protest.]}, + file = {/home/nathante/Zotero/storage/3IITPEFE/Hirsch - 1990 - Sacrifice for the Cause Group Processes, Recruitm.pdf} +} + +@book{hirschman_exit_1970, + title = {Exit, {{Voice}}, and {{Loyalty}}: {{Responses}} to {{Decline}} in {{Firms}}, {{Organizations}}, and {{States}}}, + shorttitle = {Exit, {{Voice}}, and {{Loyalty}}}, + author = {Hirschman, Albert O.}, + date = {1970}, + publisher = {{Harvard University Press}}, + abstract = {An innovator in contemporary thought on economic and political development looks here at decline rather than growth. Albert O. Hirschman makes a basic distinction between alternative ways of reacting to deterioration in business firms and, in general, to dissatisfaction with organizations: one, “exit,” is for the member to quit the organization or for the customer to switch to the competing product, and the other, “voice,” is for members or customers to agitate and exert influence for change “from within.” The efficiency of the competitive mechanism, with its total reliance on exit, is questioned for certain important situations. 
As exit often undercuts voice while being unable to counteract decline, loyalty is seen in the function of retarding exit and of permitting voice to play its proper role. The interplay of the three concepts turns out to illuminate a wide range of economic, social, and political phenomena. As the author states in the preface, “having found my own unifying way of looking at issues as diverse as competition and the two-party system, divorce and the American character, black power and the failure of 'unhappy' top officials to resign over Vietnam, I decided to let myself go a little.”}, + isbn = {978-0-674-27660-4}, + langid = {english}, + pagetotal = {180}, + file = {/home/nathante/Zotero/storage/87VQQN7Z/Hirschman - 1970 - Exit, Voice, and Loyalty Responses to Decline in .pdf} +} + +@online{hollister_twitter_2021, + title = {Twitter Is Deleting {{Trump}}’s Attempts to Circumvent Ban}, + author = {Hollister, Sean}, + date = {2021-01-08T20:45:51-05:00}, + abstract = {He suggested he would build his own platform in now-deleted messages.}, + langid = {english}, + organization = {{The Verge}}, + file = {/home/nathante/Zotero/storage/A7QDJJ3Y/trump-tried-to-evade-his-ban-with-potus-but-those-tweets-were-instantly-deleted.html} +} + +@article{iriberri_life-cycle_2009, + title = {A Life-Cycle Perspective on Online Community Success}, + author = {Iriberri, Alicia and Leroy, Gondy}, + date = {2009-02}, + journaltitle = {ACM Computing Surveys}, + shortjournal = {ACM Comput. Surv.}, + volume = {41}, + number = {2}, + pages = {1--29}, + issn = {0360-0300, 1557-7341}, + abstract = {Using the information systems lifecycle as a unifying framework, we review online communities research and propose a sequence for incorporating success conditions during initiation and development to increase their chances of becoming a successful community, one in which members participate actively and develop lasting relationships. 
Online communities evolve following distinctive lifecycle stages and recommendations for success are more or less relevant depending on the developmental stage of the online community. In addition, the goal of the online community under study determines the components to include in the development of a successful online community. Online community builders and researchers will benefit from this review of the conditions that help online communities succeed.}, + langid = {english}, + keywords = {lifecycle,literature review,Online communities,success factors}, + file = {/home/nathante/Zotero/storage/3BRDSVKE/Iriberri and Leroy - 2009 - A life-cycle perspective on online community succe.pdf;/home/nathante/Zotero/storage/3V8BAWQT/Iriberri and Leroy - 2009 - A life-cycle perspective on online community succe.pdf} +} + +@article{ives_estimating_2003, + title = {Estimating {{Community Stability}} and {{Ecological Interactions}} from {{Time}}-{{Series Data}}}, + author = {Ives, A. R. and Dennis, B. and Cottingham, K. L. and Carpenter, S. R.}, + date = {2003-05}, + journaltitle = {Ecological Monographs}, + shortjournal = {Ecological Monographs}, + volume = {73}, + number = {2}, + pages = {301--330}, + issn = {0012-9615}, + abstract = {Natural ecological communities are continuously buffeted by a varying environment, often making it difficult to measure the stability of communities using concepts requiring the existence of an equilibrium point. Instead of an equilibrium point, the equilibrial state of communities subject to environmental stochasticity is a stationary distribution, which is characterized by means, variances, and other statistical moments. Here, we derive three properties of stochastic multispecies communities that measure different characteristics associated with community stability. These properties can be estimated from multispecies time-series data using first-order multivariate autoregressive (MAR(1)) models. 
We demonstrate how to estimate the parameters of MAR(1) models and obtain confidence intervals for both parameters and the measures of stability. We also address the problem of estimation when there is observation (measurement) error. To illustrate these methods, we compare the stability of the planktonic communities in three lakes in which nutrient loading and planktivorous fish abundance were experimentally manipulated. MAR(1) models and the statistical methods we present can be used to identify dynamically important interactions between species and to test hypotheses about stability and other dynamical properties of naturally varying ecological communities. Thus, they can be used to integrate theoretical and empirical studies of community dynamics.}, + langid = {english}, + keywords = {_tablet}, + file = {/home/nathante/Zotero/storage/S394LE96/Ives et al_2003_Estimating Community Stability and Ecological Interactions from Time-Series Data.pdf} +} + +@online{jing_sameness_2019, + ids = {jing_sameness_2019-1}, + title = {Sameness {{Attracts}}, {{Novelty Disturbs}}, but {{Outliers Flourish}} in {{Fanfiction Online}}}, + author = {Jing, Elise and DeDeo, Simon and Ahn, Yong-Yeol}, + date = {2019-04-16}, + eprint = {1904.07741}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {The nature of what people enjoy is not just a central question for the creative industry, it is a driving force of cultural evolution. It is widely believed that successful cultural products balance novelty and conventionality: they provide something familiar but at least somewhat divergent from what has come before, and occupy a satisfying middle ground between "more of the same" and "too strange". We test this belief using a large dataset of over half a million works of fanfiction from the website Archive of Our Own (AO3), looking at how the recognition a work receives varies with its novelty. 
We quantify the novelty through a term-based language model, and a topic model, in the context of existing works within the same fandom. Contrary to the balance theory, we find that the lowest-novelty are the most popular and that popularity declines monotonically with novelty. A few exceptions can be found: extremely popular works that are among the highest novelty within the fandom. Taken together, our findings not only challenge the traditional theory of the hedonic value of novelty, they invert it: people prefer the least novel things, are repelled by the middle ground, and have an occasional enthusiasm for extreme outliers. It suggests that cultural evolution must work against inertia --- the appetite people have to continually reconsume the familiar, and may resemble a punctuated equilibrium rather than a smooth evolution.}, + archiveprefix = {arXiv}, + keywords = {cultural evolution,novelty}, + file = {/home/nathante/Zotero/storage/PU4D24ZW/Jing et al. - 2019 - Sameness Attracts, Novelty Disturbs, but Outliers .pdf;/home/nathante/Zotero/storage/SJNNBUWE/Jing et al_2019_Sameness Attracts, Novelty Disturbs, but Outliers Flourish in Fanfiction Online.pdf;/home/nathante/Zotero/storage/ENUI7ANA/1904.html;/home/nathante/Zotero/storage/HWUJ5XXT/1904.html} +} + +@article{johnson_emergence_2014, + title = {Emergence of Power Laws in Online Communities: {{The}} Role of Social Mechanisms and Preferential Attachment.}, + shorttitle = {Emergence of {{Power Laws}} in {{Online Communities}}}, + author = {Johnson, Steven L. and Faraj, Samer and Kudaravalli, Srinivas}, + date = {2014}, + journaltitle = {Management Information Systems Quarterly}, + volume = {38}, + number = {3}, + pages = {795--808}, + file = {/home/nathante/Zotero/storage/MPZJHWCB/Johnson et al. 
- 2014 - Emergence of power laws in online communities The.pdf;/home/nathante/Zotero/storage/525WPBUV/10.html} +} + +@article{jordan_evaluating_2019, + title = {Evaluating {{Probabilistic Forecasts}} with {{scoringRules}}}, + author = {Jordan, Alexander and Krüger, Fabian and Lerch, Sebastian}, + date = {2019-08-21}, + journaltitle = {Journal of Statistical Software}, + volume = {90}, + number = {1}, + pages = {1--37}, + issn = {1548-7660}, + issue = {1}, + langid = {english}, + keywords = {comparative evaluation,ensemble forecasts,out-of-sample evaluation,predictive distributions,proper scoring rules,R,score computation}, + file = {/home/nathante/Zotero/storage/4FH4NMHR/Jordan et al_2019_Evaluating Probabilistic Forecasts with scoringRules.pdf} +} + +@inproceedings{kairam_life_2012, + title = {The Life and Death of Online Groups: Predicting Group Growth and Longevity}, + shorttitle = {The Life and Death of Online Groups}, + booktitle = {Proceedings of the Fifth {{ACM}} International Conference on {{Web}} Search and Data Mining}, + author = {Kairam, Sanjay Ram and Wang, Dan J. and Leskovec, Jure}, + date = {2012-02-08}, + series = {{{WSDM}} '12}, + pages = {673--682}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {We pose a fundamental question in understanding how to identify and design successful communities: What factors predict whether a community will grow and survive in the long term? Social scientists have addressed this question extensively by analyzing offline groups which endeavor to attract new members, such as social movements, finding that new individuals are influenced strongly by their ties to members of the group. As a result, prior work on the growth of communities has treated growth primarily as a diffusion process, leading to findings about group evolution which can be difficult to explain. 
The proliferation of online social networks and communities, however, has created new opportunities to study, at a large scale and with very fine resolution, the mechanisms which lead to the formation, growth, and demise of online groups. In this paper, we analyze data from several thousand online social networks built on the Ning platform with the goal of understanding the factors contributing to the growth and longevity of groups within these networks. Specifically, we investigate the role that two types of growth (growth through diffusion and growth by other means) play during a group's formative stages from the perspectives of both the individual member and the group. Applying these insights to a population of groups of different ages and sizes, we build a model to classify groups which will grow rapidly over the short-term and long-term. Our model achieves over 79\% accuracy in predicting group growth over the following two months and over 78\% accuracy in predictions over the following two years. We utilize a similar approach to predict which groups will die within a year. The results of our combined analysis provide insight into how both early non-diffusion growth and a complex set of network constraints appear to contribute to the initial and continued growth and success of groups within social networks. 
Finally we discuss implications of this work for the design, maintenance, and analysis of online communities.}, + isbn = {978-1-4503-0747-5}, + keywords = {group formation,information diffusion,online communities,social networks}, + file = {/home/nathante/Zotero/storage/NS675EXH/Kairam et al_The Life and Death of Online Groups.pdf;/home/nathante/Zotero/storage/QZR8T2QH/Kairam et al_2012_The life and death of online groups.pdf} +} + +@book{kantz_nonlinear_2003, + title = {Nonlinear {{Time Series Analysis}}}, + author = {Kantz, Holger and Schreiber, Thomas}, + date = {2003}, + edition = {2}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge}}, + abstract = {The paradigm of deterministic chaos has influenced thinking in many fields of science. Chaotic systems show rich and surprising mathematical structures. In the applied sciences, deterministic chaos provides a striking explanation for irregular behaviour and anomalies in systems which do not seem to be inherently stochastic. The most direct link between chaos theory and the real world is the analysis of time series from real systems in terms of nonlinear dynamics. Experimental technique and data analysis have seen such dramatic progress that, by now, most fundamental properties of nonlinear dynamical systems have been observed in the laboratory. Great efforts are being made to exploit ideas from chaos theory wherever the data displays more structure than can be captured by traditional methods. Problems of this kind are typical in biology and physiology but also in geophysics, economics, and many other sciences.}, + isbn = {978-0-521-52902-0}, + file = {/home/nathante/Zotero/storage/BQVXZ6AD/519783E4E8A2C3DCD4641E42765309C7.html} +} + +@article{katz_network_1985, + title = {Network {{Externalities}}, {{Competition}}, and {{Compatibility}}}, + author = {Katz, Michael L. 
and Shapiro, Carl}, + date = {1985}, + journaltitle = {The American Economic Review}, + volume = {75}, + number = {3}, + eprint = {1814809}, + eprinttype = {jstor}, + pages = {424--440}, + publisher = {{American Economic Association}}, + issn = {0002-8282}, + file = {/home/nathante/Zotero/storage/FPC475A5/Katz_Shapiro_1985_Network Externalities, Competition, and Compatibility.pdf} +} + +@article{kiene_managing_2018, + title = {Managing Organizational Culture in Online Group Mergers}, + author = {Kiene, Charles and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2018}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {2}, + pages = {89:1--89:21}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/NV8YEK8W/Kiene et al. - 2018 - Managing organizational culture in online group me.pdf} +} + +@article{kiene_technological_2019, + title = {Technological Frames and User Innovation: Exploring Technological Change in Community Moderation Teams}, + shorttitle = {Technological Frames and User Innovation}, + author = {Kiene, Charles and Jiang, Jialun "Aaron" and Hill, Benjamin Mako}, + date = {2019-11-07}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {3}, + pages = {44:1--44:23}, + abstract = {Management of technological change in organizations is one of the most enduring topics in the literature on computer-supported cooperative work. The successful navigation of technological change is both more challenging and more critical in online communities that are entirely mediated by technology than it is in traditional organizations. This paper presents an analysis of 14 in-depth interviews with moderators of subcommunities of one technological platform (Reddit) that added communities on a new technological platform (Discord). 
Moderation teams experienced several problems related to moderating content at scale as well as a disconnect between the affordances of Discord and their assumptions based on their experiences on Reddit. We found that moderation teams used Discord's API to create scripts and bots that augmented Discord to make the platform work more like tools on Reddit. These tools were particularly important in communities struggling with scale. Our findings suggest that increasingly widespread end user programming allow users of social computing systems to innovate and deploy solutions to unanticipated design problems by transforming new technological platforms to align with their past expectations.}, + issue = {CSCW}, + keywords = {API,bots,chat,computer-mediated communication,discord,moderation,online communities,reddit,social computing,technological change}, + file = {/home/nathante/Zotero/storage/E2PDCY58/Kiene et al. - 2019 - Technological frames and user innovation explorin.pdf;/home/nathante/Zotero/storage/U7M6IZY4/Kiene et al. - 2019 - Technological Frames and User Innovation Explorin.pdf} +} + +@article{koh_encouraging_2007, + title = {Encouraging Participation in Virtual Communities}, + author = {Koh, Joon and Kim, Young-Gul and Butler, Brian and Bock, Gee-Woo}, + date = {2007-02-01}, + journaltitle = {Communications of the ACM}, + shortjournal = {Commun. ACM}, + volume = {50}, + number = {2}, + pages = {68--73}, + issn = {00010782}, + langid = {english}, + file = {/home/nathante/Zotero/storage/TP9FPWMG/Koh et al. - 2007 - Encouraging participation in virtual communities.pdf} +} + +@article{krafft_disinformation_2020, + title = {Disinformation by {{Design}}: {{The Use}} of {{Evidence Collages}} and {{Platform Filtering}} in a {{Media Manipulation Campaign}}}, + shorttitle = {Disinformation by {{Design}}}, + author = {Krafft, P. M. 
and Donovan, Joan}, + date = {2020-03-03}, + journaltitle = {Political Communication}, + volume = {37}, + number = {2}, + pages = {194--214}, + publisher = {{Routledge}}, + issn = {1058-4609}, + abstract = {Disinformation campaigns such as those perpetrated by far-right groups in the United States seek to erode democratic social institutions. Looking to understand these phenomena, previous models of disinformation have emphasized identity-confirmation and misleading presentation of facts to explain why such disinformation is shared. A risk of these accounts, which conjure images of echo chambers and filter bubbles, is portraying people who accept disinformation as relatively passive recipients or conduits. Here we conduct a case study of tactics of disinformation to show how platform design and decentralized communication contribute to advancing the spread of disinformation even when that disinformation is continuously and actively challenged where it appears. Contrary to a view of disinformation flowing within homogeneous echo chambers, in our case study we observe substantial skepticism against disinformation narratives as they form. To examine how disinformation spreads amidst skepticism in this case, we employ a document-driven multi-site trace ethnography to analyze a contested rumor that crossed anonymous message boards, the conservative media ecosystem, and other platforms. We identify two important factors that filtered out skepticism and contested explanations, which facilitated the transformation of this rumor into a disinformation campaign: (1) the aggregation of information into evidence collages—image files that aggregate positive evidence—and (2) platform filtering—the decontextualization of information as these claims crossed platforms. 
Our findings provide an elucidation of “trading up the chain” dynamics explored by previous researchers and a counterpoint to the relatively mechanistic accounts of passive disinformation propagation that dominate the quantitative literature. We conclude with a discussion of how these factors relate to the communication power available to disparate groups at different times, as well as practical implications for inferring intent from social media traces and practical implications for the design of social media platforms.}, + keywords = {4chan,Alt-right,disinformation,media manipulation,tactics}, + annotation = {\_eprint: https://doi.org/10.1080/10584609.2019.1686094}, + file = {/home/nathante/Zotero/storage/3EQB8KSG/Krafft_Donovan_2020_Disinformation by Design.pdf;/home/nathante/Zotero/storage/MRRVEJWU/10584609.2019.html} +} + +@book{kraut_building_2012, + ids = {kraut2012building,kraut_building_2012-1}, + title = {Building Successful Online Communities: {{Evidence}}-Based Social Design}, + author = {Kraut, Robert E. 
and Resnick, Paul and Kiesler, Sara}, + date = {2012}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + abstract = {Uses insights from social science, psychology, and economics to offer advice on planning and managing an online community.}, + isbn = {978-0-262-29831-5}, + langid = {english}, + keywords = {design,foundations of social computing}, + file = {/home/nathante/Zotero/storage/B4XSKAVW/04-kraut10-Newcomers-current.pdf;/home/nathante/Zotero/storage/CX4KDC3G/01-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/IJCEWA6L/06-Resnick10-Startup-current.pdf;/home/nathante/Zotero/storage/JEWAVXHG/02-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/RIM4D9KS/05-kiesler10-Regulation-current.pdf;/home/nathante/Zotero/storage/S6Z28BBS/03-Ren10-Commitment-current.pdf} +} + +@inproceedings{kraut_role_2014, + ids = {kraut_role_2014-1}, + title = {The {{Role}} of {{Founders}} in {{Building Online Groups}}}, + booktitle = {Proceedings of the 17th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Kraut, Robert E. and Fiore, Andrew T.}, + date = {2014}, + series = {{{CSCW}} '14}, + pages = {722--732}, + publisher = {{ACM}}, + location = {{Baltimore, Maryland, USA}}, + abstract = {As a class, online groups are popular, but many die before they become successful. This research traced the fate of 472,231 new online groups. By the end of a 3-month observation period, 57\% of the groups had died, ceasing to post new content. Founders' human and social capital before the group was formed, the decisions they made when they created the group and their behavior in the group during its first week all predicted group survival. Many of the results suggest that founders create more successful groups if they have more resources (e.g., more online friends) and opportunities for acquiring relevant skills (e.g., more experience with online groups) and are more active in their group. 
However, founders who are too controlling seem to present a threat to their groups. Their groups are more likely to fail if they are the only group administrator, if they have ties to all group members and if they were responsible for adding all group members.}, + isbn = {978-1-4503-2540-0}, + keywords = {birth,death,facebook,founder,online groups}, + file = {/home/nathante/Zotero/storage/EXXNCL5Q/Kraut_Fiore_2014_The role of founders in building online groups.pdf;/home/nathante/Zotero/storage/Z25RMXV6/Kraut and Fiore - 2014 - The Role of Founders in Building Online Groups.pdf;/home/nathante/Zotero/storage/BPWDS3GX/citation.html} +} + +@book{kropotkin_mutual_2012, + title = {Mutual Aid: A Factor of Evolution}, + shorttitle = {Mutual {{Aid}}}, + author = {Kropotkin, Peter}, + date = {2012-05-02}, + origdate = {1902}, + publisher = {{Courier Corporation}}, + abstract = {In this cornerstone of modern liberal social theory, Peter Kropotkin states that the most effective human and animal communities are essentially cooperative, rather than competitive. Kropotkin based this classic on his observations of natural phenomena and history, forming a work of stunning and well-reasoned scholarship. Essential to the understanding of human evolution as well as social organization, it offers a powerful counterpoint to the tenets of Social Darwinism. It also cites persuasive evidence of human nature's innate compatibility with anarchist society. "Kropotkin's basic argument is correct," noted evolutionary biologist Stephen Jay Gould. "Struggle does occur in many modes, and some lead to cooperation among members of a species as the best pathway to advantage for individuals." Anthropologist Ashley Montagu declared that "Mutual Aid will never be any more out of date than will the Declaration of Independence. 
New facts may increasingly become available, but we can already see that they will serve largely to support Kropotkin's conclusion that 'in the ethical progress of man, mutual support—not mutual struggle—has had the leading part.'" Physician and author Alex Comfort asserted that "Kropotkin profoundly influenced human biology by his theory of Mutual Aid. . . . He was one of the first systematic students of animal communities, and may be regarded as the founder of modern social ecology."}, + isbn = {978-0-486-12153-6}, + langid = {english}, + pagetotal = {338} +} + +@article{kubiszewski_production_2010, + title = {The Production and Allocation of Information as a Good That Is Enhanced with Increased Use}, + author = {Kubiszewski, Ida and Farley, Joshua and Costanza, Robert}, + date = {2010-04-01}, + journaltitle = {Ecological Economics}, + shortjournal = {Ecological Economics}, + series = {Special {{Section}} - {{Payments}} for {{Environmental Services}}: {{Reconciling Theory}} and {{Practice}}}, + volume = {69}, + number = {6}, + pages = {1344--1354}, + issn = {0921-8009}, + abstract = {Information has some unique characteristics. Unlike most other goods and services, it is neither rival (use by one prevents use by others) nor non-rival (use by one does not affect use by others), but is enhanced with increased use, or ‘additive’. Therefore a unique allocation system for both the production and consumption of information is needed. Under the current market-based allocation system, production of information is often limited through the exclusive rights produced by patents and copyrights. This limits scientists' ability to share and build on each other's knowledge. We break the problem down into three separate questions: (1) do markets generate the type of information most important for modern society? (2) are markets the most appropriate institution for producing that information? 
and (3) once information is produced, are markets the most effective way of maximizing the social value of that information? We conclude that systematic market failures make it unlikely that markets will generate the most important types of information, while the unique characteristics of information reduce the cost-effectiveness of markets in generating information and in maximizing its social value. We then discuss alternative methods that do not have these shortcomings, and that would lead to greater overall economic efficiency, social justice and ecological sustainability. These methods include monetary prizes, publicly funded research from which the produced information is released into the public domain, and status driven incentive structures like those in academia and the “open-source” community.}, + langid = {english}, + keywords = {Allocation,Anti-rival,Copyrights,Information,Intellectual property rights,Knowledge,Market failure,Patents}, + file = {/home/nathante/Zotero/storage/DX84YZM7/S092180091000039X.html} +} + +@inproceedings{kumar_community_2018, + ids = {kumar_community_2018-1}, + title = {Community {{Interaction}} and {{Conflict}} on the {{Web}}}, + booktitle = {Proceedings of the 2018 {{World Wide Web Conference}}}, + author = {Kumar, Srijan and Hamilton, William L. and Leskovec, Jure and Jurafsky, Dan}, + date = {2018-04-23}, + series = {{{WWW}} '18}, + pages = {933--943}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + location = {{Lyon, France}}, + abstract = {Users organize themselves into communities on web platforms. These communities can interact with one another, often leading to conflicts and toxic interactions. However, little is known about the mechanisms of interactions between communities and how they impact users. 
Here we study intercommunity interactions across 36,000 communities on Reddit, examining cases where users of one community are mobilized by negative sentiment to comment in another community. We show that such conflicts tend to be initiated by a handful of communities---less than 1\% of communities start 74\% of conflicts. While conflicts tend to be initiated by highly active community members, they are carried out by significantly less active members. We find that conflicts are marked by formation of echo chambers, where users primarily talk to other users from their own community. In the long-term, conflicts have adverse effects and reduce the overall activity of users in the targeted communities. Our analysis of user interactions also suggests strategies for mitigating the negative impact of conflicts---such as increasing direct engagement between attackers and defenders. Further, we accurately predict whether a conflict will occur by creating a novel LSTM model that combines graph embeddings, user, community, and text features. This model can be used to create an early-warning system for community moderators to prevent conflicts. Altogether, this work presents a data-driven view of community interactions and conflict, and paves the way towards healthier online communities.}, + isbn = {978-1-4503-5639-8}, + keywords = {antisocial behavior,community,conflict,interaction,intercommunity,society,web}, + file = {/home/nathante/Zotero/storage/3R7J48EQ/Kumar et al_2018_Community Interaction and Conflict on the Web.pdf;/home/nathante/Zotero/storage/FPJ44933/Kumar et al. 
- 2018 - Community Interaction and Conflict on the Web.pdf;/home/nathante/Zotero/storage/U6GYGZDS/Kumar_et_al-2018-Community_interaction_conflict-WWW.pdf} +} + +@inproceedings{lanzara_knowledge_2003, + title = {The Knowledge Ecology of Open-Source Software Projects}, + booktitle = {19th {{EGOS Colloquium}}, {{Copenhagen}}}, + author = {Lanzara, Giovan Francesco and Morner, Michele and others}, + date = {2003}, + annotation = {00082}, + file = {/home/nathante/Zotero/storage/MY6MJGIC/Lanzara et al_2003_The knowledge ecology of open-source software projects.pdf} +} + +@book{lave_situated_1991, + title = {Situated Learning: {{Legitimate}} Peripheral Participation}, + shorttitle = {Situated {{Learning}}}, + author = {Lave, Jean and Wenger, Etienne}, + date = {1991}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge, UK}}, + abstract = {In this important theoretical treatise, Jean Lave, anthropologist, and Etienne Wenger, computer scientist, push forward the notion of situated learning–that learning is fundamentally a social process and not solely in the learner's head. The authors maintain that learning viewed as situated activity has as its central defining characteristic a process they call legitimate peripheral participation. Learners participate in communities of practitioners, moving toward full participation in the sociocultural practices of a community. Legitimate peripheral participation provides a way to speak about crucial relations between newcomers and oldtimers and about their activities, identities, artifacts, knowledge and practice. 
The communities discussed in the book are midwives, tailors, quartermasters, butchers, and recovering alcoholics, however, the process by which participants in those communities learn can be generalized to other social groups.}, + isbn = {978-0-521-42374-8}, + langid = {english}, + keywords = {Education / Educational Psychology,Psychology / Cognitive Psychology & Cognition,Psychology / Developmental / General,Psychology / General,Psychology / Personality} +} + +@inproceedings{ludford_think_2004, + title = {Think {{Different}}: {{Increasing Online Community Participation Using Uniqueness}} and {{Group Dissimilarity}}}, + shorttitle = {Think {{Different}}}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Ludford, Pamela J. and Cosley, Dan and Frankowski, Dan and Terveen, Loren}, + date = {2004}, + series = {{{CHI}} '04}, + pages = {631--638}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Online communities can help people form productive relationships. Unfortunately, this potential is not always fulfilled: many communities fail, and designers don't have a solid understanding of why. We know community activity begets activity. The trick, however, is to inspire participation in the first place. Social theories suggest methods to spark positive community participation. We carried out a field experiment that tested two such theories. We formed discussion communities around an existing movie recommendation web site, manipulating two factors: (1) similarity-we controlled how similar group members' movie ratings were; and (2) uniqueness-we told members how their movie ratings (with respect to a discussion topic) were unique within the group. Both factors positively influenced participation. 
The results offer a practical success story in applying social science theory to the design of online communities.}, + isbn = {978-1-58113-702-6}, + venue = {Vienna, Austria}, + file = {/home/nathante/Zotero/storage/94P38A6I/Ludford et al. - 2004 - Think Different Increasing Online Community Parti.pdf} +} + +@book{manning_introduction_2018, + title = {Introduction to Information Retrieval}, + author = {Manning, Christopher D and Raghavan, Prabhakar and Schütze, Hinrich}, + date = {2018}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge}}, + isbn = {978-0-521-86571-5}, + langid = {english}, + annotation = {OCLC: 1077323048} +} + +@article{margolin_normative_2012, + title = {Normative {{Influences}} on {{Network Structure}} in the {{Evolution}} of the {{Children}}’s {{Rights NGO Network}}, 1977--2004}, + shorttitle = {Normative {{Influences}} on {{Network Structure}} in the {{Evolution}} of the {{Children}}’s {{Rights NGO Network}}, 1977--2004}, + author = {Margolin, Drew B. and Shen, Cuihua and Lee, Seungyoon and Weber, Matthew S.
and Fulk, Janet and Monge, Peter}, + date = {2012-10-23}, + journaltitle = {Communication Research}, + abstract = {This study examines the impact of legitimacy on the dynamics of interorganizational networks within the nongovernmental organizations’ children’s rights communi...}, + langid = {english}, + keywords = {codification,community ecology,evolution,network evolution,NGOs,norms,SIENA}, + file = {/home/nathante/Zotero/storage/295X7HRD/Margolin et al_2012_Normative Influences on Network Structure in the Evolution of the Children’s.pdf;/home/nathante/Zotero/storage/T494X64A/0093650212463731.html} +} + +@article{mcinnes_hdbscan_2017, + title = {Hdbscan: {{Hierarchical}} Density Based Clustering}, + shorttitle = {Hdbscan}, + author = {McInnes, Leland and Healy, John and Astels, Steve}, + date = {2017-03-21}, + journaltitle = {The Journal of Open Source Software}, + shortjournal = {JOSS}, + volume = {2}, + number = {11}, + pages = {205}, + issn = {2475-9066}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6B488I3N/McInnes et al. - 2017 - hdbscan Hierarchical density based clustering.pdf} +} + +@inproceedings{mcmahon_substantial_2017, + title = {The Substantial Interdependence of {{Wikipedia}} and {{Google}}: {{A}} Case Study on the Relationship between Peer Production Communities and Information Technologies}, + shorttitle = {The {{Substantial Interdependence}} of {{Wikipedia}} and {{Google}}}, + booktitle = {International {{AAAI Conference}} on {{Web}} and {{Social Media}} ({{ICWSM}} 2017)}, + author = {McMahon, Connor and Johnson, Isaac L. and Hecht, Brent J.}, + date = {2017}, + pages = {142--151}, + publisher = {{AAAI}}, + location = {{Palo Alto, California}}, + file = {/home/nathante/Zotero/storage/6TX35RFQ/McMahon et al. - 2017 - The substantial interdependence of Wikipedia and G.pdf} +} + +@article{mcpherson_ecology_1983, + title = {An Ecology of Affiliation}, + author = {McPherson, J. 
Miller}, + date = {1983}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {48}, + number = {4}, + eprint = {2117719}, + eprinttype = {jstor}, + pages = {519--532}, + issn = {0003-1224}, + abstract = {This paper develops an ecological model of the competition of social organizations for members. The concept of the ecological niche is quantified explicitly in a way which ties together geography, time, and the social composition of organizations. A differential equation model analogous to the Lotka-Volterra competition equations in biology captures the dynamics of the system. This dynamic model is related to the niche concept in a novel way, which produces an easily understood and powerful picture of the static and dynamic structure of the community. This new perspective provides a theoretical link between the aggregate macrostructural theory of Blau (1977a,b) and the microstructural dynamics of organizational demography (Pfeffer, 1983). The model is tested with data on organizations from a midwestern city.}, + file = {/home/nathante/Zotero/storage/WIDCF8XB/McPherson - 1983 - An ecology of affiliation.pdf} +} + +@article{mcpherson_testing_1996, + title = {Testing a {{Dynamic Model}} of {{Social Composition}}: {{Diversity}} and {{Change}} in {{Voluntary Groups}}}, + shorttitle = {Testing a {{Dynamic Model}} of {{Social Composition}}}, + author = {McPherson, J. Miller and Rotolo, Thomas}, + date = {1996}, + journaltitle = {American Sociological Review}, + volume = {61}, + number = {2}, + eprint = {2096330}, + eprinttype = {jstor}, + pages = {179--202}, + issn = {0003-1224}, + abstract = {[We test a dynamic model of the social composition of voluntary groups. The model is based on the idea that sociodemographic variables define social niches in which voluntary groups grow and decline, share and compete, and change or remain static. 
The flow of individuals through such groups depends on the competition of other groups for their time and other resources. We build a dynamic model of this process and show how this model can account for changes in the social composition and the social heterogeneity of voluntary groups. We use life history data on the group affiliations of 1,050 individuals from 1974 to 1989 to test hypotheses about the diversity of education among group members and about the mean level of education of the members. Our data strongly support the hypotheses.]}, + file = {/home/nathante/Zotero/storage/KCQZTDG3/McPherson and Rotolo - 1996 - Testing a Dynamic Model of Social Composition Div.pdf} +} + +@article{menge_competition_1972, + title = {Competition for {{Food}} between {{Two Intertidal Starfish Species}} and Its {{Effect}} on {{Body Size}} and {{Feeding}}}, + author = {Menge, Bruce A.}, + date = {1972-07-01}, + journaltitle = {Ecology}, + volume = {53}, + number = {4}, + pages = {635--644}, + issn = {1939-9170}, + abstract = {Two predaceous intertidal starfish that overlap broadly with respect to food, space, and time were found to compete for a limited food supply in the San Juan Islands, Washington State. The experiment involved complete removal of a larger (up to 600 g average wet weight) starfish (Pisaster ochraceus) from a small island—reef and addition of them to a second island—reef while a third reef served as a control. In response to Pisaster removal, the mean individual wet weight of the smaller (maximum size = 45 g wet weight) asteroid (Leptasterias hexactis) increased significantly in 15 months. Addition of Pisaster resulted in a significant decrease in Leptasterias size; no change in average Leptasterias size was observed on the control reef. 
A highly significant inverse correlation between the estimated biomass densities (wet weight/m²) of the two species at 10 areas suggests that competition is widespread and that the species are generally in competitive equilibrium. A major consequence of the small size of Leptasterias is an apparent inability to capture larger prey. Coexistence seems based upon "specialization" by each predator on different-sized prey. Evidently, reduced competition stress results in an increase in Leptasterias's community role as a predator. Conversely, when competition is severe, the community role of Leptasterias appears unimportant.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/45XBILMY/Menge_1972_Competition for Food between Two Intertidal Starfish Species and its Effect on.pdf;/home/nathante/Zotero/storage/KGMIJVUZ/Menge - 1972 - Competition for Food between Two Intertidal Starfi.html} +} + +@article{monge_communication_2008, + title = {Communication Network Evolution in Organizational Communities}, + author = {Monge, Peter R. and Heiss, Bettina M. and Margolin, Drew B.}, + date = {2008-11-01}, + journaltitle = {Communication Theory}, + volume = {18}, + number = {4}, + pages = {449--477}, + issn = {1468-2885}, + abstract = {Organizational communities are typically defined as populations of organizations that are tied together by networks of communication and other relations in overlapping resource niches. Traditionally, evolutionary theorists and researchers have examined organizational populations that comprise organizational communities by focusing on their properties rather than on the networks that link them. However, a full understanding of the evolution of organizational communities requires insight into both organizations and their networks.
Consequently, this article presents a variety of conceptual tools for applying evolutionary theory to organizations, organizational communities, and their networks, including the notions of relational carrying capacity and linkage fitness. It illustrates evolutionary principles, such as variation, selection, and retention, that lead to the formation, growth, maintenance, and eventual demise of communication and other network linkages. This perspective allows us to understand the ways in which community survival and success are as dependent on their communication linkages as they are on the organizations they connect. The article concludes with suggestions for potential applications of evolutionary theory to other areas of human communication.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/EX9I2ZQ7/Monge et al. - 2008 - Communication network evolution in organizational .pdf;/home/nathante/Zotero/storage/CGNEW4L6/abstract.html} +} + +@article{monge_evolution_2008, + ids = {monge_evolution_2008-1}, + title = {The Evolution of Organizational Communication}, + author = {Monge, Peter and Poole, Marshall Scott}, + date = {2008-12-01}, + journaltitle = {Journal of Communication}, + shortjournal = {J Commun}, + volume = {58}, + number = {4}, + pages = {679--692}, + issn = {0021-9916}, + abstract = {Organizational communication, by its very definition, constitutes an intersection, one that exists between the study of human communication and the study of hum}, + langid = {english}, + file = {/home/nathante/Zotero/storage/AXJW4Y2U/Monge Poole - 2008 - The Evolution of Organizational Communication.pdf;/home/nathante/Zotero/storage/CIRHK5AY/Monge and Poole - 2008 - The evolution of organizational communication.pdf;/home/nathante/Zotero/storage/6S8MJ277/4098380.html;/home/nathante/Zotero/storage/ZSAC9PZZ/abstract.html} +} + +@article{monge_evolutionary_2011, + title = {Evolutionary and Ecological Models for Organizational Communication}, + author = {Monge, Peter 
R. and Lee, Seungyoon and Fulk, Janet and Frank, Lauren B. and Margolin, Drew and Schultz, Courtney and Shen, Cuihua and Weber, Matthew}, + date = {2011}, + journaltitle = {Advancing research in organizational communication through qualitative methodology, Management Communication Quarterly}, + volume = {25}, + number = {1}, + pages = {26--34} +} + +@book{nardi_information_1999, + title = {Information {{Ecologies}}: {{Using}} Technology with Heart}, + author = {Nardi, Bonnie A. and O'Day, Vicki L.}, + date = {1999}, + publisher = {{The MIT Press}}, + location = {{Cambridge, Massachusetts}}, + file = {/home/nathante/Zotero/storage/EFBVQ3YV/Nardi and O'Day - 2000 - Information ecologies using technology with heart.pdf} +} + +@article{newell_user_nodate, + title = {User {{Migration}} in {{Online Social Networks}}: {{A Case Study}} on {{Reddit During}} a {{Period}} of {{Community Unrest}}}, + author = {Newell, Edward and Jurgens, David and Saleem, Haji Mohammad and Vala, Hardik and Sassine, Jad and Armstrong, Caitrin and Ruths, Derek}, + pages = {10}, + abstract = {Platforms like Reddit have attracted large and vibrant communities, but the individuals in those communities are free to migrate to other platforms at any time. History has borne this out with the mass migration from Slashdot to Digg. The underlying motivations of individuals who migrate between platforms, and the conditions that favor migration online are not well-understood. We examine Reddit during a period of community unrest affecting millions of users in the summer of 2015, and analyze large-scale changes in user behavior and migration patterns to Reddit-like alternative platforms. Using self-reported statements from user comments, surveys, and a computational analysis of the activity of users with accounts on multiple platforms, we identify the primary motivations driving user migration.
While a notable number of Reddit users left for other platforms, we found that an important pull factor that enabled Reddit to retain users was its long tail of niche content. Other platforms may reach critical mass to support popular or “mainstream” topics, but Reddit’s large userbase provides a key advantage in supporting niche topics.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/YM2YE9P9/Newell et al. - User Migration in Online Social Networks A Case S.pdf} +} + +@article{novak_characterizing_2016, + title = {Characterizing {{Species Interactions}} to {{Understand Press Perturbations}}: {{What Is}} the {{Community Matrix}}?}, + shorttitle = {Characterizing {{Species Interactions}} to {{Understand Press Perturbations}}}, + author = {Novak, Mark and Yeakel, Justin D. and Noble, Andrew E. and Doak, Daniel F. and Emmerson, Mark and Estes, James A. and Jacob, Ute and Tinker, M. Timothy and Wootton, J. Timothy}, + date = {2016}, + journaltitle = {Annual Review of Ecology, Evolution, and Systematics}, + volume = {47}, + number = {1}, + pages = {409--432}, + abstract = {The community matrix is among ecology's most important mathematical abstractions, formally encapsulating the interconnected network of effects that species have on one another's populations. Despite its importance, the term “community matrix” has been applied to multiple types of matrices that have differing interpretations. This has hindered the application of theory for understanding community structure and perturbation responses. 
Here, we clarify the correspondence and distinctions among the Interaction matrix, the Alpha matrix, and the Jacobian matrix, terms that are frequently used interchangeably as well as synonymously with the term “community matrix.” We illustrate how these matrices correspond to different ways of characterizing interaction strengths, how they permit insights regarding different types of press perturbations, and how these are related by a simple scaling relationship. Connections to additional interaction strength characterizations encapsulated by the Beta matrix, the Gamma matrix, and the Removal matrix are also discussed. Our synthesis highlights the empirical challenges that remain in using these tools to understand actual communities.}, + annotation = {\_eprint: https://doi.org/10.1146/annurev-ecolsys-032416-010215}, + file = {/home/nathante/Zotero/storage/5JMM6PUA/Novak et al_2016_Characterizing Species Interactions to Understand Press Perturbations.pdf} +} + +@incollection{ostrom_public_1977, + title = {Public Goods and Public Choices}, + booktitle = {Alternatives {{For Delivering Public Services}}: {{Toward Improved Performance}}}, + author = {Ostrom, Vincent and Ostrom, Elinor}, + editor = {Savas, Emanuel S.}, + date = {1977}, + pages = {7--49}, + publisher = {{Westview Press}}, + location = {{Boulder, CO}} +} + +@article{park_human_1936, + title = {Human {{Ecology}}}, + author = {Park, Robert Ezra}, + date = {1936-07-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {42}, + number = {1}, + pages = {1--15}, + issn = {0002-9602}, + abstract = {Human ecology is an attempt to apply to the interrelations of human beings a type of analysis previously applied to the interrelations of plants and animals. The term "symbiosis" describes a type of social relationship that is biotic rather than cultural. This biotic social order comes into existence and is maintained by competition. 
In plant and animal societies competition is unrestricted by an institutional or moral order. Human society is a consequence and effect of this limitation of the symbiotic social order by the cultural. Different social sciences are concerned with the forms which this limitation of the natural or ecological social order assumes on (1) the economic, (2) the political, and (3) the moral level.}, + file = {/home/nathante/Zotero/storage/CBVGR8RU/Park - 1936 - Human Ecology.pdf;/home/nathante/Zotero/storage/UKMY6VUE/217327.html} +} + +@article{pedregosa_scikit-learn:_2011, + ids = {pedregosa_scikit-learn_2011}, + title = {Scikit-Learn: {{Machine}} Learning in Python}, + shorttitle = {Scikit-Learn}, + author = {Pedregosa, Fabian and Varoquaux, Gaël and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, Édouard}, + date = {2011-10}, + journaltitle = {Journal of Machine Learning Research}, + volume = {12}, + number = {85}, + pages = {2825--2830}, + abstract = {Scikit-learn is a Python module integrating a wide range of state-of-the-art machine learning algorithms for medium-scale supervised and unsupervised problems. This package focuses on bringing machine learning to non-specialists using a general-purpose high-level language. Emphasis is put on ease of use, performance, documentation, and API consistency. It has minimal dependencies and is distributed under the simplified BSD license, encouraging its use in both academic and commercial settings. Source code, binaries, and documentation can be downloaded from http://scikit-learn.sourceforge.net.}, + file = {/home/nathante/Zotero/storage/4TQWE3MC/Pedregosa et al_2011_Scikit-learn.pdf;/home/nathante/Zotero/storage/6XS2PM2P/Pedregosa et al. 
- 2011 - Scikit-learn Machine Learning in Python.pdf} +} + +@article{pfaff_var_2008, + title = {{{VAR}}, {{SVAR}} and {{SVEC Models}}: {{Implementation Within R Package}} {{vars}}}, + shorttitle = {{{VAR}}, {{SVAR}} and {{SVEC Models}}}, + author = {Pfaff, Bernhard}, + date = {2008-07-29}, + journaltitle = {Journal of Statistical Software}, + volume = {27}, + number = {4}, + pages = {1--32}, + issn = {1548-7660}, + issue = {4}, + langid = {english}, + file = {/home/nathante/Zotero/storage/RH6KYQN4/Pfaff_2008_VAR, SVAR and SVEC Models.pdf;/home/nathante/Zotero/storage/TV4UBIR4/v027i04.html} +} + +@article{piantadosi_ecological_1988, + title = {The Ecological Fallacy}, + author = {Piantadosi, Steven and Byar, David P and Green, Sylvan B}, + date = {1988}, + journaltitle = {American Journal of Epidemiology}, + volume = {127}, + pages = {893--904}, + langid = {english}, + file = {/home/nathante/Zotero/storage/2UZWZ4L5/Piantadosi et al. - THE ECOLOGICAL FALLACY.pdf} +} + +@article{pontikes_ecology_2014, + title = {An {{Ecology}} of {{Social Categories}}}, + author = {Pontikes, Elizabeth and Hannan, Michael}, + date = {2014}, + journaltitle = {Sociological Science}, + volume = {1}, + pages = {311--343}, + issn = {2330-6696}, + abstract = {This article proposes that meaningful social classification emerges from an ecological dynamic that operates in two planes: feature space and label space. It takes a dynamic view of classification, allowing objects’ movements in both spaces to change the meaning of social categories. The first part of the theory argues that agents assign labels to objects based on perceptions of their similarities to existing members of a category. The second part of the theory shows that an object’s perceived similarity to members of other categories reduces its typicality in a focal category.
This means that for categories with a high degree of overlap with other categories in label space (lenient categories), the link between feature-based similarities and labeling weakens. The findings suggest that social classification will likely evolve to contain both constraining and lenient categories. The theory implies that this process is self-reinforcing, so that constraining categories become more constraining, whereas lenient categories become more lenient.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/XPRTHWKT/Pontikes and Hannan - 2014 - An Ecology of Social Categories.pdf} +} + +@article{powell_network_2005, + title = {Network {{Dynamics}} and {{Field Evolution}}: {{The Growth}} of {{Interorganizational Collaboration}} in the {{Life Sciences}}}, + shorttitle = {Network {{Dynamics}} and {{Field Evolution}}}, + author = {Powell, Walter W. and White, Douglas R. and Koput, Kenneth W. and Owen‐Smith, Jason}, + date = {2005-01-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {110}, + number = {4}, + pages = {1132--1205}, + issn = {0002-9602}, + abstract = {A recursive analysis of network and institutional evolution is offered to account for the decentralized structure of the commercial field of the life sciences. Four alternative logics of attachment—accumulative advantage, homophily, follow‐the‐trend, and multiconnectivity—are tested to explain the structure and dynamics of interorganizational collaboration in biotechnology. Using multiple novel methods, the authors demonstrate how different rules for affiliation shape network evolution. Commercialization strategies pursued by early corporate entrants are supplanted by universities, research institutes, venture capital, and small firms. As organizations increase their collaborative activities and diversify their ties to others, cohesive subnetworks form, characterized by multiple, independent pathways. 
These structural components, in turn, condition the choices and opportunities available to members of a field, thereby reinforcing an attachment logic based on differential connections to diverse partners.}, + file = {/home/nathante/Zotero/storage/EF4XB53L/Powell et al. - 2005 - Network Dynamics and Field Evolution The Growth o.pdf;/home/nathante/Zotero/storage/LHDCZSJ8/Powell et al. - 2005 - Network Dynamics and Field Evolution The Growth o.pdf;/home/nathante/Zotero/storage/DMFDV96J/421508.html;/home/nathante/Zotero/storage/IA9J8P9S/421508.html} +} + +@article{ransbotham_membership_2011, + title = {Membership Turnover and Collaboration Success in Online Communities: {{Explaining}} Rises and Falls from Grace in {{Wikipedia}}}, + shorttitle = {Membership Turnover and Collaboration Success in Online Communities}, + author = {Ransbotham, Sam and Kane, Gerald C.}, + date = {2011}, + journaltitle = {MIS Quarterly}, + volume = {35}, + number = {3}, + pages = {613}, + file = {/home/nathante/Zotero/storage/76S4J3K6/8.html} +} + +@incollection{resnick_starting_2012, + title = {Starting New Online Communities}, + booktitle = {Building Successful Online Communities: {{Evidence}}-Based Social Design}, + author = {Resnick, Paul and Konstan, Joseph and Chen, Yan and Kraut, Robert E}, + date = {2012}, + pages = {231--280}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-29831-5}, + file = {/home/nathante/Zotero/storage/GFUVQWNN/06-Resnick10-Startup-current.pdf} +} + +@article{robinson_ecological_1950, + title = {Ecological {{Correlations}} and the {{Behavior}} of {{Individuals}}}, + author = {Robinson, W. 
S.}, + date = {1950}, + journaltitle = {American Sociological Review}, + volume = {15}, + number = {3}, + eprint = {2087176}, + eprinttype = {jstor}, + pages = {351--357}, + publisher = {{[American Sociological Association, Sage Publications, Inc.]}}, + issn = {0003-1224}, + file = {/home/nathante/Zotero/storage/8SXZCILH/Robinson_1950_Ecological Correlations and the Behavior of Individuals.pdf} +} + +@article{romer_endogenous_1990, + ids = {romer_endogenous_nodate}, + title = {Endogenous {{Technological Change}}}, + author = {Romer, Paul M.}, + date = {1990-10-01}, + journaltitle = {Journal of Political Economy}, + shortjournal = {Journal of Political Economy}, + volume = {98}, + pages = {S71-S102}, + publisher = {{The University of Chicago Press}}, + issn = {0022-3808}, + abstract = {Growth in this model is driven by technological change that arises from intentional investment decisions made by profit-maximizing agents. The distinguishing feature of the technology as an input is that it is neither a conventional good nor a public good; it is a nonrival, partially excludable good. Because of the nonconvexity introduced by a nonrival good, price-taking competition cannot be supported. Instead, the equilibrium is one with monopolistic competition. 
The main conclusions are that the stock of human capital determines the rate of growth, that too little human capital is devoted to research in equilibrium, that integration into world markets will increase growth rates, and that having a large population is not sufficient to generate growth.}, + issue = {5, Part 2}, + file = {/home/nathante/Zotero/storage/7P2Z89NB/Romer - Endogenous Technological Change.pdf;/home/nathante/Zotero/storage/LWDU35L4/Romer_1990_Endogenous Technological Change.pdf;/home/nathante/Zotero/storage/ZGZ7ARQX/261725.html} +} + +@article{roughgarden_competition_1983, + title = {Competition and {{Theory}} in {{Community Ecology}}}, + author = {Roughgarden, Jonathan}, + date = {1983-11-01}, + journaltitle = {The American Naturalist}, + shortjournal = {The American Naturalist}, + volume = {122}, + number = {5}, + pages = {583--601}, + publisher = {{The University of Chicago Press}}, + issn = {0003-0147}, + file = {/home/nathante/Zotero/storage/GTX2ZMUV/Roughgarden_1983_Competition and Theory in Community Ecology.pdf;/home/nathante/Zotero/storage/KW74SQ2C/284160.html} +} + +@article{rousseeuw_silhouettes_1987, + title = {Silhouettes: {{A}} Graphical Aid to the Interpretation and Validation of Cluster Analysis}, + shorttitle = {Silhouettes}, + author = {Rousseeuw, Peter J.}, + date = {1987-11-01}, + journaltitle = {Journal of Computational and Applied Mathematics}, + shortjournal = {Journal of Computational and Applied Mathematics}, + volume = {20}, + pages = {53--65}, + issn = {0377-0427}, + abstract = {A new graphical display is proposed for partitioning techniques. Each cluster is represented by a so-called silhouette, which is based on the comparison of its tightness and separation. This silhouette shows which objects lie well within their cluster, and which ones are merely somewhere in between clusters. 
The entire clustering is displayed by combining the silhouettes into a single plot, allowing an appreciation of the relative quality of the clusters and an overview of the data configuration. The average silhouette width provides an evaluation of clustering validity, and might be used to select an ‘appropriate’ number of clusters.}, + langid = {english}, + keywords = {classification,cluster analysis,clustering validity,Graphical display}, + file = {/home/nathante/Zotero/storage/FP4RLR43/Rousseeuw_1987_Silhouettes.pdf;/home/nathante/Zotero/storage/SPBGRW8Q/0377042787901257.html} +} + +@article{ruef_credit_2009, + title = {Credit and {{Classification}}: {{The Impact}} of {{Industry Boundaries}} in {{Nineteenth}}-{{Century America}}}, + shorttitle = {Credit and {{Classification}}}, + author = {Ruef, Martin and Patterson, Kelly}, + date = {2009-09-01}, + journaltitle = {Administrative Science Quarterly}, + shortjournal = {Administrative Science Quarterly}, + volume = {54}, + number = {3}, + pages = {486--520}, + issn = {0001-8392}, + abstract = {In this article, we examine how issues of multi-category membership (hybridity) were handled during the evolution of one of the first general systems of industrial classification in the United States, the credit rating schema of R. G. Dun and Company. Drawing on a repeated cross-sectional study of credit evaluations during the postbellum period (1870–1900), our empirical analyses suggest that organizational membership in multiple categories need not be problematic when classification systems themselves are emergent or in flux and when organizations avoid rare combinations or identities involving ambiguous components. As Dun's schema became institutionalized, boundaries between industries were more clearly defined and boundary violations became subject to increased attention and penalty by credit reporters. 
Our perspective highlights the utility of an evolutionary perspective and tests its implications for the salience of distinct mechanisms of hybridity.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6P8JPZX3/Ruef and Patterson - 2009 - Credit and Classification The Impact of Industry .pdf} +} + +@article{ruef_emergence_2000, + title = {The Emergence of Organizational Forms: {{A}} Community Ecology Approach}, + shorttitle = {The {{Emergence}} of {{Organizational Forms}}}, + author = {Ruef, Martin}, + date = {2000-11-01}, + journaltitle = {American Journal of Sociology}, + volume = {106}, + number = {3}, + pages = {658--714}, + file = {/home/nathante/Zotero/storage/X6KXYEI5/Ruef - 2000 - The Emergence of Organizational Forms A Community.pdf;/home/nathante/Zotero/storage/NHGAJDIR/318963.html} +} + +@book{sayama_introduction_2015, + title = {Introduction to the {{Modeling}} and {{Analysis}} of {{Complex Systems}}}, + author = {Sayama, Hiroki}, + date = {2015}, + publisher = {{Open SUNY Textbooks, Milne Library}}, + location = {{Geneseo, NY}}, + isbn = {978-1-942341-09-3}, + langid = {english}, + file = {/home/nathante/Zotero/storage/PYSMX3D8/Sayama - Introduction to the Modeling and Analysis of Compl.pdf} +} + +@article{schoener_resource_1974, + title = {Resource {{Partitioning}} in {{Ecological Communities}}}, + author = {Schoener, Thomas W.}, + date = {1974}, + journaltitle = {Science}, + volume = {185}, + number = {4145}, + eprint = {1738612}, + eprinttype = {jstor}, + pages = {27--39}, + issn = {0036-8075}, + file = {/home/nathante/Zotero/storage/R86IDGJN/1738612.pdf;/home/nathante/Zotero/storage/U4UCJ2BT/Schoener - 1974 - Resource Partitioning in Ecological Communities.pdf} +} + +@book{schweik_internet_2012, + title = {Internet Success: {{A}} Study of Open-Source Software Commons}, + shorttitle = {Internet Success}, + author = {Schweik, Charles M. 
and English, Robert C.}, + date = {2012}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-01725-1}, + pagetotal = {351} +} + +@article{shaw_laboratories_2014, + title = {Laboratories of Oligarchy? {{How}} the Iron Law Extends to Peer Production}, + shorttitle = {Laboratories of {{Oligarchy}}?}, + author = {Shaw, Aaron and Hill, Benjamin Mako}, + date = {2014}, + journaltitle = {Journal of Communication}, + shortjournal = {J Commun}, + volume = {64}, + number = {2}, + pages = {215--238}, + issn = {1460-2466}, + abstract = {Peer production projects like Wikipedia have inspired voluntary associations, collectives, social movements, and scholars to embrace open online collaboration as a model of democratic organization. However, many peer production projects exhibit entrenched leadership and deep inequalities, suggesting that they may not fulfill democratic ideals. Instead, peer production projects may conform to Robert Michels' “iron law of oligarchy,” which proposes that democratic membership organizations become increasingly oligarchic as they grow. Using exhaustive data of internal processes from a sample of 683 wikis, we construct empirical measures of participation and test for increases in oligarchy associated with growth in wikis' contributor bases. 
In contrast to previous studies, we find support for Michels' iron law and conclude that peer production entails oligarchic organizational forms.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/GIII687R/Shaw and Hill - 2014 - Laboratories of oligarchy How the iron law extend.pdf;/home/nathante/Zotero/storage/W3846GC6/full.html} +} + +@article{shaw_pipeline_2018, + title = {The {{Pipeline}} of {{Online Participation Inequalities}}: {{The Case}} of {{Wikipedia Editing}}}, + shorttitle = {The {{Pipeline}} of {{Online Participation Inequalities}}}, + author = {Shaw, Aaron and Hargittai, Eszter}, + date = {2018-02-01}, + journaltitle = {Journal of Communication}, + shortjournal = {Journal of Communication}, + volume = {68}, + number = {1}, + pages = {143--168}, + issn = {0021-9916}, + abstract = {Digital inequalities undermine the democratizing potential of the Internet. While many people engage in public discourse through participatory media, knowledge gaps limit engagement in the networked public sphere. Participatory web platforms have unique potential to facilitate a more equitable production of knowledge. This paper conceptualizes a pipeline of online participation and models the awareness and behaviors necessary to become a contributor to the networked public sphere. We test the theory with the case of Wikipedia editing, relying on survey data from a diverse, national sample of U.S. adults. 
Our findings underscore the multidimensionality of digital inequalities and suggest new pathways toward closing knowledge gaps by highlighting the importance of education and Internet skills for online stratification processes.}, + keywords = {Digital Inequality,Internet & society,Internet Skills,Knowledge Gap,Knowledge gap theory (Communication),online participation,Social participation,Social stratification,Survey Research,wikipedia}, + file = {/home/nathante/Zotero/storage/IIFZGIVP/Shaw and Hargittai - 2018 - The pipeline of online participation inequalities.pdf;/home/nathante/Zotero/storage/NCJPN2PQ/Shaw and Hargittai - 2018 - The Pipeline of Online Participation Inequalities.pdf;/home/nathante/Zotero/storage/8VA8V6VV/Shaw and Hargittai - 2018.html;/home/nathante/Zotero/storage/WAUM42SV/4915319.html} +} + +@book{shirky_here_2008, + title = {Here Comes Everybody: {{The}} Power of Organizing without Organizations}, + author = {Shirky, Clay}, + date = {2008}, + publisher = {{Penguin Press}}, + location = {{New York, NY}}, + abstract = {An examination of how the rapid spread of new forms of social interaction enabled by technology is changing the way humans form groups and exist within them, with profound long-term economic and social effects--for good and for ill. Our age's new technologies of social networking are evolving, and evolving us, into new groups doing new things in new ways, and old and new groups alike doing the old things better and more easily. Hierarchical structures that exist to manage the work of groups are seeing their raisons d'\^etre swiftly eroded by the rising tide. Business models are being destroyed, transformed, born at dizzying speeds, and the larger social impact is profound. Clay Shirky is one of our wisest observers of the transformational power of the new forms of tech-enabled social interaction, and this is his reckoning with the ramifications of all this on what we do and who we are.--From publisher description. 
Discusses and uses examples of how digital networks transform the ability of humans to gather and cooperate with one another.}, + isbn = {978-1-59420-153-0}, + langid = {english}, + keywords = {FOSS,Media Studies}, + file = {/home/nathante/Zotero/storage/DHBTQ79D/shirky-2008.pdf} +} + +@article{sims_macroeconomics_1980, + title = {Macroeconomics and {{Reality}}}, + author = {Sims, Christopher A.}, + date = {1980}, + journaltitle = {Econometrica}, + volume = {48}, + number = {1}, + eprint = {1912017}, + eprinttype = {jstor}, + pages = {1--48}, + issn = {0012-9682}, + abstract = {[Existing strategies for econometric analysis related to macroeconomics are subject to a number of serious objections, some recently formulated, some old. These objections are summarized in this paper, and it is argued that taken together they make it unlikely that macroeconomic models are in fact over identified, as the existing statistical theory usually assumes. The implications of this conclusion are explored, and an example of econometric work in a non-standard style, taking account of the objections to the standard style, is presented.]}, + file = {/home/nathante/Zotero/storage/5L9AKP48/Sims - 1980 - Macroeconomics and Reality.pdf} +} + +@article{sorensen_recruitment-based_2004, + ids = {sorensen_recruitment-based_2004-1}, + title = {Recruitment-Based Competition between Industries: A Community Ecology}, + shorttitle = {Recruitment-Based Competition between Industries}, + author = {Sørensen, Jesper B.}, + date = {2004-02-01}, + journaltitle = {Industrial and Corporate Change}, + shortjournal = {Ind Corp Change}, + volume = {13}, + number = {1}, + pages = {149--170}, + publisher = {{Oxford Academic}}, + issn = {0960-6491}, + abstract = {Abstract. 
Because entrepreneurs often must recruit labor in order to launch their ventures, the labor market is a potential source of constraint in the entrepr}, + langid = {english}, + file = {/home/nathante/Zotero/storage/Z4KJZUBF/Sorensen - 2004 - Recruitment-based competition between industries .pdf;/home/nathante/Zotero/storage/PHCVIXUJ/707535.html} +} + +@article{sugihara_nonlinear_1994, + title = {Nonlinear Forecasting for the Classification of Natural Time Series}, + author = {Sugihara, George and Grenfell, Bryan Thomas and May, Robert McCredie and Tong, H.}, + date = {1994-09-15}, + journaltitle = {Philosophical Transactions of the Royal Society of London. Series A: Physical and Engineering Sciences}, + shortjournal = {Philosophical Transactions of the Royal Society of London. Series A: Physical and Engineering Sciences}, + volume = {348}, + number = {1688}, + pages = {477--495}, + publisher = {{Royal Society}}, + abstract = {There is a growing trend in the natural sciences to view time series as products of dynamical systems. This viewpoint has proven to be particularly useful in stimulating debate and insight into the nature of the underlying generating mechanisms. Here I review some of the issues concerning the use of forecasting in the detection of nonlinearities and possible chaos, particularly with regard to stochastic chaos. Moreover, it is shown how recent attempts to measure meaningful Lyapunov exponents for ecological data are fundamentally flawed, and that when observational noise is convolved with process noise, computing Lyapunov exponents for the real system will be difficult. Such problems pave the way for more operational definitions of dynamic complexity (cf. Yao \& Tong, this volume) . Aside from its use in the characterization of chaos, nonlinear forecasting can be used more broadly in pragmatic classification problems. Here I review a recent example of nonlinear forecasting as it is applied to classify human heart rhythms. 
In particular, it is shown how forecast nonlinearity can be a good discriminator of the physiological effects of age, and how prediction-decay may discriminate heart disease. In so doing, I introduce a method for characterizing nonlinearity using ‘S-maps’ and a method for analysing multiple short time series with composite attractors.}, + file = {/home/nathante/Zotero/storage/TGW3IUGS/Sugihara et al_1994_Nonlinear forecasting for the classification of natural time series.pdf;/home/nathante/Zotero/storage/CGSTKS5R/rsta.1994.html} +} + +@inproceedings{suh_singularity_2009, + title = {The Singularity Is Not near: Slowing Growth of {{Wikipedia}}}, + shorttitle = {The {{Singularity}} Is {{Not Near}}}, + booktitle = {Proceedings of the 5th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Suh, Bongwon and Convertino, Gregorio and Chi, Ed H. and Pirolli, Peter}, + date = {2009}, + series = {{{WikiSym}} '09}, + pages = {1--10}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Prior research on Wikipedia has characterized the growth in content and editors as being fundamentally exponential in nature, extrapolating current trends into the future. We show that recent editing activity suggests that Wikipedia growth has slowed, and perhaps plateaued, indicating that it may have come against its limits to growth. We measure growth, population shifts, and patterns of editor and administrator activities, contrasting these against past results where possible. Both the rate of page growth and editor growth has declined. As growth has declined, there are indicators of increased coordination and overhead costs, exclusion of newcomers, and resistance to new edits. 
We discuss some possible explanations for these new developments in Wikipedia including decreased opportunities for sharing existing knowledge and increased bureaucratic stress on the socio-technical system itself.}, + isbn = {978-1-60558-730-1}, + file = {/home/nathante/Zotero/storage/WTEMKAUC/Suh et al. - 2009 - The singularity is not near slowing growth of Wik.pdf} +} + +@inproceedings{tan_all_2015, + title = {All Who Wander: {{On}} the Prevalence and Characteristics of Multi-Community Engagement}, + shorttitle = {All Who Wander}, + booktitle = {Proceedings of the 24th {{International Conference}} on {{World Wide Web}}}, + author = {Tan, Chenhao and Lee, Lillian}, + date = {2015}, + series = {{{WWW}} '15}, + pages = {1056--1066}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + location = {{Republic and Canton of Geneva, Switzerland}}, + abstract = {Although analyzing user behavior within individual communities is an active and rich research domain, people usually interact with multiple communities both on- and off-line. How do users act in such multi-community environments? Although there are a host of intriguing aspects to this question, it has received much less attention in the research community in comparison to the intra-community case. In this paper, we examine three aspects of multi-community engagement: the sequence of communities that users post to, the language that users employ in those communities, and the feedback that users receive, using longitudinal posting behavior on Reddit as our main data source, and DBLP for auxiliary experiments. We also demonstrate the effectiveness of features drawn from these aspects in predicting users' future level of activity. One might expect that a user's trajectory mimics the "settling-down" process in real life: an initial exploration of sub-communities before settling down into a few niches. 
However, we find that the users in our data continually post in new communities; moreover, as time goes on, they post increasingly evenly among a more diverse set of smaller communities. Interestingly, it seems that users that eventually leave the community are "destined" to do so from the very beginning, in the sense of showing significantly different "wandering" patterns very early on in their trajectories; this finding has potentially important design implications for community maintainers. Our multi-community perspective also allows us to investigate the "situation vs. personality" debate from language usage across different communities.}, + isbn = {978-1-4503-3469-3}, + keywords = {DBLP,language,lifecycle,multiple communities,reddit}, + file = {/home/nathante/Zotero/storage/8GL2XQG3/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf;/home/nathante/Zotero/storage/J3RVCH26/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf} +} + +@inproceedings{tan_tracing_2018, + title = {Tracing Community Genealogy: How New Communities Emerge from the Old}, + shorttitle = {Tracing {{Community Genealogy}}}, + booktitle = {Proceedings of the {{Twelfth International Conference}} on {{Web}} and {{Social Media}} ({{ICWSM}} '18)}, + author = {Tan, Chenhao}, + date = {2018}, + pages = {395--404}, + publisher = {{AAAI}}, + location = {{Palo Alto, California}}, + abstract = {The process by which new communities emerge is a central research issue in the social sciences. While a growing body of research analyzes the formation of a single community by examining social networks between individuals, we introduce a novel community-centered perspective. We highlight the fact that the context in which a new community emerges contains numerous existing communities. 
We reveal the emerging process of communities by tracing their early members’ previous community memberships.}, + file = {/home/nathante/Zotero/storage/QEAEMFYR/Tan - 2018 - Tracing Community Genealogy How New Communities E.pdf} +} + +@inproceedings{teblunthuis_density_2017, + title = {Density Dependence without Resource Partitioning: Population Ecology on {{Change}}.Org}, + shorttitle = {Density {{Dependence Without Resource Partitioning}}}, + booktitle = {Companion of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2017}, + series = {{{CSCW}} '17 {{Companion}}}, + pages = {323--326}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {E-petitioning is a prominent form of Internet-based collective action. We apply theories from organizational population ecology to investigate whether similar petitions compete for signatures. We use latent Dirichlet allocation (LDA) topic modeling to identify topical niches. Using these niches, we test two theories from population ecology on 442,109 Change.org petitions. First, we find evidence for density dependence, an inverse-U-shaped relationship between the density of a petition's niche and the number of signatures the petition obtains. This suggests e-petitioning is competitive and that e-petitions draw on overlapping resource pools. Second, although resource partitioning theory predicts that topically specialized petitions will obtain more signatures in concentrated populations, we find no evidence of this. This suggests that specialists struggle to avoid competition with generalists.}, + isbn = {978-1-4503-4688-7}, + file = {/home/nathante/Zotero/storage/54585RCP/TeBlunthuis et al. 
- 2017 - Density dependence without resource partitioning .pdf} +} + +@unpublished{teblunthuis_population_2020, + title = {The Population Ecology of Online Collective Action}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2020-06-19}, + eventtitle = {6th {{International Conference}} on {{Computational Social Science}}} +} + +@inproceedings{teblunthuis_revisiting_2018, + title = {Revisiting "{{The}} Rise and Decline" in a Population of Peer Production Projects}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '18)}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2018}, + pages = {355:1--355:7}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Do patterns of growth and stabilization found in large peer production systems such as Wikipedia occur in other communities? This study assesses the generalizability of Halfaker et al.'s influential 2013 paper on "The Rise and Decline of an Open Collaboration System." We replicate its tests of several theories related to newcomer retention and norm entrenchment using a dataset of hundreds of active peer production wikis from Wikia. We reproduce the subset of the findings from Halfaker and colleagues that we are able to test, comparing both the estimated signs and magnitudes of our models. Our results support the external validity of Halfaker et al.'s claims that quality control systems may limit the growth of peer production communities by deterring new contributors and that norms tend to become entrenched over time.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/7YEVSVQM/TeBlunthuis et al. 
- 2018 - Revisiting The Rise and Decline in a Population .pdf} +} + +@inproceedings{tsugawa_impact_2019, + ids = {tsugawa_impact_2019-2}, + title = {The Impact of Social Network Structure on the Growth and Survival of Online Communities}, + booktitle = {Proceedings of the 2019 {{IEEE}}/{{ACM International Conference}} on {{Advances}} in {{Social Networks Analysis}} and {{Mining}}}, + author = {Tsugawa, Sho and Niida, Sumaru}, + date = {2019-08-27}, + series = {{{ASONAM}} '19}, + pages = {1112--1119}, + publisher = {{Association for Computing Machinery}}, + location = {{Vancouver, British Columbia, Canada}}, + abstract = {While online communities are important platforms for various social activities, many online communities fail to survive, which motivates researchers to investigate factors affecting the growth and survival of online communities. We comprehensively examine the effects of a wide variety of social network features on the growth and survival of communities in Reddit. We show that several social network features, including clique ratio, density, clustering coefficient, reciprocity and centralization, have significant effects on the survival of communities. In contrast, we also show that social network features examined in this paper only have weak effects on the growth of communities. Moreover, we conducted experiments predicting future growth and survival of online communities from social network features. 
The results show that social network features are useful for predicting the survival of communities but not for predicting their growth.}, + isbn = {978-1-4503-6868-1}, + file = {/home/nathante/Zotero/storage/8JF3SZ74/Tsugawa and Niida - 2019 - The impact of social network structure on the grow.pdf;/home/nathante/Zotero/storage/J9RMRP49/Tsugawa_Niida_2019_The impact of social network structure on the growth and survival of online.pdf;/home/nathante/Zotero/storage/L4EQ4VRI/Tsugawa_Niida_2019_The impact of social network structure on the growth and survival of online.pdf} +} + +@article{ven_explaining_1995, + title = {Explaining {{Development}} and {{Change}} in {{Organizations}}}, + author = {{Van de Ven}, Andrew H. and Poole, Marshall Scott}, + date = {1995-07-01}, + journaltitle = {Academy of Management Review}, + shortjournal = {ACAD MANAGE REV}, + volume = {20}, + number = {3}, + pages = {510--540}, + issn = {0363-7425, 1930-3807}, + abstract = {This article introduces four basic theories that may serve as building blocks for explaining processes of change in organizations: life cycle, teleology, dialectics, and evolution. These four theories represent different sequences of change events that are driven by different conceptual motors and operate at different organizational levels. 
This article identifies the circumstances when each theory applies and proposes how interplay among the theories produces a wide variety of more complex theories of change and development in organizational life.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/APD9T5KZ/258786.pdf;/home/nathante/Zotero/storage/FBX2F2XQ/510.html} +} + +@book{verhoef_community_2010, + title = {Community Ecology: Processes, Models, and Applications}, + shorttitle = {Community Ecology}, + author = {Verhoef, Herman A and Morin, Peter J}, + date = {2010}, + publisher = {{Oxford University Press}}, + location = {{Oxford}}, + isbn = {978-0-19-922897-3 978-0-19-922898-0}, + langid = {english}, + annotation = {OCLC: 876676566} +} + +@inproceedings{vincent_examining_2018, + title = {Examining {{Wikipedia}} with a Broader Lens: {{Quantifying}} the Value of {{Wikipedia}}'s Relationships with Other Large-Scale Online Communities}, + shorttitle = {Examining {{Wikipedia With}} a {{Broader Lens}}}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Vincent, Nicholas and Johnson, Isaac and Hecht, Brent}, + date = {2018}, + series = {{{CHI}} '18}, + pages = {566:1--566:13}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {The extensive Wikipedia literature has largely considered Wikipedia in isolation, outside of the context of its broader Internet ecosystem. Very recent research has demonstrated the significance of this limitation, identifying critical relationships between Google and Wikipedia that are highly relevant to many areas of Wikipedia-based research and practice. This paper extends this recent research beyond search engines to examine Wikipedia's relationships with large-scale online communities, Stack Overflow and Reddit in particular. We find evidence of consequential, albeit unidirectional relationships. 
Wikipedia provides substantial value to both communities, with Wikipedia content increasing visitation, engagement, and revenue, but we find little evidence that these websites contribute to Wikipedia in return. Overall, these findings highlight important connections between Wikipedia and its broader ecosystem that should be considered by researchers studying Wikipedia. Critically, our results also emphasize the key role that volunteer-created Wikipedia content plays in improving other websites, even contributing to revenue generation.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/8YF9QUFS/Vincent et al. - 2018 - Examining Wikipedia With a Broader Lens Quantifyi.pdf;/home/nathante/Zotero/storage/FHXYQSZK/Vincent et al. - 2018 - Examining Wikipedia With a Broader Lens Quantifyi.pdf} +} + +@inproceedings{waller_generalists_2019, + title = {Generalists and {{Specialists}}: {{Using Community Embeddings}} to {{Quantify Activity Diversity}} in {{Online Platforms}}}, + shorttitle = {Generalists and {{Specialists}}}, + booktitle = {The {{World Wide Web Conference}} on - {{WWW}} '19}, + author = {Waller, Isaac and Anderson, Ashton}, + date = {2019}, + pages = {1954--1964}, + publisher = {{ACM Press}}, + location = {{San Francisco, CA, USA}}, + abstract = {In many online platforms, people must choose how broadly to allocate their energy. Should one concentrate on a narrow area of focus, and become a specialist, or apply oneself more broadly, and become a generalist? In this work, we propose a principled measure of how generalist or specialist a user is, and study behavior in online platforms through this lens. To do this, we construct highly accurate community embeddings that represent communities in a high-dimensional space. We develop sets of community analogies and use them to optimize our embeddings so that they encode community relationships extremely well. 
Based on these embeddings, we introduce a natural measure of activity diversity, the GS-score. Applying our embedding-based measure to online platforms, we observe a broad spectrum of user activity styles, from extreme specialists to extreme generalists, in both community membership on Reddit and programming contributions on GitHub. We find that activity diversity is related to many important phenomena of user behavior. For example, specialists are much more likely to stay in communities they contribute to, but generalists are much more likely to remain on platforms as a whole. We also find that generalists engage with significantly more diverse sets of users than specialists do. Furthermore, our methodology leads to a simple algorithm for community recommendation, matching state-of-the-art methods like collaborative filtering. Our methods and results introduce an important new dimension of online user behavior and shed light on many aspects of online platform use.}, + eventtitle = {The {{World Wide Web Conference}}}, + isbn = {978-1-4503-6674-8}, + langid = {english}, + keywords = {activity diversity,community embeddings,community recommendation,generalist and specialists}, + file = {/home/nathante/Zotero/storage/5F77953J/Waller and Anderson - 2019 - Generalists and Specialists Using Community Embed.pdf;/home/nathante/Zotero/storage/PK32L55Y/Waller and Anderson - 2019 - Generalists and Specialists Using Community Embed.pdf} +} + +@inproceedings{wang_coming_2015, + title = {Coming of {{Age}} ({{Digitally}}): {{An Ecological View}} of {{Social Media Use}} among {{College Students}}}, + shorttitle = {Coming of {{Age}} ({{Digitally}})}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Wang, Yiran and Niiya, Melissa and Mark, Gloria and Reich, Stephanie M. 
and Warschauer, Mark}, + date = {2015-02-28}, + series = {{{CSCW}} '15}, + pages = {571--582}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {We take an ecological approach to studying social media use and its relation to mood among college students. We conducted a mixed-methods study of computer and phone logging with daily surveys and interviews to track college students' use of social media during all waking hours over seven days. Continual and infrequent checkers show different preferences of social media sites. Age differences also were found. Lower classmen tend to be heavier users and to primarily use Facebook, while upper classmen use social media less frequently and utilize sites other than Facebook more often. Factor analysis reveals that social media use clusters into patterns of content-sharing, text-based entertainment/discussion, relationships, and video consumption. The more constantly one checks social media daily, the less positive is one's mood. Our results suggest that students construct their own patterns of social media usage to meet their changing needs in their environment. The findings can inform further investigation into social media use as a benefit and/or distraction for students.}, + isbn = {978-1-4503-2922-4}, + keywords = {college students,computer logging,facebook,in situ study,social media}, + file = {/home/nathante/Zotero/storage/B6BFNKKK/Wang et al_2015_Coming of Age (Digitally).pdf} +} + +@article{wang_impact_2012, + ids = {wang_impact_2013}, + title = {The Impact of Membership Overlap on Growth: {{An}} Ecological Competition View of Online Groups}, + shorttitle = {The Impact of Membership Overlap on Growth}, + author = {Wang, Xiaoqing and Butler, Brian S. 
and Ren, Yuqing}, + date = {2012-06-15}, + journaltitle = {Organization Science}, + shortjournal = {Organization Science}, + volume = {24}, + number = {2}, + pages = {414--431}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {The dominant narrative of the Internet has been one of unconstrained growth, abundance, and plenitude. It is in this context that new forms of organizing, such as online groups, have emerged. However, the same factors that underlie the utopian narrative of Internet life also give rise to numerous online groups, many of which fail to attract participants or to provide significant value. This suggests that despite the potential transformative nature of modern information technology, issues of scarcity, competition, and context may remain critical to the performance and functioning of online groups. In this paper, we draw from organizational ecology theories to develop an ecological view of online groups to explain how overlapping membership among online groups causes intergroup competition for member attention and affects a group's ability to grow. Hypotheses regarding the effects of group size, age, and membership overlap on growth are proposed and tested with data from a 64-month, longitudinal sample of 240 online discussion groups. The analysis shows that sharing members with other groups reduced future growth rates, suggesting that membership overlap puts competitive pressure on online groups. Our results also suggest that, compared with smaller and younger groups, larger and older groups experience greater difficulty in growing their membership. In addition, larger groups were more vulnerable to competitive pressure than smaller groups: larger groups experienced greater difficulty in growing their membership than smaller groups as competition intensified. 
Overall, our findings show how an abundance of opportunities afforded by technologies can create scarcity in user time and effort, which increases competitive pressure on online groups. Our ecological view extends organizational ecology theory to new organizational forms online and highlights the importance of studying the competitive environment of online groups.}, + file = {/home/nathante/Zotero/storage/3WI37Y9S/Wang et al. - 2013 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/D7GAZURV/Wang et al. - 2012 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/EQSW25XD/Wang et al. - 2012 - The impact of membership overlap on growth An eco.pdf;/home/nathante/Zotero/storage/8QDPVTSM/orsc.1120.html;/home/nathante/Zotero/storage/IK6SB3L8/orsc.1120.html} +} + +@article{wasko_why_2005, + title = {Why {{Should I Share}}? {{Examining Social Capital}} and {{Knowledge Contribution}} in {{Electronic Networks}} of {{Practice}}}, + shorttitle = {Why {{Should I Share}}?}, + author = {Wasko, Molly McLure and Faraj, Samer}, + date = {2005}, + journaltitle = {MIS Quarterly}, + volume = {29}, + number = {1}, + eprint = {25148667}, + eprinttype = {jstor}, + pages = {35--57}, + publisher = {{Management Information Systems Research Center, University of Minnesota}}, + issn = {0276-7783}, + abstract = {Electronic networks of practice are computer-mediated discussion forums focused on problems of practice that enable individuals to exchange advice and ideas with others based on common interests. However, why individuals help strangers in these electronic networks is not well understood: there is no immediate benefit to the contributor, and free-riders are able to acquire the same knowledge as everyone else. To understand this paradox, we apply theories of collective action to examine how individual motivations and social capital influence knowledge contribution in electronic networks. 
This study reports on the activities of one electronic network supporting a professional legal association. Using archival, network, survey, and content analysis data, we empirically test a model of knowledge contribution. We find that people contribute their knowledge when they perceive that it enhances their professional reputations, when they have the experience to share, and when they are structurally embedded in the network. Surprisingly, contributions occur without regard to expectations of reciprocity from others or high levels of commitment to the network.}, + file = {/home/nathante/Zotero/storage/JHMZDCUP/Wasko_Faraj_2005_Why Should I Share.pdf} +} + +@unpublished{weber_political_2000, + title = {The {{Political Economy}} of {{Open Source Software}}}, + author = {Weber, Steven}, + date = {2000-06}, + langid = {english}, + file = {/home/nathante/Zotero/storage/MZQLT27W/Weber - The Political Economy of Open Source Software.pdf} +} + +@book{worster_natures_1994, + title = {Nature's Economy: A History of Ecological Ideas}, + shorttitle = {Nature's Economy}, + author = {Worster, Donald}, + date = {1994}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge; New York, NY, USA}}, + abstract = {Nature's Economy is a wide-ranging investigation of ecology's past. 
It traces the origins of the concept, discusses the thinkers who have shaped it, and shows how it in turn has shaped the modern perception of our place in nature.}, + isbn = {978-1-107-26680-3}, + langid = {english}, + annotation = {OCLC: 855524849}, + file = {/home/nathante/Zotero/storage/E2XXC7KJ/(Studies in Environment and History) Worster D.-Nature's Economy_ A History of Ecological Ideas-Cambridge University Press (1994).djvu} +} + +@article{xu_modeling_2017, + title = {Modeling the Adoption of Social Media by Newspaper Organizations: {{An}} Organizational Ecology Approach}, + shorttitle = {Modeling the Adoption of Social Media by Newspaper Organizations}, + author = {Xu, Yu}, + date = {2017-02-01}, + journaltitle = {Telematics and Informatics}, + shortjournal = {Telematics and Informatics}, + volume = {34}, + number = {1}, + pages = {151--163}, + issn = {0736-5853}, + abstract = {Although the ecological approach has been utilized in the field of communication, no prior research has applied this perspective to examine the organizational selection of social media. This study employs the framework of density dependence to understand what drives the adoption of social media by organizations. Fixed-effects negative binominal regression models were run to test the hypotheses that predicted the founding rates of 2007 Chinese newspaper organizations in 31 provincial units on Sina Weibo from August 2009 to June 2015. The results show that the founding rate of party newspapers exhibits inverted U-shaped relationships to local or non-local party newspaper density and non-party newspaper density. At the same time, the density dependence hypothesis is supported only for the effect of non-party newspaper density outside the provincial unit on the founding rate of non-party newspapers. Unexpectedly, non-party newspaper density within the provincial boundary exerts no significant influence on this founding rate. 
Both local and non-local party newspaper densities significantly and negatively influence the decisions to start homepages on Sina Weibo among non-newspaper organizations. Discussion and implications are provided.}, + file = {/home/nathante/Zotero/storage/FZT5VZNZ/Xu - 2017 - Modeling the adoption of social media by newspaper.pdf;/home/nathante/Zotero/storage/JYGYT3XA/XU - 2018 - The Ecological Dynamics of Organizational Change .pdf;/home/nathante/Zotero/storage/YLREBQ4E/S0736585315300812.html} +} + +@article{yarchi_political_2021, + title = {Political {{Polarization}} on the {{Digital Sphere}}: {{A Cross}}-Platform, {{Over}}-Time {{Analysis}} of {{Interactional}}, {{Positional}}, and {{Affective Polarization}} on {{Social Media}}}, + shorttitle = {Political {{Polarization}} on the {{Digital Sphere}}}, + author = {Yarchi, Moran and Baden, Christian and Kligler-Vilenchik, Neta}, + date = {2021-03-15}, + journaltitle = {Political Communication}, + volume = {38}, + number = {1-2}, + pages = {98--139}, + publisher = {{Routledge}}, + issn = {1058-4609}, + abstract = {Political polarization on the digital sphere poses a real challenge to many democracies around the world. Although the issue has received some scholarly attention, there is a need to improve the conceptual precision in the increasingly blurry debate. The use of computational communication science approaches allows us to track political conversations in a fine-grained manner within their natural settings – the realm of interactive social media. The present study combines different algorithmic approaches to studying social media data in order to capture both the interactional structure and content of dynamic political talk online. We conducted an analysis of political polarization across social media platforms (analyzing Facebook, Twitter, and WhatsApp) over 16 months, with close to a quarter million online contributions regarding a political controversy in Israel. 
Our comprehensive measurement of interactive political talk enables us to address three key aspects of political polarization: (1) interactional polarization – homophilic versus heterophilic user interactions; (2) positional polarization – the positions expressed, and (3) affective polarization – the emotions and attitudes expressed. Our findings indicate that political polarization on social media cannot be conceptualized as a unified phenomenon, as there are significant cross-platform differences. While interactions on Twitter largely conform to established expectations (homophilic interaction patterns, aggravating positional polarization, pronounced inter-group hostility), on WhatsApp, de-polarization occurred over time. Surprisingly, Facebook was found to be the least homophilic platform in terms of interactions, positions, and emotions expressed. Our analysis points to key conceptual distinctions and raises important questions about the drivers and dynamics of political polarization online.}, + keywords = {computational communication science approach,cross-platform analysis,over-time analysis,Political polarization,social media}, + annotation = {\_eprint: https://doi.org/10.1080/10584609.2020.1785067}, + file = {/home/nathante/Zotero/storage/2FVADM6B/Yarchi et al_2021_Political Polarization on the Digital Sphere.pdf;/home/nathante/Zotero/storage/7YA6IE6V/10584609.2020.html} +} + +@article{zhang_community_2017, + title = {Community Identity and User Engagement in a Multi-Community Landscape}, + author = {Zhang, Justine and Hamilton, William L. and Danescu-Niculescu-Mizil, Cristian and Jurafsky, Dan and Leskovec, Jure}, + date = {2017-05}, + journaltitle = {Proceedings of the International AAAI Conference on Weblogs and Social Media. 
International AAAI Conference on Weblogs and Social Media}, + shortjournal = {Proc Int AAAI Conf Weblogs Soc Media}, + volume = {2017}, + eprint = {29354325}, + eprinttype = {pmid}, + pages = {377--386}, + issn = {2162-3449}, + abstract = {A community’s identity defines and shapes its internal dynamics. Our current understanding of this interplay is mostly limited to glimpses gathered from isolated studies of individual communities. In this work we provide a systematic exploration of the nature of this relation across a wide variety of online communities. To this end we introduce a quantitative, language-based typology reflecting two key aspects of a community’s identity: how distinctive, and how temporally dynamic it is. By mapping almost 300 Reddit communities into the landscape induced by this typology, we reveal regularities in how patterns of user engagement vary with the characteristics of a community. Our results suggest that the way new and existing users engage with a community depends strongly and systematically on the nature of the collective identity it fosters, in ways that are highly consequential to community maintainers. For example, communities with distinctive and highly dynamic identities are more likely to retain their users. However, such niche communities also exhibit much larger acculturation gaps between existing users and newcomers, which potentially hinder the integration of the latter. More generally, our methodology reveals differences in how various social phenomena manifest across communities, and shows that structuring the multi-community landscape can lead to a better understanding of the systematic nature of this diversity.}, + pmcid = {PMC5774974}, + file = {/home/nathante/Zotero/storage/DZEYKKSS/Zhang et al. 
- 2017 - Community Identity and User Engagement in a Multi-.pdf;/home/nathante/Zotero/storage/MMY3NEQ4/Zhang et al_2017_Community Identity and User Engagement in a Multi-Community Landscape.pdf;/home/nathante/Zotero/storage/FFFSAVRR/14904.html} +} + +@article{zhang_group_2011, + title = {Group Size and Incentives to Contribute: A Natural Experiment at Chinese Wikipedia}, + shorttitle = {Group Size and Incentives to Contribute}, + author = {Zhang, Xiaoquan (Michael) and Zhu, Feng}, + date = {2011-06}, + journaltitle = {American Economic Review}, + volume = {101}, + number = {4}, + pages = {1601--1615}, + issn = {0002-8282}, + abstract = {The literature on the private provision of public goods suggests an inverse relationship between incentives to contribute and group size. We find, however, that after an exogenous reduction of group size at Chinese Wikipedia, the nonblocked contributors decrease their contributions by 42.8 percent on average. We attribute the cause to social effects: contributors receive social benefits that increase with both the amount of their contributions and group size, and the shrinking group size weakens these social benefits. Consistent with our explanation, we find that the more contributors value social benefits, the more they reduce their contributions after the block. 
(JEL H41, L17, L82)}, + langid = {english}, + keywords = {Media,Public Goods; Open Source Products and Markets; Entertainment}, + file = {/home/nathante/Zotero/storage/63JBCUER/Zhang and Zhu - 2011 - Group Size and Incentives to Contribute A Natural.pdf;/home/nathante/Zotero/storage/BWMQ96PV/articles.html} +} + +@article{zhang_intergroup_2019, + title = {Intergroup {{Contact}} in the {{Wild}}: {{Characterizing Language Differences}} between {{Intergroup}} and {{Single}}-Group {{Members}} in {{NBA}}-Related {{Discussion Forums}}}, + shorttitle = {Intergroup {{Contact}} in the {{Wild}}}, + author = {Zhang, Jason Shuo and Tan, Chenhao and Lv, Qin}, + date = {2019-11-07}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {3}, + pages = {193:1--193:35}, + abstract = {Intergroup contact has long been considered as an effective strategy to reduce prejudice between groups. However, recent studies suggest that exposure to opposing groups in online platforms can exacerbate polarization. To further understand the behavior of individuals who actively engage in intergroup contact in practice, we provide a large-scale observational study of intragroup behavioral differences between members with and without intergroup contact. We leverage the existing structure of NBA-related discussion forums on Reddit to study the context of professional sports. We identify fans of each NBA team as members of a group and trace whether they have intergroup contact. Our results show that members with intergroup contact use more negative and abusive language in their affiliated group than those without such contact, after controlling for activity levels. We further quantify different levels of intergroup contact and show that there may exist nonlinear mechanisms regarding how intergroup contact relates to intragroup behavior. 
Our findings provide complementary evidence to experimental studies in a novel context and also shed light on possible reasons for the different outcomes in prior studies.}, + issue = {CSCW}, + keywords = {intergroup contact,intragroup behavior,language usage,nba-related discussion forums,polarization}, + file = {/home/nathante/Zotero/storage/B5RRUXKC/Zhang et al_2019_Intergroup Contact in the Wild.pdf} +} + +@article{zhang_understanding_2021, + title = {Understanding the {{Diverging User Trajectories}} in {{Highly}}-{{Related Online Communities During}} the {{Covid}}-19 {{Pandemic}}}, + author = {Zhang, Jason Shuo and Keegan, Brian and Lv, Qin and Tan, Chenhao}, + date = {2021}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {5}, + eprint = {2006.04816}, + eprinttype = {arxiv}, + pages = {12}, + abstract = {As the COVID-19 pandemic is disrupting life worldwide, related online communities are popping up. In particular, two “new” communities, /r/China flu and /r/Coronavirus, emerged on Reddit and have been dedicated to COVID-related discussions from the very beginning of this pandemic. With /r/Coronavirus promoted as the official community on Reddit, it remains an open question how users choose between these two highly-related communities. In this paper, we characterize user trajectories in these two communities from the beginning of COVID-19 to the end of September 2020. We show that new users of /r/China flu and /r/Coronavirus were similar from January to March. After that, their differences steadily increase, evidenced by both language distance and membership prediction, as the pandemic continues to unfold. Furthermore, users who started at /r/China flu from January to March were more likely to leave, while those who started in later months tend to remain highly “loyal”. 
To understand this difference, we develop a movement analysis framework to understand membership changes in these two communities and identify a significant proportion of /r/China flu members (around 50\%) that moved to /r/Coronavirus in February. This movement turns out to be highly predictable based on other subreddits that users were previously active in. Our work demonstrates how two highly related communities emerge and develop their own identity in a crisis, and highlights the important role of existing communities in understanding such an emergence.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computers and Society,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/3HZBRY3S/Zhang et al. - Understanding the Diverging User Trajectories in H.pdf;/home/nathante/Zotero/storage/V3QR9ASE/Zhang et al. - 2021 - Understanding the Diverging User Trajectories in H.pdf} +} + +@inproceedings{zhu_impact_2014, + title = {The Impact of Membership Overlap on the Survival of Online Communities}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Kraut, Robert E. and Kittur, Aniket}, + date = {2014-04-26}, + series = {{{CHI}} '14}, + pages = {281--290}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {If the people belong to multiple online communities, their joint membership can influence the survival of each of the communities to which they belong. Communities with many joint memberships may struggle to get enough of their members' time and attention, but find it easy to import best practices from other communities. In this paper, we study the effects of membership overlap on the survival of online communities. 
By analyzing the historical data of 5673 Wikia communities, we find that higher levels of membership overlap are positively associated with higher survival rates of online communities. Furthermore, we find that it is beneficial for young communities to have shared members who play a central role in other mature communities. Our contributions are two-fold. Theoretically, by examining the impact of membership overlap on the survival of online communities we identified an important mechanism underlying the success of online communities. Practically, our findings may guide community creators on how to effectively manage their members, and tool designers on how to support this task.}, + isbn = {978-1-4503-2473-1}, + keywords = {membership overlap,online communities,survival analysis}, + file = {/home/nathante/Zotero/storage/GV2D7ZKS/Zhu et al. - 2014 - The Impact of Membership Overlap on the Survival o.pdf;/home/nathante/Zotero/storage/IY4RTSGD/Zhu et al. - 2014 - The impact of membership overlap on the survival o.pdf;/home/nathante/Zotero/storage/JZE5JGAZ/Zhu et al. - 2014 - The impact of membership overlap on the survival o.pdf} +} + +@inproceedings{zhu_selecting_2014, + title = {Selecting an Effective Niche: {{An}} Ecological View of the Success of Online Communities}, + shorttitle = {Selecting an Effective Niche}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Chen, Jilin and Matthews, Tara and Pal, Aditya and Badenes, Hernan and Kraut, Robert E.}, + date = {2014}, + series = {{{CHI}} '14}, + pages = {301--310}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Online communities serve various important functions, but many fail to thrive. Research on community success has traditionally focused on internal factors. In contrast, we take an ecological view to understand how the success of a community is influenced by other communities. 
We measured a community's relationship with other communities - its "niche" - through four dimensions: topic overlap, shared members, content linking, and shared offline organizational affiliation. We used a mixed-method approach, combining the quantitative analysis of 9495 online enterprise communities and interviews with community members. Our results show that too little or too much overlap in topic with other communities causes a community's activity to suffer. We also show that this main result is moderated in predictable ways by whether the community shares members with, links to content in, or shares an organizational affiliation with other communities. These findings provide new insight on community success, guiding online community designers on how to effectively position their community in relation to others.}, + isbn = {978-1-4503-2473-1}, + venue = {Toronto, Ontario, Canada}, + keywords = {online communities,success,topic overlap,workplace}, + file = {/home/nathante/Zotero/storage/FNS9RSWC/Zhu et al. - 2014 - Selecting an Effective Niche An Ecological View o.pdf;/home/nathante/Zotero/storage/KIHWVKUQ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf;/home/nathante/Zotero/storage/RFMX2CBJ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf} +} + + diff --git a/dissertations/nathante_uw_2021/equalogy.tex b/dissertations/nathante_uw_2021/equalogy.tex new file mode 100644 index 0000000..b1f3a01 --- /dev/null +++ b/dissertations/nathante_uw_2021/equalogy.tex @@ -0,0 +1,923 @@ +\chapterprecishere{ +Large-scale quantitative analyses have shown that individuals frequently talk to each other about similar things in different online spaces. Why do these overlapping communities exist? We provide an answer grounded in the analysis of 20 interviews with active participants in clusters of highly related subreddits: within a broad topical area, there are a diversity of benefits an online community can confer. 
These include (a) specific information and discussions, (b) socialization with similar others, and (c) attention from the largest possible audience. A single community cannot meet all needs. +Our findings suggest that topical areas within an online community platform tend to become populated by groups of specialized communities with diverse sizes, topical boundaries, and rules. Compared with any single community, such systems of overlapping communities are able to provide a greater range of benefits. + +} + + +\section{Introduction} + +Early work in social computing treated online communities as isolated units that could be understood without considering their members' participation in other online communities. As community hosting platforms such as Reddit and Facebook have grown in prominence, social computing scholars have sought to document and explore the connections between online communities \citep{datta_extracting_2019, hill_studying_2019, tan_all_2015, zhu_selecting_2014}. +This research has shown that online communities overlap with each other in terms of their memberships and topics in ways that have important consequences for a range of outcomes \citep{teblunthuis_identifying_2021, chandrasekharan_internets_2018, wang_impact_2012}. + + +User and topic overlap is widespread---both within platforms and across them. +For example, a range of studies have highlighted the fact that members frequently participate in multiple online groups. +This occurs both serially as users migrate between communities over time \citep{lu_investigate_2019, tan_all_2015, tan_tracing_2018} and concurrently as individuals belong to multiple groups at once \citep{wang_impact_2012, hwang_why_2021, zhu_impact_2014}. +Many large platforms host distinct communities with similar topics and content \citep{datta_identifying_2017, zhu_selecting_2014}. +In at least one study, researchers have documented that overlaps in users and topics often coincide \citep{datta_identifying_2017}. 
+In other words, members of online communities often simultaneously participate in overlapping conversations with overlapping groups of people in different online spaces. + +\textit{Why are the same individuals talking to each other about similar things in different online communities?} +Although social computing offers many theories of why individuals might want to participate in a community, almost all empirical work in social computing on user and topic overlap has used computational or quantitative analysis. As a result, we know very little about what overlaps mean to users. Critically, we also have very little in the way of empirical evidence that is able to speak to why communities overlap in the first place. + +Our work seeks to complement existing quantitative research with a better qualitative understanding of intercommunity overlap and contribute to several streams of social computing scholarship. +In particular, our work complements a series of social computing studies that have taken inspiration from ecological theory and shown that online groups' growth and survival are closely tied to activity in adjacent online spaces \citep{teblunthuis_identifying_2021, wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. + + +We seek to answer our research question (in italics above) through an interview-based study of Reddit users with experience in overlapping communities. Using a dataset of posts and comments on Reddit, we identify clusters of communities on Reddit with highly overlapping users and topics and recruit a set of 20 participants from nine clusters. +Drawing from a grounded theory analysis of interview transcripts, we develop an explanation of why many users simultaneously participate in communities with overlapping memberships and topics. 
+ +Our findings suggest that users seek three salient benefits from online groups: users want to (a) find specific types of content, discussions, and information; (b) connect with similar types of people; and (c) share content with the largest possible audience. Our work also suggests that these three benefits are frequently in conflict such that the more a community provides one of these benefits, the less able it may be to provide the other two. Because it is difficult for a single community to fully provide all three benefits, clusters of multiple overlapping communities are constructed to do so in aggregate. + + + + + + + + + + + + + + + + + + + + + + + + + + +\section{Related Work} + + + + + + + +\label{sec:overlapping} + + +Although most research in online communities analyzes the internal factors driving online community success \citep{kraut_building_2012}, a growing literature studies communities related by overlaps in topic or membership \citep{datta_identifying_2017, tan_all_2015, teblunthuis_identifying_2021, zhu_impact_2014}. +This work has found that concurrent engagement in multiple communities is common on large platforms that host online communities such as Reddit where individuals smoothly jump from community to community \citep{tan_all_2015}. +With several exceptions \citep[e.g.,][]{fiesler_moving_2020, kiene_technological_2019, zhao_social_2016, hwang_why_2021}, this work typically takes the fact that communities overlap for granted and focuses on the consequences of overlap on outcomes such as the emergence and growth of communities \citep{butler_cross-purposes_2011, zhu_impact_2014} and the diffusion of types of language such as hate speech \citep{chandrasekharan_internets_2018}. None of this work provides insight into how communities come to overlap and why these overlaps persist. 
+ +Researchers have investigated intercommunity conflict and found that conflict is initiated by a very small proportion of online communities \citep{kumar_community_2018}. +Other work has shown that content cross-posted to different communities contributes to the ongoing renegotiation of the topical boundaries \citep{butler_cross-purposes_2011}. +\citet{zhang_understanding_2021} have shown that topical boundaries can also shift as similar communities attract users with different interests. +In a related sense, \citet{massanari_gamergate_2017} has argued that toxic communities can influence the broader culture of a platform for online communities. As a result, banning problematic communities from a platform such as Reddit can reduce toxicity in adjacent communities that are not directly affected \citep{chandrasekharan_you_2017, ribeiro_platform_2021}. + + +A number of studies on overlapping communities draw upon ecological theory \citep{teblunthuis_identifying_2021, wang_impact_2012, zhu_impact_2014, zhu_selecting_2014}. +Ecological approaches in social computing theorize that overlaps between users and topics relate to competitive or mutualistic forces and drive outcomes such as growth and survival. For example, \citet{wang_impact_2012} found that membership overlap reduced the growth rate of Usenet groups. \citet{zhu_selecting_2014} found that participation rates often suffered if there was too little or too much overlap with other communities. +\citet{zhu_impact_2014} found that communities' survival was positively associated with membership overlap, especially with overlap with older communities. Recently, \citet{teblunthuis_identifying_2021} found that mutualism is common in clusters of overlapping subreddits. 
+ +Although these studies use statistical analysis to tell us about how communities relate to each other, they do not speak to \textit{how} participants understand the relationships between similar online communities or \textit{why} they participate in overlapping communities. The exclusively quantitative nature of these accounts means that a range of potential explanations are possible. + +Although we know of no qualitative examination focused directly on understanding why overlapping communities exist, there are a series of qualitative papers that point to potential answers. \citet{fiesler_moving_2020} describe the history of online fanfiction writing communities migrating across platforms in pursuit of hospitable infrastructure. Similarly, \citet{zhao_social_2016} describe how individuals use multiple social media platforms to meet varied and nuanced communication needs. +Although their study is primarily quantitative, \citet{zhu_selecting_2014} include quotes from interviews to support the emic validity of notions of competition and mutualism between groups in an enterprise social media system. +Finally, \citepos{hwang_why_2021} paper seeks to explain why individuals participate in persistently small online communities on Reddit and ends with a reflection that many small communities are sustainable only because they are ``nested'' within larger niches. +All told, these findings suggest a rich social process by which participants in online communities purposefully construct and move between overlapping spaces. + + +% Social computing scholarship has pointed to differences in affordances between platforms and the ways in which members migrate between communities over time. +However, the very small amount of qualitative evidence from participants in overlapping communities in the same platform means that we lack a strong sense of why members choose to participate in multiple communities simultaneously. 
+Although ecological studies attempt to quantify competition and mutualism, we know little about how members understand the relationships between their communities or if these key ecological concepts have any emic resonance. +Our work seeks to place ecological studies of online communities on firmer qualitative ground. + + + + + + + + + + + + + +\subsection{Reasons for Joining Online Communities} +\label{sec:reasons} + +Decades of social computing research has sought to understand why people belong to particular online communities \citep{kraut_building_2012}. +It has long been recognized that different people have different motivations and that a single individual may have multiple motivations that include the social, informational, and material benefits users receive through their participation \citep{butler_membership_2001, turner_where_2005, xigen_li_factors_2011}. In terms of uses and gratifications theory, ``users actively seek particular media with the goal of gratifying an existing need'' \citep{lampe_motivations_2010}. +Past research has shown that people seek online communities to collaborate on projects \citep{poor_computer_2014}, to receive social support \citep{leimeister_evaluation_2005}, to cooperate with friends \citep{turner_where_2005}, and, especially, to exchange information \citep{ridings_virtual_2004, muhtaseb_arab_2008, leavitt_role_2017, liang_knowledge_2017}. +Other research focuses on the growth and decline of membership in online communities and surfaces motivations for why people choose not to participate \citep{cunha_are_2019}. \citet{brandtzaeg_user_2008} found that a lack of trust or low quality content can lead to declines in membership. +Online communities may decline because leaders are resistant to change and unwelcoming to newcomers \citep{shaw_laboratories_2014, halfaker_rise_2013, teblunthuis_revisiting_2018}. 
+ + + + +Although our findings are the result of an inductive process of bottom-up grounded theory analysis, the presentation of our findings relies on three existing concepts. + + + + + + +\subsubsection{Finding specific content} + +One of the most important features of online communities is their ability to enable the spread of useful knowledge and information \citep{faraj_online_2016}. By connecting individuals with specific information and skills that they desire, online communities match knowledge seekers with experts and foster collaboration on information goods \citep{benkler_wealth_2006, lakhani_how_2003, fulk_connective_1996,fiesler_growing_2017}. +Research has often focused on the ways that individuals utilize diverse types of social computing systems to meet their specific information needs through systems such as Q\&A sites \citep{adamic_knowledge_2008}, synchronous chat systems \citep{white_effects_2011}, search engines \citep{morris_comparison_2010}, social network sites \citep{starbird_crowd_2012,morris_what_2010}, fanworks \citep{fiesler_growing_2017}, and knowledge bases \citep{ackerman_answer_1990,orlikowski_learning_1992}. + + + + + +\subsubsection{Homophily} + +A second need that online communities serve is to foster connections with similar others. The term \textit{homophily}, ``a tendency for friendships to form between those who are alike in a designated respect'' \citep{lazarsfeld_friendship_1954}, describes the set of benefits people can only receive from others who share their identities, beliefs, interests, or culture \citep{mcpherson_birds_2001}. +In offline settings, homophily helps explain why tastes in cuisine, music, and other cultural preferences are often correlated \citep{dellaposta_why_2015}, why similar people tend to congregate, and what happens when they do \citep{mcpherson_birds_2001}. 
+Homophily on social networks may drive the emergence of online ``echo chambers'' as individuals seek online communities whose members share their beliefs \citep{johnson_communication_2009, grevet_managing_2014, himelboim_valence-based_2016, dvir-gvirsman_media_2017}. + +Research has shown that people have greater degrees of trust in homophilous groups and are more likely to share content posted by homophilous others \citep{ma_when_2019, chang_specialization_2014}. +Homophily has been described as an important feature of online fan communities \citep{hillman_alksjdflksfd_2014,fiesler_growing_2017}. + + + + + + +\subsubsection{Finding the largest possible audience} + +Research on online communities producing public information goods has found evidence that audience size motivates contributors \citep{zhang_group_2011}. Additionally, numerous studies have shown that users of social networking sites frequently consider the audience that their posts and messages may reach \citep{marwick_i_2011, zhang_configuring_2020}. +As individuals on social media typically have little information about who sees their posts, they conceive of ``imagined audiences'' based on cues from visible activity \citep{bernstein_quantifying_2013} and target imagined audiences using deliberate strategies, such as using multiple platforms to reach distinct audiences, in order to control who sees or does not see their posts \citep{litt_just_2016, marwick_i_2011, zhao_social_2016}. 
+ + + + + + + + + + + +\section{Study Design} +\label{sec:methods} + +To study overlapping membership in online communities, we conduct interviews with members of online communities hosted on Reddit, a social media platform for sharing, discussing, and rating news, media, and other content in user-created subcommunities called ``subreddits.'' Individual users can participate in any of Reddit's millions of subreddit communities by posting ``submissions'' that might include a link to a news article, a question for discussion, an image, or text written by the submitter. Each submission has a corresponding threaded comments section. Users can also vote submissions and comments up or down as a form of distributed moderation and can give awards to comments and posts \citep{lampe_slashdot_2004, burtch_how_2021}. + +Subreddit communities are managed by teams of volunteer content moderators tasked with curtailing abusive behavior and keeping conversation on topic \citep{matias_civic_2019, seering_moderator_2019}. +Subreddits exist covering an enormous range of topics \citep{fiesler_reddit_2018}, and +Reddit has been the site of much research on overlapping online communities \citep{datta_identifying_2017, tan_all_2015, tan_tracing_2018, hessel_science_2016, teblunthuis_identifying_2021}. +Because the cost of creating and joining new communities on Reddit is very low, subreddits often overlap in both topic and membership. Users frequently create spinoff subreddit communities from larger and more established groups \citep{tan_tracing_2018}. + +\subsection{Participant Selection} + +To understand why people participate in overlapping communities, we set out to interview people who are active in highly related subreddits. Additional inclusion criteria were that users were adults (i.e., above the age of majority in their country) and able to participate in an interview in English. + +Our participant selection process began by first choosing clusters of highly related groups. 
To do so, we built a web-based data visualization of a clustering algorithm derived from user overlap to identify groups of interest-based subreddits having similar users. +To generate the visualization, we conducted a computational analysis of the Pushshift Reddit dump \citep{baumgartner_pushshift_2020}, containing a nearly complete collection of Reddit comments made before April 2020. We selected the top 10,000 subreddits based on the number of comments in this data and excluded subreddits where a majority of submissions were flagged as not safe for work. Next, following an approach described in prior work \citep{datta_identifying_2017}, we constructed the measure of user similarity by taking the cosine similarities of TF-IDF vectors. Using this similarity measure, we ran affinity propagation clustering \citep{frey_clustering_2007} to group subreddits having overlapping users. We then built an HTML visualization of these clusters based on t-distributed stochastic neighbor embedding (t-SNE). We have included the visualization in our online supplement. + +Although some aspects of our manual cluster selection process using this visualization were necessarily arbitrary, we tried to select clusters that were interest driven, involved primarily English language discussion, and were focused on content about which all members of the research team would be comfortable speaking. As a result, we did not select any clusters that were focused on sex or pornography, fringe or extreme politics, content specific to geographic regions, or topics that our group could not understand. + +We sought out clusters that we hoped would result in individuals from a diverse range of ages, genders, and life experiences. Although we did not collect demographic information from our interviewees, our interviewees' presentation and descriptions of themselves suggested that these efforts were not entirely successful. 
+Our pool of interviewees included young and middle-aged people; people of color; people from the United States, Canada, and Europe; people who did not speak English as a first language; and people who were non-male. +That said, men were very likely over-represented in our pool of interviewees, perhaps even in relation to the disproportionate participation of men on Reddit \citep{amaya_new_2021}. + +The clusters we selected each include 3--10 subreddits on the following topics: rock climbing, streetwear fashion, roller coasters, vintage audio, podcasting, painting, drag culture and performance, indie music, and dating for middle-aged adults. Information about each subreddit and cluster can be found in Table \ref{tab:subs_clusters_stats}. + + +\begin{table}[h!] + \footnotesize + \centering + \begin{tabular}{cccc} +\hline + \textbf{Subreddit} & \textbf{Cluster} & \textbf{Subscribers} & \textbf{Created} \\ + \hline +\rowcolor{lavenderblue} +r/bouldering & Climbing & 194,814 & 2009-10-28 \\ +\rowcolor{lavenderblue} +r/climbharder & Climbing & 117,288 & 2010-10-19 \\ +\rowcolor{lavenderblue} +r/climbing & Climbing & 935,621 & 2008-07-17 \\ +\rowcolor{lavenderblue} +r/climbingcirclejerk & Climbing & 45,032 & 2011-08-18 \\ +r/Drag & Drag & 44,724 & 2011-01-15 \\ +r/Dragula & Drag & 27,510 & 2016-11-03 \\ +r/rupaulsdragrace & Drag & 440,329 & 2011-11-15 \\ +r/RPDR\_UK & Drag & 31,867 & 2019-02-07 \\ +r/SpoiledDragRace & Drag & 69,027 & 2018-02-16 \\ +r/MsPaintsArtRace & Drag & 61,292 & 2017-04-17 \\ +\rowcolor{lavenderblue} +r/MGMT & Indie Music & 17,744 & 2010-02-25 \\ +\rowcolor{lavenderblue} +r/tameimpala & Indie Music & 94,248 & 2011-10-30 \\ +\rowcolor{lavenderblue} +r/kgatlw & Indie Music & 59,191 & 2015-07-01 \\ +\rowcolor{lavenderblue} +r/Indieheads & Indie Music & 1,932,698 & 2013-12-24 \\ +r/datingoverthirty & Middle Age Dating & 436,480 & 2014-11-04 \\ +r/DatingAfterThirty & Middle Age Dating & 11,550 & 2018-03-09 \\ +r/datingoverforty & Middle Age Dating & 
52,522 & 2018-12-15 \\ +r/relationshipsover35 & Middle Age Dating & 14,916 & 2018-02-06 \\ +\rowcolor{lavenderblue} +r/OilPainting & Painting & 186,716 & 2011-09-22 \\ +\rowcolor{lavenderblue} +r/Painting & Painting & 280,865 & 2008-06-13 \\ +\rowcolor{lavenderblue} +r/PourPainting & Painting & 178,800 & 2017-07-28 \\ +\rowcolor{lavenderblue} +r/Watercolor & Painting & 269,882 & 2012-01-15 \\ +\rowcolor{lavenderblue} +r/HappyTrees & Painting & 53,362 & 2011-02-07 \\ +r/podcasts & Podcasting & 1,995,693 & 2008-01-25 \\ +r/podcast & Podcasting & 60,497 & 2009-01-02 \\ +r/podcasting & Podcasting & 73,010 & 2010-09-17 \\ +r/audiodrama & Podcasting & 129,102 & 2010-11-30 \\ +r/ska & Podcasting & 34,397 & 2008-03-12 \\ +\rowcolor{lavenderblue} +r/guessthecoaster & Rollercoasters & 5,094 & 2017-06-30 \\ +\rowcolor{lavenderblue} +r/rollercoasterjerk & Rollercoasters & 12,378 & 2016-07-14 \\ +\rowcolor{lavenderblue} +r/rollercoasters & Rollercoasters & 66,652 & 2010-07-31 \\ +\rowcolor{lavenderblue} +r/rct & Rollercoasters & 55,275 & 2010-08-04 \\ +\rowcolor{lavenderblue} +r/themeparkitect & Rollercoasters & 13,536 & 2014-06-16 \\ +r/streetwear & Streetwear & 2,678,745 & 2011-04-30 \\ +r/supremeclothing & Streetwear & 154,797 & 2012-04-04 \\ +r/womensstreetwear & Streetwear & 421,279 & 2016-04-25 \\ +r/bapeheads & Streetwear & 19,672 & 2013-08-12 \\ +r/malefashion & Streetwear & 207,843 & 2011-04-02 \\ +r/sadboys & Streetwear & 74,932 & 2013-06-30 \\ +r/techwearclothing & Streetwear & 94,675 & 2017-03-01 \\ +r/Vans & Streetwear & 51,997 & 2011-07-01 \\ +\rowcolor{lavenderblue} +r/cassetteculture & Vintage Audio & 45,615 & 2011-05-25 \\ +\rowcolor{lavenderblue} +r/typewriters & Vintage Audio & 20,037 & 2010-10-25 \\ +\rowcolor{lavenderblue} +r/vintageaudio & Vintage Audio & 59,202 & 2011-09-18 \\ +\hline + \end{tabular} + \caption{Clusters of subreddits from which we recruited participants, subscriber counts at the time of the study, and the creation date of each subreddit.} 
+ \label{tab:subs_clusters_stats} + \end{table} + + +Using the Pushshift Reddit dataset, we identified candidate participants who were among the top 80\% most frequent commenters within each cluster, who participated in multiple subreddits in the cluster, and who were active in the cluster during a period of at least 1 calendar year. +We began recruiting a random sample of 50 candidates matching these criteria within each cluster by sending direct messages through Reddit. Interested potential recruits filled out a short online survey confirming that they were adults and able to participate in English language interviews. The survey also asked participants about their participation and familiarity with each of the subreddits in each cluster to verify that they were knowledgeable. +At the beginning of each interview, we asked if there were any other subreddits related to those identified by the clustering algorithm. As a result, our conversations were not limited to the subreddits listed in Table \ref{tab:subs_clusters_stats}. + +We began by recruiting participants from the first three clusters listed in Table \ref{tab:subs_clusters_stats}. We found ourselves reaching saturation within these clusters quickly. We also found that different clusters were surfacing quite different data. In response, we added additional clusters and recruited at least two participants from each until we reached global saturation. In some clusters, we did not reach saturation in two interviews. In these cases, we sent additional invitations and conducted additional interviews. +In total, 20 participants were successfully recruited and interviewed by five members of the research team before we reached global saturation and ceased data collection. The characteristics of our interviewees are presented in Table \ref{table:participants}. + +All of our interviews were semistructured. 
Although we drew from a long series of open-ended questions about participation in different subreddits and the relationships between communities, we chose our questions based on what our subjects wanted to talk about. A copy of our interview protocol is included in our supplementary material. +Interviews were 49 min long on average but varied substantially in length. We suggested conducting interviews over Zoom but offered participants their choice of communication channel. As a result, we conducted two interviews over the phone, one using Discord chat, and the rest over Zoom. +Interviews were transcribed automatically using Zoom's built-in transcription and the otter.ai service and were then manually corrected by the authors. After each interview, participants were compensated with a digital gift card for \$20 USD through the Tango Card reward service.\footnote{\url{https://www.tangocard.com/}} + + + +\subsection{Qualitative Data Analysis} + +Our analysis followed \citepos{charmaz_constructing_2015} approach to grounded theory as closely as possible. We conducted coding and data collection in parallel. We generated over 950 codes, which we then grouped in an iterative axial coding process that generated 18 thematic memos. As we completed collecting data, we refined our codes and combined themes to identify answers to the following orienting research questions: Why are there so many similar online communities? And why not more? Although primarily inductive, our analysis was influenced by sensitizing concepts from prior work including our knowledge of scholarship on overlapping online communities described in §\ref{sec:overlapping} and the reasons that people participate in online communities summarized in §\ref{sec:reasons}. 
+In analyzing our data, we noted that interviewees described their participation in multiple different subreddits and their preference for particular subreddits in terms of the inability of one community (often the ``main'' or ``largest'' community) to provide the desired benefits. This observation formed the basis of the grounded theory around which we organize our findings. + +\begin{table}[h!] +\footnotesize + \centering + \begin{tabular}{ccc} +\hline +\textbf{Participant ID} & \textbf{Cluster} & \textbf{Interview Length (min)} \\ \hline +\rowcolor{lavenderblue} +C1 & Climbing & 56 \\ +\rowcolor{lavenderblue} +C2 & Climbing & 51 \\ +\rowcolor{lavenderblue} +C3 & Climbing & 41 \\ +D1 & Drag & 51 \\ +D2 & Drag & 67 \\ +\rowcolor{lavenderblue} +I1 & Indie Music & 71 \\ +\rowcolor{lavenderblue} +I2 & Indie Music & 43 \\ +O1 & Podcasting & 30 \\ +O2 & Podcasting & 44 \\ +\rowcolor{lavenderblue} +P1 & Painting & 58 \\ +\rowcolor{lavenderblue} +P2 & Painting & 35 \\ +\rowcolor{lavenderblue} +P3 & Painting & 40 \\ +\rowcolor{lavenderblue} +P4 & Painting & 35 \\ +R1 & Rollercoasters & 24 \\ +R2 & Rollercoasters & 43 \\ +\rowcolor{lavenderblue} +S1 & Streetwear & 79 \\ +\rowcolor{lavenderblue} +S2 & Streetwear & 55 \\ +T1 & Dating in Middle Age & 63 \\ +T2 & Dating in Middle Age & 53 \\ +\rowcolor{lavenderblue} +V1 & Vintage Audio & 34 \\ +\rowcolor{lavenderblue} +V2 & Vintage Audio & 56 \\ \hline + \end{tabular} +\caption{List of anonymized participant IDs, the cluster from which we recruited them, and the length of their interview.} +\label{table:participants} +\end{table} + + +\subsection{Ethical Considerations} +Our study design was reviewed by the Institutional Review Board (IRB) at the University of Washington and was determined to be exempt. As part of the design of this study, we took several steps to protect the privacy of our research participants. 
Participants were fully briefed about the design of the study before being interviewed and were given documents concerning the study and contact information for our IRB. Explicit consent was obtained from every participant. + +Because this project involved collaboration with a relatively large team, we used the Keybase end-to-end encryption service for all discussion and data sharing. +Finally, participants were anonymized so that no direct identifier was recorded in the process of data collection, and only anonymized pseudonyms (e.g., C1, P2, and V2, as shown in Table~\ref{table:participants}) are published in this paper. We made several minor edits to quotes to obscure potentially identifying details. + + +% +\section{Findings} + +Why do people participate in multiple online communities around the same topic? The answer that emerged from our grounded theory is that no one community can provide all the benefits that users want. At a high level, we find that people have multiple and diverse motivations for participation in online communities. In §\ref{sec:benefits}, we describe the types of benefits they seek organized into three categories: (a) engaging with specific types of content, (b) homophilous socialization, and (c) sharing content contributions with as large an audience as possible. +In §4.2, we use data from our interviews to describe the tensions between these benefits. +We also investigate how our interviewees understood competition and mutualism---key concepts from ecological studies in social computing---between overlapping communities. Our interviewees overwhelmingly found mutualism to be more consistent with their understandings of overlapping online communities than competition. +Our contribution comes in the form of a theoretical framework, grounded in our data, that describes how the full benefits of participating in communities can only be satisfied by groups of communities. 
+ + +\subsection{Benefits Users Seek from Communities} +\label{sec:benefits} + + +\subsubsection{Specific kinds of content} +\label{sec:content} + + +Content on Reddit is organized into subreddits that define their own topical boundaries. These boundaries may be broad (e.g., news) or narrow (e.g., types of painting media). Moreover, subreddits that prohibit types of content or behavior generate niches for subreddits with different rules. Despite such forms of specialization, multiple communities often welcome the same content and encourage users to ``cross-post'' material. + +A subreddit's topic---what it is about and what content should be posted---is often signified by its name. A climbing enthusiast explains: + +\blockquote[C1]{I think the name itself [\texttt{r/climbharder}], kind of specifically points out that: this is not for people who climb hard. It's for people who climb and want to climb hard\textit{er}. +} + +\noindent C1 describes how the purpose of a subreddit is tied to its name by emphasizing the adjectival suffix ``-er'' as indicative of the fact that the subreddit is not about achieving elite performance but about improving. + +Similarly, a participant in subreddits about drag performance invokes Marshall McLuhan to describe how they know what content to post and where to post it: + +\blockquote[D1]{ Let’s say you were a drag artist and you wanted to show off something that you just created. You would have to go select which community you wanted to show it off in. And I guess among those, [\texttt{r/Drag}] would be the one to do that in. But if you’re---if you’re wanting to show off a piece of artwork or something that you made of a queen from Rupaul's drag race---and the best place to show that off would be to go to [\texttt{r/rupaulsdragrace}] and post it there. So it’s [a] `the medium is the message' kind of thing. 
\ldots You know where would get the most views [and] where would be the best place to post your content.} + +\noindent Like D1, our informants had deep knowledge of what kinds of specific content would be appropriate for each subreddit in their cluster. + + +Specialization also occurred as a form of regulatory arbitrage when one community had formal or informal rules about the kind of content that was allowed. In these cases, we would often hear about an adjacent community where breaking the rules is accepted, perhaps even the raison d'être. For example, \texttt{r/rupaulsdragrace} prohibits spoilers and information about the outcomes of a reality TV show. \texttt{r/spoileddragrace} is a community about the same show that allows spoilers. + +This pattern is so widespread on Reddit that it is often signaled in subreddit naming conventions \citep{hessel_science_2016}. The ``meta'' prefix signals meta-discussions, often drama-centered, about another subreddit. The ``jerk'' suffix signals a space for memes, mockery, silliness, or other content unaccepted in the ``main'' subreddit. Both are commonly understood and were discussed at length by our interviewees. +For example, among the Rollercoasters subreddits, R1 described the ``jerk'' subreddit as a ``joke subreddit'' where members of the main rollercoasters subreddit could make fun of themselves: + +\blockquote[R1]{I would definitely say \texttt{r/rollercoasters} and \texttt{r/rollercoasterjerk} are really deeply intertwined. It's usually all the same members and stuff because of the fact that the coaster `jerk' is just meant to make fun of the main subreddit. It's just a joke subreddit.} + + + +\noindent ``Jerk'' subreddits were a common source of discussion among our participants. + +Among the Climbing subreddits, the ``main'' subreddit about rock climbing (\texttt{r/climbing}) is welcoming to newcomers. 
C1 explained that members upvote posts by newcomers ``to encourage more entrance into the sport.'' However, newcomer posts are often repetitive pictures of people climbing in gyms or videos of famous climbers. This annoys some experienced climbers. The ``jerk'' subreddit provides a backstage space where making fun of newcomers is permitted. + +In addition to being divided by rules, interrelated subreddits can be structured as a ladder of ``conceptual rungs'' where one finds larger communities as one ascends the ladder. A participant in the subreddits on art and painting described this phenomenon as follows: + + + + +\blockquote[P2]{ +You go up through these conceptual rungs. +\ldots\ +When you go up from, say, \texttt{r/OilPainting}---like \texttt{r/HappyTrees} to \texttt{r/OilPainting}---it’s a much bigger community. And then from \texttt{r/OilPainting} to \texttt{r/Painting}, which is even bigger. +} + +\noindent P2 explained that smaller subreddits such as \texttt{r/HappyTrees} support learners and are generally more welcoming places. Although the quotation above suggests that the size of communities increases as one moves up conceptual rungs, the relationship between topical scope and size was more complicated. In some topical areas, subreddits with relatively specific topics have the largest and most active communities. For example, \texttt{r/rupaulsdragrace} is the most active drag subreddit by a large margin, even though it focuses on a reality TV series that is part of the broader drag community covered by \texttt{r/Drag}. + +Although many specialized subreddits exist, people who want to share their work, ask a question, or have a specific discussion may not know the best place to post. Cross-posting---i.e., when someone posts the same content, questions, or messages in multiple communities---is widespread on Reddit. +Cross-posting has sometimes been viewed negatively as a form of attention grabbing (i.e., ``karma whoring'') \citep{poor_mechanisms_2005}. 
+More often, however, we heard that cross-posting was acceptable and even encouraged to establish complementary conversations or find different audiences. + + + + + + + + + + + + + + + + + + + +Multiple interviewees from the Climbing cluster, including C1, described how, when people ask for training advice in \texttt{r/climbing}, the largest subreddit about rock climbing, they will be advised to cross-post to \texttt{r/climbharder}: + +\blockquote[C1]{Somebody will post asking for advice in \texttt{r/climbing} and oftentimes, somebody will comment and be like, `Hey, you know? You’re welcome to ask this here, but you might get more and better responses at \texttt{r/climbharder}.'} + +\noindent C1 explained that even though conversations about training often start in the main subreddit, they are not likely to gain traction because not everybody in the main community is interested in the more intensive aspects of climbing. + + + + + + + + + + +In sum, the ecosystem of subreddits about similar topics provides more opportunities for people to find specific desired discussions. People receive positive feedback and engagement when they post content that fits a subreddit's specific topic. That said, the subreddit where a particular piece of content will be best received is often not clear to the person posting it. Cross-posting provides multiple chances to start a desired discussion. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +\subsubsection{Homophily} + + + +Online communities have long been recognized as a way to ``find my people'' by bringing together users who share things as diverse as a psychiatric diagnosis, enthusiasm for a hobby, or membership in a subculture or identity group. A member of the Middle Age Dating cluster of subreddits explains: + + +\blockquote[T2]{[When I joined the ADHD Reddit sites], I feel like I found my people after all these years. 
+\ldots\ +If you don't have ADHD, and don't wonder what's going on other people's brains all the time, I think you just think that everybody thinks like you. And they don't. They don't. So if you're 30 and you're having a problem, you really just want to talk to other 30 somethings. +} + +\noindent T2's description of having ``found my people'' and talking to other people like themselves invokes the idea of homophily: the desire to connect to others similar to oneself. +Analytically distinct from finding personalized information in narrowly focused subreddits, homophily was frequently cited as an end in itself by our interviewees. +Our interviewees sought to connect with ``like-minded'' people having similar interests, demographics, identities, tastes, and status. + +Even though the identities of others in subreddit communities are largely invisible, participants can easily imagine the demography of the subreddit. A participant in the Drag cluster of subreddits described \texttt{r/Dragula}, a community of fans of a TV show featuring horror-infused drag styles, as follows: + + + + + +\blockquote[D2]{ +I think it would be a mostly LGBTQ audience. And not many straights. But if there are straights, they would be really open minded or edgy. Or, I don’t know \ldots\ associated with that `dark' aesthetic.} + +\noindent D2's thoughts on \texttt{r/Dragula} convey a clear sense of the audience of the subreddit. Of course, the pseudonymous nature of Reddit obscures age, race, gender, and ethnicity. That said, Reddit users draw on stereotypes about fanbases and cues such as mentions of schools, selfie posts, linguistic markers, and cultural references to build clear models of the types of people in a subreddit. 
In further unpacking these dimensions, D2 contrasts \texttt{r/Dragula} with the more mainstream subreddits about the show \textit{Rupaul's Drag Race}: + +\blockquote[D2]{ [As for subreddits about] the drag race (\texttt{r/rupaulsdragrace}), Drag Race UK (\texttt{r/RPDR\_UK}), and the spoiled drag race (\texttt{r/SpoiledDragRace}). \ldots\ +Most of [the participants in these other groups] don’t do drag. Most of them are, I think, white gay men, or straight women who see drag with a very narrow view of what drag is. Hegemonic? I don’t know if that’s the word, but they apply the same standards of beauty that are applied to women and men and artists and performers to this art form. +} + +\noindent D2 conveyed both a strong sense of the demographics of different drag subreddits and a strong sense of identification with \texttt{r/Dragula}, which they described as less toxic, more inclusive, and more creative, in part because its membership has a greater concentration of LGBTQ and non-White people who are less interested in conforming to hegemonic beauty standards. + + + + + +Subreddits divide broad topical areas such as drag, art, and fashion into subgroups of people occupying strata of status hierarchies associated with identity, expertise, and class. For example, in the Climbing cluster of subreddits, rock climbing ability confers status and separates beginners from advanced athletes. We found that these two groups concentrate their participation in different subreddits. Across the clusters, we found that experts sought out fellow experts with whom to share knowledge, offer reflections, and give advice grounded in shared extensive experience. 
+ +Our Streetwear interviewees reported that subreddits about fashion are split along lines that are associated with the price and status of the clothes being discussed: + + + +\blockquote[S1]{The kind of person, the Platonic ideal poster or user of something like \texttt{r/streetwear}, is probably more open-minded, maybe, in terms of what they think is cool, what they think is worth wearing. Whereas, you know, \ldots\ the \texttt{r/malefashion} snob is a snob.} + +\noindent Even though users of \texttt{r/streetwear} share and discuss men's fashion, \texttt{r/malefashion}, which focuses on higher-status and more expensive styles, looks down on their casual and youthful styles. +S1 is a member of the \texttt{r/streetwear} subreddit. +Although their groups are ``chill'' and ``supportive,'' higher-status groups are ``snobby.'' It is clear that S1 feels unwelcome and out of place in the higher-status group. + +Similarly, our interviewees described status hierarchies in Painting subreddits related to skill level and medium. +P4 described how they were invited to cross-post their work from \texttt{r/Watercolor} to \texttt{r/Artoilpainting}, a smaller subreddit that seems to have a complicated relationship with watercolor. Although watercolor submissions are allowed, and, in this instance, encouraged, both the subreddit's name and the similarity between its visual tag for watercolor submissions and the downvote button suggest that oil is the preferred medium in this community. +In this way, the division of topical spaces into spheres of similar status and identity allows members to find groups that exclude both those who look down on them and those whom they look down upon. + + + + + + + +Although ``finding your people'' is satisfying in itself, it can also be a foundation for a wide range of other kinds of benefits. +For example, a homophilous community leads to conversations that can promote trust. 
Trust has many benefits such as building confidence in the advice and information shared within a community. In some communities we studied, this trust enabled buying, selling, and trading of material goods. + +V2, one of our interviewees from the Vintage Audio cluster, described a community of record collectors on Reddit that acted as a market for buying, selling, and trading records. They preferred this subreddit to other online markets such as Ebay because the community holds members accountable for honest transacting and because of the intrinsic reward that comes from sharing records with a fellow community member: + +\blockquote[V2]{Because it's a group of people that are like-minded, \ldots\ your feet are kind of held to the fire a little bit more about actually being realistic with the condition [of the material you are selling]. Whereas, [when you buy] vinyl at the used record shop, sometimes you feel like someone's trying to pull one over on you \ldots\ I feel like because it is a community, sometimes you can get some kind of better deal \ldots\ \\ I found other people that share the hobby that I like. So I almost, definitely, feel like they’re friends in a little way. And so I want to, if I’m ever selling, I’m going out of my way to make sure that whatever I’m doing, everything I’m doing, is above board. +} + +V2 was very enthusiastic about the ``marketplace wrapped in a community'' for vinyl records. According to V2, both buyers and sellers of records benefit from transacting within a community of like-minded hobbyists. Because the community holds sellers accountable, the community promotes honest representation of merchandise. 
Being part of a like-minded community where members feel friendship with each other gives sellers a reason to be honest, and even to discount their wares, because they get ``some kind of better deal.'' + +In sum, our interviewees turned to specific subreddits to find people who share their interests, tastes, problems, and identities. +Our participants described subreddits in terms of demographics and identity groups as well as styles, subgenres, or categories related to social status such as wealth, expertise, and beauty standards. They used these categories to place themselves within the constellation of related subreddits they participated in. +Members of subreddits who are ``finding their people'' benefit each other by acting as communities as well as building trust and feelings of friendship. Over time, these feelings can provide further benefits such as the ability to more safely engage in buying and selling. + +\subsubsection{Finding the largest possible audience} + +A third type of benefit derives from the number of members in a subreddit. +All our interviewees were keenly aware of the fact that a post reaching one of the top positions on a larger subreddit would receive the attention of a vast audience. They described this attention as emotionally thrilling and otherwise beneficial. For artists and influencers, large audiences brought material rewards. For learners, a large audience's collective knowledge could bring hard-to-find answers and advice. + +That said, our interviewees explained that larger subreddits do not necessarily provide a larger audience because posts in larger subreddits are more likely to be ignored or missed in the torrent of other content. Although posting in a smaller subreddit might increase the chances of finding an audience at all, subreddits that were too small were described as unattractive because they would not attract many posts or replies. Interviewees responded by choosing where to post strategically. 
+ +Although the competition for the top spots on the front page of large subreddits can be fierce, this competition can make recognition from a large subreddit extremely gratifying: + + + +\blockquote[P2]{Likes are just kind of fake: fake social currency. But yeah, when you get a charge out of it, yeah, I love it. Most of the time, painting is a really busy sub. I mean, like, in any given hour, the new page is already replaced. +\ldots\ \\ +If you can get something that gets a hold there and stays on the front page for a little while, [if] it gets up in even the top five, I've had a handful do that. That's kind of cool. +} + +\noindent P2 describes the thrill of reaching top positions in \texttt{r/painting} with posts of their paintings. Even though they are dismissive of likes on Reddit, they desire the attention their work gets from the subreddit. It sends traffic to their websites, raises their artistic profile, and helps them sell their art. Although these material incentives are important, part of the thrill comes from knowing that a given subreddit is competitive. Smaller subreddits are simply unable to provide these benefits. + + +However, posting in a large subreddit means the risk of being ignored: + + + + + + + +\blockquote[S2]{I think there’s this weird bell curve where the community needs to be big enough where people want to post content. But it can’t get too big where people are drowning each other out for attention.} + +\noindent S2 was among several of our interviewees who described an ideal ``middle ground'' for subreddit size. In general, we heard that people were less interested in posting content in very small subreddits that do not provide an audience. Thus, competition over the largest audiences drives people to smaller subreddits where they can reliably find an audience. I2 from our Indie Music cluster explained: + +\blockquote[I2]{Usually \texttt{r/Indieheads} is the way to reach more people if you want to. 
Just like if you wanted to do even more, you’d probably do it on \texttt{r/music}. \ldots\ Say a small indie band decided to do an AMA they would probably want to do it on \texttt{r/Indieheads}. Because if they did it on \texttt{r/music}, it would get drowned out and nobody would see it because there’s so many posts. In \texttt{r/Indieheads} it would get a decent bit of attention, I think. In the band subreddit, it would probably get a lot of attention too. But \texttt{r/Indieheads} seems like the best middle ground for that kind of thing. +} + +\noindent I2 explained that when the psych-rock band \textit{King Gizzard \& the Lizard Wizard} wanted to engage with an audience on Reddit, they had a choice whether to post in the smaller ``band subreddit'' dedicated to them, the very large \texttt{r/music}, or the medium-sized \texttt{r/Indieheads}. Although posting in the band subreddit would have surely provided an audience, they chose \texttt{r/Indieheads}, which was large but where there was still little risk that their post would be drowned out. + +Our interviewees repeatedly described how finding an audience for one's content is a clear motivation for posting in larger subreddits. +However, we also heard that competition for attention in the largest subreddits leads people to try to find an audience in smaller subreddits. +In the smallest subreddits, posting may not seem worthwhile at all. +This trade-off between finding a large audience and being ignored suggests that posting in subreddits of intermediate size can be the most reliable way to reach a sizable audience. + + + + + + + + + +% + \subsection{Tensions Between the Benefits} +\label{sec:tradeoffs} + + + +The findings in the previous sections imply a clear reason that so many overlapping subreddits exist. When one subreddit prohibits a certain type of content or conversation, an adjacent group can form that allows it. 
When an identity group is marginalized in one subreddit, members of that group may form a subreddit of their own. When getting attention in a large subreddit is too difficult, a smaller subreddit becomes attractive. +Using data from our interviewees, we describe each of the three possible tensions that exist between the three benefits: (1) subreddits where one finds a large audience are less able to provide specific types of content; (2) communities with large audiences are rarely able to provide a community of similar others; (3) some valuable types of discussion and information are found only in diverse groups of people. As we discuss in §\ref{sec:discussion.trillemma}, taken together, these tensions form a ``trilemma''---i.e., a choice with three mutually incompatible options---between our interviewees' desires for specific content, homophily, and finding audiences. A single community might provide two of these benefits, but almost never all three. + + + + + + +\subsubsection{Larger audiences create background noise} + +In §\ref{sec:content}, we described how subreddits are structured according to distinctions between different types of content. Breaking topical areas into subreddits of varying levels of granularity makes finding specific content easier because doing so reduces the need to sift through unrelated material in a large and broad subreddit. Our interviewees often expressed that larger subreddits are simply not the best places for enthusiasts to have discussions: + + + +\blockquote[C2]{I see this background noise problem building [in] \texttt{r/climbing}, the main climbing community, [which] has just become less and less and less interesting and less relevant as it’s gotten bigger. That’s not really a problem. Right? That’s probably has more to do with my interest level and how long I’ve been on it. And my experience level with climbing. I'm just a little bit more crusty about it, you know? 
+} + +\noindent C2 describes losing interest in the primary subreddit about climbing as it grew because of the interviewee's specific interest in particular types of climbing content (i.e., material associated with being ``crusty'' or experienced). C2 recognizes that when \texttt{r/climbing} experienced growth, the larger volume of posts by newcomers to the sport created a ``background noise problem'' that made it difficult for established climbers to find discussions of interest. + + + + +Similarly, smaller subreddits can be incredibly valuable to those looking for highly specialized information. Even though they may have very low levels of activity, they can provide a way to learn about rare forms of expertise. A participant in our Vintage Audio cluster explained how they might seek out advice on building a reel-to-reel audio setup: + +\blockquote[V2]{ +If you're at [\texttt{r/ReelToReel}]. Everybody is hyper into them. Whereas there's probably overlap with somebody in \texttt{r/vintageaudio} \ldots\ If I'm like trying to rebuild my reel-to-reel player, I want to talk to \ldots the most knowledgeable person particularly about building reel-to-reel \ldots +So I know that who I'm talking to is hyper specific to the knowledge I want. +} + +\noindent Invoking \texttt{r/ReelToReel}, V2 describes a highly niche subreddit about archaic audio tape equipment with only 3,200 subscribers and a handful of posts each day. V2 is simply not looking to find a large audience. Instead, they want access to the ``most knowledgeable person'' with specific expertise because access to this expertise makes it possible for them to consider doing their own reel-to-reel projects. + +Although the \texttt{r/ReelToReel} community overlaps with the larger and more general \texttt{r\Slash vintageaudio}, the latter does not provide the ability to connect with a small group of expert enthusiasts in an old-fashioned technology. 
+ +Similarly, when someone wants a podcast recommendation tailored to their personal tastes, asking in a larger subreddit is not likely to prove as fruitful as it is within a smaller one. O2, a participant in the Podcasting cluster, explained: + +\blockquote[O2]{So I think for like \texttt{r/audiodrama}, I would probably write a longer post, and probably get a bit more into like, my personal tastes. Like I would comment about, `oh, I really love the acting in this one, is there anything similar?' +Open up a bit more about what I do and don’t like. Whereas I think in podcasts, it probably would be more direct. I’d ask a specific question \ldots\ more to the point, more factual, probably just more almost transactional.} + +\noindent Although the larger \texttt{r/podcasts} subreddit is a popular place to promote podcasts on Reddit, O2 explains that they prefer asking for recommendations in the smaller \texttt{r/audiodrama} where they find others willing to take their personal tastes into account. Our interviewees did not advance a ``smaller is better'' argument. O2 explains that they still engage in larger subreddits but use a more direct and transactional approach to information exchange when they do. Similarly, large art communities provide opportunities to find a large audience, but someone can find more substantive feedback to improve their skills by posting in a smaller subreddit organized specifically for this purpose. + + + +Interviewees described the most general interest-based subreddits such as \texttt{r/podcasts}, \texttt{r/painting}, and \texttt{r/climbing} as more accessible and welcoming to newcomers and as reaching a larger audience: all things they valued. They also described these larger groups as having a high volume of low-effort posts or comments. +Our interviewees explained that although they play a useful role in an information ecosystem, the largest subreddits in a topical area are rarely the best places to look for information or advice. 
+They explained that small subreddits can effectively play host to content, information, and discussion that larger subreddits cannot. + + + + + + + + + + + + + + + +\subsubsection{Homophily is more difficult in larger groups} + +Because they have less background noise, smaller subreddits are more likely to provide better opportunities to connect with people who share one's distinctive interests, tastes, and identity. Smaller subreddits are also better places to find a community because they provide opportunities to have repeated encounters with recognizable others, off-topic discussions, and personal interactions. P4 explained: + + +\blockquote[P4]{Obviously, I want as many people to see my stuff as possible, especially [since I am] trying to establish myself. But at the same time, I do want to build a relationship with any sort of community that I can.} + + + + + +\noindent P4 explained that they participated in multiple communities because they have two goals as an artist. First, they want to find an audience for their artwork to establish their career. Second, they want to build a community with others who share their craft. They felt that they needed to turn to multiple subreddits to satisfy both needs. + +Although larger subreddits provide a large potential audience, smaller subreddits were described as being friendlier. Another interviewee from our Painting cluster explains that this is because of how people act differently in large and small subreddits: + +\blockquote[P3]{ +I live in the middle of nowhere. And every so often, before the pandemic, I would visit the [large city several hours away]. Now I found there were very polite people, both in [the city] and in [my rural area]. But the tone by which people carried themselves changes in their environment: that's kind of one of the big changing factors. So, in the city, people are in a rush, they're about their business. We don't really have time to chat. 
+\ldots\ +The big subreddits might seem unfriendly [but] it’s not that so much. Individual members are impolite or unfriendly. But it’s almost as though people carry themselves differently when we’re in different subreddits.} + +\noindent In their extended metaphor, P3 explained that large subreddits are like big cities full of busy people who do not ``have time to chat.'' Evocatively, they described people as behaving differently in large and small subreddits. The very same people who are rude in large subreddits might be friendly in smaller subreddits where people have repeated encounters with one another and have a stronger sense of knowing each other. +In another quote from the same cluster, P2 described how the small subreddit for Bob Ross-inspired painters, \texttt{r/HappyTrees}, stands out from the larger art subreddits because people know one another and it does not feel anonymous. The tight-knit nature of this community contributes to its utility as a source for feedback. + + + + + + + + + +\subsubsection{Tension between finding specific content and homophily} + +A third tension described by our interviewees is that between the desire to connect with similar others and the desire for forms of discussion, content, and feedback that can only be found in diverse groups inclusive of dissimilar others. +Our interviewees described a range of situations when they sought out dissimilar others. For example, they described beginners seeking to learn from experts and outsiders seeking to learn about other cultures. They also described how subreddits instituted rules to limit or organize content that also interfered with unstructured and off-topic discussions that helped with community building. + +For example, although multiple subreddits with overlapping users discuss the same episodes of the TV series \textit{RuPaul's Drag Race}, they have different understandings of events in the show depending on their national identities. 
D1 explained: + +\blockquote[D1]{ +The discussions played out differently on different subreddits. In the Drag Race UK sub there’s a lot more understanding about [a British drag queen] in particular, about where they come from \ldots\ In America we don’t understand how that person is from Worcestershire. } + +\noindent D1 explained that the cultural background of one of the drag queens was a subject of discussion in \texttt{r/RPDR\_UK}, the UK drag race subreddit, while the main subreddit, \texttt{r/rupaulsdragrace}, was ``dominated by the American viewpoint.'' + +Our interviewees described a number of subreddits focused on discussing broad topics from a specific national or regional cultural context. These cultural communities within a topical area provide a homophilous space for sharing distinctive cultural knowledge and sensibilities. +The wrinkle is that even for our American interviewee D1, the \texttt{r/RPDR\_UK} subreddit provided an opportunity to enhance their own experience and appreciation of the show by observing and learning from members of another culture. +In examples such as these, our interviewees explained that communities where like-minded people can share their distinctive appreciation of a show could provide a source of knowledge for outsiders. + +Similarly, Painting participant P2 explained that a group that has a mixture of experts and beginners provides a better learning environment than does a group of beginners alone: + +\blockquote[P2]{If you can find a small group, with a small core of people who are particularly skilled, they sort of energize the group as a whole. \texttt{r/HappyTrees}, even though it's kind of a beginner subreddit, there's some people that posts there that are like, you know, Bob Ross instructors, or they've been doing this for years. And they've mastered that sort of \ldots\ ``happy trees'' thing. +} + +\noindent P2 explains that part of what makes \texttt{r/HappyTrees} great is that it connects learners to experts. 
A homogenous subreddit of only beginners or experts would not provide the same opportunities. + + + +To stay focused on specific types of content, subreddit moderators will frequently employ strict rules and heavy-handed moderation. +Our respondents explained that smaller subreddits can get by with fewer rules and lighter moderation because they have fewer behavior problems and are less attractive to toxic outsiders. They are also more able to self-police using Reddit's voting system and through direct interpersonal sanctions such as admonition. In the words of one of the Vintage Audio participants, + +\blockquote[V2]{In Reddit, the more users you get, the more strict the rules, and the more strict the moderation. Just to prevent problems. +} + +\noindent V2 continued and explained that when a subreddit is small enough that you can ``wrap your hands around'' and is built around a ``like-minded'' group, it can develop and enforce shared behavioral norms that substitute for formal rules and rigid enforcement regimes. V2 explained that the processes of creating spaces for specific types of information got in the way of building community. + +Similarly, one of our interviewees described \texttt{r/Indieheads}'s rules limiting how often one can post, requiring specific titles and tags, and prohibiting types of user-generated content. Although these rules help maintain a high-quality feed, they also prevent sharing of more personal and relatable forms of content such as amateur performances and chit-chat. As a result, subreddits that make rules to ensure that posts are on-topic frequently have adjacent ``-jerk'' subreddits that provide an outlet for jokes and memes and act as places where off-topic discussions can thrive. 
+ + + + + + + + + +\subsection{Interviewees' Understandings of Competition and Mutualism} +\label{sec:results.competition} + + + + +Except for a small qualitative subpart of a single paper \citep{zhu_selecting_2014}, prior ecological studies in social computing have relied on concepts such as competition and mutualism but have provided limited evidence that such concepts are salient to participants. +As part of our interviews, we asked our interviewees if they perceived relationships between the communities they participated in to be competitive or mutualistic. In some cases, interviewees imagined hypothetical scenarios where competition might emerge from the perspective of subreddit moderators. For example, a participant in Climbing said: + +\blockquote[C1]{I guess if you put your Reddit [moderator status] on your resume or something, and you want to be a moderator of a larger community, you could try to get users from other communities. But I haven’t seen or experienced competition.} + +\noindent Although we asked nearly every interviewee about competition, only one interviewee (S2) described an actual instance of conflict or direct competition. +In nearly every other interview, our subjects found our suggestion that subreddits might be in competition to be surprising and strange. + +However, the idea that communities are complementary and mutualistic was much more intuitive. One Vintage Audio participant explained the relationship between subreddits: + + + + + +\blockquote[V2]{Yeah, the overlapping. \ldots\ They each have their own niche. \ldots\ They get big enough to have super critical mass of people. Then they'll have a reason to exist. 
And then they'll sort of fit into the ecosystem of different communities.} + +\noindent Consonant with this description of subreddits in unproblematic coexistence, our interviewees repeatedly suggested that there were not meaningful structural or technical limitations on the number of subreddits a user can join and this reduced the possibility of competition, if it did not eliminate it altogether. + + \section{Discussion} + + +\label{sec:discussion.trillemma} +\begin{figure} +\centering +\def\firstcircle{(0,0) circle (1.7cm)} +\def\secondcircle{(60:2.6cm) circle (1.7cm)} +\def\thirdcircle{(0:2.6cm) circle (1.7cm)} + +\definecolor{myyellow}{HTML}{fae772} +\definecolor{mygreen}{HTML}{4ac26c} +\definecolor{mypurple}{HTML}{31668c} +\begin{tikzpicture} + \begin{scope}[shift={(3cm,-5cm)}, fill opacity=1, text width=2cm, text centered] + + +\draw \firstcircle node [xshift=-1ex] {Specific Content}; + \draw \secondcircle node [yshift=1ex] {Largest Possible Audience}; + \draw \thirdcircle node [xshift=1ex] {Homophilous \\ Community}; + \node (A) at (0.6,1.2) {A}; + \node (A) at (2,1.2) {B}; + \node (A) at (1.325,0.75) {D}; + \node (A) at (1.325,0) {C}; + +\end{scope} +\end{tikzpicture} + + \caption{Venn diagram illustrating the specificity-homophily-audience ``trilemma.''} + \label{fig:trilemma} +\end{figure} + + +The tensions between the benefits that our interviewees sought can be thought of as forming a ``trilemma'' between finding specific content, homophily, and finding as large an audience as possible. This three-way dilemma captures the fact that the more a subreddit succeeds in providing any one of these benefits, the less able it will be to provide the others. A portfolio of overlapping communities solves this problem by providing all three types of benefits. + + + +Figure \ref{fig:trilemma} visualizes the theorized trilemma. Each of the benefits described in §\ref{sec:benefits} is reflected in large circles. 
Each of the tensions described in §\ref{sec:tradeoffs} is reflected in the overlapping areas in the figure. +Area A contains communities that provide the largest possible audience and specific content but are unlikely to provide homophily to community members. Subreddits that provide large audiences face ``the background noise problem'' as a large volume of submissions makes it difficult for people to find the specific content they care about. +Area B contains communities that offer both large audiences and homophily but that will struggle to provide specific content. For example, an American interested in learning about international drag culture finds the need to search beyond \texttt{r/rupaulsdragrace}. +Area C contains communities that provide specialized content and a homophilous community but that may not attract large audiences. +Although not everyone who desires a specific type of content may be similar to those who produce the content, smaller subreddits can often provide both desired content and opportunities to socialize with similar others. +However, as the size of the audience increases, subreddits encounter the background noise problem and acquire a ``big city'' air of unfriendliness. + + + + + +\subsection{Connections to Prior Research} + +\subsubsection{Finding specific content} + +Our findings are consonant with prior work that the primary benefits provided by online communities stem from their power to connect people to novel and hard-to-find sources of information \citep{benkler_wealth_2006, campbell_thousands_2016, von_hippel_free_2016,fiesler_growing_2017}. +Our study adds to this work and complements recent findings of \citet{hwang_why_2021} by describing how nested and overlapping online communities are useful for information seeking and managing one's information exposure. Individuals often desire multiple types of content within a general subject area such as spoiled and spoiler-free discussions. 
+Even when a relatively obscure community such as \texttt{r/vintageaudio} exists, an even more specialized community such as \texttt{r/ReelToReel} may provide access to an even more specialized set of experts. + + +\subsubsection{Finding homophilous community} +Prior work has recognized the importance of homophily in motivating and structuring participation in online communities \citep{chang_specialization_2014, cunha_are_2019, grevet_managing_2014}. +Contributing to this line of research, we identified a number of types of homophily that drive an individual's decisions to participate. These included hobbies, expertise, age, national culture, identity, and status. Homophily was in tension with the need for specific content in that differences among many of these dimensions were valuable for finding information. + +Our results suggest that participants in online communities face trade-offs between homophily and information novelty. These may be similar in structure to the trade-offs between short and long ties observed in contexts such as work groups \citep{ruef_structure_2003} and social networks \citep{grevet_managing_2014, granovetter_strength_1973}. One advantage of joining a group of overlapping online communities is that it can help find information that would be unavailable in homophilous groups. + +\subsubsection{Finding the largest possible audience} + +Much social computing research points to the benefits of large audiences and large communities \citep{kraut_building_2012}. Our work adds more evidence to back up those claims. More relevant, perhaps, are recent counterclaims about the benefits of smallness. \citet{hwang_why_2021} present an interview study with members of small Reddit communities. Although our results about the tensions between large audience size and other benefits are fully in line with Hwang and Foote's findings, our starting assumptions and ultimate takeaways are quite different. 
\citeauthor{hwang_why_2021} seek to understand why people participate in persistently small communities and conclude that smallness offers a range of benefits. Our results suggest that individuals seek out benefits that happen to be incompatible with largeness and participate in portfolios of communities that, because of the trilemma we described, will almost certainly include small ones. Although we believe that \citepos{hwang_why_2021} emphasis on smallness might draw focus to a side effect instead of the cause, we believe that the findings in our two papers are largely complementary. + +Although users may desire large audiences, large online communities often require additional structure to maintain order \citep{kiene_surviving_2016, gillespie_custodians_2018, kiene_technological_2019}. \citet{kiene_surviving_2016} describe how a massive influx of newcomers presents difficulties that can be managed by appointing additional moderators, increasing norm enforcement, and limiting the frequency of posts. \citet{lin_better_2017} find that such interventions help subreddits maintain comment quality and stay on topic during massive influxes of growth. Our sense is that these changes ensure the availability of specific content, in part, because of the growth-limiting effects of rules and enforcement \citep{halfaker_rise_2013, teblunthuis_revisiting_2018}. We see this as yet more evidence in favor of our theory. + + + + +\subsection{Implications for Ecological Studies in Social Computing} + + +The quote by V2 in §\ref{sec:results.competition} can be read as a kind of summary of resource partitioning theory (RPT), a strand of ecological research in organizational science that focuses on explaining specialization \citep{carroll_concentration_1985}. Although RPT has not been deeply examined in prior social computing work, our findings suggest that it may be able to explain the widespread occurrence of overlapping communities. 
RPT proposes that the reason that small specialized organizations coexist with large generalist organizations is that generalists are constrained in their ability to meet distinctive needs in niche markets \citep{carroll_why_2000, swaminathan_resource_2001}. In V2's terms, the ``ecosystem of different communities'' is constructed by a process in which those that ``have a reason to exist'' and are specialized to ``have their own niche'' will achieve ``critical mass.'' + + + + + + + + + + + + + + + +Our grounded theory suggests that the trade-offs in the capacity of an online community to provide different types of benefits that people seek from online communities give rise to new niches. +On the basis of our findings and our understanding of RPT, we hypothesize the following process to describe how systems of overlapping communities develop: + + + + + + + + +When a new topical area grows, the bulk of activity will happen in a generalist community. New members joining that community may seek and find the perceived benefits described in §\ref{sec:benefits} (i.e., specific kinds of content, homophily, and the largest possible audience). +If a topic area, such as art, is sufficiently general, initial membership growth occurs as the community attracts new and existing users interested in both general and more specific types of content. + +As growth continues, membership in the generalist community becomes heterogeneous with lower levels of homophily (e.g., amateur and professional artists) and more specific interests (e.g., painters and photographers) and types of engagement desired (e.g., attention from an audience or critique). At this point, the trade-offs we discuss in §\ref{sec:tradeoffs} related to size become relevant. Finding information related to a specialized subtopic and homophilous socializing grows difficult. + + + +If, as with Reddit, creating new communities is low cost, a community specialized in a subtopic can emerge. 
+This specialized community will likely not attract as large an audience as the generalist community. However, those most interested in the specific subtopic will join it to escape what our interviewees describe as ``background noise'' in the larger generalist community. +Similarly, those seeking personal interaction or social bonding with other community members will be more likely to find them in the specialized community. +A similar process occurs in the formation of spaces having different rules or purposes (such as ``jerk'' spaces). +The cycle will then begin anew as subreddits repartition a subtopic such as \texttt{r/painting} into subspecialists such as \texttt{r/oilpainting} and \texttt{r/watercolor}. +Although some of our interviewees described parts of this process, the model we have narrated is an untested theory. +We leave it to future work to establish its empirical validity. + + + + + + + + + +\subsection{Implications for Design} +By allowing users to create multiple communities with similar or identical topics, platforms can host ecosystems of online communities capable of providing a larger range of benefits to a larger range of users. +Some platforms, such as Stack Exchange, prohibit new communities from overlapping with existing communities \citep{fu_knowledge_2016}. +Our findings suggest that such rules limit the range of the benefits the platform's communities can confer. + +Existing designs for online community platforms such as Reddit are at best ``first-order approximations'' of an ideal solution in that a ``sociotechnical gap'' remains between these designs and the goal of a platform that meets every person's every need \citep{ackerman_intellectual_2000}. Our interviewees partly filled this gap with personalized bespoke solutions in the form of their handpicked portfolios of communities. +Improved designs for multi-community discovery and engagement can better support users in knitting together portfolios of communities. 
+ +Many Reddit users make heavy use of the aggregated streaming feeds \texttt{r/all} and \texttt{r/popular}, which surface highly upvoted posts from across Reddit. +Our interviewees described these feeds as most often featuring content from subreddits that are already extremely popular. Furthermore, Reddit's system for recommending subreddits often returned irrelevant suggestions. +Suggesting communities in as many cells in Figure~\ref{fig:trilemma} as possible could help users build their portfolios of communities. +Because increased visibility may create stress and labor for communities and moderators \citep{kiene_surviving_2016}, recommendations should target those potential members likely to be positive contributors. + +Although some of our interviewees used the ``multireddit'' feature for making a custom feed of subreddits, they described this feature as cumbersome and overwhelming. +A design alternative is to formalize or even automate the types of informal social practices our interviewees described such as cross-community linking and cross-posting. +For example, a subreddit such as \texttt{r/vintageaudio} might configure an auto-moderator to detect posts about reel-to-reel equipment and recommend cross-posting to \texttt{r/reeltoreel}. +A discussion-focused subreddit might routinely invite productive contributors to discussions in the related ``main'' subreddit. +Because intercommunity interactions can give rise to conflict, individual communities should have control of how such practices are implemented. +New tools for collaboration between moderation teams may enable the institution of policies encouraging productive concurrent participation in overlapping communities. + + + +\subsection{Limitations} + +Our study has limitations common to all interview-based studies. Our findings derive from in-depth conversations with relatively few of the people who were highly active participants in the handful of clusters of communities in our sample. 
+Although our study was designed to achieve analytic saturation within each cluster and to cover a wide range of types of topics discussed on Reddit, additional interviews across a wider range of communities might uncover new types of specialization. Additionally, our interviewees were among the most active members of the clusters, and their experiences may differ from those of peripheral members. +Similarly, we cannot speak to the experiences of those who participated in only one community within a cluster. + + + +Our interview data were collected at one point in time and cannot speak to how the dynamics we describe played out over time or how new communities were created and emerged. +Relatedly, although we find that overlapping communities tend to provide different benefits to members, we did not set out to interview community founders and thus cannot speak to the reasons that communities were created \citep{foote_starting_2017}. + +Furthermore, our study focuses only on the Reddit platform. Reddit has distinctive affordances for voting, moderation, and multicommunity engagement that might shape the construction and use of overlapping communities. Although Reddit is among the most popular online community platforms, our findings may not describe relationships between overlapping communities on other platforms, or between one platform and another. Different platforms likely have different strengths or weaknesses for building communities that provide some types of benefits but not others. +At the same time, cross-platform engagement may involve frictions related to the use of multiple identities and sociotechnical systems. +Future research should investigate how people use portfolios that include communities on multiple platforms. + +\section{Conclusion} +Why are the same people talking to each other about similar things in different online communities? 
We answer this question by developing a theory grounded in the analysis of 20 interviews with members of highly related communities on Reddit. Our answer suggests that people turn to online communities in search of multiple benefits---specific kinds of content and discussion, socialization in a homophilous community, and attention from the largest possible audience. We argue that although structures such as the topic, rules, and size of a community might improve the degree to which it provides one of these benefits, they will necessarily detract from its ability to provide others. Multiple communities having a range of structures exist to provide the full range of benefits. No community can do everything. + diff --git a/dissertations/nathante_uw_2021/equalogy_refs.bib b/dissertations/nathante_uw_2021/equalogy_refs.bib new file mode 100644 index 0000000..5bf5296 --- /dev/null +++ b/dissertations/nathante_uw_2021/equalogy_refs.bib @@ -0,0 +1,2529 @@ + +@inproceedings{ackerman_answer_1990, + title = {Answer {{Garden}}: {{A Tool}} for {{Growing Organizational Memory}}}, + shorttitle = {Answer {{Garden}}}, + booktitle = {Proceedings of the {{ACM SIGOIS}} and {{IEEE CS TC-OA Conference}} on {{Office Information Systems}}}, + author = {Ackerman, M. S. and Malone, T. W.}, + date = {1990}, + series = {{{COCS}} '90}, + pages = {31--39}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Answer Garden allows organizations to develop databases of commonly asked questions that grow “organically” as new questions arise and are answered. It is designed to help in situations (such as field service organizations and customer “hot lines”) where there is a continuing stream of questions, many of which occur over and over, but some of which the organization has never seen before. The system includes a branching network of diagnostic questions that helps users find the answers they want. 
If the answer is not present, the system automatically sends the question to the appropriate expert, and the answer is returned to the user as well as inserted into the branching network. Experts can also modify this network in response to users' problems. Our initial Answer Garden database contains questions and answers about how to use the X Window System.}, + isbn = {978-0-89791-358-4}, + file = {/home/nathante/Zotero/storage/Q6XN2KED/Ackerman and Malone - 1990 - Answer Garden A Tool for Growing Organizational M.pdf} +} + +@article{ackerman_intellectual_2000, + title = {The {{Intellectual Challenge}} of {{CSCW}}: {{The Gap Between Social Requirements}} and {{Technical Feasibility}}}, + shorttitle = {The {{Intellectual Challenge}} of {{CSCW}}}, + author = {Ackerman, Mark S.}, + date = {2000-09-01}, + journaltitle = {Human–Computer Interaction}, + volume = {15}, + number = {2-3}, + pages = {179--203}, + publisher = {{Taylor \& Francis}}, + issn = {0737-0024}, + abstract = {Over the last 10 years, Computer-Supported Cooperative Work (CSCW) has identified a base set of findings. These findings are taken almost as assumptions within the field. In summary, they argue that human activity is highly flexible, nuanced, and contextualized and that computational entities such as information sharing, roles, and social norms need to be similarly flexible, nuanced, and contextualized. However, current systems cannot fully support the social world uncovered by these findings. In this article I argue that there is an inherent gap between the social requirements of CSCW and its technical mechanisms. The social-technical gap is the divide between what we know we must support socially and what we can support technically. Exploring, understanding, and hopefully ameliorating this social-technical gap is the central challenge for CSCW as a field and one of the central problems for human-computer interaction. 
Indeed, merely attesting the continued centrality of this gap could be one of the important intellectual contributions of CSCW. I also argue that the challenge of the social-technical gap creates an opportunity to refocus CSCW.}, + keywords = {essay,overview,social computing,theory}, + annotation = {\_eprint: https://doi.org/10.1207/S15327051HCI1523\_5}, + file = {/home/nathante/Zotero/storage/6SR5GJPQ/Ackerman - 2000 - The Intellectual Challenge of CSCW The Gap Betwee.pdf;/home/nathante/Zotero/storage/E3NAR7N8/Ackerman - 2000 - The Intellectual Challenge of CSCW The Gap Betwee.pdf;/home/nathante/Zotero/storage/GCVP7ANI/S15327051HCI1523_5.html} +} + +@article{ackerman_sharing_2013, + title = {Sharing {{Knowledge}} and {{Expertise}}: {{The CSCW View}} of {{Knowledge Management}}}, + shorttitle = {Sharing {{Knowledge}} and {{Expertise}}}, + author = {Ackerman, Mark S. and Dachtera, Juri and Pipek, Volkmar and Wulf, Volker}, + date = {2013-08-21}, + journaltitle = {Computer Supported Cooperative Work (CSCW)}, + shortjournal = {Comput Supported Coop Work}, + volume = {22}, + number = {4-6}, + pages = {531--573}, + issn = {0925-9724, 1573-7551}, + abstract = {Knowledge Management (KM) is a diffuse and controversial term, which has been used by a large number of research disciplines. CSCW, over the last 20 years, has taken a critical stance towards most of these approaches, and instead, CSCW shifted the focus towards a practice-based perspective. This paper surveys CSCW researchers’ viewpoints on what has become called ‘knowledge sharing’ and ‘expertise sharing’. These are based in an understanding of the social contexts of knowledge work and practices, as well as in an emphasis on communication among knowledgeable humans. The paper provides a summary and overview of the two strands of knowledge and expertise sharing in CSCW, which, from an analytical standpoint, roughly represent ‘generations’ of research: an ‘object-centric’ and a ‘people-centric’ view. 
We also survey the challenges and opportunities ahead.}, + langid = {english} +} + +@inproceedings{adamic_knowledge_2008, + title = {Knowledge Sharing and Yahoo Answers: Everyone Knows Something}, + shorttitle = {Knowledge Sharing and Yahoo Answers}, + booktitle = {Proceedings of the 17th International Conference on {{World Wide Web}}}, + author = {Adamic, Lada A. and Zhang, Jun and Bakshy, Eytan and Ackerman, Mark S.}, + date = {2008-04-21}, + series = {{{WWW}} '08}, + pages = {665--674}, + publisher = {{Association for Computing Machinery}}, + location = {{Beijing, China}}, + abstract = {Yahoo Answers (YA) is a large and diverse question-answer forum, acting not only as a medium for sharing technical knowledge, but as a place where one can seek advice, gather opinions, and satisfy one's curiosity about a countless number of things. In this paper, we seek to understand YA's knowledge sharing and activity. We analyze the forum categories and cluster them according to content characteristics and patterns of interaction among the users. While interactions in some categories resemble expertise sharing forums, others incorporate discussion, everyday advice, and support. With such a diversity of categories in which one can participate, we find that some users focus narrowly on specific topics, while others participate across categories. This not only allows us to map related categories, but to characterize the entropy of the users' interests. We find that lower entropy correlates with receiving higher answer ratings, but only for categories where factual expertise is primarily sought after. 
We combine both user attributes and answer characteristics to predict, within a given category, whether a particular answer will be chosen as the best answer by the asker.}, + isbn = {978-1-60558-085-2}, + file = {/home/nathante/Zotero/storage/W97ZJFJS/Adamic et al_2008_Knowledge sharing and yahoo answers.pdf} +} + +@article{amaya_new_2021, + title = {New {{Data Sources}} in {{Social Science Research}}: {{Things}} to {{Know Before Working With Reddit Data}}}, + shorttitle = {New {{Data Sources}} in {{Social Science Research}}}, + author = {Amaya, Ashley and Bach, Ruben and Keusch, Florian and Kreuter, Frauke}, + date = {2021-10}, + journaltitle = {Social Science Computer Review}, + shortjournal = {Social Science Computer Review}, + volume = {39}, + number = {5}, + pages = {943--960}, + issn = {0894-4393, 1552-8286}, + abstract = {Social media are becoming more popular as a source of data for social science researchers. These data are plentiful and offer the potential to answer new research questions at smaller geographies and for rarer subpopulations. When deciding whether to use data from social media, it is useful to learn as much as possible about the data and its source. Social media data have properties quite different from those with which many social scientists are used to working, so the assumptions often used to plan and manage a project may no longer hold. For example, social media data are so large that they may not be able to be processed on a single machine; they are in file formats with which many researchers are unfamiliar, and they require a level of data transformation and processing that has rarely been required when using more traditional data sources (e.g., survey data). Unfortunately, this type of information is often not obvious ahead of time as much of this knowledge is gained through word-of-mouth and experience. 
In this article, we attempt to document several challenges and opportunities encountered when working with Reddit, the self-proclaimed “front page of the Internet” and popular social media site. Specifically, we provide descriptive information about the Reddit site and its users, tips for using organic data from Reddit for social science research, some ideas for conducting a survey on Reddit, and lessons learned in merging survey responses with Reddit posts. While this article is specific to Reddit, researchers may also view it as a list of the type of information one may seek to acquire prior to conducting a project that uses any type of social media data.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6TNVU83S/Amaya et al. - 2021 - New Data Sources in Social Science Research Thing.pdf} +} + +@article{barnett_predicting_2017, + title = {Predicting International {{Facebook}} Ties through Cultural Homophily and Other Factors}, + author = {Barnett, George A and Benefield, Grace A}, + date = {2017-02-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {19}, + number = {2}, + pages = {217--239}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study describes the structure of the international Facebook friendship network and its determinants using various predictors, including physical proximity, cultural homophily, and communication. Network analysis resulted in one group of nations, with countries that bridge geographic and linguistic clusters (France, Spain, United Kingdom, and United Arab Emirates) being the most central. Countries with international Facebook friendship ties tended to share borders, language, civilization, and migration. 
Physical distance, shared hyperlinks, use of common websites, telephone traffic, cultural similarity, and international student exchange were either weakly or not significantly related to international Facebook friendships.}, + langid = {english}, + keywords = {Communication network analysis,cultural homophily,Facebook,international friendship,social media (SNS)}, + file = {/home/nathante/Zotero/storage/LPCY3MMC/Barnett and Benefield - 2017 - Predicting international Facebook ties through cul.pdf} +} + +@incollection{baum_ecological_2006, + title = {Ecological Approaches to Organizations}, + booktitle = {Sage {{Handbook}} for {{Organization Studies}}}, + author = {Baum, Joel A. C. and Shipilov, Andrew V.}, + date = {2006}, + pages = {55--110}, + publisher = {{Sage}}, + location = {{Rochester, NY}}, + abstract = {Our goal is to assess and consolidate the current state-of-the-art in organizational ecology. To accomplish this we review major theoretical statements, empirical studies, and arguments that are now being made. Although we attempt to survey ecological approaches to organizations comprehensively, because ecological research now constitutes a very large body of work, and because other extensive reviews are available (Aldrich \& Wiedenmayer, 1993; Barnett \& Carroll, 1995; Baum, 1996; Baum \& Amburgey, 2002; Baum \& Rao, 2004; Carroll, Dobrev \& Swaminathan, 2002; Galunic \& Weeks 2002; Rao, 2002; Singh \& Lumsden, 1990), we emphasize recent work that challenges and extends established theory and highlight new and emerging directions for future research that appear promising. 
Our appraisal focuses on two main themes - demographic processes and ecological processes.}, + file = {/home/nathante/Zotero/storage/EGQC2W5I/Baum and Shipilov - 2006 - Ecological approaches to organizations.pdf;/home/nathante/Zotero/storage/38MBRGMQ/papers.html} +} + +@article{baum_organizational_1994, + title = {Organizational {{Niches}} and the {{Dynamics}} of {{Organizational Founding}}}, + author = {Baum, Joel A. C. and Singh, Jitendra V.}, + date = {1994}, + journaltitle = {Organization Science}, + volume = {5}, + number = {4}, + eprint = {2635178}, + eprinttype = {jstor}, + pages = {483--501}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {In this paper we argue that patterns of organizational niche overlap and nonoverlap influence the organizational niches in which entrepreneurs create organizations. Organizational niches characterize the different resource requirements and productive capacities of individual organizations in a population. Depending on which organizational niches are targeted, entrepreneurs will face different competitive landscapes. For a population of day care centers (DCCs), we measure organizational niches and compute organizational niche overlaps in terms of the ages of children they are licensed to enroll. Using weights based on organizational niche overlaps, we disaggregate population density (i.e., the number of DCCs) into overlap density and nonoverlap density to measure the potential for competition and cooperation among DCCs. The overlap density of an organizational niche is equal to population density weighted by the overlaps of the focal organizational niche with all other organizational niches. Conversely, non-overlap density is equal to population density weighted by the absence of overlaps of a focal organizational niche with all other organizational niches. We hypothesize that overlap density will be negatively related to the founding rate. 
We expect entrepreneurs will be much less likely to target or be capable of founding organizations in crowded parts of the resource space than parts that are less densely populated. We also hypothesize that nonoverlap density will be positively related to the founding rate. This is because differentiated DCCs do not compete directly for resources, and, at the same time, their presence can have facilitative influences through complementary demand enhancement and widening social acceptance of the organization form. Supporting these predictions, a dynamic analysis showed that overlap density had a competitive effect on the founding rate, while nonoverlap density had a positive effect. Parallel effects were obtained when overlap and nonoverlap densities were further disaggregated on the basis of geographic proximity into local and diffuse components. Overall, our findings are consistent with earlier research on organizational founding at the population level, but reveal intrapopulation patterns of mutualism and competition that influence the likelihood of organizations being established in different organizational niches. 
The key result of this study, that location in a multidimensional resource space, together with the distribution of other competitors and noncompetitors, has a significant impact on founding probabilities serves to illuminate some of the underlying dynamics of competition and mutualism that impact strategic and entrepreneurial processes.}, + file = {/home/nathante/Zotero/storage/E2AGCRNI/Baum and Singh - 1994 - Organizational Niches and the Dynamics of Organiza.pdf} +} + +@article{baumgartner_pushshift_2020, + title = {The {{Pushshift Reddit}} Dataset}, + author = {Baumgartner, Jason and Zannettou, Savvas and Keegan, Brian and Squire, Megan and Blackburn, Jeremy}, + date = {2020-05-26}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + shortjournal = {ICWSM}, + volume = {14}, + pages = {830--839}, + issn = {2334-0770}, + langid = {english}, + keywords = {pushift,reddit}, + file = {/home/nathante/Zotero/storage/DHRFJ58I/Baumgartner et al. - 2020 - The Pushshift Reddit Dataset.pdf;/home/nathante/Zotero/storage/G5E8SQFN/Baumgartner et al_2020_The Pushshift Reddit Dataset.pdf;/home/nathante/Zotero/storage/A8X5UY9R/2001.html;/home/nathante/Zotero/storage/B9FRQR94/7347.html} +} + +@incollection{benkler_peer_2015, + title = {Peer Production: {{A}} Form of Collective Intelligence}, + booktitle = {Handbook of {{Collective Intelligence}}}, + author = {Benkler, Yochai and Shaw, Aaron and Hill, Benjamin Mako}, + editor = {Malone, Thomas W. and Bernstein, Michael S.}, + date = {2015}, + pages = {175--204}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-02981-0}, + langid = {english}, + file = {/home/nathante/Zotero/storage/SKULU2E6/Benkler et al. 
- 2015 - Peer production A form of collective intelligence.pdf} +} + +@book{benkler_wealth_2006, + title = {The Wealth of Networks: {{How}} Social Production Transforms Markets and Freedom}, + author = {Benkler, Yochai}, + date = {2006}, + publisher = {{Yale University Press}}, + location = {{New Haven, CT}}, + pagetotal = {528}, + keywords = {bookReview,Economics,FOSS,foundations of social computing,import,Innovation,Legal Studies,peer production} +} + +@incollection{bernstein_quantifying_2013, + title = {Quantifying the Invisible Audience in Social Networks}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Bernstein, Michael S. and Bakshy, Eytan and Burke, Moira and Karrer, Brian}, + date = {2013-04-27}, + pages = {21--30}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {When you share content in an online social network, who is listening? Users have scarce information about who actually sees their content, making their audience seem invisible and difficult to estimate. However, understanding this invisible audience can impact both science and design, since perceived audiences influence content production and self-presentation online. In this paper, we combine survey and large-scale log data to examine how well users' perceptions of their audience match their actual audience on Facebook. We find that social media users consistently underestimate their audience size for their posts, guessing that their audience is just 27\% of its true size. Qualitative coding of survey responses reveals folk theories that attempt to reverse-engineer audience size using feedback and friend count, though none of these approaches are particularly accurate. 
We analyze audience logs for 222,000 Facebook users' posts over the course of one month and find that publicly visible signals --- friend count, likes, and comments --- vary widely and do not strongly indicate the audience of a single post. Despite the variation, users typically reach 61\% of their friends each month. Together, our results begin to reveal the invisible undercurrents of audience attention and behavior in online social networks.}, + isbn = {978-1-4503-1899-0}, + keywords = {audience,information distribution,social networks} +} + +@article{bilgrei_broscience_2018, + title = {Broscience: {{Creating}} Trust in Online Drug Communities}, + shorttitle = {Broscience}, + author = {Bilgrei, Ola Røed}, + date = {2018-08-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {20}, + number = {8}, + pages = {2712--2727}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study explores the social mechanisms involved in online community trust. Drawing on interviews with members from two Norwegian Internet drug forums, the article illustrates how forum members evaluate the trustworthiness of online user-generated drug content, referred to as ‘broscience’. First, the shared narratives and boundaries within the forums generated a sense of collective identity, where members defined their online surroundings in terms of community trust and collaboration. Second, the subcultural argot within the forums helped members express a level of subcultural competence and authenticity, in which they were able to assess their credibility and initial trustworthiness. Third, the reputation linked to online identities created expectations and predictability as a basis for evaluating members’ trustworthiness. 
These findings touch upon the ambivalence of trust in an online setting and highlight the communal process that caused their ambivalence to be suspended, thereby enabling online community trust.}, + langid = {english}, + keywords = {Broscience,drugs,Internet subculture,online community,trust}, + file = {/home/nathante/Zotero/storage/WBMSUCSH/Bilgrei - 2018 - Broscience Creating trust in online drug communit.pdf} +} + +@article{boyd_social_2007, + title = {Social {{Network Sites}}: {{Definition}}, {{History}}, and {{Scholarship}}}, + shorttitle = {Social {{Network Sites}}}, + author = {Boyd, Danah M and Ellison, Nicole B.}, + date = {2007-10-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {13}, + number = {1}, + pages = {210--230}, + publisher = {{Oxford Academic}}, + abstract = {Social network sites (SNSs) are increasingly attracting the attention of academic and industry researchers intrigued by their affordances and reach. This special theme section of the Journal of Computer-Mediated Communication brings together scholarship on these emergent phenomena. In this introductory article, we describe features of SNSs and propose a comprehensive definition. We then present one perspective on the history of such sites, discussing key changes and developments. 
After briefly summarizing existing scholarship concerning SNSs, we discuss the articles in this special section and conclude with considerations for future research.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6BMGYUAE/Boyd and Ellison - 2007 - Social Network Sites Definition, History, and Sch.pdf;/home/nathante/Zotero/storage/JK59CLHH/4583062.html} +} + +@inproceedings{brandtzaeg_user_2008, + title = {User {{Loyalty}} and {{Online Communities}}: {{Why Members}} of {{Online Communities}} Are Not {{Faithful}}}, + shorttitle = {User {{Loyalty}} and {{Online Communities}}}, + booktitle = {Proceedings of the 2nd {{International Conference}} on {{INtelligent TEchnologies}} for Interactive {{enterTAINment}}}, + author = {Brandtzæg, Petter Bae and Heim, Jan}, + date = {2008}, + publisher = {{ICST}}, + location = {{Cancun, Mexico}}, + abstract = {Online communities are getting increasingly important for several different user groups; at the same time, community members seem to lack loyalty, as they often change from one community to another or use their community less over time. To survive and thrive, online communities must meet members’ needs. By using qualitative data are from an extensive online survey of online community users and a representative sample of Internet users, 200 responses to an open question regarding community-loyalty was analyzed. Results show that there are 9 main reasons why community-users decrease in their participation over time or, in simple terms, stop using their online community: 1) Lack of interesting people/friends attending, 2) Low quality content, 3) Low usability, 4) Harassment and bullying 5) Timeconsuming/isolating, 6) Low trust, 7) Over-commercialized, 8) Dissatisfaction with moderators and 9) Unspecified boring. 
The results, design implications and future research are discussed.}, + eventtitle = {2nd {{International Conference}} on {{INtelligent TEchnologies}} for Interactive {{enterTAINment}}}, + isbn = {978-963-9799-13-4}, + langid = {english}, + file = {/home/nathante/Zotero/storage/2KNF5QHS/Brandtzæg and Heim - 2008 - User Loyalty and Online Communities Why Members o.pdf} +} + +@article{brown_social_1987, + title = {Social {{Ties}} and {{Word-of-Mouth Referral Behavior}}}, + author = {Brown, Jacqueline Johnson and Reingen, Peter H.}, + date = {1987}, + journaltitle = {Journal of Consumer Research}, + volume = {14}, + number = {3}, + eprint = {2489496}, + eprinttype = {jstor}, + pages = {350--362}, + publisher = {{Oxford University Press}}, + issn = {0093-5301}, + abstract = {This article presents a network analysis of word-of-mouth referral behavior in a natural environment. The relational properties of tie strength and homophily were employed to examine referral behavior at micro and macro levels of inquiry. The study demonstrates different roles played by weak and strong social ties. At the macro level, weak ties displayed an important bridging function, allowing information to travel from one distinct subgroup of referral actors to another subgroup in the broader social system. At the micro level, strong and homophilous ties were more likely to be activated for the flow of referral information. 
Strong ties were also perceived as more influential than weak ties, and they were more likely to be utilized as sources of information for related goods.} +} + +@article{burnett_information_2004, + title = {Information {{Exchange}} in {{Virtual Communities}}: A {{Comparative Study}}}, + shorttitle = {Information {{Exchange}} in {{Virtual Communities}}}, + author = {Burnett, Gary and Buerkle, Harry}, + date = {2004-01-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {9}, + issn = {1083-6101}, + abstract = {Burnett's (2000) typology of information exchange in virtual communities attempts to provide a framework for examining the range of activities undertaken by participants in such communities. This study is the first in a series to apply the typology to specific virtual communities, in an effort to assess its accuracy against the day-to-day interactions to be found in two online communities. Through a comparison of these two communities using the typology, revisions to the typology are proposed which will allow it to reflect more accurately activities found within the communities. By providing a metric through which to address such questions, the revised typology will allow a richer understanding of virtual communities as social information environments.}, + issue = {JCMC922}, + file = {/home/nathante/Zotero/storage/39C7RSD8/4614481.html} +} + +@article{burtch_how_2021, + title = {How {{Do Peer Awards Motivate Creative Content}}? {{Experimental Evidence}} from {{Reddit}}}, + shorttitle = {How {{Do Peer Awards Motivate Creative Content}}?}, + author = {Burtch, Gordon and He, Qinglai and Hong, Yili and Lee, Dokyun}, + date = {2021-06-01}, + journaltitle = {Management Science}, + publisher = {{INFORMS}}, + issn = {0025-1909}, + abstract = {We theorize peer awards’ effects on the volume and novelty of creative user-generated content (UGC) produced at online platform communities. 
We then test our hypotheses via a randomized field experiment on Reddit, wherein we randomly and anonymously assigned Reddit’s Gold Award to 905 users’ posts over a two-month period. We find that peer awards induced recipients to make longer, more frequent posts and that these effects were particularly pronounced among newer community members. Further, we show that recipients were causally influenced to engage in greater (lesser) exploitation (exploration) behavior, producing content that exhibited significantly greater textual similarity to their own past (awarded) content. However, because the effects were most pronounced among new community members, who also produce content that, in general, is systematically more novel than that of established members to begin with, this process yields a desirable outcome: larger volumes of generally novel UGC for the community. This paper was accepted by Chris Forman, information systems.}, + keywords = {creativity,field experiment,peer awards,Reddit,text-mining,user-generated content}, + file = {/home/nathante/Zotero/storage/WJ4GQ39Y/Burtch et al_2021_How Do Peer Awards Motivate Creative Content.pdf} +} + +@article{butler_attraction-selection-attrition_2014, + title = {An Attraction-Selection-Attrition Theory of Online Community Size and Resilience}, + author = {Butler, Brian S. and Bateman, Patrick J. and Gray, Peter H. and Diamant, E. Ilana}, + date = {2014-09}, + journaltitle = {MIS Q.}, + volume = {38}, + number = {3}, + pages = {699--728}, + issn = {0276-7783}, + abstract = {Online discussion communities play an important role in the development of relationships and the transfer of knowledge within and across organizations. Their underlying technologies enhance these processes by providing infrastructures through which group-based communication can occur. 
Community administrators often make decisions about technologies with the goal of enhancing the user experience, but the impact of such decisions on how a community develops must also be considered. To shed light on this complex and under-researched phenomenon, we offer a model of key latent constructs influenced by technology choices and possible causal paths by which they have dynamic effects on communities. Two important community characteristics that can be impacted are community size (number of members) and community resilience (membership that is willing to remain involved with the community in spite of variability and change in the topics discussed). To model community development, we build on attraction-selection-attrition (ASA) theory, introducing two new concepts: participation costs (how much time and effort are required to engage with content provided in a community) and topic consistency cues (how strongly a community signals that topics that may appear in the future will be consistent with what it has hosted in the past). We use the proposed ASA theory of online communities (OCASA) to develop a simulation model of community size and resilience that affirms some conventional wisdom and also has novel and counterintuitive implications. Analysis of the model leads to testable new propositions about the causal paths by which technology choices affect the emergence of community size and community resilience, and associated implications for community sustainability.}, + file = {/home/nathante/Zotero/storage/292C8XTF/Butler et al. - 2014 - An Attraction-selection-attrition Theory of Online.pdf} +} + +@article{butler_cross-purposes_2011, + title = {The Cross-Purposes of Cross-Posting: Boundary Reshaping Behavior in Online Discussion Communities}, + shorttitle = {The Cross-Purposes of Cross-Posting}, + author = {Butler, Brian S. 
and Wang, Xiaoqing}, + date = {2011-09-15}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {23}, + pages = {993--1010}, + issn = {1047-7047}, + abstract = {Increasingly, online discussion communities are used to support activities ranging from software development to political campaigns. An important feature of an online discussion community is its content boundaries, which are individual perceptions of what materials and discussions are part of the community and what are not, and how that community is related to others within a larger system. Yet in spite of its importance, many community infrastructures allow individual participants to reshape content boundaries by simultaneously associating their contributions with multiple online discussion communities. This reshaping behavior is a controversial aspect of the creation and management of many types of online discussion communities. On one hand, many communities explicitly discourage boundary reshaping behaviors in their frequently asked questions or terms-of-use document. On the other hand, community infrastructures continue to allow such reshaping behaviors. To explain this controversy, we theorize how the extent of boundary reshaping in an online discussion community has simultaneously positive and negative effects on its member dynamics and responsiveness. We test predictions about the conflicting effects of reshaping behaviors with 60 months of longitudinal data from 140 USENET newsgroups, focusing on cross-posting activities as a form of reshaping behavior. Empirical results are consistent with the proposed hypotheses that reshaping behaviors within a discussion community affect member dynamics and community responsiveness in both positive and negative ways. 
Taken together, the findings highlight the boundary-related design challenges faced by managers seeking to support ongoing activity within online discussion communities.}, + issue = {3-part-2}, + file = {/home/nathante/Zotero/storage/MHIHVXMA/Butler and Wang - 2012 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/ZDTPFJP3/Butler and Wang - 2011 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/5XCPFJS9/isre.1110.html} +} + +@article{butler_membership_2001, + title = {Membership Size, Communication Activity, and Sustainability: {{A}} Resource-Based Model of Online Social Structures}, + shorttitle = {Membership {{Size}}, {{Communication Activity}}, and {{Sustainability}}}, + author = {Butler, Brian S.}, + date = {2001}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {12}, + number = {4}, + eprint = {23011457}, + eprinttype = {jstor}, + pages = {346--362}, + issn = {1047-7047}, + abstract = {As telecommunication networks become more common, there is an increasing interest in the factors underlying the development of online social structures. It has been proposed that these structures are new forms of organizing which are not subject to the same constraints as traditional social structures. However, from anecdotal evidence and case studies it is difficult to evaluate whether online social structures are subject to the same problems as traditional social structures. Drawing from prior studies of traditional social structures and empirical analyses of longitudinal data from a sample of Internet-based groups, this exploratory work considers the role of size and communication activity in sustainable online social structures. A resource-based theory of sustainable social structures is presented. Members contribute time, energy, and other resources, enabling a social structure to provide benefits for individuals. 
These benefits, which include information, influence, and social support, are the basis for a social structure's ability to attract and retain members. This model focuses on the system of opposing forces that link membership size as a component of resource availability and communication activity as an aspect of benefit provision to the sustainability of an online social structure. Analyses of data from a random sample of e-mail-based Internet social structures (listservs) indicate that communication activity and size have both positive and negative effects on a structure's sustainability. These results suggest that while the use of networked communication technologies may alter the form of communication, balancing the opposing impacts of membership size and communication activity in order to maintain resource availability and provide benefits for current members remains a fundamental problem underlying the development of sustainable online social structures.}, + file = {/home/nathante/Zotero/storage/4ENNLMAH/Butler - 2001 - Membership Size, Communication Activity, and Susta.pdf;/home/nathante/Zotero/storage/U7AUNAZT/Butler-2001-ISR-Membership_size_communication_activitiy_sustainability.pdf} +} + +@inproceedings{campbell_thousands_2016, + title = {Thousands of {{Positive Reviews}}: {{Distributed Mentoring}} in {{Online Fan Communities}}}, + shorttitle = {Thousands of {{Positive Reviews}}}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer-Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Campbell, Julie and Aragon, Cecilia and Davis, Katie and Evans, Sarah and Evans, Abigail and Randall, David}, + date = {2016-02-27}, + series = {{{CSCW}} '16}, + pages = {691--704}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Young people worldwide are participating in ever-increasing numbers in online fan communities. 
Far from mere shallow repositories of pop culture, these sites are accumulating significant evidence that sophisticated informal learning is taking place online in novel and unexpected ways. In order to understand and analyze in more detail how learning might be occurring, we conducted an in-depth nine-month ethnographic investigation of online fanfiction communities, including participant observation and fanfiction author interviews. Our observations led to the development of a theory we term distributed mentoring, which we present in detail in this paper. Distributed mentoring exemplifies one instance of how networked technology affords new extensions of behaviors that were previously bounded by time and space. Distributed mentoring holds potential for application beyond the spontaneous mentoring observed in this investigation and may help students receive diverse, thoughtful feedback in formal learning environments as well.}, + isbn = {978-1-4503-3592-8}, + keywords = {digital youth.,distributed mentoring,fanfiction,informal learning,Mentoring,online communities}, + file = {/home/nathante/Zotero/storage/D9ZM58VV/Campbell et al. - 2016 - Thousands of Positive Reviews Distributed Mentori.pdf} +} + +@article{carroll_concentration_1985, + title = {Concentration and Specialization: {{Dynamics}} of Niche Width in Populations of Organizations}, + shorttitle = {Concentration and {{Specialization}}}, + author = {Carroll, Glenn R.}, + date = {1985-05-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {90}, + number = {6}, + pages = {1262--1283}, + issn = {0002-9602}, + abstract = {This paper departs from the common practice of focusing on large, generalist organizations and shows that new organizational insights are obtained by adopting a broader, ecological perspective. The newspaper publishing industry is examined as an illustration. 
The ecological focus shows that many small, specialized organizations operate successfully in this industry, despite apparently high levels of local concentration. A resource-partitioning model is advanced to explain the interorganizational relationships between generalist and specialist organizations. Statistical tests of the model using historical data on 2,808 American local newspaper organizations show the merit of using the ecological perspective for analyzing industries.}, + file = {/home/nathante/Zotero/storage/G38AK5SZ/Carroll - 1985 - Concentration and specialization Dynamics of nich.pdf;/home/nathante/Zotero/storage/8PG3QCP3/228210.html} +} + +@article{carroll_why_2000, + title = {Why the Microbrewery Movement? {{Organizational}} Dynamics of Resource Partitioning in the {{U}}.{{S}}. Brewing Industry}, + shorttitle = {Why the {{Microbrewery Movement}}?}, + author = {Carroll, Glenn R. and Swaminathan, Anand}, + date = {2000}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {106}, + number = {3}, + eprint = {10.1086/318962}, + eprinttype = {jstor}, + pages = {715--762}, + issn = {0002-9602}, + abstract = {The number of small specialty brewers in the U.S. beer brewing industry has increased dramatically in recent decades, even as the market for beer became increasingly dominated by mass‐production brewing companies. Using the resource‐partitioning model of organizational ecology, this article shows that these two apparently contradictory trends are fundamentally interrelated. Hypotheses developed here refine the way scale competition among generalist organizations is modeled and improve the theoretical development of the sociological bases for the appeal of specialist organizations' products, especially those related to organizational identity. Evidence drawn from qualitative and quantitative research provides strong support for the theory. 
The article offers a brief discussion of the theoretical and substantive issues involved in application of the model to other industries and to other cultures.}, + file = {/home/nathante/Zotero/storage/X2ITSCRL/Carroll and Swaminathan - 2000 - Why the microbrewery movement Organizational dyna.pdf} +} + +@book{castells_rise_1996, + title = {Rise of {{The Network Society}} ({{Information Age Series}})}, + author = {Castells, Manuel}, + date = {1996}, + edition = {1}, + publisher = {{Wiley-Blackwell}}, + isbn = {1-55786-617-1}, + pagetotal = {481} +} + +@inproceedings{chancellor_norms_2018, + title = {Norms {{Matter}}: {{Contrasting Social Support Around Behavior Change}} in {{Online Weight Loss Communities}}}, + shorttitle = {Norms {{Matter}}}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Chancellor, Stevie and Hu, Andrea and De Choudhury, Munmun}, + date = {2018-04-21}, + series = {{{CHI}} '18}, + pages = {1--14}, + publisher = {{Association for Computing Machinery}}, + location = {{Montreal QC, Canada}}, + abstract = {Online health communities (OHCs) provide support across conditions; for weight loss, OHCs offer support to foster positive behavior change. However, weight loss behaviors can also be subverted on OHCs to promote disordered eating practices. Using comments as proxies for support, we use computational linguistic methods to juxtapose similarities and differences in two Reddit weight loss communities, r/proED and r/loseit. We employ language modeling and find that word use in both communities is largely similar. Then, by building a word embedding model, specifically a deep neural network on comment words, we contrast the context of word use and find differences that imply different behavior change goals in these OHCs. Finally, these content and context norms predict whether a comment comes from r/proED or r/loseit. 
We show that norms matter in understanding how different OHCs provision support to promote behavior change and discuss the implications for design and moderation of OHCs.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/77YDPVB6/Chancellor et al. - 2018 - Norms Matter Contrasting Social Support Around Be.pdf} +} + +@article{chandrasekharan_internets_2018, + title = {The Internet's Hidden Rules: {{An}} Empirical Study of Reddit Norm Violations at Micro, Meso, and Macro Scales}, + shorttitle = {The {{Internet}}'s {{Hidden Rules}}}, + author = {Chandrasekharan, Eshwar and Samory, Mattia and Jhaver, Shagun and Charvat, Hunter and Bruckman, Amy and Lampe, Cliff and Eisenstein, Jacob and Gilbert, Eric}, + date = {2018}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + pages = {32:1--32:25}, + issn = {2573-0142}, + abstract = {Norms are central to how online communities are governed. Yet, norms are also emergent, arise from interaction, and can vary significantly between communities---making them challenging to study at scale. In this paper, we study community norms on Reddit in a large-scale, empirical manner. Via 2.8M comments removed by moderators of 100 top subreddits over 10 months, we use both computational and qualitative methods to identify three types of norms: macro norms that are universal to most parts of Reddit; meso norms that are shared across certain groups of subreddits; and micro norms that are specific to individual, relatively unique subreddits. Given the size of Reddit's user base---and the wide range of topics covered by different subreddits---we argue this represents the first large-scale census of the norms in broader internet culture. In other words, these findings shed light on what Reddit values, and how widely-held those values are. 
We conclude by discussing implications for the design of new and existing online communities.}, + issue = {CSCW}, + keywords = {community norms,mixed methods.,moderation,online communities}, + file = {/home/nathante/Zotero/storage/2CA9ZVFB/Chandrasekharan et al. - 2018 - The Internet's Hidden Rules An Empirical Study of.pdf;/home/nathante/Zotero/storage/HUP7XT5H/Chandrasekharan et al_2018_The Internet's Hidden Rules.pdf} +} + +@online{chandrasekharan_quarantined_2020, + title = {Quarantined! {{Examining}} the {{Effects}} of a {{Community-Wide Moderation Intervention}} on {{Reddit}}}, + author = {Chandrasekharan, Eshwar and Jhaver, Shagun and Bruckman, Amy and Gilbert, Eric}, + date = {2020-09-24}, + eprint = {2009.11483}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Should social media platforms intervene when communities repeatedly break rules? What actions can they consider? In light of this hotly debated issue, platforms have begun experimenting with softer alternatives to outright bans. We examine one such intervention called quarantining, that impedes direct access to and promotion of controversial communities. Specifically, we present two case studies of what happened when Reddit quarantined the influential communities r/TheRedPill (TRP) and r/The\_Donald (TD). Working with over 85M Reddit posts, we apply causal inference methods to examine the quarantine’s effects on TRP and TD. We find that the quarantine made it more difficult to recruit new members: new user influx to TRP and TD decreased by 79.5\% and 58\%, respectively. Despite quarantining, existing users’ misogyny and racism levels remained unaffected. 
We conclude by reflecting on the effectiveness of this design friction in limiting the influence of toxic communities and discuss broader implications for content moderation.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/CB26SNVJ/Chandrasekharan et al. - 2020 - Quarantined! Examining the Effects of a Community-.pdf} +} + +@article{chandrasekharan_you_2017, + ids = {chandrasekharan_you_2017-1}, + title = {You Can't Stay Here: {{The}} Efficacy of Reddit's 2015 Ban Examined through Hate Speech}, + shorttitle = {You Can't Stay Here}, + author = {Chandrasekharan, Eshwar and Pavalanathan, Umashanthi and Srinivasan, Anirudh and Glynn, Adam and Eisenstein, Jacob and Gilbert, Eric}, + date = {2017-12}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {31:1--31:22}, + issn = {2573-0142}, + abstract = {In 2015, Reddit closed several subreddits-foremost among them r/fatpeoplehate and r/CoonTown-due to violations of Reddit's anti-harassment policy. However, the effectiveness of banning as a moderation approach remains unclear: banning might diminish hateful behavior, or it may relocate such behavior to different parts of the site. We study the ban of r/fatpeoplehate and r/CoonTown in terms of its effect on both participating users and affected subreddits. Working from over 100M Reddit posts and comments, we generate hate speech lexicons to examine variations in hate speech usage via causal inference methods. We find that the ban worked for Reddit. More accounts than expected discontinued using the site; those that stayed drastically decreased their hate speech usage-by at least 80\%. Though many subreddits saw an influx of r/fatpeoplehate and r/CoonTown "migrants," those subreddits saw no significant changes in hate speech usage. In other words, other subreddits did not inherit the problem. 
We conclude by reflecting on the apparent success of the ban, discussing implications for online moderation, Reddit and internet communities more broadly.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/5Z8CCRM2/Chandrasekharan et al. - 2017 - You Can'T Stay Here The Efficacy of Reddit's 2015.pdf} +} + +@inproceedings{chang_specialization_2014, + title = {Specialization, Homophily, and Gender in a Social Curation Site: Findings from Pinterest}, + shorttitle = {Specialization, Homophily, and Gender in a Social Curation Site}, + booktitle = {Proceedings of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing}, + author = {Chang, Shuo and Kumar, Vikas and Gilbert, Eric and Terveen, Loren G.}, + date = {2014-02-15}, + series = {{{CSCW}} '14}, + pages = {674--686}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Pinterest is a popular social curation site where people collect, organize, and share pictures of items. We studied a fundamental issue for such sites: what patterns of activity attract attention (audience and content reposting)? We organized our studies around two key factors: the extent to which users specialize in particular topics, and homophily among users. We also considered the existence of differences between female and male users. We found: (a) women and men differed in the types of content they collected and the degree to which they specialized; male Pinterest users were not particularly interested in stereotypically male topics; (b) sharing diverse types of content increases your following, but only up to a certain point; (c) homophily drives repinning: people repin content from other users who share their interests; homophily also affects following, but to a lesser extent. 
Our findings suggest strategies both for users (e.g., strategies to attract an audience) and maintainers (e.g., content recommendation methods) of social curation sites.}, + isbn = {978-1-4503-2540-0}, + keywords = {data analysis,social network,topic detection,user profiling}, + file = {/home/nathante/Zotero/storage/RVP6MZ6S/Chang et al. - 2014 - Specialization, homophily, and gender in a social .pdf} +} + +@book{charmaz_constructing_2015, + ids = {charmaz_constructing_2014}, + title = {Constructing Grounded Theory: {{A}} Practical Guide through Qualitative Analysis}, + shorttitle = {Constructing {{Grounded Theory}}}, + author = {Charmaz, Kathy}, + date = {2015}, + edition = {2}, + publisher = {{SAGE}}, + location = {{Thousand Oaks, California}}, + isbn = {0-7619-7352-4} +} + +@article{chesney_other_2004, + title = {“Other People Benefit. i Benefit from Their Work.” {{Sharing Guitar Tabs Online}}}, + author = {Chesney, Thomas}, + date = {2004-11-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {10}, + issn = {1083-6101}, + abstract = {This paper reports the results of a study into a public space Internet portal which publishes guitar tabs (tablature) online, to examine what motivates people to participate in this activity and what benefits they get from doing so. A guitar tab is essentially sheet music for guitarists. The study examines why people contribute when it is easier for them not to publish their tabs and simply use the tabs that other people have posted. Answers to this will have implications for businesses wanting to encourage their employees to share their knowledge. An open ended questionnaire was sent to 183 tab publishers with a usable response rate of 39\%, which is considered high for surveys. The questionnaire sought to gather data on motivations, benefits and community interaction. 
The paper begins with a review of relevant theories of knowledge sharing and publishing, in particular the private-collective model of innovation (von Hippel \& von Krogh, 2003) which is used to analyze the results. Motivations are listed as under two categories, self and altruistic, with the most popular motivation being to share the songs with others, which is from the altruistic category. The most common benefit is personal satisfaction. The results show tab publishing fits with the private-collective model of innovation which means that a tab published online can be seen as a public good, as it is available to all, that has significant private elements. These private elements are the benefits that tab publishers get which the people who only use tabs without contributing their own, do not. The implications of the work are as follows. Enjoyment of the domain seems to be an important factor in motivating knowledge sharing. People who feel like they are part of a community and get satisfaction from being part of a community, will be more likely to contribute. The act of sharing knowledge should be as close to effortless as possible to encourage contributions. The act of preparing (collecting, collating etc.) the material to be shared should have meaning in itself for the person who is preparing it. If the act of sharing leads to increased status in the community people will be more likely to contribute. To encourage knowledge sharing, those who make use of the shared knowledge should be encouraged to give positive feedback to the person who shared it. 
To date, there has been little empirical work examining online posting forums.}, + issue = {JCMC1012}, + file = {/home/nathante/Zotero/storage/JWW5X2DI/4614460.html} +} + +@article{ciampaglia_production_2015, + title = {The Production of Information in the Attention Economy}, + author = {Ciampaglia, Giovanni Luca and Flammini, Alessandro and Menczer, Filippo}, + date = {2015-05-19}, + journaltitle = {Scientific Reports}, + volume = {5}, + pages = {9452}, + issn = {2045-2322}, + file = {/home/nathante/Zotero/storage/Z5SM58N9/srep09452.pdf} +} + +@article{coleman_social_1988, + title = {Social {{Capital}} in the {{Creation}} of {{Human Capital}}}, + author = {Coleman, James S.}, + date = {1988}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {94}, + eprint = {2780243}, + eprinttype = {jstor}, + pages = {S95-S120}, + issn = {0002-9602}, + abstract = {In this paper, the concept of social capital is introduced and illustrated, its forms are described, the social structural conditions under which it arises are examined, and it is used in an analysis of dropouts from high school. Use of the concept of social capital is part of a general theoretical strategy discussed in the paper: taking rational action as a starting point but rejecting the extreme individualistic premises that often accompany it. The conception of social capital as a resource for action is one way of introducing social structure into the rational action paradigm. Three forms of social capital are examined: obligations and expectations, information channels, and social norms. The role of closure in the social structure in facilitating the first and third of these forms of social capital is described. An analysis of the effect of the lack of social capital available to high school sophomores on dropping out of school before graduation is carried out. 
The effect of social capital within the family and in the community outside the family is examined.}, + file = {/home/nathante/Zotero/storage/8B8X2LBV/Coleman - 1988 - Social Capital in the Creation of Human Capital.pdf;/home/nathante/Zotero/storage/83B63Z3Y/Coleman - 1988 - Social Capital in the Creation of Human Capital.html} +} + +@inproceedings{cook_contribution_2009, + title = {Contribution, Commercialization \& Audience: Understanding Participation in an Online Creative Community}, + shorttitle = {Contribution, Commercialization \& Audience}, + booktitle = {Proceedings of the {{ACM}} 2009 International Conference on {{Supporting}} Group Work}, + author = {Cook, Eric and Teasley, Stephanie D. and Ackerman, Mark S.}, + date = {2009-05-10}, + series = {{{GROUP}} '09}, + pages = {41--50}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {This paper presents a qualitative study of attitudes towards participation and contribution in an online creative community. The setting of the work is an online community of practice focused on the use and development of a user-customizable music software package called Reaktor. Findings from the study highlight four emergent topics in the discourse related to user contributions to the community: contribution assessment, support for learning, perceptions of audience and tensions about commercialization. 
Our analysis of these topics frames discussion about the value and challenges of attending to amateur and professional users in online creative communities.}, + isbn = {978-1-60558-500-0}, + keywords = {amateurs,audiences,commercialization,community of practice,creativity,learning,online community,professionals,user-generated content} +} + +@article{copland_reddit_2020, + title = {Reddit Quarantined: Can Changing Platform Affordances Reduce Hateful Material Online?}, + shorttitle = {Reddit Quarantined}, + author = {Copland, Simon}, + date = {2020-10-21}, + journaltitle = {Internet Policy Review}, + volume = {9}, + number = {4}, + publisher = {{Berlin: Alexander von Humboldt Institute for Internet and Society}}, + issn = {2197-6775}, + abstract = {Can we reduce hateful material online through changing platform affordances? Studying Reddit’s quarantine function, this paper argues the results of this approach are mixed.}, + file = {/home/nathante/Zotero/storage/KY4RZWR4/Copland_2020_Reddit quarantined.pdf;/home/nathante/Zotero/storage/SZWA55IE/Copland_2020_Reddit quarantined.pdf;/home/nathante/Zotero/storage/9KXC37K7/225653.html;/home/nathante/Zotero/storage/M6NKY3K2/reddit-quarantined-can-changing-platform-affordances-reduce-hateful-material.html} +} + +@inproceedings{cunha_are_2019, + ids = {cunha_are_2019-1,cunha_are_2019-2}, + title = {Are All Successful Communities Alike? {{Characterizing}} and Predicting the Success of Online Communities}, + shorttitle = {Are All Successful Communities Alike?}, + booktitle = {The {{World Wide Web Conference}}}, + author = {Cunha, Tiago and Jurgens, David and Tan, Chenhao and Romero, Daniel}, + date = {2019-05-13}, + series = {{{WWW}} '19}, + pages = {318--328}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {The proliferation of online communities has created exciting opportunities to study the mechanisms that explain group success. 
While a growing body of research investigates community success through a single measure - typically, the number of members - we argue that there are multiple ways of measuring success. Here, we present a systematic study to understand the relations between these success definitions and test how well they can be predicted based on community properties and behaviors from the earliest period of a community's lifetime. We identify four success measures that are desirable for most communities: (i) growth in the number of members; (ii) retention of members; (iii) long term survival of the community; and (iv) volume of activities within the community. Surprisingly, we find that our measures do not exhibit very high correlations, suggesting that they capture different types of success. Additionally, we find that different success measures are predicted by different attributes of online communities, suggesting that success can be achieved through different behaviors. Our work sheds light on the basic understanding on what success represents in online communities and what predicts it. Our results suggest that success is multi-faceted and cannot be measured nor predicted by a single measurement. This insight has practical implications for the creation of new online communities and the design of platforms that facilitate such communities.}, + isbn = {978-1-4503-6674-8}, + keywords = {Group Dynamics,Online Communities,Reddit,Success}, + file = {/home/nathante/Zotero/storage/CGBFCUGX/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/IYW3WKHV/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/PFS6682S/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/SMX88EL3/Cunha et al. 
- 2019 - Are All Successful Communities Alike Characterizi.pdf} +} + +@inproceedings{dabbish_fresh_2012, + ids = {dabbish_fresh_2012-1}, + title = {Fresh Faces in the Crowd: Turnover, Identity, and Commitment in Online Groups}, + shorttitle = {Fresh Faces in the Crowd}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work}}}, + author = {Dabbish, Laura and Farzan, Rosta and Kraut, Robert and Postmes, Tom}, + date = {2012-02-11}, + series = {{{CSCW}} '12}, + pages = {245--248}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Turnover is commonplace in many online groups because of low barriers of entry and exit. In offline settings, turnover can have a negative impact because of reduced attachment to the group as an entity. However, in an online setting, turnover in terms of changes in the visible membership of a group may have a very different impact. Online only a limited amount of information about members and their activities is observable; in particular, it is easier to see the behavior of the subset of members who are active than the potentially larger set who are not. In this paper, we describe an experiment examining the influence of visible membership turnover on commitment to an online group. Our results suggest that increased turnover in an online group may increase social presence, creating perceptions of liveness, in turn leading to increased levels of participation in the group. However, this result holds primarily for groups with a common identity, suggesting that attention to behavior of others may be stronger when people share an identity with those others. Our results extend understandings of attachment in an online setting as well as theory about social tuning.}, + isbn = {978-1-4503-1086-4}, + keywords = {attachment,commitment,identity.,online groups,turnover}, + file = {/home/nathante/Zotero/storage/3IQQP4JM/Dabbish et al. 
- 2012 - Fresh faces in the crowd turnover, identity, and .pdf;/home/nathante/Zotero/storage/GEVF3A53/Dabbish et al. - 2012 - Fresh faces in the crowd turnover, identity, and .pdf} +} + +@inproceedings{danescu-niculescu-mizil_no_2013, + ids = {danescu-niculescu-mizil_no_2013-1}, + title = {No Country for Old Members: User Lifecycle and Linguistic Change in Online Communities}, + shorttitle = {No Country for Old Members}, + booktitle = {Proceedings of the 22nd International Conference on {{World Wide Web}} - {{WWW}} '13}, + author = {Danescu-Niculescu-Mizil, Cristian and West, Robert and Jurafsky, Dan and Leskovec, Jure and Potts, Christopher}, + date = {2013}, + pages = {307--318}, + publisher = {{ACM Press}}, + location = {{Rio de Janeiro, Brazil}}, + abstract = {Vibrant online communities are in constant flux. As members join and depart, the interactional norms evolve, stimulating further changes to the membership and its social dynamics. Linguistic change—in the sense of innovation that becomes accepted as the norm—is essential to this dynamic process: it both facilitates individual expression and fosters the emergence of a collective identity. We propose a framework for tracking linguistic change as it happens and for understanding how specific users react to these evolving norms. By applying this framework to two large online communities we show that users follow a determined two-stage lifecycle with respect to their susceptibility to linguistic change: a linguistically innovative learning phase in which users adopt the language of the community followed by a conservative phase in which users stop changing and the evolving community norms pass them by.}, + eventtitle = {The 22nd International Conference}, + isbn = {978-1-4503-2035-1}, + langid = {english}, + venue = {Rio de Janeiro, Brazil}, + file = {/home/nathante/Zotero/storage/L532IPRV/Danescu-Niculescu-Mizil et al. 
- 2013 - No Country for Old Members User Lifecycle and Lin.pdf;/home/nathante/Zotero/storage/LWECW2QM/Danescu-Niculescu-Mizil et al. - 2013 - No country for old members user lifecycle and lin.pdf} +} + +@article{datta_extracting_2019, + title = {Extracting {{Inter-Community Conflicts}} in {{Reddit}}}, + author = {Datta, Srayan and Adar, Eytan}, + date = {2019-07-06}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {13}, + pages = {146--157}, + issn = {2334-0770}, + abstract = {Anti-social behaviors in social media can happen both at user and community levels. While a great deal of attention is on the individual as an ‘aggressor,’ the banning of entire Reddit subcommunities (i.e., subreddits) demonstrates that this is a multi-layer concern. Existing research on inter-community conflict has largely focused on specific subcommunities or ideological opponents. However, antagonistic behaviors may be more pervasive and integrate into the broader network. In this work, we study the landscape of conflicts among subreddits by deriving higher-level (community) behaviors from the way individuals are sanctioned and rewarded. By constructing a conflict network, we characterize different patterns in subreddit-to-subreddit conflicts as well as communities of ‘co-targeted’ subreddits. The dynamics of these interactions also reveals a shift in conflict focus over time.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6IA9VN8K/Datta_Adar_2019_Extracting Inter-Community Conflicts in Reddit.pdf;/home/nathante/Zotero/storage/F3MHZ7Z6/3217.html} +} + +@article{datta_identifying_2017, + title = {Identifying {{Misaligned Inter-Group Links}} and {{Communities}}}, + author = {Datta, Srayan and Phelan, Chanda and Adar, Eytan}, + date = {2017-12-06}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. 
Interact.}, + volume = {1}, + pages = {37:1--37:23}, + abstract = {Many social media systems explicitly connect individuals (e.g., Facebook or Twitter); as a result, they are the targets of most research on social networks. However, many systems do not emphasize or support explicit linking between people (e.g., Wikipedia or Reddit), and even fewer explicitly link communities. Instead, network analysis is performed through inference on implicit connections, such as co-authorship or text similarity. Depending on how inference is done and what data drove it, different networks may emerge. While correlated structures often indicate stability, in this work we demonstrate that differences, or misalignment, between inferred networks also capture interesting behavioral patterns. For example, high-text but low-author similarity often reveals communities "at war" with each other over an issue or high-author but low-text similarity can suggest community fragmentation. Because we are able to model edge direction, we also find that asymmetry in degree (in-versus-out) co-occurs with marginalized identities (subreddits related to women, people of color, LGBTQ, etc.). In this work, we provide algorithms that can identify misaligned links, network structures and communities. We then apply these techniques to Reddit to demonstrate how these algorithms can be used to decipher inter-group dynamics in social media.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/52FT8LT8/Datta et al. - 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf;/home/nathante/Zotero/storage/WKCJHV6R/Datta et al. 
- 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf} +} + +@article{dellaposta_why_2015, + title = {Why {{Do Liberals Drink Lattes}}?}, + author = {DellaPosta, Daniel and Shi, Yongren and Macy, Michael}, + date = {2015-03}, + journaltitle = {American Journal of Sociology}, + volume = {120}, + number = {5}, + pages = {1473--1511}, + issn = {0002-9602, 1537-5390}, + langid = {english}, + file = {/home/nathante/Zotero/storage/LMVF2MJ5/DellaPosta et al_2015_Why Do Liberals Drink Lattes.pdf} +} + +@article{dvir-gvirsman_media_2017, + title = {Media Audience Homophily: {{Partisan}} Websites, Audience Identity and Polarization Processes}, + shorttitle = {Media Audience Homophily}, + author = {Dvir-Gvirsman, Shira}, + date = {2017-07-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {19}, + number = {7}, + pages = {1072--1091}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {The study suggests that media consumers favor certain websites not only due to their content but also due to their audience. A new concept is introduced: “audience homophily,” which describes one’s preference for partisan media websites catering to a homogeneous, likeminded consumership. This attraction is explained in terms of the need for self-consistency, and I suggest that over time such behavior will polarize political identity through a spiral of reinforcement. 
Based on both a survey-experiment (N\,=\,300) and a panel study combined with web-tracking technology that recorded online-exposure behavior (N\,=\,397), it was found that individuals with more extreme ideology present higher levels of audience homophily and that, longitudinally, audience homophily is somewhat associated with ideological polarization, intolerance, and accessibility of political self-definition.}, + langid = {english}, + keywords = {Homophily,network analysis,partisan media,reinforcing-spiral model,selective exposure}, + file = {/home/nathante/Zotero/storage/WEQEAEJ4/Dvir-Gvirsman - 2017 - Media audience homophily Partisan websites, audie.pdf} +} + +@article{ellison_benefits_2007, + ids = {ellison_benefits_2007-1}, + title = {The {{Benefits}} of {{Facebook}} “{{Friends}}:” {{Social Capital}} and {{College Students}}’ {{Use}} of {{Online Social Network Sites}}}, + shorttitle = {The {{Benefits}} of {{Facebook}} “{{Friends}}}, + author = {Ellison, Nicole B. and Steinfield, Charles and Lampe, Cliff}, + date = {2007-07-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + volume = {12}, + number = {4}, + pages = {1143--1168}, + publisher = {{Oxford Academic}}, + issn = {1083-6101}, + abstract = {This study examines the relationship between use of Facebook, a popular online social network site, and the formation and maintenance of social capital. In addition to assessing bonding and bridging social capital, we explore a dimension of social capital that assesses one’s ability to stay connected with members of a previously inhabited community, which we call maintained social capital. Regression analyses conducted on results from a survey of undergraduate students (N = 286) suggest a strong association between use of Facebook and the three types of social capital, with the strongest relationship being to bridging social capital. 
In addition, Facebook usage was found to interact with measures of psychological well-being, suggesting that it might provide greater benefits for users experiencing low self-esteem and low life satisfaction.}, + langid = {english}, + keywords = {CMC,quantitative,SNS,Social capital,survey}, + file = {/home/nathante/Zotero/storage/C6PUU2LZ/Ellison et al. - 2007 - The Benefits of Facebook “Friends” Social Capital.pdf;/home/nathante/Zotero/storage/I5D8LMF3/Ellison et al. - 2007 - The Benefits of Facebook “Friends” Social Capital.pdf;/home/nathante/Zotero/storage/CFMJSBYE/4582961.html;/home/nathante/Zotero/storage/YZWIMZS9/abstract.html} +} + +@article{faraj_online_2016, + ids = {faraj_special_2016}, + title = {Online Community as Space for Knowledge Flows}, + author = {Faraj, Samer and von Krogh, Georg and Monteiro, Eric and Lakhani, Karim R.}, + options = {useprefix=true}, + date = {2016-12-01}, + journaltitle = {Information Systems Research}, + shortjournal = {INFORMS}, + volume = {27}, + number = {4}, + pages = {668--684}, + issn = {1047-7047}, + abstract = {Online communities frequently create significant economic and relational value for community participants and beyond. It is widely accepted that the underlying source of such value is the collective flow of knowledge among community participants. We distinguish the conditions for flows of tacit and explicit knowledge in online communities and advance an unconventional theoretical conjecture: Online communities give rise to tacit knowledge flows between participants. The crucial condition for these flows is not the advent of novel, digital technology as often portrayed in the literature, but instead the technology’s domestication by humanity and the sociality it affords. This conjecture holds profound implications for theory and research in the study of management and organization, as well as their relation to information technology.}, + file = {/home/nathante/Zotero/storage/4TH94S6Q/Faraj et al. 
- 2016 - Online Community as Space for Knowledge Flows.pdf;/home/nathante/Zotero/storage/NCY7A6S4/Faraj et al. - 2016 - Special Section Introduction—Online Community as S.pdf} +} + +@inproceedings{fiesler_growing_2017, + ids = {fiesler_growing_2017-1}, + title = {Growing {{Their Own}}: {{Legitimate Peripheral Participation}} for {{Computational Learning}} in an {{Online Fandom Community}}}, + shorttitle = {Growing {{Their Own}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Fiesler, Casey and Morrison, Shannon and Shapiro, R. Benjamin and Bruckman, Amy S.}, + date = {2017-02-25}, + series = {{{CSCW}} '17}, + pages = {1375--1386}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Online communities dedicated to the creation of fanworks (e.g., fiction or art inspired by media such as books or television shows) often serve as communities of practice for learning communication, artistic, and technical skills. In studying one successful fan fiction archive that was designed and built entirely by (predominantly women) fans, we observed processes of legitimate peripheral participation (LPP) in which some of these fans began in peripheral roles and came to be more involved in the technical aspects of the archive over time. In addition to outlining positive outcomes, we discuss the challenges of supporting learning within this CoP, particularly with respect to the burden on experts. 
We discuss potential implications and solutions for the problem of expert scarcity in CoPs, and propose that LPP within fan communities can be leveraged for broadening participation in computing among women.}, + isbn = {978-1-4503-4335-0}, + keywords = {broadening participation in computing,communities of practice,computing education,fandom,fanfiction,learning,legitimate peripheral participation,online communities,open source}, + file = {/home/nathante/Zotero/storage/QUSETR8Z/Fiesler et al. - 2017 - Growing Their Own Legitimate Peripheral Participa.pdf;/home/nathante/Zotero/storage/VRDFMKHZ/Fiesler et al_2017_Growing Their Own.pdf} +} + +@article{fiesler_moving_2020, + ids = {fiesler_moving_2020-1,fiesler_moving_2020-2}, + title = {Moving {{Across Lands}}: {{Online Platform Migration}} in {{Fandom Communities}}}, + shorttitle = {Moving {{Across Lands}}}, + author = {Fiesler, Casey and Dym, Brianna}, + date = {2020-05-28}, + journaltitle = {Proc. ACM Hum.-Comput. Interact}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {4}, + pages = {042:1--042:25}, + abstract = {When online platforms rise and fall, sometimes communities fade away, and sometimes they pack their bags and relocate to a new home. To explore the causes and effects of online community migration, we examine transformative fandom, a longstanding, technology-agnostic community surrounding the creation, sharing, and discussion of creative works based on existing media. For over three decades, community members have left and joined many different online spaces, from Usenet to Tumblr to platforms of their own design. Through analysis of 28 in-depth interviews and 1,886 survey responses from fandom participants, we traced these migrations, the reasons behind them, and their impact on the community. Our findings highlight catalysts for migration that provide insights into factors that contribute to success and failure of platforms, including issues surrounding policy, design, and community. 
Further insights into the disruptive consequences of migrations (such as social fragmentation and lost content) suggest ways that platforms might both support commitment and better support migration when it occurs.}, + issue = {CSCW1}, + file = {/home/nathante/Zotero/storage/ER8P5AJ2/Fiesler_Dym_2020_Moving Across Lands.pdf;/home/nathante/Zotero/storage/JHDILSYU/Fiesler and Dym - 2020 - Moving Across Lands Online Platform Migration in .pdf} +} + +@inproceedings{fiesler_reddit_2018, + title = {Reddit Rules! {{Characterizing}} an Ecosystem of Governance.}, + booktitle = {Proceedings of the {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Fiesler, Casey and Jiang, Jialun ``Aaron'' and McCann, Joshua and Frye, Kyle and Brubaker, Jed R.}, + date = {2018}, + pages = {72--81}, + publisher = {{AAAI}}, + location = {{Stanford, CA}}, + eventtitle = {{{ICWSM}}}, + file = {/home/nathante/Zotero/storage/34TYXTGB/Fiesler - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/G9VFI2L7/Fiesler et al. - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/KT7KNG3J/Fiesler et al. - 2018 - Reddit rules! Characterizing an ecosystem of gover.pdf} +} + +@article{figeac_how_2021, + title = {How Behavioral Homophily on Social Media Influences the Perception of Tie-Strengthening within Young Adults’ Personal Networks}, + author = {Figeac, Julien and Favre, Guillaume}, + date = {2021-06-25}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {14614448211020691}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study examines how social media and information-sharing behavior can influence young adults’ perceptions of changes in tie strength within their own personal networks. 
By focusing on the extended personal networks (27.56 relationships) of young adults, we show that social media leads them to feel closer to their “friends” whom they think of as exhibiting online behaviors similar to their own. This behavioral homophily mainly stems from frequent reactions between friends, when they like or comment upon each other’s posts. Such homophily is also related to the sharing of political news and entertaining content, which constitute a salient affordance in the “pervasive awareness” of social media and lead users to feel closer to those exhibiting similar content-sharing behavior. This similarity reveals how social media platforms help to shape personal networks over time, particularly by influencing user relationships with weak ties who share similar online behavior.}, + langid = {english}, + keywords = {Entertaining content,homophily,information-sharing,personal networks,pervasive awareness,political news,social media,weak ties}, + file = {/home/nathante/Zotero/storage/YAKLRLVE/Figeac and Favre - 2021 - How behavioral homophily on social media influence.pdf} +} + +@unpublished{foote_agent-based_2018, + title = {An {{Agent-Based Model}} of {{Online Community Joining}}}, + author = {Foote, Jeremy}, + date = {2018-07}, + editora = {TeBlunthuis, Nathan and Hill, Benjamin Mako and Shaw, Aaron}, + editoratype = {collaborator}, + eventtitle = {International {{Conference}} on {{Computational Social Science}} ({{IC2S2}})}, + venue = {{Evanston, IL}} +} + +@online{foote_how_2020, + title = {How Individual Behaviors Drive Inequality in Online Community Sizes: An Agent-Based Simulation}, + shorttitle = {How Individual Behaviors Drive Inequality in Online Community Sizes}, + author = {Foote, Jeremy and TeBlunthuis, Nathan and Hill, Benjamin Mako and Shaw, Aaron}, + date = {2020-06-04}, + eprint = {2006.03119}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Why are online community sizes so extremely unequal? 
Most answers to this question have pointed to general mathematical processes drawn from physics like cumulative advantage. These explanations provide little insight into specific social dynamics or decisions that individuals make when joining and leaving communities. In addition, explanations in terms of cumulative advantage do not draw from the enormous body of social computing research that studies individual behavior. Our work bridges this divide by testing whether two influential social mechanisms used to explain community joining can also explain the distribution of community sizes. Using agent-based simulations, we evaluate how well individual-level processes of social exposure and decisions based on individual expected benefits reproduce empirical community size data from Reddit. Our simulations contribute to social computing theory by providing evidence that both processes together---but neither alone---generate realistic distributions of community sizes. Our results also illustrate the potential value of agent-based simulation to online community researchers to both evaluate and bridge individual and group-level theories.}, + archiveprefix = {arXiv}, + file = {/home/nathante/Zotero/storage/PMZDH4B2/Foote et al_2020_How individual behaviors drive inequality in online community sizes.pdf;/home/nathante/Zotero/storage/D57HFTGF/2006.html} +} + +@inproceedings{foote_starting_2017, + title = {Starting Online Communities: Motivations and Goals of Wiki Founders}, + shorttitle = {Starting {{Online Communities}}}, + booktitle = {Proceedings of the 2017 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '17)}, + author = {Foote, Jeremy and Gergle, Darren and Shaw, Aaron}, + date = {2017}, + pages = {6376--6380}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Why do people start new online communities? 
Previous research has studied what helps communities to grow and what motivates contributors, but the reasons that people create new communities in the first place remain unclear. We present the results of a survey of over 300 founders of new communities on the online wiki hosting site Wikia.com. We analyze the motivations and goals of wiki creators, finding that founders have diverse reasons for starting wikis and diverse ways of defining their success. Many founders see their communities as occupying narrow topics, and neither seek nor expect a large group of contributors. We also find that founders with differing goals approach community building differently. We argue that community platform designers can create interfaces that support the diverse goals of founders more effectively.}, + isbn = {978-1-4503-4655-9}, + keywords = {peer production,survey,wikis}, + file = {/home/nathante/Zotero/storage/BWAIBPUK/Foote et al. - 2017 - Starting Online Communities Motivations and Goals.pdf} +} + +@article{frey_clustering_2007, + title = {Clustering by {{Passing Messages Between Data Points}}}, + author = {Frey, Brendan J. and Dueck, Delbert}, + date = {2007-02-16}, + journaltitle = {Science}, + volume = {315}, + number = {5814}, + eprint = {17218491}, + eprinttype = {pmid}, + pages = {972--976}, + publisher = {{American Association for the Advancement of Science}}, + issn = {0036-8075, 1095-9203}, + abstract = {Clustering data by identifying a subset of representative examples is important for processing sensory signals and detecting patterns in data. Such “exemplars” can be found by randomly choosing an initial subset of data points and then iteratively refining it, but this works well only if that initial choice is close to a good solution. We devised a method called “affinity propagation,” which takes as input measures of similarity between pairs of data points. 
Real-valued messages are exchanged between data points until a high-quality set of exemplars and corresponding clusters gradually emerges. We used affinity propagation to cluster images of faces, detect genes in microarray data, identify representative sentences in this manuscript, and identify cities that are efficiently accessed by airline travel. Affinity propagation found clusters with much lower error than other methods, and it did so in less than one-hundredth the amount of time. An algorithm that exchanges messages about the similarity of pairs of data points speeds identification of representative examples in a complex data set, such as genes in DNA data. An algorithm that exchanges messages about the similarity of pairs of data points speeds identification of representative examples in a complex data set, such as genes in DNA data.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/PVGJU5KN/Frey_Dueck_2007_Clustering by Passing Messages Between Data Points.pdf;/home/nathante/Zotero/storage/ERM5BMQT/972.html} +} + +@article{frey_emergence_2019, + title = {Emergence of Integrated Institutions in a Large Population of Self-Governing Communities}, + author = {Frey, Seth and Sumner, Robert W.}, + date = {2019-07-11}, + journaltitle = {PLOS ONE}, + shortjournal = {PLOS ONE}, + volume = {14}, + number = {7}, + pages = {e0216335}, + publisher = {{Public Library of Science}}, + issn = {1932-6203}, + abstract = {Most aspects of our lives are governed by large, highly developed institutions that integrate several governance tasks under one authority structure. But theorists differ as to the mechanisms that drive the development of such concentrated governance systems from rudimentary beginnings. Is the emergence of integrated governance schemes a symptom of consolidation of authority by small status groups? Or does integration occur because a complex institution has more potential responses to a complex environment? 
Here we examine the emergence of complex governance regimes in 5,000 sovereign, resource-constrained, self-governing online communities, ranging in scale from one to thousands of users. Each community begins with no community members and no governance infrastructure. As communities grow, they are subject to selection pressures that keep better managed servers better populated. We identify predictors of community success and test the hypothesis that governance complexity can enhance community fitness. We find that what predicts success depends on size: changes in complexity predict increased success with larger population servers. Specifically, governance rules in a large successful community are more numerous and broader in scope. They also tend to rely more on rules that concentrate power in administrators, and on rules that manage bad behavior and limited server resources. Overall, this work is consistent with theories that formal integrated governance systems emerge to organize collective responses to interdependent resource management problems, especially as factors such as population size exacerbate those problems.}, + langid = {english}, + keywords = {Community ecology,Computer software,Forests,Games,Internet,Online encyclopedias,Political theory,Resource management,Social psychology,Video games}, + file = {/home/nathante/Zotero/storage/AXDJPNKE/Frey and Sumner - 2019 - Emergence of integrated institutions in a large po.pdf;/home/nathante/Zotero/storage/DA5HAVLH/Frey_Sumner_2019_Emergence of integrated institutions in a large population of self-governing.pdf;/home/nathante/Zotero/storage/Q3FI9DBS/Frey and Sumner - 2019 - Emergence of integrated institutions in a large po.pdf;/home/nathante/Zotero/storage/4B26ZMHH/article.html;/home/nathante/Zotero/storage/4CRK5UUM/article.html;/home/nathante/Zotero/storage/8XFADRSX/article.html} +} + +@inproceedings{fu_knowledge_2016, + title = {Knowledge Curation Discussions and Activity Dynamics in a Short Lived Social 
{{Q}}\&{{A}} Community}, + booktitle = {2016 {{IEEE}}/{{ACM Joint Conference}} on {{Digital Libraries}} ({{JCDL}})}, + author = {Fu, Hengyi and Stvilia, Besiki}, + date = {2016-06}, + pages = {203--204}, + abstract = {Studying the dynamics and lifecycles of online knowledge curation communities is essential to identify and assemble community type specific repertoires of strategies, rules, and actions of community design, governance, content creation and curation. This paper examines the lifecycle of a short lived social Q\&A community on Stack Exchange by performing the content analysis of the logs of member discussions and content curation actions.}, + eventtitle = {2016 {{IEEE}}/{{ACM Joint Conference}} on {{Digital Libraries}} ({{JCDL}})}, + keywords = {Buildings,community lifecycle,Economics,knowledge curation,Knowledge engineering,Message systems,Online communities,Organizations,Quality assurance,social Q&a,Time series analysis}, + file = {/home/nathante/Zotero/storage/7HLW4DFW/Fu_Stvilia_2016_Knowledge curation discussions and activity dynamics in a short lived social Q.pdf;/home/nathante/Zotero/storage/MGAU8R7U/7559585.html} +} + +@article{fulk_connective_1996, + title = {Connective and Communal Public Goods in Interactive Communication Systems}, + author = {Fulk, Janet and Flanagin, Andrew J. and Kalman, Michael E. and Monge, Peter R. and Ryan, Timothy}, + date = {1996}, + journaltitle = {Communication Theory}, + volume = {6}, + number = {1}, + pages = {60--87}, + issn = {1468-2885}, + abstract = {This paper extends theories of public goods to interactive communication systems. Two key public communication goods are identified. Connectivity provides point-to-point communication, and communality links members through commonly held information, such as that often found in databases. These extensions are important, we argue, because communication public goods operate differently from traditional material public goods. 
These differences have important implications for costs, benefits, and the realization of a critical mass of users that is necessary for realization of the good. We also explore multifunctional goods that combine various features and hybrid goods that link private goods to public ones. We examine the applicability of two key assumptions of public goods theory to interactive communication systems. First, jointness of supply specifies that consumption of a public good does not diminish its availability to others. Second, impossibility of exclusion stipulates that all members of the public have access to the good. We conclude with suggestions for further theoretical development.}, + langid = {english}, + keywords = {mantaining public goods}, + file = {/home/nathante/Zotero/storage/ZJVU4TGW/Fulk et al. - 1996 - Connective and communal public goods in interactiv.pdf;/home/nathante/Zotero/storage/8J5CPWLV/4259000.html} +} + +@book{gillespie_custodians_2018, + title = {Custodians of the {{Internet}}: Platforms, Content Moderation, and the Hidden Decisions That Shape Social Media}, + shorttitle = {Custodians of the Internet}, + author = {Gillespie, Tarleton}, + date = {2018}, + publisher = {{Yale University Press}}, + location = {{New Haven}}, + abstract = {"Most users want their Twitter feed, Facebook page, and YouTube comments to be free of harassment and porn. Whether faced with 'fake news' or livestreamed violence, 'content moderators'--who censor or promote user-posted content--have never been more important. This is especially true when the tools that social media platforms use to curb trolling, ban hate speech, and censor pornography can also silence the speech you need to hear. [The author] provides an overview of current social media practices and explains the underlying rationales for how, when, and why these policies are enforced. 
In doing so, [the author] highlights that content moderation receives too little public scrutiny even as it is shapes social norms and creates consequences for public discourse, cultural production, and the fabric of society. Based on interviews with content moderators, creators, and consumers, this...book is...for anyone who's ever clicked 'like' or 'retweet.'"--}, + isbn = {978-0-300-17313-0}, + pagetotal = {288}, + keywords = {Business & Economics / Industries / Media & Communications,Censorship,Computers / Web / Social Media,Political Science / Censorship,Social media,Social Science / Media Studies}, + annotation = {OCLC: on1005113962}, + file = {/home/nathante/Zotero/storage/Q2GZ28BB/Gillespie - 2018 - Custodians of the internet platforms, content mod.pdf} +} + +@article{graham_boundary_2019, + title = {Boundary Maintenance and the Origins of Trolling}, + author = {Graham, Elyse}, + date = {2019-09-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {21}, + number = {9}, + pages = {2029--2047}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This article presents a new social framework for understanding the origins of trolling and its expansion from an obscure practice, limited to a handful of boards on Usenet, to a pervasive component of Internet culture. I argue that trolling originated, in the term of sociologists, as a form of boundary maintenance that served to distinguish communities of self-identified online insiders from others beyond the boundaries of their community and to drive outsiders away from their spaces. 
This framework can help us to better understand the transformations that trolling has undergone in the decades since its inception, as well as the persistence of misogyny and prejudice throughout the history of the practice.}, + langid = {english}, + keywords = {Boundary maintenance,Internet communities,Internet history,online harassment,politics of cyberspace,trolling}, + file = {/home/nathante/Zotero/storage/6IN6XJWV/Graham - 2019 - Boundary maintenance and the origins of trolling.pdf} +} + +@article{granovetter_strength_1973, + title = {The {{Strength}} of {{Weak Ties}}}, + author = {Granovetter, Mark S.}, + date = {1973-05-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {78}, + number = {6}, + pages = {1360--1380}, + issn = {0002-9602}, + abstract = {Analysis of social networks is suggested as a tool for linking micro and macro levels of sociological theory. The procedure is illustrated by elaboration of the macro implications of one aspect of small-scale interaction: the strength of dyadic ties. It is argued that the degree of overlap of two individuals' friendship networks varies directly with the strength of their tie to one another. The impact of this principle on diffusion of influence and information, mobility opportunity, and community organization is explored. Stress is laid on the cohesive power of weak ties. Most network models deal, implicitly, with strong ties, thus confining their applicability to small, well-defined groups. 
Emphasis on weak ties lends itself to discussion of relations between groups and to analysis of segments of social structure not easily defined in terms of primary groups.}, + file = {/home/nathante/Zotero/storage/GM6GICWI/225469.html} +} + +@inproceedings{grevet_combating_2013, + title = {Combating Homophily through Design}, + booktitle = {Proceedings of the 2013 Conference on {{Computer}} Supported Cooperative Work Companion}, + author = {Grevet, Catherine}, + date = {2013-02-23}, + series = {{{CSCW}} '13}, + pages = {57--60}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Social networking has allowed us to be in constant contact with friends from many different backgrounds, yet we are unaware of many of our friends' perspectives and opinions. Networks are highly homophilous, meaning that people tend to associate with others similar to them. This leads to homogenous clusters. How should we design social media to facilitate constructive exchanges rather than polarize individuals? In my work, I propose to look at whether users are currently aware of the homophily phenomenon in their online networks and exploring social network designs to break homophily.}, + isbn = {978-1-4503-1332-2}, + keywords = {awareness,homophily,social networks,tie strength}, + file = {/home/nathante/Zotero/storage/XFJCI35Y/Grevet - 2013 - Combating homophily through design.pdf} +} + +@inproceedings{grevet_managing_2014, + title = {Managing Political Differences in Social Media}, + booktitle = {Proceedings of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing}, + author = {Grevet, Catherine and Terveen, Loren G. and Gilbert, Eric}, + date = {2014-02-15}, + series = {{{CSCW}} '14}, + pages = {1400--1408}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Most people associate with people like themselves, a process called homophily. 
Exposure to diversity, however, makes us more informed as individuals and as a society. In this paper, we investigate political disagreements on Facebook to explore the conditions under which diverse opinions can coexist online. Via a mixed methods approach comprising 103 survey responses and 13 interviews with politically engaged American social media users, we found that participants who perceived more differences with their friends engaged less on Facebook than those who perceived more homogeneity. Weak ties were particularly brittle to political disagreements, despite being the ties most likely to offer diversity. Finally, based on our findings we suggest potential design opportunities to bridge across ideological difference: 1) support exposure to weak ties; and 2) make common ground visible while friends converse.}, + isbn = {978-1-4503-2540-0}, + keywords = {facebook,homophily,politics,relationship management,self- censorship,social media,tie strength}, + file = {/home/nathante/Zotero/storage/8VK4PWVX/Grevet et al. - 2014 - Managing political differences in social media.pdf} +} + +@inproceedings{guha_birds_2015, + title = {Do {{Birds}} of a {{Feather Watch Each Other}}? {{Homophily}} and {{Social Surveillance}} in {{Location Based Social Networks}}}, + shorttitle = {Do {{Birds}} of a {{Feather Watch Each Other}}?}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Guha, Shion and Wicker, Stephen B.}, + date = {2015-02-28}, + series = {{{CSCW}} '15}, + pages = {1010--1020}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Location sharing applications (LSA) have proliferated in recent years. Current research principally focuses on egocentric privacy issues and design but has historically not explored the impact of surveillance on location sharing behavior. 
In this paper, we examine homophily in friendship and surveillance networks for 65 foursquare users. Our results indicate that location surveillance networks are strongly homophilous along the lines of race and gender while friendship networks are weakly homophilous on income. Qualitatively, an analysis of comments and interviews provides support for a discourse around location surveillance, which is mainly social, collaborative, positive and participatory. We relate these findings with prior literature on surveillance, self-presentation and homophily and situate this study in existing HCI/CSCW scholarship.}, + isbn = {978-1-4503-2922-4}, + keywords = {foursquare,homophily,privacy,surveillance,visibility,vision}, + file = {/home/nathante/Zotero/storage/4G3RN2C5/Guha and Wicker - 2015 - Do Birds of a Feather Watch Each Other Homophily .pdf} +} + +@article{halfaker_rise_2013, + title = {The Rise and Decline of an Open Collaboration System: How {{Wikipedia}}'s Reaction to Popularity Is Causing Its Decline}, + shorttitle = {The {{Rise}} and {{Decline}} of an {{Open Collaboration System}}}, + author = {Halfaker, Aaron and Geiger, R. Stuart and Morgan, Jonathan T. and Riedl, John}, + date = {2013-05-01}, + journaltitle = {American Behavioral Scientist}, + shortjournal = {American Behavioral Scientist}, + volume = {57}, + number = {5}, + pages = {664--688}, + issn = {0002-7642}, + abstract = {Open collaboration systems, such as Wikipedia, need to maintain a pool of volunteer contributors to remain relevant. Wikipedia was created through a tremendous number of contributions by millions of contributors. However, recent research has shown that the number of active contributors in Wikipedia has been declining steadily for years and suggests that a sharp decline in the retention of newcomers is the cause. 
This article presents data that show how several changes the Wikipedia community made to manage quality and consistency in the face of a massive growth in participation have ironically crippled the very growth they were designed to manage. Specifically, the restrictiveness of the encyclopedia’s primary quality control mechanism and the algorithmic tools used to reject contributions are implicated as key causes of decreased newcomer retention. Furthermore, the community’s formal mechanisms for norm articulation are shown to have calcified against changes—especially changes proposed by newer editors.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/7B7AFK58/Halfaker et al. - 2013 - The rise and decline of an open collaboration syst.pdf;/home/nathante/Zotero/storage/Y9676KNV/The Rise and Decline of an Open Collaboration Syst.pdf} +} + +@book{hannan_organizational_1989, + title = {Organizational Ecology}, + author = {Hannan, Michael T. and Freeman, John}, + date = {1989}, + edition = {1}, + publisher = {{Harvard University Press}}, + location = {{Cambridge, MA}} +} + +@article{hargittai_whose_2007, + title = {Whose {{Space}}? {{Differences}} among {{Users}} and {{Non-Users}} of {{Social Network Sites}}}, + shorttitle = {Whose {{Space}}?}, + author = {Hargittai, Eszter}, + date = {2007-10-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {13}, + number = {1}, + pages = {276--297}, + publisher = {{Oxford Academic}}, + abstract = {Are there systematic differences between people who use social network sites and those who stay away, despite a familiarity with them? Based on data from a survey administered to a diverse group of young adults, this article looks at the predictors of SNS usage, with particular focus on Facebook, MySpace, Xanga, and Friendster. Findings suggest that use of such sites is not randomly distributed across a group of highly wired users. 
A person’s gender, race and ethnicity, and parental educational background are all associated with use, but in most cases only when the aggregate concept of social network sites is disaggregated by service. Additionally, people with more experience and autonomy of use are more likely to be users of such sites. Unequal participation based on user background suggests that differential adoption of such services may be contributing to digital inequality.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/WVFZWUGF/Hargittai - 2007 - Whose Space Differences among Users and Non-Users.pdf;/home/nathante/Zotero/storage/C5TFC2YY/4583068.html} +} + +@article{helland_diaspora_2007, + title = {Diaspora on the {{Electronic Frontier}}: {{Developing Virtual Connections}} with {{Sacred Homelands}}}, + shorttitle = {Diaspora on the {{Electronic Frontier}}}, + author = {Helland, Christopher}, + date = {2007-04-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {12}, + number = {3}, + pages = {956--976}, + publisher = {{Oxford Academic}}, + abstract = {This study demonstrates how diaspora religious traditions utilized the Internet to develop significant network connections among each other and also to their place of origins. By examining the early Usenet system, I argue that the religious beliefs and practices of diaspora religious traditions were a motivating factor for developing Usenet groups where geographically dispersed individuals could connect with each other in safe, supportive, and religiously tolerant environments. This article explores the new forms of religious practices that began to occur on these sites, focusing on the manner in which Internet technology and the World Wide Web were utilized for activities such as long-distance ritual practice, cyber pilgrimage, and other religiously-motivated undertakings. 
Through these new online religious activities, diaspora groups have been able to develop significant connections not only among people, but also between people and the sacred homeland itself.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/QAMFAZAW/Helland - 2007 - Diaspora on the Electronic Frontier Developing Vi.pdf;/home/nathante/Zotero/storage/WNQX9GUY/4583017.html} +} + +@inproceedings{hemetsberger_sharing_2004, + title = {Sharing and Creating Knowledge in Open-Source Communities: The Case of {{KDE}}}, + booktitle = {Paper for {{Fifth European Conference}} on {{Organizational Knowledge}}, {{Learning}}, and {{Capabilities}}, {{Innsbruck}}}, + author = {Hemetsberger, Andrea and Reinhardt, Christian}, + date = {2004} +} + +@inproceedings{hessel_science_2016, + ids = {hessel_science_2016-1}, + title = {Science, Askscience, and Badscience: On the Coexistence of Highly Related Communities}, + shorttitle = {Science, Askscience, and Badscience}, + booktitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Hessel, Jack and Tan, Chenhao and Lee, Lillian}, + date = {2016-03-31}, + eprint = {1612.07487}, + eprinttype = {arxiv}, + pages = {11}, + abstract = {When large social-media platforms allow users to easily form and self-organize into interest groups, highly related communities can arise. For example, the Reddit site hosts not just a group called food, but also HealthyFood, foodhacks, foodporn, and cooking, among others. Are these highly related communities created for similar classes of reasons (e.g., to focus on a subtopic, to create a place for allegedly more “high-minded” discourse, etc.)? How do users allocate attention between such close alternatives when they are available or emerge over time? Are there different types of relations between close alternatives such as sharing many users vs. a new community drawing away members of an older one vs. 
a splinter group failing to cohere into a viable separate community? We investigate the interactions between highly related communities using data from reddit.com consisting of 975M posts and comments spanning an 8-year period. We identify a set of typical affixes that users adopt to create highly related communities and build a taxonomy of affixes. One interesting finding regarding users’ behavior is: after a newer community is created, for several types of highly-related community pairs, users that engage in a newer community tend to be more active in their original community than users that do not explore, even when controlling for previous level of engagement.}, + archiveprefix = {arXiv}, + eventtitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + langid = {english}, + keywords = {Computer Science - Social and Information Networks,Physics - Physics and Society}, + file = {/home/nathante/Zotero/storage/2W6YBUBD/Hessel et al_2016_Science, AskScience, and BadScience.pdf;/home/nathante/Zotero/storage/4FLLXNV9/Hessel et al. - 2016 - Science, AskScience, and BadScience On the Coexis.pdf;/home/nathante/Zotero/storage/WS6TW26Q/Hessel et al. 
- 2016 - Science, AskScience, and BadScience On the Coexis.pdf;/home/nathante/Zotero/storage/3NHVFA3U/1612.html;/home/nathante/Zotero/storage/DXX4CJ7T/14739.html;/home/nathante/Zotero/storage/YSX2WN2J/13106.html} +} + +@incollection{hill_studying_2019, + title = {Studying Populations of Online Communities}, + booktitle = {The {{Oxford Handbook}} of {{Networked Communication}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + editor = {Foucault Welles, Brooke and González-Bailón, Sandra}, + date = {2019-09}, + pages = {173--193}, + publisher = {{Oxford University Press}}, + location = {{Oxford, UK}}, + abstract = {While the large majority of published research on online communities consists of analyses conducted entirely within individual communities, this chapter argues for a population-based approach, in which researchers study groups of similar communities. For example, although there have been thousands of papers published about Wikipedia, a population-based approach might compare all wikis on a particular topic. Using examples from published empirical studies, the chapter describes five key benefits of this approach. First, it argues that population-level research increases the generalizability of findings. Next, it describes four processes and dynamics that are only possible to study using populations: community-level variables, information diffusion processes across communities, ecological dynamics, and multilevel community processes. 
The chapter concludes with a discussion of a series of limitations and challenges.}, + isbn = {978-0-19-046051-8}, + langid = {english}, + file = {/home/nathante/Zotero/storage/39ZWGGYN/Hill and Shaw - 2019 - Studying Populations of Online Communities.pdf;/home/nathante/Zotero/storage/BTB3AQGV/oxfordhb-9780190460518-e-8.html} +} + +@inproceedings{hillman_alksjdflksfd_2014, + title = {'alksjdf;{{Lksfd}}': Tumblr and the Fandom User Experience}, + shorttitle = {'alksjdf;{{Lksfd}}'}, + booktitle = {Proceedings of the 2014 Conference on {{Designing}} Interactive Systems}, + author = {Hillman, Serena and Procyk, Jason and Neustaedter, Carman}, + date = {2014-06-21}, + series = {{{DIS}} '14}, + pages = {775--784}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {A growing trend is the participation in online fandom communities through the support of the blogging platform Tumblr. While past research has investigated backchannels-chatter related to live entertainment on micro-blogging sites such as Twitter-there is a lack of research on the behaviours and motivations of Tumblr users. In our study, we investigate why fandom users chose Tumblr over other social networking sites, their motivations behind participating in fandoms, and how they interact within the Tumblr community. Our findings show that users face many user interface challenges when participating in Tumblr fandoms, especially initially; yet, despite this, Tumblr fandom communities thrive with a common sense of social purpose and exclusivity where users feel they can present a more authentic reflection of themselves to those sharing similar experiences and interests. 
We describe how this suggests design directions for social networking and blogging sites in order to promote communities of users.}, + isbn = {978-1-4503-2902-6}, + keywords = {backchannels,entertainment,fandoms,fanfiction,micro-blogging,social networking,television,Tumblr}, + file = {/home/nathante/Zotero/storage/HZCLCKCG/Hillman et al. - 2014 - 'alksjdf\;Lksfd' tumblr and the fandom user experi.pdf} +} + +@article{himelboim_valence-based_2016, + title = {Valence-Based Homophily on {{Twitter}}: {{Network Analysis}} of {{Emotions}} and {{Political Talk}} in the 2012 {{Presidential Election}}}, + shorttitle = {Valence-Based Homophily on {{Twitter}}}, + author = {Himelboim, Itai and Sweetser, Kaye D and Tinkham, Spencer F and Cameron, Kristen and Danelo, Matthew and West, Kate}, + date = {2016-08-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {18}, + number = {7}, + pages = {1382--1400}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study integrates network and content analyses to examine valence-based homophily on Twitter or the tendency for individuals to interact with those expressing similar valence. During the 2012 federal election cycle, we collected Twitter conversations about 10 controversial political topics and mapped their network ties. Using network analysis, we discovered clusters—subgroups of highly self-connected users—and coded messages in each cluster for their expressed positive-to-negative emotional valence, level of support or opposition, and political leaning. We found that valence-based homophily successfully explained the selection of user interactions on Twitter, in terms of expressed emotional valence in their tweets or support versus criticism to an issue. It also finds conservative voices to be associated with negatively valenced clusters and vice versa. 
This study expands the theory of homophily beyond its traditional conceptualization and provides a new understanding of political-issue interactions in a social media context.}, + langid = {english}, + keywords = {2012 Election,emotional valence,homophily,political talk,social networks,Twitter}, + file = {/home/nathante/Zotero/storage/QUK4ID26/Himelboim et al. - 2016 - Valence-based homophily on Twitter Network Analys.pdf} +} + +@incollection{hollingshead_fostering_2002, + ids = {hollingshead_fostering_2002-1}, + title = {Fostering Intranet Knowledge Sharing: {{An}} Integration of Transactive Memory and Public Goods Approaches}, + shorttitle = {Fostering Intranet Knowledge Sharing}, + booktitle = {Distributed Work}, + author = {Hollingshead, Andrea B. and Fulk, Janet and Monge, Peter}, + date = {2002}, + pages = {335--355}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA, US}}, + abstract = {Intranets--company Web sites designed for internal use--are an important technological innovation in many organizations that can aid in knowledge management, expertise recognition, and communication. This chapter identifies the conditions under which members of work groups are more likely to contribute to the development of intranets and the conditions under which intranets are more likely to result in more efficient and effective knowledge acquisition and dissemination. To that end, two theories developed to examine nontechnological systems are integrated and extended to intranets and computer-based knowledge systems: the theory of transactive memory and the public goods theory of collective action. Transactive memory theory is useful for predicting how organizational members use intranets to acquire, store, and retrieve knowledge. Public goods theory is useful for predicting which, how much, and when members will contribute and retrieve knowledge on intranets. 
(PsycInfo Database Record (c) 2020 APA, all rights reserved)}, + isbn = {978-0-262-08305-8}, + keywords = {Electronic Communication,Expert Systems,Information Systems,Organizational Effectiveness,Theories,Work Teams,Working Conditions}, + file = {/home/nathante/Zotero/storage/D34UXRQE/Hollingshead et al. - Fostering Intranet Knowledge Sharing An Integrati.pdf;/home/nathante/Zotero/storage/3A3Y658C/2002-17012-014.html} +} + +@inproceedings{hwang_why_2021, + title = {Why Do People Participate in Small Online Communities?}, + booktitle = {Proceedings of the {{ACM}} on {{Human-Computer Interaction}}}, + author = {Hwang, Sohyeon and Foote, Jeremy D.}, + date = {2021}, + eventtitle = {{{CSCW}}}, + keywords = {Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/H4FXQNBH/Hwang and Foote - 2021 - Why do people participate in small online communit.pdf;/home/nathante/Zotero/storage/UQYVIDWS/Hwang and Foote - 2021 - Why do people participate in small online communit.pdf} +} + +@article{johnson_communication_2009, + title = {Communication {{Communities}} or “{{CyberGhettos}}?”: {{A Path Analysis Model Examining Factors}} That {{Explain Selective Exposure}} to {{Blogs}}}, + shorttitle = {Communication {{Communities}} or “{{CyberGhettos}}?}, + author = {Johnson, Thomas J. and Bichard, Shannon L. and Zhang, Weiwu}, + date = {2009-10-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {15}, + number = {1}, + pages = {60--82}, + publisher = {{Oxford Academic}}, + abstract = {This study used an online panel of Internet users to examine the degree to which blog users practice selective exposure when seeking political information. 
The research employed a path analysis model to explore the extent to which exposure to offline and online discussion of political issues, and offline and online media use, as well as political variables and demographic factors, predict an individual's likelihood to engage in selective exposure to blogs. The findings indicate that respondents did practice selective exposure to blogs, predominantly those who are heavy blog users, politically active both online and offline, partisan, and highly educated.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/VXJLUSI9/Johnson et al. - 2009 - Communication Communities or “CyberGhettos” A Pa.pdf;/home/nathante/Zotero/storage/R9C73297/4064810.html} +} + +@inproceedings{jones_rscience_2019, + title = {R/Science: {{Challenges}} and {{Opportunities}} in {{Online Science Communication}}}, + shorttitle = {R/Science}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Jones, Ridley and Colusso, Lucas and Reinecke, Katharina and Hsieh, Gary}, + date = {2019-05-02}, + series = {{{CHI}} '19}, + pages = {1--14}, + publisher = {{Association for Computing Machinery}}, + location = {{Glasgow, Scotland, UK}}, + abstract = {Online discussion websites, such as Reddit's r/science forum, have the potential to foster science communication between researchers and the general public. However, little is known about who participates, what is discussed, and whether such websites are successful in achieving meaningful science discussions. To find out, we conducted a mixed-methods study analyzing 11,859 r/science posts and conducting interviews with 18 community members. Our results show that r/science facilitates rich information exchange and that the comments section provides a unique science communication document that guides engagement with scientific research. However, this community-sourced science communication comes largely from a knowledgeable public. 
We conclude with design suggestions for a number of critical problems that we uncovered: addressing the problem of topic newsworthiness and balancing broader participation and rigor.}, + isbn = {978-1-4503-5970-2}, + file = {/home/nathante/Zotero/storage/QJKUMC2A/Jones et al. - 2019 - rscience Challenges and Opportunities in Online .pdf} +} + +@article{joyce_predicting_2006, + title = {Predicting {{Continued Participation}} in {{Newsgroups}}}, + author = {Joyce, Elisabeth and Kraut, Robert E.}, + date = {2006-04-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {11}, + number = {3}, + pages = {723--747}, + issn = {1083-6101}, + abstract = {Turnover in online communities is very high, with most people who initially post a message to an online community never contributing again. In this paper, we test whether the responses that newcomers receive to their first posts influence the extent to which they continue to participate. The data come from initial posts made by 2,777 newcomers to six public newsgroups. We coded the content and valence of the initial post and its first response, if it received one, to see if these factors influenced newcomers’ likelihood of posting again. Approximately 61\% of newcomers received a reply to their initial post, and those who got a reply were 12\% more likely to post to the community again; their probability of posting again increased from 44\% to 56\%. They were more likely to receive a response if they asked a question or wrote a longer post. 
Surprisingly, the quality of the response they received—its emotional tone and whether it answered a newcomer’s question—did not influence the likelihood of the newcomer’s posting again.}, + file = {/home/nathante/Zotero/storage/KR2VSCNN/Joyce and Kraut - 2006 - Predicting Continued Participation in Newsgroups.pdf;/home/nathante/Zotero/storage/ZVL66I3I/Joyce and Kraut - 2006 - Predicting Continued Participation in Newsgroups.pdf;/home/nathante/Zotero/storage/VK44NCYI/4617705.html;/home/nathante/Zotero/storage/YXZPKK8E/Joyce and Kraut - 2006 - Predicting Continued Participation in Newsgroups.html} +} + +@inproceedings{kairam_life_2012, + title = {The Life and Death of Online Groups: Predicting Group Growth and Longevity}, + shorttitle = {The Life and Death of Online Groups}, + booktitle = {Proceedings of the Fifth {{ACM}} International Conference on {{Web}} Search and Data Mining}, + author = {Kairam, Sanjay Ram and Wang, Dan J. and Leskovec, Jure}, + date = {2012-02-08}, + series = {{{WSDM}} '12}, + pages = {673--682}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {We pose a fundamental question in understanding how to identify and design successful communities: What factors predict whether a community will grow and survive in the long term? Social scientists have addressed this question extensively by analyzing offline groups which endeavor to attract new members, such as social movements, finding that new individuals are influenced strongly by their ties to members of the group. As a result, prior work on the growth of communities has treated growth primarily as a diffusion processes, leading to findings about group evolution which can be difficult to explain. The proliferation of online social networks and communities, however, has created new opportunities to study, at a large scale and with very fine resolution, the mechanisms which lead to the formation, growth, and demise of online groups. 
In this paper, we analyze data from several thousand online social networks built on the Ning platform with the goal of understanding the factors contributing to the growth and longevity of groups within these networks. Specifically, we investigate the role that two types of growth (growth through diffusion and growth by other means) play during a group's formative stages from the perspectives of both the individual member and the group. Applying these insights to a population of groups of different ages and sizes, we build a model to classify groups which will grow rapidly over the short-term and long-term. Our model achieves over 79\% accuracy in predicting group growth over the following two months and over 78\% accuracy in predictions over the following two years. We utilize a similar approach to predict which groups will die within a year. The results of our combined analysis provide insight into how both early non-diffusion growth and a complex set of network constraints appear to contribute to the initial and continued growth and success of groups within social networks. 
Finally we discuss implications of this work for the design, maintenance, and analysis of online communities.}, + isbn = {978-1-4503-0747-5}, + keywords = {group formation,information diffusion,online communities,social networks}, + file = {/home/nathante/Zotero/storage/NS675EXH/Kairam et al_The Life and Death of Online Groups.pdf;/home/nathante/Zotero/storage/QZR8T2QH/Kairam et al_2012_The life and death of online groups.pdf} +} + +@incollection{karumur_content_2018, + title = {Content Is {{King}}, {{Leadership Lags}}: {{Effects}} of {{Prior Experience}} on {{Newcomer Retention}} and {{Productivity}} in {{Online Production Groups}}}, + shorttitle = {Content Is {{King}}, {{Leadership Lags}}}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Karumur, Raghav Pavan and Yu, Bowen and Zhu, Haiyi and Konstan, Joseph A.}, + date = {2018-04-21}, + pages = {1--13}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Organizers of online groups often struggle to recruit members who can most effectively carry out the group's activities and remain part of the group over time. In a study of a sample of 30,000 new editors belonging to 1,054 English WikiProjects, we empirically examine the effects of generalized prior work-productivity experience (measured by overall prior article edits), prior leadership experience (measured by overall prior project edits), and localized prior work-productivity experience (measured by pre-joining article edits on a project) on early retention and productivity. 
We find that (1) generalized prior work-productivity experience is positively associated with retention, but negatively associated with productivity, (2) prior leadership experience is negatively associated with both retention and productivity, and (3) localized prior work-productivity experience is positively associated with both retention and productivity within that focal project. We then discuss implications to inform the designs of early interventions aimed at group success.}, + isbn = {978-1-4503-5620-6}, + keywords = {learning transfer,newcomers,online communities,online groups,peer production,prior experience,productivity,resocialization,retention,subgroups,wikipedia,wikiprojects,withdrawal}, + file = {/home/nathante/Zotero/storage/YANJLZCB/Karumur et al. - 2018 - Content is King, Leadership Lags Effects of Prior.pdf} +} + +@article{kavanaugh_community_2005, + title = {Community {{Networks}}: {{Where Offline Communities Meet Online}}}, + shorttitle = {Community {{Networks}}}, + author = {Kavanaugh, Andrea and Carroll, John M. and Rosson, Mary Beth and Zin, Than Than and Reese, Debbie Denise}, + date = {2005-07-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {10}, + issn = {1083-6101}, + abstract = {This study explores the design and practice of the Blacksburg Electronic Village (BEV), a mature networked community. We describe findings from longitudinal survey data on the use and social impact of community computer networking. The survey data show that increased involvement with people, issues and community since going online is explained by education, extroversion and age. Using path models, we show that a person's sense of belonging and collective efficacy, group memberships, activism and social use of the Internet act as mediating variables. 
These findings extend evidence in support of the argument that Internet use can strengthen social contact, community engagement and attachment. Conversely, it underlines concern about the impact of computer networking on people with lower levels of education, extroversion, efficacy, and community belonging. We suggest design strategies and innovative tools for non-experts that might increase social interaction and improve usability for disadvantaged and underrepresented individuals and groups.}, + issue = {JCMC10417}, + file = {/home/nathante/Zotero/storage/IWBLRSS4/4614510.html} +} + +@inproceedings{kiene_surviving_2016, + title = {Surviving an “{{Eternal September}}”: {{How}} an Online Community Managed a Surge of Newcomers}, + shorttitle = {Surviving an "{{Eternal September}}"}, + booktitle = {Proceedings of the 2016 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Kiene, Charles and Monroy-Hernández, Andrés and Hill, Benjamin Mako}, + date = {2016}, + pages = {1152--1156}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {We present a qualitative analysis of interviews with participants in the NoSleep community within Reddit where millions of fans and writers of horror fiction congregate. We explore how the community handled a massive, sudden, and sustained increase in new members. Although existing theory and stories like Usenet's infamous "Eternal September" suggest that large influxes of newcomers can hurt online communities, our interviews suggest that NoSleep survived without major incident. We propose that three features of NoSleep allowed it to manage the rapid influx of newcomers gracefully: (1) an active and well-coordinated group of administrators, (2) a shared sense of community which facilitated community moderation, and (3) technological systems that mitigated norm violations. 
We also point to several important trade-offs and limitations.}, + isbn = {978-1-4503-3362-7}, + keywords = {newcomers,norms and governance,online communities,peer production,qualitative methods}, + file = {/home/nathante/Zotero/storage/2YPT6BUL/Kiene et al. - 2016 - Surviving an Eternal September How an Online Co.pdf;/home/nathante/Zotero/storage/S9JX8XE5/Kiene et al. - 2016 - Surviving an “Eternal September” How an online co.pdf} +} + +@article{kiene_technological_2019, + title = {Technological Frames and User Innovation: Exploring Technological Change in Community Moderation Teams}, + shorttitle = {Technological Frames and User Innovation}, + author = {Kiene, Charles and Jiang, Jialun "Aaron" and Hill, Benjamin Mako}, + date = {2019-11-07}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {3}, + pages = {44:1--44:23}, + abstract = {Management of technological change in organizations is one of the most enduring topics in the literature on computer-supported cooperative work. The successful navigation of technological change is both more challenging and more critical in online communities that are entirely mediated by technology than it is in traditional organizations. This paper presents an analysis of 14 in-depth interviews with moderators of subcommunities of one technological platform (Reddit) that added communities on a new technological platform (Discord). Moderation teams experienced several problems related to moderating content at scale as well as a disconnect between the affordances of Discord and their assumptions based on their experiences on Reddit. We found that moderation teams used Discord's API to create scripts and bots that augmented Discord to make the platform work more like tools on Reddit. These tools were particularly important in communities struggling with scale. 
Our findings suggest that increasingly widespread end user programming allows users of social computing systems to innovate and deploy solutions to unanticipated design problems by transforming new technological platforms to align with their past expectations.}, + issue = {CSCW}, + keywords = {API,bots,chat,computer-mediated communication,discord,moderation,online communities,reddit,social computing,technological change}, + file = {/home/nathante/Zotero/storage/E2PDCY58/Kiene et al. - 2019 - Technological frames and user innovation explorin.pdf;/home/nathante/Zotero/storage/U7M6IZY4/Kiene et al. - 2019 - Technological Frames and User Innovation Explorin.pdf} +} + +@article{klein_quality_2017, + title = {Quality Standards, Service Orientation, and Power in {{Airbnb}} and {{Couchsurfing}}}, + author = {Klein, Maximilian and Zhao, Jinhao and Ni, Jiajun and Johnson, Isaac and Hill, Benjamin Mako and Zhu, Haiyi}, + date = {2017}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {58:1--58:21}, + issn = {2573-0142}, + abstract = {Although Couchsurfing and Airbnb are both online communities that help users host strangers in their homes, they differ in an important sense: Couchsurfing prohibits monetary payment while Airbnb is built around it. We conducted interviews with users experienced on both Couchsurfing and Airbnb ("dual-users") to better understand systemic differences between the platforms. Based on these interviews we propose that, compared to Couchsurfing, Airbnb: (1) appears to require higher quality services, (2) places more emphasis on places over people, and (3) shifts social power from hosts to guests. Using public profiles from both platforms, we present analyses exploring each theme. Finally, we present evidence showing that Airbnb's growth has coincided with a decline in Couchsurfing. 
Taken together, our findings paint a complex picture of the changing character of network hospitality.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/WQS43NPP/Klein et al. - 2017 - Quality Standards, Service Orientation, and Power .pdf} +} + +@article{kou_understanding_2018, + title = {Understanding {{Social Roles}} in an {{Online Community}} of {{Volatile Practice}}: {{A Study}} of {{User Experience Practitioners}} on {{Reddit}}}, + shorttitle = {Understanding {{Social Roles}} in an {{Online Community}} of {{Volatile Practice}}}, + author = {Kou, Yubo and Gray, Colin M. and Toombs, Austin L. and Adams, Robin S.}, + date = {2018-12-21}, + journaltitle = {ACM Transactions on Social Computing}, + shortjournal = {Trans. Soc. Comput.}, + volume = {1}, + number = {4}, + pages = {17:1--17:22}, + issn = {2469-7818}, + abstract = {Community of practice (CoP) is a primary framework in social computing research that addresses learning and organizing specific practices in online communities. However, the classic CoP theory does not provide a detailed account for how practices change or evolve. Against the backdrop of a rapidly changing occupational landscape, it is crucial to understand how people participate in online communities focused on practices that have a volatile nature, as well as how social computing tools can best support them. In this article, we examine user experience (UX) design as a volatile practice that has no coherent body of knowledge and lacks a concrete path for newcomers to become a UX professional. Our study site is the “/r/userexperience” subreddit, an online UX community where practitioners socialize and learn. Using a mixed-methods approach, we identified five distinct social roles in relation to knowledge production and dissemination in the online community of volatile practice. 
We demonstrate that knowledge production is highly distributed, involving the participation and sensemaking of community members of varied levels of experience. We discuss how online platforms support online community of volatile practice and how our findings contribute to the CoP literature.}, + file = {/home/nathante/Zotero/storage/NWK464BS/Kou et al. - 2018 - Understanding Social Roles in an Online Community .pdf} +} + +@book{kraut_building_2012, + title = {Building Successful Online Communities: {{Evidence-based}} Social Design}, + author = {Kraut, Robert E. and Resnick, Paul and Kiesler, Sara}, + date = {2012}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + abstract = {Uses insights from social science, psychology, and economics to offer advice on planning and managing an online community.}, + isbn = {978-0-262-29831-5}, + langid = {english}, + keywords = {design,foundations of social computing}, + file = {/home/nathante/Zotero/storage/B4XSKAVW/04-kraut10-Newcomers-current.pdf;/home/nathante/Zotero/storage/CX4KDC3G/01-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/IJCEWA6L/06-Resnick10-Startup-current.pdf;/home/nathante/Zotero/storage/JEWAVXHG/02-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/RIM4D9KS/05-kiesler10-Regulation-current.pdf;/home/nathante/Zotero/storage/S6Z28BBS/03-Ren10-Commitment-current.pdf} +} + +@inproceedings{kumar_community_2018, + ids = {kumar_community_2018-1}, + title = {Community {{Interaction}} and {{Conflict}} on the {{Web}}}, + booktitle = {Proceedings of the 2018 {{World Wide Web Conference}}}, + author = {Kumar, Srijan and Hamilton, William L. and Leskovec, Jure and Jurafsky, Dan}, + date = {2018-04-23}, + series = {{{WWW}} '18}, + pages = {933--943}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + location = {{Lyon, France}}, + abstract = {Users organize themselves into communities on web platforms. 
These communities can interact with one another, often leading to conflicts and toxic interactions. However, little is known about the mechanisms of interactions between communities and how they impact users. Here we study intercommunity interactions across 36,000 communities on Reddit, examining cases where users of one community are mobilized by negative sentiment to comment in another community. We show that such conflicts tend to be initiated by a handful of communities---less than 1\% of communities start 74\% of conflicts. While conflicts tend to be initiated by highly active community members, they are carried out by significantly less active members. We find that conflicts are marked by formation of echo chambers, where users primarily talk to other users from their own community. In the long-term, conflicts have adverse effects and reduce the overall activity of users in the targeted communities. Our analysis of user interactions also suggests strategies for mitigating the negative impact of conflicts---such as increasing direct engagement between attackers and defenders. Further, we accurately predict whether a conflict will occur by creating a novel LSTM model that combines graph embeddings, user, community, and text features. This model can be used to create an early-warning system for community moderators to prevent conflicts. Altogether, this work presents a data-driven view of community interactions and conflict, and paves the way towards healthier online communities.}, + isbn = {978-1-4503-5639-8}, + keywords = {antisocial behavior,community,conflict,interaction,intercommunity,society,web}, + file = {/home/nathante/Zotero/storage/3R7J48EQ/Kumar et al_2018_Community Interaction and Conflict on the Web.pdf;/home/nathante/Zotero/storage/FPJ44933/Kumar et al. 
- 2018 - Community Interaction and Conflict on the Web.pdf;/home/nathante/Zotero/storage/U6GYGZDS/Kumar_et_al-2018-Community_interaction_conflict-WWW.pdf} +} + +@article{lakhani_how_2003, + title = {How Open Source Software Works: "{{Free}}" User-to-User Assistance}, + shorttitle = {How Open Source Software Works}, + author = {Lakhani, Karim R. and von Hippel, Eric}, + options = {useprefix=true}, + date = {2003}, + journaltitle = {Research Policy}, + volume = {32}, + number = {6}, + pages = {923--943}, + abstract = {Research into free and open source software development projects has so far largely focused on how the major tasks of software development are organized and motivated. But a complete project requires the execution of "mundane but necessary" tasks as well. In this paper, we explore how the mundane but necessary task of field support is organized in the case of Apache web server software, and why some project participants are motivated to provide this service gratis to others. We find that the Apache field support system functions effectively. We also find that, when we partition the help system into its component tasks, 98\% of the effort expended by information providers in fact returns direct learning benefits to those providers. This finding considerably reduces the puzzle of why information providers are willing to perform this task "for free." 
Implications are discussed.}, + keywords = {Econometrics,FOSS,Innovation}, + file = {/home/nathante/Zotero/storage/TZST9JHU/Lakhani and von Hippel - 2003 - How open source software works.pdf} +} + +@inproceedings{lampe_motivations_2010, + title = {Motivations to Participate in Online Communities}, + booktitle = {Proceedings of the 28th International Conference on {{Human}} Factors in Computing Systems}, + author = {Lampe, Cliff and Wash, Rick and Velasquez, Alcides and Ozkaya, Elif}, + date = {2010}, + pages = {1927--1936}, + publisher = {{ACM}}, + location = {{Atlanta, Georgia, USA}}, + abstract = {A consistent theoretical and practical challenge in the design of socio-technical systems is that of motivating users to participate in and contribute to them. This study examines the case of Everything2.com users from the theoretical perspectives of Uses and Gratifications and Organizational Commitment to compare individual versus organizational motivations in user participation. We find evidence that users may continue to participate in a site for different reasons than those that led them to the site. Feelings of belonging to a site are important for both anonymous and registered users across different types of uses. Long-term users felt more dissatisfied with the site than anonymous users. Social and cognitive factors seem to be more important than issues of usability in predicting contribution to the site.}, + isbn = {978-1-60558-929-9}, + file = {/home/nathante/Zotero/storage/7NIQDKFR/Lampe et al. 
- 2010 - Motivations to participate in online communities.pdf} +} + +@inproceedings{lampe_slashdot_2004, + title = {Slash(Dot) and Burn: Distributed Moderation in a Large Online Conversation Space}, + shorttitle = {Slash(Dot) and Burn}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Lampe, Cliff and Resnick, Paul}, + date = {2004}, + series = {{{CHI}} '04}, + pages = {543--550}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Can a system of distributed moderation quickly and consistently separate high and low quality comments in an online conversation? Analysis of the site Slashdot.org suggests that the answer is a qualified yes, but that important challenges remain for designers of such systems. Thousands of users act as moderators. Final scores for comments are reasonably dispersed and the community generally agrees that moderations are fair. On the other hand, much of a conversation can pass before the best and worst comments are identified. Of those moderations that were judged unfair, only about half were subsequently counterbalanced by a moderation in the other direction. And comments with low scores, not at top-level, or posted late in a conversation were more likely to be overlooked by moderators.}, + isbn = {978-1-58113-702-6}, + keywords = {collaborative filtering,computer-mediated communication,recommender systems}, + file = {/home/nathante/Zotero/storage/J4ALSW7H/Lampe and Resnick - 2004 - Slash(dot) and burn distributed moderation in a l.pdf} +} + +@incollection{lazarsfeld_friendship_1954, + title = {Friendship as a Social Process: A Substantive and Methodological Analysis}, + booktitle = {Freedom and Control in Modern Society}, + author = {Lazarsfeld, Paul F. 
and Merton, Robert K.}, + editor = {Berger, Morroe and Abel, Theodore and Page, Charles H.}, + date = {1954}, + pages = {18--66}, + publisher = {{Van Nostrand}}, + location = {{New York}}, + abstract = {Page} +} + +@article{lazer_studying_2020, + title = {Studying Human Attention on the {{Internet}}}, + author = {Lazer, David}, + date = {2020-01-07}, + journaltitle = {Proceedings of the National Academy of Sciences}, + shortjournal = {Proc Natl Acad Sci USA}, + volume = {117}, + number = {1}, + pages = {21--22}, + issn = {0027-8424, 1091-6490}, + langid = {english}, + file = {/home/nathante/Zotero/storage/T8C43YAK/Lazer - 2020 - Studying human attention on the Internet.pdf} +} + +@inproceedings{leavitt_role_2017, + title = {The Role of Information Visibility in Network Gatekeeping: {{Information}} Aggregation on Reddit during Crisis Events}, + shorttitle = {The Role of Information Visibility in Network Gatekeeping}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Leavitt, Alex and Robinson, John J.}, + date = {2017-02-25}, + series = {{{CSCW}} '17}, + pages = {1246--1261}, + publisher = {{Association for Computing Machinery}}, + location = {{Portland, Oregon, USA}}, + abstract = {As social media platforms witness more and more contributions from participants during developing crisis events, some platforms provide affordances that support visibility for specific pieces of information. However, the design of information visibility, especially in the context of controlling information flows (through gatekeeping), may shape how participants collect and share up-to-date information in these systems. 
This paper looks at the field site of reddit.com through trace ethnography methods to understand how the design of reddit's platform (from algorithms to user roles) impacts the visibility of information and subsequently how participants aggregate information in response to ongoing events. Through trace ethnographic analysis, we illustrate three themes related to tensions around visibility - behavioral, structural, and relational - and show how visibility shapes the work of producing information about crises in social news sites.}, + isbn = {978-1-4503-4335-0}, + file = {/home/nathante/Zotero/storage/6PIBDNTW/Leavitt and Robinson - 2017 - The Role of Information Visibility in Network Gate.pdf} +} + +@inproceedings{leavitt_this_2015, + title = {"{{This}} Is a Throwaway Account": {{Temporary}} Technical Identities and Perceptions of Anonymity in a Massive Online Community}, + shorttitle = {"{{This}} Is a Throwaway Account"}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Leavitt, Alex}, + date = {2015-02-28}, + series = {{{CSCW}} '15}, + pages = {317--327}, + publisher = {{Association for Computing Machinery}}, + location = {{Vancouver, BC, Canada}}, + abstract = {This paper explores temporary identities on social media platforms and individuals' uses of these identities with respect to their perceptions of anonymity. Given the research on multiple profile maintenance, little research has examined the role that some social media platforms play in affording users with temporary identities. Further, most of the research on anonymity stops short of the concept of varying perceptions of anonymity. This paper builds on these research areas by describing the phenomenon of temporary "throwaway accounts" and their uses on reddit.com, a popular social news site. 
In addition to ethnographic trace analysis to examine the contexts in which throwaway accounts are adopted, this paper presents a predictive model that suggests that perceptions of anonymity significantly shape the potential uses of throwaway accounts and that women are much more likely to adopt temporary identities than men.}, + isbn = {978-1-4503-2922-4}, + file = {/home/nathante/Zotero/storage/7ITF227V/Leavitt - 2015 - This is a Throwaway Account Temporary Technical.pdf} +} + +@article{leavitt_upvote_2017, + title = {Upvote My News: {{The}} Practices of Peer Information Aggregation for Breaking News on Reddit.Com}, + shorttitle = {Upvote My News}, + author = {Leavitt, Alex and Robinson, John J.}, + date = {2017-12-06}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {65:1--65:18}, + abstract = {Citizen participation in crisis communication increasingly occurs in social media contexts. As some platforms -- e.g., social news sites -- evolve around collaborative voting, filtering, and information sharing, the aggregation of breaking news information during crisis situations appears more often as an emergent practice in these online communities. Drawing from 53 interviews and descriptive quantitative analysis of reddit posts and comments, this paper presents a qualitative case study examining reddit.com members aggregate information during crisis events within the context of reddit's post/comment structure, crowd voting, and ranking algorithms. 
Using the lens of network gatekeeping, the paper shows how participants evaluate sources, organize information, and verify details to demonstrate how different affordances and limitations of information production allow or restrict particular types of network gatekeeping.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/TW846G2K/Leavitt and Robinson - 2017 - Upvote My News The Practices of Peer Information .pdf} +} + +@article{leimeister_evaluation_2005, + title = {Evaluation of a {{Systematic Design}} for a {{Virtual Patient Community}}}, + author = {Leimeister, Jan Marco and Krcmar, Helmut}, + date = {2005-07-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {10}, + issn = {1083-6101}, + abstract = {Virtual Communities (VCs) offer ubiquitous access to information and exchange possibilities for people in similar situations, which is especially valuable for patients with chronic / life-threatening diseases. However, it is seldom considered possible to create VCs systematically. This article describes the evaluation of the design elements and factors that contributed to the success of the VC krebsgemeinschaft.de (a VC for cancer patients in the German-speaking internet), by assessing user acceptance and usage. Additionally, the existence of trust (a constituent element of working VCs) in krebsgemeinschaft.de is addressed. 
Based on these criteria, we empirically verify the chosen design components and generate insights into the systematic development and operation of VCs in general and VCs for patients in the German healthcare system in particular.}, + issue = {JCMC1041}, + file = {/home/nathante/Zotero/storage/BI7E4R6W/Leimeister and Krcmar - 2005 - Evaluation of a Systematic Design for a Virtual Pa.pdf;/home/nathante/Zotero/storage/G39U4C3F/4614530.html} +} + +@inproceedings{liang_knowledge_2017, + ids = {liang_knowledge_2017-1}, + title = {Knowledge Sharing in Online Discussion Threads: What Predicts the Ratings?}, + shorttitle = {Knowledge Sharing in Online Discussion Threads}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Liang, Yuyang}, + date = {2017-02-25}, + series = {{{CSCW}} '17}, + pages = {146--154}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {As an important category of user-generated content (UGC) community, Question and Answer (Q\&A) community offers internet users opportunities to ask questions and share knowledge with others. In order to understand how the ratings of knowledge contribution quality correlate with the way knowledge is being shared in discussion threads, the study examines user behaviors and profiles in a large knowledge sharing community, /r/Techsupport, a discussion based Q\&A site in Reddit.com concerning internet and technology problems. Negative binomial regressions and negative binomial mixed models are built to investigate the relationships among thread structure, level of user activity, user profiles and the ratings of threads and comments in the community. Results indicate that in the better rated threads, the structures tend to be more centralized with heterogeneous participants discussing the problem at a deeper level. 
Meanwhile, contributions with good ratings are more likely to be produced by users who are more engaged in commenting behaviors.}, + isbn = {978-1-4503-4335-0}, + keywords = {knowledge sharing,network structure,online community,threaded discussion,user generated content,user profile}, + file = {/home/nathante/Zotero/storage/852P8MGY/Liang - 2017 - Knowledge Sharing in Online Discussion Threads Wh.pdf} +} + +@inproceedings{lin_better_2017, + title = {Better When It Was Smaller? {{Community}} Content and Behavior after Massive Growth.}, + shorttitle = {Better {{When It Was Smaller}}?}, + booktitle = {Eleventh {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Lin, Zhiyuan and Salehi, Niloufar and Yao, Bowen and Chen, Yiqi and Bernstein, Michael S.}, + date = {2017}, + pages = {132--141}, + publisher = {{AAAI}}, + location = {{Montreal, Canada}}, + abstract = {Online communities have a love-hate relationship with membership growth: new members bring fresh perspectives, but old-timers worry that growth interrupts the community’s social dynamic and lowers content quality. To arbitrate these two theories, we analyze over 45 million comments from 10 Reddit subcommunities following an exogenous shock when each subcommunity was added to the default set for all Reddit users. Capitalizing on these natural experiments, we test for changes to the content vote patterns, linguistic patterns, and community network patterns before and after being defaulted. Results support a narrative that the communities remain high-quality and similar to their previous selves even post-growth. There is a temporary dip in upvote scores right after the communities were defaulted, but the communities quickly recover to pre-default or even higher levels. Likewise, complaints about low-quality posts do not rise in frequency after getting defaulted. Strong moderation also helps keep upvotes common and complaint levels low. 
Communities’ language use does not become more like the rest of Reddit after getting defaulted. However, growth does have some impact on attention: community members cluster their activity around a smaller proportion of posts after the community is defaulted.}, + eventtitle = {{{ICWSM}}}, + file = {/home/nathante/Zotero/storage/3NB3IZUR/Lin et al. - 2017 - Better When It Was Smaller Community Content and .pdf} +} + +@inproceedings{litt_just_2016, + title = {"{{Just Cast}} the {{Net}}, and {{Hopefully}} the {{Right Fish Swim}} into {{It}}": {{Audience Management}} on {{Social Network Sites}}}, + shorttitle = {"{{Just Cast}} the {{Net}}, and {{Hopefully}} the {{Right Fish Swim}} into {{It}}"}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer-Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Litt, Eden and Hargittai, Eszter}, + date = {2016-02-27}, + series = {{{CSCW}} '16}, + pages = {1488--1500}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {When users post on social network sites, they can engage in audience-reaching strategies, in an effort to reach desired audience members, as well as audience-limiting strategies, in an effort to avoid unwanted audience members. While much research has focused on users' audience-limiting strategies, little research has explicitly focused on users' audience-reaching strategies. Additionally, little work has explored either strategy at the post level. Using mixed methods involving a diary study and follow-up interviews focused on a diverse group of users' posts, this article reveals several audience-reaching strategies users engaged from altering their content to tagging. However, users in this study rarely used strategies to exclude people proactively and technologically outside of their targeted audiences, and instead broadcasted widely. 
Participants described several rationales for sharing broadly from skill-related issues to a reliance on the audience or site to filter the content.}, + isbn = {978-1-4503-3592-8}, + keywords = {Audience,audience management,audience-reaching strategies,imagined audience,privacy,social network sites}, + file = {/home/nathante/Zotero/storage/UKKUVHK2/Litt_Hargittai_2016_“\;Just Cast the Net, and Hopefully the Right Fish Swim into It”\;.pdf} +} + +@inproceedings{lu_investigate_2019, + title = {Investigate {{Transitions}} into {{Drug Addiction}} through {{Text Mining}} of {{Reddit Data}}}, + booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}}, + author = {Lu, John and Sridhar, Sumati and Pandey, Ritika and Hasan, Mohammad Al and Mohler, George}, + date = {2019-07-25}, + series = {{{KDD}} '19}, + pages = {2367--2375}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Increasing rates of opioid drug abuse and heightened prevalence of online support communities underscore the necessity of employing data mining techniques to better understand drug addiction using these rapidly developing online resources. In this work, we obtained data from Reddit, an online collection of forums, to gather insight into drug use/misuse using text snippets from users' narratives. Specifically, using users' posts, we trained a binary classifier which predicts a user's transitions from casual drug discussion forums to drug recovery forums. We also proposed a Cox regression model that outputs likelihoods of such transitions. In doing so, we found that utterances of select drugs and certain linguistic features contained in one's posts can help predict these transitions. 
Using unfiltered drug-related posts, our research delineates drugs that are associated with higher rates of transitions from recreational drug discussion to support/recovery discussion, offers insight into modern drug culture, and provides tools with potential applications in combating the opioid crisis.}, + isbn = {978-1-4503-6201-6}, + keywords = {cox regression,drug addiction and recovery,reddit forum,text mining}, + file = {/home/nathante/Zotero/storage/GUQKME9M/Lu et al_2019_Investigate Transitions into Drug Addiction through Text Mining of Reddit Data.pdf} +} + +@inproceedings{ma_when_2019, + title = {When {{Do People Trust Their Social Groups}}?}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Ma, Xiao and Cheng, Justin and Iyer, Shankar and Naaman, Mor}, + date = {2019-05-02}, + pages = {1--12}, + publisher = {{ACM}}, + location = {{Glasgow, Scotland, UK}}, + eventtitle = {{{CHI}} '19: {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + isbn = {978-1-4503-5970-2}, + langid = {english}, + file = {/home/nathante/Zotero/storage/ZEWUJPHL/Ma et al. - 2019 - When Do People Trust Their Social Groups.pdf} +} + +@article{majchrzak_contradictory_2013, + title = {The {{Contradictory Influence}} of {{Social Media Affordances}} on {{Online Communal Knowledge Sharing}}}, + author = {Majchrzak, Ann and Faraj, Samer and Kane, Gerald C. and Azad, Bijan}, + date = {2013-10-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {19}, + number = {1}, + pages = {38--55}, + publisher = {{Oxford Academic}}, + abstract = {The use of social media creates the opportunity to turn organization-wide knowledge sharing in the workplace from an intermittent, centralized knowledge management process to a continuous online knowledge conversation of strangers, unexpected interpretations and re-uses, and dynamic emergence. 
We theorize four affordances of social media representing different ways to engage in this publicly visible knowledge conversations: metavoicing, triggered attending, network-informed associating, and generative role-taking. We further theorize mechanisms that affect how people engage in the knowledge conversation, finding that some mechanisms, when activated, will have positive effects on moving the knowledge conversation forward, but others will have adverse consequences not intended by the organization. These emergent tensions become the basis for the implications we draw.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/9U9NTEVE/Majchrzak et al. - 2013 - The Contradictory Influence of Social Media Afford.pdf;/home/nathante/Zotero/storage/DBAC2BYD/4067499.html} +} + +@article{majchrzak_effect_2016, + title = {Effect of {{Knowledge-Sharing Trajectories}} on {{Innovative Outcomes}} in {{Temporary Online Crowds}}}, + author = {Majchrzak, Ann and Malhotra, Arvind}, + date = {2016-11-10}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + issn = {1047-7047}, + abstract = {There is substantial research on the effects of formal control structures (i.e., incentives, identities, organization, norms) on knowledge sharing leading to innovative outcomes in online communities. However, there is little research on how knowledge-sharing trajectories in temporary online crowds create innovative outcomes without these structures. Such research is particularly of interest in the context of temporary online crowds solicited with crowdsourcing in which there is only minimal structure for knowledge sharing. We identify eight types of crowdsourcing with different knowledge-sharing patterns. The focus of this study is on the one type of crowdsourcing—collaborative innovation challenges—in which there is the least restriction on knowledge sharing in the crowd. 
A content analysis was conducted of all time-stamped posts made in five different collaborative innovation challenges to identify different knowledge-sharing trajectories used. We found that a paradox-framed trajectory was more likely to be followed by innovative outcomes compared to three other knowledge-sharing trajectories. A paradox-framed trajectory is one in which a novel solution emerges when different participants post in the following sequence: (1) contributing a paradox associated with the problem objective, (2) sharing assumptions to validate the paradox, and (3) sharing initial ideas for resolving the paradox in a manner that meets the problem statement. Based on the findings, a theory of paradox-framed trajectories in temporary online crowds is presented along with implications for knowledge creation theories in general and online knowledge-creating communities in particular.}, + file = {/home/nathante/Zotero/storage/XI69RCFW/Majchrzak and Malhotra - 2016 - Effect of Knowledge-Sharing Trajectories on Innova.pdf} +} + +@online{mamie_are_2021, + title = {Are {{Anti-Feminist Communities Gateways}} to the {{Far Right}}? {{Evidence}} from {{Reddit}} and {{YouTube}}}, + shorttitle = {Are {{Anti-Feminist Communities Gateways}} to the {{Far Right}}?}, + author = {Mamié, Robin and Ribeiro, Manoel Horta and West, Robert}, + date = {2021-02-25}, + eprint = {2102.12837}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Researchers have suggested that "the Manosphere," a conglomerate of men-centered online communities, may serve as a gateway to far right movements. In that context, this paper quantitatively studies the migratory patterns between a variety of groups within the Manosphere and the Alt-right, a loosely connected far right movement that has been particularly active in mainstream social networks. 
Our analysis leverages over 300 million comments spread through Reddit (in 115 subreddits) and YouTube (in 526 channels) to investigate whether the audiences of channels and subreddits associated with these communities have converged between 2006 and 2018. In addition to subreddits related to the communities of interest, we also collect data on counterparts: other groups of users which we use for comparison (e.g., for YouTube we use a set of media channels). Besides measuring the similarity in the commenting user bases of these communities, we perform a migration study, calculating to which extent users in the Manosphere gradually engage with Alt-right content. Our results suggest that there is a large overlap between the user bases of the Alt-right and of the Manosphere and that members of the Manosphere have a bigger chance to engage with far right content than carefully chosen counterparts. However, our analysis also shows that migration and user base overlap varies substantially across different platforms and within the Manosphere. Members of some communities (e.g., Men's Rights Activists) gradually engage with the Alt-right significantly more than counterparts on both Reddit and YouTube, whereas for other communities, this engagement happens mostly on Reddit (e.g., Pick Up Artists). Overall, our work paints a nuanced picture of the pipeline between the Manosphere and the Alt-right, which may inform platforms' policies and moderation decisions regarding these communities.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computers and Society}, + file = {/home/nathante/Zotero/storage/33R8MJF4/Mamié et al. 
- 2021 - Are Anti-Feminist Communities Gateways to the Far Right.pdf;/home/nathante/Zotero/storage/N8VBLTAY/2102.html} +} + +@article{marwick_i_2011, + ids = {marwick_i_2011-1}, + title = {I Tweet Honestly, {{I}} Tweet Passionately: {{Twitter}} Users, Context Collapse, and the Imagined Audience}, + shorttitle = {I Tweet Honestly, {{I}} Tweet Passionately}, + author = {Marwick, A. E. and {boyd}, danah}, + date = {2011-02-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {13}, + number = {1}, + pages = {114--133}, + issn = {1461-4448}, + abstract = {Social media technologies collapse multiple audiences into single contexts, making it difficult for people to use the same techniques online that they do to handle multiplicity in face-to-face conversation. This article investigates how content producers navigate ‘imagined audiences’ on Twitter. We talked with participants who have different types of followings to understand their techniques, including targeting different audiences, concealing subjects, and maintaining authenticity. Some techniques of audience management resemble the practices of ‘micro-celebrity’ and personal branding, both strategic self-commodification. 
Our model of the networked audience assumes a many-to-many communication through which individuals conceptualize an imagined audience evoked through their tweets.}, + langid = {english}, + keywords = {imagined audiences,qualitative,SNS}, + file = {/home/nathante/Zotero/storage/GHXUFS86/Marwick and boyd - 2011 - I tweet honestly, I tweet passionately Twitter us.pdf} +} + +@article{massanari_gamergate_2017, + title = {\#{{Gamergate}} and {{The Fappening}}: {{How Reddit}}’s Algorithm, Governance, and Culture Support Toxic Technocultures}, + shorttitle = {\#{{Gamergate}} and {{The Fappening}}}, + author = {Massanari, Adrienne}, + date = {2017-03-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {19}, + number = {3}, + pages = {329--346}, + issn = {1461-4448}, + abstract = {This article considers how the social-news and community site Reddit.com has become a hub for anti-feminist activism. Examining two recent cases of what are defined as “toxic technocultures” (\#Gamergate and The Fappening), this work describes how Reddit’s design, algorithm, and platform politics implicitly support these kinds of cultures. In particular, this piece focuses on the ways in which Reddit’s karma point system, aggregation of material across subreddits, ease of subreddit and user account creation, governance structure, and policies around offensive content serve to provide fertile ground for anti-feminist and misogynistic activism. The ways in which these events and communities reflect certain problematic aspects of geek masculinity are also considered. 
This research is informed by the results of a long-term participant-observation and ethnographic study into Reddit’s culture and community and is grounded in actor-network theory.}, + langid = {english}, + keywords = {Algorithms,design,Gamergate,gender,online communities,online harassment,platform politics,Reddit,The Fappening,toxic technocultures}, + file = {/home/nathante/Zotero/storage/D5W5JKQU/Massanari - 2017 - #Gamergate and The Fappening How Reddit’s algorit.pdf;/home/nathante/Zotero/storage/NGCFX9JB/Massanari - 2017 - #Gamergate and The Fappening How Reddit’s algorit.pdf} +} + +@article{matias_civic_2019, + title = {The Civic Labor of Volunteer Moderators Online}, + author = {Matias, J. Nathan}, + date = {2019-04}, + journaltitle = {Social Media + Society}, + volume = {5}, + number = {2}, + pages = {1--12}, + issn = {2056-3051, 2056-3051}, + abstract = {Volunteer moderators create, support, and control public discourse for millions of people online, even as moderators’ uncompensated labor upholds platform funding models. What is the meaning of this work and who is it for? In this article, I examine the meanings of volunteer moderation on the social news platform reddit. Scholarship on volunteer moderation has viewed this work separately as digital labor for platforms, civic participation in communities, or oligarchy among other moderators. In mixed-methods research sampled from over 52,000 subreddit communities and in over a dozen interviews, I show how moderators adopt all of these frames as they develop and re-develop everyday meanings of moderation—facing the platform, their communities, and other moderators alike. I also show how this civic notion of digital labor brings clarity to a strike by moderators in July 2015. Volunteer governance remains a common approach to managing social relations, conflict, and civil liberties online. 
Our ability to see how communities negotiate the meaning of moderation will shape our capacity to address digital governance as a society.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/Q8BACUUZ/Matias - 2019 - The Civic Labor of Volunteer Moderators Online.pdf} +} + +@article{mcmillan_sense_1986, + title = {Sense of Community: {{A}} Definition and Theory}, + shorttitle = {Sense of Community}, + author = {McMillan, David W. and Chavis, David M.}, + date = {1986}, + journaltitle = {Journal of Community Psychology}, + volume = {14}, + number = {1}, + pages = {6--23}, + publisher = {{John Wiley \& Sons}}, + location = {{US}}, + issn = {1520-6629(Electronic),0090-4392(Print)}, + abstract = {Proposes that a sense of community is a feeling that members have of belonging, a feeling that members matter to one another and to the group, and a shared faith that members' needs will be met through commitment to be together. The authors apply the term community equally to territorial communities (e.g., neighborhoods) and to relational communities (e.g., professional, spiritual). The proposed definition of a sense of community has 4 elements: membership, influence, integration and fulfillment of needs, and shared emotional connection. Subelements of these elements of a sense of community and how they work dynamically together to create and maintain it are described. Hypothetical examples from a university, neighborhood, youth gang, and kibbutz are presented to illustrate the interworkings of the elements of a sense of community. It is suggested that this understanding of sense of community has implications for community treatment programs for the mentally retarded and mentally ill. Where "community" means more than residency outside of an institution, strategies can be introduced to allow the therapeutic benefits of community to be developed within group homes and to provide for better integration with communities surrounding such facilities. 
(90 ref) (PsycINFO Database Record (c) 2016 APA, all rights reserved)}, + keywords = {Communities,Community Psychology,Group Dynamics,Sense of Community,Theories}, + file = {/home/nathante/Zotero/storage/D5ECP4GI/1987-03834-001.html} +} + +@article{mcpherson_birds_2001, + title = {Birds of a {{Feather}}: {{Homophily}} in {{Social Networks}}}, + shorttitle = {Birds of a {{Feather}}}, + author = {McPherson, Miller and Smith-Lovin, Lynn and Cook, James M}, + date = {2001-08-01}, + journaltitle = {Annual Review of Sociology}, + shortjournal = {Annu. Rev. Sociol.}, + volume = {27}, + number = {1}, + pages = {415--444}, + publisher = {{Annual Reviews}}, + issn = {0360-0572}, + abstract = {Similarity breeds connection. This principle—the homophily principle—structures network ties of every type, including marriage, friendship, work, advice, support, information transfer, exchange, comembership, and other types of relationship. The result is that people's personal networks are homogeneous with regard to many sociodemographic, behavioral, and intrapersonal characteristics. Homophily limits people's social worlds in a way that has powerful implications for the information they receive, the attitudes they form, and the interactions they experience. Homophily in race and ethnicity creates the strongest divides in our personal environments, with age, religion, education, occupation, and gender following in roughly that order. Geographic propinquity, families, organizations, and isomorphic positions in social systems all create contexts in which homophilous relations form. Ties between nonsimilar individuals also dissolve at a higher rate, which sets the stage for the formation of niches (localized positions) within social space. 
We argue for more research on: (a) the basic ecological processes that link organizations, associations, cultural communities, social movements, and many other social forms; (b) the impact of multiplex ties on the patterns of homophily; and (c) the dynamics of network change over time through which networks and other social entities co-evolve.}, + file = {/home/nathante/Zotero/storage/DWSDWJ8E/McPherson et al. - 2001 - Birds of a Feather Homophily in Social Networks.pdf;/home/nathante/Zotero/storage/GFG4ZCE8/annurev.soc.27.1.html} +} + +@article{mcpherson_ecology_1983, + title = {An Ecology of Affiliation}, + author = {McPherson, J. Miller}, + date = {1983}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {48}, + number = {4}, + eprint = {2117719}, + eprinttype = {jstor}, + pages = {519--532}, + issn = {0003-1224}, + abstract = {This paper develops an ecological model of the competition of social organizations for members. The concept of the ecological niche is quantified explicitly in a way which ties together geography, time, and the social composition of organizations. A differential equation model analogous to the Lotka-Volterra competition equations in biology captures the dynamics of the system. This dynamic model is related to the niche concept in a novel way, which produces an easily understood and powerful picture of the static and dynamic structure of the community. This new perspective provides a theoretical link between the aggregate macrostructural theory of Blau (1977a,b) and the microstructural dynamics of organizational demography (Pfeffer, 1983). 
The model is tested with data on organizations from a midwestern city.}, + file = {/home/nathante/Zotero/storage/WIDCF8XB/McPherson - 1983 - An ecology of affiliation.pdf} +} + +@article{mittell_sites_2009, + title = {Sites of Participation: {{Wiki}} Fandom and the Case of {{Lostpedia}}}, + shorttitle = {Sites of Participation}, + author = {Mittell, Jason}, + date = {2009-07-09}, + journaltitle = {Transformative Works and Cultures}, + shortjournal = {TWC}, + volume = {3}, + issn = {1941-2258}, + abstract = {This essay explores the award-winning fan site Lostpedia to examine how the wiki platform enables fan engagement, structures participation, and distinguishes between various forms of content, including canon, fanon, and parody. I write as a participant-observer, with extensive experience as a Lostpedia reader and editor. The article uses the "digital breadcrumbs" of wikis to trace the history of fan creativity, participation, game play, and debates within a shared site of community fan engagement. Using the Lostpedia site as a case study of fan praxis, the article highlights how issues like competing fandoms, copyright, and modes of discourse become manifest via the user-generated content of a fan wiki.} +} + +@inproceedings{morris_comparison_2010, + title = {A {{Comparison}} of {{Information Seeking Using Search Engines}} and {{Social Networks}}}, + booktitle = {Fourth {{International AAAI Conference}} on {{Weblogs}} and {{Social Media}}}, + author = {Morris, Meredith Ringel and Teevan, Jaime and Panovich, Katrina}, + date = {2010-05-16}, + abstract = {The Web has become an important information repository; often it is the first source a person turns to with an information need. One common way to search the Web is with a search engine. However, it is not always easy for people to find what they are looking for with keyword search, and at times the desired information may not be readily available online. 
An alternative, facilitated by the rise of social media, is to pose a question to one’s online social network. In this paper, we explore the pros and cons of using a social networking tool to fill an information need, as compared with a search engine. We describe a study in which 12 participants searched the Web while simultaneously posing a question on the same topic to their social network, and we compare the results they found by each method.}, + eventtitle = {Fourth {{International AAAI Conference}} on {{Weblogs}} and {{Social Media}}}, + langid = {english}, + file = {/home/nathante/Zotero/storage/MS2N5Z3X/Morris et al_2010_A Comparison of Information Seeking Using Search Engines and Social Networks.pdf;/home/nathante/Zotero/storage/D3C4PIU9/1518.html} +} + +@incollection{morris_what_2010, + title = {What Do People Ask Their Social Networks, and Why? A Survey Study of Status Message Q\&{{A}} Behavior}, + shorttitle = {What Do People Ask Their Social Networks, and Why?}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Morris, Meredith Ringel and Teevan, Jaime and Panovich, Katrina}, + date = {2010-04-10}, + pages = {1739--1748}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {People often turn to their friends, families, and colleagues when they have questions. The recent, rapid rise of online social networking tools has made doing this on a large scale easy and efficient. In this paper we explore the phenomenon of using social network status messages to ask questions. We conducted a survey of 624 people, asking them to share the questions they have asked and answered of their online social networks. We present detailed data on the frequency of this type of question asking, the types of questions asked, and respondents' motivations for asking their social networks rather than using more traditional search tools like Web search engines. 
We report on the perceived speed and quality of the answers received, as well as what motivates people to respond to questions seen in their friends' status messages. We then discuss the implications of our findings for the design of next-generation search tools.}, + isbn = {978-1-60558-929-9}, + keywords = {q&a,social networks,social search,web search}, + file = {/home/nathante/Zotero/storage/4N6C2AYW/Morris et al_2010_What do people ask their social networks, and why.pdf} +} + +@article{muhtaseb_arab_2008, + title = {Arab {{Americans}}’ {{Motives}} for {{Using}} the {{Internet}} as a {{Functional Media Alternative}} and {{Their Perceptions}} of {{U}}.{{S}}. {{Public Opinion}}}, + author = {Muhtaseb, Ahlam and Frey, Lawrence R.}, + date = {2008-04-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {13}, + number = {3}, + pages = {618--657}, + issn = {1083-6101}, + abstract = {This exploratory study employed uses and gratifications theory to understand Arab Americans’ salient motives for using the internet and whether the internet served as a functional alternative to other media to satisfy Arab Americans’ information-seeking and interpersonal needs. Spiral of silence theory also was used to investigate the relationship between Arab Americans’ perceptions of U.S. public opinion and their motives for using the internet. Results from an online questionnaire survey (N = 124) indicated that information seeking was the most salient motive for using the internet and that the internet did serve as a functional alternative, with a significant percentage of the internet sources used being foreign based. There was, however, no relationship between Arab Americans’ perceptions of U.S. public opinion and their motives for using the internet. 
The findings are discussed with respect to the use of the internet by members of this marginalized cultural group.}, + file = {/home/nathante/Zotero/storage/5PD4EGRG/Muhtaseb and Frey - 2008 - Arab Americans’ Motives for Using the Internet as .pdf;/home/nathante/Zotero/storage/WKH4PJ7L/4582964.html} +} + +@article{nissenbaum_internet_2017, + title = {Internet Memes as Contested Cultural Capital: {{The}} Case of 4chan’s /b/ Board}, + shorttitle = {Internet Memes as Contested Cultural Capital}, + author = {Nissenbaum, Asaf and Shifman, Limor}, + date = {2017-04-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {19}, + number = {4}, + pages = {483--501}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This article explores the workings of memes as cultural capital in web-based communities. A grounded analysis of 4chan’s /b/ board reveals three main formulations of memes as capital, delineating them as subcultural knowledge, unstable equilibriums, and discursive weapons. While the first formulation follows well-documented notions about subcultural knowledge as a basis for boundary work, the latter two focus on the dualities intrinsic to Internet memes. The contradiction between following conventions and supplying innovative content leads to memes’ configuration as unstable equilibriums, triggering constant conflict about their “correct” use. Paradoxically, this struggle highlights collective identity, as it keeps shared culture at the center of discussion. Similarly, when memes are used as jabs at the most intense points of arguments, they function simultaneously as signifiers of superior authoritative status and as reminders of common affinity. 
Thus, the dualities underpinning memes’ structure lead to their performance as contested cultural capital.}, + langid = {english}, + keywords = {4chan,cultural capital,digital culture,Internet memes,web-based communities}, + file = {/home/nathante/Zotero/storage/5D4MWNNV/Nissenbaum and Shifman - 2017 - Internet memes as contested cultural capital The .pdf} +} + +@online{noauthor_crowd_nodate, + title = {Crowd {{Size}}, {{Diversity}} and {{Performance}} | {{Proceedings}} of the 33rd {{Annual ACM Conference}} on {{Human Factors}} in {{Computing Systems}}} +} + +@book{north_institutions_1990-1, + title = {Institutions, {{Institutional Change}} and {{Economic Performance}}}, + author = {North, Douglass C.}, + date = {1990}, + series = {Political {{Economy}} of {{Institutions}} and {{Decisions}}}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge}}, + abstract = {Continuing his groundbreaking analysis of economic structures, Douglass North develops an analytical framework for explaining the ways in which institutions and institutional change affect the performance of economies, both at a given time and over time. Institutions exist, he argues, due to the uncertainties involved in human interaction; they are the constraints devised to structure that interaction. Yet, institutions vary widely in their consequences for economic performance; some economies develop institutions that produce growth and development, while others develop institutions that produce stagnation. North first explores the nature of institutions and explains the role of transaction and production costs in their development. The second part of the book deals with institutional change. Institutions create the incentive structure in an economy, and organisations will be created to take advantage of the opportunities provided within a given institutional framework. 
North argues that the kinds of skills and knowledge fostered by the structure of an economy will shape the direction of change and gradually alter the institutional framework. He then explains how institutional development may lead to a path-dependent pattern of development. In the final part of the book, North explains the implications of this analysis for economic theory and economic history. He indicates how institutional analysis must be incorporated into neo-classical theory and explores the potential for the construction of a dynamic theory of long-term economic change. Douglass C. North is Director of the Center of Political Economy and Professor of Economics and History at Washington University in St. Louis. He is a past president of the Economic History Association and Western Economics Association and a Fellow, American Academy of Arts and Sciences. He has written over sixty articles for a variety of journals and is the author of The Rise of the Western World: A New Economic History (CUP, 1973, with R.P. Thomas) and Structure and Change in Economic History (Norton, 1981). Professor North is included in Great Economists Since Keynes edited by M. Blaug (CUP, 1988 paperback ed.)}, + isbn = {978-0-521-39416-1} +} + +@inproceedings{oday_orienteering_1993, + title = {Orienteering in an Information Landscape: How Information Seekers Get from Here to There}, + shorttitle = {Orienteering in an Information Landscape}, + booktitle = {Proceedings of the {{SIGCHI}} Conference on {{Human}} Factors in Computing Systems - {{CHI}} '93}, + author = {O'Day, Vicki L. and Jeffries, Robin}, + date = {1993}, + pages = {438--445}, + publisher = {{ACM Press}}, + location = {{Amsterdam, The Netherlands}}, + eventtitle = {The {{SIGCHI}} Conference}, + isbn = {978-0-89791-575-5}, + langid = {english} +} + +@article{oliver_paradox_1988, + title = {The {{Paradox}} of {{Group Size}} in {{Collective Action}}: {{A Theory}} of the {{Critical Mass}}. 
{{II}}.}, + shorttitle = {The {{Paradox}} of {{Group Size}} in {{Collective Action}}}, + author = {Oliver, Pamela E. and Marwell, Gerald}, + date = {1988}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {53}, + number = {1}, + eprint = {2095728}, + eprinttype = {jstor}, + pages = {1--8}, + issn = {0003-1224}, + abstract = {Many sociologists incorrectly believe that larger groups are less likely to support collective action than smaller ones. The effect of group size, in fact, depends on costs. If the costs of collective goods rise with the number who share in them, larger groups act less frequently than smaller ones. If the costs vary little with group size, larger groups should exhibit more collective action than smaller ones because larger groups have more resources and are more likely to have a critical mass of highly interested and resourceful actors. The positive effects of group size increase with group heterogeneity and nonrandom social ties. Paradoxically, when groups are heterogeneous, fewer contributors may be needed to provide a good to larger groups, making collective action less complex and less expensive.}, + file = {/home/nathante/Zotero/storage/KDKQCV4I/Oliver and Marwell - 1988 - The Paradox of Group Size in Collective Action A .pdf} +} + +@book{olson_logic_1965, + title = {The Logic of Collective Action: {{Public}} Goods and the Theory of Groups}, + shorttitle = {The Logic of Collective Action}, + author = {Olson, Mancur}, + date = {1965}, + publisher = {{Harvard University Press}}, + location = {{Cambridge, MA}}, + langid = {english}, + keywords = {Business & Economics / Economics / General}, + file = {/home/nathante/Zotero/storage/6D295U4U/Olson - 1965 - The logic of collective action Public goods and t.pdf} +} + +@article{oreilly_work_1989, + title = {Work {{Group Demography}}, {{Social Integration}}, and {{Turnover}}}, + author = {O'Reilly, Charles A. and Caldwell, David F. 
and Barnett, William P.}, + date = {1989}, + journaltitle = {Administrative Science Quarterly}, + volume = {34}, + number = {1}, + eprint = {2392984}, + eprinttype = {jstor}, + pages = {21--37}, + publisher = {{[Sage Publications, Inc., Johnson Graduate School of Management, Cornell University]}}, + issn = {0001-8392}, + abstract = {Using 20 actual work units with 79 respondents, this study explores the relationships among group demography, social integration of the group, and individual turnover. Results suggest that heterogeneity in group tenure is associated with lower levels of group social integration which, in turn, is negatively associated with individual turnover. Models of these effects using individual-level integration measures are not significant. Further, the results suggest that it is the more distant group members who are more likely to leave. Both individual-level and group-level age demography directly affect turnover and are not moderated by social integration. The findings suggest a process by which group demography affects outcomes and support the usefulness of organizational demography for understanding group and individual functioning.} +} + +@inproceedings{orlikowski_learning_1992, + title = {Learning from Notes: {{Organizational}} Issues in Groupware Implementation}, + shorttitle = {Learning from {{Notes}}}, + booktitle = {Proceedings of the 1992 {{ACM Conference}} on {{Computer-supported Cooperative Work}}}, + author = {Orlikowski, Wanda J.}, + date = {1992}, + series = {{{CSCW}} '92}, + pages = {362--369}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {This paper explores the introduction of groupware into an organization to understand the changes in work practices and social interaction facilitated by the technology. The results suggest that people’s mental models and organizations’ structure and culture significantly influence how groupware is implemented and used. 
Specifically, in the absence of mental models that stressed its collaborative nature, groupware was interpreted in terms of familiar personal, stand-alone technologies such as spreadsheets. Further, the culture and structure provided few incentives or norms for cooperating or sharing expertise, hence the groupware on its own was unlikely to engender collaboration. Recognizing the central influence of these cognitive and organizational elements is critical to developers, researchers, and practitioners of groupware.}, + isbn = {978-0-89791-542-7}, + venue = {Toronto, Ontario, Canada}, + keywords = {groupware,implementation,Lotus Notes,organizational factors,Technological Frames}, + file = {/home/nathante/Zotero/storage/VAHU9XE7/Orlikowski - 1992 - Learning from Notes Organizational Issues in Grou.pdf} +} + +@article{park_human_1936, + title = {Human {{Ecology}}}, + author = {Park, Robert Ezra}, + date = {1936-07-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {42}, + number = {1}, + pages = {1--15}, + issn = {0002-9602}, + abstract = {Human ecology is an attempt to apply to the interrelations of human beings a type of analysis previously applied to the interrelations of plants and animals. The term "symbiosis" describes a type of social relationship that is biotic rather than cultural. This biotic social order comes into existence and is maintained by competition. In plant and animal societies competition is unrestricted by an institutional or moral order. Human society is a consequence and effect of this limitation of the symbiotic social order by the cultural. 
Different social sciences are concerned with the forms which this limitation of the natural or ecological social order assumes on (1) the economic, (2) the political, and (3) the moral level.}, + file = {/home/nathante/Zotero/storage/CBVGR8RU/Park - 1936 - Human Ecology.pdf;/home/nathante/Zotero/storage/UKMY6VUE/217327.html} +} + +@article{pfeil_cultural_2006, + ids = {pfeil_cultural_2006-1}, + title = {Cultural Differences in Collaborative Authoring of Wikipedia}, + author = {Pfeil, Ulrike and Zaphiris, Panayiotis and Ang, Chee Siang}, + date = {2006}, + journaltitle = {Journal of Computer-Mediated Communication}, + volume = {12}, + number = {1}, + pages = {88--113}, + publisher = {{Oxford Academic}}, + issn = {1083-6101}, + abstract = {This article explores the relationship between national culture and computer-mediated communication (CMC) in Wikipedia. The articles on the topic game from the French, German, Japanese, and Dutch Wikipedia websites were studied using content analysis methods. Correlations were investigated between patterns of contributions and the four dimensions of cultural influences proposed by Hofstede (Power Distance, Collectivism versus Individualism, Femininity versus Masculinity, and Uncertainty Avoidance). The analysis revealed cultural differences in the style of contributions across the cultures investigated, some of which are correlated with the dimensions identified by Hofstede. These findings suggest that cultural differences that are observed in the physical world also exist in the virtual world.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/25UVU6KP/Pfeil et al. - 2006 - Cultural Differences in Collaborative Authoring of.pdf;/home/nathante/Zotero/storage/HTBSK98G/Pfeil et al. 
- 2006 - Cultural differences in collaborative authoring of.pdf;/home/nathante/Zotero/storage/NG42CGVS/4582988.html;/home/nathante/Zotero/storage/NN9FT3QC/4582988.html} +} + +@article{poor_computer_2014, + title = {Computer Game Modders’ Motivations and Sense of Community: {{A}} Mixed-Methods Approach}, + shorttitle = {Computer Game Modders’ Motivations and Sense of Community}, + author = {Poor, Nathaniel}, + date = {2014-12-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {16}, + number = {8}, + pages = {1249--1267}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Computer game modding, from modifying, combines several important issues: digital skills, play, community, making, and remixing. Yet, little academic work has explored the motivations and sense of community that modders have. This study is the first quantitative survey of game modders, and combines quantitative survey data with qualitative interview material. Findings suggest that modders are both old and young, mod more than one game or game series, have a strong sense of community, and enjoy helping others. Many respondents had contributed to other mods or had co-authored mods, and modding communities may function as online collaboratories. 
Although some research stresses how modders hope to get jobs in the gaming industry, overall the industry was not a motivator for most respondents.}, + langid = {english}, + keywords = {Collaboration,games,modding,motivation,online community}, + file = {/home/nathante/Zotero/storage/SY3IWUL2/Poor - 2014 - Computer game modders’ motivations and sense of co.pdf} +} + +@article{poor_mechanisms_2005, + title = {Mechanisms of an {{Online Public Sphere}}: The {{Website Slashdot}}}, + shorttitle = {Mechanisms of an {{Online Public Sphere}}}, + author = {Poor, Nathaniel}, + date = {2005-01-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {10}, + issn = {1083-6101}, + abstract = {Both the theory of the public sphere and the utopian rhetoric surrounding the Internet have been a focus of scholars for some time. Given the ability of people to connect with others around the globe through the Internet, could the Internet give rise to online public spheres? If so, how would such spaces work? This article proposes that public spheres do exist on the Internet, and details how one functions. The case under study is the website Slashdot (http://slashdot.org), an online community of computer enthusiasts. The article studies the mechanisms, both normative and in code, that are vital to Slashdot's functioning, and shows how they help Slashdot function as a public sphere.}, + issue = {JCMC1028}, + file = {/home/nathante/Zotero/storage/5V4CJ2HJ/4614448.html} +} + +@article{poteete_heterogeneity_2004, + title = {Heterogeneity, {{Group Size}} and {{Collective Action}}: {{The Role}} of {{Institutions}} in {{Forest Management}}}, + shorttitle = {Heterogeneity, {{Group Size}} and {{Collective Action}}}, + author = {Poteete, Amy R. 
and Ostrom, Elinor}, + date = {2004}, + journaltitle = {Development and Change}, + volume = {35}, + number = {3}, + pages = {435--461}, + issn = {1467-7660}, + abstract = {Collective action for sustainable management among resource-dependent populations has important policy implications. Despite considerable progress in identifying factors that affect the prospects for collective action, no consensus exists about the role played by heterogeneity and size of group. The debate continues in part because of a lack of uniform conceptualization of these factors, the existence of non-linear relationships, and the mediating role played by institutions. This article draws on research by scholars in the International Forestry Resources and Institutions (IFRI) research network which demonstrates that some forms of heterogeneity do not negatively affect some forms of collective action. More importantly, IFRI research draws out the interrelations among group size, heterogeneity, and institutions. Institutions can affect the level of heterogeneity or compensate for it. Group size appears to have a non-linear relationship to at least some forms of collective action. Moreover, group size may be as much an indicator of institutional success as a precondition for such success.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/MVD6QER6/Poteete and Ostrom - 2004 - Heterogeneity, Group Size and Collective Action T.pdf} +} + +@article{ribeiro_platform_2021, + title = {Do {{Platform Migrations Compromise Content Moderation}}? {{Evidence}} from r/{{The}}\_{{Donald}} and r/{{Incels}}}, + shorttitle = {Do {{Platform Migrations Compromise Content Moderation}}?}, + author = {Ribeiro, Manoel Horta and Jhaver, Shagun and Zannettou, Savvas and Blackburn, Jeremy and Stringhini, Gianluca and De Cristofaro, Emiliano and West, Robert}, + date = {2021-10-13}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. 
Interact.}, + volume = {5}, + pages = {1--24}, + issn = {2573-0142}, + issue = {CSCW2}, + langid = {english}, + file = {/home/nathante/Zotero/storage/DUI35QYA/Horta Ribeiro et al. - 2021 - Do Platform Migrations Compromise Content Moderati.pdf} +} + +@article{ridgeway_status_1982, + title = {Status in {{Groups}}: {{The Importance}} of {{Motivation}}}, + shorttitle = {Status in {{Groups}}}, + author = {Ridgeway, Cecilia L.}, + date = {1982}, + journaltitle = {American Sociological Review}, + volume = {47}, + number = {1}, + eprint = {2095043}, + eprinttype = {jstor}, + pages = {76--88}, + issn = {0003-1224}, + abstract = {This paper presents evidence that members' perceived motivation towards the group is an important determinant of the influence and status they attain in task-oriented groups. Following Meeker and Weitzel-O'Neill (1977) and Ridgeway (1978), it was suggested that people who enter a group with low external status characteristics (e.g., women in mixed sex groups, blacks in interracial groups) can use the communication of group-oriented motivation in combination with reasonably competent task contributions to overcome the fundamental inequality ("interaction disability") they would normally face, and achieve reasonably high levels of influence in the group. Results of an experiment using mixed and same sex groups showed that while group-oriented members are generally more influential than self-oriented ones, as predicted, the size of motivation's effect is dependent upon the member's external status characteristics. Females in male groups (low external status members) achieved fairly high influence and status when they appeared group-oriented, but very low status when self-oriented. 
As expected males in a female group (high external status members) achieved high influence regardless of their motivation.}, + file = {/home/nathante/Zotero/storage/F5GJIJMB/Ridgeway-1982-Status_in_groups.pdf} +} + +@book{ridgeway_status_2019, + title = {Status: Why Is It Everywhere? Why Does It Matter?}, + shorttitle = {Status}, + author = {Ridgeway, Cecilia L}, + date = {2019}, + abstract = {"Status is ubiquitous in modern life, yet our understanding of its role as a basic driver of inequality is surprisingly limited. In Status, sociologist and social psychologist Cecilia Ridgeway examines how this ancient and universal form of inequality influences today's ostensibly meritocratic institutions and why it matters. Ridgeway illuminates the complex ways in which status arises when people work together towards common goals, such as in classroom discussions, family decisions, or workplace deliberations. Ridgeway's research on status has important implications for our understanding of social inequality. Distinct from power or wealth, status is prized because it provides affirmation from others and affords access to valuable resources. Ridgeway demonstrates how the conferral of status inevitably leads to differing life outcomes for individuals, with impacts on pay, wealth creation, and health and wellbeing. Status beliefs are widely held views about who is better in society than others in terms of esteem, wealth, or competence. These beliefs ultimately confer advantages which can exacerbate social inequality. Ridgeway notes that status advantages based on race, gender, and class, such as the belief that white men are more competent than others because of their race and gender, have the greatest consequences for inequality by affording greater social and economic opportunities. 
Ridgeway argues that status beliefs make lower status groups less likely to challenge the status quo and greatly enhance higher status groups' ability to maintain their advantages in resources and access to positions of power. She illustrates how many lower status people, when given a baseline level of dignity and respect - being seen, for example, as poor but hardworking - will accept their lower status. She also shows that people remain willfully blind to status beliefs and their effects because recognizing them can lead to emotional discomfort. Acknowledging the insidious role of status in our lives would require many higher-status individuals to accept that they may not have succeeded based on their own merit; and many lower-status individuals would have to acknowledge that they may have been discriminated against. While Ridgeway notes the profound impact of status on society, she suggests that social inequality is not an inevitable consequence of our status beliefs. She shows how status beliefs can be undermined - as when we reject the idea that all racial and gender traits are fixed at birth, thus disrupting the idea that women and people of color are less competent than their male and white counterparts. 
Ridgeway both notes the profound impact of status on social inequality and charts a way forward that may allow it to have a less detrimental impact on our lives"--}, + isbn = {978-1-61044-889-5}, + langid = {english}, + annotation = {OCLC: 1104214327}, + file = {/home/nathante/Zotero/storage/ZNCJF4F3/Ridgeway_2019_Status.pdf} +} + +@article{ridings_antecedents_2002, + title = {Some Antecedents and Effects of Trust in Virtual Communities}, + author = {Ridings, Catherine M and Gefen, David and Arinze, Bay}, + date = {2002-12-01}, + journaltitle = {The Journal of Strategic Information Systems}, + shortjournal = {The Journal of Strategic Information Systems}, + volume = {11}, + number = {3}, + pages = {271--295}, + issn = {0963-8687}, + abstract = {This study explores several downstream effects of trust in virtual communities and the antecedents of trust in this unique type of environment. The data, applying an existing scale to measure two dimensions of trust (ability and benevolence/integrity), show that trust had a downstream effect on members' intentions to both give information and get information through the virtual community. Both these apparent dimensions of trust were increased through perceived responsive relationships in the virtual community, by a general disposition to trust, and by the belief that others confide personal information.}, + langid = {english}, + keywords = {Perceived responsiveness,Trust,Virtual communities}, + file = {/home/nathante/Zotero/storage/KLVEHLMR/S0963868702000215.html} +} + +@article{ridings_virtual_2004, + ids = {ridings_virtual_2004-1}, + title = {Virtual {{Community Attraction}}: {{Why People Hang}} out {{Online}}}, + shorttitle = {Virtual {{Community Attraction}}}, + author = {Ridings, Catherine M. and Gefen, David}, + date = {2004-11-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {10}, + number = {1}, + abstract = {Abstract. 
Understanding the attraction of virtual communities is crucial to organizations that want to tap into their enormous information potential. Existing}, + langid = {english}, + file = {/home/nathante/Zotero/storage/D64A3U6W/4614455.html;/home/nathante/Zotero/storage/NFKKWKZN/4614455.html} +} + +@incollection{robert_crowd_2015, + title = {Crowd {{Size}}, {{Diversity}} and {{Performance}}}, + booktitle = {Proceedings of the 33rd {{Annual ACM Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Robert, Lionel and Romero, Daniel M.}, + date = {2015-04-18}, + pages = {1379--1382}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Crowds are increasingly being adopted to solve complex problems. Size and diversity are two key characteristics of crowds; however their relationship to performance is often paradoxical. To better understand the effects of crowd size and diversity on crowd performance we conducted a study on the quality of 4,317 articles in the WikiProject Film community. The results of our study suggest that crowd size leads to better performance when crowds are more diverse. However, there is a break-even point -- smaller, less diverse crowds can outperform more diverse crowds of similar size. Our results offer new insights into the effects of size and diversity on the performance of crowds.}, + isbn = {978-1-4503-3145-6}, + keywords = {diversity,performance,team size,wikipedia}, + file = {/home/nathante/Zotero/storage/KVVXJ4WP/Robert and Romero - 2015 - Crowd Size, Diversity and Performance.pdf} +} + +@article{ruef_structure_2003, + title = {The {{Structure}} of {{Founding Teams}}: {{Homophily}}, {{Strong Ties}}, and {{Isolation}} among {{U}}.{{S}}. {{Entrepreneurs}}}, + shorttitle = {The {{Structure}} of {{Founding Teams}}}, + author = {Ruef, Martin and Aldrich, Howard E. 
and Carter, Nancy M.}, + date = {2003}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {68}, + number = {2}, + eprint = {1519766}, + eprinttype = {jstor}, + pages = {195--222}, + issn = {0003-1224}, + abstract = {The mechanisms governing the composition of formal social groups (e.g., task groups, organizational founding teams) remain poorly understood, owing to (1) a lack of representative sampling from groups found in the general population, (2) a "success" bias among researchers that leads them to consider only those groups that actually emerge and survive, and (3) a restrictive focus on some theorized mechanisms of group composition (e.g., homophily) to the exclusion of others. These shortcomings are addressed by analyzing a unique, representative data set of organizational founding teams sampled from the U.S. population. Rather than simply considering the properties of those founding teams that are empirically observed, a novel quantitative methodology generates the distribution of all possible teams, based on combinations of individual and relational characteristics. This methodology permits the exploration of five mechanisms of group composition--those based on homophily, functionality, status expectations, network constraint, and ecological constraint. Findings suggest that homophily and network constraints based on strong ties have the most pronounced effect on group composition. 
Social isolation (i.e., exclusion from a group) is more likely to occur as a result of ecological constraints on the availability of similar alters in a locality than as a result of status-varying membership choices.} +} + +@inproceedings{rusak_properties_2014, + title = {The Properties of {{Twitter}} Network Communications among Teenagers}, + booktitle = {Proceedings of the Companion Publication of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing}, + author = {Rusak, Gili}, + date = {2014-02-15}, + series = {{{CSCW Companion}} '14}, + pages = {233--236}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {We study, quantitatively, for the first time, the traits of Twitter teenager networks. The results are compared with general population users, and show that teenagers behave uniquely. Teens tend to follow more users and increase friendships over time. They tend to friend individuals online who they already know offline. Teenagers also use Twitter as a news media and form supportive and dense communities. These results shed new light on the attributes of teenage communities. We can then utilize these ideas to find solutions to emerging problems involving the massive use of social media. 
For example, Twitter can be used as a positive tool for the prevention of bad habits among teens.}, + isbn = {978-1-4503-2541-7}, + keywords = {social networks,teenagers,twitter}, + file = {/home/nathante/Zotero/storage/S9RPN7JX/Rusak - 2014 - The properties of Twitter network communications a.pdf} +} + +@article{schoener_resource_1974, + title = {Resource {{Partitioning}} in {{Ecological Communities}}}, + author = {Schoener, Thomas W.}, + date = {1974}, + journaltitle = {Science}, + volume = {185}, + number = {4145}, + eprint = {1738612}, + eprinttype = {jstor}, + pages = {27--39}, + issn = {0036-8075}, + file = {/home/nathante/Zotero/storage/R86IDGJN/1738612.pdf;/home/nathante/Zotero/storage/U4UCJ2BT/Schoener - 1974 - Resource Partitioning in Ecological Communities.pdf} +} + +@article{seering_metaphors_2020, + ids = {seering_metaphors_2020-1}, + title = {Metaphors in Moderation}, + author = {Seering, Joseph and Kaufman, Geoff and Chancellor, Stevie}, + date = {2020-10-20}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {1461444820964968}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Volunteer content moderators are essential to the social media ecosystem through the roles they play in managing and supporting online social spaces. Recent work has described moderation primarily as a functional process of actions that moderators take, such as making rules, removing content, and banning users. However, the nuanced ways in which volunteer moderators envision their roles within their communities remain understudied. Informed by insights gained from 79 interviews with volunteer moderators from three platforms, we present a conceptual map of the territory of social roles in volunteer moderation, which identifies five categories with 22 metaphorical variants that reveal moderators’ implicit values and the heuristics that help them make decisions. 
These metaphors more clearly enunciate the roles volunteer moderators play in the broader social media content moderation apparatus and can drive purposeful engagement with volunteer moderators to better support the ways they guide and shape their communities.}, + langid = {english}, + keywords = {Facebook,governance,metaphors,moderation,online communities,platforms,Reddit,Twitch}, + file = {/home/nathante/Zotero/storage/6NR5XPIH/Seering et al. - 2020 - Metaphors in moderation.pdf;/home/nathante/Zotero/storage/FY8YDBFH/Seering et al. - 2020 - Metaphors in moderation.pdf} +} + +@article{seering_moderator_2019, + title = {Moderator Engagement and Community Development in the Age of Algorithms}, + author = {Seering, Joseph and Wang, Tony and Yoon, Jina and Kaufman, Geoff}, + date = {2019-01-11}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {1461444818821316}, + issn = {1461-4448}, + abstract = {Online communities provide a forum for rich social interaction and identity development for billions of Internet users worldwide. In order to manage these communities, platform owners have increasingly turned to commercial content moderation, which includes both the use of moderation algorithms and the employment of professional moderators, rather than user-driven moderation, to detect and respond to anti-normative behaviors such as harassment and spread of offensive content. We present findings from semi-structured interviews with 56 volunteer moderators of online communities across three platforms (Twitch, Reddit, and Facebook), from which we derived a generalized model categorizing the ways moderators engage with their communities and explaining how these communities develop as a result. This model contains three processes: being and becoming a moderator; moderation tasks, actions, and responses; and rules and community development. 
In this work, we describe how moderators contribute to the development of meaningful communities, both with and without algorithmic support.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/U8QLP3DK/Seering et al. - 2019 - Moderator engagement and community development in .pdf} +} + +@inproceedings{sengupta_what_2019, + title = {What Are {{Academic Subreddits Talking About}}? {{A Comparative Analysis}} of r/Academia and r/Gradschool}, + shorttitle = {What Are {{Academic Subreddits Talking About}}?}, + booktitle = {Conference {{Companion Publication}} of the 2019 on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Sengupta, Subhasree}, + date = {2019-11-09}, + series = {{{CSCW}} '19}, + pages = {357--361}, + publisher = {{Association for Computing Machinery}}, + location = {{Austin, TX, USA}}, + abstract = {Graduate school and academia can often be challenging and hard to navigate. This work explores how people are using Reddit to reach out to others in academic subreddits to talk about issues one might face in their academic journey. We also explore how such discussion differs between subreddits by comparing two popularly used academic subreddits: r/gradschool and r/academia. For each subreddit, we investigated 300 posts and 500 comments. Using topic modelling, we identify and distinguish the main emergent types of posts and comments we find in these two subreddits. We find that posts in r/academia center more on the challenging aspects of academia such as plagiarism, working in academia, and mental health, whereas r/gradschool posts deal with more generic issues on graduate school life. 
However, we find that the way the community reacts and provides support via comments is similar in both subreddits, mostly by providing moral support and solidarity.}, + isbn = {978-1-4503-6692-2}, + file = {/home/nathante/Zotero/storage/K4K3HITN/Sengupta - 2019 - What are Academic Subreddits Talking About A Comp.pdf} +} + +@inproceedings{sharma_studying_2015, + title = {Studying and {{Modeling}} the {{Connection}} between {{People}}'s {{Preferences}} and {{Content Sharing}}}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Sharma, Amit and Cosley, Dan}, + date = {2015-02-28}, + series = {{{CSCW}} '15}, + pages = {1246--1257}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {People regularly share items using online social media. However, people's decisions around sharing---who shares what to whom and why---are not well understood. We present a user study involving 87 pairs of Facebook users to understand how people make their sharing decisions. We find that even when sharing to a specific individual, people's own preference for an item (individuation) dominates over the recipient's preferences (altruism). People's open-ended responses about how they share, however, indicate that they do try to personalize shares based on the recipient. To explain these contrasting results, we propose a novel process model of sharing that takes into account people's preferences and the salience of an item. We also present encouraging results for a sharing prediction model that incorporates both the senders' and the recipients' preferences. 
These results suggest improvements to both algorithms that support sharing in social media and to information diffusion models.}, + isbn = {978-1-4503-2922-4}, + keywords = {directed sharing,information diffusion,sharing process,user preferences}, + file = {/home/nathante/Zotero/storage/V4LGES2Z/Sharma and Cosley - 2015 - Studying and Modeling the Connection between Peopl.pdf} +} + +@incollection{shaw_communication_1964, + title = {Communication {{Networks}}}, + booktitle = {Advances in {{Experimental Social Psychology}}}, + author = {Shaw, Marvin E.}, + editor = {Berkowitz, Leonard}, + date = {1964}, + volume = {1}, + pages = {111--147}, + publisher = {{Academic Press}}, + abstract = {The communication network imposed on the group influences its problem-solving efficiency, communication activity, organizational development, and member satisfaction. This chapter provides an overview of the communication networks, methodology employed in the research on communication networks and considers some of the structural properties of these networks, and outlines the major findings of experimental investigations of the effects of networks on group process. The major network difference is between centralized and decentralized networks. The direction and magnitude of the effects are modified by the following variables: kind of task, noise, information distribution, member personality, reinforcement, and the kind of prior experience the members have had in networks. The variable having the most pronounced effect is the kind of task the group must perform. Centralized networks are generally more efficient when the task requires merely the collection of information in one place, and decentralized networks are more efficient when further operations must be performed on the information before the task can be completed. 
The experiments discussed in the chapter, presents a great deal about the effects of communication networks, but the precise nature of many of the relationships among variables still remains unclear, and needs much clarification, such as network characteristics, kind of task, and group composition. The communication network studies have provided a great deal of information regarding structural effects upon group behavior. However, much more remains to be done.}, + file = {/home/nathante/Zotero/storage/ZTWM2MSC/Shaw - 1964 - Communication Networks.pdf} +} + +@article{shaw_laboratories_2014, + title = {Laboratories of Oligarchy? {{How}} the Iron Law Extends to Peer Production}, + shorttitle = {Laboratories of {{Oligarchy}}?}, + author = {Shaw, Aaron and Hill, Benjamin Mako}, + date = {2014}, + journaltitle = {Journal of Communication}, + shortjournal = {J Commun}, + volume = {64}, + number = {2}, + pages = {215--238}, + issn = {1460-2466}, + abstract = {Peer production projects like Wikipedia have inspired voluntary associations, collectives, social movements, and scholars to embrace open online collaboration as a model of democratic organization. However, many peer production projects exhibit entrenched leadership and deep inequalities, suggesting that they may not fulfill democratic ideals. Instead, peer production projects may conform to Robert Michels' “iron law of oligarchy,” which proposes that democratic membership organizations become increasingly oligarchic as they grow. Using exhaustive data of internal processes from a sample of 683 wikis, we construct empirical measures of participation and test for increases in oligarchy associated with growth in wikis' contributor bases. 
In contrast to previous studies, we find support for Michels' iron law and conclude that peer production entails oligarchic organizational forms.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/GIII687R/Shaw and Hill - 2014 - Laboratories of oligarchy How the iron law extend.pdf;/home/nathante/Zotero/storage/W3846GC6/full.html} +} + +@article{simpson_status_2012, + title = {Status {{Hierarchies}} and the {{Organization}} of {{Collective Action}}}, + author = {Simpson, Brent and Willer, Robb and Ridgeway, Cecilia L.}, + date = {2012-09}, + journaltitle = {Sociological Theory}, + volume = {30}, + number = {3}, + pages = {149--166}, + issn = {0735-2751, 1467-9558}, + abstract = {Most work on collective action assumes that group members are undifferentiated by status, or standing, in the group. Yet such undifferentiated groups are rare, if they exist at all. Here we extend an existing sociological research program to address how extant status hierarchies help organize collective actions by coordinating how much and when group members should contribute to group efforts. We outline three theoretically derived predictions of how status hierarchies organize patterns of behavior to produce larger public goods. We review existing evidence relevant to two of the three hypotheses and present results from a preliminary experimental test of the third. Findings are consistent with the model. The tendency of these dynamics to lead status-differentiated groups to produce larger public goods may help explain the ubiquity of hierarchy in groups, despite the often negative effects of status inequalities for many group members.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/WVT6KAAY/Simpson et al. 
- 2012 - Status Hierarchies and the Organization of Collect.pdf} +} + +@article{sobre-denton_virtual_2016, + title = {Virtual Intercultural Bridgework: {{Social}} Media, Virtual Cosmopolitanism, and Activist Community-Building}, + shorttitle = {Virtual Intercultural Bridgework}, + author = {Sobré-Denton, Miriam}, + date = {2016-09-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {18}, + number = {8}, + pages = {1715--1731}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Social media facilitates a global–local orientation to the world that allows individuals to engage in virtual community-building and participate in communication to build global citizenship. This research situates virtual cosmopolitanism in the age of new media and globalization, describing it as a means for trans-local and transnational community-building for social justice movements and activism, including community liaison-building across corporeal borders and boundaries. New media as a site of imagined communities that become larger than their component parts is then analyzed through examining several virtual cosmopolitan communities. 
The essay concludes with assumptions about the qualities of virtual cosmopolitan communities, and recommendations for how they can facilitate intercultural liaisons for social justice activism and community-building across difference.}, + langid = {english}, + keywords = {Community-building,cosmopolitan solidarity,online activism,social justice,social media,virtual cosmopolitanism}, + file = {/home/nathante/Zotero/storage/Z5D3VAMN/Sobré-Denton - 2016 - Virtual intercultural bridgework Social media, vi.pdf} +} + +@inproceedings{soliman_characterization_2019, + title = {A {{Characterization}} of {{Political Communities}} on {{Reddit}}}, + booktitle = {Proceedings of the 30th {{ACM Conference}} on {{Hypertext}} and {{Social Media}}}, + author = {Soliman, Ahmed and Hafer, Jan and Lemmerich, Florian}, + date = {2019-09-12}, + series = {{{HT}} '19}, + pages = {259--263}, + publisher = {{Association for Computing Machinery}}, + location = {{Hof, Germany}}, + abstract = {The social news aggregator Reddit is among the most popular websites on the internet. Many online users use the platform to anonymously share and discuss (mostly US-centric) political content. In this ongoing work, we perform a comparative large-scale analysis of political subcommunities (subreddits) on Reddit using a dataset of more than 100 million posts from around 5 million users. In particular, we investigate these communities with respect to (1) the content posted, (2) their relationships to other subreddits, and (3) the distribution of attention received in these subcommunities. We find that left-leaning communities use derogatory language less often than right-leaning communities, but are more focused on news sources reflecting their own political leaning. We also observe that right-leaning communities are more interconnected with right-leaning subreddits on European politics. 
Finally, the attention of individual submissions (as measured by their number of up-votes or comments received) is spread more evenly in right-leaning communities.}, + isbn = {978-1-4503-6885-8}, + file = {/home/nathante/Zotero/storage/R2YM5F8X/Soliman et al. 
- 2019 - A Characterization of Political Communities on Red.pdf} +} + +@inproceedings{starbird_crowd_2012, + title = {Crowd Computation: Organizing Information during Mass Disruption Events}, + shorttitle = {Crowd Computation}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work Companion}}}, + author = {Starbird, Kate}, + date = {2012}, + series = {{{CSCW}} '12}, + pages = {339--342}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {This research examines large-scale human interaction occurring through social media during times of mass disruption, seeking to understand how the connected crowd acts to organize a flood of data moving through those platforms into useful information resources. The work combines empirical analysis of social media communication, interviews, and participant observation to explore how people work to organize information and how they use social media platforms to organize themselves to do this work. Synthesizing findings from four distinct, yet interrelated studies, this research progresses towards a new conceptualization of the distributed, connected work of organizing information during mass disruption events.}, + isbn = {978-1-4503-1051-2} +} + +@article{swaminathan_resource_2001, + title = {Resource Partitioning and the Evolution of Specialist Organizations: {{The}} Role of Location and Identity in the {{U}}.{{S}}. Wine Industry}, + shorttitle = {Resource {{Partitioning}} and the {{Evolution}} of {{Specialist Organizations}}}, + author = {Swaminathan, Anand}, + date = {2001-12-01}, + journaltitle = {Academy of Management Journal}, + shortjournal = {ACAD MANAGE J}, + volume = {44}, + number = {6}, + pages = {1169--1185}, + issn = {0001-4273, 1948-0989}, + abstract = {Analyses of founding and mortality rates of specialist organizations in the U.S. 
wine industry over the period 1941-90 support Carroll’s (1985) location-based resource-partitioning model—crowding of generalists in the market center creates opportunities for specialists. Further, specialists are adversely affected when they violate their organizational form’s identity characteristics and also when generalists can assume a robust identity allowing them to operate in both specialist and generalist industry segments. The results suggest a prominent role for an organizational form’s identity in resource partitioning.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/HSF2S5JM/1169.html} +} + +@inproceedings{tan_all_2015, + title = {All Who Wander: {{On}} the Prevalence and Characteristics of Multi-Community Engagement}, + shorttitle = {All Who Wander}, + booktitle = {Proceedings of the 24th {{International Conference}} on {{World Wide Web}}}, + author = {Tan, Chenhao and Lee, Lillian}, + date = {2015}, + series = {{{WWW}} '15}, + pages = {1056--1066}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + location = {{Republic and Canton of Geneva, Switzerland}}, + abstract = {Although analyzing user behavior within individual communities is an active and rich research domain, people usually interact with multiple communities both on- and off-line. How do users act in such multi-community environments? Although there are a host of intriguing aspects to this question, it has received much less attention in the research community in comparison to the intra-community case. In this paper, we examine three aspects of multi-community engagement: the sequence of communities that users post to, the language that users employ in those communities, and the feedback that users receive, using longitudinal posting behavior on Reddit as our main data source, and DBLP for auxiliary experiments. We also demonstrate the effectiveness of features drawn from these aspects in predicting users' future level of activity. 
One might expect that a user's trajectory mimics the "settling-down" process in real life: an initial exploration of sub-communities before settling down into a few niches. However, we find that the users in our data continually post in new communities; moreover, as time goes on, they post increasingly evenly among a more diverse set of smaller communities. Interestingly, it seems that users that eventually leave the community are "destined" to do so from the very beginning, in the sense of showing significantly different "wandering" patterns very early on in their trajectories; this finding has potentially important design implications for community maintainers. Our multi-community perspective also allows us to investigate the "situation vs. personality" debate from language usage across different communities.}, + isbn = {978-1-4503-3469-3}, + keywords = {DBLP,language,lifecycle,multiple communities,reddit}, + file = {/home/nathante/Zotero/storage/8GL2XQG3/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf;/home/nathante/Zotero/storage/J3RVCH26/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf} +} + +@inproceedings{tan_tracing_2018, + title = {Tracing Community Genealogy: How New Communities Emerge from the Old}, + shorttitle = {Tracing {{Community Genealogy}}}, + booktitle = {Proceedings of the {{Twelfth International Conference}} on {{Web}} and {{Social Media}} ({{ICWSM}} '18)}, + author = {Tan, Chenhao}, + date = {2018}, + pages = {395--404}, + publisher = {{AAAI}}, + location = {{Palo Alto, California}}, + abstract = {The process by which new communities emerge is a central research issue in the social sciences. While a growing body of research analyzes the formation of a single community by examining social networks between individuals, we introduce a novel community-centered perspective. We highlight the fact that the context in which a new community emerges contains numerous existing communities. 
We reveal the emerging process of communities by tracing their early members’ previous community memberships.}, + file = {/home/nathante/Zotero/storage/QEAEMFYR/Tan - 2018 - Tracing Community Genealogy How New Communities E.pdf} +} + +@article{tausczik_impact_2019, + title = {The Impact of Group Size on the Discovery of Hidden Profiles in Online Discussion Groups}, + author = {Tausczik, Yla and Huang, Xiaoyun}, + date = {2019-11-14}, + journaltitle = {ACM Transactions on Social Computing}, + shortjournal = {Trans. Soc. Comput.}, + volume = {2}, + number = {3}, + pages = {10:1--10:25}, + issn = {2469-7818}, + abstract = {Online discussions help individuals to gather knowledge and make important decisions in diverse areas from health and finance to computing and data science. Online discussion groups exhibit unique group dynamics not found in traditional small groups, such as staggered participation and asynchronous communication, and the effects of these features on knowledge sharing is not well understood. In this article, we focus on one such aspect: wide variation in group size. Using a controlled experiment with a hidden profile task, we evaluate online discussion groups’ capacity to share distributed knowledge when group size ranges from 4 to 32 participants. 
We found that individuals in medium-sized discussions performed the best, and we suggest that this represents a tradeoff in which larger groups tend to share more facts, but have more difficulty than smaller groups at resolving misunderstandings.}, + keywords = {collective information processing,collective intelligence,Hidden profile,knowledge sharing,online forums}, + file = {/home/nathante/Zotero/storage/FNSPR8FH/Tausczik_Huang_2019_The Impact of Group Size on the Discovery of Hidden Profiles in Online.pdf} +} + +@inproceedings{teblunthuis_density_2017, + title = {Density Dependence without Resource Partitioning: Population Ecology on {{Change}}.Org}, + shorttitle = {Density {{Dependence Without Resource Partitioning}}}, + booktitle = {Companion of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2017}, + series = {{{CSCW}} '17 {{Companion}}}, + pages = {323--326}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {E-petitioning is a prominent form of Internet-based collective action. We apply theories from organizational population ecology to investigate whether similar petitions compete for signatures. We use latent Dirichlet allocation (LDA) topic modeling to identify topical niches. Using these niches, we test two theories from population ecology on 442,109 Change.org petitions. First, we find evidence for density dependence, an inverse-U-shaped relationship between the density of a petition's niche and the number of signatures the petition obtains. This suggests e-petitioning is competitive and that e-petitions draw on overlapping resource pools. Second, although resource partitioning theory predicts that topically specialized petitions will obtain more signatures in concentrated populations, we find no evidence of this. 
This suggests that specialists struggle to avoid competition with generalists.}, + isbn = {978-1-4503-4688-7}, + file = {/home/nathante/Zotero/storage/54585RCP/TeBlunthuis et al. - 2017 - Density dependence without resource partitioning .pdf} +} + +@online{teblunthuis_identifying_2021, + ids = {teblunthuis_community_2021,teblunthuis_community_2021-1,teblunthuis_identifying_2021-1}, + title = {Identifying {{Competition}} and {{Mutualism Between Online Groups}}}, + author = {TeBlunthuis, Nathan and Hill, Benjamin Mako}, + date = {2021-07-14}, + eprint = {2107.06970}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Platforms often host multiple online groups with highly overlapping topics and members. How can researchers and designers understand how interactions between related groups affect measures of group health? Inspired by population ecology, prior social computing research has studied competition and mutualism among related groups by correlating group size with degrees of overlap in content and membership. The resulting body of evidence is puzzling as overlaps seem sometimes to help and other times to hurt. We suggest that this confusion results from aggregating inter-group relationships into an overall environmental effect instead of focusing on networks of competition and mutualism among groups. We propose a theoretical framework based on community ecology and a method for inferring competitive and mutualistic interactions from time series participation data. 
We compare population and community ecology analyses of online community growth by analyzing clusters of subreddits with high user overlap but varying degrees of competition and mutualism.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/3NW96WBR/TeBlunthuis_Hill_2021_Identifying Competition and Mutualism Between Online Groups.pdf;/home/nathante/Zotero/storage/XRLZFVHD/TeBlunthuis_Hill_2021_Identifying Competition and Mutualism Between Online Groups.pdf;/home/nathante/Zotero/storage/ZTDDJ9KW/TeBlunthuis and Hill - 2018 - A Community Ecology Approach for Identifying Compe.pdf;/home/nathante/Zotero/storage/MJH368X5/2107.html;/home/nathante/Zotero/storage/VK77YHAC/2107.html} +} + +@inproceedings{teblunthuis_revisiting_2018, + title = {Revisiting "{{The}} Rise and Decline" in a Population of Peer Production Projects}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2018}, + pages = {355:1--355:7}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Do patterns of growth and stabilization found in large peer production systems such as Wikipedia occur in other communities? This study assesses the generalizability of Halfaker et al.'s influential 2013 paper on "The Rise and Decline of an Open Collaboration System." We replicate its tests of several theories related to newcomer retention and norm entrenchment using a dataset of hundreds of active peer production wikis from Wikia. We reproduce the subset of the findings from Halfaker and colleagues that we are able to test, comparing both the estimated signs and magnitudes of our models. 
Our results support the external validity of Halfaker et al.'s claims that quality control systems may limit the growth of peer production communities by deterring new contributors and that norms tend to become entrenched over time.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/7YEVSVQM/TeBlunthuis et al. - 2018 - Revisiting The Rise and Decline in a Population .pdf} +} + +@article{triggs_context_2019, + ids = {triggs_context_2021}, + title = {Context Collapse and Anonymity among Queer {{Reddit}} Users}, + author = {Triggs, Anthony Henry and Møller, Kristian and Neumayer, Christina}, + date = {2019-11-27}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {23}, + number = {1}, + pages = {5--21}, + publisher = {{SAGE Publications}}, + issn = {1461-4448, 1461-7315}, + abstract = {This article maps out how people in queer communities on Reddit navigate context collapse. Drawing upon data from interviews with queer Reddit users and insights from other studies of context collapse in digital media, we argue that context collapse also occurs in anonymity-based social media. The interviews reveal queer Reddit users’ practices of context differentiation, occurring at four levels: somatic, system, inter-platform and intra-platform. We use these levels to map out how lesbian, gay, bisexual, transgender and queer or questioning (LGBTQ) people express their identities and find community on Reddit while seeking to minimize the risks imposed by multiple impending context collapses. Because living an authentic queer life can make subjects vulnerable, we find that despite Reddit’s anonymity, sophisticated practices of context differentiation are developed and maintained. 
We argue that context collapse in an era of big data and social media platforms operates beyond the control of any one user, which causes problems, particularly for queer people.}, + langid = {english}, + keywords = {Anonymity,bisexual,context collapse,gay,lesbian,Reddit,risk,transgender and queer or questioning}, + file = {/home/nathante/Zotero/storage/LSEXQYFM/Triggs et al. - 2021 - Context collapse and anonymity among queer Reddit .pdf} +} + +@article{tufekci_not_2013-1, + title = {"{{Not}} This One": Social Movements, the Attention Economy, and Microcelebrity Networked Activism}, + shorttitle = {"{{Not}} This One"}, + author = {Tufekci, Zeynep}, + date = {2013}, + journaltitle = {American Behavioral Scientist}, + pages = {0002764213479369}, + issn = {0002-7642}, + file = {/home/nathante/Zotero/storage/URM9ESR8/Tufekci_2013_ Not This One.pdf;/home/nathante/Zotero/storage/ZBQFHXMF/Tufekci_2013_ Not This One.pdf} +} + +@article{turner_where_2005, + title = {Where the {{Counterculture Met}} the {{New Economy}}: {{The WELL}} and the {{Origins}} of {{Virtual Community}}}, + shorttitle = {Where the {{Counterculture Met}} the {{New Economy}}}, + author = {Turner, Fred}, + date = {2005}, + journaltitle = {Technology and Culture}, + volume = {46}, + number = {3}, + pages = {485--512}, + issn = {1097-3729}, + abstract = {In lieu of an abstract, here is a brief excerpt of the content: Technology and Culture 46.3 (2005) 485-512 The WELL and the Origins of Virtual Community Fred Turner In 1993, freelance journalist Howard Rheingold published The Virtual Community: Homesteading on the Electronic Frontier and with it defined a new form of technologically enabled social life: virtual community. For the last eight years, he explained, he had been dialing in to a San Francisco Bay–area bulletin-board system (BBS) known as the Whole Earth 'Lectronic Link, or the WELL. 
In the WELL's text-only environment, he conversed with friends and colleagues, met new people, and over time built up relationships of startling intimacy. For Rheingold, these relationships formed an emotional bulwark against the loneliness of a highly technologized material world. As he explained, computer networks like the WELL allowed us "to recapture the sense of cooperative spirit that so many people seemed to lose when we gained all this technology." In the disembodied precincts of cyberspace, we could connect with one another practically and emotionally and "rediscover the power of cooperation, turning cooperation into a game, a way of life—a merger of knowledge capital, social capital, and communion." In the years since Rheingold's book appeared, the Internet and the Worldwide Web have swung into public view, and both the WELL and Rheingold's notion of virtual community have become touchstones for studies of the social implications of computer networking. Yet, despite the WELL's prominence, few have rigorously explored its roots in the American counterculture of the 1960s. As its name suggests, the Whole Earth 'Lectronic Link took shape within a network of individuals and publications that first came together long before the advent of ubiquitous computer networking, with the publication of the Whole Earth Catalog. In the spring of 1968, Stewart Brand, a former Merry Prankster and coproducer of the Trips Festival that helped spark the Haight-Ashbury psychedelic scene, noticed that many of his friends had begun to leave the city for the wilds of New Mexico and Northern California. As sociologists and journalists would soon explain, these migrants marked the leading edge of what would become the largest wave of communalization in American history. 
Brand had just inherited a hundred thousand dollars in stock and, as he recalled several years later, imagining his friends "starting their own civilization hither and yon in the sticks" got him thinking about the L.L.Bean catalog. This in turn led him to fantasize something he called the "Access Mobile" that would offer "all manner of access materials and advice for sale cheap," including books, camping gear, blueprints for houses and machines, and subscriptions to magazines. The publication that grew out of that fantasy would quickly become one of the defining documents of the American counterculture. Sized somewhere between a tabloid newspaper and a glossy magazine, the sixty-one-page first Whole Earth Catalog presented reviews of hand tools, books, and magazines arrayed in seven thematic categories: understanding whole systems, shelter and land use, industry and craft, communications, community, nomadics, and learning. Over the next four years, in a series of biannual issues, the Catalog ballooned to more than four hundred pages, sold more than a million-and-a-half copies, won a National Book Award, and spawned dozens of imitators. It also established a relationship between information technology, economic activity, and alternative forms of community that would outlast the counterculture itself and become a key feature of the digital world. Like other members of the counterculture, those who headed back to the land suffered a deep ambivalence toward technology. On the one hand, like their counterparts on the New Left they saw the large-scale weapons technologies of the cold war and the organizations that produced them as emblems of a malevolent and ubiquitous technological bureaucracy. On the other, as they played their stereos and dropped LSD many came to believe that small-scale technologies could help bring about an alternative to that world. 
Dancing at the Trips Festival or simply sitting around getting high with friends, many experienced a sense of spiritual interconnection. By the late 1960s, social theorists such as Charles Reich and Theodore Roszak had begun to argue that this interconnection could become the...} +} + +@inproceedings{vasilescu_how_2014, + ids = {vasilescu_how_2014-1}, + title = {How Social {{Q}}\&{{A}} Sites Are Changing Knowledge Sharing in Open Source Software Communities}, + booktitle = {Proceedings of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing - {{CSCW}} '14}, + author = {Vasilescu, Bogdan and Serebrenik, Alexander and Devanbu, Prem and Filkov, Vladimir}, + date = {2014}, + pages = {342--354}, + publisher = {{ACM Press}}, + location = {{Baltimore, Maryland, USA}}, + abstract = {Historically, mailing lists have been the preferred means for coordinating development and user support activities. With the emergence and popularity growth of social Q\&A sites such as the StackExchange network (e.g., StackOverflow), this is beginning to change. Such sites offer different sociotechnical incentives to their participants than mailing lists do, e.g., rich web environments to store and manage content collaboratively, or a place to showcase their knowledge and expertise more vividly to peers or potential recruiters. A key difference between StackExchange and mailing lists is gamification, i.e., StackExchange participants compete to obtain reputation points and badges. In this paper, we use a case study of R (a widely-used tool for data analysis) to investigate how mailing list participation has evolved since the launch of StackExchange. Our main contribution is the assembly of a joint data set from the two sources, in which participants in both the r-help mailing list and StackExchange are identifiable. This permits their activities to be linked across the two resources and also over time. 
With this data set we found that user support activities show a strong shift away from r-help. In particular, mailing list experts are migrating to StackExchange, where their behaviour is different. First, participants active both on r-help and on StackExchange are more active than those who focus exclusively on only one of the two. Second, they provide faster answers on StackExchange than on r-help, suggesting they are motivated by the gamified environment. To our knowledge, our study is the first to directly chart the changes in behaviour of specific contributors as they migrate into gamified environments, and has important implications for knowledge management in software engineering.}, + eventtitle = {The 17th {{ACM}} Conference}, + isbn = {978-1-4503-2540-0}, + langid = {english}, + keywords = {crowdsourced knowledge,gamification.,mailing lists,open source,social q&a}, + file = {/home/nathante/Zotero/storage/6DLS9FTI/Vasilescu et al. - 2014 - How social Q&\;A sites are changing knowledge sh.pdf;/home/nathante/Zotero/storage/MNHPJRT3/Vasilescu et al. - 2014 - How social Q&A sites are changing knowledge sharin.pdf} +} + +@book{verhoef_community_2010, + title = {Community Ecology: Processes, Models, and Applications}, + shorttitle = {Community Ecology}, + author = {Verhoef, Herman A and Morin, Peter J}, + date = {2010}, + publisher = {{Oxford University Press}}, + location = {{Oxford}}, + isbn = {978-0-19-922897-3 978-0-19-922898-0}, + langid = {english}, + annotation = {OCLC: 876676566} +} + +@book{von_hippel_democratizing_2006, + title = {Democratizing Innovation}, + author = {von Hippel, Eric}, + options = {useprefix=true}, + date = {2006}, + publisher = {{The MIT Press}}, + abstract = {Innovation is rapidly becoming democratized. Users, aided by improvements in computer and communications technology, increasingly can develop their own new products and services. 
These innovating users—both individuals and firms—often freely share their innovations with others, creating user-innovation communities and a rich intellectual commons. In Democratizing Innovation, Eric von Hippel looks closely at this emerging system of user-centered innovation. He explains why and when users find it profitable to develop new products and services for themselves, and why it often pays users to reveal their innovations freely for the use of all. The trend toward democratized innovation can be seen in software and information products—most notably in the free and open-source software movement—but also in physical products. Von Hippel's many examples of user innovation in action range from surgical equipment to surfboards to software security features. He shows that product and service development is concentrated among "lead users," who are ahead on marketplace trends and whose innovations are often commercially attractive. Von Hippel argues that manufacturers should redesign their innovation processes and that they should systematically seek out innovations developed by users. He points to businesses—the custom semiconductor industry is one example—that have learned to assist user-innovators by providing them with toolkits for developing new products. User innovation has a positive impact on social welfare, and von Hippel proposes that government policies, including R\&D subsidies and tax credits, should be realigned to eliminate biases against it. 
The goal of a democratized user-centered innovation system, says von Hippel, is well worth striving for.}, + isbn = {978-0-262-72047-2 978-0-262-00274-5}, + langid = {english}, + keywords = {innovation,org theory}, + file = {/home/nathante/Zotero/storage/ZK5N3JLA/search.html} +} + +@book{von_hippel_free_2016, + title = {Free Innovation}, + author = {von Hippel, Eric}, + options = {useprefix=true}, + date = {2016-11-18}, + edition = {1}, + publisher = {{The MIT Press}}, + location = {{Cambridge, MA}}, + abstract = {A leading innovation scholar explains the growing phenomenon and impact of free innovation, in which innovations developed by consumers and given away “for free.” In this book, Eric von Hippel, author of the influential Democratizing Innovation, integrates new theory and research findings into the framework of a “free innovation paradigm.” Free innovation, as he defines it, involves innovations developed by consumers who are self-rewarded for their efforts, and who give their designs away “for free.” It is an inherently simple grassroots innovation process, unencumbered by compensated transactions and intellectual property rights. Free innovation is already widespread in national economies and is steadily increasing in both scale and scope. Today, tens of millions of consumers are collectively spending tens of billions of dollars annually on innovation development. However, because free innovations are developed during consumers' unpaid, discretionary time and are given away rather than sold, their collective impact and value have until very recently been hidden from view. This has caused researchers, governments, and firms to focus too much on the Schumpeterian idea of innovation as a producer-dominated activity. Free innovation has both advantages and drawbacks. Because free innovators are self-rewarded by such factors as personal utility, learning, and fun, they often pioneer new areas before producers see commercial potential. 
At the same time, because they give away their innovations, free innovators generally have very little incentive to invest in diffusing what they create, which reduces the social value of their efforts. The best solution, von Hippel and his colleagues argue, is a division of labor between free innovators and producers, enabling each to do what they do best. The result will be both increased producer profits and increased social welfare―a gain for all.}, + isbn = {978-0-262-03521-7}, + langid = {english}, + pagetotal = {240} +} + +@article{von_hippel_sticky_1994, + title = {"{{Sticky}} Information" and the Locus of Problem Solving: Implications for Innovation}, + shorttitle = {"{{Sticky Information}}" and the {{Locus}} of {{Problem Solving}}}, + author = {von Hippel, Eric}, + options = {useprefix=true}, + date = {1994}, + journaltitle = {Management Science}, + volume = {40}, + number = {4}, + pages = {429--439}, + issn = {0025-1909}, + abstract = {To solve a problem, needed information and problem-solving capabilities must be brought together. Often the information used in technical problem solving is costly to acquire, transfer, and use in a new location---is, in our terms, "sticky." In this paper we explore the impact of information stickiness on the locus of innovation-related problem solving. We find, first, that when sticky information needed by problem solvers is held at one site only, problem solving will be carried out at that locus, other things being equal. Second, when more than one locus of sticky information is called upon by problem solvers, the locus of problem solving may iterate among these sites as problem solving proceeds. When the costs of such iteration are high, then, third, problems that draw upon multiple sites of sticky information will sometimes be "task partitioned" into subproblems that each draw on only one such locus, and/or, fourth, investments will be made to reduce the stickiness of information at some locations. 
Information stickiness appears to affect a number of issues of importance to researchers and practitioners. Among these are patterns in the diffusion of information, the specialization of firms, the locus of innovation, and the nature of problems selected by problem solvers.}, + file = {/home/nathante/Zotero/storage/VJT3KFVS/von Hippel - 1994 - Sticky information and the locus of problem solv.pdf;/home/nathante/Zotero/storage/N5WSWBCN/v_3a40_3ay_3a1994_3ai_3a4_3ap_3a429-439.html} +} + +@inproceedings{waller_generalists_2019, + title = {Generalists and {{Specialists}}: {{Using Community Embeddings}} to {{Quantify Activity Diversity}} in {{Online Platforms}}}, + shorttitle = {Generalists and {{Specialists}}}, + booktitle = {The {{World Wide Web Conference}} on - {{WWW}} '19}, + author = {Waller, Isaac and Anderson, Ashton}, + date = {2019}, + pages = {1954--1964}, + publisher = {{ACM Press}}, + location = {{San Francisco, CA, USA}}, + abstract = {In many online platforms, people must choose how broadly to allocate their energy. Should one concentrate on a narrow area of focus, and become a specialist, or apply oneself more broadly, and become a generalist? In this work, we propose a principled measure of how generalist or specialist a user is, and study behavior in online platforms through this lens. To do this, we construct highly accurate community embeddings that represent communities in a high-dimensional space. We develop sets of community analogies and use them to optimize our embeddings so that they encode community relationships extremely well. Based on these embeddings, we introduce a natural measure of activity diversity, the GS-score. Applying our embedding-based measure to online platforms, we observe a broad spectrum of user activity styles, from extreme specialists to extreme generalists, in both community membership on Reddit and programming contributions on GitHub. 
We find that activity diversity is related to many important phenomena of user behavior. For example, specialists are much more likely to stay in communities they contribute to, but generalists are much more likely to remain on platforms as a whole. We also find that generalists engage with significantly more diverse sets of users than specialists do. Furthermore, our methodology leads to a simple algorithm for community recommendation, matching state-of-the-art methods like collaborative filtering. Our methods and results introduce an important new dimension of online user behavior and shed light on many aspects of online platform use.}, + eventtitle = {The {{World Wide Web Conference}}}, + isbn = {978-1-4503-6674-8}, + langid = {english}, + keywords = {activity diversity,community embeddings,community recommendation,generalist and specialists}, + file = {/home/nathante/Zotero/storage/5F77953J/Waller and Anderson - 2019 - Generalists and Specialists Using Community Embed.pdf;/home/nathante/Zotero/storage/PK32L55Y/Waller and Anderson - 2019 - Generalists and Specialists Using Community Embed.pdf} +} + +@article{wang_impact_2012, + ids = {wang_impact_2013}, + title = {The Impact of Membership Overlap on Growth: {{An}} Ecological Competition View of Online Groups}, + shorttitle = {The Impact of Membership Overlap on Growth}, + author = {Wang, Xiaoqing and Butler, Brian S. and Ren, Yuqing}, + date = {2012-06-15}, + journaltitle = {Organization Science}, + shortjournal = {Organization Science}, + volume = {24}, + number = {2}, + pages = {414--431}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {The dominant narrative of the Internet has been one of unconstrained growth, abundance, and plenitude. It is in this context that new forms of organizing, such as online groups, have emerged. 
However, the same factors that underlie the utopian narrative of Internet life also give rise to numerous online groups, many of which fail to attract participants or to provide significant value. This suggests that despite the potential transformative nature of modern information technology, issues of scarcity, competition, and context may remain critical to the performance and functioning of online groups. In this paper, we draw from organizational ecology theories to develop an ecological view of online groups to explain how overlapping membership among online groups causes intergroup competition for member attention and affects a group's ability to grow. Hypotheses regarding the effects of group size, age, and membership overlap on growth are proposed and tested with data from a 64-month, longitudinal sample of 240 online discussion groups. The analysis shows that sharing members with other groups reduced future growth rates, suggesting that membership overlap puts competitive pressure on online groups. Our results also suggest that, compared with smaller and younger groups, larger and older groups experience greater difficulty in growing their membership. In addition, larger groups were more vulnerable to competitive pressure than smaller groups: larger groups experienced greater difficulty in growing their membership than smaller groups as competition intensified. Overall, our findings show how an abundance of opportunities afforded by technologies can create scarcity in user time and effort, which increases competitive pressure on online groups. Our ecological view extends organizational ecology theory to new organizational forms online and highlights the importance of studying the competitive environment of online groups.}, + file = {/home/nathante/Zotero/storage/3WI37Y9S/Wang et al. - 2013 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/D7GAZURV/Wang et al. 
- 2012 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/EQSW25XD/Wang et al. - 2012 - The impact of membership overlap on growth An eco.pdf;/home/nathante/Zotero/storage/8QDPVTSM/orsc.1120.html;/home/nathante/Zotero/storage/IK6SB3L8/orsc.1120.html} +} + +@inproceedings{wang_searching_2012, + ids = {wang_searching_2012-1}, + title = {Searching for the Goldilocks Zone: Trade-Offs in Managing Online Volunteer Groups}, + shorttitle = {Searching for the Goldilocks Zone}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work}}}, + author = {Wang, Loxley Sijia and Chen, Jilin and Ren, Yuqing and Riedl, John}, + date = {2012}, + series = {{{CSCW}} '12}, + pages = {989--998}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Dedicated and productive members who actively contribute to community efforts are crucial to the success of online volunteer groups such as Wikipedia. What predicts member productivity? Do productive members stay longer? How does involvement in multiple projects affect member contribution to the community? In this paper, we analyze data from 648 WikiProjects to address these questions. Our results reveal two critical trade-offs in managing online volunteer groups. First, factors that increase member productivity, measured by the number of edits on Wikipedia articles, also increase likelihood of withdrawal from contributing, perhaps due to feelings of mission accomplished or burnout. Second, individual membership in multiple projects has mixed effects. It decreases the amount of work editors contribute to both the individual projects and Wikipedia as a whole. It increases withdrawal for each individual project yet reduces withdrawal from Wikipedia. 
We discuss how our findings expand existing theories to fit the online context and inform the design of new tools to improve online volunteer work.}, + isbn = {978-1-4503-1086-4}, + keywords = {online volunteer group,productivity,trade-off,wikipedia,withdrawal}, + file = {/home/nathante/Zotero/storage/7CKH7QT7/Wang et al. - 2012 - Searching for the goldilocks zone trade-offs in m.pdf;/home/nathante/Zotero/storage/R8ALMDFI/Wang et al. - 2012 - Searching for the goldilocks zone trade-offs in m.pdf;/home/nathante/Zotero/storage/Z28IT3FH/Wang et al. - 2012 - Searching for the goldilocks zone trade-offs in m.pdf} +} + +@incollection{white_effects_2011, + title = {Effects of Community Size and Contact Rate in Synchronous Social Q\&a}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {White, Ryen W. and Richardson, Matthew and Liu, Yandong}, + date = {2011-05-07}, + pages = {2837--2846}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Social question-and-answer (Q\&A) involves the location of answers to questions through communication with people. Social Q\&A systems, such as mailing lists and Web forums are popular, but their asynchronous nature can lead to high answer latency. Synchronous Q\&A systems facilitate real-time dialog, usually via instant messaging, but face challenges with interruption costs and the availability of knowledgeable answerers at question time. We ran a longitudinal study of a synchronous social Q\&A system to investigate the effects of the rate with which potential answerers were contacted (trading off time-to-answer against interruption cost) and community size (varying total number of members). We found important differences in subjective and objective measures of system performance with these variations. 
Our findings help us understand the costs and benefits of varying contact rate and community size in synchronous social Q\&A, and inform system design for social Q\&A.}, + isbn = {978-1-4503-0228-9}, + keywords = {community size,contact rate,synchronous social q&a}, + file = {/home/nathante/Zotero/storage/YTF5HY6W/White et al. - 2011 - Effects of community size and contact rate in sync.pdf} +} + +@article{wu_estimating_2019, + title = {Estimating {{Attention Flow}} in {{Online Video Networks}}}, + author = {Wu, Siqi and Rizoiu, Marian-Andrei and Xie, Lexing}, + date = {2019-11-07}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {3}, + pages = {183:1--183:25}, + abstract = {Online videos have shown tremendous increase in Internet traffic. Most video hosting sites implement recommender systems, which connect the videos into a directed network and conceptually act as a source of pathways for users to navigate. At present, little is known about how human attention is allocated over such large-scale networks, and about the impacts of the recommender systems. In this paper, we first construct the Vevo network -- a YouTube video network with 60,740 music videos interconnected by the recommendation links, and we collect their associated viewing dynamics. This results in a total of 310 million views every day over a period of 9 weeks. Next, we present large-scale measurements that connect the structure of the recommendation network and the video attention dynamics. We use the bow-tie structure to characterize the Vevo network and we find that its core component (23.1\% of the videos), which occupies most of the attention (82.6\% of the views), is made out of videos that are mainly recommended among themselves. This is indicative of the links between video recommendation and the inequality of attention allocation. 
Finally, we address the task of estimating the attention flow in the video recommendation network. We propose a model that accounts for the network effects for predicting video popularity, and we show it consistently outperforms the baselines. This model also identifies a group of artists gaining attention because of the recommendation network. Altogether, our observations and our models provide a new set of tools to better understand the impacts of recommender systems on collective social attention.}, + issue = {CSCW}, + keywords = {empirical measurement,network effects,online attention,popularity prediction,recommender system,youtube}, + file = {/home/nathante/Zotero/storage/QEZJWR7U/Wu et al_2019_Estimating Attention Flow in Online Video Networks.pdf} +} + +@article{xigen_li_factors_2011, + title = {Factors Influencing the Willingness to Contribute Information to Online Communities}, + author = {{Xigen Li}}, + date = {2011-03}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {13}, + number = {2}, + pages = {279--296}, + issn = {1461-4448, 1461-7315}, + abstract = {This study examines the factors that influence the willingness to contribute information to online communities from the perspectives of the discretionary database and expectancy theory. The study identified four groups of variables and tested their predictive value on the willingness to contribute information to online communities. The findings confirmed the effect of the perceived value of contributing and the likelihood of getting a reward for the willingness to contribute. Cost of contribution was not a significant predictor of the willingness to contribute information. Benefit from, and interest in, the community were significant predictors, but community affinity was not. 
Among the four groups of variables, social approval was the strongest predictor of the willingness to contribute.}, + langid = {english} +} + +@article{xu_evolution_2021, + title = {Evolution of Audience Duplication Networks among Social Networking Sites: {{Exploring}} the Influences of Preferential Attachment, Audience Size, and Niche Width}, + shorttitle = {Evolution of Audience Duplication Networks among Social Networking Sites}, + author = {Xu, Yu}, + date = {2021-02-15}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {1461444821993048}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study examines the evolution of social networking sites (SNSs) from a networked audience duplication perspective. Guided by social network theory, the theory of double jeopardy, and niche theory, this study proposes an integrated framework to explain the evolution of SNS choices of the US audience between 2016 and 2019. Shared traffic data were retrieved from comScore’s Media Metrix Multi-Platform database. The empirical results of the separable temporal exponential random graph model (STERGM) confirm that preferential attachment, audience size, and niche width significantly drive the likelihood of tie formation and dissolution in the evolving audience duplication network. These effects hold true even when other endogenous structural features and exogenous nodal attributes are taken into account. 
Theoretical implications for the networked media landscape are discussed.}, + langid = {english}, + keywords = {Audience duplication,evolution,network analysis,organizational ecology,social media}, + file = {/home/nathante/Zotero/storage/94TAHIW3/Xu - 2021 - Evolution of audience duplication networks among s.pdf} +} + +@article{zhang_configuring_2020, + title = {Configuring {{Audiences}}: {{A Case Study}} of {{Email Communication}}}, + shorttitle = {Configuring {{Audiences}}}, + author = {Zhang, Justine and Pennebaker, James and Dumais, Susan and Horvitz, Eric}, + date = {2020-05-28}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {4}, + pages = {062:1--062:26}, + abstract = {When people communicate with each other, their choice of what to say is tied to their perceptions of the audience. For many communication channels, people have some ability to explicitly specify their audience members and the different roles they can play. While existing accounts of communication behavior have largely focused on how people tailor the content of their messages, we focus on the configuring of the audience as a complementary family of decisions in communication. We formulate a general description of audience configuration choices, highlighting key aspects of the audience that people could configure to reflect a range of communicative goals. We then illustrate these ideas via a case study of email usage-a realistic domain where audience configuration choices are particularly fine-grained and explicit in how email senders fill the To and Cc address fields. In a large collection of enterprise emails, we explore how people configure their audiences, finding salient patterns relating a sender's choice of configuration to the types of participants in the email exchange, the content of the message, and the nature of the subsequent interactions. 
Our formulation and findings show how analyzing audience configurations can enrich and extend existing accounts of communication behavior, and frame research directions on audience configuration decisions in communication and collaboration.}, + issue = {CSCW1}, + keywords = {audience,email,social interaction} +} + +@article{zhang_group_2011, + title = {Group Size and Incentives to Contribute: A Natural Experiment at Chinese Wikipedia}, + shorttitle = {Group Size and Incentives to Contribute}, + author = {Zhang, Xiaoquan Michael and Zhu, Feng}, + date = {2011-06}, + journaltitle = {American Economic Review}, + volume = {101}, + number = {4}, + pages = {1601--1615}, + issn = {0002-8282}, + abstract = {The literature on the private provision of public goods suggests an inverse relationship between incentives to contribute and group size. We find, however, that after an exogenous reduction of group size at Chinese Wikipedia, the nonblocked contributors decrease their contributions by 42.8 percent on average. We attribute the cause to social effects: contributors receive social benefits that increase with both the amount of their contributions and group size, and the shrinking group size weakens these social benefits. Consistent with our explanation, we find that the more contributors value social benefits, the more they reduce their contributions after the block. 
(JEL H41, L17, L82)}, + langid = {english}, + keywords = {Media,Public Goods; Open Source Products and Markets; Entertainment}, + file = {/home/nathante/Zotero/storage/63JBCUER/Zhang and Zhu - 2011 - Group Size and Incentives to Contribute A Natural.pdf;/home/nathante/Zotero/storage/BWMQ96PV/articles.html} +} + +@article{zhang_understanding_2021, + title = {Understanding the {{Diverging User Trajectories}} in {{Highly-Related Online Communities During}} the {{Covid-19 Pandemic}}}, + author = {Zhang, Jason Shuo and Keegan, Brian and Lv, Qin and Tan, Chenhao}, + date = {2021}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {5}, + eprint = {2006.04816}, + eprinttype = {arxiv}, + pages = {12}, + abstract = {As the COVID-19 pandemic is disrupting life worldwide, related online communities are popping up. In particular, two “new” communities, /r/China\_flu and /r/Coronavirus, emerged on Reddit and have been dedicated to COVID-related discussions from the very beginning of this pandemic. With /r/Coronavirus promoted as the official community on Reddit, it remains an open question how users choose between these two highly-related communities. In this paper, we characterize user trajectories in these two communities from the beginning of COVID-19 to the end of September 2020. We show that new users of /r/China\_flu and /r/Coronavirus were similar from January to March. After that, their differences steadily increase, evidenced by both language distance and membership prediction, as the pandemic continues to unfold. Furthermore, users who started at /r/China\_flu from January to March were more likely to leave, while those who started in later months tend to remain highly “loyal”. 
To understand this difference, we develop a movement analysis framework to understand membership changes in these two communities and identify a significant proportion of /r/China\_flu members (around 50\%) that moved to /r/Coronavirus in February. This movement turns out to be highly predictable based on other subreddits that users were previously active in. Our work demonstrates how two highly related communities emerge and develop their own identity in a crisis, and highlights the important role of existing communities in understanding such an emergence.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computers and Society,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/3HZBRY3S/Zhang et al. - Understanding the Diverging User Trajectories in H.pdf;/home/nathante/Zotero/storage/V3QR9ASE/Zhang et al. - 2021 - Understanding the Diverging User Trajectories in H.pdf} +} + +@incollection{zhao_social_2016, + title = {The {{Social Media Ecology}}: {{User Perceptions}}, {{Strategies}} and {{Challenges}}}, + shorttitle = {The {{Social Media Ecology}}}, + booktitle = {Proceedings of the 2016 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhao, Xuan and Lampe, Cliff and Ellison, Nicole B.}, + date = {2016-05-07}, + pages = {89--100}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Many existing studies of social media focus on only one platform, but the reality of users' lived experiences is that most users incorporate multiple platforms into their communication practices in order to access the people and networks they desire to influence. In order to better understand how people make sharing decisions across multiple sites, we asked our participants (N=29) to categorize all modes of communication they used, with the goal of surfacing their mental models about managing sharing across platforms. 
Our interview data suggest that people simultaneously consider "audience" and "content" when sharing and these needs sometimes compete with one another; that they have the strong desire to both maintain boundaries between platforms as well as allowing content and audience to permeate across these boundaries; and that they strive to stabilize their own communication ecosystem yet need to respond to changes necessitated by the emergence of new tools, practices, and contacts. We unpack the implications of these tensions and suggest future design possibilities.}, + isbn = {978-1-4503-3362-7}, + keywords = {boundary management,content sharing,media ecology,social media}, + file = {/home/nathante/Zotero/storage/44Z9658S/Zhao et al_2016_The Social Media Ecology.pdf} +} + +@inproceedings{zhu_impact_2014, + title = {The Impact of Membership Overlap on the Survival of Online Communities}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Kraut, Robert E. and Kittur, Aniket}, + date = {2014-04-26}, + series = {{{CHI}} '14}, + pages = {281--290}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {If the people belong to multiple online communities, their joint membership can influence the survival of each of the communities to which they belong. Communities with many joint memberships may struggle to get enough of their members' time and attention, but find it easy to import best practices from other communities. In this paper, we study the effects of membership overlap on the survival of online communities. By analyzing the historical data of 5673 Wikia communities, we find that higher levels of membership overlap are positively associated with higher survival rates of online communities. Furthermore, we find that it is beneficial for young communities to have shared members who play a central role in other mature communities. 
Our contributions are two-fold. Theoretically, by examining the impact of membership overlap on the survival of online communities we identified an important mechanism underlying the success of online communities. Practically, our findings may guide community creators on how to effectively manage their members, and tool designers on how to support this task.}, + isbn = {978-1-4503-2473-1}, + keywords = {membership overlap,online communities,survival analysis}, + file = {/home/nathante/Zotero/storage/GV2D7ZKS/Zhu et al. - 2014 - The Impact of Membership Overlap on the Survival o.pdf;/home/nathante/Zotero/storage/IY4RTSGD/Zhu et al. - 2014 - The impact of membership overlap on the survival o.pdf;/home/nathante/Zotero/storage/JZE5JGAZ/Zhu et al. - 2014 - The impact of membership overlap on the survival o.pdf} +} + +@inproceedings{zhu_selecting_2014, + title = {Selecting an Effective Niche: {{An}} Ecological View of the Success of Online Communities}, + shorttitle = {Selecting an Effective Niche}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Chen, Jilin and Matthews, Tara and Pal, Aditya and Badenes, Hernan and Kraut, Robert E.}, + date = {2014}, + series = {{{CHI}} '14}, + pages = {301--310}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Online communities serve various important functions, but many fail to thrive. Research on community success has traditionally focused on internal factors. In contrast, we take an ecological view to understand how the success of a community is influenced by other communities. We measured a community's relationship with other communities - its "niche" - through four dimensions: topic overlap, shared members, content linking, and shared offline organizational affiliation. We used a mixed-method approach, combining the quantitative analysis of 9495 online enterprise communities and interviews with community members. 
Our results show that too little or too much overlap in topic with other communities causes a community's activity to suffer. We also show that this main result is moderated in predictable ways by whether the community shares members with, links to content in, or shares an organizational affiliation with other communities. These findings provide new insight on community success, guiding online community designers on how to effectively position their community in relation to others.}, + isbn = {978-1-4503-2473-1}, + venue = {Toronto, Ontario, Canada}, + keywords = {online communities,success,topic overlap,workplace}, + file = {/home/nathante/Zotero/storage/FNS9RSWC/Zhu et al. - 2014 - Selecting an Effective Niche An Ecological View o.pdf;/home/nathante/Zotero/storage/KIHWVKUQ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf;/home/nathante/Zotero/storage/RFMX2CBJ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf} +} + + diff --git a/dissertations/nathante_uw_2021/figures/GN_session_device_plot-1.pdf b/dissertations/nathante_uw_2021/figures/GN_session_device_plot-1.pdf new file mode 100644 index 0000000..49c630e Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/GN_session_device_plot-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/calibration-1.pdf b/dissertations/nathante_uw_2021/figures/calibration-1.pdf new file mode 100644 index 0000000..399f280 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/calibration-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/cod_graphviz.pdf b/dissertations/nathante_uw_2021/figures/cod_graphviz.pdf new file mode 100644 index 0000000..14a24f1 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/cod_graphviz.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/fig_spacing-1.pdf b/dissertations/nathante_uw_2021/figures/fig_spacing-1.pdf new file mode 100644 index 0000000..a87d81e Binary files 
/dev/null and b/dissertations/nathante_uw_2021/figures/fig_spacing-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/hazardplot-1.pdf b/dissertations/nathante_uw_2021/figures/hazardplot-1.pdf new file mode 100644 index 0000000..45f1d5f Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/hazardplot-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/histograms_1-1.pdf b/dissertations/nathante_uw_2021/figures/histograms_1-1.pdf new file mode 100644 index 0000000..24c5a36 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/histograms_1-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/kernelplots-1.pdf b/dissertations/nathante_uw_2021/figures/kernelplots-1.pdf new file mode 100644 index 0000000..4e11b02 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/kernelplots-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-adoption_me_plot-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-adoption_me_plot-1.pdf new file mode 100644 index 0000000..5ad5035 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-adoption_me_plot-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-fig_densityxgrowth-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-fig_densityxgrowth-1.pdf new file mode 100644 index 0000000..304b891 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-fig_densityxgrowth-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-h1_unreg_me_plot-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-h1_unreg_me_plot-1.pdf new file mode 100644 index 0000000..88304d5 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-h1_unreg_me_plot-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-h1_userpage_me_plot-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-h1_userpage_me_plot-1.pdf new file mode 100644 index 0000000..a16938c Binary files 
/dev/null and b/dissertations/nathante_uw_2021/figures/knitr-h1_userpage_me_plot-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-me_plot_H2_anon-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-me_plot_H2_anon-1.pdf new file mode 100644 index 0000000..0ba6c6c Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-me_plot_H2_anon-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-me_plot_H2_no_user_page-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-me_plot_H2_no_user_page-1.pdf new file mode 100644 index 0000000..f70b397 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-me_plot_H2_no_user_page-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-plot_commense_x_abs_commense-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-plot_commense_x_abs_commense-1.pdf new file mode 100644 index 0000000..95f7bb5 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-plot_commense_x_abs_commense-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-regplot_H1_anon-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-regplot_H1_anon-1.pdf new file mode 100644 index 0000000..8a9b311 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-regplot_H1_anon-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-regplot_H3_anon-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-regplot_H3_anon-1.pdf new file mode 100644 index 0000000..0693d2e Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-regplot_H3_anon-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/knitr-regplot_controversial_anon-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-regplot_controversial_anon-1.pdf new file mode 100644 index 0000000..e692f26 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-regplot_controversial_anon-1.pdf differ diff --git 
a/dissertations/nathante_uw_2021/figures/knitr-regplot_controversial_no_user_page-1.pdf b/dissertations/nathante_uw_2021/figures/knitr-regplot_controversial_no_user_page-1.pdf new file mode 100644 index 0000000..40f0143 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/knitr-regplot_controversial_no_user_page-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/mental_graphviz.pdf b/dissertations/nathante_uw_2021/figures/mental_graphviz.pdf new file mode 100644 index 0000000..238c1ff Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/mental_graphviz.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/model1aplot-1.pdf b/dissertations/nathante_uw_2021/figures/model1aplot-1.pdf new file mode 100644 index 0000000..10b482f Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/model1aplot-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/rcfilters_example_2.png b/dissertations/nathante_uw_2021/figures/rcfilters_example_2.png new file mode 100644 index 0000000..d53d00a Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/rcfilters_example_2.png differ diff --git a/dissertations/nathante_uw_2021/figures/realestate_graphviz.pdf b/dissertations/nathante_uw_2021/figures/realestate_graphviz.pdf new file mode 100644 index 0000000..56b9f1a Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/realestate_graphviz.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/score_correlation-1.pdf b/dissertations/nathante_uw_2021/figures/score_correlation-1.pdf new file mode 100644 index 0000000..d1bb0ca Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/score_correlation-1.pdf differ diff --git a/dissertations/nathante_uw_2021/figures/uncertainty-1.pdf b/dissertations/nathante_uw_2021/figures/uncertainty-1.pdf new file mode 100644 index 0000000..77448c0 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/uncertainty-1.pdf differ 
diff --git a/dissertations/nathante_uw_2021/figures/watches_graphviz.pdf b/dissertations/nathante_uw_2021/figures/watches_graphviz.pdf new file mode 100644 index 0000000..b62f365 Binary files /dev/null and b/dissertations/nathante_uw_2021/figures/watches_graphviz.pdf differ diff --git a/dissertations/nathante_uw_2021/frontmatter.pdf b/dissertations/nathante_uw_2021/frontmatter.pdf new file mode 100644 index 0000000..5a371d7 Binary files /dev/null and b/dissertations/nathante_uw_2021/frontmatter.pdf differ diff --git a/dissertations/nathante_uw_2021/ores_fairness.bib b/dissertations/nathante_uw_2021/ores_fairness.bib new file mode 100644 index 0000000..159e0e6 --- /dev/null +++ b/dissertations/nathante_uw_2021/ores_fairness.bib @@ -0,0 +1,4039 @@ + +@article{adams_who_2019, + title = {Who Counts as a Notable Sociologist on {{Wikipedia}}? {{Gender}}, Race, and the ``Professor Test''}, + shorttitle = {Who {{Counts}} as a {{Notable Sociologist}} on {{Wikipedia}}?}, + author = {Adams, Julia and Br{\"u}ckner, Hannah and Naslund, Cambria}, + year = {2019}, + month = jan, + volume = {5}, + pages = {2378023118823946}, + issn = {2378-0231}, + abstract = {This paper documents and estimates the extent of underrepresentation of women and people of color on the pages of Wikipedia devoted to contemporary American sociologists. In contrast to the demographic diversity of the discipline, sociologists represented on Wikipedia are largely white men. The gender and racial/ethnic gaps in likelihood of representation have exhibited little change over time. Using novel data, we estimate the ``risk'' of having a Wikipedia page for a sample of contemporary sociologists. We show that the observed differences (in academic rank, length of career, and notability measured with both H-index and departmental reputation) between men and women sociologists and whites and nonwhites, respectively, explain only about half of the differences in the likelihood of being represented on Wikipedia. 
The article also enumerates both supply- and demand-side mechanisms that may account for these continuing gaps in representation.}, + file = {/home/nathante/Zotero/storage/PUURCFNU/Adams et al_2019_Who counts as a notable sociologist on Wikipedia.pdf}, + journal = {Socius}, + language = {en} +} + +@inproceedings{adler_content-driven_2007, + title = {A Content-Driven Reputation System for the Wikipedia}, + booktitle = {Proceedings of the 16th International Conference on {{World Wide Web}}}, + author = {Adler, B. Thomas and {de Alfaro}, Luca}, + year = {2007}, + month = may, + pages = {261--270}, + publisher = {{Association for Computing Machinery}}, + address = {{Banff, Alberta, Canada}}, + abstract = {We present a content-driven reputation system for Wikipedia authors. In our system, authors gain reputation when the edits they perform to Wikipedia articles are preserved by subsequent authors, and they lose reputation when their edits are rolled back or undone in short order. Thus, author reputation is computed solely on the basis of content evolution; user-to-user comments or ratings are not used. The author reputation we compute could be used to flag new contributions from low-reputation authors, or it could be used to allow only authors with high reputation to contribute to controversial or critical pages. A reputation system for the Wikipedia could also provide an incentive for high-quality contributions. We have implemented the proposed system, and we have used it to analyze the entire Italian and French Wikipedias, consisting of a total of 691,551 pages and 5,587,523 revisions. 
Our results show that our notion of reputation has good predictive value: changes performed by low-reputation authors have a significantly larger than average probability of having poor quality, as judged by human observers, and of being later undone, as measured by our algorithms.}, + file = {/home/nathante/Zotero/storage/LYFCLBL6/Adler_de Alfaro_2007_A content-driven reputation system for the wikipedia.pdf}, + isbn = {978-1-59593-654-7}, + keywords = {reputation,user-generated content,Wikipedia}, + series = {{{WWW}} '07} +} + +@incollection{allison_convergence_2004, + ids = {allison\_360-2008:\_2008}, + title = {Convergence {{Problems}} in {{Logistic Regression}}}, + booktitle = {Numerical {{Issues}} in {{Statistical Computing}} for the {{Social Scientist}}}, + author = {Allison, Paul}, + year = {2004}, + pages = {238--252}, + publisher = {{John Wiley \& Sons, Ltd}}, + abstract = {Chapter ten shows how logistic regression models can produce inaccurate estimates or fail to converge altogether because of numerical problems. The chapter then provides methods to detect false convergence, and to make accurate estimation of logistic regressions. 
Topics include: maximum likelihood estimation of logistic regression; computational problems arising from complete separation and quasi-complete separation; a comparison of the ability of popular statistical packages to detect and report false convergence; methods for avoiding quasi-separation: omitting variables, combining categories; methods for robust and accurate logistic regression: exact inference, Bayesian estimation, penalized maximum likelihood estimation}, + copyright = {Copyright \textcopyright{} 2004 John Wiley \& Sons, Inc.}, + file = {/home/nathante/Zotero/storage/UXJNFJE3/Allison - 2008 - 360-2008 Convergence Failures in Logistic Regress.pdf;/home/nathante/Zotero/storage/7QH5HL9T/0471475769.html}, + isbn = {978-0-471-47576-7}, + keywords = {complete separation,exact inference,false convergence,logistic regression,logit,maximum likelihood estimation,MLE,Newton–Raphson,penalized maximum likelihood estimation,quasi-complete separation,sociological methods}, + language = {en} +} + +@inproceedings{amershi_guidelines_2019, + title = {Guidelines for {{Human}}-{{AI Interaction}}}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} - {{CHI}} '19}, + author = {Amershi, Saleema and Inkpen, Kori and Teevan, Jaime and {Kikin-Gil}, Ruth and Horvitz, Eric and Weld, Dan and Vorvoreanu, Mihaela and Fourney, Adam and Nushi, Besmira and Collisson, Penny and Suh, Jina and Iqbal, Shamsi and Bennett, Paul N.}, + year = {2019}, + pages = {1--13}, + publisher = {{ACM Press}}, + address = {{Glasgow, Scotland, UK}}, + abstract = {Advances in artificial intelligence (AI) frame opportunities and challenges for user interface design. Principles for human-AI interaction have been discussed in the human-computer interaction community for over two decades, but more study and innovation are needed in light of advances in AI and the growing uses of AI technologies in human-facing applications. 
We propose 18 generally applicable design guidelines for human-AI interaction. These guidelines are validated through multiple rounds of evaluation including a user study with 49 design practitioners who tested the guidelines against 20 popular AI-infused products. The results verify the relevance of the guidelines over a spectrum of interaction scenarios and reveal gaps in our knowledge, highlighting opportunities for further research. Based on the evaluations, we believe the set of design guidelines can serve as a resource to practitioners working on the design of applications and features that harness AI technologies, and to researchers interested in the further development of guidelines for human-AI interaction design.}, + file = {/home/nathante/Zotero/storage/IMKIUJ8A/Amershi et al. - 2019 - Guidelines for Human-AI Interaction.pdf}, + isbn = {978-1-4503-5970-2}, + language = {en} +} + +@article{anthony_reputation_2009, + title = {Reputation and {{Reliability}} in {{Collective Goods}}: {{The Case}} of the {{Online Encyclopedia Wikipedia}}}, + shorttitle = {Reputation and {{Reliability}} in {{Collective Goods}}}, + author = {Anthony, Denise and Smith, Sean W. and Williamson, Timothy}, + year = {2009}, + month = aug, + volume = {21}, + pages = {283--306}, + issn = {1043-4631, 1461-7358}, + abstract = {An important organizational innovation enabled by the revolution in information technologies is `open source' production which converts private commodities into essentially public goods. Similar to other public goods, incentives for reputation and group identity appear to motivate contributions to open source projects, overcoming the social dilemma inherent in producing such goods. In this paper we examine how contributor motivations affect the type of contributions made to the open source online encyclopedia Wikipedia. 
As expected, we find that registered participants, motivated by reputation and commitment to the Wikipedia community, make many contributions with high reliability. Surprisingly, however, we find the highest reliability from the vast numbers of anonymous `Good Samaritans' who contribute only once. Our findings of high reliability in the contributions of both Good Samaritans and committed `zealots' suggest that open source production succeeds by altering the scope of production such that a critical mass of contributors can participate.}, + file = {/home/nathante/Zotero/storage/JAXBG4WL/Anthony et al. - 2009 - Reputation and Reliability in Collective Goods Th.pdf}, + journal = {Rationality and Society}, + language = {en}, + number = {3} +} + +@inproceedings{antin_technology-mediated_2012, + title = {Technology-Mediated Contributions: {{Editing}} Behaviors among New {{Wikipedians}}}, + shorttitle = {Technology-Mediated Contributions}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work}}}, + author = {Antin, Judd and Cheshire, Coye and Nov, Oded}, + year = {2012}, + pages = {373--382}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {The power-law distribution of participation characterizes a wide variety of technology-mediated social participation (TMSP) systems, and Wikipedia is no exception. A minority of active contributors does most of the work. While the existence of a core of highly active contributors is well documented, how those individuals came to be so active is less well understood. In this study we extend prior research on TMSP and Wikipedia by examining in detail the characteristics of the revisions that new contributors make. In particular we focus on new users who maintain a minimum level of sustained activity during their first six months. 
We use content analysis of individual revisions as well as other quantitative techniques to examine three research questions regarding the effect of early diversification of activity, nature vs. nurture, and associations with later administrative and organizational activity. We present analyses that address each of these questions, and conclude with implications for our understanding of the progression of participation on Wikipedia and other TMSP systems.}, + file = {/home/nathante/Zotero/storage/DB5RI9R9/Antin et al. - 2012 - Technology-mediated contributions Editing behavio.pdf}, + isbn = {978-1-4503-1086-4}, + keywords = {legitimate peripheral participation.,wiki-work,wikipedia}, + series = {{{CSCW}} '12} +} + +@inproceedings{arazy_how_2017, + title = {On the ``{{How}}'' and ``{{Why}}'' of {{Emergent Role Behaviors}} in {{Wikipedia}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Arazy, Ofer and {Lifshitz-Assaf}, Hila and Nov, Oded and Daxenberger, Johannes and Balestra, Martina and Cheshire, Coye}, + year = {2017}, + pages = {2039--2051}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Research on peer-production suggests that as participants choose what actions to perform, prototypical activity patterns emerge. Recent work characterized these patterns and demonstrated that informal emergent roles are highly stable. Nonetheless, we know little about the ways in which contributors take on and shed emergent roles. The objectives of this study are to: (a) delineate the temporal dynamics of participants' emergent role taking behaviors, and (b) identify the motivations driving role-transition behaviors. Our study links motivation to role-transition behaviors within Wikipedia. 
Our first sample covered eleven years and 222,119 contributors, and was used to identify four categories of temporal role-taking behaviors, that differ in their mobility between emergent roles and across Wikipedia articles. Our second examination linked the motivations of 175 new participants to their subsequent role-taking activity over 14 months. Together, the two analyses reveal that role-taking categories can be distinguished based on participants' motivational orientation (intrinsic/extrinsic and self/others-oriented).}, + file = {/home/nathante/Zotero/storage/DF4QKZ79/Arazy et al_2017_On the How and Why of Emergent Role Behaviors in Wikipedia.pdf}, + isbn = {978-1-4503-4335-0}, + keywords = {emergent roles,motivation,online production communities,role mobility,role-taking,wikipedia}, + series = {{{CSCW}} '17} +} + +@book{ayers_how_2008, + title = {How {{Wikipedia Works}}}, + author = {Ayers, Phoebe and Matthews, Charles and Yates, Ben}, + year = {2008}, + publisher = {{No Starch Press}}, + abstract = {"We cover Wikipedia from soup to nuts: for readers trying to understand what's in Wikipedia, how and why it got there, and how to analyze the quality of the content you might find on the site; for current and future editors, from basic editing techniques and wikisyntax to not-so-basic information on complicated syntax, referencing and researching content, and editing collaboratively and harmoniously; and finally for anyone interested in how Wikipedia's vibrant and complicated community comes together to produce content, resolve disputes, and keep the site running. Finally, we touch on the wider world of Wikipedias in other languages, other Wikimedia projects, and the Wikimedia Foundation itself. We close with appendices about reusing Wikipedia content according to the terms of the GFDL license, and thoughts on using Wikipedia in a classroom setting. 
"Throughout, we provide community consensus viewpoints and our own thoughts on a common-sense approach to using and participating in Wikipedia, and a selection of carefully-chosen links to the thousands of pages of documentation, help and Wikipedia-space pages that we discuss -- not to mention a sprinkling of humor. In every discussion, we try to provide a sense of the community that supports and is at the heart of the Wikipedia project and mission." -- Phoebe Ayers,}, + collaborator = {{Phoebe Ayers; Charles Matthews; Ben Yates}}, + copyright = {Copyright (C) 2008 by Phoebe Ayers, Charles Matthews, and Ben Yates Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License, Version 1.2 or any later version published by the Free Software Foundation; with no Invariant Sections, no Front-Cover Texts, and with the Back-Cover Texts being "How Wikipedia Works", by Phoebe Ayers, Charles Matthews, and Ben Yates, published by No Starch Press. A copy of the license is included in the section entitled "GNU Free Documentation License".}, + file = {/home/nathante/Zotero/storage/UGD6S4C3/HowWikipediaWorks%2FHowWikipediaWorks.epub}, + keywords = {documentation,encyclopedias,Mediawiki,Social media,User-generated content,Wikimedia,Wikipedia,Wikipedia--Handbooks; manuals; etc.}, + language = {English} +} + +@article{bannon_cscw:_1989, + title = {{{CSCW}}: {{Four Characters}} in {{Search}} of a {{Context}}}, + shorttitle = {{{CSCW}}}, + author = {Bannon, Liam J. and Schmidt, Kjeld}, + year = {1989}, + abstract = {The title of this paper was chosen to highlight the fact that the label CSCW, although widely adopted as the acronym for the field of Computer Supported Cooperative Work, has been applied to computer applications of very different ilk. It is not at all clear what are the unique identifying elements of this research area. 
This paper provides a framework for approaching the issue of cooperative work and its possible computer support. The core issues are identified and prospects for the field are outlined.}, + file = {/home/nathante/Zotero/storage/VE4VKHB4/Bannon and Schmidt - 1989 - CSCW Four Characters in Search of a Context.pdf;/home/nathante/Zotero/storage/MX2QBNT4/2602.html}, + language = {en} +} + +@article{barabas_interventions_nodate, + title = {Interventions over {{Predictions}}: {{Reframing}} the {{Ethical Debate}} for {{Actuarial Risk Assessment}}}, + author = {Barabas, Chelsea and Dinakar, Karthik and Ito, Joichi and Virza, Madars and Zittrain, Jonathan}, + pages = {15}, + abstract = {Actuarial risk assessments might be unduly perceived as a neutral way to counteract implicit bias and increase the fairness of decisions made at almost every juncture of the criminal justice system, from pretrial release to sentencing, parole and probation. In recent times these assessments have come under increased scrutiny, as critics claim that the statistical techniques underlying them might reproduce existing patterns of discrimination and historical biases that are reflected in the data. Much of this debate is centered around competing notions of fairness and predictive accuracy, resting on the contested use of variables that act as ``proxies'' for characteristics legally protected against discrimination, such as race and gender.}, + file = {/home/nathante/Zotero/storage/8ZALXLJ6/Barabas et al. 
- Interventions over Predictions Reframing the Ethi.pdf}, + language = {en} +} + +@article{barker_tightening_1993, + title = {Tightening the {{Iron Cage}}: {{Concertive Control}} in {{Self}}-{{Managing Teams}}}, + shorttitle = {Tightening the {{Iron Cage}}}, + author = {Barker, James R.}, + year = {1993}, + volume = {38}, + pages = {408--437}, + issn = {0001-8392}, + abstract = {In this paper, I provide an ethnographic account of how an organization's control system evolved in response to a managerial change from hierarchical, bureaucratic control to concertive control in the form of self-managing teams. The study investigates how the organization's members developed a system of value-based normative rules that controlled their actions more powerfully and completely than the former system. I describe the organization and its members and provide a detailed account of the dynamics that emerged as concertive control became manifest through the members' interactions. This account depicts how concertive control evolved from the value consensus of the company's team workers to a system of normative rules that became increasingly rationalized. Contrary to some proponents of such systems, concertive control did not free these workers from Weber's iron cage of rational control. 
Instead, the concertive system, as it became manifest in this case, appeared to draw the iron cage tighter and to constrain the organization's members more powerfully.}, + file = {/home/nathante/Zotero/storage/WEUGEKWL/Barker - 1993 - Tightening the Iron Cage Concertive Control in Se.pdf}, + journal = {Administrative Science Quarterly}, + number = {3} +} + +@article{barley_technicians_1996, + title = {Technicians in the {{Workplace}}: {{Ethnographic Evidence}} for {{Bringing Work}} into {{Organizational Studies}}}, + shorttitle = {Technicians in the {{Workplace}}}, + author = {Barley, Stephen R.}, + year = {1996}, + volume = {41}, + pages = {404--441}, + issn = {0001-8392}, + abstract = {[This paper lays the groundwork for new models of work and relations of production that reflect changes in the division of labor and occupational structure of a postindustrial economy. It demonstrates how new ideal-typical occupations can be constructed, drawing on a set of ethnographies to propose an empirically grounded model of technicians' work. The paper focuses on two questions: What do technicians do and what do they know? The answers constitute a first cut at the ideal type, technician. The paper then turns to evidence of the difficulties that arise when organizations employ technicians but fail to appreciate the nature of their work. It closes by showing how a contextually derived model of technicians' work enables us to evaluate why some recent trends in organizing are congruent with an increasingly technical workforce, why others may be misguided, and why organizations are likely to face challenges that organizational theorists have but vaguely anticipated. 
The paper shows that the emergence of technicians' work may signify a shift to a more horizontal division of substantive expertise that undermines the logic of vertical organizing on which most organizational theory and practice still rests.]}, + journal = {Administrative Science Quarterly}, + number = {3} +} + +@article{barley_technology_1986, + title = {Technology as an Occasion for Structuring: Evidence from Observations of Ct Scanners and the Social Order of Radiology Departments}, + shorttitle = {Technology as an {{Occasion}} for {{Structuring}}}, + author = {Barley, Stephen R.}, + year = {1986}, + volume = {31}, + pages = {78--108}, + issn = {00018392}, + abstract = {New medical imaging devices, such as the CT scanner, have begun to challenge traditional role relations among radiologists and radiological technologists. Under some conditions, these technologies may actually alter the organizational and occupational structure of radiological work. However, current theories of technology and organizational form are insensitive to the potential number of structural variations implicit in role-based change. This paper expands recent sociological thought on the link between institution and action to outline a theory of how technology might occasion different organizational structures by altering institutionalized roles and patterns of interaction. In so doing, technology is treated as a social rather than a physical object, and structure is conceptualized as a process rather than an entity. The implications of the theory are illustrated by showing how identical CT scanners occasioned similar structuring processes in two radiology departments and yet led to divergent forms of organization. 
The data suggest that to understand how technologies alter organizational structures researchers may need to integrate the study of social action and the study of social form.}, + file = {/home/nathante/Zotero/storage/UDB2R3EG/Barley - 1986 - Technology as an occasion for structuring evidenc.pdf}, + journal = {Administrative Science Quarterly}, + keywords = {Organization Behavior,Sociology}, + number = {1} +} + +@article{barocas_big_2014, + title = {Big Data's End Run around Procedural Privacy Protections}, + author = {Barocas, Solon and Nissenbaum, Helen}, + year = {2014}, + month = oct, + volume = {57}, + pages = {31--33}, + issn = {00010782}, + file = {/home/nathante/Zotero/storage/PN4AQ3XF/Barocas and Nissenbaum - 2014 - Big data's end run around procedural privacy prote.pdf}, + journal = {Communications of the ACM}, + language = {en}, + number = {11} +} + +@article{barocas_big_2016, + title = {Big {{Data}}'s {{Disparate Impact}}}, + author = {Barocas, Solon and Selbst, Andrew D.}, + year = {2016}, + volume = {104}, + pages = {671--732}, + file = {/home/nathante/Zotero/storage/AZLDDAIA/Barocas and Selbst - 2016 - Big Data's Disparate Impact Essay.pdf}, + journal = {California Law Review}, + language = {eng} +} + +@book{barocas_fairness_2019, + title = {Fairness in {{Machine Learning}}}, + author = {Barocas, Solon and Hardt, Moritz and Narayanan, Arvind}, + year = {2019}, + publisher = {{fairmlbook.org}}, + file = {/home/nathante/Zotero/storage/9XHXWIZ8/Barocas et al. 
 - Fairness in Machine Learning.pdf}, + language = {en} +} + +@book{becker_economics_1957, + title = {The Economics of Discrimination}, + author = {Becker, Gary Stanley}, + year = {1957}, + publisher = {{University of Chicago Press}}, + address = {{Chicago}}, + annotation = {OCLC: 859759499}, + language = {English} +} + +@article{benkler_coases_2002, + title = {Coase's {{Penguin}}, or, {{Linux}} and ``{{The Nature}} of the {{Firm}}''}, + author = {Benkler, Yochai}, + year = {2002}, + month = dec, + volume = {112}, + pages = {369}, + issn = {00440094}, + file = {/home/nathante/Zotero/storage/XGFU7JWC/Benkler - 2002 - Coase's Penguin, or, Linux and The Nature of the .pdf;/home/nathante/Zotero/storage/77PAWR2Q/369_yochai_benkler.html}, + journal = {The Yale Law Journal}, + keywords = {Advantages,Economics,FOSS,Internet,Law,Legal Studies,Open source software,Production cooperatives,Socioeconomic factors}, + number = {3} +} + +@article{benkler_commons-based_2006, + title = {Commons-Based {{Peer Production}} and {{Virtue}}}, + author = {Benkler, Yochai and Nissenbaum, Helen}, + year = {2006}, + volume = {14}, + pages = {394--419}, + issn = {1467-9760}, + file = {/home/nathante/Zotero/storage/FIHNZXC6/Benkler and Nissenbaum - 2006 - Commons-based Peer Production and Virtue.pdf;/home/nathante/Zotero/storage/DC3LX49A/j.1467-9760.2006.00235.html}, + journal = {Journal of Political Philosophy}, + language = {en}, + number = {4} +} + +@article{benkler_dont_2019, + title = {Don't Let Industry Write the Rules for {{AI}}}, + author = {Benkler, Yochai}, + year = {2019}, + month = may, + volume = {569}, + pages = {161}, + abstract = {Technology companies are running a campaign to bend research and regulation for their benefit; society must fight back, says Yochai Benkler.}, + copyright = {2019 Nature}, + file = {/home/nathante/Zotero/storage/7U2H83G8/Benkler - 2019 - Don’t let industry write the rules for AI.pdf}, + journal = {Nature}, + language = {EN} +} + 
+@article{berger_status_1980, + title = {Status {{Organizing Processes}}}, + author = {Berger, Joseph and Rosenholtz, Susan J. and Zelditch, Morris}, + year = {1980}, + month = aug, + volume = {6}, + pages = {479--508}, + issn = {0360-0572, 1545-2115}, + file = {/home/nathante/Zotero/storage/MECIVFNT/Berger et al. - 1980 - Status Organizing Processes.pdf}, + journal = {Annual Review of Sociology}, + language = {en}, + number = {1} +} + +@article{berk_impact_2017, + title = {An Impact Assessment of Machine Learning Risk Forecasts on Parole Board Decisions and Recidivism}, + author = {Berk, Richard}, + year = {2017}, + month = jun, + volume = {13}, + pages = {193--216}, + issn = {1572-8315}, + abstract = {Objectives: The Pennsylvania Board of Probation and Parole has begun using machine learning forecasts to help inform parole release decisions. In this paper, we evaluate the impact of the forecasts on those decisions and subsequent recidivism. Methods: A close approximation to a natural, randomized experiment is used to evaluate the impact of the forecasts on parole release decisions. A generalized regression discontinuity design is used to evaluate the impact of the forecasts on recidivism. Results: The forecasts apparently had no effect on the overall parole release rate, but did appear to alter the mix of inmates released. Important distinctions were made between offenders forecasted to be re-arrested for nonviolent crime and offenders forecasted to be re-arrested for violent crime. The balance of evidence indicates that the forecasts led to reductions in re-arrests for both nonviolent and violent crimes. 
Conclusions: Risk assessments based on machine learning forecasts can improve parole release decisions, especially when distinctions are made between re-arrests for violent and nonviolent crime.}, + file = {/home/nathante/Zotero/storage/5DUPLDAN/Berk_2017_An impact assessment of machine learning risk forecasts on parole board.pdf}, + journal = {Journal of Experimental Criminology}, + keywords = {Forecasting,Machine learning,Multinomial logistic regression,Parole,Recidivism,Regression discontinuity design}, + language = {en}, + number = {2} +} + +@inproceedings{bernstein_4chan_2011, + title = {4chan and /b/: {{An}} Analysis of Anonymity and Ephemerality in a Large Online Community}, + shorttitle = {4chan and /b/}, + booktitle = {Fifth {{International AAAI Conference}} on {{Weblogs}} and {{Social Media}}}, + author = {Bernstein, Michael Scott and {Monroy-Hern{\'a}ndez}, Andr{\'e}s and Harry, Drew and Andr{\'e}, Paul and Panovich, Katrina and Vargas, Greg}, + year = {2011}, + publisher = {{AAAI Publications}}, + address = {{Palo Alto, CA}}, + abstract = {4chan and /b/: An Analysis of Anonymity and Ephemerality in a Large Online Community}, + language = {en} +} + +@inproceedings{bernstein_4chan_2011-1, + title = {4chan and /b/: {{An Analysis}} of {{Anonymity}} and {{Ephemerality}} in a {{Large Online Community}}}, + shorttitle = {4chan and /b/}, + booktitle = {{{ICWSM}} 2011 (31/05/11)}, + author = {Bernstein, Michael and {Monroy-Hern{\'a}ndez}, Andr{\'e}s and Harry, Drew and Andr{\'e}, Paul and Panovich, Katrina and Vargas, Greg}, + year = {2011}, + month = jun, + abstract = {We present two studies of online ephemerality and anonymity based on the popular discussion board /b/ at 4chan.org: a website with over 7 million users that plays an influential role in Internet culture. 
Although researchers and practitioners often assume that user identity and data permanence are central tools in the design of online communities, we explore how /b/ succeeds despite being almost entirely anonymous and extremely ephemeral. We begin by describing /b/ and performing a content analysis that suggests the community is dominated by playful exchanges of images and links. Our first study uses a large dataset of more than five million posts to quantify ephemerality in /b/. We find that most threads spend just five seconds on the first page and less than five minutes on the site before expiring. Our second study is an analysis of identity signals on 4chan, finding that over 90\% of posts are made by fully anonymous users, with other identity signals adopted and discarded at will. We describe alternative mechanisms that /b/ participants use to establish status and frame their interactions}, + collaborator = {Bernstein, Michael and {Monroy-Hern{\'a}ndez}, Andr{\'e}s and Harry, Drew and Andr{\'e}, Paul and Panovich, Katrina and Vargas, Greg}, + file = {/home/nathante/Zotero/storage/W23GRE5M/Bernstein et al_2011_4chan and -b.pdf;/home/nathante/Zotero/storage/H3FX5ZCG/272345.html}, + language = {en} +} + +@techreport{bertanha_regression_2019, + title = {Regression {{Discontinuity Design}} with {{Many Thresholds}}}, + author = {Bertanha, Marinho}, + year = {2019}, + month = sep, + address = {{Rochester, NY}}, + institution = {{Social Science Research Network}}, + abstract = {Numerous empirical studies employ regression discontinuity designs with multiple cutoffs and heterogeneous treatments. A common practice is to normalize all the cutoffs to zero and estimate one effect. This procedure identifies the average treatment effect (ATE) on the observed distribution of individuals local to existing cutoffs. 
However, researchers often want to make inferences on more meaningful ATEs, computed over general counterfactual distributions of individuals, rather than simply the observed distribution of individuals local to existing cutoffs. This paper proposes a consistent and asymptotically normal estimator for such ATEs when heterogeneity follows a non-parametric function of cutoff characteristics in the sharp case. The proposed estimator converges at the minimax optimal rate of root-n for a specific choice of tuning parameters. Identification in the fuzzy case, with multiple cutoffs, is impossible unless heterogeneity follows a finite-dimensional function of cutoff characteristics. Under parametric heterogeneity, this paper proposes an ATE estimator for the fuzzy case that optimally combines observations to maximize its precision.}, + file = {/home/nathante/Zotero/storage/HVLPDV7I/papers.html}, + keywords = {Alternative Asymptotics,Average Treatment Effect,Multiple Cutoffs,Peer-effects,Regression Discontinuity Designs}, + language = {en}, + number = {ID 2712957}, + type = {{{SSRN Scholarly Paper}}} +} + +@article{bertrand_are_2004, + title = {Are {{Emily}} and {{Greg More Employable Than Lakisha}} and {{Jamal}}? {{A Field Experiment}} on {{Labor Market Discrimination}}}, + shorttitle = {Are {{Emily}} and {{Greg More Employable Than Lakisha}} and {{Jamal}}?}, + author = {Bertrand, Marianne and Mullainathan, Sendhil}, + year = {2004}, + month = sep, + volume = {94}, + pages = {991--1013}, + issn = {0002-8282}, + abstract = {We study race in the labor market by sending fictitious resumes to help-wanted ads in Boston and Chicago newspapers. To manipulate perceived race, resumes are randomly assigned African-American- or White-sounding names. White names receive 50 percent more callbacks for interviews. Callbacks are also more responsive to resume quality for White names than for African-American ones. The racial gap is uniform across occupation, industry, and employer size. 
We also find little evidence that employers are inferring social class from the names. Differential treatment by race still appears to still be prominent in the U. S. labor market.}, + file = {/home/nathante/Zotero/storage/HABWASWR/Bertrand_Mullainathan_2004_Are Emily and Greg More Employable Than Lakisha and Jamal.pdf;/home/nathante/Zotero/storage/HTKMZ7EC/articles.html}, + journal = {American Economic Review}, + keywords = {Economics of Minorities; Races; Indigenous Peoples; and Immigrants,Non-labor Discrimination; Labor Discrimination}, + language = {en}, + number = {4} +} + +@techreport{bertrand_field_2016, + title = {Field {{Experiments}} on {{Discrimination}}}, + author = {Bertrand, Marianne and Duflo, Esther}, + year = {2016}, + month = feb, + address = {{Cambridge, MA}}, + institution = {{National Bureau of Economic Research}}, + abstract = {This article reviews the existing field experimentation literature on the prevalence of discrimination, the consequences of such discrimination, and possible approaches to undermine it. We highlight key gaps in the literature and ripe opportunities for future field work. Section 1 reviews the various experimental methods that have been employed to measure the prevalence of discrimination, most notably audit and correspondence studies; it also describes several other measurement tools commonly used in lab-based work that deserve greater consideration in field research. Section 2 provides an overview of the literature on the costs of being stereotyped or discriminated against, with a focus on self-expectancy effects and self-fulfilling prophecies; section 2 also discusses the thin field-based literature on the consequences of limited diversity in organizations and groups. 
The final section of the paper, Section 3, reviews the evidence for policies and interventions aimed at weakening discrimination, covering role model and intergroup contact effects, as well as socio-cognitive and technological de-biasing strategies.}, + file = {/home/nathante/Zotero/storage/ECD5Z2Z5/Bertrand and Duflo - 2016 - Field Experiments on Discrimination.pdf}, + language = {en}, + number = {w22014} +} + +@article{bijker_how_2010, + title = {How Is Technology Made?\textemdash{{That}} Is the Question!}, + shorttitle = {How Is Technology Made?}, + author = {Bijker, Wiebe E.}, + year = {2010}, + month = jan, + volume = {34}, + pages = {63--76}, + issn = {0309-166X}, + abstract = {This article reviews constructivist technology studies, and especially the social construction of technology (SCOT). To investigate how these constructivist studies regard the ontology of technology, I will trace their historical development in units of analysis, methodological approaches and research questions. Constructivist technology studies are relativistic in only one sense: methodological. They are agnostic with respect to the ontology of technology. Constructivist studies of technology thus do not primarily answer the question `what is technology?'; they trace the process `how to make technology'.}, + file = {/home/nathante/Zotero/storage/NI9P8HKB/Bijker - 2010 - How is technology made—That is the question!.pdf;/home/nathante/Zotero/storage/DCAGQA6H/1702334.html}, + journal = {Cambridge Journal of Economics}, + language = {en}, + number = {1} +} + +@techreport{bird_exploring_2016, + title = {Exploring or {{Exploiting}}? 
{{Social}} and {{Ethical Implications}} of {{Autonomous Experimentation}} in {{AI}}}, + shorttitle = {Exploring or {{Exploiting}}?}, + author = {Bird, Sarah and Barocas, Solon and Crawford, Kate and Diaz, Fernando and Wallach, Hanna}, + year = {2016}, + month = oct, + address = {{Rochester, NY}}, + institution = {{Social Science Research Network}}, + abstract = {In the field of computer science, large-scale experimentation on users is not new. However, driven by advances in artificial intelligence, novel autonomous systems for experimentation are emerging that raise complex, unanswered questions for the field. Some of these questions are computational, while others relate to the social and ethical implications of these systems. We see these normative questions as urgent because they pertain to critical infrastructure upon which large populations depend, such as transportation and healthcare. Although experimentation on widely used online platforms like Facebook has stoked controversy in recent years, the unique risks posed by autonomous experimentation have not received sufficient attention, even though such techniques are being trialled on a massive scale. In this paper, we identify several questions about the social and ethical implications of autonomous experimentation systems. These questions concern the design of such systems, their effects on users, and their resistance to some common mitigations.}, + file = {/home/nathante/Zotero/storage/CVUYGCX7/Bird et al. 
- 2016 - Exploring or Exploiting Social and Ethical Implic.pdf;/home/nathante/Zotero/storage/NXMES5MG/papers.html}, + keywords = {Artificial Intelligence,Autonomous Experimentation,Ethics,Privacy}, + language = {en}, + number = {ID 2846909}, + type = {{{SSRN Scholarly Paper}}} +} + +@article{blackwell_classification_2017, + title = {Classification and {{Its Consequences}} for {{Online Harassment}}: {{Design Insights}} from {{HeartMob}}}, + shorttitle = {Classification and {{Its Consequences}} for {{Online Harassment}}}, + author = {Blackwell, Lindsay and Dimond, Jill and Schoenebeck, Sarita and Lampe, Cliff}, + year = {2017}, + month = dec, + volume = {1}, + pages = {24:1--24:19}, + issn = {2573-0142}, + abstract = {Online harassment is a pervasive and pernicious problem. Techniques like natural language processing and machine learning are promising approaches for identifying abusive language, but they fail to address structural power imbalances perpetuated by automated labeling and classification. Similarly, platform policies and reporting tools are designed for a seemingly homogenous user base and do not account for individual experiences and systems of social oppression. This paper describes the design and evaluation of HeartMob, a platform built by and for people who are disproportionately affected by the most severe forms of online harassment. We conducted interviews with 18 HeartMob users, both targets and supporters, about their harassment experiences and their use of the site. We examine systems of classification enacted by technical systems, platform policies, and users to demonstrate how 1) labeling serves to validate (or invalidate) harassment experiences; 2) labeling motivates bystanders to provide support; and 3) labeling content as harassment is critical for surfacing community norms around appropriate user behavior. 
We discuss these results through the lens of Bowker and Star's classification theories and describe implications for labeling and classifying online abuse. Finally, informed by intersectional feminist theory, we argue that fully addressing online harassment requires the ongoing integration of vulnerable users' needs into the design and moderation of online platforms.}, + file = {/home/nathante/Zotero/storage/JAS5LVNV/Blackwell et al_2017_Classification and Its Consequences for Online Harassment.pdf}, + journal = {Proc. ACM Hum.-Comput. Interact.}, + keywords = {bystanders,classification,intersectionality,labeling,moderation,online harassment,social norms,support}, + number = {CSCW} +} + +@article{boczkowski_processes_2004, + title = {The {{Processes}} of {{Adopting Multimedia}} and {{Interactivity}} in {{Three Online Newsrooms}}}, + author = {Boczkowski, Pablo J.}, + year = {2004}, + month = jun, + volume = {54}, + pages = {197--213}, + issn = {0021-9916}, + abstract = {This article examines the material culture of newsroom practices by focusing on the dynamics of the processes through which news workers adopt new technologies. More specifically, it looks at some key factors that shape the adoption of multimedia and interactive technologies in online newspapers. Through ethnographic case studies of innovations in 3 online newsrooms, I show that variations in organizational structures, work practices, and representations of the users are related to different ways in which members of the newsroom appropriate these technologies. 
I draw from this analysis to reflect on issues related to the technological dimension of editorial work and the dynamics of media convergence.}, + file = {/home/nathante/Zotero/storage/Q7R4C6AC/Boczkowski - 2004 - The Processes of Adopting Multimedia and Interacti.pdf;/home/nathante/Zotero/storage/DRN8NQN8/4102894.html}, + journal = {Journal of Communication}, + number = {2} +} + +@article{bogen_all_2019, + title = {All the {{Ways Hiring Algorithms Can Introduce Bias}}}, + author = {Bogen, Miranda}, + year = {2019}, + month = may, + issn = {0017-8012}, + abstract = {From job ads to salary offers.}, + file = {/home/nathante/Zotero/storage/PAE4HYM2/all-the-ways-hiring-algorithms-can-introduce-bias.html}, + journal = {Harvard Business Review}, + keywords = {Analytics,Hiring,Technology} +} + +@article{bordalo_salience_2012, + title = {Salience {{Theory}} of {{Choice Under Risk}}}, + author = {Bordalo, Pedro and Gennaioli, Nicola and Shleifer, Andrei}, + year = {2012}, + month = aug, + volume = {127}, + pages = {1243--1285}, + issn = {0033-5533}, + abstract = {Abstract. We present a theory of choice among lotteries in which the decision maker's attention is drawn to (precisely defined) salient payoffs. This leads th}, + file = {/home/nathante/Zotero/storage/X7NPN44P/Bordalo et al_2012_Salience Theory of Choice Under Risk.pdf;/home/nathante/Zotero/storage/6YQD8E66/1922202.html}, + journal = {The Quarterly Journal of Economics}, + language = {en}, + number = {3} +} + +@article{bosch-domenech_averting_2010, + title = {Averting Risk in the Face of Large Losses: {{Bernoulli}} vs. {{Tversky}} and {{Kahneman}}}, + shorttitle = {Averting Risk in the Face of Large Losses}, + author = {{Bosch-Dom{\`e}nech}, Antoni and Silvestre, Joaquim}, + year = {2010}, + month = may, + volume = {107}, + pages = {180--182}, + issn = {01651765}, + abstract = {Prospect Theory asserts that people display risk attraction in high-probability losses. 
But our subjects tend to avoid fair risks for large (\texteuro 30 to \texteuro 90), high-probability (80\%) real losses, vindicating Bernoulli's view that risk aversion is the dominant attitude.}, + file = {/home/nathante/Zotero/storage/FXMFEUYU/Bosch-Domènech and Silvestre - 2010 - Averting risk in the face of large losses Bernoul.pdf}, + journal = {Economics Letters}, + language = {en}, + number = {2} +} + +@book{bowker_sorting_2008, + title = {Sorting Things out: Classification and Its Consequences}, + author = {Bowker, Geoffrey C and Star, Susan Leigh}, + year = {2008}, + publisher = {{MIT Press}}, + address = {{Cambridge, Mass.}}, + annotation = {OCLC: 699516543}, + isbn = {978-0-262-02461-7 978-0-262-52295-3}, + language = {English} +} + +@article{boyd_critical_2012, + title = {Critical {{Questions For Big Data}}: {{Provocations}} for a Cultural, Technological, and Scholarly Phenomenon}, + shorttitle = {{{CRITICAL QUESTIONS FOR BIG DATA}}}, + author = {{boyd}, danah and Crawford, Kate}, + year = {2012}, + month = jun, + volume = {15}, + pages = {662--679}, + issn = {1369-118X, 1468-4462}, + file = {/home/nathante/Zotero/storage/GK7IGF5E/c3482eab3cd1fde0bd3ae88632996fe37c97e70dd7d0e9578784296ae0b87c05.pdf;/home/nathante/Zotero/storage/IWR3F2J2/boyd and Crawford - 2012 - Critical Questions for Big Data.pdf}, + journal = {Information, Communication \& Society}, + language = {en}, + number = {5} +} + +@inproceedings{boyd_profiles_2006, + title = {Profiles as {{Conversation}}: {{Networked Identity Performance}} on {{Friendster}}}, + shorttitle = {Profiles as {{Conversation}}}, + booktitle = {Proceedings of the 39th {{Annual Hawaii International Conference}} on {{System Sciences}} ({{HICSS}}'06)}, + author = {Boyd, D. and Heer, J.}, + year = {2006}, + month = jan, + volume = {3}, + pages = {59c-59c}, + issn = {1530-1605}, + abstract = {Profiles have become a common mechanism for presenting one's identity online. 
With the popularity of online social networking services such as Friendster.com, Profiles have been extended to include explicitly social information such as articulated "Friend" relationships and Testimonials. With such Profiles, users do not just depict themselves, but help shape the representation of others on the system. In this paper, we will discuss how the performance of social identity and relationships shifted the Profile from being a static representation of self to a communicative body in conversation with the other represented bodies. We draw on data gathered through ethnography and reaffirmed through data collection and visualization to analyze the communicative aspects of Profiles within the Friendster service. We focus on the role of Profiles in context creation and interpretation, negotiating unknown audiences, and initiating conversations. Additionally, we explore the shift from conversation to static representation, as active Profiles fossilize into recorded traces.}, + file = {/home/nathante/Zotero/storage/QYVMJELB/Boyd_Heer_2006_Profiles as Conversation.pdf;/home/nathante/Zotero/storage/28HRJAAP/1579411.html}, + keywords = {Art,Context,Data visualization,Digital communication,Muscles,Psychology,Reflection,Social network services,Sociology,Testing} +} + +@article{brayne_big_2017, + title = {Big {{Data Surveillance}}: {{The Case}} of {{Policing}}}, + shorttitle = {Big {{Data Surveillance}}}, + author = {Brayne, Sarah}, + year = {2017}, + month = oct, + volume = {82}, + pages = {977--1008}, + issn = {0003-1224}, + abstract = {This article examines the intersection of two structural developments: the growth of surveillance and the rise of ``big data.'' Drawing on observations and interviews conducted within the Los Angeles Police Department, I offer an empirical account of how the adoption of big data analytics does\textemdash and does not\textemdash transform police surveillance practices. 
I argue that the adoption of big data analytics facilitates amplifications of prior surveillance practices and fundamental transformations in surveillance activities. First, discretionary assessments of risk are supplemented and quantified using risk scores. Second, data are used for predictive, rather than reactive or explanatory, purposes. Third, the proliferation of automatic alert systems makes it possible to systematically surveil an unprecedentedly large number of people. Fourth, the threshold for inclusion in law enforcement databases is lower, now including individuals who have not had direct police contact. Fifth, previously separate data systems are merged, facilitating the spread of surveillance into a wide range of institutions. Based on these findings, I develop a theoretical model of big data surveillance that can be applied to institutional domains beyond the criminal justice system. Finally, I highlight the social consequences of big data surveillance for law and social inequality.}, + file = {/home/nathante/Zotero/storage/FRJ3CBV6/Brayne - 2017 - Big Data Surveillance The Case of Policing.pdf}, + journal = {American Sociological Review}, + language = {en}, + number = {5} +} + +@article{brodersen_inferring_2015, + title = {Inferring Causal Impact Using {{Bayesian}} Structural Time-Series Models}, + author = {Brodersen, Kay H. and Gallusser, Fabian and Koehler, Jim and Remy, Nicolas and Scott, Steven L.}, + year = {2015}, + month = mar, + volume = {9}, + pages = {247--274}, + issn = {1932-6157, 1941-7330}, + abstract = {An important problem in econometrics and marketing is to infer the causal impact that a designed market intervention has exerted on an outcome metric over time. This paper proposes to infer causal impact on the basis of a diffusion-regression state-space model that predicts the counterfactual market response in a synthetic control that would have occurred had no intervention taken place. 
In contrast to classical difference-in-differences schemes, state-space models make it possible to (i) infer the temporal evolution of attributable impact, (ii) incorporate empirical priors on the parameters in a fully Bayesian treatment, and (iii) flexibly accommodate multiple sources of variation, including local trends, seasonality and the time-varying influence of contemporaneous covariates. Using a Markov chain Monte Carlo algorithm for posterior inference, we illustrate the statistical properties of our approach on simulated data. We then demonstrate its practical utility by estimating the causal effect of an online advertising campaign on search-related site visits. We discuss the strengths and limitations of state-space models in enabling causal attribution in those settings where a randomised experiment is unavailable. The CausalImpact R package provides an implementation of our approach.}, + file = {/home/nathante/Zotero/storage/Q55S5H7A/Brodersen et al. - 2015 - Inferring causal impact using Bayesian structural .pdf;/home/nathante/Zotero/storage/HLZVJ5W2/1430226092.html}, + journal = {The Annals of Applied Statistics}, + keywords = {advertising,Causal inference,counterfactual,difference in differences,econometrics,market research,observational,synthetic control}, + language = {EN}, + mrnumber = {MR3341115}, + number = {1}, + zmnumber = {06446568} +} + +@book{broughton_wikipedia_2008, + title = {Wikipedia the Missing Manual}, + author = {Broughton, John}, + year = {2008}, + publisher = {{Pogue Press/O'Reilly}}, + address = {{Beijing; Sebastopol, CA}}, + annotation = {OCLC: 708321411}, + isbn = {978-0-596-51516-4}, + language = {English} +} + +@article{brubaker_beyond_2000, + title = {Beyond "{{Identity}}"}, + author = {Brubaker, Rogers and Cooper, Frederick}, + year = {2000}, + volume = {29}, + pages = {1--47}, + issn = {0304-2421}, + file = {/home/nathante/Zotero/storage/UJT35WGG/Brubaker and Cooper - 2000 - Beyond Identity.pdf}, + journal = {Theory and 
Society}, + keywords = {social identity theory}, + number = {1} +} + +@inproceedings{bryant_becoming_2005, + title = {Becoming {{Wikipedian}}: Transformation of Participation in a Collaborative Online Encyclopedia}, + shorttitle = {Becoming {{Wikipedian}}}, + booktitle = {Proceedings of the 2005 {{International ACM SIGGROUP Conference}} on {{Supporting Group Work}}}, + author = {Bryant, Susan L. and Forte, Andrea and Bruckman, Amy}, + year = {2005}, + pages = {1--10}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {Traditional activities change in surprising ways when computer-mediated communication becomes a component of the activity system. In this descriptive study, we leverage two perspectives on social activity to understand the experiences of individuals who became active collaborators in Wikipedia, a prolific, cooperatively-authored online encyclopedia. Legitimate peripheral participation provides a lens for understanding participation in a community as an adaptable process that evolves over time. We use ideas from activity theory as a framework to describe our results. Finally, we describe how activity on the Wikipedia stands in striking contrast to traditional publishing and suggests a new paradigm for collaborative systems.}, + file = {/home/nathante/Zotero/storage/QMIZANQT/Bryant et al. 
- 2005 - Becoming Wikipedian transformation of participati.pdf}, + isbn = {1-59593-223-2}, + keywords = {activity theory,community,legitimate peripheral participation,qualitative,Wiki,wikipedia}, + series = {{{GROUP}} '05} +} + +@inproceedings{butler_dont_2008, + title = {Don't Look Now, but We've Created a Bureaucracy: {{The}} Nature and Roles of Policies and Rules in {{Wikipedia}}}, + shorttitle = {Don't {{Look Now}}, but {{We}}'{{Ve Created}} a {{Bureaucracy}}}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Butler, Brian and Joyce, Elisabeth and Pike, Jacqueline}, + year = {2008}, + pages = {1101--1110}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Wikis are sites that support the development of emergent, collective infrastructures that are highly flexible and open, suggesting that the systems that use them will be egalitarian, free, and unstructured. Yet it is apparent that the flexible infrastructure of wikis allows the development and deployment of a wide range of structures. However, we find that the policies in Wikipedia and the systems and mechanisms that operate around them are multi-faceted. In this descriptive study, we draw on prior work on rules and policies in organizations to propose and apply a conceptual framework for understanding the natures and roles of policies in wikis. We conclude that wikis are capable of supporting a broader range of structures and activities than other collaborative platforms. Wikis allow for and, in fact, facilitate the creation of policies that serve a wide variety of functions.}, + file = {/home/nathante/Zotero/storage/B6DMALCN/Butler et al. 
- 2008 - Don't look now, but we've created a bureaucracy T.pdf}, + isbn = {978-1-60558-011-1}, + keywords = {community,dynamics,policies,policy,rules,wikipedia,wikis}, + series = {{{CHI}} '08} +} + +@article{cai_human-centered_2019, + title = {Human-{{Centered Tools}} for {{Coping}} with {{Imperfect Algorithms}} during {{Medical Decision}}-{{Making}}}, + author = {Cai, Carrie J. and Reif, Emily and Hegde, Narayan and Hipp, Jason and Kim, Been and Smilkov, Daniel and Wattenberg, Martin and Viegas, Fernanda and Corrado, Greg S. and Stumpe, Martin C. and Terry, Michael}, + year = {2019}, + month = feb, + abstract = {Machine learning (ML) is increasingly being used in image retrieval systems for medical decision making. One application of ML is to retrieve visually similar medical images from past patients (e.g. tissue from biopsies) to reference when making a medical decision with a new patient. However, no algorithm can perfectly capture an expert's ideal notion of similarity for every case: an image that is algorithmically determined to be similar may not be medically relevant to a doctor's specific diagnostic needs. In this paper, we identified the needs of pathologists when searching for similar images retrieved using a deep learning algorithm, and developed tools that empower users to cope with the search algorithm on-the-fly, communicating what types of similarity are most important at different moments in time. In two evaluations with pathologists, we found that these refinement tools increased the diagnostic utility of images found and increased user trust in the algorithm. The tools were preferred over a traditional interface, without a loss in diagnostic accuracy. We also observed that users adopted new strategies when using refinement tools, re-purposing them to test and understand the underlying algorithm and to disambiguate ML errors from their own errors. 
Taken together, these findings inform future human-ML collaborative systems for expert decision-making.}, + archivePrefix = {arXiv}, + eprint = {1902.02960}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/XME3KEXB/Cai et al. - 2019 - Human-Centered Tools for Coping with Imperfect Alg.pdf;/home/nathante/Zotero/storage/CS2YXH3Q/1902.html}, + journal = {arXiv:1902.02960 [cs]}, + keywords = {Computer Science - Computers and Society,Computer Science - Human-Computer Interaction}, + primaryClass = {cs} +} + +@article{caliskan_semantics_2017, + title = {Semantics Derived Automatically from Language Corpora Contain Human-like Biases}, + author = {Caliskan, Aylin and Bryson, Joanna J. and Narayanan, Arvind}, + year = {2017}, + month = apr, + volume = {356}, + pages = {183--186}, + issn = {0036-8075, 1095-9203}, + abstract = {Machines learn what people know implicitly AlphaGo has demonstrated that a machine can learn how to do things that people spend many years of concentrated study learning, and it can rapidly learn how to do them better than any human can. Caliskan et al. now show that machines can learn word associations from written texts and that these associations mirror those learned by humans, as measured by the Implicit Association Test (IAT) (see the Perspective by Greenwald). Why does this matter? Because the IAT has predictive value in uncovering the association between concepts, such as pleasantness and flowers or unpleasantness and insects. It can also tease out attitudes and beliefs\textemdash for example, associations between female names and family or male names and career. Such biases may not be expressed explicitly, yet they can prove influential in behavior. Science, this issue p. 183; see also p. 133 Machine learning is a means to derive artificial intelligence by discovering patterns in existing data. Here, we show that applying machine learning to ordinary human language results in human-like semantic biases. 
We replicated a spectrum of known biases, as measured by the Implicit Association Test, using a widely used, purely statistical machine-learning model trained on a standard corpus of text from the World Wide Web. Our results indicate that text corpora contain recoverable and accurate imprints of our historic biases, whether morally neutral as toward insects or flowers, problematic as toward race or gender, or even simply veridical, reflecting the status quo distribution of gender with respect to careers or first names. Our methods hold promise for identifying and addressing sources of bias in culture, including technology. Computers can learn which words go together more or less often and can thus mimic human performance on a test of implicit bias. Computers can learn which words go together more or less often and can thus mimic human performance on a test of implicit bias.}, + copyright = {Copyright \textcopyright{} 2017, American Association for the Advancement of Science}, + file = {/home/nathante/Zotero/storage/9K65BZSX/Caliskan et al. - 2017 - Semantics derived automatically from language corp.pdf;/home/nathante/Zotero/storage/Q83HU9M9/183.html}, + journal = {Science}, + language = {en}, + number = {6334}, + pmid = {28408601} +} + +@article{callahan_cultural_2011, + title = {Cultural Bias in {{Wikipedia}} Content on Famous Persons}, + author = {Callahan, Ewa S. and Herring, Susan C.}, + year = {2011}, + volume = {62}, + pages = {1899--1915}, + issn = {1532-2890}, + abstract = {Wikipedia advocates a strict ``neutral point of view'' (NPOV) policy. However, although originally a U.S-based, English-language phenomenon, the online, user-created encyclopedia now has versions in many languages. This study examines the extent to which content and perspectives vary across cultures by comparing articles about famous persons in the Polish and English editions of Wikipedia. 
The results of quantitative and qualitative content analyses reveal systematic differences related to the different cultures, histories, and values of Poland and the United States; at the same time, a U.S./English-language advantage is evident throughout. In conclusion, the implications of these findings for the quality and objectivity of Wikipedia as a global repository of knowledge are discussed, and recommendations are advanced for Wikipedia end users and content developers.}, + copyright = {\textcopyright{} 2011 ASIS\&T}, + file = {/home/nathante/Zotero/storage/Y2N7GCKN/asi.html}, + journal = {Journal of the American Society for Information Science and Technology}, + language = {en}, + number = {10} +} + +@article{campolo_ai_2017, + title = {{{AI}} Now 2017 Report}, + author = {Campolo, Alex and Sanfilippo, Madelyn and Whittaker, Meredith and Crawford, Kate}, + year = {2017}, + journal = {AI Now Institute at New York University} +} + +@inproceedings{caraban_23_2019, + title = {23 {{Ways}} to {{Nudge}}: {{A Review}} of {{Technology}}-{{Mediated Nudging}} in {{Human}}-{{Computer Interaction}}}, + shorttitle = {23 {{Ways}} to {{Nudge}}}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Caraban, Ana and Karapanos, Evangelos and Gon{\c c}alves, Daniel and Campos, Pedro}, + year = {2019}, + month = may, + pages = {1--15}, + publisher = {{Association for Computing Machinery}}, + address = {{Glasgow, Scotland Uk}}, + abstract = {Ten years ago, Thaler and Sunstein introduced the notion of nudging to talk about how subtle changes in the 'choice architecture' can alter people's behaviors in predictable ways. This idea was eagerly adopted in HCI and applied in multiple contexts, including health, sustainability and privacy. Despite this, we still lack an understanding of how to design effective technology-mediated nudges. 
In this paper we present a systematic review of the use of nudging in HCI research with the goal of laying out the design space of technology-mediated nudging - the why (i.e., which cognitive biases do nudges combat) and the how (i.e., what exact mechanisms do nudges employ to incur behavior change). All in all, we found 23 distinct mechanisms of nudging, grouped in 6 categories, and leveraging 15 different cognitive biases. We present these as a framework for technology-mediated nudging, and discuss the factors shaping nudges' effectiveness and their ethical implications.}, + file = {/home/nathante/Zotero/storage/T7HSR4S2/Caraban et al_2019_23 Ways to Nudge.pdf}, + isbn = {978-1-4503-5970-2}, + keywords = {behavioral economics,nudging,persuasive technology}, + series = {{{CHI}} '19} +} + +@article{champion_forensic_2019, + title = {A {{Forensic Qualitative Analysis}} of {{Contributions}} to {{Wikipedia}} from {{Anonymity Seeking Users}}}, + author = {Champion, Kaylea and McDonald, Nora and Bankes, Stephanie and Zhang, Joseph and Greenstadt, Rachel and Forte, Andrea and Hill, Benjamin Mako}, + year = {2019}, + month = nov, + volume = {3}, + pages = {53:1--53:26}, + issn = {2573-0142}, + abstract = {By choice or by necessity, some contributors to commons-based peer production sites use privacy-protecting services to remain anonymous. As anonymity seekers, users of the Tor network have been cast both as ill-intentioned vandals and as vulnerable populations concerned with their privacy. In this study, we use a dataset drawn from a corpus of Tor edits to Wikipedia to uncover the character of Tor users' contributions. We build in-depth narrative descriptions of Tor users' actions and conduct a thematic analysis that places their editing activity into seven broad groups. 
We find that although their use of a privacy-protecting service marks them as unusual within Wikipedia, the character of many Tor users' contributions is in line with the expectations and norms of Wikipedia. However, our themes point to several important places where lack of trust promotes disorder, and to contributions where risks to contributors, service providers, and communities are unaligned.}, + file = {/home/nathante/Zotero/storage/G8KWEDXX/Champion et al_2019_A Forensic Qualitative Analysis of Contributions to Wikipedia from Anonymity.pdf}, + journal = {Proc. ACM Hum.-Comput. Interact.}, + keywords = {anonymity,forensic analysis,forensic qualitative analysis,online communities,peer production,privacy,thematic analysis,threat models,tor,user-generated content,wikipedia}, + number = {CSCW} +} + +@inproceedings{chancellor_relationships_2019, + title = {The {{Relationships Between Data}}, {{Power}}, and {{Justice}} in {{CSCW Research}}}, + booktitle = {Conference {{Companion Publication}} of the 2019 on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Chancellor, Stevie and Guha, Shion and Kaye, Jofish and King, Jen and Salehi, Niloufar and Schoenebeck, Sarita and Stowell, Elizabeth}, + year = {2019}, + pages = {102--105}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Many decisions about social, economic, and personal life are heavily data-driven. At the same time, data has become increasingly quantified, and available to people and institutions in positions of power, often with little introspection or reflection on its positive uses or harmful misuses. This panel will inspect CSCW's role in identifying constructive and appropriate uses of data and its responsibility for protecting against harms and inequalities perpetuated by misuse. The panel will present a series of debates about quantification of data, data surveillance, organizational data use, and policy making. 
An overarching theme throughout the set of debates is interrogating CSCW's role in extending critical scholarship on power and justice towards academic, policy, and industry impact.}, + file = {/home/nathante/Zotero/storage/DM2TIT3T/Chancellor et al_2019_The Relationships Between Data, Power, and Justice in CSCW Research.pdf}, + isbn = {978-1-4503-6692-2}, + keywords = {data,equity,gender,health,justice,power,privacy,surveillance}, + series = {{{CSCW}} '19} +} + +@inproceedings{chancellor_thyghgapp:_2016, + title = {\#thyghgapp: {{Instagram Content Moderation}} and {{Lexical Variation}} in {{Pro}}-{{Eating Disorder Communities}}}, + shorttitle = {\#thyghgapp}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer}}-{{Supported Cooperative Work}} \& {{Social Computing}} - {{CSCW}} '16}, + author = {Chancellor, Stevie and Pater, Jessica Annette and Clear, Trustin A and Gilbert, Eric and De Choudhury, Munmun}, + year = {2016}, + pages = {1199--1211}, + publisher = {{ACM Press}}, + address = {{San Francisco, California, USA}}, + file = {/home/nathante/Zotero/storage/GRIUSC4F/Chancellor et al. - 2016 - #thyghgapp Instagram Content Moderation and Lexic.pdf}, + isbn = {978-1-4503-3592-8}, + language = {en} +} + +@article{chandrasekharan_crossmod:_2019, + title = {Crossmod: {{A Cross}}-{{Community Learning}}-Based {{System}} to {{Assist Reddit Moderators}}}, + shorttitle = {Crossmod}, + author = {Chandrasekharan, Eshwar and Gandhi, Chaitrali and Mustelier, Matthew Wortley and Gilbert, Eric}, + year = {2019}, + month = nov, + volume = {3}, + pages = {174:1--174:30}, + issn = {2573-0142}, + abstract = {In this paper, we introduce a novel sociotechnical moderation system for Reddit called Crossmod. Through formative interviews with 11 active moderators from 10 different subreddits, we learned about the limitations of currently available automated tools, and how a new system could extend their capabilities. 
Developed out of these interviews, Crossmod makes its decisions based on cross-community learning---an approach that leverages a large corpus of previous moderator decisions via an ensemble of classifiers. Finally, we deployed Crossmod in a controlled environment, simulating real-time conversations from two large subreddits with over 10M subscribers each. To evaluate Crossmod's moderation recommendations, 4 moderators reviewed comments scored by Crossmod that had been drawn randomly from existing threads. Crossmod achieved an overall accuracy of 86\% when detecting comments that would be removed by moderators, with high recall (over 87.5\%). Additionally, moderators reported that they would have removed 95.3\% of the comments flagged by Crossmod; however, 98.3\% of these comments were still online at the time of this writing (i.e., not removed by the current moderation system). To the best of our knowledge, Crossmod is the first open source, AI-backed sociotechnical moderation system to be designed using participatory methods.}, + file = {/home/nathante/Zotero/storage/UI6AZIHJ/Chandrasekharan et al_2019_Crossmod.pdf}, + journal = {Proc. ACM Hum.-Comput. Interact. CSCW}, + keywords = {ai,community norms,machine learning,mixed initiative,moderation,online communities,online governance,open source.,participatory design,sociotechnical systems} +} + +@inproceedings{chang_trajectories_2019, + title = {Trajectories of {{Blocked Community Members}}: {{Redemption}}, {{Recidivism}} and {{Departure}}}, + shorttitle = {Trajectories of {{Blocked Community Members}}}, + booktitle = {The {{World Wide Web Conference}}}, + author = {Chang, Jonathan and {Danescu-Niculescu-Mizil}, Cristian}, + year = {2019}, + pages = {184--195}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Community norm violations can impair constructive communication and collaboration online. 
As a defense mechanism, community moderators often address such transgressions by temporarily blocking the perpetrator. Such actions, however, come with the cost of potentially alienating community members. Given this tradeoff, it is essential to understand to what extent, and in which situations, this common moderation practice is effective in reinforcing community rules. In this work, we introduce a computational framework for studying the future behavior of blocked users on Wikipedia. After their block expires, they can take several distinct paths: they can reform and adhere to the rules, but they can also recidivate, or straight-out abandon the community. We reveal that these trajectories are tied to factors rooted both in the characteristics of the blocked individual and in whether they perceived the block to be fair and justified. Based on these insights, we formulate a series of prediction tasks aiming to determine which of these paths a user is likely to take after being blocked for their first offense, and demonstrate the feasibility of these new tasks. Overall, this work builds towards a more nuanced approach to moderation by highlighting the tradeoffs that are in play.}, + file = {/home/nathante/Zotero/storage/RVAFNAH5/Chang_Danescu-Niculescu-Mizil_2019_Trajectories of Blocked Community Members.pdf}, + isbn = {978-1-4503-6674-8}, + series = {{{WWW}} '19} +} + +@article{cheng_additive_2019, + title = {An Additive {{Gaussian}} Process Regression Model for Interpretable Non-Parametric Analysis of Longitudinal Data}, + author = {Cheng, Lu and Ramchandran, Siddharth and Vatanen, Tommi and Lietz{\'e}n, Niina and Lahesmaa, Riitta and Vehtari, Aki and L{\"a}hdesm{\"a}ki, Harri}, + year = {2019}, + month = apr, + volume = {10}, + pages = {1798}, + issn = {2041-1723}, + abstract = {Longitudinal data are common in biomedical research, but their analysis is often challenging. 
Here, the authors present an additive Gaussian process regression model specifically designed for statistical analysis of longitudinal experimental data.}, + copyright = {2019 The Author(s)}, + journal = {Nature Communications}, + language = {En}, + number = {1} +} + +@article{chouldechova_fair_2017, + title = {Fair {{Prediction}} with {{Disparate Impact}}: {{A Study}} of {{Bias}} in {{Recidivism Prediction Instruments}}}, + shorttitle = {Fair {{Prediction}} with {{Disparate Impact}}}, + author = {Chouldechova, Alexandra}, + year = {2017}, + month = jun, + volume = {5}, + pages = {153--163}, + issn = {2167-6461}, + abstract = {Recidivism prediction instruments (RPIs) provide decision-makers with an assessment of the likelihood that a criminal defendant will reoffend at a future point in time. Although such instruments are gaining increasing popularity across the country, their use is attracting tremendous controversy. Much of the controversy concerns potential discriminatory bias in the risk assessments that are produced. This article discusses several fairness criteria that have recently been applied to assess the fairness of RPIs. We demonstrate that the criteria cannot all be simultaneously satisfied when recidivism prevalence differs across groups. 
We then show how disparate impact can arise when an RPI fails to satisfy the criterion of error rate balance.}, + file = {/home/nathante/Zotero/storage/RRHGATGP/Chouldechova - 2017 - Fair Prediction with Disparate Impact A Study of .pdf;/home/nathante/Zotero/storage/Z4BTCQWG/big.2016.html}, + journal = {Big Data}, + number = {2} +} + +@article{coleman_social_1988, + title = {Social {{Capital}} in the {{Creation}} of {{Human Capital}}}, + author = {Coleman, James S.}, + year = {1988}, + volume = {94}, + pages = {S95-S120}, + issn = {0002-9602}, + abstract = {In this paper, the concept of social capital is introduced and illustrated, its forms are described, the social structural conditions under which it arises are examined, and it is used in an analysis of dropouts from high school. Use of the concept of social capital is part of a general theoretical strategy discussed in the paper: taking rational action as a starting point but rejecting the extreme individualistic premises that often accompany it. The conception of social capital as a resource for action is one way of introducing social structure into the rational action paradigm. Three forms of social capital are examined: obligations and expectations, information channels, and social norms. The role of closure in the social structure in facilitating the first and third of these forms of social capital is described. An analysis of the effect of the lack of social capital available to high school sophomores on dropping out of school before graduation is carried out. 
The effect of social capital within the family and in the community outside the family is examined.}, + file = {/home/nathante/Zotero/storage/6IT8T9R4/Coleman_1988_Social Capital in the Creation of Human Capital.pdf;/home/nathante/Zotero/storage/NMBEIQNM/Coleman - 1988 - Social Capital in the Creation of Human Capital.html}, + journal = {American Journal of Sociology} +} + +@inproceedings{collier_conflict_2012, + title = {Conflict, {{Criticism}}, or {{Confidence}}: {{An Empirical Examination}} of the {{Gender Gap}} in {{Wikipedia Contributions}}}, + shorttitle = {Conflict, {{Criticism}}, or {{Confidence}}}, + booktitle = {Proceedings of the {{ACM}} 2012 {{Conference}} on {{Computer Supported Cooperative Work}}}, + author = {Collier, Benjamin and Bear, Julia}, + year = {2012}, + pages = {383--392}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {A recent survey of contributors to Wikipedia found that less than 15\% of contributors are women. This gender contribution gap has received significant attention from both researchers and the media. A panel of researchers and practitioners has offered several insights and opinions as to why a gender gap exists in contributions despite gender anonymity online. The gender research literature suggests that the difference in contribution rates could be due to three factors: (1) the high levels of conflict in discussions, (2) dislike of critical environments, and (3) lack of confidence in editing other contributors' work. This paper examines these hypotheses regarding the existence of the gender gap in contribution by using data from an international survey of 176,192 readers, contributors, and former contributors to Wikipedia, including measures of demographics, education, motivation, and participation. 
Implications for improving the design and culture of online communities to be more gender inclusive are discussed.}, + file = {/home/nathante/Zotero/storage/QCMRMHU7/Collier and Bear - 2012 - Conflict, Criticism, or Confidence An Empirical E.pdf}, + isbn = {978-1-4503-1086-4}, + keywords = {confidence,conflict,criticism,gender,survey,wikipedia}, + series = {{{CSCW}} '12} +} + +@article{cowgill_impact_nodate, + title = {The {{Impact}} of {{Algorithms}} on {{Judicial Discretion}}: {{Evidence}} from {{Regression Discontinuities}}}, + author = {Cowgill, Bo}, + pages = {24}, + abstract = {How do judges use algorithmic suggestions in criminal proceedings? I study bail-setting in criminal cases in Broward County Florida, where judges are provided predictions of defendants' recidivism using an algorithm derived from historical data. The algorithm's output is continuous, but is shared with judges in rounded buckets (low, medium and high). Using the underlying continuous score, I examine judicial decisions close to the thresholds using a regression discontinuity design. Defendants slightly above the thresholds are detained an average extra one to four weeks before trial, depending on the threshold. Black defendants' outcomes are more sensitive to the thresholds' than white defendants. When I link jail decisions to outcomes, I find that the extra jail-time given to defendants above the thresholds corresponds to a small increase in recidivism within two years. These results suggest that algorithmic suggestions have a causal impact on criminal proceedings and recidivism.}, + file = {/home/nathante/Zotero/storage/TR3XZX7Z/Cowgill_The Impact of Algorithms on Judicial Discretion.pdf}, + language = {en} +} + +@article{crawford_what_2016, + title = {What Is a Flag for? 
{{Social}} Media Reporting Tools and the Vocabulary of Complaint}, + shorttitle = {What Is a Flag For?}, + author = {Crawford, Kate and Gillespie, Tarleton}, + year = {2016}, + month = mar, + volume = {18}, + pages = {410--428}, + issn = {1461-4448, 1461-7315}, + abstract = {The flag is now a common mechanism for reporting offensive content to an online platform, and is used widely across most popular social media sites. It serves both as a solution to the problem of curating massive collections of user-generated content and as a rhetorical justification for platform owners when they decide to remove content. Flags are becoming a ubiquitous mechanism of governance\textemdash yet their meaning is anything but straightforward. In practice, the interactions between users, flags, algorithms, content moderators, and platforms are complex and highly strategic. Significantly, flags are asked to bear a great deal of weight, arbitrating both the relationship between users and platforms, and the negotiation around contentious public issues. In this essay, we unpack the working of the flag, consider alternatives that give greater emphasis to public deliberation, and consider the implications for online public discourse of this now commonplace yet rarely studied sociotechnical mechanism.}, + file = {/home/nathante/Zotero/storage/UUKRMQDL/Crawford_Gillespie_2016_What is a flag for.pdf;/home/nathante/Zotero/storage/NI3S35Z7/410.html}, + journal = {New Media \& Society}, + keywords = {community,Facebook,flagging,norms,platforms,twitter,YouTube}, + language = {en}, + number = {3} +} + +@article{dawes_clinical_1989, + title = {Clinical versus Actuarial Judgment}, + author = {Dawes, R. M. and Faust, D. and Meehl, P. 
E.}, + year = {1989}, + month = mar, + volume = {243}, + pages = {1668--1674}, + issn = {0036-8075, 1095-9203}, + abstract = {Professionals are frequently consulted to diagnose and predict human behavior; optimal treatment and planning often hinge on the consultant's judgmental accuracy. The consultant may rely on one of two contrasting approaches to decision-making--the clinical and actuarial methods. Research comparing these two approaches shows the actuarial method to be superior. Factors underlying the greater accuracy of actuarial methods, sources of resistance to the scientific findings, and the benefits of increased reliance on actuarial approaches are discussed.}, + copyright = {\textcopyright{} 1989}, + file = {/home/nathante/Zotero/storage/E8HV359M/Dawes et al. - 1989 - Clinical versus actuarial judgment.pdf;/home/nathante/Zotero/storage/CUBGWHPI/1668.html}, + journal = {Science}, + language = {en}, + number = {4899}, + pmid = {2648573} +} + +@article{de_laat_profiling_2016, + title = {Profiling Vandalism in {{Wikipedia}}: {{A Schauerian}} Approach to Justification}, + shorttitle = {Profiling Vandalism in {{Wikipedia}}}, + author = {{de Laat}, Paul B.}, + year = {2016}, + month = jun, + volume = {18}, + pages = {131--148}, + issn = {1572-8439}, + abstract = {In order to fight massive vandalism the English-language Wikipedia has developed a system of surveillance which is carried out by humans and bots, supported by various tools. Central to the selection of edits for inspection is the process of using filters or profiles. Can this profiling be justified? On the basis of a careful reading of Frederick Schauer's books about rules in general (1991) and profiling in particular (2003) I arrive at several conclusions. The effectiveness, efficiency, and risk-aversion of edit selection all greatly increase as a result. The argument for increasing predictability suggests making all details of profiling manifestly public. 
Also, a wider distribution of the more sophisticated anti-vandalism tools seems indicated. As to the specific dimensions used in profiling, several critical remarks are developed. When patrollers use `assisted editing' tools, severe `overuse' of several features (anonymity, warned before) is a definite possibility, undermining profile efficacy. The easy remedy suggested is to render all of them invisible on the interfaces as displayed to patrollers. Finally, concerning not only assisted editing tools but tools against vandalism generally, it is argued that the anonymity feature is a sensitive category: anons have been in dispute for a long time (while being more prone to vandalism). Targeting them as a special category violates the social contract upon which Wikipedia is based. The feature is therefore a candidate for mandatory `underuse': it should be banned from all anti-vandalism filters and profiling algorithms, and no longer be visible as a special edit trait.}, + file = {/home/nathante/Zotero/storage/RVTRCFUQ/de Laat - 2016 - Profiling vandalism in Wikipedia A Schauerian app.pdf}, + journal = {Ethics and Information Technology}, + keywords = {Algorithms,Bots,Discrimination,Profiling,Rules,Vandalism,Wikipedia}, + language = {en}, + number = {2} +} + +@article{de_laat_use_2015, + title = {The Use of Software Tools and Autonomous Bots against Vandalism: Eroding {{Wikipedia}}'s Moral Order?}, + shorttitle = {The Use of Software Tools and Autonomous Bots against Vandalism}, + author = {{de Laat}, Paul B.}, + year = {2015}, + month = sep, + volume = {17}, + pages = {175--188}, + issn = {1572-8439}, + abstract = {English-language Wikipedia is constantly being plagued by vandalistic contributions on a massive scale. In order to fight them its volunteer contributors deploy an array of software tools and autonomous bots. 
After an analysis of their functioning and the `coactivity' in use between humans and bots, this research `discloses' the moral issues that emerge from the combined patrolling by humans and bots. Administrators provide the stronger tools only to trusted users, thereby creating a new hierarchical layer. Further, surveillance exhibits several troubling features: questionable profiling practices (concerning anonymous users in particular), the use of the controversial measure of reputation (under consideration), `oversurveillance' where quantity trumps quality, and a prospective loss of the required moral skills whenever bots take over from humans. The most troubling aspect, though, is that Wikipedia has become a Janus-faced institution. One face is the basic platform of MediaWiki software, transparent to all. Its other face is the anti-vandalism system, which, in contrast, is opaque to the average user, in particular as a result of the algorithms and neural networks in use. Finally it is argued that this secrecy impedes a much needed discussion to unfold; a discussion that should focus on a `rebalancing' of the anti-vandalism system and the development of more ethical information practices towards contributors.}, + file = {/home/nathante/Zotero/storage/T5SQKMRF/de Laat - 2015 - The use of software tools and autonomous bots agai.pdf}, + journal = {Ethics and Information Technology}, + keywords = {Bots,Disclosive ethics,Profiling,Surveillance,Vandalism,Wikipedia}, + language = {en}, + number = {3} +} + +@techreport{desai_trust_2017, + title = {Trust {{But Verify}}: {{A Guide}} to {{Algorithms}} and the {{Law}}}, + shorttitle = {Trust {{But Verify}}}, + author = {Desai, Deven R. 
and Kroll, Joshua A.}, + year = {2017}, + month = apr, + address = {{Rochester, NY}}, + institution = {{Social Science Research Network}}, + abstract = {The call for algorithmic transparency as a way to manage the power of new data-driven decision-making techniques misunderstands the nature of the processes at issue and underlying technology. Part of the problem is that the term, algorithm, is broad. It encompasses disparate concepts even in mathematics and computer science. Matters worsen in law and policy. Law is driven by a linear, almost Newtonian, view of cause and effect where inputs and defined process lead to clear outputs. In that world, a call for transparency has the potential to work. The reality is quite different. Real computer systems use vast data sets not amenable to disclosure. The rules used to make decisions are often inferred from these data and cannot be readily explained or understood. And at a deep and mathematically provable level, certain things, including the exact behavior of an algorithm, can sometimes not be tested or analyzed. 
From a technical perspective, current attempts to expose algorithms to the sun will fail to deliver critics' desired results and may create the illusion of clarity in cases where clarity is not possible.}, + file = {/home/nathante/Zotero/storage/FV443JPY/Desai and Kroll - 2017 - Trust But Verify A Guide to Algorithms and the La.pdf;/home/nathante/Zotero/storage/WHA8KW4J/papers.html}, + keywords = {accountability,algorithms,bias,big data,code,computational methods,computer science,cyber,discrimination,governance,internet,law,machine learning,technology,transparency}, + language = {en}, + number = {ID 2959472}, + type = {{{SSRN Scholarly Paper}}} +} + +@techreport{dillon_determinants_2013, + title = {The {{Determinants}} of {{Mismatch Between Students}} and {{Colleges}}}, + author = {Dillon, Eleanor Wiske and Smith, Jeffrey Andrew}, + year = {2013}, + month = aug, + institution = {{National Bureau of Economic Research}}, + abstract = {We use the National Longitudinal Survey of Youth 1997 cohort to examine mismatch between student ability and college quality. Mismatch has implications for the design of state higher education systems and for student aid policy. The data indicate substantial amounts of both undermatch (high ability students at low quality colleges) and overmatch (low ability students at high quality colleges). Student application and enrollment decisions, rather than college admission decisions, drive most mismatch. Financial constraints, information, and the public college options facing each student all affect the probability of mismatch. 
More informed students attend higher quality colleges, even when doing so involves overmatching.}, + file = {/home/nathante/Zotero/storage/EADW9U7G/Dillon and Smith - 2013 - The Determinants of Mismatch Between Students and .pdf}, + number = {19286}, + type = {Working {{Paper}}} +} + +@article{dimaggio_iron_1983, + title = {The Iron Cage Revisited: {{Institutional}} Isomorphism and Collective Rationality in Organizational Fields}, + shorttitle = {The {{Iron Cage Revisited}}}, + author = {DiMaggio, Paul J. and Powell, Walter W.}, + year = {1983}, + volume = {48}, + pages = {147--160}, + issn = {00031224}, + file = {/home/nathante/Zotero/storage/6VEW77SP/DiMaggio and Powell - 1983 - The iron cage revisited Institutional isomorphism.pdf;/home/nathante/Zotero/storage/T6PIQCHC/2095101.html}, + journal = {American Sociological Review}, + keywords = {Organization Behavior,Sociology}, + number = {2} +} + +@article{dobbie_effects_2018, + title = {The {{Effects}} of {{Pretrial Detention}} on {{Conviction}}, {{Future Crime}}, and {{Employment}}: {{Evidence}} from {{Randomly Assigned Judges}}}, + shorttitle = {The {{Effects}} of {{Pretrial Detention}} on {{Conviction}}, {{Future Crime}}, and {{Employment}}}, + author = {Dobbie, Will and Goldin, Jacob and Yang, Crystal S.}, + year = {2018}, + month = feb, + volume = {108}, + pages = {201--240}, + issn = {0002-8282}, + abstract = {Over 20 percent of prison and jail inmates in the United States are currently awaiting trial, but little is known about the impact of pretrial detention on defendants. This paper uses the detention tendencies of quasi-randomly assigned bail judges to estimate the causal effects of pretrial detention on subsequent defendant outcomes. Using data from administrative court and tax records, we find that pretrial detention significantly increases the probability of conviction, primarily through an increase in guilty pleas. 
Pretrial detention has no net effect on future crime, but decreases formal sector employment and the receipt of employment- and tax-related government benefits. These results are consistent with (i) pretrial detention weakening defendants' bargaining positions during plea negotiations and (ii) a criminal conviction lowering defendants' prospects in the formal labor market.}, + file = {/home/nathante/Zotero/storage/Y5B3T54R/Dobbie et al. - 2018 - The Effects of Pretrial Detention on Conviction, F.pdf;/home/nathante/Zotero/storage/NLZZXIPP/articles.html}, + journal = {American Economic Review}, + keywords = {Labor Demand; Wage Level and Structure,Plant Closings; Litigation Process; Illegal Behavior and the Enforcement of Law,Severance Pay,Wage Differentials; Unemployment Insurance}, + language = {en}, + number = {2} +} + +@article{donath_signals_2007, + title = {Signals in {{Social Supernets}}}, + author = {Donath, Judith}, + year = {2007}, + month = oct, + volume = {13}, + pages = {231--251}, + publisher = {{Oxford Academic}}, + abstract = {Abstract. Social network sites (SNSs) provide a new way to organize and navigate an egocentric social network. Are they a fad, briefly popular but ultimately u}, + file = {/home/nathante/Zotero/storage/BRJELJJZ/Donath_2007_Signals in Social Supernets.pdf;/home/nathante/Zotero/storage/VKT94DEV/4583064.html}, + journal = {Journal of Computer-Mediated Communication}, + language = {en}, + number = {1} +} + +@book{donath_social_2014, + title = {The Social Machine: Designs for Living Online}, + shorttitle = {The Social Machine}, + author = {Donath, Judith}, + year = {2014}, + abstract = {Computers were first conceived as "thinking machines," but in the twenty-first century they have become social machines, online places where people meet friends, play games, and collaborate on projects. 
In this book, Judith Donath argues persuasively that for social media to become truly sociable media, we must design interfaces that reflect how we understand and respond to the social world. People and their actions are still harder to perceive online than face to face: interfaces are clunky, and we have less sense of other people's character and intentions, where they congregate, and what they do.Donath presents new approaches to creating interfaces for social interaction. She addresses such topics as visualizing social landscapes, conversations, and networks; depicting identity with knowledge markers and interaction history; delineating public and private space; and bringing the online world's open sociability into the physical world. Donath asks fundamental questions about how we want to live online and offers thought-provoking designs that explore radically new ways of interacting and communicating.}, + annotation = {OCLC: 1139880278}, + file = {/home/nathante/Zotero/storage/YZRGA7ZD/Donath_2014_The social machine.pdf}, + isbn = {978-0-262-32348-2}, + language = {English} +} + +@book{douglas_purity_2015, + title = {Purity and {{Danger}}: An {{Analysis}} of {{Concepts}} of {{Pollution}} and {{Taboo}}}, + shorttitle = {Purity and {{Danger}}}, + author = {Douglas, Professor Mary}, + year = {2015}, + annotation = {OCLC: 1100437432}, + file = {/home/nathante/Zotero/storage/YFMDBPNG/Douglas_2015_Purity and Danger.pdf}, + isbn = {978-1-134-43823-5}, + language = {English} +} + +@article{dubrovsky_equalization_1991, + title = {The {{Equalization Phenomenon}}: {{Status Effects}} in {{Computer}}-{{Mediated}} and {{Face}}-to-{{Face Decision}}-{{Making Groups}}}, + shorttitle = {The {{Equalization Phenomenon}}}, + author = {Dubrovsky, Vitaly J. 
and Kiesler, Sara and Sethna, Beheruz N.}, + year = {1991}, + month = jun, + volume = {6}, + pages = {119--146}, + publisher = {{Taylor \& Francis}}, + issn = {0737-0024}, + abstract = {New computer-based communications technologies make possible new or expanded forms of group work. Although earlier researchers suggest that scant social information in these technologies might cause status equalization in groups, no experimental test of this phenomenon has been made. In a laboratory experiment, we compared face-to-face communication with electronic mail in decision-making groups whose members differed in social status. We examined status in two ways: by varying the external status of group members, and by varying the decision task to manipulate expertise. When the groups made decisions in face-to-face meetings, the high-status member dominated discussions with the three low-status members. Also, the high-status member more often was a "first advocate" in the face-to-face discussions, and first advocates were more influential than later advocates. These status inequalities in face-to-face decision making were pronounced just when the high-status member's expertise was relevant to the decision task. When the same groups made comparable decisions using electronic mail, status and expertise inequalities in participation were reduced. A striking and unexpected result was that "first" advocacy was shared by high- and low-status members in discussions using electronic mail. This behavior resulted in increased equality of influence across status and expertise. 
We discuss the implications of these results for research and for design of new communication technologies.}, + annotation = {\_eprint: https://www.tandfonline.com/doi/pdf/10.1207/s15327051hci0602\_2}, + file = {/home/nathante/Zotero/storage/XVR2A3LB/s15327051hci0602_2.html}, + journal = {Human\textendash Computer Interaction}, + number = {2} +} + +@article{duflo_women_2012, + title = {Women {{Empowerment}} and {{Economic Development}}}, + author = {Duflo, Esther}, + year = {2012}, + month = dec, + volume = {50}, + pages = {1051--1079}, + issn = {0022-0515}, + file = {/home/nathante/Zotero/storage/QBJXCWDW/Duflo - 2012 - Women Empowerment and Economic Development.pdf}, + journal = {Journal of Economic Literature}, + language = {en}, + number = {4} +} + +@article{eggers_validity_2015, + title = {On the Validity of the Regression Discontinuity Design for Estimating Electoral Effects: {{New}} Evidence from over 40,000 Close Races}, + shorttitle = {On the {{Validity}} of the {{Regression Discontinuity Design}} for {{Estimating Electoral Effects}}}, + author = {Eggers, Andrew C. and Fowler, Anthony and Hainmueller, Jens and Hall, Andrew B. and Snyder, James M.}, + year = {2015}, + month = jan, + volume = {59}, + pages = {259--274}, + issn = {1540-5907}, + abstract = {The regression discontinuity (RD) design is a valuable tool for identifying electoral effects, but this design is only effective when relevant actors do not have precise control over election results. Several recent papers contend that such precise control is possible in large elections, pointing out that the incumbent party is more likely to win very close elections in the United States House of Representatives in recent periods. In this article, we examine whether similar patterns occur in other electoral settings, including the U.S. House in other time periods, statewide, state legislative, and mayoral races in the U.S. and national or local elections in nine other countries. 
No other case exhibits this pattern. We also cast doubt on suggested explanations for incumbent success in close House races. We conclude that the assumptions behind the RD design are likely to be met in a wide variety of electoral settings and offer a set of best practices for RD researchers going forward.}, + copyright = {\textcopyright{} 2014, Midwest Political Science Association}, + file = {/home/nathante/Zotero/storage/XF3MP6E4/Eggers et al. - 2015 - On the validity of the regression discontinuity de.pdf}, + journal = {American Journal of Political Science}, + language = {en}, + number = {1} +} + +@article{ellison_connection_2011, + title = {Connection Strategies: {{Social}} Capital Implications of {{Facebook}}-Enabled Communication Practices}, + shorttitle = {Connection Strategies}, + author = {Ellison, Nicole B. and Steinfield, Charles and Lampe, Cliff}, + year = {2011}, + month = sep, + volume = {13}, + pages = {873--892}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study assesses whether Facebook users have different `connection strategies,' a term which describes a suite of Facebook-related relational communication activities, and explores the relationship between these connection strategies and social capital. Survey data (N = 450) from a random sample of undergraduate students reveal that only social information-seeking behaviors contribute to perceptions of social capital; connection strategies that focus on strangers or close friends do not. We also find that reporting more `actual' friends on the site is predictive of social capital, but only to a point. 
We believe the explanation for these findings may be that the identity information in Facebook serves as a social lubricant, encouraging individuals to convert latent to weak ties and enabling them to broadcast requests for support or information.}, + file = {/home/nathante/Zotero/storage/F3UC99WE/Ellison et al_2011_Connection strategies.pdf}, + journal = {New Media \& Society}, + language = {en}, + number = {6} +} + +@article{ellison_managing_2006, + title = {Managing {{Impressions Online}}: {{Self}}-{{Presentation Processes}} in the {{Online Dating Environment}}}, + shorttitle = {Managing {{Impressions Online}}}, + author = {Ellison, Nicole and Heino, Rebecca and Gibbs, Jennifer}, + year = {2006}, + month = jan, + volume = {11}, + pages = {415--441}, + publisher = {{Oxford Academic}}, + abstract = {Abstract. This study investigates self-presentation strategies among online dating participants, exploring how participants manage their online presentation of}, + file = {/home/nathante/Zotero/storage/F95Z645B/Ellison et al_2006_Managing Impressions Online.pdf;/home/nathante/Zotero/storage/ZVUP9E4V/4617726.html}, + journal = {Journal of Computer-Mediated Communication}, + language = {en}, + number = {2} +} + +@article{fe_short-_2016, + title = {Short- and Long-Run Estimates of the Local Effects of Retirement on Health}, + author = {F{\'e}, Eduardo and Hollingsworth, Bruce}, + year = {2016}, + volume = {179}, + pages = {1051--1067}, + issn = {1467-985X}, + abstract = {We explore the existence of short- and long-term effects of retirement on health. Short-term effects are estimated with a regression discontinuity design which is robust to weak instruments and where the underlying assumptions of continuity of potential outcomes are uncontroversial. To identify the long-term effects we propose a parametric model which, under strong assumptions, can separate normal deterioration of health from the causal effects of retirement. 
We apply our framework to the British Household Panel Survey and find that retirement has little effect on health. However, our estimates suggest that retirement opens the gate to a sedentary life with an impoverished social component and this is a channel through which retirement could indirectly affect health in the long run.}, + copyright = {\textcopyright{} 2015 Royal Statistical Society}, + file = {/home/nathante/Zotero/storage/7PK7IEJB/Fé and Hollingsworth - 2016 - Short- and long-run estimates of the local effects.pdf;/home/nathante/Zotero/storage/GLBP5DRN/rssa.html}, + journal = {Journal of the Royal Statistical Society: Series A (Statistics in Society)}, + keywords = {Health,Instrumental variables,Regression discontinuity,Retirement,Wild bootstrap}, + language = {en}, + number = {4} +} + +@article{feldman_falling_2019, + title = {Falling {{Not Far}} from the {{Tree}}: {{Entrepreneurs}} and {{Organizational Heritage}}}, + shorttitle = {Falling {{Not Far}} from the {{Tree}}}, + author = {Feldman, Maryann P. and Ozcan, Serden and Reichstein, Toke}, + year = {2019}, + month = mar, + issn = {1047-7039}, + abstract = {Past research has shown that founders bring important capabilities and resources from their prior employment into their new firms and that these intergenerational transfers influence the performance of these ventures. However, we know little about whether organizational practices also transfer from parents to spawns, and if so, what types of practices are transferred? Using a combination of survey and registrar data and through a detailed identification strategy, we examine these two previously unaddressed questions. Our results provide strong evidence for organizational heritage in practices. 
About 70\% of the comparisons of start-ups and other established organizations are less similar than the average similarity between a parent organization and its spawn and that the overlap in organizational practices is almost 10\% greater between a spawn and its parents than between the spawn and other established firms. Our further investigation shows that not all practices seem to find their way into the new entrepreneurial firms. In particular, practices that are valuable for and fit with the requirements of a start-up organization, and at the same time are more clearly defined and casually less ambiguous, are more likely to be transferred by the founders from their previous employers. These results contribute to our understanding of how entrepreneurs assemble their organizations and practice innovation as well as the diffusion of practices and the origins of firm heterogeneity.}, + file = {/home/nathante/Zotero/storage/8V3R8LXR/Feldman et al. - 2019 - Falling Not Far from the Tree Entrepreneurs and O.pdf;/home/nathante/Zotero/storage/YX2YZ7L5/orsc.2018.html}, + journal = {Organization Science} +} + +@article{fiesler_participant_2018, + title = {``{{Participant}}'' {{Perceptions}} of {{Twitter Research Ethics}}}, + author = {Fiesler, Casey and Proferes, Nicholas}, + year = {2018}, + month = jan, + volume = {4}, + pages = {2056305118763366}, + issn = {2056-3051}, + abstract = {Social computing systems such as Twitter present new research sites that have provided billions of data points to researchers. However, the availability of public social media data has also presented ethical challenges. As the research community works to create ethical norms, we should be considering users' concerns as well. With this in mind, we report on an exploratory survey of Twitter users' perceptions of the use of tweets in research. 
Within our survey sample, few users were previously aware that their public tweets could be used by researchers, and the majority felt that researchers should not be able to use tweets without consent. However, we find that these attitudes are highly contextual, depending on factors such as how the research is conducted or disseminated, who is conducting it, and what the study is about. The findings of this study point to potential best practices for researchers conducting observation and analysis of public data.}, + file = {/home/nathante/Zotero/storage/ECMBKZLL/Fiesler and Proferes - 2018 - “Participant” Perceptions of Twitter Research Ethi.pdf}, + journal = {Social Media + Society}, + language = {en}, + number = {1} +} + +@inproceedings{fiesler_reddit_2018, + title = {Reddit Rules! {{Characterizing}} an Ecosystem of Governance.}, + booktitle = {Proceedings of the {{AAAI International Conference}} on {{Web}} and {{Social Media}}}, + author = {Fiesler, Casey and Jiang, Jialun ``Aaron'' and McCann, Joshua and Frye, Kyle and Brubaker, Jed R.}, + year = {2018}, + pages = {72--81}, + publisher = {{AAAI}}, + file = {/home/nathante/Zotero/storage/MZYNTAGQ/Fiesler - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/PKCDGI2A/Fiesler et al. - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/SJ5B9R2M/Fiesler et al. - 2018 - Reddit rules! Characterizing an ecosystem of gover.pdf} +} + +@article{ford_anyone_2017, + title = {`{{Anyone}} Can Edit', Not Everyone Does: {{Wikipedia}}'s Infrastructure and the Gender Gap}, + shorttitle = {`{{Anyone}} Can Edit', Not Everyone Does}, + author = {Ford, Heather and Wajcman, Judy}, + year = {2017}, + month = aug, + volume = {47}, + pages = {511--527}, + issn = {0306-3127}, + abstract = {Feminist STS has long established that science's provenance as a male domain continues to define what counts as knowledge and expertise. 
Wikipedia, arguably one of the most powerful sources of information today, was initially lauded as providing the opportunity to rebuild knowledge institutions by providing greater representation of multiple groups. However, less than ten percent of Wikipedia editors are women. At one level, this imbalance in contributions and therefore content is yet another case of the masculine culture of technoscience. This is an important argument and, in this article, we examine the empirical research that highlights these issues. Our main objective, however, is to extend current accounts by demonstrating that Wikipedia's infrastructure introduces new and less visible sources of gender disparity. In sum, our aim here is to present a consolidated analysis of the gendering of Wikipedia.}, + file = {/home/nathante/Zotero/storage/AZV37IZ7/Ford and Wajcman - 2017 - ‘Anyone can edit’, not everyone does Wikipedia’s .pdf}, + journal = {Social Studies of Science}, + language = {en}, + number = {4} +} + +@article{forte_decentralization_2009, + ids = {forte\_decentralization\_2009-1}, + title = {Decentralization in {{Wikipedia}} Governance}, + author = {Forte, Andrea and Larco, Vanesa and Bruckman, Amy}, + year = {2009}, + month = jul, + volume = {26}, + pages = {49--72}, + issn = {0742-1222}, + abstract = {How does "self-governance" happen in Wikipedia? Through in-depth interviews with 20 individuals who have held a variety of responsibilities in the English-language Wikipedia, we obtained rich descriptions of how various forces produce and regulate social structures on the site. Although Wikipedia is sometimes portrayed as lacking oversight, our analysis describes Wikipedia as an organization with highly refined policies, norms, and a technological architecture that supports organizational ideals of consensus building and discussion. 
We describe how governance on the site is becoming increasingly decentralized as the community grows and how this is predicted by theories of commons-based governance developed in offline contexts. We also briefly examine local governance structures called WikiProjects through the example of WikiProject Military History, one of the oldest and most prolific projects on the site.}, + file = {/home/nathante/Zotero/storage/XWB8BK4P/MIS0742-1222260103.html}, + journal = {Journal of Management Information Systems}, + keywords = {governance,online communities,self-organizing systems,Wikipedia}, + number = {1} +} + +@inproceedings{forte_privacy_2017, + ids = {forte\_privacy\_2017-5}, + title = {Privacy, Anonymity, and Perceived Risk in Open Collaboration: A Study of {{Tor}} Users and {{Wikipedians}}}, + shorttitle = {Privacy, {{Anonymity}}, and {{Perceived Risk}} in {{Open Collaboration}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Forte, Andrea and Andalibi, Nazanin and Greenstadt, Rachel}, + year = {2017}, + pages = {1800--1811}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {This qualitative study examines privacy practices and concerns among contributors to open collaboration projects. We collected interview data from people who use the anonymity network Tor who also contribute to online projects and from Wikipedia editors who are concerned about their privacy to better understand how privacy concerns impact participation in open collaboration projects. We found that risks perceived by contributors to open collaboration projects include threats of surveillance, violence, harassment, opportunity loss, reputation loss, and fear for loved ones. We explain participants' operational and technical strategies for mitigating these risks and how these strategies affect their contributions. 
Finally, we discuss chilling effects associated with privacy loss, the need for open collaboration projects to go beyond attracting and educating participants to consider their privacy, and some of the social and technical approaches that could be explored to mitigate risk at a project or community level.}, + file = {/home/nathante/Zotero/storage/A39AC55X/ForteCSCW17-Anonymity.pdf;/home/nathante/Zotero/storage/SY9J4JAV/Forte et al_2017_Privacy, Anonymity, and Perceived Risk in Open Collaboration.pdf;/home/nathante/Zotero/storage/ZQ2L9JXV/Forte et al. - 2017 - Privacy, anonymity, and perceived risk in open col.pdf}, + isbn = {978-1-4503-4335-0}, + keywords = {identity,Identity,privacy,risk,social computing,tor,wikipedia}, + series = {{{CSCW}} '17} +} + +@book{foucault_discipline_1979, + title = {Discipline and Punish}, + author = {Foucault, Michel}, + year = {1979}, + publisher = {{Vintage Books}}, + address = {{New York}}, + annotation = {OCLC: 961381898}, + language = {English} +} + +@article{fourcade_categories_2017, + title = {Categories {{All}} the {{Way Down}}}, + author = {Fourcade, Marion and Healy, Kieran}, + year = {2017}, + volume = {42}, + pages = {286--296}, + issn = {0172-6404}, + abstract = {\guillemotright Kategorien auf der ganzen Linie\guillemotleft. Scores and classifications are dual to one another. Cardinal and ordinal measures are repeatedly used to produce nominal classifications of essential worth. Conversely, presumptively natural kinds provide the basis for new measurement and scoring systems. Over time, the iterative application of nominal classifications and quantifying measures produce involuted, nested systems whose structure and origins are hard to disentangle. While careful studies of earlier systems and methods have often uncovered these arbitrary aspects, newer technical tools for classification are at once substantially more opaque than their predecessors and more likely to be employed on very large scales. 
The classification situations to which they give rise thus have the potential to produce the sort of naturalized facticity characteristic of classical social facts.}, + journal = {Historical Social Research / Historische Sozialforschung}, + number = {1 (159)} +} + +@article{fourcade_classification_2013, + title = {Classification Situations: {{Life}}-Chances in the Neoliberal Era}, + shorttitle = {Classification Situations}, + author = {Fourcade, Marion and Healy, Kieran}, + year = {2013}, + month = nov, + volume = {38}, + pages = {559--572}, + issn = {0361-3682}, + abstract = {This article examines the stratifying effects of economic classifications. We argue that in the neoliberal era market institutions increasingly use actuarial techniques to split and sort individuals into classification situations that shape life-chances. While this is a general and increasingly pervasive process, our main empirical illustration comes from the transformation of the credit market in the United States. This market works as both as a leveling force and as a condenser of new forms of social difference. The U.S. banking and credit system has greatly broadened its scope over the past twenty years to incorporate previously excluded groups. We observe this leveling tendency in the expansion of credit amongst lower-income households, the systematization of overdraft protections, and the unexpected and rapid growth of the fringe banking sector. But while access to credit has democratized, it has also differentiated. Scoring technologies classify and price people according to credit risk. This has allowed multiple new distinctions to be made amongst the creditworthy, as scores get attached to different interest rates and loan structures. Scores have also expanded into markets beyond consumer credit, such as insurance, real estate, employment, and elsewhere. 
The result is a cumulative pattern of advantage and disadvantage with both objectively measured and subjectively experienced aspects. We argue these private classificatory tools are increasingly central to the generation of ``market-situations'', and thus an important and overlooked force that structures individual life-chances. In short, classification situations may have become the engine of modern class situations.}, + file = {/home/nathante/Zotero/storage/K728PIWP/Fourcade and Healy - 2013 - Classification situations Life-chances in the neo.pdf;/home/nathante/Zotero/storage/TE5GXSYZ/S0361368213000743.html}, + journal = {Accounting, Organizations and Society}, + number = {8} +} + +@article{frey_this_2019, + ids = {frey\_designing\_2019}, + title = {"{{This}} Place Does What It Was Built for": {{Designing}} Digital Institutions for Participatory Change}, + shorttitle = {"{{This}} Place Does What It Was Built for"}, + author = {Frey, Seth and Krafft, P. M. and Keegan, Brian C.}, + year = {2019}, + month = nov, + volume = {3}, + pages = {32:1--32:31}, + issn = {2573-0142}, + abstract = {Whether we recognize it or not, the Internet is rife with exciting and original institutional forms that are transforming social organization on and offline. Governing these Internet platforms and other digital institutions has posed a challenge for engineers and managers, many of whom have little exposure to the relevant history or theory of institutional design. The dominant guiding practices for the design of digital institutions to date in human-computer interaction, computer-supported cooperative work, and the tech industry at large have been an incentive-focused behavioral engineering paradigm encompassing atheoretical approaches such as emulation, A/B-testing, engagement maximization, and piecemeal issue-driven engineering. 
One institutional analysis framework that has been useful in the study of traditional institutions comes from scholars of natural resource management, particularly that community of economists, anthropologists, and environmental and political scientists focused around the work of Elinor Ostrom, known collectively as the "Ostrom Workshop." A key finding from this community that has yet to be broadly incorporated into the design of many digital institutions is the importance of including participatory change mechanisms in what is called a "constitutional layer" of institutional design. The institutional rules that compose a constitutional layer facilitate stakeholder participation in the ongoing process of institutional design change. We explore to what extent consideration of constitutional layers is met or could be better met in three varied cases of digital institutions: cryptocurrencies, cannabis informatics, and amateur Minecraft server governance. Examining such highly varied cases allows us to demonstrate the broad relevance of constitutional layers in many different types of digital institutions.}, + archivePrefix = {arXiv}, + eprint = {1902.08728}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/63NTR6E9/Frey et al_2019_This place does what it was built for.pdf;/home/nathante/Zotero/storage/BP3LMBH4/Frey et al. - 2019 - Designing for Participation and Change in Digital .pdf}, + journal = {Proc. ACM Hum.-Comput. 
Interact.}, + keywords = {computational social science,Computer Science - Social and Information Networks,digital democracy,digital institutions,H.5.3,institutional analysis,institutional design,J.4,K.4.3,knowledge commons,resource management}, + number = {CSCW} +} + +@article{friedman_bias_1996, + title = {Bias in {{Computer Systems}}}, + author = {Friedman, Batya and Nissenbaum, Helen}, + year = {1996}, + month = jul, + volume = {14}, + pages = {330--347}, + issn = {1046-8188}, + abstract = {From an analysis of actual cases, three categories of bias in computer systems have been developed: preexisting, technical, and emergent. Preexisting bias has its roots in social institutions, practices, and attitudes. Technical bias arises from technical constraints or considerations. Emergent bias arises in a context of use. Although others have pointed to bias in particular computer systems and have noted the general problem, we know of no comparable work that examines this phenomenon comprehensively and which offers a framework for understanding and remedying it. We conclude by suggesting that freedom from bias should be counted among the select set of criteria\textemdash including reliability, accuracy, and efficiency\textemdash according to which the quality of systems in use in society should be judged.}, + file = {/home/nathante/Zotero/storage/ZLKIQYWV/Friedman and Nissenbaum - 1996 - Bias in Computer Systems.pdf}, + journal = {ACM Trans. Inf. Syst.}, + keywords = {bias,computer ethics,computers and society,design methods,ethics,human values,social computing,social impact,standards,system design,universal design,values}, + number = {3} +} + +@article{friedman_social_2001, + title = {The Social Cost of Cheap Pseudonyms}, + author = {Friedman, Eric J. 
and Resnick, Paul}, + year = {2001}, + volume = {10}, + pages = {173--199}, + issn = {1530-9134}, + abstract = {We consider the problems of societal norms for cooperation and reputation when it is possible to obtain cheap pseudonyms, something that is becoming quite common in a wide variety of interactions on the Internet. This introduces opportunities to misbehave without paying reputational consequences. A large degree of cooperation can still emerge, through a convention in which newcomers ``pay their dues'' by accepting poor treatment from players who have established positive reputations. One might hope for an open society where newcomers are treated well, but there is an inherent social cost in making the spread of reputations optional. We prove that no equilibrium can sustain significantly more cooperation than the dues-paying equilibrium in a repeated random matching game with a large number of players in which players have finite lives and the ability to change their identities, and there is a small but nonvanishing probability of mistakes. Although one could remove the inefficiency of mistreating newcomers by disallowing anonymity, this is not practical or desirable in a wide variety of transactions. We discuss the use of entry fees, which permits newcomers to be trusted but excludes some players with low payoffs, thus introducing a different inefficiency. 
We also discuss the use of free but unreplaceable pseudonyms, and describe a mechanism that implements them using standard encryption techniques, which could be practically implemented in electronic transactions.}, + file = {/home/nathante/Zotero/storage/Y9DUDKVD/Friedman and Resnick - 2001 - The social cost of cheap pseudonyms.pdf;/home/nathante/Zotero/storage/V9LHVFG4/abstract.html}, + journal = {Journal of Economics \& Management Strategy}, + language = {en}, + number = {2} +} + +@article{gachter_collective_1999, + title = {Collective Action as a Social Exchange}, + author = {G{\"a}chter, Simon and Fehr, Ernst}, + year = {1999}, + volume = {39}, + pages = {341--369}, + issn = {0167-2681}, + abstract = {Social interactions are frequently associated with social approval. Anticipation of social sanctions may have important economic consequences, in particular in the realm of collective action and voluntary cooperation. This paper investigates the impact and the limitations of social rewards on people's behavior in the provision of a public good. We examine whether the opportunity to receive social approval in exchange for participation in collective actions is capable of overcoming free-riding. We find that approval incentives alone are not sufficiently strong to cause a reduction in free-riding. However, in combination with some minimal social familiarity approval incentives generate a significant rise in cooperation. 
Our results also suggest that approval incentives give rise to multiple equilibria.}, + file = {/home/nathante/Zotero/storage/CAFW5TVG/Gächter and Fehr - 1999 - Collective action as a social exchange.pdf;/home/nathante/Zotero/storage/H3JQI42X/Gächter and Fehr - 1999 - Collective action as a social exchange.pdf}, + journal = {Journal of Economic Behavior \& Organization}, + keywords = {Collective actions,experiments,Social exchange,Social pressure,Voluntary cooperation}, + number = {4} +} + +@article{gan_gender_2018, + title = {Gender, Feedback, and Learners' Decisions to Share Their Creative Computing Projects}, + author = {Gan, Emilia F. and Hill, Benjamin Mako and Dasgupta, Sayamindu}, + year = {2018}, + volume = {2}, + pages = {54:1-54:23}, + file = {/home/nathante/Zotero/storage/RU9QB9ZL/Gan et al_2018_Gender, feedback, and learners' decisions to share their creative computing.pdf}, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + number = {CSCW} +} + +@article{geiger_beyond_2017, + title = {Beyond Opening up the Black Box: {{Investigating}} the Role of Algorithmic Systems in {{Wikipedian}} Organizational Culture}, + shorttitle = {Beyond Opening up the Black Box}, + author = {Geiger, R Stuart}, + year = {2017}, + month = dec, + volume = {4}, + pages = {2053951717730735}, + issn = {2053-9517}, + abstract = {Scholars and practitioners across domains are increasingly concerned with algorithmic transparency and opacity, interrogating the values and assumptions embedded in automated, black-boxed systems, particularly in user-generated content platforms. I report from an ethnography of infrastructure in Wikipedia to discuss an often understudied aspect of this topic: the local, contextual, learned expertise involved in participating in a highly automated social\textendash technical environment. 
Today, the organizational culture of Wikipedia is deeply intertwined with various data-driven algorithmic systems, which Wikipedians rely on to help manage and govern the ``anyone can edit'' encyclopedia at a massive scale. These bots, scripts, tools, plugins, and dashboards make Wikipedia more efficient for those who know how to work with them, but like all organizational culture, newcomers must learn them if they want to fully participate. I illustrate how cultural and organizational expertise is enacted around algorithmic agents by discussing two autoethnographic vignettes, which relate my personal experience as a veteran in Wikipedia. I present thick descriptions of how governance and gatekeeping practices are articulated through and in alignment with these automated infrastructures. Over the past 15 years, Wikipedian veterans and administrators have made specific decisions to support administrative and editorial workflows with automation in particular ways and not others. I use these cases of Wikipedia's bot-supported bureaucracy to discuss several issues in the fields of critical algorithms studies; critical data studies; and fairness, accountability, and transparency in machine learning\textemdash most principally arguing that scholarship and practice must go beyond trying to ``open up the black box'' of such systems and also examine sociocultural processes like newcomer socialization.}, + file = {/home/nathante/Zotero/storage/YUNBX2XY/Geiger - 2017 - Beyond opening up the black box Investigating the.pdf}, + journal = {Big Data \& Society}, + language = {en}, + number = {2} +} + +@article{geiger_bot-based_2016, + title = {Bot-Based Collective Blocklists in {{Twitter}}: The Counterpublic Moderation of Harassment in a Networked Public Space}, + shorttitle = {Bot-Based Collective Blocklists in {{Twitter}}}, + author = {Geiger, R. 
Stuart}, + year = {2016}, + month = jun, + volume = {19}, + pages = {787--803}, + issn = {1369-118X}, + abstract = {This article introduces and discusses bot-based collective blocklists (or blockbots) in Twitter, which have been developed by volunteers to combat harassment in the social networking site. Blockbots support the curation of a shared blocklist of accounts, where subscribers to a blockbot will not receive any notifications or messages from accounts on the blocklist. Blockbots support counterpublic communities, helping people moderate their own experiences of a site. This article provides an introduction and overview of blockbots and the issues that they raise about networked publics and platform governance, extending an intersecting literature on online harassment, platform governance, and the politics of algorithms. Such projects involve a more reflective, intentional, transparent, collaborative, and decentralized way of using algorithmic systems to respond to issues of platform governance like harassment. I argue that blockbots are not just technical solutions but social ones as well, a notable exception to common technologically determinist solutions that often push responsibility for issues like harassment to the individual user. 
Beyond the case of Twitter, blockbots call our attention to collective, bottom-up modes of computationally assisted moderation that can be deployed by counterpublic groups who want to participate in networked publics where hegemonic and exclusionary practices are increasingly prevalent.}, + file = {/home/nathante/Zotero/storage/LF24ZUVI/Geiger_2016_Bot-based collective blocklists in Twitter.pdf;/home/nathante/Zotero/storage/E5T3KSKW/1369118X.2016.html}, + journal = {Information, Communication \& Society}, + keywords = {algorithms,Harassment,moderation,networked publics,public sphere,social media}, + number = {6} +} + +@article{geiger_bots_2014, + title = {Bots, Bespoke, Code and the Materiality of Software Platforms}, + author = {Geiger, R. Stuart}, + year = {2014}, + month = mar, + volume = {17}, + pages = {342--356}, + issn = {1369-118X}, + abstract = {This article introduces and discusses the role of bespoke code in Wikipedia, which is code that runs alongside a platform or system, rather than being integrated into server-side codebases by individuals with privileged access to the server. Bespoke code complicates the common metaphors of platforms and sovereignty that we typically use to discuss the governance and regulation of software systems through code. Specifically, the work of automated software agents (bots) in the operation and administration of Wikipedia is examined, with a focus on the materiality of code. As bots extend and modify the functionality of sites like Wikipedia, but must be continuously operated on computers that are independent from the servers hosting the site, they involve alternative relations of power and code. Instead of taking for granted the pre-existing stability of Wikipedia as a platform, bots and other bespoke code require that we examine not only the software code itself, but also the concrete, historically contingent material conditions under which this code is run. 
To this end, this article weaves a series of autobiographical vignettes about the author's experiences as a bot developer alongside more traditional academic discourse.}, + file = {/home/nathante/Zotero/storage/8FCQ2I9I/Geiger - 2014 - Bots, bespoke, code and the materiality of softwar.pdf;/home/nathante/Zotero/storage/XQLWPXV8/1369118X.2013.html}, + journal = {Information, Communication \& Society}, + keywords = {algorithms,bots,code,materiality,software,Wikipedia}, + number = {3} +} + +@inproceedings{geiger_when_2013, + title = {When the Levee Breaks: {{Without}} Bots, What Happens to {{Wikipedia}}'s Quality Control Processes?}, + shorttitle = {When the {{Levee Breaks}}}, + booktitle = {Proceedings of the 9th {{International Symposium}} on {{Open Collaboration}} ({{OpenSym}} '13)}, + author = {Geiger, R. Stuart and Halfaker, Aaron}, + year = {2013}, + pages = {6:1--6:6}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {In the first half of 2011, ClueBot NG -- one of the most prolific counter-vandalism bots in the English-language Wikipedia -- went down for four distinct periods, each period of downtime lasting from days to weeks. In this paper, we use these periods of breakdown as naturalistic experiments to study Wikipedia's heterogeneous quality control network, which we analyze as a multi-tiered system in which distinct classes of reviewers use various reviewing technologies to patrol for different kinds of damage at staggered time periods. Our analysis showed that the overall time-to-revert edits was almost doubled when this software agent was down. Yet while a significantly fewer proportion of edits made during the bot's downtime were reverted, we found that those edits were later eventually reverted. 
This suggests that other agents in Wikipedia took over this quality control work, but performed it at a far slower rate.}, + file = {/home/nathante/Zotero/storage/HVD834ZM/Geiger and Halfaker - 2013 - When the levee breaks Without bots, what happens .pdf}, + isbn = {978-1-4503-1852-5}, + keywords = {automation,bots,information quality,peer production,socio-technical systems,software agents,wikipedia} +} + +@inproceedings{geiger_work_2010, + title = {The {{Work}} of {{Sustaining Order}} in {{Wikipedia}}: {{The Banning}} of a {{Vandal}}}, + shorttitle = {The {{Work}} of {{Sustaining Order}} in {{Wikipedia}}}, + booktitle = {Proceedings of the 2010 {{ACM Conference}} on {{Computer Supported Cooperative Work}}}, + author = {Geiger, R. Stuart and Ribes, David}, + year = {2010}, + pages = {117--126}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {In this paper, we examine the social roles of software tools in the English-language Wikipedia, specifically focusing on autonomous editing programs and assisted editing tools. This qualitative research builds on recent research in which we quantitatively demonstrate the growing prevalence of such software in recent years. Using trace ethnography, we show how these often-unofficial technologies have fundamentally transformed the nature of editing and administration in Wikipedia. Specifically, we analyze "vandal fighting" as an epistemic process of distributed cognition, highlighting the role of non-human actors in enabling a decentralized activity of collective intelligence. In all, this case shows that software programs are used for more than enforcing policies and standards. 
These tools enable coordinated yet decentralized action, independent of the specific norms currently in force.}, + file = {/home/nathante/Zotero/storage/3ANJ4PAP/Geiger and Ribes - 2010 - The Work of Sustaining Order in Wikipedia The Ban.pdf}, + isbn = {978-1-60558-795-0}, + keywords = {bots,distributed cognition,ethnography,qualitative,social,trace ethnography,Wiki,wikipedia}, + series = {{{CSCW}} '10} +} + +@article{gelman_beyond_2014, + title = {Beyond {{Power Calculations}}: {{Assessing Type S}} ({{Sign}}) and {{Type M}} ({{Magnitude}}) {{Errors}}}, + shorttitle = {Beyond {{Power Calculations}}}, + author = {Gelman, Andrew and Carlin, John}, + year = {2014}, + month = nov, + volume = {9}, + pages = {641--651}, + issn = {1745-6916, 1745-6924}, + abstract = {Statistical power analysis provides the conventional approach to assess error rates when designing a research study. However, power analysis is flawed in that a narrow emphasis on statistical significance is placed as the primary focus of study design. In noisy, small-sample settings, statistically significant results can often be misleading. To help researchers address this problem in the context of their own studies, we recommend design calculations in which (a) the probability of an estimate being in the wrong direction (Type S [sign] error) and (b) the factor by which the magnitude of an effect might be overestimated (Type M [magnitude] error or exaggeration ratio) are estimated. 
We illustrate with examples from recent published research and discuss the largest challenge in a design calculation: coming up with reasonable estimates of plausible effect sizes based on external information.}, + file = {/home/nathante/Zotero/storage/LIMBCUC8/Gelman and Carlin - 2014 - Beyond Power Calculations Assessing Type S (Sign).pdf}, + journal = {Perspectives on Psychological Science}, + language = {en}, + number = {6} +} + +@inproceedings{gilbert_widespread_2013, + title = {Widespread {{Underprovision}} on {{Reddit}}}, + booktitle = {Proceedings of the 2013 {{Conference}} on {{Computer Supported Cooperative Work}}}, + author = {Gilbert, Eric}, + year = {2013}, + pages = {803--808}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Many online communities ask their members to do work for the good of everyone on the site. On social voting sites like Reddit, this means that users judge a stream of incoming links by voting them up or down. The links with the most up-votes bubble up to the main page, pointing everyone toward the best content. A threat to all sites designed this way, however, is underprovision: when too many people rely on others to contribute without doing so themselves. In this paper, we present findings suggesting that widespread underprovision of votes is happening on Reddit, arguably the internet's largest social voting community. Notably, Reddit overlooked 52\% of the most popular links the first time they were submitted. This suggests that many potentially popular links get ignored, jeopardizing the site's core purpose. 
We conclude by discussing possible reasons behind it, and suggest future research on social voting sites.}, + file = {/home/nathante/Zotero/storage/K3TI54AA/Gilbert_2013_Widespread Underprovision on Reddit.pdf}, + isbn = {978-1-4503-1331-5}, + keywords = {free riding,reddit,social navigation,underprovision,voting}, + series = {{{CSCW}} '13} +} + +@book{gillespie_custodians_2018, + title = {Custodians of the {{Internet}}: Platforms, Content Moderation, and the Hidden Decisions That Shape Social Media}, + shorttitle = {Custodians of the Internet}, + author = {Gillespie, Tarleton}, + year = {2018}, + publisher = {{Yale University Press}}, + address = {{New Haven}}, + abstract = {"Most users want their Twitter feed, Facebook page, and YouTube comments to be free of harassment and porn. Whether faced with 'fake news' or livestreamed violence, 'content moderators'--who censor or promote user-posted content--have never been more important. This is especially true when the tools that social media platforms use to curb trolling, ban hate speech, and censor pornography can also silence the speech you need to hear. [The author] provides an overview of current social media practices and explains the underlying rationales for how, when, and why these policies are enforced. In doing so, [the author] highlights that content moderation receives too little public scrutiny even as it shapes social norms and creates consequences for public discourse, cultural production, and the fabric of society. 
Based on interviews with content moderators, creators, and consumers, this...book is...for anyone who's ever clicked 'like' or 'retweet.'"--}, + annotation = {OCLC: on1005113962}, + file = {/home/nathante/Zotero/storage/QFK2IFFY/Gillespie_2018_Custodians of the Internet.pdf}, + isbn = {978-0-300-17313-0}, + keywords = {Business \& Economics / Industries / Media \& Communications,Censorship,Computers / Web / Social Media,Political Science / Censorship,Social media,Social Science / Media Studies}, + lccn = {HM742 .G575 2018} +} + +@article{gonzalez-navarro_deterrence_2013, + title = {Deterrence and {{Geographical Externalities}} in {{Auto Theft}}}, + author = {{Gonzalez-Navarro}, Marco}, + year = {2013}, + month = oct, + volume = {5}, + pages = {92--110}, + issn = {1945-7782, 1945-7790}, + file = {/home/nathante/Zotero/storage/VATQILME/Gonzalez-Navarro - 2013 - Deterrence and Geographical Externalities in Auto .pdf}, + journal = {American Economic Journal: Applied Economics}, + language = {en}, + number = {4} +} + +@article{grabner-krauter_trust_2015, + title = {Trust in Online Social Networks: {{A}} Multifaceted Perspective}, + shorttitle = {Trust in Online Social Networks}, + author = {{Grabner-Kr{\"a}uter}, Sonja and Bitter, Sofie}, + year = {2015}, + month = jan, + volume = {44}, + pages = {48--68}, + publisher = {{Routledge}}, + issn = {0736-0932}, + abstract = {In recent years, online social networks (OSNs) have gained great popularity and are now among the most frequently visited sites on the Web. Although security standards and practices are an increasing focus of attention, participants still reveal great amounts of sensitive information in the Web 2.0 environment. Obviously, online social networking takes place in a context of trust. However, trust is a concept with many facets and dimensions. To facilitate trust research in OSNs this article aims at clarifying the role of trust and the relevance of facets of trust, social capital and embeddedness in OSNs. 
First, the focus is on the individual's decision to trust and on processes through which trust actually emerges. Subsequently, trust is viewed as a structurally embedded asset or a property of relationships and networks that helps to shape interaction patterns within OSNs. A conceptual framework is developed that integrates theoretical concepts from the trust literature, social network and social capital theory, and helps to map different trust-related issues in OSNs.}, + annotation = {\_eprint: https://doi.org/10.1080/07360932.2013.781517}, + file = {/home/nathante/Zotero/storage/NXZ4H29X/Grabner-Kräuter_Bitter_2015_Trust in online social networks.pdf;/home/nathante/Zotero/storage/AZKSCUEY/07360932.2013.html}, + journal = {Forum for Social Economics}, + keywords = {embeddedness,online social networks,social capital,social networking sites,trust}, + number = {1} +} + +@inproceedings{graells-garrido_first_2015, + title = {First {{Women}}, {{Second Sex}}: {{Gender Bias}} in {{Wikipedia}}}, + shorttitle = {First {{Women}}, {{Second Sex}}}, + booktitle = {Proceedings of the 26th {{ACM Conference}} on {{Hypertext}} \& {{Social Media}}}, + author = {{Graells-Garrido}, Eduardo and Lalmas, Mounia and Menczer, Filippo}, + year = {2015}, + pages = {165--174}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Contributing to the writing of history has never been as easy as it is today. Anyone with access to the Web is able to play a part on Wikipedia, an open and free encyclopedia, and arguably one of the primary sources of knowledge on the Web. In this paper, we study gender bias in Wikipedia in terms of how women and men are characterized in their biographies. To do so, we analyze biographical content in three aspects: meta-data, language, and network structure. Our results show that, indeed, there are differences in characterization and structure. 
Some of these differences are reflected from the off-line world documented by Wikipedia, but other differences can be attributed to gender bias in Wikipedia content. We contextualize these differences in social theory and discuss their implications for Wikipedia policy.}, + file = {/home/nathante/Zotero/storage/YUPG5HAT/Graells-Garrido et al. - 2015 - First Women, Second Sex Gender Bias in Wikipedia.pdf}, + isbn = {978-1-4503-3395-5}, + keywords = {computational linguistics,gender,gender bias,wikipedia}, + series = {{{HT}} '15} +} + +@article{green_implicit_2007, + title = {Implicit {{Bias}} among {{Physicians}} and Its {{Prediction}} of {{Thrombolysis Decisions}} for {{Black}} and {{White Patients}}}, + author = {Green, Alexander R. and Carney, Dana R. and Pallin, Daniel J. and Ngo, Long H. and Raymond, Kristal L. and Iezzoni, Lisa I. and Banaji, Mahzarin R.}, + year = {2007}, + month = sep, + volume = {22}, + pages = {1231--1238}, + issn = {1525-1497}, + abstract = {Context: Studies documenting racial/ethnic disparities in health care frequently implicate physicians' unconscious biases. No study to date has measured physicians' unconscious racial bias to test whether this predicts physicians' clinical decisions. Objective: To test whether physicians show implicit race bias and whether the magnitude of such bias predicts thrombolysis recommendations for black and white patients with acute coronary syndromes. Design, Setting, and Participants: An internet-based tool comprising a clinical vignette of a patient presenting to the emergency department with an acute coronary syndrome, followed by a questionnaire and three Implicit Association Tests (IATs). 
Study invitations were e-mailed to all internal medicine and emergency medicine residents at four academic medical centers in Atlanta and Boston; 287 completed the study, met inclusion criteria, and were randomized to either a black or white vignette patient. Main Outcome Measures: IAT scores (normal continuous variable) measuring physicians' implicit race preference and perceptions of cooperativeness. Physicians' attribution of symptoms to coronary artery disease for vignette patients with randomly assigned race, and their decisions about thrombolysis. Assessment of physicians' explicit racial biases by questionnaire. Results: Physicians reported no explicit preference for white versus black patients or differences in perceived cooperativeness. In contrast, IATs revealed implicit preference favoring white Americans (mean IAT score = 0.36, P {$<$} .001, one-sample t test) and implicit stereotypes of black Americans as less cooperative with medical procedures (mean IAT score 0.22, P {$<$} .001), and less cooperative generally (mean IAT score 0.30, P {$<$} .001). As physicians' prowhite implicit bias increased, so did their likelihood of treating white patients and not treating black patients with thrombolysis (P = .009). Conclusions: This study represents the first evidence of unconscious (implicit) race bias among physicians, its dissociation from conscious (explicit) bias, and its predictive validity. Results suggest that physicians' unconscious biases may contribute to racial/ethnic disparities in use of medical procedures such as thrombolysis for myocardial infarction.}, + file = {/home/nathante/Zotero/storage/H78EMZ77/Green et al. 
- 2007 - Implicit Bias among Physicians and its Prediction .pdf}, + journal = {Journal of General Internal Medicine}, + keywords = {clinical decisions,disparities,race,thrombolysis,unconscious bias}, + language = {en}, + number = {9} +} + +@article{greenwald_statistically_2015, + title = {Statistically Small Effects of the {{Implicit Association Test}} Can Have Societally Large Effects}, + author = {Greenwald, Anthony G. and Banaji, Mahzarin R. and Nosek, Brian A.}, + year = {2015}, + volume = {108}, + pages = {553--561}, + issn = {1939-1315(Electronic),0022-3514(Print)}, + abstract = {Greenwald, Poehlman, Uhlmann, and Banaji (2009; GPUB hereafter) reported an average predictive validity correlation of {\=r} = .236 for Implicit Association Test (IAT) measures involving Black\textendash White racial attitudes and stereotypes. Oswald, Mitchell, Blanton, Jaccard, and Tetlock (2013; OMBJT) reported a lower aggregate figure for correlations involving IAT measures ({\=r} = .148). The difference between the estimates of the 2 reviews was due mostly to their use of different policies for including effect sizes. GPUB limited their study to findings that assessed theoretically expected attitude\textendash behavior and stereotype\textendash judgment correlations along with others that the authors expected to show positive correlations. OMBJT included a substantial minority of correlations for which there was no theoretical expectation of a predictive relationship. Regardless of inclusion policy, both meta-analyses estimated aggregate correlational effect sizes that were large enough to explain discriminatory impacts that are societally significant either because they can affect many people simultaneously or because they can repeatedly affect single persons. (PsycINFO Database Record (c) 2019 APA, all rights reserved)}, + file = {/home/nathante/Zotero/storage/XNMJJJIN/Greenwald et al. 
- 2015 - Statistically small effects of the Implicit Associ.pdf;/home/nathante/Zotero/storage/GPX5FP89/display.html}, + journal = {Journal of Personality and Social Psychology}, + keywords = {Effect Size (Statistical),Prediction,Predictive Validity,Racial and Ethnic Attitudes,Statistical Validity}, + number = {4} +} + +@article{grimmelmann_virtues_2015, + title = {The {{Virtues}} of {{Moderation}}}, + author = {Grimmelmann, James}, + year = {2015}, + volume = {17}, + pages = {42--109}, + file = {/home/nathante/Zotero/storage/8AH4CCSL/Grimmelmann_2015_The Virtues of Moderation.pdf}, + journal = {Yale Journal of Law and Technology}, + language = {eng} +} + +@article{halaby_panel_2004, + title = {Panel {{Models}} in {{Sociological Research}}: {{Theory}} into {{Practice}}}, + shorttitle = {Panel {{Models}} in {{Sociological Research}}}, + author = {Halaby, Charles N.}, + year = {2004}, + volume = {30}, + pages = {507--544}, + abstract = {A selection of panel studies appearing in the American Sociological Review and the American Journal of Sociology between 1990 and 2003 shows that sociologists have been slow to capitalize on the advantages of panel data for controlling unobservables that threaten causal inference in observational studies. This review emphasizes regression methods that capitalize on the strengths of panel data for consistently estimating causal parameters in models for metric outcomes when measured explanatory variables are correlated with unit-specific unobservables. Both static and dynamic models are treated. 
Among the major subjects are fixed versus random effects methods, Hausman tests, Hausman-Taylor models, and instrumental variables methods, including Arellano-Bond and Anderson-Hsiao estimation for models with lagged endogenous variables.}, + file = {/home/nathante/Zotero/storage/T6Q23K8T/Halaby - 2004 - Panel Models in Sociological Research Theory into.pdf}, + journal = {Annual Review of Sociology}, + keywords = {_tablet_modified}, + number = {1} +} + +@inproceedings{halfaker_dont_2011, + ids = {halfaker\_dont\_2011-1}, + title = {Don't Bite the Newbies: {{How}} Reverts Affect the Quantity and Quality of {{Wikipedia}} Work}, + shorttitle = {Don't Bite the Newbies}, + booktitle = {Proceedings of the 7th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}} ({{WikiSym}} '11)}, + author = {Halfaker, Aaron and Kittur, Aniket and Riedl, John}, + year = {2011}, + pages = {163--172}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {Reverts are important to maintaining the quality of Wikipedia. They fix mistakes, repair vandalism, and help enforce policy. However, reverts can also be damaging, especially to the aspiring editor whose work they destroy. In this research we analyze 400,000 Wikipedia revisions to understand the effect that reverts had on editors. We seek to understand the extent to which they demotivate users, reducing the workforce of contributors, versus the extent to which they help users improve as encyclopedia editors. Overall we find that reverts are powerfully demotivating, but that their net influence is that more quality work is done in Wikipedia as a result of reverts than is lost by chasing editors away. However, we identify key conditions---most specifically new editors being reverted by much more experienced editors---under which reverts are particularly damaging. 
We propose that reducing the damage from reverts might be one effective path for Wikipedia to solve the newcomer retention problem.}, + file = {/home/nathante/Zotero/storage/MYF3PP47/Halfaker et al_2011_Don't bite the newbies.pdf}, + isbn = {978-1-4503-0909-7}, + keywords = {experience,motivation,productivity,quality,revert,wikipedia,WikiWork} +} + +@inproceedings{halfaker_interpolating_2017, + title = {Interpolating {{Quality Dynamics}} in {{Wikipedia}} and {{Demonstrating}} the {{Keilana Effect}}}, + booktitle = {Proceedings of the 13th {{International Symposium}} on {{Open Collaboration}}}, + author = {Halfaker, Aaron}, + year = {2017}, + pages = {19:1--19:9}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {For open, volunteer generated content like Wikipedia, quality is a prominent concern. To measure Wikipedia's quality, researchers have historically relied on expert evaluation or assessments of article quality by Wikipedians themselves. While both of these methods have proven effective for answering many questions about Wikipedia's quality and processes, they are both problematic: expert evaluation is expensive and Wikipedian quality assessments are sporadic and unpredictable. Studies that explore Wikipedia's quality level or the processes that result in quality improvements have only examined small snapshots of Wikipedia and often rely on complex propensity models to deal with the unpredictable nature of Wikipedians' own assessments. In this paper, I describe a method for measuring article quality in Wikipedia historically and at a finer granularity than was previously possible. 
I use this method to demonstrate an important coverage dynamic in Wikipedia (specifically, articles about women scientists) and offer this method, dataset, and open API to the research community studying Wikipedia quality dynamics.}, + file = {/home/nathante/Zotero/storage/BE29237Z/Halfaker - 2017 - Interpolating Quality Dynamics in Wikipedia and De.pdf}, + isbn = {978-1-4503-5187-4}, + keywords = {Dataset,Interpolation,Methods,Modeling,Predictive,Quality,Wikipedia}, + series = {{{OpenSym}} '17} +} + +@article{halfaker_ores:_2020, + title = {{{ORES}}: {{Lowering Barriers}} with {{Participatory Machine Learning}} in {{Wikipedia}}}, + author = {Halfaker, Aaron and Geiger, R. Stuart}, + year = {2020}, + month = oct, + volume = {4}, + pages = {37}, + file = {/home/nathante/Zotero/storage/XG2FHI6U/Halfaker et al. - ORES Facilitating re-mediation of Wikipedia's soc.pdf}, + language = {en}, + number = {148} +} + +@article{halfaker_rise_2013, + title = {The Rise and Decline of an Open Collaboration System: How {{Wikipedia}}'s Reaction to Popularity Is Causing Its Decline}, + shorttitle = {The {{Rise}} and {{Decline}} of an {{Open Collaboration System}}}, + author = {Halfaker, Aaron and Geiger, R. Stuart and Morgan, Jonathan T. and Riedl, John}, + year = {2013}, + month = may, + volume = {57}, + pages = {664--688}, + issn = {0002-7642}, + abstract = {Open collaboration systems, such as Wikipedia, need to maintain a pool of volunteer contributors to remain relevant. Wikipedia was created through a tremendous number of contributions by millions of contributors. However, recent research has shown that the number of active contributors in Wikipedia has been declining steadily for years and suggests that a sharp decline in the retention of newcomers is the cause. 
This article presents data that show how several changes the Wikipedia community made to manage quality and consistency in the face of a massive growth in participation have ironically crippled the very growth they were designed to manage. Specifically, the restrictiveness of the encyclopedia's primary quality control mechanism and the algorithmic tools used to reject contributions are implicated as key causes of decreased newcomer retention. Furthermore, the community's formal mechanisms for norm articulation are shown to have calcified against changes\textemdash especially changes proposed by newer editors.}, + file = {/home/nathante/Zotero/storage/BNK42N3V/Halfaker et al. - 2013 - The rise and decline of an open collaboration syst.pdf}, + journal = {American Behavioral Scientist}, + language = {en}, + number = {5} +} + +@inproceedings{halfaker_snuggle:_2014, + title = {Snuggle: {{Designing}} for {{Efficient Socialization}} and {{Ideological Critique}}}, + shorttitle = {Snuggle}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Halfaker, Aaron and Geiger, R. Stuart and Terveen, Loren G.}, + year = {2014}, + pages = {311--320}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Wikipedia, the encyclopedia "anyone can edit", has become increasingly less so. Recent academic research and popular discourse illustrates the often aggressive ways newcomers are treated by veteran Wikipedians. These are complex sociotechnical issues, bound up in infrastructures based on problematic ideologies. In response, we worked with a coalition of Wikipedians to design, develop, and deploy Snuggle, a new user interface that served two critical functions: making the work of newcomer socialization more effective, and bringing visibility to instances in which Wikipedians' current practice of gatekeeping socialization breaks down. 
Snuggle supports positive socialization by helping mentors quickly find newcomers whose good-faith mistakes were reverted as damage. Snuggle also supports ideological critique and reflection by bringing visibility to the consequences of viewing newcomers through a lens of suspiciousness.}, + file = {/home/nathante/Zotero/storage/4NR774KI/Halfaker et al. - 2014 - Snuggle Designing for Efficient Socialization and.pdf}, + isbn = {978-1-4503-2473-1}, + keywords = {activism,Algorithms,critique,design,newcomer socialization,quantitative,wikipedia}, + series = {{{CHI}} '14} +} + +@article{hancock_impression_2001, + title = {Impression {{Formation}} in {{Computer}}-{{Mediated Communication Revisited}}: {{An Analysis}} of the {{Breadth}} and {{Intensity}} of {{Impressions}}}, + shorttitle = {Impression {{Formation}} in {{Computer}}-{{Mediated Communication Revisited}}}, + author = {Hancock, Jeffery T. and Dunham, Phillip J.}, + year = {2001}, + month = jun, + volume = {28}, + pages = {325--347}, + publisher = {{SAGE Publications Inc}}, + issn = {0093-6502}, + abstract = {Following either a text-based, synchronous computer-mediated conversation (CMC) or a face-to-face dyadic interaction, 80 participants rated their partners' personality profile. Impressions were assessed in terms of both their breadth (the comprehensiveness of the impression) and intensity (the magnitude of the attributions). Results indicated that impressions formed in the CMC environment were less detailed but more intense than those formed face-to-face. These data provide support for theories that, in addition to acknowledging the unique constraints and characteristics of CMC, consider the cognitive strategies and heuristics involved in the impression formation process. 
The differential impact of a text-based medium on trait-specific impressions (e.g., extraversion, neuroticism) is also discussed in the context of a cross-modal approach to impression formation.}, + file = {/home/nathante/Zotero/storage/FQSIIKNL/HANCOCK_DUNHAM_2001_Impression Formation in Computer-Mediated Communication Revisited.pdf;/home/nathante/Zotero/storage/NGHLJB58/009365001028003004.pdf}, + journal = {Communication Research}, + language = {en}, + number = {3} +} + +@article{hara_cross-cultural_2010, + title = {Cross-Cultural Analysis of the {{Wikipedia}} Community}, + author = {Hara, Noriko and Shachaf, Pnina and Hew, Khe Foon}, + year = {2010}, + volume = {61}, + pages = {2097--2108}, + issn = {1532-2890}, + abstract = {This article reports a cross-cultural analysis of four Wikipedias in different languages and demonstrates their roles as communities of practice (CoPs). Prior research on CoPs and on the Wikipedia community often lacks cross-cultural analysis. Despite the fact that over 75\% of Wikipedia is written in languages other than English, research on Wikipedia primarily focuses on the English Wikipedia and tends to overlook Wikipedias in other languages. This article first argues that Wikipedia communities can be analyzed and understood as CoPs. Second, norms of behaviors are examined in four Wikipedia languages (English, Hebrew, Japanese, and Malay), and the similarities and differences across these four languages are reported. Specifically, typical behaviors on three types of discussion spaces (talk, user talk, and Wikipedia talk) are identified and examined across languages. Hofstede's dimensions of cultural diversity as well as the size of the community and the function of each discussion area provide lenses for understanding the similarities and differences. 
As such, this article expands the research on online CoPs through an examination of cultural variations across multiple CoPs and increases our understanding of Wikipedia communities in various languages.}, + copyright = {\textcopyright{} 2010 ASIS\&T}, + file = {/home/nathante/Zotero/storage/EB3CN7QN/Hara et al_2010_Cross-cultural analysis of the Wikipedia community.pdf;/home/nathante/Zotero/storage/VFQ6CII2/asi.html}, + journal = {Journal of the American Society for Information Science and Technology}, + language = {en}, + number = {10} +} + +@article{hardt_equality_2016, + title = {Equality of {{Opportunity}} in {{Supervised Learning}}}, + author = {Hardt, Moritz and Price, Eric and Srebro, Nathan}, + year = {2016}, + month = oct, + abstract = {We propose a criterion for discrimination against a specified sensitive attribute in supervised learning, where the goal is to predict some target based on available features. Assuming data about the predictor, target, and membership in the protected group are available, we show how to optimally adjust any learned predictor so as to remove discrimination according to our definition. Our framework also improves incentives by shifting the cost of poor classification from disadvantaged groups to the decision maker, who can respond by improving the classification accuracy.}, + archivePrefix = {arXiv}, + eprint = {1610.02413}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/EJBB256M/Hardt et al. 
- 2016 - Equality of Opportunity in Supervised Learning.pdf}, + journal = {arXiv:1610.02413 [cs]}, + keywords = {Computer Science - Machine Learning}, + language = {en}, + primaryClass = {cs} +} + +@inproceedings{hecht_tower_2010, + title = {The Tower of {{Babel}} Meets Web 2.0: User-Generated Content and Its Applications in a Multilingual Context}, + shorttitle = {The Tower of {{Babel}} Meets Web 2.0}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Hecht, Brent and Gergle, Darren}, + year = {2010}, + pages = {291--300}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + file = {/home/nathante/Zotero/storage/SPM6LWEH/Hecht and Gergle - 2010 - The tower of Babel meets web 2.0 user-generated c.pdf}, + isbn = {978-1-60558-929-9}, + keywords = {explicit semantic analysis,hyperlingual,knowledge diversity,language,multilingual,semantic relatedness,wikipedia}, + series = {{{CHI}} '10} +} + +@inproceedings{heindorf_debiasing_2019, + title = {Debiasing {{Vandalism Detection Models}} at {{Wikidata}}}, + booktitle = {Proceedings of the 2019 {{World Wide Web Conference}} ({{WWW}} '19)}, + author = {Heindorf, Stefan and Scholten, Yan and Engels, Gregor and Potthast, Martin}, + year = {2019}, + month = may, + pages = {11}, + publisher = {{ACM, New York, NY, USA}}, + address = {{San Francisco, CA, USA}}, + abstract = {Crowdsourced knowledge bases like Wikidata suffer from low-quality edits and vandalism, employing machine learning-based approaches to detect both kinds of damage. We reveal that state-of-the-art detection approaches discriminate anonymous and new users: benign edits from these users receive much higher vandalism scores than benign edits from older ones, causing newcomers to abandon the project prematurely. We address this problem for the first time by analyzing and measuring the sources of bias, and by developing a new vandalism detection model that avoids them. 
Our model FAIR-S reduces the bias ratio of the state-of-the-art vandalism detector WDVD from 310.7 to only 11.9 while maintaining high predictive performance at 0.963 ROCAUC and 0.316 PRAUC.}, + file = {/home/nathante/Zotero/storage/Z7TNPV75/Heindorf et al. - Debiasing Vandalism Detection Models at Wikidata.pdf}, + language = {en} +} + +@book{hernan_causal_2019, + title = {Causal {{Inference}}}, + author = {Hern{\'a}n, Miguel A. and Robins, Jamie M.}, + year = {2019}, + publisher = {{Chapman \& Hall/CRC}}, + address = {{Boca Raton}}, + annotation = {forthcoming.}, + file = {/home/nathante/Zotero/storage/27LLM8S2/Hernán and Robins - 2019 - Causal Inference.pdf;/home/nathante/Zotero/storage/C8AAV2CL/Hernán and Robins - 2019 - Causal Inference.pdf;/home/nathante/Zotero/storage/IPWQJBQG/Hernán and Robins - 2019 - Causal Inference.pdf}, + keywords = {_tablet_modified} +} + +@article{herring_gender_2000, + title = {Gender Differences in {{CMC}}: {{Findings}} and Implications}, + shorttitle = {Gender Differences in {{CMC}}}, + author = {Herring, Susan C.}, + year = {2000}, + volume = {18}, + pages = {0}, + file = {/home/nathante/Zotero/storage/K4ZYIVZN/Herring_2000_Gender differences in CMC.pdf}, + journal = {Computer Professionals for Social Responsibility Journal}, + number = {1} +} + +@book{hess_understanding_2011, + title = {Understanding Knowledge as a Commons: {{From}} Theory to Practice}, + shorttitle = {Understanding Knowledge as a Commons}, + editor = {Hess, Charlotte and Ostrom, Elinor}, + year = {2011}, + publisher = {{The MIT Press}}, + address = {{Cambridge, MA}}, + file = {/home/nathante/Zotero/storage/HM3QXD9D/Hess and Ostrom - 2011 - Understanding knowledge as a commons From theory .pdf}, + isbn = {0-262-51603-9} +} + +@article{hill_hidden_2020, + title = {The {{Hidden Costs}} of {{Requiring Accounts}}: {{Quasi}}-{{Experimental Evidence}} from {{Peer Production}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + year = {2020}, + pages = {30}, + 
abstract = {Online communities like Wikipedia produce valuable public information goods. While some of these communities require would-be contributors to create accounts, many do not. Does this requirement catalyze cooperation or inhibit participation? Prior research provides divergent predictions but little causal evidence. We conduct an empirical test using longitudinal data from 136 natural experiments where would-be contributors to wikis were suddenly required to log in to contribute. Requiring accounts leads to a small increase in account creation, but reduces both high and low quality contributions from registered and unregistered participants. Although the change deters a large portion of low quality participation, the vast majority of deterred contributions are higher quality. We conclude that requiring accounts introduces an under-theorized tradeoff for public goods production in interactive communication systems.}, + file = {/home/nathante/Zotero/storage/AT9Q94W5/Hill and Shaw - The Hidden Costs of Requiring Accounts Quasi-Expe.pdf;/home/nathante/Zotero/storage/LVRKFU6F/online_appendix-hill_shaw-hidden_costs-DRAFT-20191230.pdf}, + journal = {Communication Research}, + language = {en} +} + +@article{hogg_social_2000, + title = {Social {{Identity}} and {{Self}}-{{Categorization Processes}} in {{Organizational Contexts}}}, + author = {Hogg, Michael A. and Terry, Deborah J.}, + year = {2000}, + volume = {25}, + pages = {121--140}, + issn = {0363-7425}, + abstract = {Although aspects of social identity theory are familiar to organizational psychologists, its elaboration, through self-categorization theory, of how social categorization and prototype-based depersonalization actually produce social identity effects is less well known. We describe these processes, relate self-categorization theory to social identity theory, describe new theoretical developments in detail, and show how these developments can address a range of organizational phenomena. 
We discuss cohesion and deviance, leadership, subgroup and sociodemographic structure, and mergers and acquisitions.}, + file = {/home/nathante/Zotero/storage/386LB3JI/Hogg_Terry_2000_Social Identity and Self-Categorization Processes in Organizational Contexts.pdf}, + journal = {The Academy of Management Review}, + number = {1} +} + +@article{horne_enforcement_2001, + title = {The {{Enforcement}} of {{Norms}}: {{Group Cohesion}} and {{Meta}}-{{Norms}}}, + shorttitle = {The {{Enforcement}} of {{Norms}}}, + author = {Horne, Christine}, + year = {2001}, + volume = {64}, + pages = {253--266}, + publisher = {{[Sage Publications, Inc., American Sociological Association]}}, + issn = {0190-2725}, + abstract = {Group cohesion is generally thought to contribute to social order. One explanation for this correlation suggests that it can be attributed to higher rates of sanctioning found in solidary communities. In the present paper I suggest a mechanism that may be at least partially responsible for the relationship between group cohesion and sanctioning. I argue that ties among potential sanctioners and other nondeviant group members are key. The strength of these relationships affects the level of support that group members give to those who punish antisocial behavior, and in turn, influences the likelihood that such sanctioning will occur. Thus group cohesion affects punishment indirectly by increasing the rewards that are given to sanctioners. This increase in rewards then affects sanctioning decisions. I test these predictions using experimental methods. 
The results support the hypotheses.}, + journal = {Social Psychology Quarterly}, + number = {3} +} + +@article{hu_welfare_2018, + title = {Welfare and {{Distributional Impacts}} of {{Fair Classification}}}, + author = {Hu, Lily and Chen, Yiling}, + year = {2018}, + pages = {5}, + abstract = {Current methodologies in machine learning analyze the effects of various statistical parity notions of fairness primarily in light of their impacts on predictive accuracy and vendor utility loss. In this paper, we propose a new framework for interpreting the effects of fairness criteria by converting the constrained loss minimization problem into a social welfare maximization problem. This translation moves a classifier and its output into utility space where individuals, groups, and society at-large experience different welfare changes due to classification assignments. Under this characterization, predictions and fairness constraints are seen as shaping societal welfare and distribution and revealing individuals' implied welfare weights in society\textemdash weights that may then be interpreted through a fairness lens. The social welfare formulation of the fairness problem brings to the fore concerns of distributive justice that have always had a central albeit more implicit role in standard algorithmic fairness approaches.}, + file = {/home/nathante/Zotero/storage/MX33TQB8/Hu and Chen - 2018 - Welfare and Distributional Impacts of Fair Classif.pdf;/home/nathante/Zotero/storage/PERZGMUS/Hu and Chen - Welfare and Distributional Impacts of Fair Classif.pdf;/home/nathante/Zotero/storage/DFXF29L9/1807.html}, + keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}, + language = {en} +} + +@article{imbens_regression_2008, + title = {Regression Discontinuity Designs: {{A}} Guide to Practice}, + shorttitle = {Regression Discontinuity Designs}, + author = {Imbens, Guido W. 
and Lemieux, Thomas}, + year = {2008}, + month = feb, + volume = {142}, + pages = {615--635}, + issn = {0304-4076}, + abstract = {In regression discontinuity (RD) designs for evaluating causal effects of interventions, assignment to a treatment is determined at least partly by the value of an observed covariate lying on either side of a fixed threshold. These designs were first introduced in the evaluation literature by Thistlewaite and Campbell [1960. Regression-discontinuity analysis: an alternative to the ex-post Facto experiment. Journal of Educational Psychology 51, 309\textendash 317] With the exception of a few unpublished theoretical papers, these methods did not attract much attention in the economics literature until recently. Starting in the late 1990s, there has been a large number of studies in economics applying and extending RD methods. In this paper we review some of the practical and theoretical issues in implementation of RD methods.}, + file = {/home/nathante/Zotero/storage/BA7DD2FP/Imbens and Lemieux - 2008 - Regression discontinuity designs A guide to pract.pdf;/home/nathante/Zotero/storage/HS28CTR2/S0304407607001091.html}, + journal = {Journal of Econometrics}, + keywords = {Nonparametric estimation,Regression discontinuity,Treatment effects}, + number = {2}, + series = {The Regression Discontinuity Design: {{Theory}} and Applications} +} + +@article{jacob_practical_2012, + title = {A Practical Guide to Regression Discontinuity}, + author = {Jacob, Robin Tepper and Zhu, Pei and Somers, Marie-Andr{\'e}e and Bloom, Howard}, + year = {2012}, + file = {/home/nathante/Zotero/storage/Q9XHYJI3/Jacob et al. 
- 2012 - A practical guide to regression discontinuity.pdf}, + journal = {MDRC Working Papers on Research Methodology} +} + +@inproceedings{jacobs_meaning_2020, + title = {The Meaning and Measurement of Bias: Lessons from Natural Language Processing}, + shorttitle = {The Meaning and Measurement of Bias}, + booktitle = {Proceedings of the 2020 {{Conference}} on {{Fairness}}, {{Accountability}}, and {{Transparency}}}, + author = {Jacobs, Abigail Z. and Blodgett, Su Lin and Barocas, Solon and Daum{\'e}, Hal and Wallach, Hanna}, + year = {2020}, + month = jan, + pages = {706}, + publisher = {{Association for Computing Machinery}}, + address = {{Barcelona, Spain}}, + abstract = {The recent interest in identifying and mitigating bias in computational systems has introduced a wide range of different---and occasionally incomparable---proposals for what constitutes bias in such systems. This tutorial introduces the language of measurement modeling from the quantitative social sciences as a framework for examining how social, organizational, and political values enter computational systems and unpacking the varied normative concerns operationalized in different techniques for measuring "bias." We show that this framework helps to clarify the way unobservable theoretical constructs---such as "creditworthiness," "risk to society," or "tweet toxicity"---are turned into measurable quantities and how this process may introduce fairness-related harms. In particular, we demonstrate how to systematically assess the construct validity and reliability of these measurements to detect and characterize specific types of harms, which arise from mismatches between constructs and their operationalizations. We then take a critical look at existing approaches to examining "bias" in NLP models, ranging from work on embedding spaces to machine translation and hate speech detection. 
We show that measurement modeling can help uncover the implicit constructs that such work aims to capture when measuring "bias." In so doing, we illustrate the limits of current "debiasing" techniques, which have obscured the specific harms whose measurements they implicitly aim to reduce. By introducing the language of measurement modeling, we provide the FAT* community with a framework for making explicit and testing assumptions about unobservable theoretical constructs embedded in computational systems, thereby clarifying and uniting our understandings of fairness-related harms.}, + file = {/home/nathante/Zotero/storage/4BJ22FPC/Jacobs et al_2020_The meaning and measurement of bias.pdf}, + isbn = {978-1-4503-6936-7}, + keywords = {bias,construct validity,fairness,measurement,word embeddings}, + series = {{{FAT}}* '20} +} + +@article{jacobson_impression_1999, + title = {Impression {{Formation}} in {{Cyberspace}}: {{Online Expectations}} and {{Offline Experiences}} in {{Text}}-Based {{Virtual Communities}}}, + shorttitle = {Impression {{Formation}} in {{Cyberspace}}}, + author = {Jacobson, David}, + year = {1999}, + month = sep, + volume = {5}, + publisher = {{Oxford Academic}}, + abstract = {Abstract. How do people in cyberspace picture one another? 
More specifically, how do individuals engaged in text-based computer-mediated communication (CMC), w}, + file = {/home/nathante/Zotero/storage/GKBA2QLG/4584195.html}, + journal = {Journal of Computer-Mediated Communication}, + language = {en}, + number = {1} +} + +@misc{jeff_larson_how_2016, + title = {How {{We Analyzed}} the {{COMPAS Recidivism Algorithm}}}, + author = {Larson, Jeff and Angwin, Julia}, + year = {2016}, + month = may, + abstract = {ProPublica is an independent, non-profit newsroom that produces investigative journalism in the public interest.}, + copyright = {Copyright \textcopyright 2019 ProPublica.}, + file = {/home/nathante/Zotero/storage/MT2D6XSA/how-we-analyzed-the-compas-recidivism-algorithm.html}, + howpublished = {https://www.propublica.org/article/how-we-analyzed-the-compas-recidivism-algorithm}, + journal = {ProPublica}, + language = {en}, + type = {Text/Html} +} + +@book{jemielniak_common_2014, + title = {Common Knowledge?: {{An}} Ethnography of {{Wikipedia}}}, + shorttitle = {Common {{Knowledge}}?}, + author = {Jemielniak, Dariusz}, + year = {2014}, + publisher = {{Stanford University Press}}, + address = {{Stanford, CA}}, + abstract = {With an emphasis on peer\textendash produced content and collaboration, Wikipedia exemplifies a departure from traditional management and organizational models. This iconic "project" has been variously characterized as a hive mind and an information revolution, attracting millions of new users even as it has been denigrated as anarchic and plagued by misinformation. Has Wikipedia's structure and inner workings promoted its astonishing growth and enduring public relevance? In Common Knowledge?, Dariusz Jemielniak draws on his academic expertise and years of active participation within the Wikipedia community to take readers inside the site, illuminating how it functions and deconstructing its distinctive organization. 
Against a backdrop of misconceptions about its governance, authenticity, and accessibility, Jemielniak delivers the first ethnography of Wikipedia, revealing that it is not entirely at the mercy of the public: instead, it balances open access and power with a unique bureaucracy that takes a page from traditional organizational forms. Along the way, Jemielniak incorporates fascinating cases that highlight the tug of war among the participants as they forge ahead in this pioneering environment.}, + isbn = {978-0-8047-8944-8}, + language = {English} +} + +@article{jhaver_did_2019, + title = {"{{Did You Suspect}} the {{Post Would}} Be {{Removed}}?": {{Understanding User Reactions}} to {{Content Removals}} on {{Reddit}}}, + shorttitle = {"{{Did You Suspect}} the {{Post Would}} Be {{Removed}}?}, + author = {Jhaver, Shagun and Appling, Darren Scott and Gilbert, Eric and Bruckman, Amy}, + year = {2019}, + month = nov, + volume = {3}, + pages = {192:1--192:33}, + abstract = {Thousands of users post on Reddit every day, but a fifth of all posts are removed. How do users react to these removals? We conducted a survey of 907 Reddit users, asking them to reflect on their post removal a few hours after it happened. Examining the qualitative and quantitative responses from this survey, we present users' perceptions of the platform's moderation processes. We find that although roughly a fifth (18\%) of the participants accepted that their post removal was appropriate, a majority of the participants did not --- over a third (37\%) of the participants did not understand why their post was removed, and further, 29\% of the participants expressed some level of frustration about the removal. We focus on factors that shape users' attitudes about fairness in moderation and posting again in the community. Our results indicate that users who read community guidelines or receive explanations for removal are more likely to perceive the removal as fair and post again in the future. 
We discuss implications for moderation practices and policies. Our findings suggest that the extra effort required to establish community guidelines and educate users with helpful feedback is worthwhile, leading to better user attitudes about fairness and propensity to post again.}, + file = {/home/nathante/Zotero/storage/6GB89RXV/Jhaver et al_2019_Did You Suspect the Post Would be Removed.pdf}, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + keywords = {content moderation,content regulation,removal explanations}, + number = {CSCW} +} + +@article{jhaver_human-machine_2019, + title = {Human-Machine Collaboration for Content Regulation: The Case of Reddit Automoderator}, + shorttitle = {Human-Machine Collaboration for Content Regulation}, + author = {Jhaver, Shagun and Birman, Iris and Gilbert, Eric and Bruckman, Amy}, + year = {2019}, + month = jul, + volume = {26}, + pages = {31:1--31:35}, + issn = {1073-0516}, + abstract = {What one may say on the internet is increasingly controlled by a mix of automated programs, and decisions made by paid and volunteer human moderators. On the popular social media site Reddit, moderators heavily rely on a configurable, automated program called ``Automoderator'' (or ``Automod''). How do moderators use Automod? What advantages and challenges does the use of Automod present? We participated as Reddit moderators for over a year, and conducted interviews with 16 moderators to understand the use of Automod in the context of the sociotechnical system of Reddit. Our findings suggest a need for audit tools to help tune the performance of automated mechanisms, a repository for sharing tools, and improving the division of labor between human and machine decision making. 
We offer insights that are relevant to multiple stakeholders\textemdash creators of platforms, designers of automated regulation systems, scholars of platform governance, and content moderators.}, + file = {/home/nathante/Zotero/storage/H75377NM/Jhaver et al_2019_Human-machine collaboration for content regulation.pdf}, + journal = {ACM Trans. Comput.-Hum. Interact.}, + keywords = {automated moderation,Automod,Content moderation,future of work,mixed initiative,platform governance}, + number = {5} +} + +@article{jhaver_view_2018, + ids = {jhaver\_view\_2018-1}, + title = {The View from the Other Side: {{The}} Border between Controversial Speech and Harassment on {{Kotaku}} in {{Action}}}, + shorttitle = {The View from the Other Side}, + author = {Jhaver, Shagun and Chan, Larry and Bruckman, Amy}, + year = {2018}, + month = feb, + volume = {23}, + issn = {13960466}, + abstract = {In this paper, we use mixed methods to study a controversial Internet site: The Kotaku in Action (KiA) subreddit. Members of KiA are part of GamerGate, a distributed social movement. We present an emic account of what takes place on KiA: who are they, what are their goals and beliefs, and what rules do they follow. Members of GamerGate in general and KiA in particular have often been accused of harassment. However, KiA site policies explicitly prohibit such behavior, and members insist that they have been falsely accused. Underlying the controversy over whether KiA supports harassment is a complex disagreement about what ``harassment'' is, and where to draw the line between freedom of expression and censorship. We propose a model that characterizes perceptions of controversial speech, dividing it into four categories: criticism, insult, public shaming, and harassment. 
We also discuss design solutions that address the challenges of moderating harassment without impinging on free speech, and communicating across different ideologies.}, + copyright = {Copyright (c) 2018 First Monday}, + journal = {First Monday}, + keywords = {Freedom of expression,GamerGate,Online harassment,Political correctness,Reddit}, + language = {en}, + number = {2} +} + +@article{jiang_moderation_2019, + title = {Moderation Challenges in Voice-Based Online Communities on {{Discord}}}, + author = {Jiang, Jialun "Aaron" and Kiene, Charles and Middler, Skyler and Brubaker, Jed R. and Fiesler, Casey}, + year = {2019}, + volume = {3}, + pages = {23}, + file = {/home/nathante/Zotero/storage/MERN3PJA/Jiang et al_2019_Moderation challenges in voice-based online communities on Discord.pdf}, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + number = {CSCW}, + series = {{{CSCW}} '19} +} + +@article{jouhki_facebooks_2016, + title = {Facebook's {{Emotional Contagion Experiment}} as a {{Challenge}} to {{Research Ethics}}}, + author = {Jouhki, Jukka and Lauk, Epp and Penttinen, Maija and Sormanen, Niina and Uskali, Turo}, + year = {2016}, + month = oct, + volume = {4}, + pages = {75--85}, + issn = {2183-2439}, + copyright = {Copyright (c) 2016 Jukka Jouhki, Epp Lauk, Maija Penttinen, Niina Sormanen, Turo Uskali}, + file = {/home/nathante/Zotero/storage/WCTGZ5VZ/Jouhki et al_2016_Facebook’s Emotional Contagion Experiment as a Challenge to Research Ethics.pdf;/home/nathante/Zotero/storage/HNA2WPM8/579.html}, + journal = {Media and Communication}, + keywords = {Big data,emotional contagion,Facebook,informed consent,manipulation,methodology,privacy,research ethics,social media,user data}, + language = {en}, + number = {4} +} + +@book{kahneman_thinking_2011, + title = {Thinking, Fast and Slow}, + author = {Kahneman, Daniel}, + year = {2011}, + publisher = {{Macmillan}}, + file = {/home/nathante/Zotero/storage/GXXS62Q9/Daniel Kahneman - Thinking, Fast and 
Slow -Farrar, Straus and Giroux (2011).epub} +} + +@inproceedings{kang_why_2013, + title = {Why {{Do People Seek Anonymity}} on the {{Internet}}?: {{Informing Policy}} and {{Design}}}, + shorttitle = {Why {{Do People Seek Anonymity}} on the {{Internet}}?}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Kang, Ruogu and Brown, Stephanie and Kiesler, Sara}, + year = {2013}, + pages = {2657--2666}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {In this research we set out to discover why and how people seek anonymity in their online interactions. Our goal is to inform policy and the design of future Internet architecture and applications. We interviewed 44 people from America, Asia, Europe, and Africa who had sought anonymity and asked them about their experiences. A key finding of our research is the very large variation in interviewees' past experiences and life situations leading them to seek anonymity, and how they tried to achieve it. 
Our results suggest implications for the design of online communities, challenges for policy, and ways to improve anonymity tools and educate users about the different routes and threats to anonymity on the Internet.}, + file = {/home/nathante/Zotero/storage/79LAHM37/Kang et al_2013_Why Do People Seek Anonymity on the Internet.pdf}, + isbn = {978-1-4503-1899-0}, + keywords = {anonymity,information disclosure,online community,Privacy}, + series = {{{CHI}} '13} +} + +@inproceedings{kay_unequal_2015, + title = {Unequal {{Representation}} and {{Gender Stereotypes}} in {{Image Search Results}} for {{Occupations}}}, + booktitle = {Proceedings of the 33rd {{Annual ACM Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Kay, Matthew and Matuszek, Cynthia and Munson, Sean A.}, + year = {2015}, + pages = {3819--3828}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Information environments have the power to affect people's perceptions and behaviors. In this paper, we present the results of studies in which we characterize the gender bias present in image search results for a variety of occupations. We experimentally evaluate the effects of bias in image search results on the images people choose to represent those careers and on people's perceptions of the prevalence of men and women in each occupation. We find evidence for both stereotype exaggeration and systematic underrepresentation of women in search results. We also find that people rate search results higher when they are consistent with stereotypes for a career, and shifting the representation of gender in image search results can shift people's perceptions about real-world distributions. We also discuss tensions between desires for high-quality results and broader societal goals for equality of representation in this space.}, + file = {/home/nathante/Zotero/storage/VSN2RSRT/Kay et al. 
- 2015 - Unequal Representation and Gender Stereotypes in I.pdf}, + isbn = {978-1-4503-3145-6}, + keywords = {bias,gender,image search,inequality,representation,stereotypes}, + series = {{{CHI}} '15} +} + +@article{kiene_managing_2018, + title = {Managing Organizational Culture in Online Group Mergers}, + author = {Kiene, Charles and Shaw, Aaron and Hill, Benjamin Mako}, + year = {2018}, + volume = {2}, + pages = {89:1--89:21}, + file = {/home/nathante/Zotero/storage/8U5K7B3J/Kiene et al_2018_Managing organizational culture in online group mergers.pdf}, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + number = {CSCW} +} + +@inproceedings{kiene_surviving_2016, + title = {Surviving an ``{{Eternal September}}'': {{How}} an Online Community Managed a Surge of Newcomers}, + shorttitle = {Surviving an "{{Eternal September}}"}, + booktitle = {Proceedings of the 2016 {{ACM Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '16)}, + author = {Kiene, Charles and {Monroy-Hern{\'a}ndez}, Andr{\'e}s and Hill, Benjamin Mako}, + year = {2016}, + pages = {1152--1156}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {We present a qualitative analysis of interviews with participants in the NoSleep community within Reddit where millions of fans and writers of horror fiction congregate. We explore how the community handled a massive, sudden, and sustained increase in new members. Although existing theory and stories like Usenet's infamous "Eternal September" suggest that large influxes of newcomers can hurt online communities, our interviews suggest that NoSleep survived without major incident. We propose that three features of NoSleep allowed it to manage the rapid influx of newcomers gracefully: (1) an active and well-coordinated group of administrators, (2) a shared sense of community which facilitated community moderation, and (3) technological systems that mitigated norm violations. 
We also point to several important trade-offs and limitations.}, + file = {/home/nathante/Zotero/storage/CEQKMQNS/kiene_monroy_hill-surving_eternal_september-CHI2016.pdf;/home/nathante/Zotero/storage/GCMTJWGC/Kiene et al. - 2016 - Surviving an Eternal September How an Online Co.pdf;/home/nathante/Zotero/storage/Y2WVJW56/Kiene et al. - 2016 - Surviving an Eternal September How an Online Co.pdf}, + isbn = {978-1-4503-3362-7}, + keywords = {Civic engagement,contribution,e-petition,Motivation,newcomers,norms and governance,online activism,online communities,peer production,power user,qualitative methods} +} + +@article{kiene_technological_2019, + title = {Technological Frames and User Innovation: Exploring Technological Change in Community Moderation Teams}, + shorttitle = {Technological Frames and User Innovation}, + author = {Kiene, Charles and Jiang, Jialun "Aaron" and Hill, Benjamin Mako}, + year = {2019}, + month = nov, + volume = {3}, + pages = {44:1--44:23}, + abstract = {Management of technological change in organizations is one of the most enduring topics in the literature on computer-supported cooperative work. The successful navigation of technological change is both more challenging and more critical in online communities that are entirely mediated by technology than it is in traditional organizations. This paper presents an analysis of 14 in-depth interviews with moderators of subcommunities of one technological platform (Reddit) that added communities on a new technological platform (Discord). Moderation teams experienced several problems related to moderating content at scale as well as a disconnect between the affordances of Discord and their assumptions based on their experiences on Reddit. We found that moderation teams used Discord's API to create scripts and bots that augmented Discord to make the platform work more like tools on Reddit. These tools were particularly important in communities struggling with scale. 
Our findings suggest that increasingly widespread end user programming allow users of social computing systems to innovate and deploy solutions to unanticipated design problems by transforming new technological platforms to align with their past expectations.}, + file = {/home/nathante/Zotero/storage/5G6DRNAE/Kiene et al_2019_Technological frames and user innovation.pdf}, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + keywords = {API,bots,chat,computer-mediated communication,discord,moderation,online communities,reddit,social computing,technological change}, + number = {CSCW} +} + +@inproceedings{kiene_who_2020, + title = {Who {{Uses Bots}}? {{A Statistical Analysis}} of {{Bot Usage}} in {{Moderation Teams}}}, + shorttitle = {Who {{Uses Bots}}?}, + booktitle = {Extended {{Abstracts}} of the 2020 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Kiene, Charles and Hill, Benjamin Mako}, + year = {2020}, + month = apr, + pages = {1--8}, + publisher = {{Association for Computing Machinery}}, + address = {{Honolulu, HI, USA}}, + abstract = {Adopting new technology is challenging for volunteer moderation teams of online communities. Challenges are aggravated when communities increase in size. In a prior qualitative study, Kiene et al. found evidence that moderator teams adapted to challenges by relying on their experience in other technological platforms to guide the creation and adoption of innovative custom moderation "bots." In this study, we test three hypotheses on the social correlates of user innovated bot usage drawn from a previous qualitative study. We find strong evidence of the proposed relationship between community size and the use of user innovated bots. 
Although previous work suggests that smaller teams of moderators will be more likely to use these bots and that users with experience moderating in the previous platform will be more likely to do so, we find little evidence in support of either proposition.}, + file = {/home/nathante/Zotero/storage/FXQN944W/Kiene_Hill_2020_Who Uses Bots.pdf}, + isbn = {978-1-4503-6819-3}, + keywords = {bots,chat,computer-mediated communication,discord,moderation,online communities,population size,reddit,social computing}, + series = {{{CHI EA}} '20} +} + +@article{klein_gender_2015, + title = {Gender {{Gap Through Time}} and {{Space}}: {{A Journey Through Wikipedia Biographies}} and the "{{WIGI}}" {{Index}}}, + shorttitle = {Gender {{Gap Through Time}} and {{Space}}}, + author = {Klein, Maximilian and Konieczny, Piotr}, + year = {2015}, + month = feb, + abstract = {In this study we investigate how quantification of Wikipedia biographies can shed light on worldwide longitudinal gender inequality trends. We present an academic index allowing comparative study of gender inequality through space and time, the Wikipedia Gender Index (WIGI), based on metadata available through the Wikidata database. Our research confirms that gender inequality is a phenomenon with a long history, but whose patterns can be analyzed and quantified on a larger scale than previously thought possible. Through the use of Inglehart--Welzel cultural clusters, we show that gender inequality can be analyzed with regards to world's cultures. In the dimension studied (coverage of females and other genders in reference works) we show a steadily improving trend, through one with aspects that deserve careful follow up analysis (such as the surprisingly high ranking of the Confucian and South Asian clusters). 
Keywords: data mining, Wikidata, Wikipedia, gender gap, demographics}, + archivePrefix = {arXiv}, + eprint = {1502.03086}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/TR3JP5N4/Klein and Konieczny - 2015 - Gender Gap Through Time and Space A Journey Throu.pdf;/home/nathante/Zotero/storage/4YT543AI/1502.html}, + journal = {arXiv:1502.03086 [cs]}, + keywords = {Computer Science - Computers and Society}, + primaryClass = {cs} +} + +@inproceedings{klein_wikipedia_2015, + title = {Wikipedia in the {{World}} of {{Global Gender Inequality Indices}}: {{What}} the {{Biography Gender Gap}} Is {{Measuring}}}, + shorttitle = {Wikipedia in the {{World}} of {{Global Gender Inequality Indices}}}, + booktitle = {Proceedings of the 11th {{International Symposium}} on {{Open Collaboration}}}, + author = {Klein, Max and Konieczny, Piotr}, + year = {2015}, + pages = {16:1--16:2}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {While Wikipedia's editor gender gap is important but difficult to measure, its biographical gender gap can more readily be measured. We correlate a Wikipedia-derived gender inequality indicator (WIGI), with four widespread gender inequality indices in use today (GDI, GEI, GGGI, and SIGI). 
Analysing their methodologies and correlations to Wikipedia, we find evidence that Wikipedia's bias in biographical coverage is related to the gender bias in positions of social power.}, + file = {/home/nathante/Zotero/storage/K7NI42SK/Klein and Konieczny - 2015 - Wikipedia in the World of Global Gender Inequality.pdf}, + isbn = {978-1-4503-3666-6}, + keywords = {data mining,demographics,gender gap,Wikidata,Wikipedia}, + series = {{{OpenSym}} '15} +} + +@article{kleinberg_algorithmic_2018, + title = {Algorithmic {{Fairness}}}, + author = {Kleinberg, Jon and Ludwig, Jens and Mullainathan, Sendhil and Rambachan, Ashesh}, + year = {2018}, + volume = {108}, + pages = {22--27}, + issn = {2574-0768}, + abstract = {Concerns that algorithms may discriminate against certain groups have led to numerous efforts to 'blind' the algorithm to race. We argue that this intuitive perspective is misleading and may do harm. Our primary result is exceedingly simple, yet often overlooked. A preference for fairness should not change the choice of estimator. Equity preferences can change how the estimated prediction function is used (e.g., different threshold for different groups) but the function itself should not change. We show in an empirical example for college admissions that the inclusion of variables such as race can increase both equity and efficiency.}, + file = {/home/nathante/Zotero/storage/N3KX2H2B/Kleinberg et al. 
- 2018 - Algorithmic Fairness.pdf;/home/nathante/Zotero/storage/JZFSEZ7S/articles.html}, + journal = {AEA Papers and Proceedings}, + keywords = {Cluster Analysis,Factor Models; Equity; Justice; Inequality; and Other Normative Criteria and Measurement; Higher Education,Multiple or Simultaneous Equation Models: Classification Methods,Non-labor Discrimination,Principal Components,Research Institutions; Economics of Minorities; Races; Indigenous Peoples; and Immigrants}, + language = {en} +} + +@article{kleinberg_discrimination_2018, + title = {Discrimination in the {{Age}} of {{Algorithms}}}, + author = {Kleinberg, Jon and Ludwig, Jens and Mullainathan, Sendhil and Sunstein, Cass R.}, + year = {2018}, + month = dec, + volume = {10}, + pages = {113--174}, + issn = {2161-7201}, + abstract = {Abstract. The law forbids discrimination. But the ambiguity of human decision-making often makes it hard for the legal system to know whether anyone has discri}, + file = {/home/nathante/Zotero/storage/ZA5Y9UGX/Kleinberg et al. - 2019 - Discrimination in the Age of Algorithms.pdf;/home/nathante/Zotero/storage/VQMDXWE3/1902.html}, + journal = {Journal of Legal Analysis}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Computers and Society,Computer Science - Machine Learning}, + language = {en} +} + +@article{kleinberg_human_2018, + title = {Human {{Decisions}} and {{Machine Predictions}}}, + author = {Kleinberg, Jon and Lakkaraju, Himabindu and Leskovec, Jure and Ludwig, Jens and Mullainathan, Sendhil}, + year = {2018}, + month = feb, + volume = {133}, + pages = {237--293}, + issn = {0033-5533}, + abstract = {Abstract. Can machine learning improve human decision making? Bail decisions provide a good test case. Millions of times each year, judges make jail-or-release}, + file = {/home/nathante/Zotero/storage/E5AH7FJC/Kleinberg et al. 
- 2018 - Human Decisions and Machine Predictions.pdf;/home/nathante/Zotero/storage/9ZIXNMTA/4095198.html}, + journal = {The Quarterly Journal of Economics}, + language = {en}, + number = {1} +} + +@article{kleinberg_inherent_2016, + title = {Inherent {{Trade}}-{{Offs}} in the {{Fair Determination}} of {{Risk Scores}}}, + author = {Kleinberg, Jon and Mullainathan, Sendhil and Raghavan, Manish}, + year = {2016}, + month = sep, + abstract = {Recent discussion in the public sphere about algorithmic classification has involved tension between competing notions of what it means for a probabilistic classification to be fair to different groups. We formalize three fairness conditions that lie at the heart of these debates, and we prove that except in highly constrained special cases, there is no method that can satisfy these three conditions simultaneously. Moreover, even satisfying all three conditions approximately requires that the data lie in an approximate version of one of the constrained special cases identified by our theorem. These results suggest some of the ways in which key notions of fairness are incompatible with each other, and hence provide a framework for thinking about the trade-offs between them.}, + archivePrefix = {arXiv}, + eprint = {1609.05807}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/6DD962LQ/Kleinberg et al. 
- 2016 - Inherent Trade-Offs in the Fair Determination of R.pdf;/home/nathante/Zotero/storage/6WDGDWL8/1609.html}, + journal = {arXiv:1609.05807 [cs, stat]}, + keywords = {Computer Science - Computers and Society,Computer Science - Machine Learning,Statistics - Machine Learning}, + primaryClass = {cs, stat} +} + +@article{kleinberg_prediction_2015, + title = {Prediction {{Policy Problems}}}, + author = {Kleinberg, Jon and Ludwig, Jens and Mullainathan, Sendhil and Obermeyer, Ziad}, + year = {2015}, + month = may, + volume = {105}, + pages = {491--495}, + issn = {0002-8282}, + abstract = {Most empirical policy work focuses on causal inference. We argue an important class of policy problems does not require causal inference but instead requires predictive inference. Solving these "prediction policy problems" requires more than simple regression techniques, since these are tuned to generating unbiased estimates of coefficients rather than minimizing prediction error. We argue that new developments in the field of "machine learning" are particularly useful for addressing these prediction problems. We use an example from health policy to illustrate the large potential social welfare gains from improved prediction.}, + file = {/home/nathante/Zotero/storage/ELRIPF5U/Kleinberg et al. 
- 2015 - Prediction Policy Problems.pdf;/home/nathante/Zotero/storage/BDRLBFZH/articles.html}, + journal = {American Economic Review}, + keywords = {Belief,Communication,Econometric Modeling: General; Forecasting Models,Information and Knowledge,Learning,Simulation Methods; Search,Unawareness}, + language = {en}, + number = {5} +} + +@article{kline_users_1996, + title = {Users as {{Agents}} of {{Technological Change}}: {{The Social Construction}} of the {{Automobile}} in the {{Rural United States}}}, + shorttitle = {Users as {{Agents}} of {{Technological Change}}}, + author = {Kline, Ronald and Pinch, Trevor}, + year = {1996}, + volume = {37}, + pages = {763--795}, + issn = {0040-165X}, + annotation = {00380}, + file = {/home/nathante/Zotero/storage/BAWTQLVB/Kline and Pinch - 1996 - Users as Agents of Technological Change The Socia.pdf}, + journal = {Technology and Culture}, + number = {4} +} + +@article{kling_computerization_1991, + title = {Computerization and {{Social Transformations}}}, + author = {Kling, Rob}, + year = {1991}, + month = jul, + volume = {16}, + pages = {342--367}, + issn = {0162-2439}, + abstract = {This article examines the relationship between the use of computer-based systems and transformations in parts of the social order. Answers to this question rest heavily on the way computer-based systems are consumed---not just produced or disseminated. The discourse about computerization advanced in many professional magazines and the mass media is saturated with talk about "revolution," and yet substantial social changes are often difficult to identify in carefully designed empirical studies. The article examines qualitative case studies of computerization in welfare agencies, urban planning, accounting, marketing, and manufacturing to examine the ways that computerization alters social life in varied ways: sometimes restructuring relationships and in other cases reinforcing existing social relationships. 
The article also examines some of the theoretical issues in studies of computerization, such as drawing boundaries. It concludes with some observations about the sociology of computer science as an academic discipline.}, + file = {/home/nathante/Zotero/storage/VAWBIRRN/Kling - 1991 - Computerization and Social Transformations.pdf}, + journal = {Science, Technology, \& Human Values}, + language = {en}, + number = {3} +} + +@article{klonick_new_2017, + title = {The {{New Governors}}: {{The People}}, {{Rules}}, and {{Processes Governing Online Speech}}}, + shorttitle = {The {{New Governors}}}, + author = {Klonick, Kate}, + year = {2017}, + volume = {131}, + pages = {1598--1670}, + file = {/home/nathante/Zotero/storage/YETNBLH7/Klonick_2017_The New Governors.pdf}, + journal = {Harvard Law Review}, + language = {eng}, + number = {6} +} + +@book{kraut_building_2012, + title = {Building Successful Online Communities: Evidence-Based Social Design}, + author = {Kraut, Robert E. and Resnick, Paul and Kiesler, Sara}, + year = {2012}, + publisher = {{MIT Press}}, + address = {{Cambridge, MA}}, + abstract = {Uses insights from social science, psychology, and economics to offer advice on planning and managing an online community.}, + file = {/home/nathante/Zotero/storage/29MTSNUW/02-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/AB2273BF/05-kiesler10-Regulation-current.pdf;/home/nathante/Zotero/storage/D2S585PS/06-Resnick10-Startup-current.pdf;/home/nathante/Zotero/storage/EQCWGHKT/02-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/GEKSWUN5/03-Ren10-Commitment-current.pdf;/home/nathante/Zotero/storage/JR8BZBNN/01-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/QPEIB7TT/04-kraut10-Newcomers-current.pdf;/home/nathante/Zotero/storage/UN6ZPDYF/03-Ren10-Commitment-current.pdf;/home/nathante/Zotero/storage/VVMSLY22/05-kiesler10-Regulation-current.pdf;/home/nathante/Zotero/storage/ZHQ89ZM4/01-Resnick10-Intro-current.pdf}, + isbn = {978-0-262-29831-5}, + 
keywords = {design,foundations of social computing}, + language = {English} +} + +@incollection{kraut_regulating_2012, + title = {Regulating Behavior in Online Communities}, + booktitle = {Building {{Successful Online Communities}}: {{Evidence}}-{{Based Social Design}}}, + author = {Kiesler, Sara E. and Kraut, Robert E. and Resnick, Paul and Kittur, Aniket}, + editor = {Kraut, Robert E. and Resnick, Paul}, + year = {2012}, + month = mar, + publisher = {{The MIT Press}}, + file = {/home/nathante/Zotero/storage/RRZSYZSB/Kiesler et al_2012_Regulating behavior in online communities.pdf}, + isbn = {978-0-262-29831-5} +} + +@article{kreiss_limits_2011, + title = {The Limits of Peer Production: {{Some}} Reminders from {{Max Weber}} for the Network Society}, + shorttitle = {The Limits of Peer Production}, + author = {Kreiss, Daniel and Finn, Megan and Turner, Fred}, + year = {2011}, + month = mar, + volume = {13}, + pages = {243--259}, + issn = {1461-4448, 1461-7315}, + abstract = {In the last few years, a powerful consensus has emerged among scholars of digitally enabled peer production. In this view, digital technologies and social production processes are driving a dramatic democratization of culture and society. Moreover, leading scholars now suggest that these new, hyper-mediated modes of living and working are specifically challenging the hierarchical structures and concentrated power of bureaucracies. This paper first maps the assumptions underlying the new consensus on peer production so as to reveal the sources of its coherence. It then revisits Max Weber's account of bureaucracy. With Weber in mind, the paper aims to expose analytical weaknesses in the consensus view and offer a new perspective from which to study contemporary digital media.}, + file = {/home/nathante/Zotero/storage/MBDKFR2T/Kreiss et al. 
- 2011 - The limits of peer production Some reminders from.pdf}, + journal = {New Media \& Society}, + keywords = {bureaucracy,convergence,digital media,participatory culture,peer production,social networks}, + language = {en}, + number = {2} +} + +@inproceedings{kriplean_community_2007, + title = {Community, {{Consensus}}, {{Coercion}}, {{Control}}: {{Cs}}*{{W}} or {{How Policy Mediates Mass Participation}}}, + shorttitle = {Community, {{Consensus}}, {{Coercion}}, {{Control}}}, + booktitle = {Proceedings of the 2007 {{International ACM Conference}} on {{Supporting Group Work}}}, + author = {Kriplean, Travis and Beschastnikh, Ivan and McDonald, David W. and Golder, Scott A.}, + year = {2007}, + pages = {167--176}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {When large groups cooperate, issues of conflict and control surface because of differences in perspective. Managing such diverse views is a persistent problem in cooperative group work. The Wikipedian community has responded with an evolving body of policies that provide shared principles, processes, and strategies for collaboration. We employ a grounded approach to study a sample of active talk pages and examine how policies are employed as contributors work towards consensus. Although policies help build a stronger community, we find that ambiguities in policies give rise to power plays. This lens demonstrates that support for mass collaboration must take into account policy and power.}, + file = {/home/nathante/Zotero/storage/WCXFLI38/Kriplean et al_2007_Community, Consensus, Coercion, Control.pdf}, + isbn = {978-1-59593-845-9}, + keywords = {collaborative authoring,community,policy,power,wikipedia}, + series = {{{GROUP}} '07} +} + +@article{kroll_accountable_2016, + title = {Accountable {{Algorithms}}}, + author = {Kroll, Joshua A. and Barocas, Solon and Felten, Edward W. and Reidenberg, Joel R. and Robinson, David G. 
and Yu, Harlan}, + year = {2016}, + volume = {165}, + pages = {633--706}, + file = {/home/nathante/Zotero/storage/DEEXGJAV/Kroll et al. - 2016 - Accountable Algorithms.pdf}, + journal = {University of Pennsylvania Law Review}, + language = {eng} +} + +@article{kubler_state_2017, + title = {State of Urgency: {{Surveillance}}, Power, and Algorithms in {{France}}'s State of Emergency}, + shorttitle = {State of Urgency}, + author = {Kubler, Kyle}, + year = {2017}, + month = dec, + volume = {4}, + pages = {2053951717736338}, + issn = {2053-9517}, + abstract = {The recent terrorist attacks and ongoing state of emergency in France have brought questions of police surveillance into the public spotlight, making it increasingly important to understand how police attain data from citizens. Since 2005, the French police have been using IBM's computer program, i2 Analyst's Notebook, to aggregate information and craft criminal narratives. This technology serves to quickly connect suspects with crimes, looking for as many associations as possible, ranking and visualizing them based on level of importance. Recently, surveillance and state power have been theorized as having shifted to a posthegemonic, order. Drawing from literature on power, surveillance, and identity, this paper considers the various ways that algorithms can impact policing under a state of emergency by comparing the technical protocol of i2 Analyst's Notebook with the administrative protocol of the French state. 
Using i2 Analyst's Notebook as an example, this paper argues that posthegemonic theories of power have their place in determining how algorithms can be used for surveillance, but that they cannot completely explain their use under the state of emergency.}, + file = {/home/nathante/Zotero/storage/YQHX4437/Kubler_2017_State of urgency.pdf}, + journal = {Big Data \& Society}, + keywords = {algorithm,France,i2 Analyst’s Notebook,police,state of emergency,Surveillance}, + language = {en}, + number = {2} +} + +@inproceedings{kusner_counterfactual_2017, + title = {Counterfactual {{Fairness}}}, + booktitle = {Advances in {{Neural Information Processing Systems}} 30}, + author = {Kusner, Matt J and Loftus, Joshua and Russell, Chris and Silva, Ricardo}, + editor = {Guyon, I. and Luxburg, U. V. and Bengio, S. and Wallach, H. and Fergus, R. and Vishwanathan, S. and Garnett, R.}, + year = {2017}, + pages = {4066--4076}, + publisher = {{Curran Associates, Inc.}}, + file = {/home/nathante/Zotero/storage/HV97AW3B/Kusner et al. - 2017 - Counterfactual Fairness.pdf;/home/nathante/Zotero/storage/ZDR9SZDA/6995-counterfactual-fairness.html} +} + +@inproceedings{lam_wp:clubhouse?:_2011, + title = {{{WP}}:{{Clubhouse}}?: {{An Exploration}} of {{Wikipedia}}'s {{Gender Imbalance}}}, + shorttitle = {{{WP}}}, + booktitle = {Proceedings of the 7th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Lam, Shyong (Tony) K. and Uduwage, Anuradha and Dong, Zhenhua and Sen, Shilad and Musicant, David R. and Terveen, Loren and Riedl, John}, + year = {2011}, + pages = {1--10}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {Wikipedia has rapidly become an invaluable destination for millions of information-seeking users. However, media reports suggest an important challenge: only a small fraction of Wikipedia's legion of volunteer editors are female. 
In the current work, we present a scientific exploration of the gender imbalance in the English Wikipedia's population of editors. We look at the nature of the imbalance itself, its effects on the quality of the encyclopedia, and several conflict-related factors that may be contributing to the gender gap. Our findings confirm the presence of a large gender gap among editors and a corresponding gender-oriented disparity in the content of Wikipedia's articles. Further, we find evidence hinting at a culture that may be resistant to female participation.}, + file = {/home/nathante/Zotero/storage/LANLMRIZ/p1-lam.pdf;/home/nathante/Zotero/storage/RYP6QUQI/Lam et al. - 2011 - WPClubhouse An Exploration of Wikipedia's Gende.pdf}, + isbn = {978-1-4503-0909-7}, + keywords = {content coverage,gender gap,wikipedia}, + series = {{{WikiSym}} '11} +} + +@article{lampe_crowdsourcing_2014, + title = {Crowdsourcing Civility: A Natural Experiment Examining the Effects of Distributed Moderation in Online Forums}, + shorttitle = {Crowdsourcing Civility}, + author = {Lampe, Cliff and Zube, Paul and Lee, Jusil and Park, Chul Hyun and Johnston, Erik}, + year = {2014}, + month = apr, + volume = {31}, + pages = {317--326}, + issn = {0740-624X}, + abstract = {Participation in discussions about the public interest can be enhanced by technology, but can also create an environment in which participants are overwhelmed by the quantity, quality, and diversity of information and arguments. Political participation is at a greater disadvantage than non-political activities in that participants from different parties already start out with established differences, which requires them to reach some form of common ground before progress can be made. Those seeking authentic deliberation are discouraged to participate when confronted with uncivil and inflammatory rhetoric. 
These issues are often exacerbated in online discussions, where lack of identity cues and low barriers to entry can lead to heightened incivility between participants, often labeled as ``flaming'' and ``trolling''. This paper explores the extent to which moderator systems, tools online discussion forums use to manage contributions, can reduce information overload and encourage civil conversations in virtual discussion spaces. Using the popular website Slashdot as an example of sound moderation in a public discourse setting, we found that users move toward consensus about which and how comments deserve to be moderated. Using these findings, we explore how transferable these systems are for participation in public matters specifically to the unique attributes of political discussion. Slashdot's political forum provides a comparison group that allowed us to find quantitative and qualitative differences in political posting, comments, and moderation. Our results show that large scale, civil participation is possible with a distributed moderation system that enables regularly lively debates to be conducted positively because the system provides tools for people to enforce norms of civility.}, + file = {/home/nathante/Zotero/storage/GIJ5JZ8L/Lampe et al_2014_Crowdsourcing civility.pdf;/home/nathante/Zotero/storage/TA2TQ252/S0740624X14000021.html}, + journal = {Government Information Quarterly}, + keywords = {Deliberation,Human–computer interaction,Moderation systems,Politics,Public administration,Social media}, + number = {2} +} + +@inproceedings{lampe_familiar_2007, + title = {A Familiar Face(Book): Profile Elements as Signals in an Online Social Network}, + shorttitle = {A Familiar Face(Book)}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}} - {{CHI}} '07}, + author = {Lampe, Cliff A.C. 
and Ellison, Nicole and Steinfield, Charles}, + year = {2007}, + pages = {435--444}, + publisher = {{ACM Press}}, + address = {{San Jose, California, USA}}, + abstract = {Using data from a popular online social network site, this paper explores the relationship between profile structure (namely, which fields are completed) and number of friends, giving designers insight into the importance of the profile and how it works to encourage connections and articulated relationships between users. We describe a theoretical framework that draws on aspects of signaling theory, common ground theory, and transaction costs theory to generate an understanding of why certain profile fields may be more predictive of friendship articulation on the site. Using a dataset consisting of 30,773 Facebook profiles, we determine which profile elements are most likely to predict friendship links and discuss the theoretical and design implications of our findings.}, + file = {/home/nathante/Zotero/storage/YA5WG4HD/Lampe et al. 
- 2007 - A familiar face(book) profile elements as signals.pdf}, + isbn = {978-1-59593-593-9}, + language = {en} +} + +@incollection{lampe_role_2012, + title = {The {{Role}} of {{Reputation Systems}} in {{Managing Online Communities}}}, + booktitle = {The {{Reputation Society}}}, + author = {Lampe, Cliff}, + editor = {Masum, Hassan and Tovey, Mark}, + year = {2012}, + publisher = {{The MIT Press}}, + file = {/home/nathante/Zotero/storage/KXBI9AU7/Masum and Tovey - 2012 - The Role of Reputation Systems in Managing Online .pdf}, + isbn = {978-0-262-29847-6}, + language = {en} +} + +@inproceedings{lampe_slashdot_2004, + title = {Slash(Dot) and {{Burn}}: {{Distributed Moderation}} in a {{Large Online Conversation Space}}}, + shorttitle = {Slash(Dot) and {{Burn}}}, + booktitle = {Conference on {{Human Factors}} in {{Computing Systems}} ({{CHI}})}, + author = {Lampe, Cliff and Resnick, Paul}, + year = {2004}, + pages = {543--550}, + publisher = {{ACM}}, + address = {{Vienna, Austria}} +} + +@incollection{lane_big_2015, + title = {Big {{Data}}'s {{End Run}} around {{Anonymity}} and {{Consent}}}, + booktitle = {Privacy, Big Data, and the Public Good: Frameworks for Engagement}, + author = {Barocas, Solon and Nissenbaum, Helen}, + editor = {Lane, Julia I}, + year = {2015}, + publisher = {{Cambridge University Press}}, + address = {{New York, NY}}, + annotation = {OCLC: 882939943}, + file = {/home/nathante/Zotero/storage/B6X69WPN/LaneEtAlPrivacyBigDataAndThePublicGood.pdf}, + isbn = {978-1-107-06735-6 978-1-107-63768-9}, + language = {English} +} + +@book{lane_privacy_2015, + title = {Privacy, Big Data, and the Public Good: Frameworks for Engagement}, + shorttitle = {Privacy, Big Data, and the Public Good}, + author = {Lane, Julia I}, + year = {2015}, + publisher = {{Cambridge University Press}}, + address = {{New York, NY}}, + annotation = {OCLC: 882939943}, + isbn = {978-1-107-06735-6 978-1-107-63768-9}, + language = {English} +} + +@article{lee_regression_2010, + title = 
{Regression Discontinuity Designs in Economics}, + author = {Lee, David S. and Lemieux, Thomas}, + year = {2010}, + volume = {48}, + pages = {281--355}, + abstract = {This paper provides an introduction and "user guide" to Regression Discontinuity (RD) designs for empirical researchers. It presents the basic theory behind the research design, details when RD is likely to be valid or invalid given economic incentives, explains why it is considered a "quasi-experimental" design, and summarizes different ways (with their advantages and disadvantages) of estimating RD designs and the limitations of interpreting these estimates. Concepts are discussed using examples drawn from the growing body of empirical research using RD.}, + file = {/home/nathante/Zotero/storage/2IVZMBH7/Lee and Lemieux - 2010 - Regression discontinuity designs in economics.pdf}, + journal = {Journal of Economic Literature}, + number = {2} +} + +@article{leonardi_crossing_2009, + title = {Crossing the {{Implementation Line}}: {{The Mutual Constitution}} of {{Technology}} and {{Organizing Across Development}} and {{Use Activities}}}, + shorttitle = {Crossing the {{Implementation Line}}}, + author = {Leonardi, Paul M.}, + year = {2009}, + month = jul, + volume = {19}, + pages = {278--310}, + issn = {1050-3293}, + file = {/home/nathante/Zotero/storage/ZVMVIDFS/Leonardi Paul M. 
- 2009 - Crossing the Implementation Line The Mutual Const.pdf;/home/nathante/Zotero/storage/4UPI9HU4/j.1468-2885.2009.01344.html}, + journal = {Communication Theory}, + number = {3} +} + +@article{leonardi_when_2011, + title = {When Flexible Routines Meet Flexible Technologies: {{Affordance}}, Constraint, and the Imbrication of Human and Material Agencies}, + shorttitle = {When {{Flexible Routines Meet Flexible Technologies}}}, + author = {Leonardi, Paul M.}, + year = {2011}, + month = mar, + volume = {35}, + pages = {147--168}, + issn = {0276-7783}, + abstract = {Employees in many contemporary organizations work with flexible routines and flexible technologies. When those employees find that they are unable to achieve their goals in the current environment, how do they decide whether they should change the composition of their routines or the materiality of the technologies with which they work? The perspective advanced in this paper suggests that the answer to this question depends on how human and material agencies-the basic building blocks common to both routines and technologies-are imbricated. Imbrication of human and material agencies creates infrastructure in the form of routines and technologies that people use to carry out their work. Routine or technological infrastructure used at any given moment is the result of previous imbrications of human and material agencies. People draw on this infrastructure to construct a perception that a technology either constrains their ability to achieve their goals, or that the technology affords the possibility of achieving new goals. The case of a computer simulation technology for automotive design used to illustrate this framework suggests that perceptions of constraint lead people to change their technologies while perceptions of affordance lead people to change their routines. 
This imbrication metaphor is used to suggest how a human agency approach to technology can usefully incorporate notions of material agency into its explanations of organizational change.}, + file = {/home/nathante/Zotero/storage/4MTGR6UB/Leonardi - 2011 - When flexible routines meet flexible technologies.pdf}, + journal = {MIS Q.}, + keywords = {affordances,agency,imbrication,materiality,organizational change,perception,routines,technological change}, + number = {1} +} + +@inproceedings{leung_can_2020, + title = {Can {{User Interface Design Influence Hiring Bias}} in the {{Online Freelance Marketplace}}?}, + booktitle = {{{CHI}} 2020}, + author = {Leung, Weiwen and Zhang, Zheng and Jibuti, Daviti and Zhao, Jinhao and Klein, Maximilian A and Pierce, Casey and Robert, Lionel and Zhu, Haiyi}, + year = {2020}, + pages = {10}, + abstract = {We conduct a study of hiring bias on a simulation platform where we ask Amazon MTurk participants to make hiring decisions for a mathematically intensive task. Our findings suggest hiring biases against Black workers and less attractive workers, and preferences towards Asian workers, female workers and more attractive workers. We also show that certain UI designs, including provision of candidates' information at the individual level and reducing the number of choices, can significantly reduce discrimination. However, provision of candidate's information at the subgroup level can increase discrimination. The results have practical implications for designing better online freelance marketplaces.}, + file = {/home/nathante/Zotero/storage/XRN9G5J7/Leung et al. 
- Can User Interface Design Influence Hiring Bias in.pdf}, + language = {en} +} + +@article{levy_designing_2017, + title = {Designing against {{Discrimination}} in {{Online Markets}}}, + author = {Levy, Karen and Barocas, Solon}, + year = {2017}, + volume = {32}, + pages = {1183}, + file = {/home/nathante/Zotero/storage/ZGKJ3AYW/Levy_Barocas_2017_Designing against Discrimination in Online Markets.pdf;/home/nathante/Zotero/storage/NQU7Z5MG/LandingPage.html}, + journal = {Berkeley Technology Law Journal} +} + +@inproceedings{lin_better_2017, + title = {Better When It Was Smaller? {{Community}} Content and Behavior after Massive Growth.}, + shorttitle = {Better {{When It Was Smaller}}?}, + booktitle = {{{ICWSM}}}, + author = {Lin, Zhiyuan and Salehi, Niloufar and Yao, Bowen and Chen, Yiqi and Bernstein, Michael S.}, + year = {2017}, + pages = {132--141}, + file = {/home/nathante/Zotero/storage/PMNRDFNT/Lin et al_2017_Better when it was smaller.pdf} +} + +@article{lindebaum_insights_2019, + title = {Insights {{From}} ``{{The Machine Stops}}'' to {{Better Understand Rational Assumptions}} in {{Algorithmic Decision Making}} and {{Its Implications}} for {{Organizations}}}, + author = {Lindebaum, Dirk and Vesa, Mikko and {den Hond}, Frank}, + year = {2019}, + month = may, + volume = {45}, + pages = {247--263}, + publisher = {{Academy of Management}}, + issn = {0363-7425}, + file = {/home/nathante/Zotero/storage/T3EYFTL7/Lindebaum et al_2019_Insights From “The Machine Stops” to Better Understand Rational Assumptions in.pdf}, + journal = {Academy of Management Review}, + keywords = {rationality,weber}, + number = {1} +} + +@incollection{lipton_does_2018, + title = {Does Mitigating {{ML}}'s Impact Disparity Require Treatment Disparity?}, + booktitle = {Advances in {{Neural Information Processing Systems}} 31}, + author = {Lipton, Zachary and McAuley, Julian and Chouldechova, Alexandra}, + editor = {Bengio, S. and Wallach, H. and Larochelle, H. 
and Grauman, K. and {Cesa-Bianchi}, N. and Garnett, R.}, + year = {2018}, + pages = {8125--8135}, + publisher = {{Curran Associates, Inc.}}, + file = {/home/nathante/Zotero/storage/J3XV5ZLH/Lipton et al. - 2018 - Does mitigating MLtextquotesingle s impact dispar.pdf;/home/nathante/Zotero/storage/QKQ8JFVW/8035-does-mitigating-mls-impact-disparity-require-treatment-disparity.html} +} + +@article{litschig_impact_2013, + title = {The {{Impact}} of {{Intergovernmental Transfers}} on {{Education Outcomes}} and {{Poverty Reduction}}}, + author = {Litschig, Stephan and Morrison, Kevin M.}, + year = {2013}, + month = oct, + volume = {5}, + pages = {206--240}, + issn = {1945-7782}, + abstract = {This paper provides regression discontinuity evidence on development impacts of intergovernmental transfers. Extra transfers in Brazil increased local government spending per capita by about 20 percent over a 4 year period with no evidence of crowding out own revenue or other revenue sources. Schooling per capita increased by about 7 percent and literacy rates by about 4 percentage points. In line with the effect on human capital, the poverty rate was reduced by about 4 percentage points. 
Somewhat noisier results also suggest that the reelection probability of local incumbent parties in the 1988 elections improved by about 10 percentage points.}, + file = {/home/nathante/Zotero/storage/4PY2EGDT/Litschig_Morrison_2013_The Impact of Intergovernmental Transfers on Education Outcomes and Poverty.pdf;/home/nathante/Zotero/storage/AJB6PC68/articles.html}, + journal = {American Economic Journal: Applied Economics}, + keywords = {Education,Human Development,Income Distribution,Migration,Provision and Effects of Welfare Programs; Economic Development: Human Resources,Public Pensions; Analysis of Education; Education: Government Policy; Measurement and Analysis of Poverty; Welfare; Well-Being; and Poverty: Government Programs,State and Local Budget and Expenditures; State and Local Government: Health,Welfare}, + language = {en}, + number = {4} +} + +@inproceedings{liu_forecasting_2018, + title = {Forecasting the {{Presence}} and {{Intensity}} of {{Hostility}} on {{Instagram Using Linguistic}} and {{Social Features}}}, + booktitle = {Twelfth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Liu, Ping and Guberman, Joshua and Hemphill, Libby and Culotta, Aron}, + year = {2018}, + month = jun, + abstract = {Online antisocial behavior, such as cyberbullying, harassment, and trolling, is a widespread problem that threatens free discussion and has negative physical and mental health consequences for victims and communities. While prior work has proposed automated methods to identify hostile comments in online discussions, these methods work retrospectively on comments that have already been posted, making it difficult to intervene before an interaction escalates. 
In this paper we instead consider the problem of forecasting future hostilities in online discussions, which we decompose into two tasks: (1) given an initial sequence of non-hostile comments in a discussion, predict whether some future comment will contain hostility; and (2) given the first hostile comment in a discussion, predict whether this will lead to an escalation of hostility in subsequent comments. Thus, we aim to forecast both the presence and intensity of hostile comments based on linguistic and social features from earlier comments. To evaluate our approach, we introduce a corpus of over 30K annotated Instagram comments from over 1,100 posts. Our approach is able to predict the appearance of a hostile comment on an Instagram post ten or more hours in the future with an AUC of .82 (task 1), and can furthermore distinguish between high and low levels of future hostility with an AUC of .91 (task 2).}, + copyright = {Authors who publish a paper in this conference agree to the following terms: 1. Author(s) agree to transfer their copyrights in their article/paper to the Association for the Advancement of Artificial Intelligence (AAAI), in order to deal with future requests for reprints, translations, anthologies, reproductions, excerpts, and other publications. This grant will include, without limitation, the entire copyright in the article/paper in all countries of the world, including all renewals, extensions, and reversions thereof, whether such rights current exist or hereafter come into effect, and also the exclusive right to create electronic versions of the article/paper, to the extent that such right is not subsumed under copyright. 2. The author(s) warrants that they are the sole author and owner of the copyright in the above article/paper, except for those portions shown to be in quotations; that the article/paper is original throughout; and that the undersigned right to make the grants set forth above is complete and unencumbered. 3. 
The author(s) agree that if anyone brings any claim or action alleging facts that, if true, constitute a breach of any of the foregoing warranties, the author(s) will hold harmless and indemnify AAAI, their grantees, their licensees, and their distributors against any liability, whether under judgment, decree, or compromise, and any legal fees and expenses arising out of that claim or actions, and the undersigned will cooperate fully in any defense AAAI may make to such claim or action. Moreover, the undersigned agrees to cooperate in any claim or other action seeking to protect or enforce any right the undersigned has granted to AAAI in the article/paper. If any such claim or action fails because of facts that constitute a breach of any of the foregoing warranties, the undersigned agrees to reimburse whomever brings such claim or action for expenses and attorneys' fees incurred therein. 4. Author(s) retain all proprietary rights other than copyright (such as patent rights). 5. Author(s) may make personal reuse of all or portions of the above article/paper in other works of their own authorship. 6. Author(s) may reproduce, or have reproduced, their article/paper for the author's personal use, or for company use provided that AAAI copyright and the source are indicated, and that the copies are not used in a way that implies AAAI endorsement of a product or service of an employer, and that the copies per se are not offered for sale. The foregoing right shall not permit the posting of the article/paper in electronic or digital form on any computer network, except by the author or the author's employer, and then only on the author's or the employer's own web page or ftp site. 
Such web page or ftp site, in addition to the aforementioned requirements of this Paragraph, must provide an electronic reference or link back to the AAAI electronic server, and shall not post other AAAI copyrighted materials not of the author's or the employer's creation (including tables of contents with links to other papers) without AAAI's written permission. 7. Author(s) may make limited distribution of all or portions of their article/paper prior to publication. 8. In the case of work performed under U.S. Government contract, AAAI grants the U.S. Government royalty-free permission to reproduce all or portions of the above article/paper, and to authorize others to do so, for U.S. Government purposes. 9. In the event the above article/paper is not accepted and published by AAAI, or is withdrawn by the author(s) before acceptance by AAAI, this agreement becomes null and void.}, + file = {/home/nathante/Zotero/storage/N789YADS/Liu et al_2018_Forecasting the Presence and Intensity of Hostility on Instagram Using.pdf;/home/nathante/Zotero/storage/EH8E76LE/17875.html}, + language = {en} +} + +@article{lum_predict_2016, + title = {To Predict and Serve?}, + author = {Lum, Kristian and Isaac, William}, + year = {2016}, + volume = {13}, + pages = {14--19}, + issn = {1740-9713}, + abstract = {Predictive policing systems are used increasingly by law enforcement to try to prevent crime before it occurs. But what happens when these systems are trained using biased data? 
Kristian Lum and William Isaac consider the evidence \textendash{} and the social consequences}, + copyright = {\textcopyright{} 2016 The Royal Statistical Society}, + file = {/home/nathante/Zotero/storage/NAZFY89C/Lum and Isaac - 2016 - To predict and serve.pdf;/home/nathante/Zotero/storage/8RNYU5J9/j.1740-9713.2016.00960.html}, + journal = {Significance}, + number = {5} +} + +@article{lyon_snowden_2015, + title = {The {{Snowden Stakes}}: {{Challenges}} for {{Understanding Surveillance Today}}}, + shorttitle = {The {{Snowden Stakes}}}, + author = {Lyon, David}, + year = {2015}, + month = jul, + volume = {13}, + pages = {139--152}, + issn = {1477-7487}, + file = {/home/nathante/Zotero/storage/RU7XRZH4/Lyon_2015_The Snowden Stakes.pdf}, + journal = {Surveillance \& Society}, + language = {en}, + number = {2} +} + +@book{lyon_surveillance_2003, + title = {Surveillance as Social Sorting: Privacy, Risk, and Digital Discrimination}, + shorttitle = {Surveillance as Social Sorting}, + author = {Lyon, David}, + year = {2003}, + publisher = {{Routledge}}, + address = {{London; New York}}, + annotation = {OCLC: 829245839}, + file = {/home/nathante/Zotero/storage/F7AHSTDP/Lyon_2003_Surveillance as social sorting.pdf}, + isbn = {978-0-203-99488-7}, + language = {English} +} + +@inproceedings{ma_self-disclosure_2017, + title = {Self-{{Disclosure}} and {{Perceived Trustworthiness}} of {{Airbnb Host Profiles}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Ma, Xiao and Hancock, Jeffery T. and Lim Mingjie, Kenneth and Naaman, Mor}, + year = {2017}, + month = feb, + pages = {2397--2409}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Online peer-to-peer platforms like Airbnb allow hosts to list a property (e.g. a house, or a room) for short-term rentals. 
In this work, we examine how hosts describe themselves on their Airbnb profile pages. We use a mixed-methods study to develop a categorization of the topics that hosts self-disclose in their profile descriptions, and show that these topics differ depending on the type of guest engagement expected. We also examine the perceived trustworthiness of profiles using topic-coded profiles from 1,200 hosts, showing that longer self-descriptions are perceived to be more trustworthy. Further, we show that there are common strategies (a mix of topics) hosts use in self-disclosure, and that these strategies cause differences in perceived trustworthiness scores. Finally, we show that the perceived trustworthiness score is a significant predictor of host choice--especially for shorter profiles that show more variation. The results are consistent with uncertainty reduction theory, reflect on the assertions of signaling theory, and have important design implications for sharing economy platforms, especially those facilitating online-to-offline social exchange.}, + file = {/home/nathante/Zotero/storage/CL64SYM3/Ma et al_2017_Self-Disclosure and Perceived Trustworthiness of Airbnb Host Profiles.pdf}, + isbn = {978-1-4503-4335-0}, + keywords = {airbnb,self-disclosure,sharing economy,social exchange,trustworthiness}, + series = {{{CSCW}} '17} +} + +@article{macy_chains_1991, + title = {Chains of {{Cooperation}}: {{Threshold Effects}} in {{Collective Action}}}, + shorttitle = {Chains of {{Cooperation}}}, + author = {Macy, Michael W.}, + year = {1991}, + volume = {56}, + pages = {730--747}, + publisher = {{[American Sociological Association, Sage Publications, Inc.]}}, + issn = {0003-1224}, + abstract = {Granovetter's threshold model of collective action shows how each new participant triggers others until the chain reaction reaches a gap in the distribution of thresholds. Hence outcomes depend on the network of social ties that channel the chain reactions. 
However, structural analysis is encumbered by the assumption that thresholds derive from changing marginal returns on investments in public goods. A learning-theoretic specification imposes less stringent assumptions about the rationality of the actors and is much better suited to a structural analysis. Computer simulations suggest that threshold effects may be the key to solving the coordination problem: When individual choices are contingent on participation by others, this interdependence facilitates the coordination of contributions needed to shift the bistable system from a noncooperative equilibrium to a cooperative one. Further simulations with low-density networks show that these chain reactions require bridges that link socially distant actors, supporting Granovetter's case for the strength of weak ties.}, + journal = {American Sociological Review}, + number = {6} +} + +@article{malik_it_nodate, + title = {It Is an Updated Version of a Previously Published Paper. {{For}} Referencing, Please Give Citations to Both the Originally Published Work and to This Thesis.}, + author = {Malik, Momin M and Pfeffer, J{\"u}rgen}, + pages = {26}, + abstract = {In this Chapter, I use the rapid introduction of Facebook's ``People You May Know'' as a natural experiment by which to observe the causal effect of a recommender system on user behavior. 
I theoretically frame this as an example of decisions of platform governance having a causal effect on user behavior, which has larger implications for how we think about the data we get from social media platforms.}, + file = {/home/nathante/Zotero/storage/BPVQPFIS/Malik and Pfe - It is an updated version of a previously published.pdf}, + language = {en} +} + +@inproceedings{marlow_impression_2013, + title = {Impression Formation in Online Peer Production: Activity Traces and Personal Profiles in Github}, + shorttitle = {Impression Formation in Online Peer Production}, + booktitle = {Proceedings of the 2013 Conference on {{Computer}} Supported Cooperative Work}, + author = {Marlow, Jennifer and Dabbish, Laura and Herbsleb, Jim}, + year = {2013}, + month = feb, + pages = {117--128}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {In this paper we describe a qualitative investigation of impression formation in an online distributed software development community with social media functionality. We find that users in this setting seek out additional information about each other to explore the project space, inform future interactions, and understand the potential future value of a new person. They form impressions around other users' expertise based on history of activity across projects, and successful collaborations with key high status projects in the community. 
These impressions influence their receptivity to strangers' work contributions.}, + file = {/home/nathante/Zotero/storage/C8RITLA4/Marlow et al_2013_Impression formation in online peer production.pdf}, + isbn = {978-1-4503-1331-5}, + keywords = {activity traces,collaborative software development,impression formation,peer production}, + series = {{{CSCW}} '13} +} + +@article{massanari_gamergate_2017, + title = {\#{{Gamergate}} and {{The Fappening}}: {{How Reddit}}'s Algorithm, Governance, and Culture Support Toxic Technocultures}, + shorttitle = {\#{{Gamergate}} and {{The Fappening}}}, + author = {Massanari, Adrienne}, + year = {2017}, + month = mar, + volume = {19}, + pages = {329--346}, + issn = {1461-4448}, + abstract = {This article considers how the social-news and community site Reddit.com has become a hub for anti-feminist activism. Examining two recent cases of what are defined as ``toxic technocultures'' (\#Gamergate and The Fappening), this work describes how Reddit's design, algorithm, and platform politics implicitly support these kinds of cultures. In particular, this piece focuses on the ways in which Reddit's karma point system, aggregation of material across subreddits, ease of subreddit and user account creation, governance structure, and policies around offensive content serve to provide fertile ground for anti-feminist and misogynistic activism. The ways in which these events and communities reflect certain problematic aspects of geek masculinity are also considered. 
This research is informed by the results of a long-term participant-observation and ethnographic study into Reddit's culture and community and is grounded in actor-network theory.}, + file = {/home/nathante/Zotero/storage/C7A7MGWF/Massanari_2017_#Gamergate and The Fappening.pdf}, + journal = {New Media \& Society}, + language = {en}, + number = {3} +} + +@inproceedings{matias_civilservant_2018, + title = {Civilservant: Community-Led Experiments in Platform Governance}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Matias, J. Nathan and Mou, Merry}, + year = {2018}, + pages = {9:1--9:13}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {As online platforms monitor and intervene in the daily lives of billions of people, platforms are being used to govern enduring social problems. Field experiments could inform wise uses of this power if tensions between democratic values and experimentation could be resolved. In this paper, we introduce CivilServant, a novel experimentation infrastructure that online communities and their moderators use to evaluate policies and replicate each others' findings. We situate CivilServant in the political history of policy experiments and present design considerations for community participation, ethics, and replication. Based on two case studies of community-led experiments and public debriefings on the reddit platform, we share findings on community deliberation about experiment results. We also report on uses of evidence, finding that experiments informed moderator practices, community policies, and replications by communities and platforms. 
We discuss the implications of these findings for evaluating platform governance in an open, democratic, experimenting society.}, + file = {/home/nathante/Zotero/storage/ZW6KWLFH/Matias_Mou_2018_Civilservant.pdf}, + isbn = {978-1-4503-5620-6}, + keywords = {action research,ethics,field experiments,governance,moderation,platforms,policy evaluation,randomized trials}, + series = {{{CHI}} '18} +} + +@inproceedings{matias_going_2016, + title = {Going Dark: {{Social}} Factors in Collective Action against Platform Operators in the {{Reddit}} Blackout}, + shorttitle = {Going {{Dark}}}, + booktitle = {Proceedings of the 2016 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '16)}, + author = {Matias, J. Nathan}, + year = {2016}, + pages = {1138--1151}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {This paper describes how people who lead communities on online platforms join together in mass collective action to influence platform operators. I investigate this by analyzing a protest against the social news platform reddit by moderators of 2,278 subreddit communities in July 2015. These moderators collectively disabled their subreddits, preventing millions of readers from accessing major parts of reddit and convincing the company to negotiate over their demands. This paper offers a descriptive analysis of the protest, combining qualitative content analysis, interviews, and quantitative analysis with the population of 52,735 active subreddits. 
Through participatory hypotheses testing with moderators, this study reveals social factors including the grievances of moderators, relations with platform operators, relations among moderators, subreddit resources, subreddit isolation, and moderators' relations with their subreddits that can lead to participation in mass collective action against a platform.}, + file = {/home/nathante/Zotero/storage/YAMH83QB/Matias_2016_Going dark.pdf}, + isbn = {978-1-4503-3362-7}, + keywords = {blackout,Collective Action,online platforms,participatory hypothesis testing,reddit,sabotage,social factors} +} + +@techreport{mayson_bias_2018, + title = {Bias {{In}}, {{Bias Out}}}, + author = {Mayson, Sandra G.}, + year = {2018}, + month = sep, + address = {{Rochester, NY}}, + institution = {{Social Science Research Network}}, + abstract = {Police, prosecutors, judges, and other criminal justice actors increasingly use algorithmic risk assessment to estimate the likelihood that a person will commit future crime. As many scholars have noted, these algorithms tend to have disparate racial impact. In response, critics advocate three strategies of resistance: (1) the exclusion of input factors that correlate closely with race, (2) adjustments to algorithmic design to equalize predictions across racial lines, and (3) rejection of algorithmic methods altogether.}, + file = {/home/nathante/Zotero/storage/8QRYJFV4/Mayson - 2018 - Bias In, Bias Out.pdf;/home/nathante/Zotero/storage/QMCUX8NQ/papers.html}, + keywords = {algorithms,bail reform,civil rights,COMPAS,criminal justice,criminal law,discrimination,inequality,minorities,prediction,race,rearrest,risk assessment}, + language = {en}, + number = {ID 3257004}, + type = {{{SSRN Scholarly Paper}}} +} + +@inproceedings{mcdonald_deceptive/honest/unreliable/reliable?_2016, + title = {Deceptive/{{Honest}}/{{Unreliable}}/{{Reliable}}? 
{{Unpacking Social Signaling Theory}} for {{Social Computing Systems Analysis}} and {{Design}}}, + shorttitle = {Deceptive/{{Honest}}/{{Unreliable}}/{{Reliable}}?}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing Companion}} - {{CSCW}} '16 {{Companion}}}, + author = {McDonald, David and Majid, Amirah and Shami, N.}, + year = {2016}, + pages = {481--484}, + publisher = {{ACM Press}}, + address = {{San Francisco, California, USA}}, + file = {/home/nathante/Zotero/storage/DHKHFE25/McDonald et al_2016_Deceptive-Honest-Unreliable-Reliable.pdf}, + isbn = {978-1-4503-3950-6}, + language = {en} +} + +@inproceedings{mcdonald_privacy_2019, + title = {Privacy, {{Anonymity}}, and {{Perceived Risk}} in {{Open Collaboration}}: {{A Study}} of {{Service Providers}}}, + shorttitle = {Privacy, {{Anonymity}}, and {{Perceived Risk}} in {{Open Collaboration}}}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} - {{CHI}} '19}, + author = {McDonald, Nora and Hill, Benjamin Mako and Greenstadt, Rachel and Forte, Andrea}, + year = {2019}, + pages = {1--12}, + publisher = {{ACM Press}}, + address = {{Glasgow, Scotland, UK}}, + abstract = {Anonymity can enable both healthy online interactions like support-seeking and toxic behaviors like hate speech. How do online service providers balance these threats and opportunities? This two-part qualitative study examines the challenges perceived by open collaboration service providers in allowing anonymous contributions to their projects. We interviewed eleven people familiar with organizational decisions related to privacy and security at five open collaboration projects and followed up with an analysis of public discussions about anonymous contribution to Wikipedia. 
We contrast our findings with prior work on threats perceived by project volunteers and explore misalignment between policies aiming to serve contributors and the privacy practices of contributors themselves.}, + file = {/home/nathante/Zotero/storage/C2SPBXH5/McDonald et al. - 2019 - Privacy, Anonymity, and Perceived Risk in Open Col.pdf}, + isbn = {978-1-4503-5970-2}, + language = {en} +} + +@article{mcgillicuddy_controlling_2016, + title = {Controlling {{Bad Behavior}} in {{Online Communities}}: {{An Examination}} of {{Moderation Work}}}, + shorttitle = {Controlling {{Bad Behavior}} in {{Online Communities}}}, + author = {McGillicuddy, Aiden and Bernard, Jean-Gregoire and Cranefield, Jocelyn}, + year = {2016}, + month = dec, + file = {/home/nathante/Zotero/storage/V4F2EJGG/23.html}, + journal = {ICIS 2016 Proceedings} +} + +@book{mcgrath_groups:_1984, + title = {Groups: Interaction and Performance}, + shorttitle = {Groups}, + author = {McGrath, Joseph E}, + year = {1984}, + publisher = {{Prentice-Hall}}, + address = {{Englewood Cliffs, N.J.}}, + isbn = {0-13-365700-0 978-0-13-365700-5}, + language = {English} +} + +@incollection{mcgrath_methods_1984, + title = {Methods for the Study of Groups}, + booktitle = {Groups: Interaction and Performance}, + author = {McGrath, Joseph E}, + year = {1984}, + pages = {28--40}, + publisher = {{Prentice-Hall}}, + address = {{Englewood Cliffs, N.J.}}, + isbn = {0-13-365700-0}, + language = {English} +} + +@article{menon_cost_nodate, + title = {The {{Cost}} of {{Fairness}} in {{Binary Classification}}}, + author = {Menon, Aditya Krishna and Williamson, Robert C}, + pages = {12}, + abstract = {Binary classifiers are often required to possess fairness in the sense of not overly discriminating with respect to a feature deemed sensitive, e.g. race. 
We study the inherent tradeoffs in learning classifiers with a fairness constraint in the form of two questions: what is the best accuracy we can expect for a given level of fairness?, and what is the nature of these optimal fairness-aware classifiers? To answer these questions, we provide three main contributions. First, we relate two existing fairness measures to cost-sensitive risks. Second, we show that for such cost-sensitive fairness measures, the optimal classifier is an instance-dependent thresholding of the class-probability function. Third, we relate the tradeoff between accuracy and fairness to the alignment between the target and sensitive features' class-probabilities. A practical implication of our analysis is a simple approach to the fairness-aware problem which involves suitably thresholding class-probability estimates.}, + file = {/home/nathante/Zotero/storage/US6KQHV5/Menon and Williamson - The Cost of Fairness in Binary Classification.pdf}, + language = {en} +} + +@inproceedings{merchant_signals_2019, + title = {Signals {{Matter}}: {{Understanding Popularity}} and {{Impact}} of {{Users}} on {{Stack Overflow}}}, + shorttitle = {Signals {{Matter}}}, + booktitle = {The {{World Wide Web Conference}}}, + author = {Merchant, Arpit and Shah, Daksh and Bhatia, Gurpreet Singh and Ghosh, Anurag and Kumaraguru, Ponnurangam}, + year = {2019}, + month = may, + pages = {3086--3092}, + publisher = {{Association for Computing Machinery}}, + address = {{San Francisco, CA, USA}}, + abstract = {Stack Overflow, a Q\&A site on programming, awards reputation points and badges (game elements) to users on performing various actions. Situating our work in Digital Signaling Theory, we investigate the role of these game elements in characterizing social qualities (specifically, popularity and impact) of its users. We operationalize these attributes using common metrics and apply statistical modeling to empirically quantify and validate the strength of these signals. 
Our results are based on a rich dataset of 3,831,147 users and their activities spanning nearly a decade since the site's inception in 2008. We present evidence that certain non-trivial badges, reputation scores and age of the user on the site positively correlate with popularity and impact. Further, we find that the presence of costly to earn and hard to observe signals qualitatively differentiates highly impactful users from highly popular users.}, + file = {/home/nathante/Zotero/storage/X4IZR33V/Merchant et al_2019_Signals Matter.pdf}, + isbn = {978-1-4503-6674-8}, + keywords = {Crowdsourced Knowledge,Digital Signaling}, + series = {{{WWW}} '19} +} + +@article{messner_heteroscedastic_2016, + title = {Heteroscedastic {{Censored}} and {{Truncated Regression}} with Crch}, + author = {Messner, Jakob W. and Mayr, Georg J. and Zeileis, Achim}, + year = {2016}, + volume = {8}, + pages = {173--181}, + issn = {2073-4859}, + file = {/home/nathante/Zotero/storage/BDPLI2MI/RJ-2016-012.html}, + journal = {The R Journal}, + language = {en}, + number = {1} +} + +@article{mitchell_prediction-based_2020, + title = {Prediction-{{Based Decisions}} and {{Fairness}}: {{A Catalogue}} of {{Choices}}, {{Assumptions}}, and {{Definitions}}}, + shorttitle = {Prediction-{{Based Decisions}} and {{Fairness}}}, + author = {Mitchell, Shira and Potash, Eric and Barocas, Solon and D'Amour, Alexander and Lum, Kristian}, + year = {2020}, + month = apr, + abstract = {A recent flurry of research activity has attempted to quantitatively define ``fairness'' for decisions based on statistical and machine learning (ML) predictions. The rapid growth of this new field has led to wildly inconsistent terminology and notation, presenting a serious challenge for cataloguing and comparing definitions. This paper attempts to bring much-needed order.}, + archivePrefix = {arXiv}, + eprint = {1811.07867}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/IGR8TLSL/Mitchell et al. 
- 2020 - Prediction-Based Decisions and Fairness A Catalog.pdf}, + journal = {arXiv:1811.07867 [stat]}, + keywords = {Statistics - Applications}, + language = {en}, + primaryClass = {stat} +} + +@article{mobasher_toward_2007, + title = {Toward {{Trustworthy Recommender Systems}}: {{An Analysis}} of {{Attack Models}} and {{Algorithm Robustness}}}, + shorttitle = {Toward {{Trustworthy Recommender Systems}}}, + author = {Mobasher, Bamshad and Burke, Robin and Bhaumik, Runa and Williams, Chad}, + year = {2007}, + month = oct, + volume = {7}, + issn = {1533-5399}, + abstract = {Publicly accessible adaptive systems such as collaborative recommender systems present a security problem. Attackers, who cannot be readily distinguished from ordinary users, may inject biased profiles in an attempt to force a system to ``adapt'' in a manner advantageous to them. Such attacks may lead to a degradation of user trust in the objectivity and accuracy of the system. Recent research has begun to examine the vulnerabilities and robustness of different collaborative recommendation techniques in the face of ``profile injection'' attacks. In this article, we outline some of the major issues in building secure recommender systems, concentrating in particular on the modeling of attacks and their impact on various recommendation algorithms. We introduce several new attack models and perform extensive simulation-based evaluations to show which attacks are most successful and practical against common recommendation techniques. Our study shows that both user-based and item-based algorithms are highly vulnerable to specific attack models, but that hybrid algorithms may provide a higher degree of robustness. 
Using our formal characterization of attack models, we also introduce a novel classification-based approach for detecting attack profiles and evaluate its effectiveness in neutralizing attacks.}, + file = {/home/nathante/Zotero/storage/I9XYCU8U/Mobasher et al_2007_Toward Trustworthy Recommender Systems.pdf}, + journal = {ACM Trans. Internet Technol.}, + keywords = {attack detection,collaborative filtering,Profile injection attacks,recommender systems,shilling}, + number = {4} +} + +@article{moller_not_2018, + title = {Do Not Blame It on the Algorithm: An Empirical Assessment of Multiple Recommender Systems and Their Impact on Content Diversity}, + shorttitle = {Do Not Blame It on the Algorithm}, + author = {M{\"o}ller, Judith and Trilling, Damian and Helberger, Natali and van Es, Bram}, + year = {2018}, + month = jul, + volume = {21}, + pages = {959--977}, + issn = {1369-118X}, + abstract = {In the debate about filter bubbles caused by algorithmic news recommendation, the conceptualization of the two core concepts in this debate, diversity and algorithms, has received little attention in social scientific research. This paper examines the effect of multiple recommender systems on different diversity dimensions. To this end, it maps different values that diversity can serve, and a respective set of criteria that characterizes a diverse information offer in this particular conception of diversity. We make use of a data set of simulated article recommendations based on actual content of one of the major Dutch broadsheet newspapers and its users (N=21,973 articles, N=500 users). 
We find that all of the recommendation logics under study proved to lead to a rather diverse set of recommendations that are on par with human editors and that basing recommendations on user histories can substantially increase topic diversity within a recommendation set.}, + journal = {Information, Communication \& Society}, + keywords = {automated content classification,diversity metrics,filter bubbles,News,recommender systems}, + number = {7} +} + +@article{morey_fallacy_2016, + title = {The Fallacy of Placing Confidence in Confidence Intervals}, + author = {Morey, Richard D. and Hoekstra, Rink and Rouder, Jeffrey N. and Lee, Michael D. and Wagenmakers, Eric-Jan}, + year = {2016}, + month = feb, + volume = {23}, + pages = {103--123}, + issn = {1531-5320}, + abstract = {Interval estimates \textendash{} estimates of parameters that include an allowance for sampling uncertainty \textendash{} have long been touted as a key component of statistical analyses. There are several kinds of interval estimates, but the most popular are confidence intervals (CIs): intervals that contain the true parameter value in some known proportion of repeated samples, on average. The width of confidence intervals is thought to index the precision of an estimate; CIs are thought to be a guide to which parameter values are plausible or reasonable; and the confidence coefficient of the interval (e.g., 95 \%) is thought to index the plausibility that the true parameter is included in the interval. We show in a number of examples that CIs do not necessarily have any of these properties, and can lead to unjustified or arbitrary inferences. 
For this reason, we caution against relying upon confidence interval theory to justify interval estimates, and suggest that other theories of interval estimation should be used instead.}, + file = {/home/nathante/Zotero/storage/8E7J46SG/Morey et al_2016_The fallacy of placing confidence in confidence intervals.pdf}, + journal = {Psychonomic Bulletin \& Review}, + language = {en}, + number = {1} +} + +@book{morgan_counterfactuals_2016, + title = {Counterfactuals and Causal Inference: Methods and Principles for Social Research}, + shorttitle = {Counterfactuals and Causal Inference}, + author = {Morgan, Stephen L and Winship, Christopher}, + year = {2016}, + publisher = {{Cambridge University Press}}, + address = {{New York, NY}}, + annotation = {OCLC: 1022809306}, + file = {/home/nathante/Zotero/storage/4RV2U7UT/Morgan et al. - 2016 - Counterfactuals and causal inference methods and .pdf}, + isbn = {978-1-107-06507-9 978-1-107-69416-3}, + language = {English} +} + +@inproceedings{morgan_evaluating_2018, + title = {Evaluating the Impact of the {{Wikipedia}} Teahouse on Newcomer Socialization and Retention}, + booktitle = {Proceedings of the 14th {{International Symposium}} on {{Open Collaboration}}}, + author = {Morgan, Jonathan T. and Halfaker, Aaron}, + year = {2018}, + pages = {20:1--20:7}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {Effective socialization of new contributors is vital for the long-term sustainability of open collaboration projects. Previous research has identified many common barriers to participation. However, few interventions employed to increase newcomer retention over the long term by improving aspects of the onboarding experience have demonstrated success. This study presents an evaluation of the impact of one such intervention, the Wikipedia Teahouse, on new editor survival. 
In a controlled experiment, we find that new editors invited to the Teahouse are retained at a higher rate than editors who do not receive an invite. The effect is observed for both low- and high-activity newcomers, and for both short- and long-term survival.}, + file = {/home/nathante/Zotero/storage/KBL5PKQA/Morgan_Halfaker_2018_Evaluating the impact of the Wikipedia teahouse on newcomer socialization and.pdf}, + isbn = {978-1-4503-5936-8}, + keywords = {online communities,open collaboration,social norms,socialization,virtual work,Wikipedia}, + series = {{{OpenSym}} '18} +} + +@inproceedings{morgan_tea_2013, + title = {Tea and Sympathy: Crafting Positive New User Experiences on Wikipedia}, + shorttitle = {Tea and Sympathy}, + booktitle = {Proceedings of the 2013 Conference on {{Computer}} Supported Cooperative Work}, + author = {Morgan, Jonathan T. and Bouterse, Siko and Walls, Heather and Stierch, Sarah}, + year = {2013}, + pages = {839--848}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {We present the Teahouse, a pilot project for supporting and socializing new Wikipedia editors. Open collaboration systems like Wikipedia must continually recruit and retain new members in order to sustain themselves. Wikipedia's editor decline presents unique exigency for evaluating novel strategies to support newcomers and increase new user retention in such systems, particularly among demographics that are currently underrepresented in the user community. In this paper, we describe the design and deployment of Teahouse, and present preliminary findings. 
Our findings highlight the importance of intervening early in the editor lifecycle, providing user-friendly tools, creating safe spaces for newcomers, and facilitating positive interactions between newcomers and established community members.}, + file = {/home/nathante/Zotero/storage/SBEHFNBL/Morgan et al_2013_Tea and sympathy.pdf}, + isbn = {978-1-4503-1331-5}, + keywords = {gender,new users,socialization,user experience,wikipedia}, + series = {{{CSCW}} '13} +} + +@inproceedings{mouzannar_fair_2019, + title = {From {{Fair Decision Making To Social Equality}}}, + booktitle = {Proceedings of the {{Conference}} on {{Fairness}}, {{Accountability}}, and {{Transparency}}}, + author = {Mouzannar, Hussein and Ohannessian, Mesrob I. and Srebro, Nathan}, + year = {2019}, + pages = {359--368}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {The study of fairness in intelligent decision systems has mostly ignored long-term influence on the underlying population. Yet fairness considerations (e.g. affirmative action) have often the implicit goal of achieving balance among groups within the population. The most basic notion of balance is eventual equality between the qualifications of the groups. How can we incorporate influence dynamics in decision making? How well do dynamics-oblivious fairness policies fare in terms of reaching equality? In this paper, we propose a simple yet revealing model that encompasses (1) a selection process where an institution chooses from multiple groups according to their qualifications so as to maximize an institutional utility and (2) dynamics that govern the evolution of the groups' qualifications according to the imposed policies. We focus on demographic parity as the formalism of affirmative action. We first give conditions under which an unconstrained policy reaches equality on its own. In this case, surprisingly, imposing demographic parity may break equality. 
When it doesn't, one would expect the additional constraint to reduce utility, however, we show that utility may in fact increase. In real world scenarios, unconstrained policies do not lead to equality. In such cases, we show that although imposing demographic parity may remedy it, there is a danger that groups settle at a worse set of qualifications. As a silver lining, we also identify when the constraint not only leads to equality, but also improves all groups. These cases and trade-offs are instrumental in determining when and how imposing demographic parity can be beneficial in selection processes, both for the institution and for society on the long run.}, + file = {/home/nathante/Zotero/storage/K3P8GBJL/Mouzannar et al. - 2019 - From Fair Decision Making To Social Equality.pdf}, + isbn = {978-1-4503-6125-5}, + keywords = {affirmative action,demographic parity,dynamics,fairness,influence on society,selection processes,social equality}, + series = {{{FAT}}* '19} +} + +@article{myers_west_censored_2018, + title = {Censored, Suspended, Shadowbanned: {{User}} Interpretations of Content Moderation on Social Media Platforms}, + shorttitle = {Censored, Suspended, Shadowbanned}, + author = {Myers West, Sarah}, + year = {2018}, + month = nov, + volume = {20}, + pages = {4366--4383}, + issn = {1461-4448}, + abstract = {Social media platforms play an increasingly important civic role as platforms for discourse, where we discuss, debate, and share information. This article explores how users make sense of the content moderation systems social media platforms use to curate this discourse. Through a survey of users (n\,=\,519) who have experienced content moderation, I explore users' folk theories of how content moderation systems work, how they shape the affective relationship between users and platforms, and the steps users take to assert their agency by seeking redress. 
I find significant impacts of content moderation that go far beyond the questions of freedom of expression that have thus far dominated the debate. Raising questions about what content moderation systems are designed to accomplish, I conclude by conceptualizing an educational, rather than punitive, model for content moderation systems.}, + file = {/home/nathante/Zotero/storage/ECGSPUDX/Myers West_2018_Censored, suspended, shadowbanned.pdf}, + journal = {New Media \& Society}, + keywords = {Accountability,content moderation,free expression,social media,survey,transparency,user studies}, + language = {en}, + number = {11} +} + +@article{narayan_all_2019, + title = {All {{Talk}}: {{How Increasing Interpersonal Communication}} on {{Wikis May Not Enhance Productivity}}}, + shorttitle = {All {{Talk}}}, + author = {Narayan, Sneha and TeBlunthuis, Nathan and Hale, Wm Salt and Hill, Benjamin Mako and Shaw, Aaron}, + year = {2019}, + month = nov, + volume = {3}, + pages = {101:1--101:19}, + abstract = {Prior research suggests that facilitating easier communication in social computing systems will increase both interpersonal interactions as well as group productivity. This study tests these claims by examining the impact of a new communication feature called "message walls" that allows for faster and more intuitive interpersonal communication in wikis. Using panel data from a sample of 275 wiki communities that migrated to message walls and a method inspired by regression discontinuity designs, we analyze these transitions and estimate the impact of the system's introduction. Although the adoption of message walls was associated with increased communication among all editors and newcomers, it had little effect on productivity, and was further associated with a decrease in article contributions from new editors. 
Our results imply that design changes that make communication easier in a social computing system may not translate to increased participation along other dimensions.}, + file = {/home/nathante/Zotero/storage/HDIC576G/Narayan et al_2019_All Talk.pdf}, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + keywords = {interpersonal communication,newcomers,peer production,wikis}, + number = {CSCW} +} + +@inproceedings{narayan_wikipedia_2017, + title = {The {{Wikipedia Adventure}}: Field Evaluation of an Interactive Tutorial for New Users}, + shorttitle = {The {{Wikipedia Adventure}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Narayan, Sneha and Orlowitz, Jake and Morgan, Jonathan and Hill, Benjamin Mako and Shaw, Aaron}, + year = {2017}, + pages = {1785--1799}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Integrating new users into a community with complex norms presents a challenge for peer production projects like Wikipedia. We present The Wikipedia Adventure (TWA): an interactive tutorial that offers a structured and gamified introduction to Wikipedia. In addition to describing the design of the system, we present two empirical evaluations. First, we report on a survey of users, who responded very positively to the tutorial. Second, we report results from a large-scale invitation-based field experiment that tests whether using TWA increased newcomers' subsequent contributions to Wikipedia. We find no effect of either using the tutorial or of being invited to do so over a period of 180 days. We conclude that TWA produces a positive socialization experience for those who choose to use it, but that it does not alter patterns of newcomer activity. 
We reflect on the implications of these mixed results for the evaluation of similar social computing systems.}, + file = {/home/nathante/Zotero/storage/K5U3C4LI/Narayan et al_2017_The Wikipedia Adventure.pdf}, + isbn = {978-1-4503-4335-0}, + keywords = {Gamification,newcomer socialization,online communities,peer production,Systems design,systems evaluation,wikipedia}, + series = {{{CSCW}} '17} +} + +@incollection{NIPS2016_6374, + title = {Equality of Opportunity in Supervised Learning}, + booktitle = {Advances in Neural Information Processing Systems 29}, + author = {Hardt, Moritz and Price, Eric and Srebro, Nati}, + editor = {Lee, D. D. and Sugiyama, M. and Luxburg, U. V. and Guyon, I. and Garnett, R.}, + year = {2016}, + pages = {3315--3323}, + publisher = {{Curran Associates, Inc.}} +} + +@misc{noauthor_testing_nodate, + title = {Testing for {{Overdispersion}} in {{Poisson}} and {{Binomial Regression Models}}: {{Journal}} of the {{American Statistical Association}}: {{Vol}} 87, {{No}} 418}, + file = {/home/nathante/Zotero/storage/Y8D5P64H/01621459.1992.html}, + howpublished = {https://amstat.tandfonline.com/doi/abs/10.1080/01621459.1992.10475225} +} + +@article{obermeyer_dissecting_2019, + title = {Dissecting Racial Bias in an Algorithm Used to Manage the Health of Populations}, + author = {Obermeyer, Ziad and Powers, Brian and Vogeli, Christine and Mullainathan, Sendhil}, + year = {2019}, + month = oct, + volume = {366}, + pages = {447--453}, + issn = {0036-8075, 1095-9203}, + abstract = {Racial bias in health algorithms The U.S. health care system uses commercial algorithms to guide health decisions. Obermeyer et al. find evidence of racial bias in one widely used algorithm, such that Black patients assigned the same level of risk by the algorithm are sicker than White patients (see the Perspective by Benjamin). The authors estimated that this racial bias reduces the number of Black patients identified for extra care by more than half. 
Bias occurs because the algorithm uses health costs as a proxy for health needs. Less money is spent on Black patients who have the same level of need, and the algorithm thus falsely concludes that Black patients are healthier than equally sick White patients. Reformulating the algorithm so that it no longer uses costs as a proxy for needs eliminates the racial bias in predicting who needs extra care. Science, this issue p. 447; see also p. 421 Health systems rely on commercial prediction algorithms to identify and help patients with complex health needs. We show that a widely used algorithm, typical of this industry-wide approach and affecting millions of patients, exhibits significant racial bias: At a given risk score, Black patients are considerably sicker than White patients, as evidenced by signs of uncontrolled illnesses. Remedying this disparity would increase the percentage of Black patients receiving additional help from 17.7 to 46.5\%. The bias arises because the algorithm predicts health care costs rather than illness, but unequal access to care means that we spend less money caring for Black patients than for White patients. Thus, despite health care cost appearing to be an effective proxy for health by some measures of predictive accuracy, large racial biases arise. We suggest that the choice of convenient, seemingly effective proxies for ground truth can be an important source of algorithmic bias in many contexts. A health algorithm that uses health costs as a proxy for health needs leads to racial bias against Black patients. A health algorithm that uses health costs as a proxy for health needs leads to racial bias against Black patients.}, + copyright = {Copyright \textcopyright{} 2019 The Authors, some rights reserved; exclusive licensee American Association for the Advancement of Science. No claim to original U.S. Government Works. 
http://www.sciencemag.org/about/science-licenses-journal-article-reuse This is an article distributed under the terms of the Science Journals Default License.}, + file = {/home/nathante/Zotero/storage/5I2GIKN5/447.html}, + journal = {Science}, + language = {en}, + number = {6464}, + pmid = {31649194} +} + +@book{oneil_weapons_2018, + title = {Weapons of Math Destruction: How Big Data Increases Inequality and Threatens Democracy}, + shorttitle = {Weapons of Math Destruction}, + author = {O'Neil, Cathy}, + year = {2018}, + publisher = {{Penguin Books}}, + address = {{London}}, + annotation = {OCLC: 1039545320}, + isbn = {978-0-14-198541-1}, + language = {English} +} + +@article{orlikowski_duality_1992, + title = {The {{Duality}} of {{Technology}}: {{Rethinking}} the {{Concept}} of {{Technology}} in {{Organizations}}}, + shorttitle = {The {{Duality}} of {{Technology}}}, + author = {Orlikowski, Wanda J.}, + year = {1992}, + month = aug, + volume = {3}, + pages = {398--427}, + issn = {1047-7039}, + abstract = {This paper develops a new theoretical model with which to examine the interaction between technology and organizations. Early research studies assumed technology to be an objective, external force that would have deterministic impacts on organizational properties such as structure. Later researchers focused on the human aspect of technology, seeing it as the outcome of strategic choice and social action. This paper suggests that either view is incomplete, and proposes a reconceptualization of technology that takes both perspectives into account. A theoretical model\textemdash the structurational model of technology\textemdash is built on the basis of this new conceptualization, and its workings explored through discussion of a field study of information technology. 
The paper suggests that the reformulation of the technology concept and the structurational model of technology allow a deeper and more dialectical understanding of the interaction between technology and organizations. This understanding provides insight into the limits and opportunities of human choice, technology development and use, and organizational design. Implications for future research of the new concept of technology and the structurational model of technology are discussed.}, + file = {/home/nathante/Zotero/storage/KA7WM3ZG/Orlikowski - 1992 - The Duality of Technology Rethinking the Concept .pdf;/home/nathante/Zotero/storage/FWXFHK22/orsc.3.3.html}, + journal = {Organization Science}, + number = {3} +} + +@inproceedings{orlikowski_learning_1992, + title = {Learning from Notes: {{Organizational}} Issues in Groupware Implementation}, + shorttitle = {Learning from Notes}, + booktitle = {Proceedings of the 1992 {{ACM}} Conference on {{Computer}}-Supported Cooperative Work}, + author = {Orlikowski, Wanda J.}, + year = {1992}, + pages = {362--369}, + publisher = {{ACM}} +} + +@article{ostrom_crafting_2011, + title = {Crafting Analytical Tools to Study Institutional Change}, + author = {Ostrom, Elinor and Basurto, Xavier}, + year = {2011}, + month = sep, + volume = {7}, + pages = {317--343}, + issn = {1744-1382}, + abstract = {Most powerful analytical tools used in the social sciences are well suited for studying static situations. Static and mechanistic analysis, however, is not adequate to understand the changing world in which we live. In order to adequately address the most pressing social and environmental challenges looming ahead, we need to develop analytical tools for analyzing dynamic situations \textendash{} particularly institutional change. In this paper, we develop an analytical tool to study institutional change, more specifically, the evolution of rules and norms. 
We believe that in order for such an analytical tool to be useful to develop a general theory of institutional change, it needs to enable the analyst to concisely record the processes of change in multiple specific settings so that lessons from such settings can eventually be integrated into a more general predictive theory of change.}, + file = {/home/nathante/Zotero/storage/HD4CIZCN/Ostrom and Basurto - 2011 - Crafting analytical tools to study institutional c.pdf;/home/nathante/Zotero/storage/J7GVJAID/displayFulltext.html}, + journal = {Journal of Institutional Economics}, + number = {Special Issue 03} +} + +@book{ostrom_governing_1990, + title = {Governing the Commons: {{The}} Evolution of Institutions for Collective Action}, + shorttitle = {Governing the {{Commons}}}, + author = {Ostrom, Elinor}, + year = {1990}, + publisher = {{Cambridge University Press}}, + address = {{New York, NY}} +} + +@book{pentland_honest_2008, + title = {Honest {{Signals}}: {{How They Shape Our World}}}, + author = {Pentland, Alex}, + year = {2008}, + publisher = {{The MIT Press}}, + annotation = {OCLC: 8162307241}, + isbn = {978-0-262-28139-3}, + language = {English} +} + +@article{perperoglou_review_2019, + title = {A Review of Spline Function Procedures in {{R}}}, + author = {Perperoglou, Aris and Sauerbrei, Willi and Abrahamowicz, Michal and Schmid, Matthias}, + year = {2019}, + month = mar, + volume = {19}, + pages = {46}, + issn = {1471-2288}, + abstract = {Background: With progress on both the theoretical and the computational fronts the use of spline modelling has become an established tool in statistical regression analysis. An important issue in spline modelling is the availability of user friendly, well documented software packages. 
Following the idea of the STRengthening Analytical Thinking for Observational Studies initiative to provide users with guidance documents on the application of statistical methods in observational research, the aim of this article is to provide an overview of the most widely used spline-based techniques and their implementation in R. Methods: In this work, we focus on the R Language for Statistical Computing which has become a hugely popular statistics software. We identified a set of packages that include functions for spline modelling within a regression framework. Using simulated and real data we provide an introduction to spline modelling and an overview of the most popular spline functions. Results: We present a series of simple scenarios of univariate data, where different basis functions are used to identify the correct functional form of an independent variable. Even in simple data, using routines from different packages would lead to different results. Conclusions: This work illustrate challenges that an analyst faces when working with data. Most differences can be attributed to the choice of hyper-parameters rather than the basis used. In fact an experienced user will know how to obtain a reasonable outcome, regardless of the type of spline used. However, many analysts do not have sufficient knowledge to use these powerful tools adequately and will need more guidance.}, + file = {/home/nathante/Zotero/storage/7Z3HNQ69/Perperoglou et al. 
- 2019 - A review of spline function procedures in R.pdf}, + journal = {BMC Medical Research Methodology}, + keywords = {Functional form of continuous covariates,Multivariable modelling}, + language = {en}, + number = {1} +} + +@inproceedings{phelan_priors_2019, + title = {Some {{Prior}}(s) {{Experience Necessary}}: {{Templates}} for {{Getting Started With Bayesian Analysis}}}, + shorttitle = {Some {{Prior}}(s) {{Experience Necessary}}}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Phelan, Chanda and Hullman, Jessica and Kay, Matthew and Resnick, Paul}, + year = {2019}, + month = may, + pages = {1--12}, + publisher = {{Association for Computing Machinery}}, + address = {{Glasgow, Scotland Uk}}, + abstract = {Bayesian statistical analysis has gained attention in recent years, including in HCI. The Bayesian approach has several advantages over traditional statistics, including producing results with more intuitive interpretations. Despite growing interest, few papers in CHI use Bayesian analysis. Existing tools to learn Bayesian statistics require significant time investment, making it difficult to casually explore Bayesian methods. Here, we present a tool that lowers the barrier to exploration: a set of R code templates that guide Bayesian novices through their first analysis. The templates are tailored to CHI, supporting analyses found to be most common in recent CHI papers. In a user study, we found that the templates were easy to understand and use. However, we found that participants without a statistical background were not confident in their use. 
Together our contributions provide a concise analysis tool and empirical results for understanding and addressing barriers to using Bayesian analysis in HCI.}, + file = {/home/nathante/Zotero/storage/MYQWMN89/Phelan et al_2019_Some Prior(s) Experience Necessary.pdf}, + isbn = {978-1-4503-5970-2}, + keywords = {bayesian statistics,code templates,evaluation,hypothesis testing,statistics,tutorials}, + series = {{{CHI}} '19} +} + +@article{phelps_statistical_1972, + title = {The {{Statistical Theory}} of {{Racism}} and {{Sexism}}}, + author = {Phelps, Edmund S.}, + year = {1972}, + volume = {62}, + pages = {659--661}, + publisher = {{American Economic Association}}, + issn = {0002-8282}, + journal = {The American Economic Review}, + number = {4} +} + +@article{piskorski_testing_2017, + title = {Testing {{Coleman}}'s Social-Norm Enforcement Mechanism: {{Evidence}} from {{Wikipedia}}}, + shorttitle = {Testing {{Coleman}}'s {{Social}}-{{Norm Enforcement Mechanism}}}, + author = {Piskorski, Miko{\l}aj Jan and Gorbat{\^a}i, Andreea D.}, + year = {2017}, + volume = {122}, + pages = {1183--1222}, + issn = {0002-9602}, + abstract = {Since Durkheim, sociologists have believed that actors in dense network structures experience fewer norm violations. Coleman proposed one explanatory mechanism, arguing that dense networks provide an opportunity structure to reward those who punish norm violators, leading to more frequent punishment and in turn fewer norm violations. Despite ubiquitous scholarly references to Coleman's theory, little empirical work has directly tested it in large-scale natural settings with longitudinal data. The authors undertake such a test using records of norm violations during the editing process on Wikipedia, the largest user-generated online encyclopedia. These data allow them to track all three elements required to test Coleman's mechanism: norm violations, punishments for such violations, and rewards for those who punish violations. 
The results support Coleman's mechanism.}, + file = {/home/nathante/Zotero/storage/4CB8FMG7/Piskorski and Gorbatâi - 2017 - Testing coleman’s social-norm enforcement mechanis.pdf;/home/nathante/Zotero/storage/Z6XGXBHK/689816.pdf}, + journal = {American Journal of Sociology}, + number = {4} +} + +@article{postmes_behavior_2002, + title = {Behavior Online: {{Does}} Anonymous Computer Communication Reduce Gender Inequality?}, + shorttitle = {Behavior {{Online}}}, + author = {Postmes, Tom and Spears, Russell}, + year = {2002}, + month = aug, + volume = {28}, + pages = {1073--1083}, + issn = {0146-1672, 1552-7433}, + abstract = {Two studies examined dominance and self-stereotyping in mixed-sex groups who had online discussions. Gender differences in dominance varied as a function of several contextual variables: individuation, the accessibility of gender stereotypes, and the fit between group task and stereotype. Results of the second study indicate that only when group members are depersonalized (anonymous and not individuated) does stereotype activation produce gender-stereotypic behavior. However, the nature of stereotypic behavior is moderated by the fit between group task and stereotypes, such that men dominate when the topic is masculine but not when it is feminine. These findings do not support suggestions that the anonymity offered by online communication would lead to equalization. 
Instead, results confirm predictions from a social identity model of deindividuation effects that social effects of anonymity and identifiability in (online) groups depend on contextual factors such as stereotype accessibility and fit.}, + file = {/home/nathante/Zotero/storage/6ZPGSEPJ/Postmes_Spears_2002_Behavior online.pdf}, + journal = {Personality and Social Psychology Bulletin}, + language = {en}, + number = {8} +} + +@article{postmes_individuality_2005, + title = {Individuality and {{Social Influence}} in {{Groups}}: {{Inductive}} and {{Deductive Routes}} to {{Group Identity}}}, + shorttitle = {Individuality and {{Social Influence}} in {{Groups}}}, + author = {Postmes, Tom and Spears, Russell and Lee, Antonia T. and Novak, Rosemary J.}, + year = {2005}, + month = nov, + volume = {89}, + pages = {747--763}, + issn = {0022-3514}, + abstract = {A distinction between forms of social identity formation in small interactive groups is investigated. In groups in which a common identity is available or given, norms for individual behavior may be deduced from group properties (deductive identity). In groups in which interpersonal relations are central, a group identity may also be induced from individual group members' contributions, making individuality and individual distinctiveness a defining feature of the group (inductive identity). Two studies examined the prediction that depersonalization produced by anonymity has opposite effects for groups in which social identity has been induced or deduced. Results confirmed the prediction that depersonalization increases social influence in groups whose identity was more deductive. In contrast, depersonalization decreases social influence in inductive identity groups. Implications for the role of social identity in small groups are discussed. 
(PsycINFO Database Record (c) 2016 APA, all rights reserved)}, + file = {/home/nathante/Zotero/storage/QV8XLWLL/Postmes et al_2005_Individuality and Social Influence in Groups.pdf}, + journal = {Journal of Personality and Social Psychology}, + keywords = {Adult,Analysis of Variance,Attitude,communication,Communication,depersonalization,Depersonalization,Female,Group Dynamics,Group Identity,group polarization,Group Processes,Group Structure,Humans,identity,Identity Formation,individuality,Individuality,Interpersonal Relations,Interpersonal Relationships,Male,Netherlands,Self Concept,Semantics,small group,Social Distance,Social Identification,Social Identity,social influence,Social Influences,Social Perception,Social Values}, + number = {5} +} + +@inproceedings{potthast_automatic_2008, + title = {Automatic {{Vandalism Detection}} in {{Wikipedia}}}, + booktitle = {Advances in {{Information Retrieval}}}, + author = {Potthast, Martin and Stein, Benno and Gerling, Robert}, + editor = {Macdonald, Craig and Ounis, Iadh and Plachouras, Vassilis and Ruthven, Ian and White, Ryen W.}, + year = {2008}, + pages = {663--668}, + publisher = {{Springer}}, + address = {{Berlin, Heidelberg}}, + abstract = {We present results of a new approach to detect destructive article revisions, so-called vandalism, in Wikipedia. Vandalism detection is a one-class classification problem, where vandalism edits are the target to be identified among all revisions. Interestingly, vandalism detection has not been addressed in the Information Retrieval literature by now. In this paper we discuss the characteristics of vandalism as humans recognize it and develop features to render vandalism detection as a machine learning task. We compiled a large number of vandalism edits in a corpus, which allows for the comparison of existing and new detection approaches. Using logistic regression we achieve 83\% precision at 77\% recall with our model. 
Compared to the rule-based methods that are currently applied in Wikipedia, our approach increases the F-Measure performance by 49\% while being faster at the same time.}, + file = {/home/nathante/Zotero/storage/4F2BFMJ6/Potthast et al_2008_Automatic Vandalism Detection in Wikipedia.pdf}, + isbn = {978-3-540-78646-7}, + keywords = {Class Imbalance,Class Imbalance Problem,IEEE Computer Society,Retrieval Literature,Spam Detection}, + language = {en}, + series = {Lecture {{Notes}} in {{Computer Science}}} +} + +@article{press_angeles_2019, + title = {Los {{Angeles}} to Clear up to 50,000 Old Marijuana Convictions Using Algorithms}, + author = {{{Associated Press}}}, + year = {2019}, + month = apr, + issn = {0261-3077}, + abstract = {Voters approved eliminating some pot-related crimes and clearing old convictions when they legalized adult marijuana use in 2016}, + chapter = {US news}, + file = {/home/nathante/Zotero/storage/D5QQPA2H/california-marijuana-convictions-wiped-out-code-for-america.html}, + journal = {The Guardian}, + keywords = {California,Cannabis,US crime,US news}, + language = {en-GB} +} + +@article{pressman_kahneman_2006, + title = {Kahneman, {{Tversky}}, and {{Institutional Economics}}}, + author = {Pressman, Steven}, + year = {2006}, + month = jun, + volume = {40}, + pages = {501--506}, + issn = {0021-3624, 1946-326X}, + file = {/home/nathante/Zotero/storage/K3CZPIR7/Pressman - 2006 - Kahneman, Tversky, and Institutional Economics.pdf}, + journal = {Journal of Economic Issues}, + language = {en}, + number = {2} +} + +@inproceedings{priedhorsky_creating_2007, + title = {Creating, Destroying, and Restoring Value in Wikipedia}, + booktitle = {Proceedings of the 2007 International {{ACM}} Conference on {{Supporting}} Group Work}, + author = {Priedhorsky, Reid and Chen, Jilin and Lam, Shyong (Tony) K. 
and Panciera, Katherine and Terveen, Loren and Riedl, John}, + year = {2007}, + month = nov, + pages = {259--268}, + publisher = {{Association for Computing Machinery}}, + address = {{Sanibel Island, Florida, USA}}, + abstract = {Wikipedia's brilliance and curse is that any user can edit any of the encyclopedia entries. We introduce the notion of the impact of an edit, measured by the number of times the edited version is viewed. Using several datasets, including recent logs of all article views, we show that an overwhelming majority of the viewed words were written by frequent editors and that this majority is increasing. Similarly, using the same impact measure, we show that the probability of a typical article view being damaged is small but increasing, and we present empirically grounded classes of damage. Finally, we make policy recommendations for Wikipedia and other wikis in light of these findings.}, + file = {/home/nathante/Zotero/storage/AJGBU5Q9/Priedhorsky et al_2007_Creating, destroying, and restoring value in wikipedia.pdf}, + isbn = {978-1-59593-845-9}, + keywords = {collaboration,damage,vandalism,wiki,wikipedia}, + series = {{{GROUP}} '07} +} + +@article{raghu_algorithmic_2019, + title = {The {{Algorithmic Automation Problem}}: {{Prediction}}, {{Triage}}, and {{Human Effort}}}, + shorttitle = {The {{Algorithmic Automation Problem}}}, + author = {Raghu, Maithra and Blumer, Katy and Corrado, Greg and Kleinberg, Jon and Obermeyer, Ziad and Mullainathan, Sendhil}, + year = {2019}, + month = mar, + abstract = {In a wide array of areas, algorithms are matching and surpassing the performance of human experts, leading to consideration of the roles of human judgment and algorithmic prediction in these domains. The discussion around these developments, however, has implicitly equated the specific task of prediction with the general task of automation. 
We argue here that automation is broader than just a comparison of human versus algorithmic performance on a task; it also involves the decision of which instances of the task to give to the algorithm in the first place. We develop a general framework that poses this latter decision as an optimization problem, and we show how basic heuristics for this optimization problem can lead to performance gains even on heavily-studied applications of AI in medicine. Our framework also serves to highlight how effective automation depends crucially on estimating both algorithmic and human error on an instance-by-instance basis, and our results show how improvements in these error estimation problems can yield significant gains for automation as well.}, + archivePrefix = {arXiv}, + eprint = {1903.12220}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/Z7BMT7XD/Raghu et al. - 2019 - The Algorithmic Automation Problem Prediction, Tr.pdf}, + journal = {arXiv:1903.12220 [cs]}, + keywords = {Computer Science - Artificial Intelligence,Computer Science - Computer Vision and Pattern Recognition,Computer Science - Machine Learning}, + language = {en}, + primaryClass = {cs} +} + +@article{rahwan_society---loop:_2018, + title = {Society-in-the-Loop: Programming the Algorithmic Social Contract}, + shorttitle = {Society-in-the-Loop}, + author = {Rahwan, Iyad}, + year = {2018}, + month = mar, + volume = {20}, + pages = {5--14}, + issn = {1572-8439}, + abstract = {Recent rapid advances in Artificial Intelligence (AI) and Machine Learning have raised many questions about the regulatory and governance mechanisms for autonomous machines. Many commentators, scholars, and policy-makers now call for ensuring that algorithms governing our lives are transparent, fair, and accountable. Here, I propose a conceptual framework for the regulation of AI and algorithmic systems. 
I argue that we need tools to program, debug and maintain an algorithmic social contract, a pact between various human stakeholders, mediated by machines. To achieve this, we can adapt the concept of human-in-the-loop (HITL) from the fields of modeling and simulation, and interactive machine learning. In particular, I propose an agenda I call society-in-the-loop (SITL), which combines the HITL control paradigm with mechanisms for negotiating the values of various stakeholders affected by AI systems, and monitoring compliance with the agreement. In short, `SITL = HITL + Social Contract.'}, + file = {/home/nathante/Zotero/storage/CPSJHBQG/Rahwan - 2018 - Society-in-the-loop programming the algorithmic s.pdf}, + journal = {Ethics and Information Technology}, + keywords = {Artificial intelligence,Ethics,Governance,Regulation,Society}, + language = {en}, + number = {1} +} + +@article{rains_impact_2007, + title = {The {{Impact}} of {{Anonymity}} on {{Perceptions}} of {{Source Credibility}} and {{Influence}} in {{Computer}}-{{Mediated Group Communication}}: {{A Test}} of {{Two Competing Hypotheses}}}, + shorttitle = {The {{Impact}} of {{Anonymity}} on {{Perceptions}} of {{Source Credibility}} and {{Influence}} in {{Computer}}-{{Mediated Group Communication}}}, + author = {Rains, Stephen A.}, + year = {2007}, + month = feb, + volume = {34}, + pages = {100--125}, + publisher = {{SAGE Publications Inc}}, + issn = {0093-6502}, + abstract = {As scholars and practitioners have endeavored to develop computer-based tools that foster effective communication and collaboration in groups, anonymity has played a key role. Anonymity purportedly minimizes status differences, liberates team members from a fear of retribution, and makes members feel more comfortable contributing to discussions. Yet these benefits may be outweighed by the impact of anonymity on receiver perceptions and behavior. 
Two competing hypotheses, drawn from adaptive structuration theory, were tested in this study to determine the impact of anonymity on receiver perceptions of sources and messages in computer-mediated group communication. The results of the multilevel models offer evidence in support of the discounting hypothesis and suggest that anonymity provided by electronic meeting systems may undermine source credibility and influence.}, + file = {/home/nathante/Zotero/storage/YE5MWGRF/Rains_2007_The Impact of Anonymity on Perceptions of Source Credibility and Influence in.pdf}, + journal = {Communication Research}, + language = {en}, + number = {1} +} + +@article{reagle_be_2010, + title = {``{{Be Nice}}'': {{Wikipedia}} Norms for Supportive Communication}, + shorttitle = {``{{Be Nice}}''}, + author = {Reagle, Joseph M.}, + year = {2010}, + month = apr, + volume = {16}, + pages = {161--180}, + publisher = {{Taylor \& Francis}}, + issn = {1361-4568}, + abstract = {Wikipedia is acknowledged to have been home to ``some bitter disputes.'' Indeed, conflict at Wikipedia is said to be ``as addictive as cocaine.'' Yet, such observations are not cynical commentary but motivation for a collection of social norms. These norms speak to the intentional stance and communicative behaviors Wikipedians should adopt when interacting with one another. In the following pages, I provide a survey of these norms on the English Wikipedia and argue that they can be characterized as supportive based on Jack Gibb's classic communication article ``Defensive Communication.''}, + annotation = {\_eprint: https://doi.org/10.1080/13614568.2010.498528}, + file = {/home/nathante/Zotero/storage/PVX7GGFJ/Jr_2010_“Be Nice”.pdf;/home/nathante/Zotero/storage/S68R73YN/13614568.2010.html}, + journal = {New Review of Hypermedia and Multimedia}, + keywords = {Collaboration,Communication,Prosocial,Supportive,Wikipedia}, + number = {1-2} +} + +@book{ridgeway_status:_2019, + title = {Status: Why Is It Everywhere? 
Why Does It Matter?}, + shorttitle = {Status}, + author = {Ridgeway, Cecilia L}, + year = {2019}, + abstract = {"Status is ubiquitous in modern life, yet our understanding of its role as a basic driver of inequality is surprisingly limited. In Status, sociologist and social psychologist Cecilia Ridgeway examines how this ancient and universal form of inequality influences today's ostensibly meritocratic institutions and why it matters. Ridgeway illuminates the complex ways in which status arises when people work together towards common goals, such as in classroom discussions, family decisions, or workplace deliberations. Ridgeway's research on status has important implications for our understanding of social inequality. Distinct from power or wealth, status is prized because it provides affirmation from others and affords access to valuable resources. Ridgeway demonstrates how the conferral of status inevitably leads to differing life outcomes for individuals, with impacts on pay, wealth creation, and health and wellbeing. Status beliefs are widely held views about who is better in society than others in terms of esteem, wealth, or competence. These beliefs ultimately confer advantages which can exacerbate social inequality. Ridgeway notes that status advantages based on race, gender, and class, such as the belief that white men are more competent than others because of their race and gender, have the greatest consequences for inequality by affording greater social and economic opportunities. Ridgeway argues that status beliefs make lower status groups less likely to challenge the status quo and greatly enhance higher status groups' ability to maintain their advantages in resources and access to positions of power. She illustrates how many lower status people, when given a baseline level of dignity and respect - being seen, for example, as poor but hardworking - will accept their lower status. 
She also shows that people remain willfully blind to status beliefs and their effects because recognizing them can lead to emotional discomfort. Acknowledging the insidious role of status in our lives would require many higher-status individuals to accept that they may not have succeeded based on their own merit; and many lower-status individuals would have to acknowledge that they may have been discriminated against. While Ridgeway notes the profound impact of status on society, she suggests that social inequality is not an inevitable consequence of our status beliefs. She shows how status beliefs can be undermined - as when we reject the idea that all racial and gender traits are fixed at birth, thus disrupting the idea that women and people of color are less competent than their male and white counterparts. Ridgeway both notes the profound impact of status on social inequality and charts a way forward that may allow it to have a less detrimental impact on our lives"--}, + annotation = {OCLC: 1104214327}, + file = {/home/nathante/Zotero/storage/C2RAV7ZL/Ridgeway_2019_Status.pdf}, + isbn = {978-1-61044-889-5}, + language = {English} +} + +@article{ritov_conditional_2017, + title = {On Conditional Parity as a Notion of Non-Discrimination in Machine Learning}, + author = {Ritov, Ya'acov and Sun, Yuekai and Zhao, Ruofei}, + year = {2017}, + month = jun, + abstract = {We identify conditional parity as a general notion of nondiscrimination in machine learning. In fact, several recently proposed notions of non-discrimination, including a few counterfactual notions, are instances of conditional parity. We show that conditional parity is amenable to statistical analysis by studying randomization as a general mechanism for achieving conditional parity and a kernel-based test of conditional parity.}, + archivePrefix = {arXiv}, + eprint = {1706.08519}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/UY744QDP/Ritov et al. 
- 2017 - On conditional parity as a notion of non-discrimin.pdf}, + journal = {arXiv:1706.08519 [cs, stat]}, + keywords = {Computer Science - Computers and Society,Computer Science - Machine Learning,Statistics - Machine Learning}, + language = {en}, + primaryClass = {cs, stat} +} + +@article{roberts_commercial_2016, + title = {Commercial {{Content Moderation}}: {{Digital Laborers}}' {{Dirty Work}}}, + shorttitle = {Commercial {{Content Moderation}}}, + author = {Roberts, Sarah}, + year = {2016}, + month = jan, + file = {/home/nathante/Zotero/storage/4B6A3N4K/12.html}, + journal = {Media Studies Publications} +} + +@inproceedings{roy_automation_2019, + title = {Automation {{Accuracy Is Good}}, but {{High Controllability May Be Better}}}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} - {{CHI}} '19}, + author = {Roy, Quentin and Zhang, Futian and Vogel, Daniel}, + year = {2019}, + pages = {1--8}, + publisher = {{ACM Press}}, + address = {{Glasgow, Scotland Uk}}, + abstract = {When automating tasks using some form of artificial intelligence, some inaccuracy in the result is virtually unavoidable. In many cases, the user must decide whether to try the automated method again, or fix it themselves using the available user interface. We argue this decision is influenced by both perceived automation accuracy and degree of task ``controllability'' (how easily and to what extent an automated result can be manually modified). This relationship between accuracy and controllability is investigated in a 750-participant crowdsourced experiment using a controlled, gamified task. With high controllability, self-reported satisfaction remained constant even under very low accuracy conditions, and overall, a strong preference was observed for using manual control rather than automation, despite much slower performance and regardless of very poor controllability.}, + file = {/home/nathante/Zotero/storage/LXKZGKMW/Roy et al. 
- 2019 - Automation Accuracy Is Good, but High Controllabil.pdf}, + isbn = {978-1-4503-5970-2}, + language = {en} +} + +@article{rubin_for_2008, + title = {For Objective Causal Inference, Design Trumps Analysis}, + author = {Rubin, Donald B.}, + year = {2008}, + month = sep, + volume = {2}, + pages = {808--840}, + issn = {1932-6157, 1941-7330}, + abstract = {For obtaining causal inferences that are objective, and therefore have the best chance of revealing scientific truths, carefully designed and executed randomized experiments are generally considered to be the gold standard. Observational studies, in contrast, are generally fraught with problems that compromise any claim for objectivity of the resulting causal inferences. The thesis here is that observational studies have to be carefully designed to approximate randomized experiments, in particular, without examining any final outcome data. Often a candidate data set will have to be rejected as inadequate because of lack of data on key covariates, or because of lack of overlap in the distributions of key covariates between treatment and control groups, often revealed by careful propensity score analyses. Sometimes the template for the approximating randomized experiment will have to be altered, and the use of principal stratification can be helpful in doing this. 
These issues are discussed and illustrated using the framework of potential outcomes to define causal effects, which greatly clarifies critical issues.}, + file = {/home/nathante/Zotero/storage/BYTQG6JP/euclid.aoas.1223908042.pdf;/home/nathante/Zotero/storage/NYNCKBQL/1223908042.html}, + journal = {The Annals of Applied Statistics}, + keywords = {Average causal effect,causal effects,complier average causal effect,instrumental variables,noncompliance,observational studies,propensity scores,randomized experiments,Rubin Causal Model}, + language = {EN}, + mrnumber = {MR2516795}, + number = {3}, + zmnumber = {1149.62089} +} + +@article{salganik_experimental_2006, + title = {Experimental {{Study}} of {{Inequality}} and {{Unpredictability}} in an {{Artificial Cultural Market}}}, + author = {Salganik, Matthew J. and Dodds, Peter Sheridan and Watts, Duncan J.}, + year = {2006}, + month = feb, + volume = {311}, + pages = {854--856}, + issn = {0036-8075, 1095-9203}, + abstract = {Hit songs, books, and movies are many times more successful than average, suggesting that ``the best'' alternatives are qualitatively different from ``the rest''; yet experts routinely fail to predict which products will succeed. We investigated this paradox experimentally, by creating an artificial ``music market'' in which 14,341 participants downloaded previously unknown songs either with or without knowledge of previous participants' choices. Increasing the strength of social influence increased both inequality and unpredictability of success. Success was also only partly determined by quality: The best songs rarely did poorly, and the worst rarely did well, but any other result was possible.
Access to information about other people's musical choices changes one's own selections, exaggerating the market success of certain songs and introducing uncertainty.}, + copyright = {American Association for the Advancement of Science}, + file = {/home/nathante/Zotero/storage/LA47MMBB/854.html}, + journal = {Science}, + keywords = {experiment,preferential attachment,quantitative}, + language = {en}, + number = {5762}, + pmid = {16469928} +} + +@inproceedings{sap_risk_2019, + ids = {sap\_risk\_2019-1,sap\_risk\_2019-2}, + title = {The {{Risk}} of {{Racial Bias}} in {{Hate Speech Detection}}}, + booktitle = {Proceedings of the 57th {{Annual Meeting}} of the {{Association}} for {{Computational Linguistics}}}, + author = {Sap, Maarten and Card, Dallas and Gabriel, Saadia and Choi, Yejin and Smith, Noah A.}, + year = {2019}, + pages = {1668--1678}, + publisher = {{Association for Computational Linguistics}}, + address = {{Florence, Italy}}, + abstract = {We investigate how annotators' insensitivity to differences in dialect can lead to racial bias in automatic hate speech detection models, potentially amplifying harm against minority populations. We first uncover unexpected correlations between surface markers of African American English (AAE) and ratings of toxicity in several widely-used hate speech datasets. Then, we show that models trained on these corpora acquire and propagate these biases, such that AAE tweets and tweets by self-identified African Americans are up to two times more likely to be labelled as offensive compared to others. Finally, we propose dialect and race priming as ways to reduce the racial bias in annotation, showing that when annotators are made explicitly aware of an AAE tweet's dialect they are significantly less likely to label the tweet as offensive.}, + file = {/home/nathante/Zotero/storage/4D2JJUPH/Sap et al. - 2019 - The Risk of Racial Bias in Hate Speech Detection.pdf;/home/nathante/Zotero/storage/5LZ3DBQ6/Sap et al. 
- 2019 - The Risk of Racial Bias in Hate Speech Detection.pdf;/home/nathante/Zotero/storage/Y82GFA8Q/Sap et al. - 2019 - The Risk of Racial Bias in Hate Speech Detection.pdf}, + language = {en} +} + +@article{sarabadani_building_2017, + title = {Building Automated Vandalism Detection Tools for {{Wikidata}}}, + author = {Sarabadani, Amir and Halfaker, Aaron and Taraborelli, Dario}, + year = {2017}, + pages = {1647--1654}, + abstract = {Wikidata, like Wikipedia, is a knowledge base that anyone can edit. This open collaboration model is powerful in that it reduces barriers to participation and allows a large number of people to contribute. However, it exposes the knowledge base to the risk of vandalism and low-quality contributions. In this work, we build on past work detecting vandalism in Wikipedia to detect vandalism in Wikidata. This work is novel in that identifying damaging changes in a structured knowledge-base requires substantially different feature engineering work than in a text-based wiki like Wikipedia. We also discuss the utility of these classifiers for reducing the overall workload of vandalism patrollers in Wikidata. We describe a machine classification strategy that is able to catch 89\% of vandalism while reducing patrollers' workload by 98\%, by drawing lightly from contextual features of an edit and heavily from the characteristics of the user making the edit.}, + archivePrefix = {arXiv}, + eprint = {1703.03861}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/4JQHA2ED/Sarabadani et al. 
- 2017 - Building automated vandalism detection tools for W.pdf}, + journal = {Proceedings of the 26th International Conference on World Wide Web Companion - WWW '17 Companion}, + keywords = {Computer Science - Computers and Society,Computer Science - Information Retrieval}, + language = {en} +} + +@article{schmidt_taking_1992, + title = {Taking {{CSCW}} Seriously}, + author = {Schmidt, Kjeld and Bannon, Liam}, + year = {1992}, + month = mar, + volume = {1}, + pages = {7--40}, + issn = {0925-9724, 1573-7551}, + abstract = {The topic of Computer Supported Cooperative Work (CSCW) has attracted much attention in the last few years. While the field is obviously still in the process of development, there is a marked ambiguity about the exact focus of the field. This lack of focus may hinder its further development and lead to its dissipation. In this paper we set out an approach to CSCW as a field of research which we believe provides a coherent conceptual framework for this area, suggesting that it should be concerned with the support requirements of cooperative work arrangements. This provides a more principled, comprehensive, and, in our opinion, more useful conception of the field than that provided by the conception of CSCW as being focused on computer support for groups. We then investigate the consequences of taking this alternative conception seriously, in terms of research directions for the field. As an indication of the fruits of this approach, we discuss the concept of `articulation work' and its relevance to CSCW. This raises a host of interesting problems that are marginalized in the work on small group support but critical to the success of CSCW systems `in the large', i.
e., that are designed to meet current work requirements in the everyday world.}, + file = {/home/nathante/Zotero/storage/NSNR98XF/Schmidt and Bannon - 1992 - Taking CSCW seriously.pdf;/home/nathante/Zotero/storage/Y6SQA5JM/Schmidt and Bannon - 1992 - Taking CSCW seriously.html}, + journal = {Computer Supported Cooperative Work (CSCW)}, + language = {en}, + number = {1-2} +} + +@unpublished{schneider_admins_2019, + title = {Admins, {{Mods}}, and {{Benevolent Dictators}} for {{Life}}: {{The Implicit Feudalism}} of {{Online Communities}}}, + shorttitle = {Admins, {{Mods}}, and {{Benevolent Dictators}} for {{Life}}}, + author = {Schneider, Nathan}, + year = {2019}, + month = jul, + abstract = {This essay considers how social networks train users to interact with each other through certain widespread interface designs. I argue that an ``implicit feudalism'' informs the available options for community management on the Internet's most popular platforms for online communities. This pattern grants user-administrators absolutist reign over their fiefdoms, with competition among them as the primary mechanism for quality control, under rules set by the meta-absolutism of platform companies. Through experience in communities so constituted, users may be learning to trust effective absolutism, even if it is relatively rare, and distrust their own capacity for self-governance. In light of alternative management mechanisms with more democratic features, it becomes all the more clear that implicit feudalism is not a necessary condition. Hosted on the Open Science Framework}, + file = {/home/nathante/Zotero/storage/GH4X82K9/gxu3a.html;/home/nathante/Zotero/storage/IQZ2AWBY/gxu3a.html}, + language = {en} +} + +@article{schulman_effect_1999, + title = {The {{Effect}} of {{Race}} and {{Sex}} on {{Physicians}}' {{Recommendations}} for {{Cardiac Catheterization}}}, + author = {Schulman, Kevin A. and Berlin, Jesse A. and Harless, William and Kerner, Jon F. 
and Sistrunk, Shyrl and Gersh, Bernard J. and Dub{\'e}, Ross and Taleghani, Christopher K. and Burke, Jennifer E. and Williams, Sankey and Eisenberg, John M. and Ayers, William and Escarce, Jos{\'e} J.}, + year = {1999}, + month = feb, + volume = {340}, + pages = {618--626}, + issn = {0028-4793}, + abstract = {Epidemiologic studies have identified differences according to race and sex in the treatment of patients with cardiovascular disease in the United States.1\textendash 18 Some studies have found that blacks and women are less likely than whites and men, respectively, to undergo cardiac catheterization or coronary-artery bypass graft surgery when they are admitted to the hospital for treatment of chest pain or myocardial infarction.1\textendash 5,7,8,10,11,13,14 In contrast, other studies were unable to confirm that invasive procedures are underused in women.15,16 Racial differences in the treatment of cardiovascular disease may be explained by financial and . . .}, + file = {/home/nathante/Zotero/storage/23KCL9UU/Schulman et al. 
- 1999 - The Effect of Race and Sex on Physicians' Recommen.pdf;/home/nathante/Zotero/storage/X9RBAFGG/NEJM199902253400806.html}, + journal = {New England Journal of Medicine}, + number = {8}, + pmid = {10029647} +} + +@misc{schwartz_untold_2019, + title = {Untold {{History}} of {{AI}}: {{Algorithmic Bias Was Born}} in the 1980s}, + shorttitle = {Untold {{History}} of {{AI}}}, + author = {Schwartz, Oscar}, + year = {2019}, + month = apr, + abstract = {A medical school thought a computer program would make the admissions process fairer\textemdash but it did just the opposite}, + file = {/home/nathante/Zotero/storage/BA8YYSU2/untold-history-of-ai-the-birth-of-machine-bias.html}, + howpublished = {https://spectrum.ieee.org/tech-talk/tech-history/dawn-of-electronics/untold-history-of-ai-the-birth-of-machine-bias}, + journal = {IEEE Spectrum: Technology, Engineering, and Science News}, + language = {en} +} + +@book{scott_seeing_1998, + title = {Seeing like a State: How Certain Schemes to Improve the Human Condition Have Failed}, + shorttitle = {Seeing like a State}, + author = {Scott, James C}, + year = {1998}, + publisher = {{Yale University Press}}, + address = {{New Haven}}, + abstract = {In this wide-ranging and original book, James C. Scott analyzes failed cases of large-scale authoritarian plans in a variety of fields. He argues that centrally managed social plans derail when they impose schematic visions that do violence to complex interdependencies that are not - and cannot be - fully understood. Further the success of designs for social organization depends on the recognition that local, practical knowledge is as important as formal, epistemic knowledge. The author builds a persuasive case against "development theory" and imperialistic state planning that disregards the values, desires, and objections of its subjects. 
And in discussing these planning disasters, he identifies four conditions common to them all: the state's attempt to impose administrative order on nature and society; a high-modernist ideology that believes scientific intervention can improve every aspect of human life; a willingness to use authoritarian state power to effect large-scale innovations; and a prostrate civil society that cannot effectively resist such plans.}, + annotation = {OCLC: 37392803}, + isbn = {978-0-300-07016-3 978-0-300-07815-2}, + language = {English} +} + +@article{seering_moderator_2019, + title = {Moderator Engagement and Community Development in the Age of Algorithms}, + author = {Seering, Joseph and Wang, Tony and Yoon, Jina and Kaufman, Geoff}, + year = {2019}, + month = jul, + volume = {21}, + pages = {1417--1443}, + issn = {1461-4448}, + abstract = {Online communities provide a forum for rich social interaction and identity development for billions of Internet users worldwide. In order to manage these communities, platform owners have increasingly turned to commercial content moderation, which includes both the use of moderation algorithms and the employment of professional moderators, rather than user-driven moderation, to detect and respond to anti-normative behaviors such as harassment and spread of offensive content. We present findings from semi-structured interviews with 56 volunteer moderators of online communities across three platforms (Twitch, Reddit, and Facebook), from which we derived a generalized model categorizing the ways moderators engage with their communities and explaining how these communities develop as a result. This model contains three processes: being and becoming a moderator; moderation tasks, actions, and responses; and rules and community development. 
In this work, we describe how moderators contribute to the development of meaningful communities, both with and without algorithmic support.}, + file = {/home/nathante/Zotero/storage/8PPNBZNM/Seering et al_2019_Moderator engagement and community development in the age of algorithms.pdf}, + journal = {New Media \& Society}, + keywords = {Facebook,governance,moderation,online communities,platforms,Reddit,social networks,Twitch}, + language = {en}, + number = {7} +} + +@inproceedings{seering_shaping_2017, + title = {Shaping {{Pro}} and {{Anti}}-{{Social Behavior}} on {{Twitch Through Moderation}} and {{Example}}-{{Setting}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Seering, Joseph and Kraut, Robert and Dabbish, Laura}, + year = {2017}, + pages = {111--125}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Online communities have the potential to be supportive, cruel, or anywhere in between. The development of positive norms for interaction can help users build bonds, grow, and learn. Using millions of messages sent in Twitch chatrooms, we explore the effectiveness of methods for encouraging and discouraging specific behaviors, including taking advantage of imitation effects through setting positive examples and using moderation tools to discourage antisocial behaviors. Consistent with aspects of imitation theory and deterrence theory, users imitated examples of behavior that they saw, and more so for behaviors from high status users. Proactive moderation tools, such as chat modes which restricted the ability to post certain content, proved effective at discouraging spam behaviors, while reactive bans were able to discourage a wider variety of behaviors. 
This work considers the intersection of tools, authority, and types of behaviors, offering a new frame through which to consider the development of moderation strategies.}, + file = {/home/nathante/Zotero/storage/83NV5M5M/Seering et al_2017_Shaping Pro and Anti-Social Behavior on Twitch Through Moderation and.pdf}, + isbn = {978-1-4503-4335-0}, + keywords = {authority and imitation,chatroom behavior,moderation strategies}, + series = {{{CSCW}} '17} +} + +@inproceedings{selbst_fairness_2019, + title = {Fairness and {{Abstraction}} in {{Sociotechnical Systems}}}, + booktitle = {Proceedings of the {{Conference}} on {{Fairness}}, {{Accountability}}, and {{Transparency}}}, + author = {Selbst, Andrew D. and Boyd, Danah and Friedler, Sorelle A. and Venkatasubramanian, Suresh and Vertesi, Janet}, + year = {2019}, + pages = {59--68}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {A key goal of the fair-ML community is to develop machine-learning based systems that, once introduced into a social context, can achieve social and legal outcomes such as fairness, justice, and due process. Bedrock concepts in computer science---such as abstraction and modular design---are used to define notions of fairness and discrimination, to produce fairness-aware learning algorithms, and to intervene at different stages of a decision-making pipeline to produce "fair" outcomes. In this paper, however, we contend that these concepts render technical interventions ineffective, inaccurate, and sometimes dangerously misguided when they enter the societal context that surrounds decision-making systems. We outline this mismatch with five "traps" that fair-ML work can fall into even as it attempts to be more context-aware in comparison to traditional data science. We draw on studies of sociotechnical systems in Science and Technology Studies to explain why such traps occur and how to avoid them. 
Finally, we suggest ways in which technical designers can mitigate the traps through a refocusing of design in terms of process rather than solutions, and by drawing abstraction boundaries to include social actors rather than purely technical ones.}, + file = {/home/nathante/Zotero/storage/WCZTE4DH/Selbst et al. - 2019 - Fairness and Abstraction in Sociotechnical Systems.pdf}, + isbn = {978-1-4503-6125-5}, + keywords = {Fairness-aware Machine Learning,Interdisciplinary,Sociotechnical Systems}, + series = {{{FAT}}* '19} +} + +@article{selbst_intuitive_2018, + title = {The {{Intuitive Appeal}} of {{Explainable Machines}}}, + author = {Selbst, Andrew D. and Barocas, Solon}, + year = {2018}, + volume = {87}, + pages = {1085}, + file = {/home/nathante/Zotero/storage/F7644TYK/LandingPage.html}, + journal = {Fordham Law Review} +} + +@article{shane-simpson_examining_2017, + title = {Examining Potential Mechanisms Underlying the {{Wikipedia}} Gender Gap through a Collaborative Editing Task}, + author = {{Shane-Simpson}, Christina and {Gillespie-Lynch}, Kristen}, + year = {2017}, + month = jan, + volume = {66}, + pages = {312--328}, + issn = {0747-5632}, + abstract = {Research has identified a significant gender gap on the online encyclopedia, Wikipedia. The current research used a mixed experimental (type of feedback) and quasi-experimental (gender) design to examine the editing behaviors of college students during a public, collaborative editing task to identify potential factors underlying the Wikipedia gender gap. Overall, women edited more than men. However, in the editing condition most akin to Wikipedia, wherein female peer editors were underrepresented in the essay edits and feedback from peers was neutral, men trended towards adding more content than women. Women added more content than men in this male-dominated essay condition when peer editors modeled constructive feedback.
Although the type of edits from peer editors was counterbalanced, participants typically viewed an anonymous peer editor as male. Women viewed the anonymous editor as more critical of the participant's own work when compared with a gender-neutral peer editor. These results suggest that visible female editors on Wikipedia and broader encouragement of the use of constructive feedback may begin to alleviate the Wikipedia gender gap. Furthermore, the relatively high proportion of anonymous editors may exacerbate the Wikipedia gender gap, as anonymity may often be perceived as male and more critical.}, + journal = {Computers in Human Behavior}, + keywords = {communication,editing,gender,Online,wikipedia} +} + +@article{shaw_laboratories_2014, + title = {Laboratories of Oligarchy? {{How}} the Iron Law Extends to Peer Production}, + shorttitle = {Laboratories of {{Oligarchy}}?}, + author = {Shaw, Aaron and Hill, Benjamin Mako}, + year = {2014}, + volume = {64}, + pages = {215--238}, + issn = {1460-2466}, + abstract = {Peer production projects like Wikipedia have inspired voluntary associations, collectives, social movements, and scholars to embrace open online collaboration as a model of democratic organization. However, many peer production projects exhibit entrenched leadership and deep inequalities, suggesting that they may not fulfill democratic ideals. Instead, peer production projects may conform to Robert Michels' ``iron law of oligarchy,'' which proposes that democratic membership organizations become increasingly oligarchic as they grow. Using exhaustive data of internal processes from a sample of 683 wikis, we construct empirical measures of participation and test for increases in oligarchy associated with growth in wikis' contributor bases. 
In contrast to previous studies, we find support for Michels' iron law and conclude that peer production entails oligarchic organizational forms.}, + file = {/home/nathante/Zotero/storage/LQMKLGGI/Shaw_Hill_2014_Laboratories of oligarchy.pdf;/home/nathante/Zotero/storage/NIVQUNQI/full.html}, + journal = {Journal of Communication}, + language = {en}, + number = {2} +} + +@article{shi_wisdom_2019, + title = {The Wisdom of Polarized Crowds}, + author = {Shi, Feng and Teplitskiy, Misha and Duede, Eamon and Evans, James A.}, + year = {2019}, + month = mar, + pages = {1}, + issn = {2397-3374}, + abstract = {This article explores the effect of ideological polarization on team performance. By analysing millions of edits to Wikipedia, the authors reveal that politically diverse editor teams produce higher-quality articles than homogeneous or moderate teams, and they identify the mechanisms responsible for producing these superior articles.}, + copyright = {2019 The Author(s), under exclusive licence to Springer Nature Limited}, + file = {/home/nathante/Zotero/storage/8R8GLJHI/Shi et al. - 2019 - The wisdom of polarized crowds.pdf;/home/nathante/Zotero/storage/Y4VUX8VQ/s41562-019-0541-6.html}, + journal = {Nature Human Behaviour}, + language = {En} +} + +@article{shorey_automation_2016, + title = {Automation, {{Algorithms}}, and {{Politics}}| {{Automation}}, {{Big Data}} and {{Politics}}: {{A Research Review}}}, + shorttitle = {Automation, {{Algorithms}}, and {{Politics}}| {{Automation}}, {{Big Data}} and {{Politics}}}, + author = {Shorey, Samantha and Howard, Philip N.}, + year = {2016}, + month = oct, + volume = {10}, + pages = {24}, + issn = {1932-8036}, + copyright = {The IJoC is an academic journal. As such, it is dedicated to the open exchange of information. For this reason, IJoC is freely available to individuals and institutions. 
Copies of this journal or articles in this journal may be distributed for research or educational purposes free of charge and without permission. However, commercial use of the IJoC website or the articles contained herein is expressly prohibited without the written consent of the editor. Authors who publish in The International Journal of Communication will release their articles under the Creative Commons Attribution Non-Commercial No Derivatives (by-nc-nd) license . This license allows anyone to copy and distribute the article for non-commercial purposes provided that appropriate attribution is given. For details of the rights authors grants users of their work, see the "human-readable summary" of the license , with a link to the full license. (Note that "you" refers to a user, not an author, in the summary.) This journal utilizes the LOCKSS system to create a distributed archiving system among participating libraries and permits those libraries to create permanent archives of the journal for purposes of preservation and restoration. The publisher perpetually authorizes participants in the LOCKSS system to archive and restore our publication through the LOCKSS System for the benefit of all LOCKSS System participants. Specifically participating libraries may: Collect and preserve currently accessible materials; Use material consistent with original license terms; Provide copies to other LOCKSS appliances for purposes of audit and repair. ~ Fair Use The U.S. Copyright Act of 1976 specifies, in Section 107, the terms of the Fair Use exception: Notwithstanding the provisions of sections 106 and 106A, the fair use of a copyrighted work, including such use by reproduction in copies or phonorecords or by any other means specified by that section, for purposes such as criticism, comment, news reporting, teaching (including multiple copies for classroom use), scholarship, or research, is not an infringement of copyright. 
In determining whether the use made of a work in any particular case is a fair use the factors to be considered shall include: the purpose and character of the use, including whether such use is of a commercial nature or is for nonprofit educational purposes; the nature of the copyrighted work; the amount and substantiality of the portion used in relation to the copyrighted work as a whole; \& the effect of the use upon the potential market for or value of the copyrighted work. The fact that a work is unpublished shall not itself bar a finding of fair use if such finding is made upon consideration of all the above factors. In accord with these provisions, the International Journal of Communication believes in the vigorous assertion and defense of Fair Use by scholars engaged in academic research, teaching and non-commercial publishing. Thus, we view the inclusion of ``quotations'' from existing print, visual, audio and audio-visual texts to be appropriate examples of Fair Use, as are reproductions of visual images for the purpose of scholarly analysis. 
We encourage authors to obtain appropriate permissions to use materials originally produced by others, but do not require such permissions as long as the usage of such materials falls within the boundaries of Fair Use.}, + file = {/home/nathante/Zotero/storage/PMSSUCGW/Shorey and Howard - 2016 - Automation, Algorithms, and Politics Automation, .pdf;/home/nathante/Zotero/storage/YP4YRTRI/1812.html}, + journal = {International Journal of Communication}, + keywords = {algorithms,automation,big data,critical,literature review,politics}, + language = {en}, + number = {0} +} + +@article{smith_keeping_nodate, + title = {Keeping {{Community}} in the {{Loop}}: {{Understanding Wikipedia Stakeholder Values}} for {{Machine Learning}}-{{Based Systems}}}, + author = {Smith, C Estelle and Yu, Bowen and Srivastava, Anjali and Halfaker, Aaron and Terveen, Loren and Zhu, Haiyi}, + pages = {13}, + abstract = {On Wikipedia, sophisticated algorithmic tools are used to assess the quality of edits and take corrective actions. However, algorithms can fail to solve the problems they were designed for if they conflict with the values of communities who use them. In this study, we take a Value-Sensitive Algorithm Design approach to understanding a community-created and -maintained machine learning-based algorithm called the Objective Revision Evaluation System (ORES)\textemdash a quality prediction system used in numerous Wikipedia applications and contexts. Five major values converged across stakeholder groups that ORES (and its dependent applications) should: (1) reduce the effort of community maintenance, (2) maintain human judgement as the final authority, (3) support differing peoples' differing workflows, (4) encourage positive engagement with diverse editor groups, and (5) establish trustworthiness of people and algorithms within the community. 
We reveal tensions between these values and discuss implications for future research to improve algorithms like ORES.}, + file = {/home/nathante/Zotero/storage/387TK32Q/Smith et al. - Keeping Community in the Loop Understanding Wikip.pdf}, + language = {en} +} + +@article{spears_computer-mediated_2009, + title = {Computer-Mediated Communication and Social Identity}, + author = {Spears, Russell and Lea, Martin and Postmes, Tom}, + year = {2009}, + month = feb, + abstract = {This article argues that social identities not only populate computer-mediated communication (CMC) and the Internet, but they often thrive there, both by designation (of identity: the cognitive dimension) and by design (the strategic dimension in which identities and their agendas are contested). This means that far from being eliminated in CMC, the group and its effects often shine through in CMC (intragroup cohesiveness and conformity, intergroup contrast, and competition). In terms of status and power differentials this can mean that the power and status relations associated with categories are reinforced, both cognitively, by being tied to the roles and relations associated with these identities, and strategically, by the surveillance which CMC can sometimes bring.}, + file = {/home/nathante/Zotero/storage/F6XDCAGP/oxfordhb-9780199561803-e-017.html}, + journal = {Oxford Handbook of Internet Psychology}, + language = {en} +} + +@article{srinivasan_content_2019, + title = {Content {{Removal As}} a {{Moderation Strategy}}: {{Compliance}} and {{Other Outcomes}} in the {{ChangeMyView Community}}}, + shorttitle = {Content {{Removal As}} a {{Moderation Strategy}}}, + author = {Srinivasan, Kumar Bhargav and {Danescu-Niculescu-Mizil}, Cristian and Lee, Lillian and Tan, Chenhao}, + year = {2019}, + month = nov, + volume = {3}, + pages = {163:1--163:21}, + issn = {2573-0142}, + abstract = {Moderators of online communities often employ comment deletion as a tool. 
We ask here whether, beyond the positive effects of shielding a community from undesirable content, does comment removal actually cause the behavior of the comment's author to improve? We examine this question in a particularly well-moderated community, the ChangeMyView subreddit. The standard analytic approach of interrupted time-series analysis unfortunately cannot answer this question of causality because it fails to distinguish the effect of having made a non-compliant comment from the effect of being subjected to moderator removal of that comment. We therefore leverage a "delayed feedback" approach based on the observation that some users may remain active between the time when they posted the non-compliant comment and the time when that comment is deleted. Applying this approach to such users, we reveal the causal role of comment deletion in reducing immediate noncompliance rates, although we do not find evidence of it having a causal role in inducing other behavior improvements. Our work thus empirically demonstrates both the promise and some potential limits of content removal as a positive moderation strategy, and points to future directions for identifying causal effects from observational data.}, + file = {/home/nathante/Zotero/storage/YHIKXIHS/Srinivasan et al_2019_Content Removal As a Moderation Strategy.pdf}, + journal = {Proc. ACM Hum.-Comput. Interact.}, + keywords = {changemyview,content moderation,delayed feedback,interrupted time-series analysis,quasi-experimental designs,reddit,time series}, + number = {CSCW} +} + +@techreport{stevenson_algorithmic_2019, + title = {Algorithmic {{Risk Assessment}} in the {{Hands}} of {{Humans}}}, + author = {Stevenson, Megan T. 
and Doleac, Jennifer L.}, + year = {2019}, + month = nov, + address = {{Rochester, NY}}, + institution = {{Social Science Research Network}}, + abstract = {We evaluate the impacts of adopting algorithmic predictions of future offending (risk assessments) as an aid to judicial discretion in felony sentencing. We find that judges' decisions are influenced by the risk score, leading to longer sentences for defendants with higher scores and shorter sentences for those with lower scores. However, we find no robust evidence that this reshuffling led to a decline in recidivism, and, over time, judges appeared to use the risk scores less. Risk assessment's failure to reduce recidivism is at least partially explained by judicial discretion in its use. Judges systematically grant leniency to young defendants, despite their high risk of reoffending. This is in line with a long standing practice of treating youth as a mitigator in sentencing, due to lower perceived culpability. Such a conflict in goals may have led prior studies to overestimate the extent to which judges make prediction errors. Since one of the most important inputs to the risk score is effectively off-limits, risk assessment's expected benefits are curtailed. We find no evidence that risk assessment affected racial disparities statewide, although there was a relative increase in sentences for black defendants in courts that appeared to use risk assessment most. We conduct simulations to evaluate how race and age disparities would have changed if judges had fully complied with the sentencing recommendations associated with the algorithm. Racial disparities might have increased slightly, but the largest change would have been higher relative incarceration rates for defendants under the age of 23. 
In the context of contentious public discussions about algorithms, our results highlight the importance of thinking about how man and machine interact.}, + file = {/home/nathante/Zotero/storage/XZBLCTY3/papers.html}, + keywords = {algorithms,risk assessment,sentencing}, + language = {en}, + number = {ID 3489440}, + type = {{{SSRN Scholarly Paper}}} +} + +@techreport{stevenson_algorithmic_2019-1, + title = {Algorithmic {{Risk Assessment}} in the {{Hands}} of {{Humans}}}, + author = {Stevenson, Megan T. and Doleac, Jennifer L.}, + year = {2019}, + month = nov, + address = {{Rochester, NY}}, + institution = {{Social Science Research Network}}, + abstract = {We evaluate the impacts of adopting algorithmic predictions of future offending (risk assessments) as an aid to judicial discretion in felony sentencing. We find that judges' decisions are influenced by the risk score, leading to longer sentences for defendants with higher scores and shorter sentences for those with lower scores. However, we find no robust evidence that this reshuffling led to a decline in recidivism, and, over time, judges appeared to use the risk scores less. Risk assessment's failure to reduce recidivism is at least partially explained by judicial discretion in its use. Judges systematically grant leniency to young defendants, despite their high risk of reoffending. This is in line with a long standing practice of treating youth as a mitigator in sentencing, due to lower perceived culpability. Such a conflict in goals may have led prior studies to overestimate the extent to which judges make prediction errors. Since one of the most important inputs to the risk score is effectively off-limits, risk assessment's expected benefits are curtailed. We find no evidence that risk assessment affected racial disparities statewide, although there was a relative increase in sentences for black defendants in courts that appeared to use risk assessment most. 
We conduct simulations to evaluate how race and age disparities would have changed if judges had fully complied with the sentencing recommendations associated with the algorithm. Racial disparities might have increased slightly, but the largest change would have been higher relative incarceration rates for defendants under the age of 23. In the context of contentious public discussions about algorithms, our results highlight the importance of thinking about how man and machine interact.}, + file = {/home/nathante/Zotero/storage/Y8RAGMH9/papers.html}, + keywords = {algorithms,risk assessment,sentencing}, + language = {en}, + number = {ID 3489440}, + type = {{{SSRN Scholarly Paper}}} +} + +@article{stevenson_assessing_2017, + title = {Assessing {{Risk Assessment}} in {{Action}}}, + author = {Stevenson, Megan T.}, + year = {2017}, + issn = {1556-5068}, + file = {/home/nathante/Zotero/storage/TR5NVDVW/Stevenson - 2017 - Assessing Risk Assessment in Action.pdf}, + journal = {SSRN Electronic Journal}, + language = {en} +} + +@article{stuart_using_2014, + title = {Using Propensity Scores in Difference-in-Differences Models to Estimate the Effects of a Policy Change}, + author = {Stuart, Elizabeth A. and Huskamp, Haiden A. and Duckworth, Kenneth and Simmons, Jeffrey and Song, Zirui and Chernew, Michael and Barry, Colleen L.}, + year = {2014}, + month = dec, + volume = {14}, + pages = {166--182}, + issn = {1387-3741}, + abstract = {Difference-in-difference (DD) methods are a common strategy for evaluating the effects of policies or programs that are instituted at a particular point in time, such as the implementation of a new law. The DD method compares changes over time in a group unaffected by the policy intervention to the changes over time in a group affected by the policy intervention, and attributes the ``difference-in-differences'' to the effect of the policy. 
DD methods provide unbiased effect estimates if the trend over time would have been the same between the intervention and comparison groups in the absence of the intervention. However, a concern with DD models is that the program and intervention groups may differ in ways that would affect their trends over time, or their compositions may change over time. Propensity score methods are commonly used to handle this type of confounding in other non-experimental studies, but the particular considerations when using them in the context of a DD model have not been well investigated. In this paper, we describe the use of propensity scores in conjunction with DD models, in particular investigating a propensity score weighting strategy that weights the four groups (defined by time and intervention status) to be balanced on a set of characteristics. We discuss the conceptual issues associated with this approach, including the need for caution when selecting variables to include in the propensity score model, particularly given the multiple time point nature of the analysis. We illustrate the ideas and method with an application estimating the effects of a new payment and delivery system innovation (an accountable care organization model called the ``Alternative Quality Contract'' (AQC) implemented by Blue Cross Blue Shield of Massachusetts) on health plan enrollee out-of-pocket mental health service expenditures. 
We find no evidence that the AQC affected out-of-pocket mental health service expenditures of enrollees.}, + journal = {Health services \& outcomes research methodology}, + number = {4}, + pmcid = {PMC4267761}, + pmid = {25530705} +} + +@article{suchman_working_2013, + title = {Working Relations of Technology Production and Use}, + author = {Suchman, Lucy}, + year = {2013}, + month = aug, + volume = {2}, + pages = {21--39}, + issn = {0925-9724, 1573-7551}, + abstract = {This paper explores the relevance of recent feminist reconstructions of objectivity for the development of alternative visions of technology production and use. I take as my starting place the working relations that make up the design and use of technical systems. Working relations are understood as networks or webs of connections that sustain the visible and invisible work required to construct coherent technologies and put them into use. I outline the boundaries that characterize current relations of development and use, and the boundary crossings required to transform them. Three contrasting premises for design-the view from nowhere, detached engagement, and located accountability \textemdash{} are taken to represent incommensurate alternatives for a politics of professional design. 
From the position of located accountability, I close by sketching aspects of what a feminist politics and associated practices of system development could be.}, + annotation = {00374}, + file = {/home/nathante/Zotero/storage/XPP8FM37/Suchman - 2013 - Working relations of technology production and use.pdf;/home/nathante/Zotero/storage/7JE5HCEM/BF00749282.html}, + journal = {Computer Supported Cooperative Work}, + keywords = {Computer Science; general,Design Practice,Feminist Epistemology,Interdisciplinary Studies,Psychology; general,Social Boundaries,Social Sciences; general,Systems Development,User Interfaces and Human Computer Interaction,Work-oriented Design}, + language = {en}, + number = {1-2} +} + +@article{suresh_framework_2019, + title = {A {{Framework}} for {{Understanding Unintended Consequences}} of {{Machine Learning}}}, + author = {Suresh, Harini and Guttag, John V.}, + year = {2019}, + month = jan, + abstract = {As machine learning increasingly affects people and society, it is important that we strive for a comprehensive and unified understanding of how and why unwanted consequences arise. For instance, downstream harms to particular groups are often blamed on "biased data," but this concept encompass too many issues to be useful in developing solutions. In this paper, we provide a framework that partitions sources of downstream harm in machine learning into five distinct categories spanning the data generation and machine learning pipeline. We describe how these issues arise, how they are relevant to particular applications, and how they motivate different solutions. 
In doing so, we aim to facilitate the development of solutions that stem from an understanding of application-specific populations and data generation processes, rather than relying on general claims about what may or may not be "fair."}, + archivePrefix = {arXiv}, + eprint = {1901.10002}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/JVWWY8WJ/Suresh and Guttag - 2019 - A Framework for Understanding Unintended Consequen.pdf;/home/nathante/Zotero/storage/FPHG6Q37/1901.html}, + journal = {arXiv:1901.10002 [cs, stat]}, + keywords = {Computer Science - Machine Learning,Statistics - Machine Learning}, + primaryClass = {cs, stat} +} + +@article{sweeney_discrimination_2013, + title = {Discrimination in {{Online Ad Delivery}}}, + author = {Sweeney, Latanya}, + year = {2013}, + month = jan, + abstract = {A Google search for a person's name, such as "Trevon Jones", may yield a personalized ad for public records about Trevon that may be neutral, such as "Looking for Trevon Jones?", or may be suggestive of an arrest record, such as "Trevon Jones, Arrested?". This writing investigates the delivery of these kinds of ads by Google AdSense using a sample of racially associated names and finds statistically significant discrimination in ad delivery based on searches of 2184 racially associated personal names across two websites. First names, assigned at birth to more black or white babies, are found predictive of race (88\% black, 96\% white), and those assigned primarily to black babies, such as DeShawn, Darnell and Jermaine, generated ads suggestive of an arrest in 81 to 86 percent of name searches on one website and 92 to 95 percent on the other, while those assigned at birth primarily to whites, such as Geoffrey, Jill and Emma, generated more neutral copy: the word "arrest" appeared in 23 to 29 percent of name searches on one site and 0 to 60 percent on the other. 
On the more ad trafficked website, a black-identifying name was 25\% more likely to get an ad suggestive of an arrest record. A few names did not follow these patterns. All ads return results for actual individuals and ads appear regardless of whether the name has an arrest record in the company's database. The company maintains Google received the same ad text for groups of last names (not first names), raising questions as to whether Google's technology exposes racial bias.}, + archivePrefix = {arXiv}, + eprint = {1301.6822}, + eprinttype = {arxiv}, + file = {/home/nathante/Zotero/storage/PB5Y6DUW/Sweeney - 2013 - Discrimination in Online Ad Delivery.pdf;/home/nathante/Zotero/storage/ZALDF9ZG/1301.html}, + journal = {arXiv:1301.6822 [cs]}, + keywords = {Computer Science - Computers and Society,Computer Science - Information Retrieval,H.3.3,H.3.5,K.4.1,K.4.2,K.5}, + primaryClass = {cs} +} + +@article{tanis_two_2007, + title = {Two Faces of Anonymity: {{Paradoxical}} Effects of Cues to Identity in {{CMC}}}, + shorttitle = {Two Faces of Anonymity}, + author = {Tanis, Martin and Postmes, Tom}, + year = {2007}, + month = mar, + volume = {23}, + pages = {955--970}, + issn = {0747-5632}, + abstract = {This paper presents two experimental studies investigating the effects of presenting cues that provide information about the interactors \textendash{} called cues to identity \textendash{} in computer mediated communications (CMCs). Study 1 shows that even though cues to identity affected interpersonal evaluations, in making them more positive, the presence of these cues were associated with less certainty and less medium satisfaction for users with experience in online communication. Study 2 shows that when performing an online communication task, participants felt more certain, were more satisfied with the medium, and thought they had performed better in the absence of cues to identity. 
Thus, this study supports the widespread assumption that rich interactions (i.e., interactions that allow the transmission of cues to identity such as face-to-face) are superior in that they make the interaction more personal, but that these outcomes are not mirrored by the evaluation of the interaction. It is suggested that the presence of cues to identity positively affects interpersonal perceptions, but at the same time decreases perceptions of solidarity or entitativity.}, + file = {/home/nathante/Zotero/storage/6YMY73ZT/S0747563205000567.html}, + journal = {Computers in Human Behavior}, + language = {en}, + number = {2}, + series = {Special {{Issue}}: {{Internet}} and {{Well}}-{{Being}} in {{Honor}} of the {{Memory}} of {{Michael Argyle}}} +} + +@inproceedings{teblunthuis_revisiting_2018, + title = {Revisiting "{{The}} Rise and Decline" in a Population of Peer Production Projects}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '18)}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + year = {2018}, + pages = {355:1--355:7}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {Do patterns of growth and stabilization found in large peer production systems such as Wikipedia occur in other communities? This study assesses the generalizability of Halfaker et al.'s influential 2013 paper on "The Rise and Decline of an Open Collaboration System." We replicate its tests of several theories related to newcomer retention and norm entrenchment using a dataset of hundreds of active peer production wikis from Wikia. We reproduce the subset of the findings from Halfaker and colleagues that we are able to test, comparing both the estimated signs and magnitudes of our models. 
Our results support the external validity of Halfaker et al.'s claims that quality control systems may limit the growth of peer production communities by deterring new contributors and that norms tend to become entrenched over time.}, + file = {/home/nathante/Zotero/storage/B3E4X6GQ/TeBlunthuis et al. - 2018 - Revisiting The rise and decline in a population .pdf}, + isbn = {978-1-4503-5620-6}, + keywords = {governance,online communities,peer production,quality control,replication,retention,wikipedia,wikis} +} + +@article{thebault-spieker_simulation_2017, + title = {Simulation {{Experiments}} on (the {{Absence}} of) {{Ratings Bias}} in {{Reputation Systems}}}, + author = {{Thebault-Spieker}, Jacob and Kluver, Daniel and Klein, Maximilian A. and Halfaker, Aaron and Hecht, Brent and Terveen, Loren and Konstan, Joseph A.}, + year = {2017}, + month = dec, + volume = {1}, + pages = {1--25}, + issn = {25730142}, + file = {/home/nathante/Zotero/storage/ULK9KRXJ/Thebault-Spieker et al. - 2017 - Simulation Experiments on (the Absence of) Ratings.pdf}, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + language = {en}, + number = {CSCW} +} + +@article{tufekci_algorithmic_2015, + title = {Algorithmic {{Harms}} beyond {{Facebook}} and {{Google}}: {{Emergent Challenges}} of {{Computational Agency}}}, + shorttitle = {Algorithmic {{Harms}} beyond {{Facebook}} and {{Google}}}, + author = {Tufekci, Zeynep}, + year = {2015}, + volume = {13}, + pages = {203}, + file = {/home/nathante/Zotero/storage/AA9AIIRN/LandingPage.html}, + journal = {Colorado Technology Law Journal} +} + +@article{turner_all_nodate, + title = {All {{Other Things Being Equal}}: {{A Paired Testing Study}} of {{Mortgage Lending Institutions}}}, + author = {Turner, Margery Austin and Freiberg, Fred and Godfrey, Erin and Herbig, Carla and Levy, Diane K and Smith, Robin R}, + pages = {93}, + file = {/home/nathante/Zotero/storage/UEIFEL79/Turner et al. 
- All Other Things Being Equal A Paired Testing Stu.pdf}, + language = {en} +} + +@article{turner_social_1986, + title = {The Social Identity Theory of Intergroup Behavior}, + author = {Turner, John C and Tajfel, Henri}, + year = {1986}, + volume = {5}, + pages = {7--24}, + file = {/home/nathante/Zotero/storage/ZET47BT3/Turner_Tajfel_1986_The social identity theory of intergroup behavior.pdf}, + journal = {Psychology of intergroup relations} +} + +@article{tversky_judgment_1974, + title = {Judgment under {{Uncertainty}}: {{Heuristics}} and {{Biases}}}, + shorttitle = {Judgment under {{Uncertainty}}}, + author = {Tversky, Amos and Kahneman, Daniel}, + year = {1974}, + month = sep, + volume = {185}, + pages = {1124--1131}, + issn = {0036-8075, 1095-9203}, + abstract = {This article described three heuristics that are employed in making judgements under uncertainty: (i) representativeness, which is usually employed when people are asked to judge the probability that an object or event A belongs to class or process B; (ii) availability of instances or scenarios, which is often employed when people are asked to assess the frequency of a class or the plausibility of a particular development; and (iii) adjustment from an anchor, which is usually employed in numerical prediction when a relevant value is available. These heuristics are highly economical and usually effective, but they lead to systematic and predictable errors. 
A better understanding of these heuristics and of the biases to which they lead could improve judgements and decisions in situations of uncertainty.}, + copyright = {1974 by the American Association for the Advancement of Science}, + file = {/home/nathante/Zotero/storage/VJPH9WTC/1124.html}, + journal = {Science}, + language = {en}, + number = {4157}, + pmid = {17835457} +} + +@article{vaisey_what_2017, + title = {What {{You Can}}\textemdash and {{Can}}'t\textemdash{{Do With Three}}-{{Wave Panel Data}}}, + author = {Vaisey, Stephen and Miles, Andrew}, + year = {2017}, + month = jan, + volume = {46}, + pages = {44--67}, + issn = {0049-1241}, + abstract = {The recent change in the general social survey (GSS) to a rotating panel design is a landmark development for social scientists. Sociological methodologists have argued that fixed-effects (FE) models are generally the best starting point for analyzing panel data because they allow analysts to control for unobserved time-constant heterogeneity. We review these treatments and demonstrate the advantages of FE models in the context of the GSS. We also show, however, that FE models have two rarely tested assumptions that can seriously bias parameter estimates when violated. We provide simple tests for these assumptions. We further demonstrate that FE models are extremely sensitive to the correct specification of temporal lags. 
We provide a simulation and a proof to show that the use of incorrect lags in FE models can lead to coefficients that are the opposite sign of the true parameter values.}, + file = {/home/nathante/Zotero/storage/G4LZEJJX/Vaisey and Miles - 2017 - What You Can—and Can’t—Do With Three-Wave Panel Da.pdf}, + journal = {Sociological Methods \& Research}, + language = {en}, + number = {1} +} + +@article{velden_decentering_2013, + title = {Decentering {{Design}}: {{Wikipedia}} and {{Indigenous Knowledge}}}, + shorttitle = {Decentering {{Design}}}, + author = {van der Velden, Maja}, + year = {2013}, + month = mar, + volume = {29}, + pages = {308--316}, + issn = {1044-7318}, + abstract = {This article is a reflection on the case of Wikipedia, the largest online reference site with 23 million articles, with 365 million readers, and without a page called Indigenous knowledge. A Postcolonial Computing lens, extended with the notion of decentering, is used to find out what happened with Indigenous knowledge in Wikipedia. Wikipedia's ordering technologies, such as policies and templates, play a central role in producing knowledge. Two designs, developed with and for Indigenous communities, are introduced to explore if another Wikipedia's design is possible.}, + file = {/home/nathante/Zotero/storage/4K5VKHKC/Velden_2013_Decentering Design.pdf;/home/nathante/Zotero/storage/SJ89ZZ9R/Velden - 2013 - Decentering Design Wikipedia and Indigenous Knowl.html}, + journal = {International Journal of Human\textendash Computer Interaction}, + number = {4} +} + +@article{ven_explaining_1995, + title = {Explaining {{Development}} and {{Change}} in {{Organizations}}}, + author = {Ven, Andrew H. 
Van De and Poole, Marshall Scott}, + year = {1995}, + month = jul, + volume = {20}, + pages = {510--540}, + issn = {0363-7425, 1930-3807}, + abstract = {This article introduces four basic theories that may serve as building blocks for explaining processes of change in organizations: life cycle, teleology, dialectics, and evolution. These four theories represent different sequences of change events that are driven by different conceptual motors and operate at different organizational levels. This article identifies the circumstances when each theory applies and proposes how interplay among the theories produces a wide variety of more complex theories of change and development in organizational life.}, + file = {/home/nathante/Zotero/storage/4F6CM4L6/Ven and Poole - 1995 - Explaining Development and Change in Organizations.pdf;/home/nathante/Zotero/storage/KJJ2J9US/510.html}, + journal = {Academy of Management Review}, + keywords = {DIALECTIC,INDUSTRIAL management,INDUSTRIAL organization,INNOVATION adoption,ORGANIZATIONAL death,ORGANIZATIONAL growth,ORGANIZATIONAL sociology,ORGANIZATIONAL structure,SOCIAL change}, + language = {en}, + number = {3} +} + +@article{ver_hoef_quasi-poisson_2007, + title = {Quasi-{{Poisson}} vs. Negative Binomial Regression: How Should We Model Overdispersed Count Data?}, + shorttitle = {Quasi-{{Poisson}} vs. Negative Binomial Regression}, + author = {Ver Hoef, Jay M. and Boveng, Peter L.}, + year = {2007}, + month = nov, + volume = {88}, + pages = {2766--2772}, + issn = {0012-9658}, + abstract = {Quasi-Poisson and negative binomial regression models have equal numbers of parameters, and either could be used for overdispersed count data. While they often give similar results, there can be striking differences in estimating the effects of covariates. We explain when and why such differences occur. 
The variance of a quasi-Poisson model is a linear function of the mean while the variance of a negative binomial model is a quadratic function of the mean. These variance relationships affect the weights in the iteratively weighted least-squares algorithm of fitting models to data. Because the variance is a function of the mean, large and small counts get weighted differently in quasi-Poisson and negative binomial regression. We provide an example using harbor seal counts from aerial surveys. These counts are affected by date, time of day, and time relative to low tide. We present results on a data set that showed a dramatic difference on estimating abundance of harbor seals when using quasi-Poisson vs. negative binomial regression. This difference is described and explained in light of the different weighting used in each regression method. A general understanding of weighting can help ecologists choose between these two methods.}, + journal = {Ecology}, + keywords = {Animals,Binomial Distribution,Data Collection,Data Interpretation; Statistical,Linear Models,Models; Statistical,Phoca,Poisson Distribution,Population Density,Population Growth,Probability,Regression Analysis,Seasons,Time Factors}, + language = {eng}, + number = {11}, + pmid = {18051645} +} + +@inproceedings{viegas_hidden_2007, + title = {The Hidden Order of Wikipedia}, + booktitle = {Proceedings of the 2nd {{International Conference}} on {{Online Communities}} and {{Social Computing}}}, + author = {Vi{\'e}gas, Fernanda B. and Wattenberg, Martin and McKeon, Matthew}, + year = {2007}, + pages = {445--454}, + address = {{Beijing, China}}, + abstract = {We examine the procedural side of Wikipedia, the well-known internet encyclopedia. Despite the lack of structure in the underlying wiki technology, users abide by hundreds of rules and follow well-defined processes. Our case study is the Featured Article (FA) process, one of the best established procedures on the site. 
We analyze the FA process through the theoretical framework of commons governance, and demonstrate how this process blends elements of traditional workflow with peer production. We conclude that rather than encouraging anarchy, many aspects of wiki technology lend themselves to the collective creation of formalized process and policy.}, + file = {/home/nathante/Zotero/storage/3MHPKWH3/Viégas et al_2007_The hidden order of wikipedia.pdf;/home/nathante/Zotero/storage/XM9KZDR6/citation.html}, + isbn = {978-3-540-73256-3} +} + +@inproceedings{wagner_its_2015, + title = {It's a {{Man}}'s {{Wikipedia}}? {{Assessing Gender Inequality}} in an {{Online Encyclopedia}}}, + shorttitle = {It's a {{Man}}'s {{Wikipedia}}?}, + booktitle = {Ninth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Wagner, Claudia and Garcia, David and Jadidi, Mohsen and Strohmaier, Markus}, + year = {2015}, + month = apr, + abstract = {Wikipedia is a community-created encyclopedia that contains information about notable people from different countries, epochs and disciplines and aims to document the world's knowledge from a neutral point of view. However, the narrow diversity of the Wikipedia editor community has the potential to introduce systemic biases such as gender biases into the content of Wikipedia. In this paper we aim to tackle a sub problem of this larger challenge by presenting and applying a computational method for assessing gender bias on Wikipedia along multiple dimensions. We find that while women on Wikipedia are covered and featured well in many Wikipedia language editions, the way women are portrayed starkly differs from the way men are portrayed. We hope our work contributes to increasing awareness about gender biases online, and in particular to raising attention to the different levels in which gender biases can manifest themselves on the web.}, + copyright = {Authors who publish a paper in this conference agree to the following terms: 1. 
Author(s) agree to transfer their copyrights in their article/paper to the Association for the Advancement of Artificial Intelligence (AAAI), in order to deal with future requests for reprints, translations, anthologies, reproductions, excerpts, and other publications. This grant will include, without limitation, the entire copyright in the article/paper in all countries of the world, including all renewals, extensions, and reversions thereof, whether such rights current exist or hereafter come into effect, and also the exclusive right to create electronic versions of the article/paper, to the extent that such right is not subsumed under copyright. 2. The author(s) warrants that they are the sole author and owner of the copyright in the above article/paper, except for those portions shown to be in quotations; that the article/paper is original throughout; and that the undersigned right to make the grants set forth above is complete and unencumbered. 3. The author(s) agree that if anyone brings any claim or action alleging facts that, if true, constitute a breach of any of the foregoing warranties, the author(s) will hold harmless and indemnify AAAI, their grantees, their licensees, and their distributors against any liability, whether under judgment, decree, or compromise, and any legal fees and expenses arising out of that claim or actions, and the undersigned will cooperate fully in any defense AAAI may make to such claim or action. Moreover, the undersigned agrees to cooperate in any claim or other action seeking to protect or enforce any right the undersigned has granted to AAAI in the article/paper. If any such claim or action fails because of facts that constitute a breach of any of the foregoing warranties, the undersigned agrees to reimburse whomever brings such claim or action for expenses and attorneys' fees incurred therein. 4. Author(s) retain all proprietary rights other than copyright (such as patent rights). 5. 
Author(s) may make personal reuse of all or portions of the above article/paper in other works of their own authorship. 6. Author(s) may reproduce, or have reproduced, their article/paper for the author's personal use, or for company use provided that AAAI copyright and the source are indicated, and that the copies are not used in a way that implies AAAI endorsement of a product or service of an employer, and that the copies per se are not offered for sale. The foregoing right shall not permit the posting of the article/paper in electronic or digital form on any computer network, except by the author or the author's employer, and then only on the author's or the employer's own web page or ftp site. Such web page or ftp site, in addition to the aforementioned requirements of this Paragraph, must provide an electronic reference or link back to the AAAI electronic server, and shall not post other AAAI copyrighted materials not of the author's or the employer's creation (including tables of contents with links to other papers) without AAAI's written permission. 7. Author(s) may make limited distribution of all or portions of their article/paper prior to publication. 8. In the case of work performed under U.S. Government contract, AAAI grants the U.S. Government royalty-free permission to reproduce all or portions of the above article/paper, and to authorize others to do so, for U.S. Government purposes. 9. In the event the above article/paper is not accepted and published by AAAI, or is withdrawn by the author(s) before acceptance by AAAI, this agreement becomes null and void.}, + file = {/home/nathante/Zotero/storage/KSA468EP/Wagner et al. 
- 2015 - It's a Man's Wikipedia Assessing Gender Inequalit.pdf;/home/nathante/Zotero/storage/2I7DYWF8/10585.html}, + language = {en} +} + +@article{wagner_women_2016, + title = {Women through the Glass Ceiling: Gender Asymmetries in {{Wikipedia}}}, + shorttitle = {Women through the Glass Ceiling}, + author = {Wagner, Claudia and {Graells-Garrido}, Eduardo and Garcia, David and Menczer, Filippo}, + year = {2016}, + month = dec, + volume = {5}, + pages = {5}, + issn = {2193-1127}, + abstract = {Contributing to the writing of history has never been as easy as it is today thanks to Wikipedia, a community-created encyclopedia that aims to document the world's knowledge from a neutral point of view. Though everyone can participate it is well known that the editor community has a narrow diversity, with a majority of white male editors. While this participatory gender gap has been studied extensively in the literature, this work sets out to assess potential gender inequalities in Wikipedia articles along different dimensions: notability, topical focus, linguistic bias, structural properties, and meta-data presentation. We find that (i) women in Wikipedia are more notable than men, which we interpret as the outcome of a subtle glass ceiling effect; (ii) family-, gender-, and relationship-related topics are more present in biographies about women; (iii) linguistic bias manifests in Wikipedia since abstract terms tend to be used to describe positive aspects in the biographies of men and negative aspects in the biographies of women; and (iv) there are structural differences in terms of meta-data and hyperlinks, which have consequences for information-seeking activities. While some differences are expected, due to historical and social contexts, other differences are attributable to Wikipedia editors. The implications of such differences are discussed having Wikipedia contribution policies in mind. 
We hope that the present work will contribute to increased awareness about, first, gender issues in the content of Wikipedia, and second, the different levels on which gender biases can manifest on the Web.}, + copyright = {2016 Wagner et al.}, + file = {/home/nathante/Zotero/storage/X2YTS5GS/Wagner et al. - 2016 - Women through the glass ceiling gender asymmetrie.pdf;/home/nathante/Zotero/storage/T8VBRXPH/s13688-016-0066-4.html}, + journal = {EPJ Data Science}, + language = {En}, + number = {1} +} + +@article{wallach_big_2019, + title = {Big {{Data}}, {{Machine Learning}}, and the {{Social Sciences}}: {{Fairness}}, {{Accountability}}, and {{Transparency}}}, + shorttitle = {Big {{Data}}, {{Machine Learning}}, and the {{Social Sciences}}}, + author = {Wallach, Hanna}, + year = {2019}, + month = jan, + abstract = {This essay is a (near) transcript of a talk I recently gave at a NIPS 2014 workshop on ``Fairness, Accountability, and Transparency in Machine Learning,'' organized by Solon Barocas and Moritz Hardt.}, + file = {/home/nathante/Zotero/storage/LEMNG7Z5/big-data-machine-learning-and-the-social-sciences-fairness-accountability-and-transparency.html}, + journal = {Medium}, + language = {en-US} +} + +@inproceedings{warncke-wang_tell_2013, + title = {Tell {{Me More}}: {{An Actionable Quality Model}} for {{Wikipedia}}}, + shorttitle = {Tell {{Me More}}}, + booktitle = {Proceedings of the 9th {{International Symposium}} on {{Open Collaboration}}}, + author = {{Warncke-Wang}, Morten and Cosley, Dan and Riedl, John}, + year = {2013}, + pages = {8:1--8:10}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {In this paper we address the problem of developing actionable quality models for Wikipedia, models whose features directly suggest strategies for improving the quality of a given article. 
We first survey the literature in order to understand the notion of article quality in the context of Wikipedia and existing approaches to automatically assess article quality. We then develop classification models with varying combinations of more or less actionable features, and find that a model that only contains clearly actionable features delivers solid performance. Lastly we discuss the implications of these results in terms of how they can help improve the quality of articles across Wikipedia.}, + file = {/home/nathante/Zotero/storage/AKDJ22WY/Warncke-Wang et al. - 2013 - Tell Me More An Actionable Quality Model for Wiki.pdf}, + isbn = {978-1-4503-1852-5}, + keywords = {classification,flaw detection,information quality,machine learning,modelling,Wikipedia}, + series = {{{WikiSym}} '13} +} + +@book{weber_economy_1978, + title = {Economy and Society}, + author = {Weber, Max}, + year = {1978}, + publisher = {{University of California Press}}, + address = {{Berkeley, CA}}, + isbn = {978-0-520-03500-3} +} + +@book{weber_protestant_2003, + title = {The Protestant Ethic and the Spirit of Capitalism}, + author = {Weber, Max}, + year = {2003}, + publisher = {{Dover Publications}}, + address = {{Mineola, NY}}, + abstract = {The Protestant ethic \textemdash{} a moral code stressing hard work, rigorous self-discipline, and the organization of one's life in the service of God \textemdash{} was made famous by sociologist and political economist Max Weber. In this brilliant study (his best-known and most controversial), he opposes the Marxist concept of dialectical materialism and its view that change takes place through "the struggle of opposites." Instead, he relates the rise of a capitalist economy to the Puritan determination to work out anxiety over salvation or damnation by performing good deeds \textemdash{} an effort that ultimately discouraged belief in predestination and encouraged capitalism. 
Weber's classic study has long been required reading in college and advanced high school social studies classrooms.}, + isbn = {978-0-486-12237-3}, + keywords = {Business \& Economics / Economic History,History / World}, + language = {en} +} + +@inproceedings{welser_finding_2011, + title = {Finding Social Roles in {{Wikipedia}}}, + booktitle = {Proceedings of the 2011 {{iConference}}}, + author = {Welser, Howard T. and Cosley, Dan and Kossinets, Gueorgi and Lin, Austin and Dokshin, Fedor and Gay, Geri and Smith, Marc}, + year = {2011}, + pages = {122--129}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + file = {/home/nathante/Zotero/storage/JAF2G4JK/Welser et al. - 2011 - Finding social roles in Wikipedia.pdf}, + isbn = {978-1-4503-0121-3}, + keywords = {online community,social networks,social roles,structural signatures,wikipedia}, + series = {{{iConference}} '11} +} + +@article{white_history_2018, + title = {The History of Women in Engineering on {{Wikipedia}}}, + author = {White, Alice}, + year = {2018}, + volume = {10}, + issn = {2054-5770}, + file = {/home/nathante/Zotero/storage/8JTTFUIM/the-history-of-women-in-engineering-on-wikipedia.html}, + journal = {Science Museum Group Journal}, + language = {en}, + number = {10} +} + +@book{woodward_applied_2017, + title = {Applied Time Series Analysis with {{R}}}, + author = {Woodward, Wayne A and Elliott, Alan C and Gray, Henry L}, + year = {2017}, + annotation = {OCLC: 1074338335}, + file = {/home/nathante/Zotero/storage/859XI3DK/Woodward et al. 
- 2017 - Applied time series analysis with R.pdf}, + isbn = {978-1-4987-3427-1 978-1-351-66450-9}, + keywords = {time series analysis}, + language = {English} +} + +@inproceedings{wulczyn_ex_2017, + title = {Ex {{Machina}}: {{Personal Attacks Seen}} at {{Scale}}}, + shorttitle = {Ex {{Machina}}}, + booktitle = {Proceedings of the 26th {{International Conference}} on {{World Wide Web}} - {{WWW}} '17}, + author = {Wulczyn, Ellery and Thain, Nithum and Dixon, Lucas}, + year = {2017}, + pages = {1391--1399}, + publisher = {{ACM Press}}, + address = {{Perth, Australia}}, + abstract = {The damage personal attacks cause to online discourse motivates many platforms to try to curb the phenomenon. However, understanding the prevalence and impact of personal attacks in online platforms at scale remains surprisingly difficult. The contribution of this paper is to develop and illustrate a method that combines crowdsourcing and machine learning to analyze personal attacks at scale. We show an evaluation method for a classifier in terms of the aggregated number of crowd-workers it can approximate. We apply our methodology to English Wikipedia, generating a corpus of over 100k high quality human-labeled comments and 63M machine-labeled ones from a classifier that is as good as the aggregate of 3 crowd-workers, as measured by the area under the ROC curve and Spearman correlation. Using this corpus of machine-labeled scores, our methodology allows us to explore some of the open questions about the nature of online personal attacks. This reveals that the majority of personal attacks on Wikipedia are not the result of a few malicious users, nor primarily the consequence of allowing anonymous contributions from unregistered users.}, + file = {/home/nathante/Zotero/storage/4PBY4Z39/Wulczyn et al. 
- 2017 - Ex Machina Personal Attacks Seen at Scale.pdf}, + isbn = {978-1-4503-4913-0}, + language = {en} +} + +@article{xu_empirical_2015, + title = {An Empirical Study of the Motivations for Content Contribution and Community Participation in {{Wikipedia}}}, + author = {Xu, Bo and Li, Dahui}, + year = {2015}, + month = apr, + volume = {52}, + pages = {275--286}, + issn = {0378-7206}, + abstract = {Internet users' participation and contributions are critical to the growth of Wikipedia. Based on self-determination theory, this study investigates the impacts of several motivational factors on two different types of user behaviors: content contribution and community participation. The research findings show that content contribution is more often driven by extrinsically oriented motivations, including reciprocity and the need for self-development, while community participation is more often driven by intrinsically oriented motivations, including altruism and a sense of belonging to the community. This paper contributes empirically to the research on Wikipedia, and it has practical implications for open content system development and management.}, + file = {/home/nathante/Zotero/storage/P4DN9DGC/Xu and Li - 2015 - An empirical study of the motivations for content .pdf;/home/nathante/Zotero/storage/WQ8JN8M2/S0378720614001487.html}, + journal = {Information \& Management}, + keywords = {Contribution,Motivation,Participation,Wikipedia}, + number = {3} +} + +@inproceedings{yang_who_2016, + title = {Who {{Did What}}: {{Editor Role Identification}} in {{Wikipedia}}}, + shorttitle = {Who {{Did What}}}, + booktitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Yang, Diyi and Halfaker, Aaron and Kraut, Robert and Hovy, Eduard}, + year = {2016}, + month = mar, + abstract = {Understanding the social roles played by contributors to online communities can facilitate the process of task routing. 
In this work, we develop new techniques to find roles in Wikipedia based on editors' low-level edit types and investigate how work contributed by people from different roles affect the article quality. To do this, we first built machine-learning models to automatically identify the edit categories associated with edits. We then applied a graphical model analogous to Latent Dirichlet Allocation to uncover the latent roles in editors' edit histories. Applying this technique revealed eight different roles editors play. Finally, we validated how our identified roles collaborate to improve the quality of articles. The results demonstrate that editors carrying on different roles contribute differently in terms of edit categories and articles in different quality stages need different types of editors. Implications for editor role identification and the validation of role contribution are discussed.}, + copyright = {Authors who publish a paper in this conference agree to the following terms: 1. Author(s) agree to transfer their copyrights in their article/paper to the Association for the Advancement of Artificial Intelligence (AAAI), in order to deal with future requests for reprints, translations, anthologies, reproductions, excerpts, and other publications. This grant will include, without limitation, the entire copyright in the article/paper in all countries of the world, including all renewals, extensions, and reversions thereof, whether such rights current exist or hereafter come into effect, and also the exclusive right to create electronic versions of the article/paper, to the extent that such right is not subsumed under copyright. 2. The author(s) warrants that they are the sole author and owner of the copyright in the above article/paper, except for those portions shown to be in quotations; that the article/paper is original throughout; and that the undersigned right to make the grants set forth above is complete and unencumbered. 3. 
The author(s) agree that if anyone brings any claim or action alleging facts that, if true, constitute a breach of any of the foregoing warranties, the author(s) will hold harmless and indemnify AAAI, their grantees, their licensees, and their distributors against any liability, whether under judgment, decree, or compromise, and any legal fees and expenses arising out of that claim or actions, and the undersigned will cooperate fully in any defense AAAI may make to such claim or action. Moreover, the undersigned agrees to cooperate in any claim or other action seeking to protect or enforce any right the undersigned has granted to AAAI in the article/paper. If any such claim or action fails because of facts that constitute a breach of any of the foregoing warranties, the undersigned agrees to reimburse whomever brings such claim or action for expenses and attorneys' fees incurred therein. 4. Author(s) retain all proprietary rights other than copyright (such as patent rights). 5. Author(s) may make personal reuse of all or portions of the above article/paper in other works of their own authorship. 6. Author(s) may reproduce, or have reproduced, their article/paper for the author's personal use, or for company use provided that AAAI copyright and the source are indicated, and that the copies are not used in a way that implies AAAI endorsement of a product or service of an employer, and that the copies per se are not offered for sale. The foregoing right shall not permit the posting of the article/paper in electronic or digital form on any computer network, except by the author or the author's employer, and then only on the author's or the employer's own web page or ftp site. 
Such web page or ftp site, in addition to the aforementioned requirements of this Paragraph, must provide an electronic reference or link back to the AAAI electronic server, and shall not post other AAAI copyrighted materials not of the author's or the employer's creation (including tables of contents with links to other papers) without AAAI's written permission. 7. Author(s) may make limited distribution of all or portions of their article/paper prior to publication. 8. In the case of work performed under U.S. Government contract, AAAI grants the U.S. Government royalty-free permission to reproduce all or portions of the above article/paper, and to authorize others to do so, for U.S. Government purposes. 9. In the event the above article/paper is not accepted and published by AAAI, or is withdrawn by the author(s) before acceptance by AAAI, this agreement becomes null and void.}, + file = {/home/nathante/Zotero/storage/JEAQ6XXM/Yang et al. - 2016 - Who Did What Editor Role Identification in Wikipe.pdf;/home/nathante/Zotero/storage/7VW4E8L2/13077.html}, + language = {en} +} + +@inproceedings{yin_understanding_2019, + title = {Understanding the {{Effect}} of {{Accuracy}} on {{Trust}} in {{Machine Learning Models}}}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} - {{CHI}} '19}, + author = {Yin, Ming and Wortman Vaughan, Jennifer and Wallach, Hanna}, + year = {2019}, + pages = {1--12}, + publisher = {{ACM Press}}, + address = {{Glasgow, Scotland, UK}}, + abstract = {We address a relatively under-explored aspect of human\textendash computer interaction: people's abilities to understand the relationship between a machine learning model's stated performance on held-out data and its expected performance post deployment. 
We conduct large-scale, randomized human-subject experiments to examine whether laypeople's trust in a model, measured in terms of both the frequency with which they revise their predictions to match those of the model and their self-reported levels of trust in the model, varies depending on the model's stated accuracy on held-out data and on its observed accuracy in practice. We find that people's trust in a model is affected by both its stated accuracy and its observed accuracy, and that the effect of stated accuracy can change depending on the observed accuracy. Our work relates to recent research on interpretable machine learning, but moves beyond the typical focus on model internals, exploring a different component of the machine learning pipeline.}, + file = {/home/nathante/Zotero/storage/2JKW7MKM/Yin et al. - 2019 - Understanding the Effect of Accuracy on Trust in M.pdf}, + isbn = {978-1-4503-5970-2}, + language = {en} +} + +@inproceedings{young_effect_2018, + title = {The Effect of Moderator Bots on Abusive Language Use}, + booktitle = {Proceedings of the {{International Conference}} on {{Pattern Recognition}} and {{Artificial Intelligence}}}, + author = {Young, Li-Yin}, + year = {2018}, + pages = {133--137}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Moderator bots are widely used on forums and social media. On Internet forums, moderator bots play an important role in automatically monitoring the content of images and the text present on the forum as well as providing repetitive information without the need for interaction with an administrator. The increasing use of moderator bots inspired me to study the performance of moderator bots. To date, research investigating the effect of moderator bots has not been conducted. Herein, we analyzed Reddit, a popular U.S. social news aggregation platform, which uses a moderator bot (AutoModerator) to mitigate the invalid comments occurring in discussion groups. 
This has thus become an ideal research opportunity for this study. We implemented a regression discontinuity design and interrupted time-series analysis to estimate the effect of AutoModerator on word-quality improvement. We observed an abrupt and significant decrease in the rate of abusive posts to which AutoModerator was attached. These results suggest that AutoModerator has been effective in controlling the word quality.}, + file = {/home/nathante/Zotero/storage/AVL7LIXY/Young_2018_The effect of moderator bots on abusive language use.pdf}, + isbn = {978-1-4503-6482-9}, + keywords = {Abusive Language Use,Moderator Bots,Online Communities,Regression Discontinuity Design}, + series = {{{PRAI}} 2018} +} + +@article{zhu_value-sensitive_2018, + title = {Value-{{Sensitive Algorithm Design}}: {{Method}}, {{Case Study}}, and {{Lessons}}}, + shorttitle = {Value-{{Sensitive Algorithm Design}}}, + author = {Zhu, Haiyi and Yu, Bowen and Halfaker, Aaron and Terveen, Loren}, + year = {2018}, + month = nov, + volume = {2}, + pages = {194:1--194:23}, + issn = {2573-0142}, + abstract = {Most commonly used approaches to developing automated or artificially intelligent algorithmic systems are Big Data-driven and machine learning-based. However, these approaches can fail, for two notable reasons: (1) they may lack critical engagement with users and other stakeholders; (2) they rely largely on historical human judgments, which do not capture and incorporate human insights into how the world can be improved in the future. We propose and describe a novel method for the design of such algorithms, which we call Value Sensitive Algorithm Design. Value Sensitive Algorithm Design incorporates stakeholders' tacit knowledge and explicit feedback in the early stages of algorithm creation. This increases the chance to avoid biases in design choices or to compromise key stakeholder values. 
Generally, we believe that algorithms should be designed to balance multiple stakeholders' needs, motivations, and interests, and to help achieve important collective goals. We also describe a specific project "Designing Intelligent Socialization Algorithms for WikiProjects in Wikipedia" to illustrate our method. We intend this paper to contribute to the rich ongoing conversation concerning the use of algorithms in supporting critical decision-making in society.}, + file = {/home/nathante/Zotero/storage/ANVA3X68/Zhu et al. - 2018 - Value-Sensitive Algorithm Design Method, Case Stu.pdf}, + journal = {Proc. ACM Hum.-Comput. Interact.}, + keywords = {algorithmic intervention,online communities,online recruitment,peer production,system buildings,value-sensitive algorithm design,wikipedia,wikiprojects}, + number = {CSCW} +} + +@article{zorn_institutional_2011, + title = {Institutional and {{Noninstitutional Influences}} on {{Information}} and {{Communication Technology Adoption}} and {{Use Among Nonprofit Organizations}}}, + author = {Zorn, Theodore E. and Flanagin, Andrew J. and Shoham, Mirit Devorah}, + year = {2011}, + month = jan, + volume = {37}, + pages = {1--33}, + issn = {1468-2958}, + abstract = {In this study, nonprofit organizations (NPOs) in New Zealand were surveyed to explore influences on adoption and use of information and communication technologies (ICTs). We sought to extend existing research by considering ``institutional'' influences alongside organizational and environmental features and by examining how institutional forces affect optimal use of ICTs. Findings suggest that NPOs adopting and using ICTs tended to be self-perceived leaders or those who scanned the environment and emulated leaders and tended to have organizational decisionmakers with the expertise to enable adoption and use. Furthermore, optimal fit of ICTs tended to be spurred by institutional forces if accompanied by self-perceived leadership and appropriate organizational resources. 
Implications for practice and theory are explored.}, + file = {/home/nathante/Zotero/storage/J9N68AD3/Zorn et al. - 2011 - Institutional and Noninstitutional Influences on I.pdf;/home/nathante/Zotero/storage/D7ZJFQXA/abstract.html}, + journal = {Human Communication Research}, + language = {en}, + number = {1} +} + + diff --git a/dissertations/nathante_uw_2021/references.bib b/dissertations/nathante_uw_2021/references.bib new file mode 100644 index 0000000..b9d661b --- /dev/null +++ b/dissertations/nathante_uw_2021/references.bib @@ -0,0 +1,2448 @@ + +@inproceedings{ackerman_answer_1990, + title = {Answer {{Garden}}: {{A Tool}} for {{Growing Organizational Memory}}}, + shorttitle = {Answer {{Garden}}}, + booktitle = {Proceedings of the {{ACM SIGOIS}} and {{IEEE CS TC}}-{{OA Conference}} on {{Office Information Systems}}}, + author = {Ackerman, M. S. and Malone, T. W.}, + year = {1990}, + series = {{{COCS}} '90}, + pages = {31--39}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {Answer Garden allows organizations to develop databases of commonly asked questions that grow ``organically'' as new questions arise and are answered. It is designed to help in situations (such as field service organizations and customer ``hot lines'') where there is a continuing stream of questions, many of which occur over and over, but some of which the organization has never seen before. The system includes a branching network of diagnostic questions that helps users find the answers they want. If the answer is not present, the system automatically sends the question to the appropriate expert, and the answer is returned to the user as well as inserted into the branching network. Experts can also modify this network in response to users' problems. 
Our initial Answer Garden database contains questions and answers about how to use the X Window System.}, + isbn = {978-0-89791-358-4}, + file = {/home/nathante/Zotero/storage/Q6XN2KED/Ackerman and Malone - 1990 - Answer Garden A Tool for Growing Organizational M.pdf} +} + +@article{ackerman_sharing_2013, + title = {Sharing {{Knowledge}} and {{Expertise}}: {{The CSCW View}} of {{Knowledge Management}}}, + shorttitle = {Sharing {{Knowledge}} and {{Expertise}}}, + author = {Ackerman, Mark S. and Dachtera, Juri and Pipek, Volkmar and Wulf, Volker}, + year = {2013}, + month = aug, + journal = {Computer Supported Cooperative Work (CSCW)}, + volume = {22}, + number = {4-6}, + pages = {531--573}, + issn = {0925-9724, 1573-7551}, + abstract = {Knowledge Management (KM) is a diffuse and controversial term, which has been used by a large number of research disciplines. CSCW, over the last 20 years, has taken a critical stance towards most of these approaches, and instead, CSCW shifted the focus towards a practice-based perspective. This paper surveys CSCW researchers' viewpoints on what has become called `knowledge sharing' and `expertise sharing'. These are based in an understanding of the social contexts of knowledge work and practices, as well as in an emphasis on communication among knowledgeable humans. The paper provides a summary and overview of the two strands of knowledge and expertise sharing in CSCW, which, from an analytical standpoint, roughly represent `generations' of research: an `object-centric' and a `people-centric' view. We also survey the challenges and opportunities ahead.}, + language = {en} +} + +@inproceedings{adamic_knowledge_2008, + title = {Knowledge Sharing and Yahoo Answers: Everyone Knows Something}, + shorttitle = {Knowledge Sharing and Yahoo Answers}, + booktitle = {Proceedings of the 17th International Conference on {{World Wide Web}}}, + author = {Adamic, Lada A. 
and Zhang, Jun and Bakshy, Eytan and Ackerman, Mark S.}, + year = {2008}, + month = apr, + series = {{{WWW}} '08}, + pages = {665--674}, + publisher = {{Association for Computing Machinery}}, + address = {{Beijing, China}}, + abstract = {Yahoo Answers (YA) is a large and diverse question-answer forum, acting not only as a medium for sharing technical knowledge, but as a place where one can seek advice, gather opinions, and satisfy one's curiosity about a countless number of things. In this paper, we seek to understand YA's knowledge sharing and activity. We analyze the forum categories and cluster them according to content characteristics and patterns of interaction among the users. While interactions in some categories resemble expertise sharing forums, others incorporate discussion, everyday advice, and support. With such a diversity of categories in which one can participate, we find that some users focus narrowly on specific topics, while others participate across categories. This not only allows us to map related categories, but to characterize the entropy of the users' interests. We find that lower entropy correlates with receiving higher answer ratings, but only for categories where factual expertise is primarily sought after. 
We combine both user attributes and answer characteristics to predict, within a given category, whether a particular answer will be chosen as the best answer by the asker.}, + isbn = {978-1-60558-085-2}, + file = {/home/nathante/Zotero/storage/W97ZJFJS/Adamic et al_2008_Knowledge sharing and yahoo answers.pdf} +} + +@article{barnett_predicting_2017, + title = {Predicting International {{Facebook}} Ties through Cultural Homophily and Other Factors}, + author = {Barnett, George A and Benefield, Grace A}, + year = {2017}, + month = feb, + journal = {New Media \& Society}, + volume = {19}, + number = {2}, + pages = {217--239}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study describes the structure of the international Facebook friendship network and its determinants using various predictors, including physical proximity, cultural homophily, and communication. Network analysis resulted in one group of nations, with countries that bridge geographic and linguistic clusters (France, Spain, United Kingdom, and United Arab Emirates) being the most central. Countries with international Facebook friendship ties tended to share borders, language, civilization, and migration. Physical distance, shared hyperlinks, use of common websites, telephone traffic, cultural similarity, and international student exchange were either weakly or not significantly related to international Facebook friendships.}, + language = {en}, + keywords = {Communication network analysis,cultural homophily,Facebook,international friendship,social media (SNS)}, + file = {/home/nathante/Zotero/storage/LPCY3MMC/Barnett and Benefield - 2017 - Predicting international Facebook ties through cul.pdf} +} + +@incollection{baum_ecological_2006, + title = {Ecological Approaches to Organizations}, + booktitle = {Sage {{Handbook}} for {{Organization Studies}}}, + author = {Baum, Joel A. C. 
and Shipilov, Andrew V.}, + year = {2006}, + pages = {55--110}, + publisher = {{Sage}}, + address = {{Rochester, NY}}, + abstract = {Our goal is to assess and consolidate the current state-of-the-art in organizational ecology. To accomplish this we review major theoretical statements, empirical studies, and arguments that are now being made. Although we attempt to survey ecological approaches to organizations comprehensively, because ecological research now constitutes a very large body of work, and because other extensive reviews are available (Aldrich \& Wiedenmayer, 1993; Barnett \& Carroll, 1995; Baum, 1996; Baum \& Amburgey, 2002; Baum \& Rao, 2004; Carroll, Dobrev \& Swaminathan, 2002; Galunic \& Weeks 2002; Rao, 2002; Singh \& Lumsden, 1990), we emphasize recent work that challenges and extends established theory and highlight new and emerging directions for future research that appear promising. Our appraisal focuses on two main themes - demographic processes and ecological processes.}, + file = {/home/nathante/Zotero/storage/EGQC2W5I/Baum and Shipilov - 2006 - Ecological approaches to organizations.pdf;/home/nathante/Zotero/storage/38MBRGMQ/papers.html} +} + +@article{baum_organizational_1994, + title = {Organizational {{Niches}} and the {{Dynamics}} of {{Organizational Founding}}}, + author = {Baum, Joel A. C. and Singh, Jitendra V.}, + year = {1994}, + journal = {Organization Science}, + volume = {5}, + number = {4}, + pages = {483--501}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {In this paper we argue that patterns of organizational niche overlap and nonoverlap influence the organizational niches in which entrepreneurs create organizations. Organizational niches characterize the different resource requirements and productive capacities of individual organizations in a population. Depending on which organizational niches are targeted, entrepreneurs will face different competitive landscapes. 
For a population of day care centers (DCCs), we measure organizational niches and compute organizational niche overlaps in terms of the ages of children they are licensed to enroll. Using weights based on organizational niche overlaps, we disaggregate population density (i.e., the number of DCCs) into overlap density and nonoverlap density to measure the potential for competition and cooperation among DCCs. The overlap density of an organizational niche is equal to population density weighted by the overlaps of the focal organizational niche with all other organizational niches. Conversely, non-overlap density is equal to population density weighted by the absence of overlaps of a focal organizational niche with all other organizational niches. We hypothesize that overlap density will be negatively related to the founding rate. We expect entrepreneurs will be much less likely to target or be capable of founding organizations in crowded parts of the resource space than parts that are less densely populated. We also hypothesize that nonoverlap density will be positively related to the founding rate. This is because differentiated DCCs do not compete directly for resources, and, at the same time, their presence can have facilitative influences through complementary demand enhancement and widening social acceptance of the organization form. Supporting these predictions, a dynamic analysis showed that overlap density had a competitive effect on the founding rate, while nonoverlap density had a positive effect. Parallel effects were obtained when overlap and nonoverlap densities were further disaggregated on the basis of geographic proximity into local and diffuse components. Overall, our findings are consistent with earlier research on organizational founding at the population level, but reveal intrapopulation patterns of mutualism and competition that influence the likelihood of organizations being established in different organizational niches. 
The key result of this study, that location in a multidimensional resource space, together with the distribution of other competitors and noncompetitors, has a significant impact on founding probabilities serves to illuminate some of the underlying dynamics of competition and mutualism that impact strategic and entrepreneurial processes.}, + file = {/home/nathante/Zotero/storage/E2AGCRNI/Baum and Singh - 1994 - Organizational Niches and the Dynamics of Organiza.pdf} +} + +@article{baumgartner_pushshift_2020, + title = {The {{Pushshift Reddit}} Dataset}, + author = {Baumgartner, Jason and Zannettou, Savvas and Keegan, Brian and Squire, Megan and Blackburn, Jeremy}, + year = {2020}, + month = may, + journal = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {14}, + pages = {830--839}, + issn = {2334-0770}, + copyright = {Copyright (c) 2020 Association for the Advancement of Artificial Intelligence}, + language = {en}, + keywords = {pushift,reddit}, + file = {/home/nathante/Zotero/storage/DHRFJ58I/Baumgartner et al. - 2020 - The Pushshift Reddit Dataset.pdf;/home/nathante/Zotero/storage/G5E8SQFN/Baumgartner et al_2020_The Pushshift Reddit Dataset.pdf;/home/nathante/Zotero/storage/A8X5UY9R/2001.html;/home/nathante/Zotero/storage/B9FRQR94/7347.html} +} + +@incollection{benkler_peer_2015, + title = {Peer Production: {{A}} Form of Collective Intelligence}, + booktitle = {Handbook of {{Collective Intelligence}}}, + author = {Benkler, Yochai and Shaw, Aaron and Hill, Benjamin Mako}, + editor = {Malone, Thomas W. 
and Bernstein, Michael S.}, + year = {2015}, + pages = {175--204}, + publisher = {{MIT Press}}, + address = {{Cambridge, MA}}, + isbn = {978-0-262-02981-0}, + language = {en} +} + +@book{benkler_wealth_2006, + title = {The Wealth of Networks: {{How}} Social Production Transforms Markets and Freedom}, + author = {Benkler, Yochai}, + year = {2006}, + publisher = {{Yale University Press}}, + address = {{New Haven, CT}}, + keywords = {bookReview,Economics,FOSS,foundations of social computing,import,Innovation,Legal Studies,peer production} +} + +@incollection{bernstein_quantifying_2013, + title = {Quantifying the Invisible Audience in Social Networks}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Bernstein, Michael S. and Bakshy, Eytan and Burke, Moira and Karrer, Brian}, + year = {2013}, + month = apr, + pages = {21--30}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {When you share content in an online social network, who is listening? Users have scarce information about who actually sees their content, making their audience seem invisible and difficult to estimate. However, understanding this invisible audience can impact both science and design, since perceived audiences influence content production and self-presentation online. In this paper, we combine survey and large-scale log data to examine how well users' perceptions of their audience match their actual audience on Facebook. We find that social media users consistently underestimate their audience size for their posts, guessing that their audience is just 27\% of its true size. Qualitative coding of survey responses reveals folk theories that attempt to reverse-engineer audience size using feedback and friend count, though none of these approaches are particularly accurate. 
We analyze audience logs for 222,000 Facebook users' posts over the course of one month and find that publicly visible signals --- friend count, likes, and comments --- vary widely and do not strongly indicate the audience of a single post. Despite the variation, users typically reach 61\% of their friends each month. Together, our results begin to reveal the invisible undercurrents of audience attention and behavior in online social networks.}, + isbn = {978-1-4503-1899-0}, + keywords = {audience,information distribution,social networks} +} + +@article{bilgrei_broscience_2018, + title = {Broscience: {{Creating}} Trust in Online Drug Communities}, + shorttitle = {Broscience}, + author = {Bilgrei, Ola R{\o}ed}, + year = {2018}, + month = aug, + journal = {New Media \& Society}, + volume = {20}, + number = {8}, + pages = {2712--2727}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study explores the social mechanisms involved in online community trust. Drawing on interviews with members from two Norwegian Internet drug forums, the article illustrates how forum members evaluate the trustworthiness of online user-generated drug content, referred to as `broscience'. First, the shared narratives and boundaries within the forums generated a sense of collective identity, where members defined their online surroundings in terms of community trust and collaboration. Second, the subcultural argot within the forums helped members express a level of subcultural competence and authenticity, in which they were able to assess their credibility and initial trustworthiness. Third, the reputation linked to online identities created expectations and predictability as a basis for evaluating members' trustworthiness. 
These findings touch upon the ambivalence of trust in an online setting and highlight the communal process that caused their ambivalence to be suspended, thereby enabling online community trust.}, + language = {en}, + keywords = {Broscience,drugs,Internet subculture,online community,trust}, + file = {/home/nathante/Zotero/storage/WBMSUCSH/Bilgrei - 2018 - Broscience Creating trust in online drug communit.pdf} +} + +@article{boyd_social_2007, + title = {Social {{Network Sites}}: {{Definition}}, {{History}}, and {{Scholarship}}}, + shorttitle = {Social {{Network Sites}}}, + author = {Boyd, Danah M and Ellison, Nicole B.}, + year = {2007}, + month = oct, + journal = {Journal of Computer-Mediated Communication}, + volume = {13}, + number = {1}, + pages = {210--230}, + publisher = {{Oxford Academic}}, + abstract = {Social network sites (SNSs) are increasingly attracting the attention of academic and industry researchers intrigued by their affordances and reach. This special theme section of the Journal of Computer-Mediated Communication brings together scholarship on these emergent phenomena. In this introductory article, we describe features of SNSs and propose a comprehensive definition. We then present one perspective on the history of such sites, discussing key changes and developments. 
After briefly summarizing existing scholarship concerning SNSs, we discuss the articles in this special section and conclude with considerations for future research.}, + language = {en}, + file = {/home/nathante/Zotero/storage/6BMGYUAE/Boyd and Ellison - 2007 - Social Network Sites Definition, History, and Sch.pdf;/home/nathante/Zotero/storage/JK59CLHH/4583062.html} +} + +@inproceedings{brandtzaeg_user_2008, + title = {User {{Loyalty}} and {{Online Communities}}: {{Why Members}} of {{Online Communities}} Are Not {{Faithful}}}, + shorttitle = {User {{Loyalty}} and {{Online Communities}}}, + booktitle = {Proceedings of the 2nd {{International Conference}} on {{INtelligent TEchnologies}} for Interactive {{enterTAINment}}}, + author = {Brandtz{\ae}g, Petter Bae and Heim, Jan}, + year = {2008}, + publisher = {{ICST}}, + address = {{Cancun, Mexico}}, + abstract = {Online communities are getting increasingly important for several different user groups; at the same time, community members seem to lack loyalty, as they often change from one community to another or use their community less over time. To survive and thrive, online communities must meet members' needs. By using qualitative data are from an extensive online survey of online community users and a representative sample of Internet users, 200 responses to an open question regarding community-loyalty was analyzed. Results show that there are 9 main reasons why community-users decrease in their participation over time or, in simple terms, stop using their online community: 1) Lack of interesting people/friends attending, 2) Low quality content, 3) Low usability, 4) Harassment and bullying 5) Timeconsuming/isolating, 6) Low trust, 7) Over-commercialized, 8) Dissatisfaction with moderators and 9) Unspecified boring. 
The results, design implications and future research are discussed.}, + isbn = {978-963-9799-13-4}, + language = {en}, + file = {/home/nathante/Zotero/storage/2KNF5QHS/Brandtzæg and Heim - 2008 - User Loyalty and Online Communities Why Members o.pdf} +} + +@article{brown_social_1987, + title = {Social {{Ties}} and {{Word}}-of-{{Mouth Referral Behavior}}}, + author = {Brown, Jacqueline Johnson and Reingen, Peter H.}, + year = {1987}, + journal = {Journal of Consumer Research}, + volume = {14}, + number = {3}, + pages = {350--362}, + publisher = {{Oxford University Press}}, + issn = {0093-5301}, + abstract = {This article presents a network analysis of word-of-mouth referral behavior in a natural environment. The relational properties of tie strength and homophily were employed to examine referral behavior at micro and macro levels of inquiry. The study demonstrates different roles played by weak and strong social ties. At the macro level, weak ties displayed an important bridging function, allowing information to travel from one distinct subgroup of referral actors to another subgroup in the broader social system. At the micro level, strong and homophilous ties were more likely to be activated for the flow of referral information. 
Strong ties were also perceived as more influential than weak ties, and they were more likely to be utilized as sources of information for related goods.} +} + +@article{burnett_information_2004, + title = {Information {{Exchange}} in {{Virtual Communities}}: A {{Comparative Study}}}, + shorttitle = {Information {{Exchange}} in {{Virtual Communities}}}, + author = {Burnett, Gary and Buerkle, Harry}, + year = {2004}, + month = jan, + journal = {Journal of Computer-Mediated Communication}, + volume = {9}, + number = {JCMC922}, + issn = {1083-6101}, + abstract = {Burnett's (2000) typology of information exchange in virtual communities attempts to provide a framework for examining the range of activities undertaken by participants in such communities. This study is the first in a series to apply the typology to specific virtual communities, in an effort to assess its accuracy against the day-to-day interactions to be found in two online communities. Through a comparison of these two communities using the typology, revisions to the typology are proposed which will allow it to reflect more accurately activities found within the communities. By providing a metric through which to address such questions, the revised typology will allow a richer understanding of virtual communities as social information environments.}, + file = {/home/nathante/Zotero/storage/39C7RSD8/4614481.html} +} + +@article{butler_attraction-selection-attrition_2014, + title = {An Attraction-Selection-Attrition Theory of Online Community Size and Resilience}, + author = {Butler, Brian S. and Bateman, Patrick J. and Gray, Peter H. and Diamant, E. Ilana}, + year = {2014}, + month = sep, + journal = {MIS Q.}, + volume = {38}, + number = {3}, + pages = {699--728}, + issn = {0276-7783}, + abstract = {Online discussion communities play an important role in the development of relationships and the transfer of knowledge within and across organizations. 
Their underlying technologies enhance these processes by providing infrastructures through which group-based communication can occur. Community administrators often make decisions about technologies with the goal of enhancing the user experience, but the impact of such decisions on how a community develops must also be considered. To shed light on this complex and under-researched phenomenon, we offer a model of key latent constructs influenced by technology choices and possible causal paths by which they have dynamic effects on communities. Two important community characteristics that can be impacted are community size (number of members) and community resilience (membership that is willing to remain involved with the community in spite of variability and change in the topics discussed). To model community development, we build on attraction-selection-attrition (ASA) theory, introducing two new concepts: participation costs (how much time and effort are required to engage with content provided in a community) and topic consistency cues (how strongly a community signals that topics that may appear in the future will be consistent with what it has hosted in the past). We use the proposed ASA theory of online communities (OCASA) to develop a simulation model of community size and resilience that affirms some conventional wisdom and also has novel and counterintuitive implications. Analysis of the model leads to testable new propositions about the causal paths by which technology choices affect the emergence of community size and community resilience, and associated implications for community sustainability.}, + file = {/home/nathante/Zotero/storage/292C8XTF/Butler et al. - 2014 - An Attraction-selection-attrition Theory of Online.pdf} +} + +@article{butler_cross-purposes_2011, + title = {The Cross-Purposes of Cross-Posting: Boundary Reshaping Behavior in Online Discussion Communities}, + shorttitle = {The Cross-Purposes of Cross-Posting}, + author = {Butler, Brian S. 
and Wang, Xiaoqing}, + year = {2011}, + month = sep, + journal = {Information Systems Research}, + volume = {23}, + number = {3-part-2}, + pages = {993--1010}, + issn = {1047-7047}, + abstract = {Increasingly, online discussion communities are used to support activities ranging from software development to political campaigns. An important feature of an online discussion community is its content boundaries, which are individual perceptions of what materials and discussions are part of the community and what are not, and how that community is related to others within a larger system. Yet in spite of its importance, many community infrastructures allow individual participants to reshape content boundaries by simultaneously associating their contributions with multiple online discussion communities. This reshaping behavior is a controversial aspect of the creation and management of many types of online discussion communities. On one hand, many communities explicitly discourage boundary reshaping behaviors in their frequently asked questions or terms-of-use document. On the other hand, community infrastructures continue to allow such reshaping behaviors. To explain this controversy, we theorize how the extent of boundary reshaping in an online discussion community has simultaneously positive and negative effects on its member dynamics and responsiveness. We test predictions about the conflicting effects of reshaping behaviors with 60 months of longitudinal data from 140 USENET newsgroups, focusing on cross-posting activities as a form of reshaping behavior. Empirical results are consistent with the proposed hypotheses that reshaping behaviors within a discussion community affect member dynamics and community responsiveness in both positive and negative ways. 
Taken together, the findings highlight the boundary-related design challenges faced by managers seeking to support ongoing activity within online discussion communities.}, + file = {/home/nathante/Zotero/storage/MHIHVXMA/Butler and Wang - 2012 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/ZDTPFJP3/Butler and Wang - 2011 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/5XCPFJS9/isre.1110.html} +} + +@article{butler_membership_2001, + title = {Membership Size, Communication Activity, and Sustainability: {{A}} Resource-Based Model of Online Social Structures}, + shorttitle = {Membership {{Size}}, {{Communication Activity}}, and {{Sustainability}}}, + author = {Butler, Brian S.}, + year = {2001}, + journal = {Information Systems Research}, + volume = {12}, + number = {4}, + pages = {346--362}, + issn = {1047-7047}, + abstract = {As telecommunication networks become more common, there is an increasing interest in the factors underlying the development of online social structures. It has been proposed that these structures are new forms of organizing which are not subject to the same constraints as traditional social structures. However, from anecdotal evidence and case studies it is difficult to evaluate whether online social structures are subject to the same problems as traditional social structures. Drawing from prior studies of traditional social structures and empirical analyses of longitudinal data from a sample of Internet-based groups, this exploratory work considers the role of size and communication activity in sustainable online social structures. A resource-based theory of sustainable social structures is presented. Members contribute time, energy, and other resources, enabling a social structure to provide benefits for individuals. These benefits, which include information, influence, and social support, are the basis for a social structure's ability to attract and retain members. 
This model focuses on the system of opposing forces that link membership size as a component of resource availability and communication activity as an aspect of benefit provision to the sustainability of an online social structure. Analyses of data from a random sample of e-mail-based Internet social structures (listservs) indicate that communication activity and size have both positive and negative effects on a structure's sustainability. These results suggest that while the use of networked communication technologies may alter the form of communication, balancing the opposing impacts of membership size and communication activity in order to maintain resource availability and provide benefits for current members remains a fundamental problem underlying the development of sustainable online social structures.}, + copyright = {Copyright \textcopyright{} 2001 INFORMS}, + file = {/home/nathante/Zotero/storage/4ENNLMAH/Butler - 2001 - Membership Size, Communication Activity, and Susta.pdf;/home/nathante/Zotero/storage/U7AUNAZT/Butler-2001-ISR-Membership_size_communication_activitiy_sustainability.pdf} +} + +@inproceedings{campbell_thousands_2016, + title = {Thousands of {{Positive Reviews}}: {{Distributed Mentoring}} in {{Online Fan Communities}}}, + shorttitle = {Thousands of {{Positive Reviews}}}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer}}-{{Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Campbell, Julie and Aragon, Cecilia and Davis, Katie and Evans, Sarah and Evans, Abigail and Randall, David}, + year = {2016}, + month = feb, + series = {{{CSCW}} '16}, + pages = {691--704}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Young people worldwide are participating in ever-increasing numbers in online fan communities. 
Far from mere shallow repositories of pop culture, these sites are accumulating significant evidence that sophisticated informal learning is taking place online in novel and unexpected ways. In order to understand and analyze in more detail how learning might be occurring, we conducted an in-depth nine-month ethnographic investigation of online fanfiction communities, including participant observation and fanfiction author interviews. Our observations led to the development of a theory we term distributed mentoring, which we present in detail in this paper. Distributed mentoring exemplifies one instance of how networked technology affords new extensions of behaviors that were previously bounded by time and space. Distributed mentoring holds potential for application beyond the spontaneous mentoring observed in this investigation and may help students receive diverse, thoughtful feedback in formal learning environments as well.}, + isbn = {978-1-4503-3592-8}, + keywords = {digital youth.,distributed mentoring,fanfiction,informal learning,Mentoring,online communities}, + file = {/home/nathante/Zotero/storage/D9ZM58VV/Campbell et al. - 2016 - Thousands of Positive Reviews Distributed Mentori.pdf} +} + +@article{carroll_concentration_1985, + title = {Concentration and Specialization: {{Dynamics}} of Niche Width in Populations of Organizations}, + shorttitle = {Concentration and {{Specialization}}}, + author = {Carroll, Glenn R.}, + year = {1985}, + month = may, + journal = {American Journal of Sociology}, + volume = {90}, + number = {6}, + pages = {1262--1283}, + issn = {0002-9602}, + abstract = {This paper departs from the common practice of focusing on large, generalist organizations and shows that new organizational insights are obtained by adopting a broader, ecological perspective. The newspaper publishing industry is examined as an illustration. 
The ecological focus shows that many small, specialized organizations operate successfully in this industry, despite apparently high levels of local concentration. A resource-partitioning model is advanced to explain the interorganizational relationships between generalist and specialist organizations. Statistical tests of the model using historical data on 2,808 American local newspaper organizations show the merit of using the ecological perspective for analyzing industries.}, + file = {/home/nathante/Zotero/storage/G38AK5SZ/Carroll - 1985 - Concentration and specialization Dynamics of nich.pdf;/home/nathante/Zotero/storage/8PG3QCP3/228210.html} +} + +@article{carroll_why_2000, + title = {Why the Microbrewery Movement? {{Organizational}} Dynamics of Resource Partitioning in the {{U}}.{{S}}. Brewing Industry}, + shorttitle = {Why the {{Microbrewery Movement}}?}, + author = {Carroll, Glenn R. and Swaminathan, Anand}, + year = {2000}, + journal = {American Journal of Sociology}, + volume = {106}, + number = {3}, + pages = {715--762}, + issn = {0002-9602}, + abstract = {The number of small specialty brewers in the U.S. beer brewing industry has increased dramatically in recent decades, even as the market for beer became increasingly dominated by mass-production brewing companies. Using the resource-partitioning model of organizational ecology, this article shows that these two apparently contradictory trends are fundamentally interrelated. Hypotheses developed here refine the way scale competition among generalist organizations is modeled and improve the theoretical development of the sociological bases for the appeal of specialist organizations' products, especially those related to organizational identity. Evidence drawn from qualitative and quantitative research provides strong support for the theory. 
The article offers a brief discussion of the theoretical and substantive issues involved in application of the model to other industries and to other cultures.}, + file = {/home/nathante/Zotero/storage/X2ITSCRL/Carroll and Swaminathan - 2000 - Why the microbrewery movement Organizational dyna.pdf} +} + +@book{castells_rise_1996, + title = {Rise of {{The Network Society}} ({{Information Age Series}})}, + author = {Castells, Manuel}, + year = {1996}, + edition = {First}, + publisher = {{Wiley-Blackwell}}, + isbn = {1-55786-617-1} +} + +@inproceedings{chancellor_norms_2018, + title = {Norms {{Matter}}: {{Contrasting Social Support Around Behavior Change}} in {{Online Weight Loss Communities}}}, + shorttitle = {Norms {{Matter}}}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Chancellor, Stevie and Hu, Andrea and De Choudhury, Munmun}, + year = {2018}, + month = apr, + series = {{{CHI}} '18}, + pages = {1--14}, + publisher = {{Association for Computing Machinery}}, + address = {{Montreal QC, Canada}}, + abstract = {Online health communities (OHCs) provide support across conditions; for weight loss, OHCs offer support to foster positive behavior change. However, weight loss behaviors can also be subverted on OHCs to promote disordered eating practices. Using comments as proxies for support, we use computational linguistic methods to juxtapose similarities and differences in two Reddit weight loss communities, r/proED and r/loseit. We employ language modeling and find that word use in both communities is largely similar. Then, by building a word embedding model, specifically a deep neural network on comment words, we contrast the context of word use and find differences that imply different behavior change goals in these OHCs. Finally, these content and context norms predict whether a comment comes from r/proED or r/loseit. 
We show that norms matter in understanding how different OHCs provision support to promote behavior change and discuss the implications for design and moderation of OHCs.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/77YDPVB6/Chancellor et al. - 2018 - Norms Matter Contrasting Social Support Around Be.pdf} +} + +@article{chandrasekharan_internets_2018, + title = {The Internet's Hidden Rules: {{An}} Empirical Study of Reddit Norm Violations at Micro, Meso, and Macro Scales}, + shorttitle = {The {{Internet}}'s {{Hidden Rules}}}, + author = {Chandrasekharan, Eshwar and Samory, Mattia and Jhaver, Shagun and Charvat, Hunter and Bruckman, Amy and Lampe, Cliff and Eisenstein, Jacob and Gilbert, Eric}, + year = {2018}, + journal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + number = {CSCW}, + pages = {32:1--32:25}, + issn = {2573-0142}, + abstract = {Norms are central to how online communities are governed. Yet, norms are also emergent, arise from interaction, and can vary significantly between communities---making them challenging to study at scale. In this paper, we study community norms on Reddit in a large-scale, empirical manner. Via 2.8M comments removed by moderators of 100 top subreddits over 10 months, we use both computational and qualitative methods to identify three types of norms: macro norms that are universal to most parts of Reddit; meso norms that are shared across certain groups of subreddits; and micro norms that are specific to individual, relatively unique subreddits. Given the size of Reddit's user base---and the wide range of topics covered by different subreddits---we argue this represents the first large-scale census of the norms in broader internet culture. In other words, these findings shed light on what Reddit values, and how widely-held those values are. 
We conclude by discussing implications for the design of new and existing online communities.}, + keywords = {community norms,mixed methods.,moderation,online communities}, + file = {/home/nathante/Zotero/storage/2CA9ZVFB/Chandrasekharan et al. - 2018 - The Internet's Hidden Rules An Empirical Study of.pdf;/home/nathante/Zotero/storage/HUP7XT5H/Chandrasekharan et al_2018_The Internet's Hidden Rules.pdf} +} + +@article{chandrasekharan_quarantined_2020, + title = {Quarantined! {{Examining}} the {{Effects}} of a {{Community}}-{{Wide Moderation Intervention}} on {{Reddit}}}, + author = {Chandrasekharan, Eshwar and Jhaver, Shagun and Bruckman, Amy and Gilbert, Eric}, + year = {2020}, + month = sep, + journal = {arXiv:2009.11483 [cs]}, + eprint = {2009.11483}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Should social media platforms intervene when communities repeatedly break rules? What actions can they consider? In light of this hotly debated issue, platforms have begun experimenting with softer alternatives to outright bans. We examine one such intervention called quarantining, that impedes direct access to and promotion of controversial communities. Specifically, we present two case studies of what happened when Reddit quarantined the influential communities r/TheRedPill (TRP) and r/The\_Donald (TD). Working with over 85M Reddit posts, we apply causal inference methods to examine the quarantine's effects on TRP and TD. We find that the quarantine made it more difficult to recruit new members: new user influx to TRP and TD decreased by 79.5\% and 58\%, respectively. Despite quarantining, existing users' misogyny and racism levels remained unaffected. 
We conclude by reflecting on the effectiveness of this design friction in limiting the influence of toxic communities and discuss broader implications for content moderation.}, + archiveprefix = {arXiv}, + language = {en}, + keywords = {Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/CB26SNVJ/Chandrasekharan et al. - 2020 - Quarantined! Examining the Effects of a Community-.pdf} +} + +@article{chandrasekharan_you_2017, + ids = {chandrasekharan_you_2017-1}, + title = {You Can't Stay Here: {{The}} Efficacy of Reddit's 2015 Ban Examined through Hate Speech}, + shorttitle = {You Can't Stay Here}, + author = {Chandrasekharan, Eshwar and Pavalanathan, Umashanthi and Srinivasan, Anirudh and Glynn, Adam and Eisenstein, Jacob and Gilbert, Eric}, + year = {2017}, + month = dec, + journal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + number = {CSCW}, + pages = {31:1--31:22}, + issn = {2573-0142}, + abstract = {In 2015, Reddit closed several subreddits---foremost among them r/fatpeoplehate and r/CoonTown---due to violations of Reddit's anti-harassment policy. However, the effectiveness of banning as a moderation approach remains unclear: banning might diminish hateful behavior, or it may relocate such behavior to different parts of the site. We study the ban of r/fatpeoplehate and r/CoonTown in terms of its effect on both participating users and affected subreddits. Working from over 100M Reddit posts and comments, we generate hate speech lexicons to examine variations in hate speech usage via causal inference methods. We find that the ban worked for Reddit. More accounts than expected discontinued using the site; those that stayed drastically decreased their hate speech usage---by at least 80\%. Though many subreddits saw an influx of r/fatpeoplehate and r/CoonTown ``migrants,'' those subreddits saw no significant changes in hate speech usage. 
In other words, other subreddits did not inherit the problem. We conclude by reflecting on the apparent success of the ban, discussing implications for online moderation, Reddit and internet communities more broadly.}, + file = {/home/nathante/Zotero/storage/5Z8CCRM2/Chandrasekharan et al. - 2017 - You Can'T Stay Here The Efficacy of Reddit's 2015.pdf} +} + +@inproceedings{chang_specialization_2014, + title = {Specialization, Homophily, and Gender in a Social Curation Site: Findings from Pinterest}, + shorttitle = {Specialization, Homophily, and Gender in a Social Curation Site}, + booktitle = {Proceedings of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing}, + author = {Chang, Shuo and Kumar, Vikas and Gilbert, Eric and Terveen, Loren G.}, + year = {2014}, + month = feb, + series = {{{CSCW}} '14}, + pages = {674--686}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Pinterest is a popular social curation site where people collect, organize, and share pictures of items. We studied a fundamental issue for such sites: what patterns of activity attract attention (audience and content reposting)? We organized our studies around two key factors: the extent to which users specialize in particular topics, and homophily among users. We also considered the existence of differences between female and male users. We found: (a) women and men differed in the types of content they collected and the degree to which they specialized; male Pinterest users were not particularly interested in stereotypically male topics; (b) sharing diverse types of content increases your following, but only up to a certain point; (c) homophily drives repinning: people repin content from other users who share their interests; homophily also affects following, but to a lesser extent. 
Our findings suggest strategies both for users (e.g., strategies to attract an audience) and maintainers (e.g., content recommendation methods) of social curation sites.}, + isbn = {978-1-4503-2540-0}, + keywords = {data analysis,social network,topic detection,user profiling}, + file = {/home/nathante/Zotero/storage/RVP6MZ6S/Chang et al. - 2014 - Specialization, homophily, and gender in a social .pdf} +} + +@book{charmaz_constructing_2015, + ids = {charmaz_constructing_2014}, + title = {Constructing Grounded Theory: {{A}} Practical Guide through Qualitative Analysis}, + shorttitle = {Constructing {{Grounded Theory}}}, + author = {Charmaz, Kathy}, + year = {2015}, + edition = {Second}, + publisher = {{SAGE}}, + address = {{Thousand Oaks, California}}, + isbn = {0-7619-7352-4} +} + +@article{chesney_other_2004, + title = {``Other People Benefit. i Benefit from Their Work.'' {{Sharing Guitar Tabs Online}}}, + author = {Chesney, Thomas}, + year = {2004}, + month = nov, + journal = {Journal of Computer-Mediated Communication}, + volume = {10}, + number = {JCMC1012}, + issn = {1083-6101}, + abstract = {This paper reports the results of a study into a public space Internet portal which publishes guitar tabs (tablature) online, to examine what motivates people to participate in this activity and what benefits they get from doing so. A guitar tab is essentially sheet music for guitarists. The study examines why people contribute when it is easier for them not to publish their tabs and simply use the tabs that other people have posted. Answers to this will have implications for businesses wanting to encourage their employees to share their knowledge. An open ended questionnaire was sent to 183 tab publishers with a usable response rate of 39\%, which is considered high for surveys. The questionnaire sought to gather data on motivations, benefits and community interaction. 
The paper begins with a review of relevant theories of knowledge sharing and publishing, in particular the private-collective model of innovation (von Hippel \& von Krogh, 2003) which is used to analyze the results. Motivations are listed as under two categories, self and altruistic, with the most popular motivation being to share the songs with others, which is from the altruistic category. The most common benefit is personal satisfaction. The results show tab publishing fits with the private-collective model of innovation which means that a tab published online can be seen as a public good, as it is available to all, that has significant private elements. These private elements are the benefits that tab publishers get which the people who only use tabs without contributing their own, do not. The implications of the work are as follows. Enjoyment of the domain seems to be an important factor in motivating knowledge sharing. People who feel like they are part of a community and get satisfaction from being part of a community, will be more likely to contribute. The act of sharing knowledge should be as close to effortless as possible to encourage contributions. The act of preparing (collecting, collating etc.) the material to be shared should have meaning in itself for the person who is preparing it. If the act of sharing leads to increased status in the community people will be more likely to contribute. To encourage knowledge sharing, those who make use of the shared knowledge should be encouraged to give positive feedback to the person who shared it. 
To date, there has been little empirical work examining online posting forums.}, + file = {/home/nathante/Zotero/storage/JWW5X2DI/4614460.html} +} + +@article{ciampaglia_production_2015, + title = {The Production of Information in the Attention Economy}, + author = {Ciampaglia, Giovanni Luca and Flammini, Alessandro and Menczer, Filippo}, + year = {2015}, + month = may, + journal = {Scientific Reports}, + volume = {5}, + pages = {9452}, + issn = {2045-2322}, + file = {/home/nathante/Zotero/storage/Z5SM58N9/srep09452.pdf} +} + +@article{coleman_social_1988, + title = {Social {{Capital}} in the {{Creation}} of {{Human Capital}}}, + author = {Coleman, James S.}, + year = {1988}, + journal = {American Journal of Sociology}, + volume = {94}, + pages = {S95--S120}, + issn = {0002-9602}, + abstract = {In this paper, the concept of social capital is introduced and illustrated, its forms are described, the social structural conditions under which it arises are examined, and it is used in an analysis of dropouts from high school. Use of the concept of social capital is part of a general theoretical strategy discussed in the paper: taking rational action as a starting point but rejecting the extreme individualistic premises that often accompany it. The conception of social capital as a resource for action is one way of introducing social structure into the rational action paradigm. Three forms of social capital are examined: obligations and expectations, information channels, and social norms. The role of closure in the social structure in facilitating the first and third of these forms of social capital is described. An analysis of the effect of the lack of social capital available to high school sophomores on dropping out of school before graduation is carried out. 
The effect of social capital within the family and in the community outside the family is examined.}, + file = {/home/nathante/Zotero/storage/8B8X2LBV/Coleman - 1988 - Social Capital in the Creation of Human Capital.pdf;/home/nathante/Zotero/storage/83B63Z3Y/Coleman - 1988 - Social Capital in the Creation of Human Capital.html} +} + +@inproceedings{cook_contribution_2009, + title = {Contribution, Commercialization \& Audience: Understanding Participation in an Online Creative Community}, + shorttitle = {Contribution, Commercialization \& Audience}, + booktitle = {Proceedings of the {{ACM}} 2009 International Conference on {{Supporting}} Group Work}, + author = {Cook, Eric and Teasley, Stephanie D. and Ackerman, Mark S.}, + year = {2009}, + month = may, + series = {{{GROUP}} '09}, + pages = {41--50}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {This paper presents a qualitative study of attitudes towards participation and contribution in an online creative community. The setting of the work is an online community of practice focused on the use and development of a user-customizable music software package called Reaktor. Findings from the study highlight four emergent topics in the discourse related to user contributions to the community: contribution assessment, support for learning, perceptions of audience and tensions about commercialization. 
Our analysis of these topics frames discussion about the value and challenges of attending to amateur and professional users in online creative communities.}, + isbn = {978-1-60558-500-0}, + keywords = {amateurs,audiences,commercialization,community of practice,creativity,learning,online community,professionals,user-generated content} +} + +@article{copland_reddit_2020, + title = {Reddit Quarantined: Can Changing Platform Affordances Reduce Hateful Material Online?}, + shorttitle = {Reddit Quarantined}, + author = {Copland, Simon}, + year = {2020}, + month = oct, + journal = {Internet Policy Review}, + volume = {9}, + number = {4}, + publisher = {{Berlin: Alexander von Humboldt Institute for Internet and Society}}, + issn = {2197-6775}, + abstract = {Can we reduce hateful material online through changing platform affordances? Studying Reddit's quarantine function, this paper argues the results of this approach are mixed.}, + file = {/home/nathante/Zotero/storage/KY4RZWR4/Copland_2020_Reddit quarantined.pdf;/home/nathante/Zotero/storage/SZWA55IE/Copland_2020_Reddit quarantined.pdf;/home/nathante/Zotero/storage/9KXC37K7/225653.html;/home/nathante/Zotero/storage/M6NKY3K2/reddit-quarantined-can-changing-platform-affordances-reduce-hateful-material.html} +} + +@inproceedings{cunha_are_2019, + ids = {cunha_are_2019-1,cunha_are_2019-2}, + title = {Are All Successful Communities Alike? {{Characterizing}} and Predicting the Success of Online Communities}, + shorttitle = {Are All Successful Communities Alike?}, + booktitle = {The {{World Wide Web Conference}}}, + author = {Cunha, Tiago and Jurgens, David and Tan, Chenhao and Romero, Daniel}, + year = {2019}, + month = may, + series = {{{WWW}} '19}, + pages = {318--328}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {The proliferation of online communities has created exciting opportunities to study the mechanisms that explain group success. 
While a growing body of research investigates community success through a single measure - typically, the number of members - we argue that there are multiple ways of measuring success. Here, we present a systematic study to understand the relations between these success definitions and test how well they can be predicted based on community properties and behaviors from the earliest period of a community's lifetime. We identify four success measures that are desirable for most communities: (i) growth in the number of members; (ii) retention of members; (iii) long term survival of the community; and (iv) volume of activities within the community. Surprisingly, we find that our measures do not exhibit very high correlations, suggesting that they capture different types of success. Additionally, we find that different success measures are predicted by different attributes of online communities, suggesting that success can be achieved through different behaviors. Our work sheds light on the basic understanding on what success represents in online communities and what predicts it. Our results suggest that success is multi-faceted and cannot be measured nor predicted by a single measurement. This insight has practical implications for the creation of new online communities and the design of platforms that facilitate such communities.}, + isbn = {978-1-4503-6674-8}, + keywords = {Group Dynamics,Online Communities,Reddit,Success}, + file = {/home/nathante/Zotero/storage/CGBFCUGX/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/IYW3WKHV/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/PFS6682S/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/SMX88EL3/Cunha et al. 
- 2019 - Are All Successful Communities Alike Characterizi.pdf} +} + +@inproceedings{dabbish_fresh_2012, + ids = {dabbish_fresh_2012-1}, + title = {Fresh Faces in the Crowd: Turnover, Identity, and Commitment in Online Groups}, + shorttitle = {Fresh Faces in the Crowd}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work}}}, + author = {Dabbish, Laura and Farzan, Rosta and Kraut, Robert and Postmes, Tom}, + year = {2012}, + month = feb, + series = {{{CSCW}} '12}, + pages = {245--248}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Turnover is commonplace in many online groups because of low barriers of entry and exit. In offline settings, turnover can have a negative impact because of reduced attachment to the group as an entity. However, in an online setting, turnover in terms of changes in the visible membership of a group may have a very different impact. Online only a limited amount of information about members and their activities is observable; in particular, it is easier to see the behavior of the subset of members who are active than the potentially larger set who are not. In this paper, we describe an experiment examining the influence of visible membership turnover on commitment to an online group. Our results suggest that increased turnover in an online group may increase social presence, creating perceptions of liveness, in turn leading to increased levels of participation in the group. However, this result holds primarily for groups with a common identity, suggesting that attention to behavior of others may be stronger when people share an identity with those others. Our results extend understandings of attachment in an online setting as well as theory about social tuning.}, + isbn = {978-1-4503-1086-4}, + keywords = {attachment,commitment,identity.,online groups,turnover}, + file = {/home/nathante/Zotero/storage/3IQQP4JM/Dabbish et al. 
- 2012 - Fresh faces in the crowd turnover, identity, and .pdf;/home/nathante/Zotero/storage/GEVF3A53/Dabbish et al. - 2012 - Fresh faces in the crowd turnover, identity, and .pdf} +} + +@inproceedings{danescu-niculescu-mizil_no_2013, + ids = {danescu-niculescu-mizil_no_2013-1}, + title = {No Country for Old Members: User Lifecycle and Linguistic Change in Online Communities}, + shorttitle = {No Country for Old Members}, + booktitle = {Proceedings of the 22nd International Conference on {{World Wide Web}} - {{WWW}} '13}, + author = {{Danescu-Niculescu-Mizil}, Cristian and West, Robert and Jurafsky, Dan and Leskovec, Jure and Potts, Christopher}, + year = {2013}, + pages = {307--318}, + publisher = {{ACM Press}}, + address = {{Rio de Janeiro, Brazil}}, + abstract = {Vibrant online communities are in constant flux. As members join and depart, the interactional norms evolve, stimulating further changes to the membership and its social dynamics. Linguistic change\textemdash in the sense of innovation that becomes accepted as the norm\textemdash is essential to this dynamic process: it both facilitates individual expression and fosters the emergence of a collective identity. We propose a framework for tracking linguistic change as it happens and for understanding how specific users react to these evolving norms. By applying this framework to two large online communities we show that users follow a determined two-stage lifecycle with respect to their susceptibility to linguistic change: a linguistically innovative learning phase in which users adopt the language of the community followed by a conservative phase in which users stop changing and the evolving community norms pass them by.}, + isbn = {978-1-4503-2035-1}, + language = {en}, + file = {/home/nathante/Zotero/storage/L532IPRV/Danescu-Niculescu-Mizil et al. - 2013 - No Country for Old Members User Lifecycle and Lin.pdf;/home/nathante/Zotero/storage/LWECW2QM/Danescu-Niculescu-Mizil et al. 
- 2013 - No country for old members user lifecycle and lin.pdf} +} + +@article{datta_extracting_2019, + title = {Extracting {{Inter}}-{{Community Conflicts}} in {{Reddit}}}, + author = {Datta, Srayan and Adar, Eytan}, + year = {2019}, + month = jul, + journal = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {13}, + pages = {146--157}, + issn = {2334-0770}, + abstract = {Anti-social behaviors in social media can happen both at user and community levels. While a great deal of attention is on the individual as an `aggressor,' the banning of entire Reddit subcommunities (i.e., subreddits) demonstrates that this is a multi-layer concern. Existing research on inter-community conflict has largely focused on specific subcommunities or ideological opponents. However, antagonistic behaviors may be more pervasive and integrate into the broader network. In this work, we study the landscape of conflicts among subreddits by deriving higher-level (community) behaviors from the way individuals are sanctioned and rewarded. 
By constructing a conflict network, we characterize different patterns in subreddit-to-subreddit conflicts as well as communities of `co-targeted' subreddits. The dynamics of these interactions also reveals a shift in conflict focus over time.}, + copyright = {Copyright (c) 2019 Association for the Advancement of Artificial Intelligence}, + language = {en}, + file = {/home/nathante/Zotero/storage/6IA9VN8K/Datta_Adar_2019_Extracting Inter-Community Conflicts in Reddit.pdf;/home/nathante/Zotero/storage/F3MHZ7Z6/3217.html} +} + +@article{datta_identifying_2017, + title = {Identifying {{Misaligned Inter}}-{{Group Links}} and {{Communities}}}, + author = {Datta, Srayan and Phelan, Chanda and Adar, Eytan}, + year = {2017}, + month = dec, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {1}, + number = {CSCW}, + pages = {37:1--37:23}, + abstract = {Many social media systems explicitly connect individuals (e.g., Facebook or Twitter); as a result, they are the targets of most research on social networks. However, many systems do not emphasize or support explicit linking between people (e.g., Wikipedia or Reddit), and even fewer explicitly link communities. Instead, network analysis is performed through inference on implicit connections, such as co-authorship or text similarity. Depending on how inference is done and what data drove it, different networks may emerge. While correlated structures often indicate stability, in this work we demonstrate that differences, or misalignment, between inferred networks also capture interesting behavioral patterns. For example, high-text but low-author similarity often reveals communities ``at war'' with each other over an issue or high-author but low-text similarity can suggest community fragmentation. Because we are able to model edge direction, we also find that asymmetry in degree (in-versus-out) co-occurs with marginalized identities (subreddits related to women, people of color, LGBTQ, etc.). 
In this work, we provide algorithms that can identify misaligned links, network structures and communities. We then apply these techniques to Reddit to demonstrate how these algorithms can be used to decipher inter-group dynamics in social media.}, + file = {/home/nathante/Zotero/storage/52FT8LT8/Datta et al. - 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf;/home/nathante/Zotero/storage/WKCJHV6R/Datta et al. - 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf} +} + +@article{dellaposta_why_2015, + title = {Why {{Do Liberals Drink Lattes}}?}, + author = {DellaPosta, Daniel and Shi, Yongren and Macy, Michael}, + year = {2015}, + month = mar, + journal = {American Journal of Sociology}, + volume = {120}, + number = {5}, + pages = {1473--1511}, + issn = {0002-9602, 1537-5390}, + language = {en}, + file = {/home/nathante/Zotero/storage/LMVF2MJ5/DellaPosta et al_2015_Why Do Liberals Drink Lattes.pdf} +} + +@article{dvir-gvirsman_media_2017, + title = {Media Audience Homophily: {{Partisan}} Websites, Audience Identity and Polarization Processes}, + shorttitle = {Media Audience Homophily}, + author = {{Dvir-Gvirsman}, Shira}, + year = {2017}, + month = jul, + journal = {New Media \& Society}, + volume = {19}, + number = {7}, + pages = {1072--1091}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {The study suggests that media consumers favor certain websites not only due to their content but also due to their audience. A new concept is introduced: ``audience homophily,'' which describes one's preference for partisan media websites catering to a homogeneous, likeminded consumership. This attraction is explained in terms of the need for self-consistency, and I suggest that over time such behavior will polarize political identity through a spiral of reinforcement. 
Based on both a survey-experiment (N\,=\,300) and a panel study combined with web-tracking technology that recorded online-exposure behavior (N\,=\,397), it was found that individuals with more extreme ideology present higher levels of audience homophily and that, longitudinally, audience homophily is somewhat associated with ideological polarization, intolerance, and accessibility of political self-definition.}, + language = {en}, + keywords = {Homophily,network analysis,partisan media,reinforcing-spiral model,selective exposure}, + file = {/home/nathante/Zotero/storage/WEQEAEJ4/Dvir-Gvirsman - 2017 - Media audience homophily Partisan websites, audie.pdf} +} + +@article{ellison_benefits_2007, + ids = {ellison_benefits_2007-1}, + title = {The {{Benefits}} of {{Facebook}} ``{{Friends}}:'' {{Social Capital}} and {{College Students}}' {{Use}} of {{Online Social Network Sites}}}, + shorttitle = {The {{Benefits}} of {{Facebook}} ``{{Friends}}''}, + author = {Ellison, Nicole B. and Steinfield, Charles and Lampe, Cliff}, + year = {2007}, + month = jul, + journal = {Journal of Computer-Mediated Communication}, + volume = {12}, + number = {4}, + pages = {1143--1168}, + publisher = {{Oxford Academic}}, + issn = {1083-6101}, + abstract = {This study examines the relationship between use of Facebook, a popular online social network site, and the formation and maintenance of social capital. In addition to assessing bonding and bridging social capital, we explore a dimension of social capital that assesses one's ability to stay connected with members of a previously inhabited community, which we call maintained social capital. Regression analyses conducted on results from a survey of undergraduate students (N = 286) suggest a strong association between use of Facebook and the three types of social capital, with the strongest relationship being to bridging social capital. 
In addition, Facebook usage was found to interact with measures of psychological well-being, suggesting that it might provide greater benefits for users experiencing low self-esteem and low life satisfaction.}, + language = {en}, + keywords = {CMC,quantitative,SNS,Social capital,survey}, + file = {/home/nathante/Zotero/storage/C6PUU2LZ/Ellison et al. - 2007 - The Benefits of Facebook “Friends” Social Capital.pdf;/home/nathante/Zotero/storage/I5D8LMF3/Ellison et al. - 2007 - The Benefits of Facebook “Friends” Social Capital.pdf;/home/nathante/Zotero/storage/CFMJSBYE/4582961.html;/home/nathante/Zotero/storage/YZWIMZS9/abstract.html} +} + +@article{faraj_online_2016, + ids = {faraj_special_2016}, + title = {Online Community as Space for Knowledge Flows}, + author = {Faraj, Samer and {von Krogh}, Georg and Monteiro, Eric and Lakhani, Karim R.}, + year = {2016}, + month = dec, + journal = {Information Systems Research}, + volume = {27}, + number = {4}, + pages = {668--684}, + issn = {1047-7047}, + abstract = {Online communities frequently create significant economic and relational value for community participants and beyond. It is widely accepted that the underlying source of such value is the collective flow of knowledge among community participants. We distinguish the conditions for flows of tacit and explicit knowledge in online communities and advance an unconventional theoretical conjecture: Online communities give rise to tacit knowledge flows between participants. The crucial condition for these flows is not the advent of novel, digital technology as often portrayed in the literature, but instead the technology's domestication by humanity and the sociality it affords. This conjecture holds profound implications for theory and research in the study of management and organization, as well as their relation to information technology.}, + file = {/home/nathante/Zotero/storage/4TH94S6Q/Faraj et al. 
- 2016 - Online Community as Space for Knowledge Flows.pdf;/home/nathante/Zotero/storage/NCY7A6S4/Faraj et al. - 2016 - Special Section Introduction—Online Community as S.pdf} +} + +@inproceedings{fiesler_growing_2017, + ids = {fiesler_growing_2017-1}, + title = {Growing {{Their Own}}: {{Legitimate Peripheral Participation}} for {{Computational Learning}} in an {{Online Fandom Community}}}, + shorttitle = {Growing {{Their Own}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Fiesler, Casey and Morrison, Shannon and Shapiro, R. Benjamin and Bruckman, Amy S.}, + year = {2017}, + month = feb, + series = {{{CSCW}} '17}, + pages = {1375--1386}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Online communities dedicated to the creation of fanworks (e.g., fiction or art inspired by media such as books or television shows) often serve as communities of practice for learning communication, artistic, and technical skills. In studying one successful fan fiction archive that was designed and built entirely by (predominantly women) fans, we observed processes of legitimate peripheral participation (LPP) in which some of these fans began in peripheral roles and came to be more involved in the technical aspects of the archive over time. In addition to outlining positive outcomes, we discuss the challenges of supporting learning within this CoP, particularly with respect to the burden on experts. 
We discuss potential implications and solutions for the problem of expert scarcity in CoPs, and propose that LPP within fan communities can be leveraged for broadening participation in computing among women.}, + isbn = {978-1-4503-4335-0}, + keywords = {broadening participation in computing,communities of practice,computing education,fandom,fanfiction,learning,legitimate peripheral participation,online communities,open source}, + file = {/home/nathante/Zotero/storage/QUSETR8Z/Fiesler et al. - 2017 - Growing Their Own Legitimate Peripheral Participa.pdf;/home/nathante/Zotero/storage/VRDFMKHZ/Fiesler et al_2017_Growing Their Own.pdf} +} + +@article{fiesler_moving_2020, + ids = {fiesler_moving_2020-1,fiesler_moving_2020-2}, + title = {Moving {{Across Lands}}: {{Online Platform Migration}} in {{Fandom Communities}}}, + shorttitle = {Moving {{Across Lands}}}, + author = {Fiesler, Casey and Dym, Brianna}, + year = {2020}, + month = may, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {4}, + number = {CSCW1}, + pages = {042:1--042:25}, + abstract = {When online platforms rise and fall, sometimes communities fade away, and sometimes they pack their bags and relocate to a new home. To explore the causes and effects of online community migration, we examine transformative fandom, a longstanding, technology-agnostic community surrounding the creation, sharing, and discussion of creative works based on existing media. For over three decades, community members have left and joined many different online spaces, from Usenet to Tumblr to platforms of their own design. Through analysis of 28 in-depth interviews and 1,886 survey responses from fandom participants, we traced these migrations, the reasons behind them, and their impact on the community. Our findings highlight catalysts for migration that provide insights into factors that contribute to success and failure of platforms, including issues surrounding policy, design, and community. 
Further insights into the disruptive consequences of migrations (such as social fragmentation and lost content) suggest ways that platforms might both support commitment and better support migration when it occurs.}, + file = {/home/nathante/Zotero/storage/ER8P5AJ2/Fiesler_Dym_2020_Moving Across Lands.pdf;/home/nathante/Zotero/storage/JHDILSYU/Fiesler and Dym - 2020 - Moving Across Lands Online Platform Migration in .pdf} +} + +@inproceedings{fiesler_reddit_2018, + title = {Reddit Rules! {{Characterizing}} an Ecosystem of Governance.}, + booktitle = {Proceedings of the {{AAAI International Conference}} on {{Web}} and {{Social Media}}}, + author = {Fiesler, Casey and Jiang, Jialun ``Aaron'' and McCann, Joshua and Frye, Kyle and Brubaker, Jed R.}, + year = {2018}, + pages = {72--81}, + publisher = {{AAAI}}, + address = {{Stanford, CA}}, + file = {/home/nathante/Zotero/storage/34TYXTGB/Fiesler - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/G9VFI2L7/Fiesler et al. - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/KT7KNG3J/Fiesler et al. - 2018 - Reddit rules! Characterizing an ecosystem of gover.pdf} +} + +@article{figeac_how_2021, + title = {How Behavioral Homophily on Social Media Influences the Perception of Tie-Strengthening within Young Adults' Personal Networks}, + author = {Figeac, Julien and Favre, Guillaume}, + year = {2021}, + month = jun, + journal = {New Media \& Society}, + pages = {14614448211020691}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study examines how social media and information-sharing behavior can influence young adults' perceptions of changes in tie strength within their own personal networks. By focusing on the extended personal networks (27.56 relationships) of young adults, we show that social media leads them to feel closer to their ``friends'' whom they think of as exhibiting online behaviors similar to their own. 
This behavioral homophily mainly stems from frequent reactions between friends, when they like or comment upon each other's posts. Such homophily is also related to the sharing of political news and entertaining content, which constitute a salient affordance in the ``pervasive awareness'' of social media and lead users to feel closer to those exhibiting similar content-sharing behavior. This similarity reveals how social media platforms help to shape personal networks over time, particularly by influencing user relationships with weak ties who share similar online behavior.}, + language = {en}, + keywords = {Entertaining content,homophily,information-sharing,personal networks,pervasive awareness,political news,social media,weak ties}, + file = {/home/nathante/Zotero/storage/YAKLRLVE/Figeac and Favre - 2021 - How behavioral homophily on social media influence.pdf} +} + +@misc{foote_agent-based_2018, + title = {An {{Agent}}-{{Based Model}} of {{Online Community Joining}}}, + author = {Foote, Jeremy}, + year = {2018}, + month = jul, + address = {{Evanston, IL}}, + collaborator = {TeBlunthuis, Nathan and Hill, Benjamin Mako and Shaw, Aaron} +} + +@article{foote_how_2020, + title = {How Individual Behaviors Drive Inequality in Online Community Sizes: An Agent-Based Simulation}, + shorttitle = {How Individual Behaviors Drive Inequality in Online Community Sizes}, + author = {Foote, Jeremy and TeBlunthuis, Nathan and Hill, Benjamin Mako and Shaw, Aaron}, + year = {2020}, + month = jun, + journal = {arXiv:2006.03119 [cs]}, + eprint = {2006.03119}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Why are online community sizes so extremely unequal? Most answers to this question have pointed to general mathematical processes drawn from physics like cumulative advantage. These explanations provide little insight into specific social dynamics or decisions that individuals make when joining and leaving communities. 
In addition, explanations in terms of cumulative advantage do not draw from the enormous body of social computing research that studies individual behavior. Our work bridges this divide by testing whether two influential social mechanisms used to explain community joining can also explain the distribution of community sizes. Using agent-based simulations, we evaluate how well individual-level processes of social exposure and decisions based on individual expected benefits reproduce empirical community size data from Reddit. Our simulations contribute to social computing theory by providing evidence that both processes together---but neither alone---generate realistic distributions of community sizes. Our results also illustrate the potential value of agent-based simulation to online community researchers to both evaluate and bridge individual and group-level theories.}, + archiveprefix = {arXiv}, + file = {/home/nathante/Zotero/storage/PMZDH4B2/Foote et al_2020_How individual behaviors drive inequality in online community sizes.pdf;/home/nathante/Zotero/storage/D57HFTGF/2006.html} +} + +@inproceedings{foote_starting_2017, + title = {Starting Online Communities: Motivations and Goals of Wiki Founders}, + shorttitle = {Starting {{Online Communities}}}, + booktitle = {Proceedings of the 2017 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '17)}, + author = {Foote, Jeremy and Gergle, Darren and Shaw, Aaron}, + year = {2017}, + pages = {6376--6380}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {Why do people start new online communities? Previous research has studied what helps communities to grow and what motivates contributors, but the reasons that people create new communities in the first place remain unclear. We present the results of a survey of over 300 founders of new communities on the online wiki hosting site Wikia.com. 
We analyze the motivations and goals of wiki creators, finding that founders have diverse reasons for starting wikis and diverse ways of defining their success. Many founders see their communities as occupying narrow topics, and neither seek nor expect a large group of contributors. We also find that founders with differing goals approach community building differently. We argue that community platform designers can create interfaces that support the diverse goals of founders more effectively.}, + isbn = {978-1-4503-4655-9}, + keywords = {peer production,survey,wikis}, + file = {/home/nathante/Zotero/storage/BWAIBPUK/Foote et al. - 2017 - Starting Online Communities Motivations and Goals.pdf} +} + +@article{frey_clustering_2007, + title = {Clustering by {{Passing Messages Between Data Points}}}, + author = {Frey, Brendan J. and Dueck, Delbert}, + year = {2007}, + month = feb, + journal = {Science}, + volume = {315}, + number = {5814}, + pages = {972--976}, + publisher = {{American Association for the Advancement of Science}}, + issn = {0036-8075, 1095-9203}, + abstract = {Clustering data by identifying a subset of representative examples is important for processing sensory signals and detecting patterns in data. Such ``exemplars'' can be found by randomly choosing an initial subset of data points and then iteratively refining it, but this works well only if that initial choice is close to a good solution. We devised a method called ``affinity propagation,'' which takes as input measures of similarity between pairs of data points. Real-valued messages are exchanged between data points until a high-quality set of exemplars and corresponding clusters gradually emerges. We used affinity propagation to cluster images of faces, detect genes in microarray data, identify representative sentences in this manuscript, and identify cities that are efficiently accessed by airline travel. 
Affinity propagation found clusters with much lower error than other methods, and it did so in less than one-hundredth the amount of time. An algorithm that exchanges messages about the similarity of pairs of data points speeds identification of representative examples in a complex data set, such as genes in DNA data.}, + chapter = {Report}, + copyright = {American Association for the Advancement of Science}, + language = {en}, + pmid = {17218491}, + file = {/home/nathante/Zotero/storage/PVGJU5KN/Frey_Dueck_2007_Clustering by Passing Messages Between Data Points.pdf;/home/nathante/Zotero/storage/ERM5BMQT/972.html} +} + +@article{frey_emergence_2019, + title = {Emergence of Integrated Institutions in a Large Population of Self-Governing Communities}, + author = {Frey, Seth and Sumner, Robert W.}, + year = {2019}, + month = jul, + journal = {PLOS ONE}, + volume = {14}, + number = {7}, + pages = {e0216335}, + publisher = {{Public Library of Science}}, + issn = {1932-6203}, + abstract = {Most aspects of our lives are governed by large, highly developed institutions that integrate several governance tasks under one authority structure. But theorists differ as to the mechanisms that drive the development of such concentrated governance systems from rudimentary beginnings. Is the emergence of integrated governance schemes a symptom of consolidation of authority by small status groups? Or does integration occur because a complex institution has more potential responses to a complex environment? Here we examine the emergence of complex governance regimes in 5,000 sovereign, resource-constrained, self-governing online communities, ranging in scale from one to thousands of users. Each community begins with no community members and no governance infrastructure. 
As communities grow, they are subject to selection pressures that keep better managed servers better populated. We identify predictors of community success and test the hypothesis that governance complexity can enhance community fitness. We find that what predicts success depends on size: changes in complexity predict increased success with larger population servers. Specifically, governance rules in a large successful community are more numerous and broader in scope. They also tend to rely more on rules that concentrate power in administrators, and on rules that manage bad behavior and limited server resources. Overall, this work is consistent with theories that formal integrated governance systems emerge to organize collective responses to interdependent resource management problems, especially as factors such as population size exacerbate those problems.}, + language = {en}, + keywords = {Community ecology,Computer software,Forests,Games,Internet,Online encyclopedias,Political theory,Resource management,Social psychology,Video games}, + file = {/home/nathante/Zotero/storage/AXDJPNKE/Frey and Sumner - 2019 - Emergence of integrated institutions in a large po.pdf;/home/nathante/Zotero/storage/DA5HAVLH/Frey_Sumner_2019_Emergence of integrated institutions in a large population of self-governing.pdf;/home/nathante/Zotero/storage/Q3FI9DBS/Frey and Sumner - 2019 - Emergence of integrated institutions in a large po.pdf;/home/nathante/Zotero/storage/4B26ZMHH/article.html;/home/nathante/Zotero/storage/4CRK5UUM/article.html;/home/nathante/Zotero/storage/8XFADRSX/article.html} +} + +@article{fulk_connective_1996, + title = {Connective and Communal Public Goods in Interactive Communication Systems}, + author = {Fulk, Janet and Flanagin, Andrew J. and Kalman, Michael E. and Monge, Peter R. 
and Ryan, Timothy}, + year = {1996}, + journal = {Communication Theory}, + volume = {6}, + number = {1}, + pages = {60--87}, + issn = {1468-2885}, + abstract = {This paper extends theories of public goods to interactive communication systems. Two key public communication goods are identified. Connectivity provides point-to-point communication, and communality links members through commonly held information, such as that often found in databases. These extensions are important, we argue, because communication public goods operate differently from traditional material public goods. These differences have important implications for costs, benefits, and the realization of a critical mass of users that is necessary for realization of the good. We also explore multifunctional goods that combine various features and hybrid goods that link private goods to public ones. We examine the applicability of two key assumptions of public goods theory to interactive communication systems. First, jointness of supply specifies that consumption of a public good does not diminish its availability to others. Second, impossibility of exclusion stipulates that all members of the public have access to the good. We conclude with suggestions for further theoretical development.}, + language = {en}, + keywords = {maintaining public goods}, + file = {/home/nathante/Zotero/storage/ZJVU4TGW/Fulk et al. 
- 1996 - Connective and communal public goods in interactiv.pdf;/home/nathante/Zotero/storage/8J5CPWLV/4259000.html} +} + +@article{graham_boundary_2019, + title = {Boundary Maintenance and the Origins of Trolling}, + author = {Graham, Elyse}, + year = {2019}, + month = sep, + journal = {New Media \& Society}, + volume = {21}, + number = {9}, + pages = {2029--2047}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This article presents a new social framework for understanding the origins of trolling and its expansion from an obscure practice, limited to a handful of boards on Usenet, to a pervasive component of Internet culture. I argue that trolling originated, in the term of sociologists, as a form of boundary maintenance that served to distinguish communities of self-identified online insiders from others beyond the boundaries of their community and to drive outsiders away from their spaces. This framework can help us to better understand the transformations that trolling has undergone in the decades since its inception, as well as the persistence of misogyny and prejudice throughout the history of the practice.}, + language = {en}, + keywords = {Boundary maintenance,Internet communities,Internet history,online harassment,politics of cyberspace,trolling}, + file = {/home/nathante/Zotero/storage/6IN6XJWV/Graham - 2019 - Boundary maintenance and the origins of trolling.pdf} +} + +@article{granovetter_strength_1973, + title = {The {{Strength}} of {{Weak Ties}}}, + author = {Granovetter, Mark S.}, + year = {1973}, + month = may, + journal = {American Journal of Sociology}, + volume = {78}, + number = {6}, + pages = {1360--1380}, + issn = {0002-9602}, + abstract = {Analysis of social networks is suggested as a tool for linking micro and macro levels of sociological theory. The procedure is illustrated by elaboration of the macro implications of one aspect of small-scale interaction: the strength of dyadic ties. 
It is argued that the degree of overlap of two individuals' friendship networks varies directly with the strength of their tie to one another. The impact of this principle on diffusion of influence and information, mobility opportunity, and community organization is explored. Stress is laid on the cohesive power of weak ties. Most network models deal, implicitly, with strong ties, thus confining their applicability to small, well-defined groups. Emphasis on weak ties lends itself to discussion of relations between groups and to analysis of segments of social structure not easily defined in terms of primary groups.}, + file = {/home/nathante/Zotero/storage/GM6GICWI/225469.html} +} + +@inproceedings{grevet_combating_2013, + title = {Combating Homophily through Design}, + booktitle = {Proceedings of the 2013 Conference on {{Computer}} Supported Cooperative Work Companion}, + author = {Grevet, Catherine}, + year = {2013}, + month = feb, + series = {{{CSCW}} '13}, + pages = {57--60}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Social networking has allowed us to be in constant contact with friends from many different backgrounds, yet we are unaware of many of our friends' perspectives and opinions. Networks are highly homophilous, meaning that people tend to associate with others similar to them. This leads to homogenous clusters. How should we design social media to facilitate constructive exchanges rather than polarize individuals? 
In my work, I propose to look at whether users are currently aware of the homophily phenomenon in their online networks and exploring social network designs to break homophily.}, + isbn = {978-1-4503-1332-2}, + keywords = {awareness,homophily,social networks,tie strength}, + file = {/home/nathante/Zotero/storage/XFJCI35Y/Grevet - 2013 - Combating homophily through design.pdf} +} + +@inproceedings{grevet_managing_2014, + title = {Managing Political Differences in Social Media}, + booktitle = {Proceedings of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing}, + author = {Grevet, Catherine and Terveen, Loren G. and Gilbert, Eric}, + year = {2014}, + month = feb, + series = {{{CSCW}} '14}, + pages = {1400--1408}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Most people associate with people like themselves, a process called homophily. Exposure to diversity, however, makes us more informed as individuals and as a society. In this paper, we investigate political disagreements on Facebook to explore the conditions under which diverse opinions can coexist online. Via a mixed methods approach comprising 103 survey responses and 13 interviews with politically engaged American social media users, we found that participants who perceived more differences with their friends engaged less on Facebook than those who perceived more homogeneity. Weak ties were particularly brittle to political disagreements, despite being the ties most likely to offer diversity. Finally, based on our findings we suggest potential design opportunities to bridge across ideological difference: 1) support exposure to weak ties; and 2) make common ground visible while friends converse.}, + isbn = {978-1-4503-2540-0}, + keywords = {facebook,homophily,politics,relationship management,self-censorship,social media,tie strength}, + file = {/home/nathante/Zotero/storage/8VK4PWVX/Grevet et al. 
- 2014 - Managing political differences in social media.pdf} +} + +@inproceedings{guha_birds_2015, + title = {Do {{Birds}} of a {{Feather Watch Each Other}}? {{Homophily}} and {{Social Surveillance}} in {{Location Based Social Networks}}}, + shorttitle = {Do {{Birds}} of a {{Feather Watch Each Other}}?}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Guha, Shion and Wicker, Stephen B.}, + year = {2015}, + month = feb, + series = {{{CSCW}} '15}, + pages = {1010--1020}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Location sharing applications (LSA) have proliferated in recent years. Current research principally focuses on egocentric privacy issues and design but has historically not explored the impact of surveillance on location sharing behavior. In this paper, we examine homophily in friendship and surveillance networks for 65 foursquare users. Our results indicate that location surveillance networks are strongly homophilous along the lines of race and gender while friendship networks are weakly homophilous on income. Qualitatively, an analysis of comments and interviews provides support for a discourse around location surveillance, which is mainly social, collaborative, positive and participatory. 
We relate these findings with prior literature on surveillance, self-presentation and homophily and situate this study in existing HCI/CSCW scholarship.}, + isbn = {978-1-4503-2922-4}, + keywords = {foursquare,homophily,privacy,surveillance,visibility,vision}, + file = {/home/nathante/Zotero/storage/4G3RN2C5/Guha and Wicker - 2015 - Do Birds of a Feather Watch Each Other Homophily .pdf} +} + +@article{halfaker_rise_2013, + title = {The Rise and Decline of an Open Collaboration System: How {{Wikipedia}}'s Reaction to Popularity Is Causing Its Decline}, + shorttitle = {The {{Rise}} and {{Decline}} of an {{Open Collaboration System}}}, + author = {Halfaker, Aaron and Geiger, R. Stuart and Morgan, Jonathan T. and Riedl, John}, + year = {2013}, + month = may, + journal = {American Behavioral Scientist}, + volume = {57}, + number = {5}, + pages = {664--688}, + issn = {0002-7642}, + abstract = {Open collaboration systems, such as Wikipedia, need to maintain a pool of volunteer contributors to remain relevant. Wikipedia was created through a tremendous number of contributions by millions of contributors. However, recent research has shown that the number of active contributors in Wikipedia has been declining steadily for years and suggests that a sharp decline in the retention of newcomers is the cause. This article presents data that show how several changes the Wikipedia community made to manage quality and consistency in the face of a massive growth in participation have ironically crippled the very growth they were designed to manage. Specifically, the restrictiveness of the encyclopedia's primary quality control mechanism and the algorithmic tools used to reject contributions are implicated as key causes of decreased newcomer retention. 
Furthermore, the community's formal mechanisms for norm articulation are shown to have calcified against changes\textemdash especially changes proposed by newer editors.}, + language = {en}, + file = {/home/nathante/Zotero/storage/7B7AFK58/Halfaker et al. - 2013 - The rise and decline of an open collaboration syst.pdf;/home/nathante/Zotero/storage/Y9676KNV/The Rise and Decline of an Open Collaboration Syst.pdf} +} + +@book{hannan_organizational_1989, + title = {Organizational Ecology}, + author = {Hannan, Michael T. and Freeman, John}, + year = {1989}, + edition = {First}, + publisher = {{Harvard University Press}}, + address = {{Cambridge, MA}} +} + +@article{hargittai_whose_2007, + title = {Whose {{Space}}? {{Differences}} among {{Users}} and {{Non}}-{{Users}} of {{Social Network Sites}}}, + shorttitle = {Whose {{Space}}?}, + author = {Hargittai, Eszter}, + year = {2007}, + month = oct, + journal = {Journal of Computer-Mediated Communication}, + volume = {13}, + number = {1}, + pages = {276--297}, + publisher = {{Oxford Academic}}, + abstract = {Are there systematic differences between people who use social network sites and those who stay away, despite a familiarity with them? Based on data from a survey administered to a diverse group of young adults, this article looks at the predictors of SNS usage, with particular focus on Facebook, MySpace, Xanga, and Friendster. Findings suggest that use of such sites is not randomly distributed across a group of highly wired users. A person's gender, race and ethnicity, and parental educational background are all associated with use, but in most cases only when the aggregate concept of social network sites is disaggregated by service. Additionally, people with more experience and autonomy of use are more likely to be users of such sites. 
Unequal participation based on user background suggests that differential adoption of such services may be contributing to digital inequality.}, + language = {en}, + file = {/home/nathante/Zotero/storage/WVFZWUGF/Hargittai - 2007 - Whose Space Differences among Users and Non-Users.pdf;/home/nathante/Zotero/storage/C5TFC2YY/4583068.html} +} + +@article{helland_diaspora_2007, + title = {Diaspora on the {{Electronic Frontier}}: {{Developing Virtual Connections}} with {{Sacred Homelands}}}, + shorttitle = {Diaspora on the {{Electronic Frontier}}}, + author = {Helland, Christopher}, + year = {2007}, + month = apr, + journal = {Journal of Computer-Mediated Communication}, + volume = {12}, + number = {3}, + pages = {956--976}, + publisher = {{Oxford Academic}}, + abstract = {This study demonstrates how diaspora religious traditions utilized the Internet to develop significant network connections among each other and also to their place of origins. By examining the early Usenet system, I argue that the religious beliefs and practices of diaspora religious traditions were a motivating factor for developing Usenet groups where geographically dispersed individuals could connect with each other in safe, supportive, and religiously tolerant environments. This article explores the new forms of religious practices that began to occur on these sites, focusing on the manner in which Internet technology and the World Wide Web were utilized for activities such as long-distance ritual practice, cyber pilgrimage, and other religiously-motivated undertakings. 
Through these new online religious activities, diaspora groups have been able to develop significant connections not only among people, but also between people and the sacred homeland itself.}, + language = {en}, + file = {/home/nathante/Zotero/storage/QAMFAZAW/Helland - 2007 - Diaspora on the Electronic Frontier Developing Vi.pdf;/home/nathante/Zotero/storage/WNQX9GUY/4583017.html} +} + +@inproceedings{hemetsberger_sharing_2004, + title = {Sharing and Creating Knowledge in Open-Source Communities: The Case of {{KDE}}}, + booktitle = {Paper for {{Fifth European Conference}} on {{Organizational Knowledge}}, {{Learning}}, and {{Capabilities}}, {{Innsbruck}}}, + author = {Hemetsberger, Andrea and Reinhardt, Christian}, + year = {2004} +} + +@inproceedings{hessel_science_2016, + ids = {hessel_science_2016-1}, + title = {Science, Askscience, and Badscience: On the Coexistence of Highly Related Communities}, + shorttitle = {Science, Askscience, and Badscience}, + booktitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Hessel, Jack and Tan, Chenhao and Lee, Lillian}, + year = {2016}, + month = mar, + eprint = {1612.07487}, + eprinttype = {arxiv}, + pages = {11}, + abstract = {When large social-media platforms allow users to easily form and self-organize into interest groups, highly related communities can arise. For example, the Reddit site hosts not just a group called food, but also HealthyFood, foodhacks, foodporn, and cooking, among others. Are these highly related communities created for similar classes of reasons (e.g., to focus on a subtopic, to create a place for allegedly more ``high-minded'' discourse, etc.)? How do users allocate attention between such close alternatives when they are available or emerge over time? Are there different types of relations between close alternatives such as sharing many users vs. a new community drawing away members of an older one vs. 
a splinter group failing to cohere into a viable separate community? We investigate the interactions between highly related communities using data from reddit.com consisting of 975M posts and comments spanning an 8-year period. We identify a set of typical affixes that users adopt to create highly related communities and build a taxonomy of affixes. One interesting finding regarding users' behavior is: after a newer community is created, for several types of highly-related community pairs, users that engage in a newer community tend to be more active in their original community than users that do not explore, even when controlling for previous level of engagement.}, + archiveprefix = {arXiv}, + copyright = {Authors who publish a paper in this conference agree to the following terms: 1. Author(s) agree to transfer their copyrights in their article/paper to the Association for the Advancement of Artificial Intelligence (AAAI), in order to deal with future requests for reprints, translations, anthologies, reproductions, excerpts, and other publications. This grant will include, without limitation, the entire copyright in the article/paper in all countries of the world, including all renewals, extensions, and reversions thereof, whether such rights current exist or hereafter come into effect, and also the exclusive right to create electronic versions of the article/paper, to the extent that such right is not subsumed under copyright. 2. The author(s) warrants that they are the sole author and owner of the copyright in the above article/paper, except for those portions shown to be in quotations; that the article/paper is original throughout; and that the undersigned right to make the grants set forth above is complete and unencumbered. 3. 
The author(s) agree that if anyone brings any claim or action alleging facts that, if true, constitute a breach of any of the foregoing warranties, the author(s) will hold harmless and indemnify AAAI, their grantees, their licensees, and their distributors against any liability, whether under judgment, decree, or compromise, and any legal fees and expenses arising out of that claim or actions, and the undersigned will cooperate fully in any defense AAAI may make to such claim or action. Moreover, the undersigned agrees to cooperate in any claim or other action seeking to protect or enforce any right the undersigned has granted to AAAI in the article/paper. If any such claim or action fails because of facts that constitute a breach of any of the foregoing warranties, the undersigned agrees to reimburse whomever brings such claim or action for expenses and attorneys' fees incurred therein. 4. Author(s) retain all proprietary rights other than copyright (such as patent rights). 5. Author(s) may make personal reuse of all or portions of the above article/paper in other works of their own authorship. 6. Author(s) may reproduce, or have reproduced, their article/paper for the author's personal use, or for company use provided that AAAI copyright and the source are indicated, and that the copies are not used in a way that implies AAAI endorsement of a product or service of an employer, and that the copies per se are not offered for sale. The foregoing right shall not permit the posting of the article/paper in electronic or digital form on any computer network, except by the author or the author's employer, and then only on the author's or the employer's own web page or ftp site. 
Such web page or ftp site, in addition to the aforementioned requirements of this Paragraph, must provide an electronic reference or link back to the AAAI electronic server, and shall not post other AAAI copyrighted materials not of the author's or the employer's creation (including tables of contents with links to other papers) without AAAI's written permission. 7. Author(s) may make limited distribution of all or portions of their article/paper prior to publication. 8. In the case of work performed under U.S. Government contract, AAAI grants the U.S. Government royalty-free permission to reproduce all or portions of the above article/paper, and to authorize others to do so, for U.S. Government purposes. 9. In the event the above article/paper is not accepted and published by AAAI, or is withdrawn by the author(s) before acceptance by AAAI, this agreement becomes null and void.}, + language = {en}, + keywords = {Computer Science - Social and Information Networks,Physics - Physics and Society}, + file = {/home/nathante/Zotero/storage/2W6YBUBD/Hessel et al_2016_Science, AskScience, and BadScience.pdf;/home/nathante/Zotero/storage/4FLLXNV9/Hessel et al. - 2016 - Science, AskScience, and BadScience On the Coexis.pdf;/home/nathante/Zotero/storage/WS6TW26Q/Hessel et al. 
- 2016 - Science, AskScience, and BadScience On the Coexis.pdf;/home/nathante/Zotero/storage/3NHVFA3U/1612.html;/home/nathante/Zotero/storage/DXX4CJ7T/14739.html;/home/nathante/Zotero/storage/YSX2WN2J/13106.html} +} + +@incollection{hill_studying_2019, + title = {Studying Populations of Online Communities}, + booktitle = {The {{Oxford Handbook}} of {{Networked Communication}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + editor = {Foucault Welles, Brooke and {Gonz{\'a}lez-Bail{\'o}n}, Sandra}, + year = {2019}, + month = sep, + pages = {173--193}, + publisher = {{Oxford University Press}}, + address = {{Oxford, UK}}, + abstract = {While the large majority of published research on online communities consists of analyses conducted entirely within individual communities, this chapter argues for a population-based approach, in which researchers study groups of similar communities. For example, although there have been thousands of papers published about Wikipedia, a population-based approach might compare all wikis on a particular topic. Using examples from published empirical studies, the chapter describes five key benefits of this approach. First, it argues that population-level research increases the generalizability of findings. Next, it describes four processes and dynamics that are only possible to study using populations: community-level variables, information diffusion processes across communities, ecological dynamics, and multilevel community processes. 
The chapter concludes with a discussion of a series of limitations and challenges.}, + isbn = {978-0-19-046051-8}, + language = {en}, + file = {/home/nathante/Zotero/storage/39ZWGGYN/Hill and Shaw - 2019 - Studying Populations of Online Communities.pdf;/home/nathante/Zotero/storage/BTB3AQGV/oxfordhb-9780190460518-e-8.html} +} + +@inproceedings{hillman_alksjdflksfd_2014, + title = {'alksjdf;{{Lksfd}}': Tumblr and the Fandom User Experience}, + shorttitle = {'alksjdf;{{Lksfd}}'}, + booktitle = {Proceedings of the 2014 Conference on {{Designing}} Interactive Systems}, + author = {Hillman, Serena and Procyk, Jason and Neustaedter, Carman}, + year = {2014}, + month = jun, + series = {{{DIS}} '14}, + pages = {775--784}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {A growing trend is the participation in online fandom communities through the support of the blogging platform Tumblr. While past research has investigated backchannels---chatter related to live entertainment on micro-blogging sites such as Twitter---there is a lack of research on the behaviours and motivations of Tumblr users. In our study, we investigate why fandom users chose Tumblr over other social networking sites, their motivations behind participating in fandoms, and how they interact within the Tumblr community. Our findings show that users face many user interface challenges when participating in Tumblr fandoms, especially initially; yet, despite this, Tumblr fandom communities thrive with a common sense of social purpose and exclusivity where users feel they can present a more authentic reflection of themselves to those sharing similar experiences and interests. 
We describe how this suggests design directions for social networking and blogging sites in order to promote communities of users.}, + isbn = {978-1-4503-2902-6}, + keywords = {backchannels,entertainment,fandoms,fanfiction,micro-blogging,social networking,television,Tumblr}, + file = {/home/nathante/Zotero/storage/HZCLCKCG/Hillman et al. - 2014 - 'alksjdf\;Lksfd' tumblr and the fandom user experi.pdf} +} + +@article{himelboim_valence-based_2016, + title = {Valence-Based Homophily on {{Twitter}}: {{Network Analysis}} of {{Emotions}} and {{Political Talk}} in the 2012 {{Presidential Election}}}, + shorttitle = {Valence-Based Homophily on {{Twitter}}}, + author = {Himelboim, Itai and Sweetser, Kaye D and Tinkham, Spencer F and Cameron, Kristen and Danelo, Matthew and West, Kate}, + year = {2016}, + month = aug, + journal = {New Media \& Society}, + volume = {18}, + number = {7}, + pages = {1382--1400}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study integrates network and content analyses to examine valence-based homophily on Twitter or the tendency for individuals to interact with those expressing similar valence. During the 2012 federal election cycle, we collected Twitter conversations about 10 controversial political topics and mapped their network ties. Using network analysis, we discovered clusters\textemdash subgroups of highly self-connected users\textemdash and coded messages in each cluster for their expressed positive-to-negative emotional valence, level of support or opposition, and political leaning. We found that valence-based homophily successfully explained the selection of user interactions on Twitter, in terms of expressed emotional valence in their tweets or support versus criticism to an issue. It also finds conservative voices to be associated with negatively valenced clusters and vice versa. 
This study expands the theory of homophily beyond its traditional conceptualization and provides a new understanding of political-issue interactions in a social media context.}, + language = {en}, + keywords = {2012 Election,emotional valence,homophily,political talk,social networks,Twitter}, + file = {/home/nathante/Zotero/storage/QUK4ID26/Himelboim et al. - 2016 - Valence-based homophily on Twitter Network Analys.pdf} +} + +@incollection{hollingshead_fostering_2002, + ids = {hollingshead_fostering_2002-1}, + title = {Fostering Intranet Knowledge Sharing: {{An}} Integration of Transactive Memory and Public Goods Approaches}, + shorttitle = {Fostering Intranet Knowledge Sharing}, + booktitle = {Distributed Work}, + author = {Hollingshead, Andrea B. and Fulk, Janet and Monge, Peter}, + year = {2002}, + pages = {335--355}, + publisher = {{MIT Press}}, + address = {{Cambridge, MA, US}}, + abstract = {Intranets---company Web sites designed for internal use---are an important technological innovation in many organizations that can aid in knowledge management, expertise recognition, and communication. This chapter identifies the conditions under which members of work groups are more likely to contribute to the development of intranets and the conditions under which intranets are more likely to result in more efficient and effective knowledge acquisition and dissemination. To that end, two theories developed to examine nontechnological systems are integrated and extended to intranets and computer-based knowledge systems: the theory of transactive memory and the public goods theory of collective action. Transactive memory theory is useful for predicting how organizational members use intranets to acquire, store, and retrieve knowledge. Public goods theory is useful for predicting which, how much, and when members will contribute and retrieve knowledge on intranets. 
(PsycInfo Database Record (c) 2020 APA, all rights reserved)}, + isbn = {978-0-262-08305-8}, + keywords = {Electronic Communication,Expert Systems,Information Systems,Organizational Effectiveness,Theories,Work Teams,Working Conditions}, + file = {/home/nathante/Zotero/storage/D34UXRQE/Hollingshead et al. - Fostering Intranet Knowledge Sharing An Integrati.pdf;/home/nathante/Zotero/storage/3A3Y658C/2002-17012-014.html} +} + +@misc{hwang_why_2021, + title = {Why Do People Participate in Small Online Communities?}, + author = {Hwang, Sohyeon and Foote, Jeremy D.}, + year = {2021} +} + +@article{johnson_communication_2009, + title = {Communication {{Communities}} or ``{{CyberGhettos}}?'': {{A Path Analysis Model Examining Factors}} That {{Explain Selective Exposure}} to {{Blogs}}}, + shorttitle = {Communication {{Communities}} or ``{{CyberGhettos}}?}, + author = {Johnson, Thomas J. and Bichard, Shannon L. and Zhang, Weiwu}, + year = {2009}, + month = oct, + journal = {Journal of Computer-Mediated Communication}, + volume = {15}, + number = {1}, + pages = {60--82}, + publisher = {{Oxford Academic}}, + abstract = {This study used an online panel of Internet users to examine the degree to which blog users practice selective exposure when seeking political information. The research employed a path analysis model to explore the extent to which exposure to offline and online discussion of political issues, and offline and online media use, as well as political variables and demographic factors, predict an individual's likelihood to engage in selective exposure to blogs. The findings indicate that respondents did practice selective exposure to blogs, predominantly those who are heavy blog users, politically active both online and offline, partisan, and highly educated.}, + language = {en}, + file = {/home/nathante/Zotero/storage/VXJLUSI9/Johnson et al. 
- 2009 - Communication Communities or “CyberGhettos” A Pa.pdf;/home/nathante/Zotero/storage/R9C73297/4064810.html} +} + +@inproceedings{jones_rscience_2019, + title = {{{r/science}}: {{Challenges}} and {{Opportunities}} in {{Online Science Communication}}}, + shorttitle = {{{r/science}}}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Jones, Ridley and Colusso, Lucas and Reinecke, Katharina and Hsieh, Gary}, + year = {2019}, + month = may, + series = {{{CHI}} '19}, + pages = {1--14}, + publisher = {{Association for Computing Machinery}}, + address = {{Glasgow, Scotland, UK}}, + abstract = {Online discussion websites, such as Reddit's r/science forum, have the potential to foster science communication between researchers and the general public. However, little is known about who participates, what is discussed, and whether such websites are successful in achieving meaningful science discussions. To find out, we conducted a mixed-methods study analyzing 11,859 r/science posts and conducting interviews with 18 community members. Our results show that r/science facilitates rich information exchange and that the comments section provides a unique science communication document that guides engagement with scientific research. However, this community-sourced science communication comes largely from a knowledgeable public. We conclude with design suggestions for a number of critical problems that we uncovered: addressing the problem of topic newsworthiness and balancing broader participation and rigor.}, + isbn = {978-1-4503-5970-2}, + file = {/home/nathante/Zotero/storage/QJKUMC2A/Jones et al. 
- 2019 - rscience Challenges and Opportunities in Online .pdf} +} + +@article{joyce_predicting_2006, + title = {Predicting {{Continued Participation}} in {{Newsgroups}}}, + author = {Joyce, Elisabeth and Kraut, Robert E.}, + year = {2006}, + month = apr, + journal = {Journal of Computer-Mediated Communication}, + volume = {11}, + number = {3}, + pages = {723--747}, + issn = {1083-6101}, + abstract = {Turnover in online communities is very high, with most people who initially post a message to an online community never contributing again. In this paper, we test whether the responses that newcomers receive to their first posts influence the extent to which they continue to participate. The data come from initial posts made by 2,777 newcomers to six public newsgroups. We coded the content and valence of the initial post and its first response, if it received one, to see if these factors influenced newcomers' likelihood of posting again. Approximately 61\% of newcomers received a reply to their initial post, and those who got a reply were 12\% more likely to post to the community again; their probability of posting again increased from 44\% to 56\%. They were more likely to receive a response if they asked a question or wrote a longer post. 
Surprisingly, the quality of the response they received\textemdash its emotional tone and whether it answered a newcomer's question\textemdash did not influence the likelihood of the newcomer's posting again.}, + file = {/home/nathante/Zotero/storage/KR2VSCNN/Joyce and Kraut - 2006 - Predicting Continued Participation in Newsgroups.pdf;/home/nathante/Zotero/storage/ZVL66I3I/Joyce and Kraut - 2006 - Predicting Continued Participation in Newsgroups.pdf;/home/nathante/Zotero/storage/VK44NCYI/4617705.html;/home/nathante/Zotero/storage/YXZPKK8E/Joyce and Kraut - 2006 - Predicting Continued Participation in Newsgroups.html} +} + +@inproceedings{kairam_life_2012, + title = {The Life and Death of Online Groups: Predicting Group Growth and Longevity}, + shorttitle = {The Life and Death of Online Groups}, + booktitle = {Proceedings of the Fifth {{ACM}} International Conference on {{Web}} Search and Data Mining}, + author = {Kairam, Sanjay Ram and Wang, Dan J. and Leskovec, Jure}, + year = {2012}, + month = feb, + series = {{{WSDM}} '12}, + pages = {673--682}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {We pose a fundamental question in understanding how to identify and design successful communities: What factors predict whether a community will grow and survive in the long term? Social scientists have addressed this question extensively by analyzing offline groups which endeavor to attract new members, such as social movements, finding that new individuals are influenced strongly by their ties to members of the group. As a result, prior work on the growth of communities has treated growth primarily as a diffusion process, leading to findings about group evolution which can be difficult to explain. 
The proliferation of online social networks and communities, however, has created new opportunities to study, at a large scale and with very fine resolution, the mechanisms which lead to the formation, growth, and demise of online groups. In this paper, we analyze data from several thousand online social networks built on the Ning platform with the goal of understanding the factors contributing to the growth and longevity of groups within these networks. Specifically, we investigate the role that two types of growth (growth through diffusion and growth by other means) play during a group's formative stages from the perspectives of both the individual member and the group. Applying these insights to a population of groups of different ages and sizes, we build a model to classify groups which will grow rapidly over the short-term and long-term. Our model achieves over 79\% accuracy in predicting group growth over the following two months and over 78\% accuracy in predictions over the following two years. We utilize a similar approach to predict which groups will die within a year. The results of our combined analysis provide insight into how both early non-diffusion growth and a complex set of network constraints appear to contribute to the initial and continued growth and success of groups within social networks. 
Finally we discuss implications of this work for the design, maintenance, and analysis of online communities.}, + isbn = {978-1-4503-0747-5}, + keywords = {group formation,information diffusion,online communities,social networks}, + file = {/home/nathante/Zotero/storage/NS675EXH/Kairam et al_The Life and Death of Online Groups.pdf;/home/nathante/Zotero/storage/QZR8T2QH/Kairam et al_2012_The life and death of online groups.pdf} +} + +@incollection{karumur_content_2018, + title = {Content Is {{King}}, {{Leadership Lags}}: {{Effects}} of {{Prior Experience}} on {{Newcomer Retention}} and {{Productivity}} in {{Online Production Groups}}}, + shorttitle = {Content Is {{King}}, {{Leadership Lags}}}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Karumur, Raghav Pavan and Yu, Bowen and Zhu, Haiyi and Konstan, Joseph A.}, + year = {2018}, + month = apr, + pages = {1--13}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Organizers of online groups often struggle to recruit members who can most effectively carry out the group's activities and remain part of the group over time. In a study of a sample of 30,000 new editors belonging to 1,054 English WikiProjects, we empirically examine the effects of generalized prior work-productivity experience (measured by overall prior article edits), prior leadership experience (measured by overall prior project edits), and localized prior work-productivity experience (measured by pre-joining article edits on a project) on early retention and productivity. 
We find that (1) generalized prior work-productivity experience is positively associated with retention, but negatively associated with productivity, (2) prior leadership experience is negatively associated with both retention and productivity, and (3) localized prior work-productivity experience is positively associated with both retention and productivity within that focal project. We then discuss implications to inform the designs of early interventions aimed at group success.}, + isbn = {978-1-4503-5620-6}, + keywords = {learning transfer,newcomers,online communities,online groups,peer production,prior experience,productivity,resocialization,retention,subgroups,wikipedia,wikiprojects,withdrawal}, + file = {/home/nathante/Zotero/storage/YANJLZCB/Karumur et al. - 2018 - Content is King, Leadership Lags Effects of Prior.pdf} +} + +@article{kavanaugh_community_2005, + title = {Community {{Networks}}: {{Where Offline Communities Meet Online}}}, + shorttitle = {Community {{Networks}}}, + author = {Kavanaugh, Andrea and Carroll, John M. and Rosson, Mary Beth and Zin, Than Than and Reese, Debbie Denise}, + year = {2005}, + month = jul, + journal = {Journal of Computer-Mediated Communication}, + volume = {10}, + number = {JCMC10417}, + issn = {1083-6101}, + abstract = {This study explores the design and practice of the Blacksburg Electronic Village (BEV), a mature networked community. We describe findings from longitudinal survey data on the use and social impact of community computer networking. The survey data show that increased involvement with people, issues and community since going online is explained by education, extroversion and age. Using path models, we show that a person's sense of belonging and collective efficacy, group memberships, activism and social use of the Internet act as mediating variables. These findings extend evidence in support of the argument that Internet use can strengthen social contact, community engagement and attachment. 
Conversely, it underlines concern about the impact of computer networking on people with lower levels of education, extroversion, efficacy, and community belonging. We suggest design strategies and innovative tools for non-experts that might increase social interaction and improve usability for disadvantaged and underrepresented individuals and groups.}, + file = {/home/nathante/Zotero/storage/IWBLRSS4/4614510.html} +} + +@inproceedings{kiene_surviving_2016, + title = {Surviving an ``{{Eternal September}}'': {{How}} an Online Community Managed a Surge of Newcomers}, + shorttitle = {Surviving an ``{{Eternal September}}''}, + booktitle = {Proceedings of the 2016 {{ACM Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '16)}, + author = {Kiene, Charles and {Monroy-Hern{\'a}ndez}, Andr{\'e}s and Hill, Benjamin Mako}, + year = {2016}, + pages = {1152--1156}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {We present a qualitative analysis of interviews with participants in the NoSleep community within Reddit where millions of fans and writers of horror fiction congregate. We explore how the community handled a massive, sudden, and sustained increase in new members. Although existing theory and stories like Usenet's infamous ``Eternal September'' suggest that large influxes of newcomers can hurt online communities, our interviews suggest that NoSleep survived without major incident. We propose that three features of NoSleep allowed it to manage the rapid influx of newcomers gracefully: (1) an active and well-coordinated group of administrators, (2) a shared sense of community which facilitated community moderation, and (3) technological systems that mitigated norm violations. We also point to several important trade-offs and limitations.}, + isbn = {978-1-4503-3362-7}, + keywords = {newcomers,norms and governance,online communities,peer production,qualitative methods}, + file = {/home/nathante/Zotero/storage/2YPT6BUL/Kiene et al. 
- 2016 - Surviving an Eternal September How an Online Co.pdf;/home/nathante/Zotero/storage/S9JX8XE5/Kiene et al. - 2016 - Surviving an “Eternal September” How an online co.pdf} +} + +@article{kiene_technological_2019, + title = {Technological Frames and User Innovation: Exploring Technological Change in Community Moderation Teams}, + shorttitle = {Technological Frames and User Innovation}, + author = {Kiene, Charles and Jiang, Jialun "Aaron" and Hill, Benjamin Mako}, + year = {2019}, + month = nov, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {3}, + number = {CSCW}, + pages = {44:1--44:23}, + abstract = {Management of technological change in organizations is one of the most enduring topics in the literature on computer-supported cooperative work. The successful navigation of technological change is both more challenging and more critical in online communities that are entirely mediated by technology than it is in traditional organizations. This paper presents an analysis of 14 in-depth interviews with moderators of subcommunities of one technological platform (Reddit) that added communities on a new technological platform (Discord). Moderation teams experienced several problems related to moderating content at scale as well as a disconnect between the affordances of Discord and their assumptions based on their experiences on Reddit. We found that moderation teams used Discord's API to create scripts and bots that augmented Discord to make the platform work more like tools on Reddit. These tools were particularly important in communities struggling with scale. 
Our findings suggest that increasingly widespread end user programming allows users of social computing systems to innovate and deploy solutions to unanticipated design problems by transforming new technological platforms to align with their past expectations.}, + keywords = {API,bots,chat,computer-mediated communication,discord,moderation,online communities,reddit,social computing,technological change}, + file = {/home/nathante/Zotero/storage/E2PDCY58/Kiene et al. - 2019 - Technological frames and user innovation explorin.pdf;/home/nathante/Zotero/storage/U7M6IZY4/Kiene et al. - 2019 - Technological Frames and User Innovation Explorin.pdf} +} + +@article{klein_quality_2017, + title = {Quality Standards, Service Orientation, and Power in {{Airbnb}} and {{Couchsurfing}}}, + author = {Klein, Maximilian and Zhao, Jinhao and Ni, Jiajun and Johnson, Isaac and Hill, Benjamin Mako and Zhu, Haiyi}, + year = {2017}, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {1}, + number = {CSCW}, + pages = {58:1--58:21}, + issn = {2573-0142}, + abstract = {Although Couchsurfing and Airbnb are both online communities that help users host strangers in their homes, they differ in an important sense: Couchsurfing prohibits monetary payment while Airbnb is built around it. We conducted interviews with users experienced on both Couchsurfing and Airbnb ("dual-users") to better understand systemic differences between the platforms. Based on these interviews we propose that, compared to Couchsurfing, Airbnb: (1) appears to require higher quality services, (2) places more emphasis on places over people, and (3) shifts social power from hosts to guests. Using public profiles from both platforms, we present analyses exploring each theme. Finally, we present evidence showing that Airbnb's growth has coincided with a decline in Couchsurfing. 
Taken together, our findings paint a complex picture of the changing character of network hospitality.}, + file = {/home/nathante/Zotero/storage/WQS43NPP/Klein et al. - 2017 - Quality Standards, Service Orientation, and Power .pdf} +} + +@article{kou_understanding_2018, + title = {Understanding {{Social Roles}} in an {{Online Community}} of {{Volatile Practice}}: {{A Study}} of {{User Experience Practitioners}} on {{Reddit}}}, + shorttitle = {Understanding {{Social Roles}} in an {{Online Community}} of {{Volatile Practice}}}, + author = {Kou, Yubo and Gray, Colin M. and Toombs, Austin L. and Adams, Robin S.}, + year = {2018}, + month = dec, + journal = {ACM Transactions on Social Computing}, + volume = {1}, + number = {4}, + pages = {17:1--17:22}, + issn = {2469-7818}, + abstract = {Community of practice (CoP) is a primary framework in social computing research that addresses learning and organizing specific practices in online communities. However, the classic CoP theory does not provide a detailed account for how practices change or evolve. Against the backdrop of a rapidly changing occupational landscape, it is crucial to understand how people participate in online communities focused on practices that have a volatile nature, as well as how social computing tools can best support them. In this article, we examine user experience (UX) design as a volatile practice that has no coherent body of knowledge and lacks a concrete path for newcomers to become a UX professional. Our study site is the ``/r/userexperience'' subreddit, an online UX community where practitioners socialize and learn. Using a mixed-methods approach, we identified five distinct social roles in relation to knowledge production and dissemination in the online community of volatile practice. We demonstrate that knowledge production is highly distributed, involving the participation and sensemaking of community members of varied levels of experience. 
We discuss how online platforms support online community of volatile practice and how our findings contribute to the CoP literature.}, + file = {/home/nathante/Zotero/storage/NWK464BS/Kou et al. - 2018 - Understanding Social Roles in an Online Community .pdf} +} + +@book{kraut_building_2012, + ids = {kraut2012building,kraut_building_2012-1}, + title = {Building Successful Online Communities: {{Evidence}}-Based Social Design}, + author = {Kraut, Robert E. and Resnick, Paul and Kiesler, Sara}, + year = {2012}, + publisher = {{MIT Press}}, + address = {{Cambridge, MA}}, + abstract = {Uses insights from social science, psychology, and economics to offer advice on planning and managing an online community.}, + isbn = {978-0-262-29831-5}, + language = {English}, + keywords = {design,foundations of social computing}, + file = {/home/nathante/Zotero/storage/B4XSKAVW/04-kraut10-Newcomers-current.pdf;/home/nathante/Zotero/storage/CX4KDC3G/01-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/IJCEWA6L/06-Resnick10-Startup-current.pdf;/home/nathante/Zotero/storage/JEWAVXHG/02-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/RIM4D9KS/05-kiesler10-Regulation-current.pdf;/home/nathante/Zotero/storage/S6Z28BBS/03-Ren10-Commitment-current.pdf} +} + +@inproceedings{kumar_community_2018, + ids = {kumar_community_2018-1}, + title = {Community {{Interaction}} and {{Conflict}} on the {{Web}}}, + booktitle = {Proceedings of the 2018 {{World Wide Web Conference}}}, + author = {Kumar, Srijan and Hamilton, William L. and Leskovec, Jure and Jurafsky, Dan}, + year = {2018}, + month = apr, + series = {{{WWW}} '18}, + pages = {933--943}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + address = {{Lyon, France}}, + abstract = {Users organize themselves into communities on web platforms. These communities can interact with one another, often leading to conflicts and toxic interactions. 
However, little is known about the mechanisms of interactions between communities and how they impact users. Here we study intercommunity interactions across 36,000 communities on Reddit, examining cases where users of one community are mobilized by negative sentiment to comment in another community. We show that such conflicts tend to be initiated by a handful of communities---less than 1\% of communities start 74\% of conflicts. While conflicts tend to be initiated by highly active community members, they are carried out by significantly less active members. We find that conflicts are marked by formation of echo chambers, where users primarily talk to other users from their own community. In the long-term, conflicts have adverse effects and reduce the overall activity of users in the targeted communities. Our analysis of user interactions also suggests strategies for mitigating the negative impact of conflicts---such as increasing direct engagement between attackers and defenders. Further, we accurately predict whether a conflict will occur by creating a novel LSTM model that combines graph embeddings, user, community, and text features. This model can be used to create an early-warning system for community moderators to prevent conflicts. Altogether, this work presents a data-driven view of community interactions and conflict, and paves the way towards healthier online communities.}, + isbn = {978-1-4503-5639-8}, + keywords = {antisocial behavior,community,conflict,interaction,intercommunity,society,web}, + file = {/home/nathante/Zotero/storage/3R7J48EQ/Kumar et al_2018_Community Interaction and Conflict on the Web.pdf;/home/nathante/Zotero/storage/FPJ44933/Kumar et al. 
- 2018 - Community Interaction and Conflict on the Web.pdf;/home/nathante/Zotero/storage/U6GYGZDS/Kumar_et_al-2018-Community_interaction_conflict-WWW.pdf} +} + +@article{lakhani_how_2003, + title = {How Open Source Software Works: "{{Free}}" User-to-User Assistance}, + shorttitle = {How Open Source Software Works}, + author = {Lakhani, Karim R. and {von Hippel}, Eric}, + year = {2003}, + journal = {Research Policy}, + volume = {32}, + number = {6}, + pages = {923--943}, + abstract = {Research into free and open source software development projects has so far largely focused on how the major tasks of software development are organized and motivated. But a complete project requires the execution of "mundane but necessary" tasks as well. In this paper, we explore how the mundane but necessary task of field support is organized in the case of Apache web server software, and why some project participants are motivated to provide this service gratis to others. We find that the Apache field support system functions effectively. We also find that, when we partition the help system into its component tasks, 98\% of the effort expended by information providers in fact returns direct learning benefits to those providers. This finding considerably reduces the puzzle of why information providers are willing to perform this task "for free." 
Implications are discussed.}, + keywords = {Econometrics,FOSS,Innovation}, + file = {/home/nathante/Zotero/storage/TZST9JHU/Lakhani and von Hippel - 2003 - How open source software works.pdf} +} + +@inproceedings{lampe_motivations_2010, + title = {Motivations to Participate in Online Communities}, + booktitle = {Proceedings of the 28th International Conference on {{Human}} Factors in Computing Systems}, + author = {Lampe, Cliff and Wash, Rick and Velasquez, Alcides and Ozkaya, Elif}, + year = {2010}, + pages = {1927--1936}, + publisher = {{ACM}}, + address = {{Atlanta, Georgia, USA}}, + abstract = {A consistent theoretical and practical challenge in the design of socio-technical systems is that of motivating users to participate in and contribute to them. This study examines the case of Everything2.com users from the theoretical perspectives of Uses and Gratifications and Organizational Commitment to compare individual versus organizational motivations in user participation. We find evidence that users may continue to participate in a site for different reasons than those that led them to the site. Feelings of belonging to a site are important for both anonymous and registered users across different types of uses. Long-term users felt more dissatisfied with the site than anonymous users. Social and cognitive factors seem to be more important than issues of usability in predicting contribution to the site.}, + isbn = {978-1-60558-929-9}, + file = {/home/nathante/Zotero/storage/7NIQDKFR/Lampe et al. 
- 2010 - Motivations to participate in online communities.pdf} +} + +@inproceedings{lampe_slashdot_2004, + title = {Slash(Dot) and Burn: Distributed Moderation in a Large Online Conversation Space}, + shorttitle = {Slash(Dot) and Burn}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Lampe, Cliff and Resnick, Paul}, + year = {2004}, + series = {{{CHI}} '04}, + pages = {543--550}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Can a system of distributed moderation quickly and consistently separate high and low quality comments in an online conversation? Analysis of the site Slashdot.org suggests that the answer is a qualified yes, but that important challenges remain for designers of such systems. Thousands of users act as moderators. Final scores for comments are reasonably dispersed and the community generally agrees that moderations are fair. On the other hand, much of a conversation can pass before the best and worst comments are identified. Of those moderations that were judged unfair, only about half were subsequently counterbalanced by a moderation in the other direction. And comments with low scores, not at top-level, or posted late in a conversation were more likely to be overlooked by moderators.}, + isbn = {978-1-58113-702-6}, + keywords = {collaborative filtering,computer-mediated communication,recommender systems}, + file = {/home/nathante/Zotero/storage/J4ALSW7H/Lampe and Resnick - 2004 - Slash(dot) and burn distributed moderation in a l.pdf} +} + +@incollection{lazarsfeld_friendship_1954, + title = {Friendship as a Social Process: A Substantive and Methodological Analysis}, + booktitle = {Freedom and Control in Modern Society}, + author = {Lazarsfeld, Paul F. 
and Merton, Robert K.}, + editor = {Berger, Morroe and Abel, Theodore and Page, Charles H.}, + year = {1954}, + pages = {18--66}, + publisher = {{Van Nostrand}}, + address = {{New York}}, + abstract = {Page} +} + +@article{lazer_studying_2020, + title = {Studying Human Attention on the {{Internet}}}, + author = {Lazer, David}, + year = {2020}, + month = jan, + journal = {Proceedings of the National Academy of Sciences}, + volume = {117}, + number = {1}, + pages = {21--22}, + issn = {0027-8424, 1091-6490}, + language = {en}, + file = {/home/nathante/Zotero/storage/T8C43YAK/Lazer - 2020 - Studying human attention on the Internet.pdf} +} + +@inproceedings{leavitt_role_2017, + title = {The Role of Information Visibility in Network Gatekeeping: {{Information}} Aggregation on Reddit during Crisis Events}, + shorttitle = {The Role of Information Visibility in Network Gatekeeping}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Leavitt, Alex and Robinson, John J.}, + year = {2017}, + month = feb, + series = {{{CSCW}} '17}, + pages = {1246--1261}, + publisher = {{Association for Computing Machinery}}, + address = {{Portland, Oregon, USA}}, + abstract = {As social media platforms witness more and more contributions from participants during developing crisis events, some platforms provide affordances that support visibility for specific pieces of information. However, the design of information visibility, especially in the context of controlling information flows (through gatekeeping), may shape how participants collect and share up-to-date information in these systems. This paper looks at the field site of reddit.com through trace ethnography methods to understand how the design of reddit's platform (from algorithms to user roles) impacts the visibility of information and subsequently how participants aggregate information in response to ongoing events. 
Through trace ethnographic analysis, we illustrate three themes related to tensions around visibility - behavioral, structural, and relational - and show how visibility shapes the work of producing information about crises in social news sites.}, + isbn = {978-1-4503-4335-0}, + file = {/home/nathante/Zotero/storage/6PIBDNTW/Leavitt and Robinson - 2017 - The Role of Information Visibility in Network Gate.pdf} +} + +@inproceedings{leavitt_this_2015, + title = {"{{This}} Is a Throwaway Account": {{Temporary}} Technical Identities and Perceptions of Anonymity in a Massive Online Community}, + shorttitle = {"{{This}} Is a Throwaway Account"}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Leavitt, Alex}, + year = {2015}, + month = feb, + series = {{{CSCW}} '15}, + pages = {317--327}, + publisher = {{Association for Computing Machinery}}, + address = {{Vancouver, BC, Canada}}, + abstract = {This paper explores temporary identities on social media platforms and individuals' uses of these identities with respect to their perceptions of anonymity. Given the research on multiple profile maintenance, little research has examined the role that some social media platforms play in affording users with temporary identities. Further, most of the research on anonymity stops short of the concept of varying perceptions of anonymity. This paper builds on these research areas by describing the phenomenon of temporary "throwaway accounts" and their uses on reddit.com, a popular social news site. 
In addition to ethnographic trace analysis to examine the contexts in which throwaway accounts are adopted, this paper presents a predictive model that suggests that perceptions of anonymity significantly shape the potential uses of throwaway accounts and that women are much more likely to adopt temporary identities than men.}, + isbn = {978-1-4503-2922-4}, + file = {/home/nathante/Zotero/storage/7ITF227V/Leavitt - 2015 - This is a Throwaway Account Temporary Technical.pdf} +} + +@article{leavitt_upvote_2017, + title = {Upvote My News: {{The}} Practices of Peer Information Aggregation for Breaking News on Reddit.Com}, + shorttitle = {Upvote My News}, + author = {Leavitt, Alex and Robinson, John J.}, + year = {2017}, + month = dec, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {1}, + number = {CSCW}, + pages = {65:1--65:18}, + abstract = {Citizen participation in crisis communication increasingly occurs in social media contexts. As some platforms -- e.g., social news sites -- evolve around collaborative voting, filtering, and information sharing, the aggregation of breaking news information during crisis situations appears more often as an emergent practice in these online communities. Drawing from 53 interviews and descriptive quantitative analysis of reddit posts and comments, this paper presents a qualitative case study examining reddit.com members aggregate information during crisis events within the context of reddit's post/comment structure, crowd voting, and ranking algorithms. 
Using the lens of network gatekeeping, the paper shows how participants evaluate sources, organize information, and verify details to demonstrate how different affordances and limitations of information production allow or restrict particular types of network gatekeeping.}, + file = {/home/nathante/Zotero/storage/TW846G2K/Leavitt and Robinson - 2017 - Upvote My News The Practices of Peer Information .pdf} +} + +@article{leimeister_evaluation_2005, + title = {Evaluation of a {{Systematic Design}} for a {{Virtual Patient Community}}}, + author = {Leimeister, Jan Marco and Krcmar, Helmut}, + year = {2005}, + month = jul, + journal = {Journal of Computer-Mediated Communication}, + volume = {10}, + number = {JCMC1041}, + issn = {1083-6101}, + abstract = {Virtual Communities (VCs) offer ubiquitous access to information and exchange possibilities for people in similar situations, which is especially valuable for patients with chronic / life-threatening diseases. However, it is seldom considered possible to create VCs systematically. This article describes the evaluation of the design elements and factors that contributed to the success of the VC krebsgemeinschaft.de (a VC for cancer patients in the German-speaking internet), by assessing user acceptance and usage. Additionally, the existence of trust (a constituent element of working VCs) in krebsgemeinschaft.de is addressed. 
Based on these criteria, we empirically verify the chosen design components and generate insights into the systematic development and operation of VCs in general and VCs for patients in the German healthcare system in particular.}, + file = {/home/nathante/Zotero/storage/BI7E4R6W/Leimeister and Krcmar - 2005 - Evaluation of a Systematic Design for a Virtual Pa.pdf;/home/nathante/Zotero/storage/G39U4C3F/4614530.html} +} + +@inproceedings{liang_knowledge_2017, + ids = {liang_knowledge_2017-1}, + title = {Knowledge Sharing in Online Discussion Threads: What Predicts the Ratings?}, + shorttitle = {Knowledge Sharing in Online Discussion Threads}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Liang, Yuyang}, + year = {2017}, + month = feb, + series = {{{CSCW}} '17}, + pages = {146--154}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {As an important category of user-generated content (UGC) community, Question and Answer (Q\&A) community offers internet users opportunities to ask questions and share knowledge with others. In order to understand how the ratings of knowledge contribution quality correlate with the way knowledge is being shared in discussion threads, the study examines user behaviors and profiles in a large knowledge sharing community, /r/Techsupport, a discussion based Q\&A site in Reddit.com concerning internet and technology problems. Negative binomial regressions and negative binomial mixed models are built to investigate the relationships among thread structure, level of user activity, user profiles and the ratings of threads and comments in the community. Results indicate that in the better rated threads, the structures tend to be more centralized with heterogeneous participants discussing the problem at a deeper level. 
Meanwhile, contributions with good ratings are more likely to be produced by users who are more engaged in commenting behaviors.}, + isbn = {978-1-4503-4335-0}, + keywords = {knowledge sharing,network structure,online community,threaded discussion,user generated content,user profile}, + file = {/home/nathante/Zotero/storage/852P8MGY/Liang - 2017 - Knowledge Sharing in Online Discussion Threads Wh.pdf} +} + +@inproceedings{lin_better_2017, + title = {Better When It Was Smaller? {{Community}} Content and Behavior after Massive Growth.}, + shorttitle = {Better {{When It Was Smaller}}?}, + booktitle = {{{ICWSM}}}, + author = {Lin, Zhiyuan and Salehi, Niloufar and Yao, Bowen and Chen, Yiqi and Bernstein, Michael S.}, + year = {2017}, + pages = {132--141}, + publisher = {{AAAI}}, + address = {{Montreal, Canada}}, + abstract = {Online communities have a love-hate relationship with membership growth: new members bring fresh perspectives, but old-timers worry that growth interrupts the community's social dynamic and lowers content quality. To arbitrate these two theories, we analyze over 45 million comments from 10 Reddit subcommunities following an exogenous shock when each subcommunity was added to the default set for all Reddit users. Capitalizing on these natural experiments, we test for changes to the content vote patterns, linguistic patterns, and community network patterns before and after being defaulted. Results support a narrative that the communities remain high-quality and similar to their previous selves even post-growth. There is a temporary dip in upvote scores right after the communities were defaulted, but the communities quickly recover to pre-default or even higher levels. Likewise, complaints about low-quality posts do not rise in frequency after getting defaulted. Strong moderation also helps keep upvotes common and complaint levels low. Communities' language use does not become more like the rest of Reddit after getting defaulted. 
However, growth does have some impact on attention: community members cluster their activity around a smaller proportion of posts after the community is defaulted.}, + file = {/home/nathante/Zotero/storage/3NB3IZUR/Lin et al. - 2017 - Better When It Was Smaller Community Content and .pdf} +} + +@inproceedings{litt_just_2016, + title = {"{{Just Cast}} the {{Net}}, and {{Hopefully}} the {{Right Fish Swim}} into {{It}}": {{Audience Management}} on {{Social Network Sites}}}, + shorttitle = {"{{Just Cast}} the {{Net}}, and {{Hopefully}} the {{Right Fish Swim}} into {{It}}"}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer}}-{{Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Litt, Eden and Hargittai, Eszter}, + year = {2016}, + month = feb, + series = {{{CSCW}} '16}, + pages = {1488--1500}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {When users post on social network sites, they can engage in audience-reaching strategies, in an effort to reach desired audience members, as well as audience-limiting strategies, in an effort to avoid unwanted audience members. While much research has focused on users' audience-limiting strategies, little research has explicitly focused on users' audience-reaching strategies. Additionally, little work has explored either strategy at the post level. Using mixed methods involving a diary study and follow-up interviews focused on a diverse group of users' posts, this article reveals several audience-reaching strategies users engaged from altering their content to tagging. However, users in this study rarely used strategies to exclude people proactively and technologically outside of their targeted audiences, and instead broadcasted widely. 
Participants described several rationales for sharing broadly from skill-related issues to a reliance on the audience or site to filter the content.}, + isbn = {978-1-4503-3592-8}, + keywords = {Audience,audience management,audience-reaching strategies,imagined audience,privacy,social network sites}, + file = {/home/nathante/Zotero/storage/UKKUVHK2/Litt_Hargittai_2016_“\;Just Cast the Net, and Hopefully the Right Fish Swim into It”\;.pdf} +} + +@inproceedings{lu_investigate_2019, + title = {Investigate {{Transitions}} into {{Drug Addiction}} through {{Text Mining}} of {{Reddit Data}}}, + booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}}, + author = {Lu, John and Sridhar, Sumati and Pandey, Ritika and Hasan, Mohammad Al and Mohler, George}, + year = {2019}, + month = jul, + series = {{{KDD}} '19}, + pages = {2367--2375}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Increasing rates of opioid drug abuse and heightened prevalence of online support communities underscore the necessity of employing data mining techniques to better understand drug addiction using these rapidly developing online resources. In this work, we obtained data from Reddit, an online collection of forums, to gather insight into drug use/misuse using text snippets from users narratives. Specifically, using users' posts, we trained a binary classifier which predicts a user's transitions from casual drug discussion forums to drug recovery forums. We also proposed a Cox regression model that outputs likelihoods of such transitions. In doing so, we found that utterances of select drugs and certain linguistic features contained in one's posts can help predict these transitions. 
Using unfiltered drug-related posts, our research delineates drugs that are associated with higher rates of transitions from recreational drug discussion to support/recovery discussion, offers insight into modern drug culture, and provides tools with potential applications in combating the opioid crisis.}, + isbn = {978-1-4503-6201-6}, + keywords = {cox regression,drug addiction and recovery,reddit forum,text mining}, + file = {/home/nathante/Zotero/storage/GUQKME9M/Lu et al_2019_Investigate Transitions into Drug Addiction through Text Mining of Reddit Data.pdf} +} + +@inproceedings{ma_when_2019, + title = {When {{Do People Trust Their Social Groups}}?}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Ma, Xiao and Cheng, Justin and Iyer, Shankar and Naaman, Mor}, + year = {2019}, + month = may, + pages = {1--12}, + publisher = {{ACM}}, + address = {{Glasgow, Scotland, UK}}, + isbn = {978-1-4503-5970-2}, + language = {en}, + file = {/home/nathante/Zotero/storage/ZEWUJPHL/Ma et al. - 2019 - When Do People Trust Their Social Groups.pdf} +} + +@article{majchrzak_contradictory_2013, + title = {The {{Contradictory Influence}} of {{Social Media Affordances}} on {{Online Communal Knowledge Sharing}}}, + author = {Majchrzak, Ann and Faraj, Samer and Kane, Gerald C. and Azad, Bijan}, + year = {2013}, + month = oct, + journal = {Journal of Computer-Mediated Communication}, + volume = {19}, + number = {1}, + pages = {38--55}, + publisher = {{Oxford Academic}}, + abstract = {The use of social media creates the opportunity to turn organization-wide knowledge sharing in the workplace from an intermittent, centralized knowledge management process to a continuous online knowledge conversation of strangers, unexpected interpretations and re-uses, and dynamic emergence. 
We theorize four affordances of social media representing different ways to engage in this publicly visible knowledge conversations: metavoicing, triggered attending, network-informed associating, and generative role-taking. We further theorize mechanisms that affect how people engage in the knowledge conversation, finding that some mechanisms, when activated, will have positive effects on moving the knowledge conversation forward, but others will have adverse consequences not intended by the organization. These emergent tensions become the basis for the implications we draw.}, + language = {en}, + file = {/home/nathante/Zotero/storage/9U9NTEVE/Majchrzak et al. - 2013 - The Contradictory Influence of Social Media Afford.pdf;/home/nathante/Zotero/storage/DBAC2BYD/4067499.html} +} + +@article{majchrzak_effect_2016, + title = {Effect of {{Knowledge}}-{{Sharing Trajectories}} on {{Innovative Outcomes}} in {{Temporary Online Crowds}}}, + author = {Majchrzak, Ann and Malhotra, Arvind}, + year = {2016}, + month = nov, + journal = {Information Systems Research}, + issn = {1047-7047}, + abstract = {There is substantial research on the effects of formal control structures (i.e., incentives, identities, organization, norms) on knowledge sharing leading to innovative outcomes in online communities. However, there is little research on how knowledge-sharing trajectories in temporary online crowds create innovative outcomes without these structures. Such research is particularly of interest in the context of temporary online crowds solicited with crowdsourcing in which there is only minimal structure for knowledge sharing. We identify eight types of crowdsourcing with different knowledge-sharing patterns. The focus of this study is on the one type of crowdsourcing\textemdash collaborative innovation challenges\textemdash in which there is the least restriction on knowledge sharing in the crowd. 
A content analysis was conducted of all time-stamped posts made in five different collaborative innovation challenges to identify different knowledge-sharing trajectories used. We found that a paradox-framed trajectory was more likely to be followed by innovative outcomes compared to three other knowledge-sharing trajectories. A paradox-framed trajectory is one in which a novel solution emerges when different participants post in the following sequence: (1) contributing a paradox associated with the problem objective, (2) sharing assumptions to validate the paradox, and (3) sharing initial ideas for resolving the paradox in a manner that meets the problem statement. Based on the findings, a theory of paradox-framed trajectories in temporary online crowds is presented along with implications for knowledge creation theories in general and online knowledge-creating communities in particular.}, + file = {/home/nathante/Zotero/storage/XI69RCFW/Majchrzak and Malhotra - 2016 - Effect of Knowledge-Sharing Trajectories on Innova.pdf} +} + +@article{mamie_are_2021, + title = {Are {{Anti}}-{{Feminist Communities Gateways}} to the {{Far Right}}? {{Evidence}} from {{Reddit}} and {{YouTube}}}, + shorttitle = {Are {{Anti}}-{{Feminist Communities Gateways}} to the {{Far Right}}?}, + author = {Mami{\'e}, Robin and Ribeiro, Manoel Horta and West, Robert}, + year = {2021}, + month = feb, + journal = {arXiv:2102.12837 [cs]}, + eprint = {2102.12837}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Researchers have suggested that "the Manosphere," a conglomerate of men-centered online communities, may serve as a gateway to far right movements. In that context, this paper quantitatively studies the migratory patterns between a variety of groups within the Manosphere and the Alt-right, a loosely connected far right movement that has been particularly active in mainstream social networks. 
Our analysis leverages over 300 million comments spread through Reddit (in 115 subreddits) and YouTube (in 526 channels) to investigate whether the audiences of channels and subreddits associated with these communities have converged between 2006 and 2018. In addition to subreddits related to the communities of interest, we also collect data on counterparts: other groups of users which we use for comparison (e.g., for YouTube we use a set of media channels). Besides measuring the similarity in the commenting user bases of these communities, we perform a migration study, calculating to which extent users in the Manosphere gradually engage with Alt-right content. Our results suggest that there is a large overlap between the user bases of the Alt-right and of the Manosphere and that members of the Manosphere have a bigger chance to engage with far right content than carefully chosen counterparts. However, our analysis also shows that migration and user base overlap varies substantially across different platforms and within the Manosphere. Members of some communities (e.g., Men's Rights Activists) gradually engage with the Alt-right significantly more than counterparts on both Reddit and YouTube, whereas for other communities, this engagement happens mostly on Reddit (e.g., Pick Up Artists). Overall, our work paints a nuanced picture of the pipeline between the Manosphere and the Alt-right, which may inform platforms' policies and moderation decisions regarding these communities.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computers and Society}, + file = {/home/nathante/Zotero/storage/33R8MJF4/Mamié et al. 
- 2021 - Are Anti-Feminist Communities Gateways to the Far Right.pdf;/home/nathante/Zotero/storage/N8VBLTAY/2102.html} +} + +@article{marwick_i_2011, + ids = {marwick_i_2011-1}, + title = {I Tweet Honestly, {{I}} Tweet Passionately: {{Twitter}} Users, Context Collapse, and the Imagined Audience}, + shorttitle = {I Tweet Honestly, {{I}} Tweet Passionately}, + author = {Marwick, A. E. and {boyd}, danah}, + year = {2011}, + month = feb, + journal = {New Media \& Society}, + volume = {13}, + number = {1}, + pages = {114--133}, + issn = {1461-4448}, + abstract = {Social media technologies collapse multiple audiences into single contexts, making it difficult for people to use the same techniques online that they do to handle multiplicity in face-to-face conversation. This article investigates how content producers navigate `imagined audiences' on Twitter. We talked with participants who have different types of followings to understand their techniques, including targeting different audiences, concealing subjects, and maintaining authenticity. Some techniques of audience management resemble the practices of `micro-celebrity' and personal branding, both strategic self-commodification. 
Our model of the networked audience assumes a many-to-many communication through which individuals conceptualize an imagined audience evoked through their tweets.}, + language = {en}, + keywords = {imagined audiences,qualitative,SNS}, + file = {/home/nathante/Zotero/storage/GHXUFS86/Marwick and boyd - 2011 - I tweet honestly, I tweet passionately Twitter us.pdf} +} + +@article{massanari_gamergate_2017, + title = {\#{{Gamergate}} and {{The Fappening}}: {{How Reddit}}'s Algorithm, Governance, and Culture Support Toxic Technocultures}, + shorttitle = {\#{{Gamergate}} and {{The Fappening}}}, + author = {Massanari, Adrienne}, + year = {2017}, + month = mar, + journal = {New Media \& Society}, + volume = {19}, + number = {3}, + pages = {329--346}, + issn = {1461-4448}, + abstract = {This article considers how the social-news and community site Reddit.com has become a hub for anti-feminist activism. Examining two recent cases of what are defined as ``toxic technocultures'' (\#Gamergate and The Fappening), this work describes how Reddit's design, algorithm, and platform politics implicitly support these kinds of cultures. In particular, this piece focuses on the ways in which Reddit's karma point system, aggregation of material across subreddits, ease of subreddit and user account creation, governance structure, and policies around offensive content serve to provide fertile ground for anti-feminist and misogynistic activism. The ways in which these events and communities reflect certain problematic aspects of geek masculinity are also considered. 
This research is informed by the results of a long-term participant-observation and ethnographic study into Reddit's culture and community and is grounded in actor-network theory.}, + language = {en}, + keywords = {Algorithms,design,Gamergate,gender,online communities,online harassment,platform politics,Reddit,The Fappening,toxic technocultures}, + file = {/home/nathante/Zotero/storage/D5W5JKQU/Massanari - 2017 - #Gamergate and The Fappening How Reddit’s algorit.pdf;/home/nathante/Zotero/storage/NGCFX9JB/Massanari - 2017 - #Gamergate and The Fappening How Reddit’s algorit.pdf} +} + +@article{matias_civic_2019, + title = {The Civic Labor of Volunteer Moderators Online}, + author = {Matias, J. Nathan}, + year = {2019}, + month = apr, + journal = {Social Media + Society}, + volume = {5}, + number = {2}, + pages = {1--12}, + issn = {2056-3051, 2056-3051}, + abstract = {Volunteer moderators create, support, and control public discourse for millions of people online, even as moderators' uncompensated labor upholds platform funding models. What is the meaning of this work and who is it for? In this article, I examine the meanings of volunteer moderation on the social news platform reddit. Scholarship on volunteer moderation has viewed this work separately as digital labor for platforms, civic participation in communities, or oligarchy among other moderators. In mixed-methods research sampled from over 52,000 subreddit communities and in over a dozen interviews, I show how moderators adopt all of these frames as they develop and re-develop everyday meanings of moderation\textemdash facing the platform, their communities, and other moderators alike. I also show how this civic notion of digital labor brings clarity to a strike by moderators in July 2015. Volunteer governance remains a common approach to managing social relations, conflict, and civil liberties online. 
Our ability to see how communities negotiate the meaning of moderation will shape our capacity to address digital governance as a society.}, + language = {en}, + file = {/home/nathante/Zotero/storage/Q8BACUUZ/Matias - 2019 - The Civic Labor of Volunteer Moderators Online.pdf} +} + +@article{mcmillan_sense_1986, + title = {Sense of Community: {{A}} Definition and Theory}, + shorttitle = {Sense of Community}, + author = {McMillan, David W. and Chavis, David M.}, + year = {1986}, + journal = {Journal of Community Psychology}, + volume = {14}, + number = {1}, + pages = {6--23}, + publisher = {{John Wiley \& Sons}}, + address = {{US}}, + issn = {1520-6629(Electronic),0090-4392(Print)}, + abstract = {Proposes that a sense of community is a feeling that members have of belonging, a feeling that members matter to one another and to the group, and a shared faith that members' needs will be met through commitment to be together. The authors apply the term community equally to territorial communities (e.g., neighborhoods) and to relational communities (e.g., professional, spiritual). The proposed definition of a sense of community has 4 elements: membership, influence, integration and fulfillment of needs, and shared emotional connection. Subelements of these elements of a sense of community and how they work dynamically together to create and maintain it are described. Hypothetical examples from a university, neighborhood, youth gang, and kibbutz are presented to illustrate the interworkings of the elements of a sense of community. It is suggested that this understanding of sense of community has implications for community treatment programs for the mentally retarded and mentally ill. Where "community" means more than residency outside of an institution, strategies can be introduced to allow the therapeutic benefits of community to be developed within group homes and to provide for better integration with communities surrounding such facilities. 
(90 ref) (PsycINFO Database Record (c) 2016 APA, all rights reserved)}, + keywords = {Communities,Community Psychology,Group Dynamics,Sense of Community,Theories}, + file = {/home/nathante/Zotero/storage/D5ECP4GI/1987-03834-001.html} +} + +@article{mcpherson_birds_2001, + title = {Birds of a {{Feather}}: {{Homophily}} in {{Social Networks}}}, + shorttitle = {Birds of a {{Feather}}}, + author = {McPherson, Miller and {Smith-Lovin}, Lynn and Cook, James M}, + year = {2001}, + month = aug, + journal = {Annual Review of Sociology}, + volume = {27}, + number = {1}, + pages = {415--444}, + publisher = {{Annual Reviews}}, + issn = {0360-0572}, + abstract = {Similarity breeds connection. This principle\textemdash the homophily principle\textemdash structures network ties of every type, including marriage, friendship, work, advice, support, information transfer, exchange, comembership, and other types of relationship. The result is that people's personal networks are homogeneous with regard to many sociodemographic, behavioral, and intrapersonal characteristics. Homophily limits people's social worlds in a way that has powerful implications for the information they receive, the attitudes they form, and the interactions they experience. Homophily in race and ethnicity creates the strongest divides in our personal environments, with age, religion, education, occupation, and gender following in roughly that order. Geographic propinquity, families, organizations, and isomorphic positions in social systems all create contexts in which homophilous relations form. Ties between nonsimilar individuals also dissolve at a higher rate, which sets the stage for the formation of niches (localized positions) within social space. 
We argue for more research on: (a) the basic ecological processes that link organizations, associations, cultural communities, social movements, and many other social forms; (b) the impact of multiplex ties on the patterns of homophily; and (c) the dynamics of network change over time through which networks and other social entities co-evolve.}, + file = {/home/nathante/Zotero/storage/DWSDWJ8E/McPherson et al. - 2001 - Birds of a Feather Homophily in Social Networks.pdf;/home/nathante/Zotero/storage/GFG4ZCE8/annurev.soc.27.1.html} +} + +@article{mcpherson_ecology_1983, + title = {An Ecology of Affiliation}, + author = {McPherson, J. Miller}, + year = {1983}, + journal = {American Sociological Review}, + volume = {48}, + number = {4}, + pages = {519--532}, + issn = {0003-1224}, + abstract = {This paper develops an ecological model of the competition of social organizations for members. The concept of the ecological niche is quantified explicitly in a way which ties together geography, time, and the social composition of organizations. A differential equation model analogous to the Lotka-Volterra competition equations in biology captures the dynamics of the system. This dynamic model is related to the niche concept in a novel way, which produces an easily understood and powerful picture of the static and dynamic structure of the community. This new perspective provides a theoretical link between the aggregate macrostructural theory of Blau (1977a,b) and the microstructural dynamics of organizational demography (Pfeffer, 1983). 
The model is tested with data on organizations from a midwestern city.}, + file = {/home/nathante/Zotero/storage/WIDCF8XB/McPherson - 1983 - An ecology of affiliation.pdf} +} + +@article{mittell_sites_2009, + title = {Sites of Participation: {{Wiki}} Fandom and the Case of {{Lostpedia}}}, + shorttitle = {Sites of Participation}, + author = {Mittell, Jason}, + year = {2009}, + month = jul, + journal = {Transformative Works and Cultures}, + volume = {3}, + issn = {1941-2258}, + abstract = {This essay explores the award-winning fan site Lostpedia to examine how the wiki platform enables fan engagement, structures participation, and distinguishes between various forms of content, including canon, fanon, and parody. I write as a participant-observer, with extensive experience as a Lostpedia reader and editor. The article uses the "digital breadcrumbs" of wikis to trace the history of fan creativity, participation, game play, and debates within a shared site of community fan engagement. Using the Lostpedia site as a case study of fan praxis, the article highlights how issues like competing fandoms, copyright, and modes of discourse become manifest via the user-generated content of a fan wiki.} +} + +@inproceedings{morris_comparison_2010, + title = {A {{Comparison}} of {{Information Seeking Using Search Engines}} and {{Social Networks}}}, + booktitle = {Fourth {{International AAAI Conference}} on {{Weblogs}} and {{Social Media}}}, + author = {Morris, Meredith Ringel and Teevan, Jaime and Panovich, Katrina}, + year = {2010}, + month = may, + abstract = {The Web has become an important information repository; often it is the first source a person turns to with an information need. One common way to search the Web is with a search engine. However, it is not always easy for people to find what they are looking for with keyword search, and at times the desired information may not be readily available online. 
An alternative, facilitated by the rise of social media, is to pose a question to one's online social network. In this paper, we explore the pros and cons of using a social networking tool to fill an information need, as compared with a search engine. We describe a study in which 12 participants searched the Web while simultaneously posing a question on the same topic to their social network, and we compare the results they found by each method.}, + copyright = {Authors who publish a paper in this conference agree to the following terms: 1. Author(s) agree to transfer their copyrights in their article/paper to the Association for the Advancement of Artificial Intelligence (AAAI), in order to deal with future requests for reprints, translations, anthologies, reproductions, excerpts, and other publications. This grant will include, without limitation, the entire copyright in the article/paper in all countries of the world, including all renewals, extensions, and reversions thereof, whether such rights current exist or hereafter come into effect, and also the exclusive right to create electronic versions of the article/paper, to the extent that such right is not subsumed under copyright. 2. The author(s) warrants that they are the sole author and owner of the copyright in the above article/paper, except for those portions shown to be in quotations; that the article/paper is original throughout; and that the undersigned right to make the grants set forth above is complete and unencumbered. 3. 
The author(s) agree that if anyone brings any claim or action alleging facts that, if true, constitute a breach of any of the foregoing warranties, the author(s) will hold harmless and indemnify AAAI, their grantees, their licensees, and their distributors against any liability, whether under judgment, decree, or compromise, and any legal fees and expenses arising out of that claim or actions, and the undersigned will cooperate fully in any defense AAAI may make to such claim or action. Moreover, the undersigned agrees to cooperate in any claim or other action seeking to protect or enforce any right the undersigned has granted to AAAI in the article/paper. If any such claim or action fails because of facts that constitute a breach of any of the foregoing warranties, the undersigned agrees to reimburse whomever brings such claim or action for expenses and attorneys' fees incurred therein. 4. Author(s) retain all proprietary rights other than copyright (such as patent rights). 5. Author(s) may make personal reuse of all or portions of the above article/paper in other works of their own authorship. 6. Author(s) may reproduce, or have reproduced, their article/paper for the author's personal use, or for company use provided that AAAI copyright and the source are indicated, and that the copies are not used in a way that implies AAAI endorsement of a product or service of an employer, and that the copies per se are not offered for sale. The foregoing right shall not permit the posting of the article/paper in electronic or digital form on any computer network, except by the author or the author's employer, and then only on the author's or the employer's own web page or ftp site. 
Such web page or ftp site, in addition to the aforementioned requirements of this Paragraph, must provide an electronic reference or link back to the AAAI electronic server, and shall not post other AAAI copyrighted materials not of the author's or the employer's creation (including tables of contents with links to other papers) without AAAI's written permission. 7. Author(s) may make limited distribution of all or portions of their article/paper prior to publication. 8. In the case of work performed under U.S. Government contract, AAAI grants the U.S. Government royalty-free permission to reproduce all or portions of the above article/paper, and to authorize others to do so, for U.S. Government purposes. 9. In the event the above article/paper is not accepted and published by AAAI, or is withdrawn by the author(s) before acceptance by AAAI, this agreement becomes null and void.}, + language = {en}, + file = {/home/nathante/Zotero/storage/MS2N5Z3X/Morris et al_2010_A Comparison of Information Seeking Using Search Engines and Social Networks.pdf;/home/nathante/Zotero/storage/D3C4PIU9/1518.html} +} + +@incollection{morris_what_2010, + title = {What Do People Ask Their Social Networks, and Why? A Survey Study of Status Message Q\&a Behavior}, + shorttitle = {What Do People Ask Their Social Networks, and Why?}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Morris, Meredith Ringel and Teevan, Jaime and Panovich, Katrina}, + year = {2010}, + month = apr, + pages = {1739--1748}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {People often turn to their friends, families, and colleagues when they have questions. The recent, rapid rise of online social networking tools has made doing this on a large scale easy and efficient. In this paper we explore the phenomenon of using social network status messages to ask questions. 
We conducted a survey of 624 people, asking them to share the questions they have asked and answered of their online social networks. We present detailed data on the frequency of this type of question asking, the types of questions asked, and respondents' motivations for asking their social networks rather than using more traditional search tools like Web search engines. We report on the perceived speed and quality of the answers received, as well as what motivates people to respond to questions seen in their friends' status messages. We then discuss the implications of our findings for the design of next-generation search tools.}, + isbn = {978-1-60558-929-9}, + keywords = {q\&a,social networks,social search,web search}, + file = {/home/nathante/Zotero/storage/4N6C2AYW/Morris et al_2010_What do people ask their social networks, and why.pdf} +} + +@article{muhtaseb_arab_2008, + title = {Arab {{Americans}}' {{Motives}} for {{Using}} the {{Internet}} as a {{Functional Media Alternative}} and {{Their Perceptions}} of {{U}}.{{S}}. {{Public Opinion}}}, + author = {Muhtaseb, Ahlam and Frey, Lawrence R.}, + year = {2008}, + month = apr, + journal = {Journal of Computer-Mediated Communication}, + volume = {13}, + number = {3}, + pages = {618--657}, + issn = {1083-6101}, + abstract = {This exploratory study employed uses and gratifications theory to understand Arab Americans' salient motives for using the internet and whether the internet served as a functional alternative to other media to satisfy Arab Americans' information-seeking and interpersonal needs. Spiral of silence theory also was used to investigate the relationship between Arab Americans' perceptions of U.S. public opinion and their motives for using the internet. 
Results from an online questionnaire survey (N = 124) indicated that information seeking was the most salient motive for using the internet and that the internet did serve as a functional alternative, with a significant percentage of the internet sources used being foreign based. There was, however, no relationship between Arab Americans' perceptions of U.S. public opinion and their motives for using the internet. The findings are discussed with respect to the use of the internet by members of this marginalized cultural group.}, + file = {/home/nathante/Zotero/storage/5PD4EGRG/Muhtaseb and Frey - 2008 - Arab Americans’ Motives for Using the Internet as .pdf;/home/nathante/Zotero/storage/WKH4PJ7L/4582964.html} +} + +@article{nissenbaum_internet_2017, + title = {Internet Memes as Contested Cultural Capital: {{The}} Case of 4chan's /b/ Board}, + shorttitle = {Internet Memes as Contested Cultural Capital}, + author = {Nissenbaum, Asaf and Shifman, Limor}, + year = {2017}, + month = apr, + journal = {New Media \& Society}, + volume = {19}, + number = {4}, + pages = {483--501}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This article explores the workings of memes as cultural capital in web-based communities. A grounded analysis of 4chan's /b/ board reveals three main formulations of memes as capital, delineating them as subcultural knowledge, unstable equilibriums, and discursive weapons. While the first formulation follows well-documented notions about subcultural knowledge as a basis for boundary work, the latter two focus on the dualities intrinsic to Internet memes. The contradiction between following conventions and supplying innovative content leads to memes' configuration as unstable equilibriums, triggering constant conflict about their ``correct'' use. Paradoxically, this struggle highlights collective identity, as it keeps shared culture at the center of discussion. 
Similarly, when memes are used as jabs at the most intense points of arguments, they function simultaneously as signifiers of superior authoritative status and as reminders of common affinity. Thus, the dualities underpinning memes' structure lead to their performance as contested cultural capital.}, + language = {en}, + keywords = {4chan,cultural capital,digital culture,Internet memes,web-based communities}, + file = {/home/nathante/Zotero/storage/5D4MWNNV/Nissenbaum and Shifman - 2017 - Internet memes as contested cultural capital The .pdf} +} + +@misc{noauthor_crowd_nodate, + title = {Crowd {{Size}}, {{Diversity}} and {{Performance}} | {{Proceedings}} of the 33rd {{Annual ACM Conference}} on {{Human Factors}} in {{Computing Systems}}}, + howpublished = {https://dl-acm-org.offcampus.lib.washington.edu/doi/10.1145/2702123.2702469} +} + +@book{north_institutions_1990-1, + title = {Institutions, {{Institutional Change}} and {{Economic Performance}}}, + author = {North, Douglass C.}, + year = {1990}, + series = {Political {{Economy}} of {{Institutions}} and {{Decisions}}}, + publisher = {{Cambridge University Press}}, + address = {{Cambridge}}, + abstract = {Continuing his groundbreaking analysis of economic structures, Douglass North develops an analytical framework for explaining the ways in which institutions and institutional change affect the performance of economies, both at a given time and over time. Institutions exist, he argues, due to the uncertainties involved in human interaction; they are the constraints devised to structure that interaction. Yet, institutions vary widely in their consequences for economic performance; some economies develop institutions that produce growth and development, while others develop institutions that produce stagnation. North first explores the nature of institutions and explains the role of transaction and production costs in their development. The second part of the book deals with institutional change. 
Institutions create the incentive structure in an economy, and organisations will be created to take advantage of the opportunities provided within a given institutional framework. North argues that the kinds of skills and knowledge fostered by the structure of an economy will shape the direction of change and gradually alter the institutional framework. He then explains how institutional development may lead to a path-dependent pattern of development. In the final part of the book, North explains the implications of this analysis for economic theory and economic history. He indicates how institutional analysis must be incorporated into neo-classical theory and explores the potential for the construction of a dynamic theory of long-term economic change. Douglass C. North is Director of the Center of Political Economy and Professor of Economics and History at Washington University in St. Louis. He is a past president of the Economic History Association and Western Economics Association and a Fellow, American Academy of Arts and Sciences. He has written over sixty articles for a variety of journals and is the author of The Rise of the Western World: A New Economic History (CUP, 1973, with R.P. Thomas) and Structure and Change in Economic History (Norton, 1981). Professor North is included in Great Economists Since Keynes edited by M. Blaug (CUP, 1988 paperback ed.)}, + isbn = {978-0-521-39416-1} +} + +@inproceedings{oday_orienteering_1993, + title = {Orienteering in an Information Landscape: How Information Seekers Get from Here to There}, + shorttitle = {Orienteering in an Information Landscape}, + booktitle = {Proceedings of the {{SIGCHI}} Conference on {{Human}} Factors in Computing Systems - {{CHI}} '93}, + author = {O'Day, Vicki L. 
and Jeffries, Robin}, + year = {1993}, + pages = {438--445}, + publisher = {{ACM Press}}, + address = {{Amsterdam, The Netherlands}}, + isbn = {978-0-89791-575-5}, + language = {en} +} + +@article{oliver_paradox_1988, + title = {The {{Paradox}} of {{Group Size}} in {{Collective Action}}: {{A Theory}} of the {{Critical Mass}}. {{II}}.}, + shorttitle = {The {{Paradox}} of {{Group Size}} in {{Collective Action}}}, + author = {Oliver, Pamela E. and Marwell, Gerald}, + year = {1988}, + journal = {American Sociological Review}, + volume = {53}, + number = {1}, + pages = {1--8}, + issn = {0003-1224}, + abstract = {Many sociologists incorrectly believe that larger groups are less likely to support collective action than smaller ones. The effect of group size, in fact, depends on costs. If the costs of collective goods rise with the number who share in them, larger groups act less frequently than smaller ones. If the costs vary little with group size, larger groups should exhibit more collective action than smaller ones because larger groups have more resources and are more likely to have a critical mass of highly interested and resourceful actors. The positive effects of group size increase with group heterogeneity and nonrandom social ties. 
Paradoxically, when groups are heterogeneous, fewer contributors may be needed to provide a good to larger groups, making collective action less complex and less expensive.}, + file = {/home/nathante/Zotero/storage/KDKQCV4I/Oliver and Marwell - 1988 - The Paradox of Group Size in Collective Action A .pdf} +} + +@book{olson_logic_1965, + title = {The Logic of Collective Action: {{Public}} Goods and the Theory of Groups}, + shorttitle = {The Logic of Collective Action}, + author = {Olson, Mancur}, + year = {1965}, + publisher = {{Harvard University Press}}, + address = {{Cambridge, MA}}, + language = {English}, + keywords = {Business \& Economics / Economics / General}, + file = {/home/nathante/Zotero/storage/6D295U4U/Olson - 1965 - The logic of collective action Public goods and t.pdf} +} + +@article{oreilly_work_1989, + title = {Work {{Group Demography}}, {{Social Integration}}, and {{Turnover}}}, + author = {O'Reilly, Charles A. and Caldwell, David F. and Barnett, William P.}, + year = {1989}, + journal = {Administrative Science Quarterly}, + volume = {34}, + number = {1}, + pages = {21--37}, + publisher = {{[Sage Publications, Inc., Johnson Graduate School of Management, Cornell University]}}, + issn = {0001-8392}, + abstract = {Using 20 actual work units with 79 respondents, this study explores the relationships among group demography, social integration of the group, and individual turnover. Results suggest that heterogeneity in group tenure is associated with lower levels of group social integration which, in turn, is negatively associated with individual turnover. Models of these effects using individual-level integration measures are not significant. Further, the results suggest that it is the more distant group members who are more likely to leave. Both individual-level and group-level age demography directly affect turnover and are not moderated by social integration. 
The findings suggest a process by which group demography affects outcomes and support the usefulness of organizational demography for understanding group and individual functioning.} +} + +@inproceedings{orlikowski_learning_1992, + title = {Learning from Notes: {{Organizational}} Issues in Groupware Implementation}, + shorttitle = {Learning from {{Notes}}}, + booktitle = {Proceedings of the 1992 {{ACM Conference}} on {{Computer}}-Supported {{Cooperative Work}}}, + author = {Orlikowski, Wanda J.}, + year = {1992}, + series = {{{CSCW}} '92}, + pages = {362--369}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {This paper explores the introduction of groupware into an organization to understand the changes in work practices and social interaction facilitated by the technology. The results suggest that people's mental models and organizations' structure and culture significantly influence how groupware is implemented and used. Specifically, in the absence of mental models that stressed its collaborative nature, groupware was interpreted in terms of familiar personal, stand-alone technologies such as spreadsheets. Further, the culture and structure provided few incentives or norms for cooperating or sharing expertise, hence the groupware on its own was unlikely to engender collaboration. 
Recognizing the central influence of these cognitive and organizational elements is critical to developers, researchers, and practitioners of groupware.}, + isbn = {978-0-89791-542-7}, + keywords = {groupware,implementation,Lotus Notes,organizational factors,Technological Frames}, + file = {/home/nathante/Zotero/storage/VAHU9XE7/Orlikowski - 1992 - Learning from Notes Organizational Issues in Grou.pdf} +} + +@article{park_human_1936, + title = {Human {{Ecology}}}, + author = {Park, Robert Ezra}, + year = {1936}, + month = jul, + journal = {American Journal of Sociology}, + volume = {42}, + number = {1}, + pages = {1--15}, + issn = {0002-9602}, + abstract = {Human ecology is an attempt to apply to the interrelations of human beings a type of analysis previously applied to the interrelations of plants and animals. The term "symbiosis" describes a type of social relationship that is biotic rather than cultural. This biotic social order comes into existence and is maintained by competition. In plant and animal societies competition is unrestricted by an institutional or moral order. Human society is a consequence and effect of this limitation of the symbiotic social order by the cultural. 
Different social sciences are concerned with the forms which this limitation of the natural or ecological social order assumes on (1) the economic, (2) the political, and (3) the moral level.}, + file = {/home/nathante/Zotero/storage/CBVGR8RU/Park - 1936 - Human Ecology.pdf;/home/nathante/Zotero/storage/UKMY6VUE/217327.html} +} + +@article{pfeil_cultural_2006, + ids = {pfeil_cultural_2006-1}, + title = {Cultural Differences in Collaborative Authoring of Wikipedia}, + author = {Pfeil, Ulrike and Zaphiris, Panayiotis and Ang, Chee Siang}, + year = {2006}, + journal = {Journal of Computer-Mediated Communication}, + volume = {12}, + number = {1}, + pages = {88--113}, + publisher = {{Oxford Academic}}, + issn = {1083-6101}, + abstract = {This article explores the relationship between national culture and computer-mediated communication (CMC) in Wikipedia. The articles on the topic game from the French, German, Japanese, and Dutch Wikipedia websites were studied using content analysis methods. Correlations were investigated between patterns of contributions and the four dimensions of cultural influences proposed by Hofstede (Power Distance, Collectivism versus Individualism, Femininity versus Masculinity, and Uncertainty Avoidance). The analysis revealed cultural differences in the style of contributions across the cultures investigated, some of which are correlated with the dimensions identified by Hofstede. These findings suggest that cultural differences that are observed in the physical world also exist in the virtual world.}, + language = {en}, + file = {/home/nathante/Zotero/storage/25UVU6KP/Pfeil et al. - 2006 - Cultural Differences in Collaborative Authoring of.pdf;/home/nathante/Zotero/storage/HTBSK98G/Pfeil et al. 
- 2006 - Cultural differences in collaborative authoring of.pdf;/home/nathante/Zotero/storage/NG42CGVS/4582988.html;/home/nathante/Zotero/storage/NN9FT3QC/4582988.html} +} + +@article{poor_computer_2014, + title = {Computer Game Modders' Motivations and Sense of Community: {{A}} Mixed-Methods Approach}, + shorttitle = {Computer Game Modders' Motivations and Sense of Community}, + author = {Poor, Nathaniel}, + year = {2014}, + month = dec, + journal = {New Media \& Society}, + volume = {16}, + number = {8}, + pages = {1249--1267}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Computer game modding, from modifying, combines several important issues: digital skills, play, community, making, and remixing. Yet, little academic work has explored the motivations and sense of community that modders have. This study is the first quantitative survey of game modders, and combines quantitative survey data with qualitative interview material. Findings suggest that modders are both old and young, mod more than one game or game series, have a strong sense of community, and enjoy helping others. Many respondents had contributed to other mods or had co-authored mods, and modding communities may function as online collaboratories. 
Although some research stresses how modders hope to get jobs in the gaming industry, overall the industry was not a motivator for most respondents.}, + language = {en}, + keywords = {Collaboration,games,modding,motivation,online community}, + file = {/home/nathante/Zotero/storage/SY3IWUL2/Poor - 2014 - Computer game modders’ motivations and sense of co.pdf} +} + +@article{poor_mechanisms_2005, + title = {Mechanisms of an {{Online Public Sphere}}: The {{Website Slashdot}}}, + shorttitle = {Mechanisms of an {{Online Public Sphere}}}, + author = {Poor, Nathaniel}, + year = {2005}, + month = jan, + journal = {Journal of Computer-Mediated Communication}, + volume = {10}, + number = {JCMC1028}, + issn = {1083-6101}, + abstract = {Both the theory of the public sphere and the utopian rhetoric surrounding the Internet have been a focus of scholars for some time. Given the ability of people to connect with others around the globe through the Internet, could the Internet give rise to online public spheres? If so, how would such spaces work? This article proposes that public spheres do exist on the Internet, and details how one functions. The case under study is the website Slashdot (http://slashdot.org), an online community of computer enthusiasts. The article studies the mechanisms, both normative and in code, that are vital to Slashdot's functioning, and shows how they help Slashdot function as a public sphere.}, + file = {/home/nathante/Zotero/storage/5V4CJ2HJ/4614448.html} +} + +@article{poteete_heterogeneity_2004, + title = {Heterogeneity, {{Group Size}} and {{Collective Action}}: {{The Role}} of {{Institutions}} in {{Forest Management}}}, + shorttitle = {Heterogeneity, {{Group Size}} and {{Collective Action}}}, + author = {Poteete, Amy R. 
and Ostrom, Elinor}, + year = {2004}, + journal = {Development and Change}, + volume = {35}, + number = {3}, + pages = {435--461}, + issn = {1467-7660}, + abstract = {Collective action for sustainable management among resource-dependent populations has important policy implications. Despite considerable progress in identifying factors that affect the prospects for collective action, no consensus exists about the role played by heterogeneity and size of group. The debate continues in part because of a lack of uniform conceptualization of these factors, the existence of non-linear relationships, and the mediating role played by institutions. This article draws on research by scholars in the International Forestry Resources and Institutions (IFRI) research network which demonstrates that some forms of heterogeneity do not negatively affect some forms of collective action. More importantly, IFRI research draws out the interrelations among group size, heterogeneity, and institutions. Institutions can affect the level of heterogeneity or compensate for it. Group size appears to have a non-linear relationship to at least some forms of collective action. Moreover, group size may be as much an indicator of institutional success as a precondition for such success.}, + language = {en}, + file = {/home/nathante/Zotero/storage/MVD6QER6/Poteete and Ostrom - 2004 - Heterogeneity, Group Size and Collective Action T.pdf} +} + +@article{ridgeway_status_1982, + title = {Status in {{Groups}}: {{The Importance}} of {{Motivation}}}, + shorttitle = {Status in {{Groups}}}, + author = {Ridgeway, Cecilia L.}, + year = {1982}, + journal = {American Sociological Review}, + volume = {47}, + number = {1}, + pages = {76--88}, + issn = {0003-1224}, + abstract = {This paper presents evidence that members' perceived motivation towards the group is an important determinant of the influence and status they attain in task-oriented groups. 
Following Meeker and Weitzel-O'Neill (1977) and Ridgeway (1978), it was suggested that people who enter a group with low external status characteristics (e.g., women in mixed sex groups, blacks in interracial groups) can use the communication of group-oriented motivation in combination with reasonably competent task contributions to overcome the fundamental inequality ("interaction disability") they would normally face, and achieve reasonably high levels of influence in the group. Results of an experiment using mixed and same sex groups showed that while group-oriented members are generally more influential than self-oriented ones, as predicted, the size of motivation's effect is dependent upon the member's external status characteristics. Females in male groups (low external status members) achieved fairly high influence and status when they appeared group-oriented, but very low status when self-oriented. As expected males in a female group (high external status members) achieved high influence regardless of their motivation.}, + file = {/home/nathante/Zotero/storage/F5GJIJMB/Ridgeway-1982-Status_in_groups.pdf} +} + +@book{ridgeway_status_2019, + title = {Status: Why Is It Everywhere? Why Does It Matter?}, + shorttitle = {Status}, + author = {Ridgeway, Cecilia L}, + year = {2019}, + abstract = {"Status is ubiquitous in modern life, yet our understanding of its role as a basic driver of inequality is surprisingly limited. In Status, sociologist and social psychologist Cecilia Ridgeway examines how this ancient and universal form of inequality influences today's ostensibly meritocratic institutions and why it matters. Ridgeway illuminates the complex ways in which status arises when people work together towards common goals, such as in classroom discussions, family decisions, or workplace deliberations. Ridgeway's research on status has important implications for our understanding of social inequality. 
Distinct from power or wealth, status is prized because it provides affirmation from others and affords access to valuable resources. Ridgeway demonstrates how the conferral of status inevitably leads to differing life outcomes for individuals, with impacts on pay, wealth creation, and health and wellbeing. Status beliefs are widely held views about who is better in society than others in terms of esteem, wealth, or competence. These beliefs ultimately confer advantages which can exacerbate social inequality. Ridgeway notes that status advantages based on race, gender, and class, such as the belief that white men are more competent than others because of their race and gender, have the greatest consequences for inequality by affording greater social and economic opportunities. Ridgeway argues that status beliefs make lower status groups less likely to challenge the status quo and greatly enhance higher status groups' ability to maintain their advantages in resources and access to positions of power. She illustrates how many lower status people, when given a baseline level of dignity and respect - being seen, for example, as poor but hardworking - will accept their lower status. She also shows that people remain willfully blind to status beliefs and their effects because recognizing them can lead to emotional discomfort. Acknowledging the insidious role of status in our lives would require many higher-status individuals to accept that they may not have succeeded based on their own merit; and many lower-status individuals would have to acknowledge that they may have been discriminated against. While Ridgeway notes the profound impact of status on society, she suggests that social inequality is not an inevitable consequence of our status beliefs. 
She shows how status beliefs can be undermined - as when we reject the idea that all racial and gender traits are fixed at birth, thus disrupting the idea that women and people of color are less competent than their male and white counterparts. Ridgeway both notes the profound impact of status on social inequality and charts a way forward that may allow it to have a less detrimental impact on our lives"--}, + isbn = {978-1-61044-889-5}, + language = {English}, + annotation = {OCLC: 1104214327}, + file = {/home/nathante/Zotero/storage/ZNCJF4F3/Ridgeway_2019_Status.pdf} +} + +@article{ridings_antecedents_2002, + title = {Some Antecedents and Effects of Trust in Virtual Communities}, + author = {Ridings, Catherine M and Gefen, David and Arinze, Bay}, + year = {2002}, + month = dec, + journal = {The Journal of Strategic Information Systems}, + volume = {11}, + number = {3}, + pages = {271--295}, + issn = {0963-8687}, + abstract = {This study explores several downstream effects of trust in virtual communities and the antecedents of trust in this unique type of environment. The data, applying an existing scale to measure two dimensions of trust (ability and benevolence/integrity), show that trust had a downstream effect on members' intentions to both give information and get information through the virtual community. Both these apparent dimensions of trust were increased through perceived responsive relationships in the virtual community, by a general disposition to trust, and by the belief that others confide personal information.}, + language = {en}, + keywords = {Perceived responsiveness,Trust,Virtual communities}, + file = {/home/nathante/Zotero/storage/KLVEHLMR/S0963868702000215.html} +} + +@article{ridings_virtual_2004, + ids = {ridings_virtual_2004-1}, + title = {Virtual {{Community Attraction}}: {{Why People Hang}} out {{Online}}}, + shorttitle = {Virtual {{Community Attraction}}}, + author = {Ridings, Catherine M. 
and Gefen, David}, + year = {2004}, + month = nov, + journal = {Journal of Computer-Mediated Communication}, + volume = {10}, + number = {1}, + abstract = {Abstract. Understanding the attraction of virtual communities is crucial to organizations that want to tap into their enormous information potential. Existing}, + language = {en}, + file = {/home/nathante/Zotero/storage/D64A3U6W/4614455.html;/home/nathante/Zotero/storage/NFKKWKZN/4614455.html} +} + +@incollection{robert_crowd_2015, + title = {Crowd {{Size}}, {{Diversity}} and {{Performance}}}, + booktitle = {Proceedings of the 33rd {{Annual ACM Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Robert, Lionel and Romero, Daniel M.}, + year = {2015}, + month = apr, + pages = {1379--1382}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Crowds are increasingly being adopted to solve complex problems. Size and diversity are two key characteristics of crowds; however their relationship to performance is often paradoxical. To better understand the effects of crowd size and diversity on crowd performance we conducted a study on the quality of 4,317 articles in the WikiProject Film community. The results of our study suggest that crowd size leads to better performance when crowds are more diverse. However, there is a break-even point -- smaller, less diverse crowds can outperform more diverse crowds of similar size. Our results offer new insights into the effects of size and diversity on the performance of crowds.}, + isbn = {978-1-4503-3145-6}, + keywords = {diversity,performance,team size,wikipedia}, + file = {/home/nathante/Zotero/storage/KVVXJ4WP/Robert and Romero - 2015 - Crowd Size, Diversity and Performance.pdf} +} + +@article{ruef_structure_2003, + title = {The {{Structure}} of {{Founding Teams}}: {{Homophily}}, {{Strong Ties}}, and {{Isolation}} among {{U}}.{{S}}. 
{{Entrepreneurs}}}, + shorttitle = {The {{Structure}} of {{Founding Teams}}}, + author = {Ruef, Martin and Aldrich, Howard E. and Carter, Nancy M.}, + year = {2003}, + journal = {American Sociological Review}, + volume = {68}, + number = {2}, + pages = {195--222}, + issn = {0003-1224}, + abstract = {The mechanisms governing the composition of formal social groups (e.g., task groups, organizational founding teams) remain poorly understood, owing to (1) a lack of representative sampling from groups found in the general population, (2) a "success" bias among researchers that leads them to consider only those groups that actually emerge and survive, and (3) a restrictive focus on some theorized mechanisms of group composition (e.g., homophily) to the exclusion of others. These shortcomings are addressed by analyzing a unique, representative data set of organizational founding teams sampled from the U.S. population. Rather than simply considering the properties of those founding teams that are empirically observed, a novel quantitative methodology generates the distribution of all possible teams, based on combinations of individual and relational characteristics. This methodology permits the exploration of five mechanisms of group composition--those based on homophily, functionality, status expectations, network constraint, and ecological constraint. Findings suggest that homophily and network constraints based on strong ties have the most pronounced effect on group composition. 
Social isolation (i.e., exclusion from a group) is more likely to occur as a result of ecological constraints on the availability of similar alters in a locality than as a result of status-varying membership choices.} +} + +@inproceedings{rusak_properties_2014, + title = {The Properties of {{Twitter}} Network Communications among Teenagers}, + booktitle = {Proceedings of the Companion Publication of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing}, + author = {Rusak, Gili}, + year = {2014}, + month = feb, + series = {{{CSCW Companion}} '14}, + pages = {233--236}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {We study, quantitatively, for the first time, the traits of Twitter teenager networks. The results are compared with general population users, and show that teenagers behave uniquely. Teens tend to follow more users and increase friendships over time. They tend to friend individuals online who they already know offline. Teenagers also use Twitter as a news media and form supportive and dense communities. These results shed new light on the attributes of teenage communities. We can then utilize these ideas to find solutions to emerging problems involving the massive use of social media. 
For example, Twitter can be used as a positive tool for the prevention of bad habits among teens.}, + isbn = {978-1-4503-2541-7}, + keywords = {social networks,teenagers,twitter}, + file = {/home/nathante/Zotero/storage/S9RPN7JX/Rusak - 2014 - The properties of Twitter network communications a.pdf} +} + +@article{schoener_resource_1974, + title = {Resource {{Partitioning}} in {{Ecological Communities}}}, + author = {Schoener, Thomas W.}, + year = {1974}, + journal = {Science}, + volume = {185}, + number = {4145}, + pages = {27--39}, + issn = {0036-8075}, + file = {/home/nathante/Zotero/storage/R86IDGJN/1738612.pdf;/home/nathante/Zotero/storage/U4UCJ2BT/Schoener - 1974 - Resource Partitioning in Ecological Communities.pdf} +} + +@article{seering_metaphors_2020, + ids = {seering_metaphors_2020-1}, + title = {Metaphors in Moderation}, + author = {Seering, Joseph and Kaufman, Geoff and Chancellor, Stevie}, + year = {2020}, + month = oct, + journal = {New Media \& Society}, + pages = {1461444820964968}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Volunteer content moderators are essential to the social media ecosystem through the roles they play in managing and supporting online social spaces. Recent work has described moderation primarily as a functional process of actions that moderators take, such as making rules, removing content, and banning users. However, the nuanced ways in which volunteer moderators envision their roles within their communities remain understudied. Informed by insights gained from 79 interviews with volunteer moderators from three platforms, we present a conceptual map of the territory of social roles in volunteer moderation, which identifies five categories with 22 metaphorical variants that reveal moderators' implicit values and the heuristics that help them make decisions. 
These metaphors more clearly enunciate the roles volunteer moderators play in the broader social media content moderation apparatus and can drive purposeful engagement with volunteer moderators to better support the ways they guide and shape their communities.}, + language = {en}, + keywords = {Facebook,governance,metaphors,moderation,online communities,platforms,Reddit,Twitch}, + file = {/home/nathante/Zotero/storage/6NR5XPIH/Seering et al. - 2020 - Metaphors in moderation.pdf;/home/nathante/Zotero/storage/FY8YDBFH/Seering et al. - 2020 - Metaphors in moderation.pdf} +} + +@article{seering_moderator_2019, + title = {Moderator Engagement and Community Development in the Age of Algorithms}, + author = {Seering, Joseph and Wang, Tony and Yoon, Jina and Kaufman, Geoff}, + year = {2019}, + month = jan, + journal = {New Media \& Society}, + pages = {1461444818821316}, + issn = {1461-4448}, + abstract = {Online communities provide a forum for rich social interaction and identity development for billions of Internet users worldwide. In order to manage these communities, platform owners have increasingly turned to commercial content moderation, which includes both the use of moderation algorithms and the employment of professional moderators, rather than user-driven moderation, to detect and respond to anti-normative behaviors such as harassment and spread of offensive content. We present findings from semi-structured interviews with 56 volunteer moderators of online communities across three platforms (Twitch, Reddit, and Facebook), from which we derived a generalized model categorizing the ways moderators engage with their communities and explaining how these communities develop as a result. This model contains three processes: being and becoming a moderator; moderation tasks, actions, and responses; and rules and community development. 
In this work, we describe how moderators contribute to the development of meaningful communities, both with and without algorithmic support.}, + language = {en}, + file = {/home/nathante/Zotero/storage/U8QLP3DK/Seering et al. - 2019 - Moderator engagement and community development in .pdf} +} + +@inproceedings{sengupta_what_2019, + title = {What Are {{Academic Subreddits Talking About}}? {{A Comparative Analysis}} of r/Academia and r/Gradschool}, + shorttitle = {What Are {{Academic Subreddits Talking About}}?}, + booktitle = {Conference {{Companion Publication}} of the 2019 on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Sengupta, Subhasree}, + year = {2019}, + month = nov, + series = {{{CSCW}} '19}, + pages = {357--361}, + publisher = {{Association for Computing Machinery}}, + address = {{Austin, TX, USA}}, + abstract = {Graduate school and academia can often be challenging and hard to navigate. This work explores how people are using Reddit to reach out to others in academic subreddits to talk about issues one might face in their academic journey. We also explore how such discussion differs between subreddits by comparing two popularly used academic subreddits: r/gradschool and r/academia. For each subreddit, we investigated 300 posts and 500 comments. Using topic modelling, we identify and distinguish the main emergent types of posts and comments we find in these two subreddits. We find that posts in r/academia center more on the challenging aspects of academia such as plagiarism, working in academia, and mental health, whereas r/gradschool posts deal with more generic issues on graduate school life. 
However, we find that the way the community reacts and provides support via comments is similar in both subreddits, mostly by providing moral support and solidarity.}, + isbn = {978-1-4503-6692-2}, + file = {/home/nathante/Zotero/storage/K4K3HITN/Sengupta - 2019 - What are Academic Subreddits Talking About A Comp.pdf} +} + +@inproceedings{sharma_studying_2015, + title = {Studying and {{Modeling}} the {{Connection}} between {{People}}'s {{Preferences}} and {{Content Sharing}}}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Sharma, Amit and Cosley, Dan}, + year = {2015}, + month = feb, + series = {{{CSCW}} '15}, + pages = {1246--1257}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {People regularly share items using online social media. However, people's decisions around sharing---who shares what to whom and why---are not well understood. We present a user study involving 87 pairs of Facebook users to understand how people make their sharing decisions. We find that even when sharing to a specific individual, people's own preference for an item (individuation) dominates over the recipient's preferences (altruism). People's open-ended responses about how they share, however, indicate that they do try to personalize shares based on the recipient. To explain these contrasting results, we propose a novel process model of sharing that takes into account people's preferences and the salience of an item. We also present encouraging results for a sharing prediction model that incorporates both the senders' and the recipients' preferences. 
These results suggest improvements to both algorithms that support sharing in social media and to information diffusion models.}, + isbn = {978-1-4503-2922-4}, + keywords = {directed sharing,information diffusion,sharing process,user preferences}, + file = {/home/nathante/Zotero/storage/V4LGES2Z/Sharma and Cosley - 2015 - Studying and Modeling the Connection between Peopl.pdf} +} + +@incollection{shaw_communication_1964, + title = {Communication {{Networks}}}, + booktitle = {Advances in {{Experimental Social Psychology}}}, + author = {Shaw, Marvin E.}, + editor = {Berkowitz, Leonard}, + year = {1964}, + volume = {1}, + pages = {111--147}, + publisher = {{Academic Press}}, + abstract = {The communication network imposed on the group influences its problem-solving efficiency, communication activity, organizational development, and member satisfaction. This chapter provides an overview of the communication networks, methodology employed in the research on communication networks and considers some of the structural properties of these networks, and outlines the major findings of experimental investigations of the effects of networks on group process. The major network difference is between centralized and decentralized networks. The direction and magnitude of the effects are modified by the following variables: kind of task, noise, information distribution, member personality, reinforcement, and the kind of prior experience the members have had in networks. The variable having the most pronounced effect is the kind of task the group must perform. Centralized networks are generally more efficient when the task requires merely the collection of information in one place, and decentralized networks are more efficient when further operations must be performed on the information before the task can be completed. 
The experiments discussed in the chapter, presents a great deal about the effects of communication networks, but the precise nature of many of the relationships among variables still remains unclear, and needs much clarification, such as network characteristics, kind of task, and group composition. The communication network studies have provided a great deal of information regarding structural effects upon group behavior. However, much more remains to be done.}, + file = {/home/nathante/Zotero/storage/ZTWM2MSC/Shaw - 1964 - Communication Networks.pdf} +} + +@article{shaw_laboratories_2014, + title = {Laboratories of Oligarchy? {{How}} the Iron Law Extends to Peer Production}, + shorttitle = {Laboratories of {{Oligarchy}}?}, + author = {Shaw, Aaron and Hill, Benjamin Mako}, + year = {2014}, + journal = {Journal of Communication}, + volume = {64}, + number = {2}, + pages = {215--238}, + issn = {1460-2466}, + abstract = {Peer production projects like Wikipedia have inspired voluntary associations, collectives, social movements, and scholars to embrace open online collaboration as a model of democratic organization. However, many peer production projects exhibit entrenched leadership and deep inequalities, suggesting that they may not fulfill democratic ideals. Instead, peer production projects may conform to Robert Michels' ``iron law of oligarchy,'' which proposes that democratic membership organizations become increasingly oligarchic as they grow. Using exhaustive data of internal processes from a sample of 683 wikis, we construct empirical measures of participation and test for increases in oligarchy associated with growth in wikis' contributor bases. 
In contrast to previous studies, we find support for Michels' iron law and conclude that peer production entails oligarchic organizational forms.}, + language = {en}, + file = {/home/nathante/Zotero/storage/GIII687R/Shaw and Hill - 2014 - Laboratories of oligarchy How the iron law extend.pdf;/home/nathante/Zotero/storage/W3846GC6/full.html} +} + +@article{simpson_status_2012, + title = {Status {{Hierarchies}} and the {{Organization}} of {{Collective Action}}}, + author = {Simpson, Brent and Willer, Robb and Ridgeway, Cecilia L.}, + year = {2012}, + month = sep, + journal = {Sociological Theory}, + volume = {30}, + number = {3}, + pages = {149--166}, + issn = {0735-2751, 1467-9558}, + abstract = {Most work on collective action assumes that group members are undifferentiated by status, or standing, in the group. Yet such undifferentiated groups are rare, if they exist at all. Here we extend an existing sociological research program to address how extant status hierarchies help organize collective actions by coordinating how much and when group members should contribute to group efforts. We outline three theoretically derived predictions of how status hierarchies organize patterns of behavior to produce larger public goods. We review existing evidence relevant to two of the three hypotheses and present results from a preliminary experimental test of the third. Findings are consistent with the model. The tendency of these dynamics to lead status-differentiated groups to produce larger public goods may help explain the ubiquity of hierarchy in groups, despite the often negative effects of status inequalities for many group members.}, + language = {en}, + file = {/home/nathante/Zotero/storage/WVT6KAAY/Simpson et al. 
- 2012 - Status Hierarchies and the Organization of Collect.pdf} +} + +@article{sobre-denton_virtual_2016, + title = {Virtual Intercultural Bridgework: {{Social}} Media, Virtual Cosmopolitanism, and Activist Community-Building}, + shorttitle = {Virtual Intercultural Bridgework}, + author = {{Sobr{\'e}-Denton}, Miriam}, + year = {2016}, + month = sep, + journal = {New Media \& Society}, + volume = {18}, + number = {8}, + pages = {1715--1731}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Social media facilitates a global\textendash local orientation to the world that allows individuals to engage in virtual community-building and participate in communication to build global citizenship. This research situates virtual cosmopolitanism in the age of new media and globalization, describing it as a means for trans-local and transnational community-building for social justice movements and activism, including community liaison-building across corporeal borders and boundaries. New media as a site of imagined communities that become larger than their component parts is then analyzed through examining several virtual cosmopolitan communities. 
The essay concludes with assumptions about the qualities of virtual cosmopolitan communities, and recommendations for how they can facilitate intercultural liaisons for social justice activism and community-building across difference.}, + language = {en}, + keywords = {Community-building,cosmopolitan solidarity,online activism,social justice,social media,virtual cosmopolitanism}, + file = {/home/nathante/Zotero/storage/Z5D3VAMN/Sobré-Denton - 2016 - Virtual intercultural bridgework Social media, vi.pdf} +} + +@inproceedings{soliman_characterization_2019, + title = {A {{Characterization}} of {{Political Communities}} on {{Reddit}}}, + booktitle = {Proceedings of the 30th {{ACM Conference}} on {{Hypertext}} and {{Social Media}}}, + author = {Soliman, Ahmed and Hafer, Jan and Lemmerich, Florian}, + year = {2019}, + month = sep, + series = {{{HT}} '19}, + pages = {259--263}, + publisher = {{Association for Computing Machinery}}, + address = {{Hof, Germany}}, + abstract = {The social news aggregator Reddit is among the most popular websites on the internet. Many online users use the platform to anonymously share and discuss (mostly US-centric) political content. In this ongoing work, we perform a comparative large-scale analysis of political subcommunities (subreddits) on Reddit using a dataset of more than 100 million posts from around 5 million users. In particular, we investigate these communities with respect to (1) the content posted, (2) their relationships to other subreddits, and (3) the distribution of attention received in these subcommunities. We find that left-leaning communities use derogatory language less often than right-leaning communities, but are more focused on news sources reflecting their own political leaning. We also observe that right-leaning communities are more interconnected with right-leaning subreddits on European politics. 
Finally, the attention of individual submissions (as measured by their number of up-votes or comments received) is spread more evenly in right-leaning communities.}, + isbn = {978-1-4503-6885-8}, + file = {/home/nathante/Zotero/storage/R2YM5F8X/Soliman et al. 
- 2019 - A Characterization of Political Communities on Red.pdf} +} + +@inproceedings{starbird_crowd_2012, + title = {Crowd Computation: Organizing Information during Mass Disruption Events}, + shorttitle = {Crowd Computation}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work Companion}}}, + author = {Starbird, Kate}, + year = {2012}, + series = {{{CSCW}} '12}, + pages = {339--342}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {This research examines large-scale human interaction occurring through social media during times of mass disruption, seeking to understand how the connected crowd acts to organize a flood of data moving through those platforms into useful information resources. The work combines empirical analysis of social media communication, interviews, and participant observation to explore how people work to organize information and how they use social media platforms to organize themselves to do this work. Synthesizing findings from four distinct, yet interrelated studies, this research progresses towards a new conceptualization of the distributed, connected work of organizing information during mass disruption events.}, + isbn = {978-1-4503-1051-2} +} + +@article{swaminathan_resource_2001, + title = {Resource Partitioning and the Evolution of Specialist Organizations: {{The}} Role of Location and Identity in the {{U}}.{{S}}. Wine Industry}, + shorttitle = {Resource {{Partitioning}} and the {{Evolution}} of {{Specialist Organizations}}}, + author = {Swaminathan, Anand}, + year = {2001}, + month = dec, + journal = {Academy of Management Journal}, + volume = {44}, + number = {6}, + pages = {1169--1185}, + issn = {0001-4273, 1948-0989}, + abstract = {Analyses of founding and mortality rates of specialist organizations in the U.S. 
wine industry over the period 1941-90 support Carroll's (1985) location-based resource-partitioning model\textemdash crowding of generalists in the market center creates opportunities for specialists. Further, specialists are adversely affected when they violate their organizational form's identity characteristics and also when generalists can assume a robust identity allowing them to operate in both specialist and generalist industry segments. The results suggest a prominent role for an organizational form's identity in resource partitioning.}, + language = {en}, + file = {/home/nathante/Zotero/storage/HSF2S5JM/1169.html} +} + +@inproceedings{tan_all_2015, + title = {All Who Wander: {{On}} the Prevalence and Characteristics of Multi-Community Engagement}, + shorttitle = {All Who Wander}, + booktitle = {Proceedings of the 24th {{International Conference}} on {{World Wide Web}}}, + author = {Tan, Chenhao and Lee, Lillian}, + year = {2015}, + series = {{{WWW}} '15}, + pages = {1056--1066}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + address = {{Republic and Canton of Geneva, Switzerland}}, + abstract = {Although analyzing user behavior within individual communities is an active and rich research domain, people usually interact with multiple communities both on- and off-line. How do users act in such multi-community environments? Although there are a host of intriguing aspects to this question, it has received much less attention in the research community in comparison to the intra-community case. In this paper, we examine three aspects of multi-community engagement: the sequence of communities that users post to, the language that users employ in those communities, and the feedback that users receive, using longitudinal posting behavior on Reddit as our main data source, and DBLP for auxiliary experiments. We also demonstrate the effectiveness of features drawn from these aspects in predicting users' future level of activity. 
One might expect that a user's trajectory mimics the "settling-down" process in real life: an initial exploration of sub-communities before settling down into a few niches. However, we find that the users in our data continually post in new communities; moreover, as time goes on, they post increasingly evenly among a more diverse set of smaller communities. Interestingly, it seems that users that eventually leave the community are "destined" to do so from the very beginning, in the sense of showing significantly different "wandering" patterns very early on in their trajectories; this finding has potentially important design implications for community maintainers. Our multi-community perspective also allows us to investigate the "situation vs. personality" debate from language usage across different communities.}, + isbn = {978-1-4503-3469-3}, + keywords = {DBLP,language,lifecycle,multiple communities,reddit}, + file = {/home/nathante/Zotero/storage/8GL2XQG3/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf;/home/nathante/Zotero/storage/J3RVCH26/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf} +} + +@inproceedings{tan_tracing_2018, + title = {Tracing Community Genealogy: How New Communities Emerge from the Old}, + shorttitle = {Tracing {{Community Genealogy}}}, + booktitle = {Proceedings of the {{Twelfth International Conference}} on {{Web}} and {{Social Media}} ({{ICWSM}} '18)}, + author = {Tan, Chenhao}, + year = {2018}, + pages = {395--404}, + publisher = {{AAAI}}, + address = {{Palo Alto, California}}, + abstract = {The process by which new communities emerge is a central research issue in the social sciences. While a growing body of research analyzes the formation of a single community by examining social networks between individuals, we introduce a novel community-centered perspective. We highlight the fact that the context in which a new community emerges contains numerous existing communities. 
We reveal the emerging process of communities by tracing their early members' previous community memberships.}, + file = {/home/nathante/Zotero/storage/QEAEMFYR/Tan - 2018 - Tracing Community Genealogy How New Communities E.pdf} +} + +@article{tausczik_impact_2019, + title = {The Impact of Group Size on the Discovery of Hidden Profiles in Online Discussion Groups}, + author = {Tausczik, Yla and Huang, Xiaoyun}, + year = {2019}, + month = nov, + journal = {ACM Transactions on Social Computing}, + volume = {2}, + number = {3}, + pages = {10:1--10:25}, + issn = {2469-7818}, + abstract = {Online discussions help individuals to gather knowledge and make important decisions in diverse areas from health and finance to computing and data science. Online discussion groups exhibit unique group dynamics not found in traditional small groups, such as staggered participation and asynchronous communication, and the effects of these features on knowledge sharing is not well understood. In this article, we focus on one such aspect: wide variation in group size. Using a controlled experiment with a hidden profile task, we evaluate online discussion groups' capacity to share distributed knowledge when group size ranges from 4 to 32 participants. 
We found that individuals in medium-sized discussions performed the best, and we suggest that this represents a tradeoff in which larger groups tend to share more facts, but have more difficulty than smaller groups at resolving misunderstandings.}, + keywords = {collective information processing,collective intelligence,Hidden profile,knowledge sharing,online forums}, + file = {/home/nathante/Zotero/storage/FNSPR8FH/Tausczik_Huang_2019_The Impact of Group Size on the Discovery of Hidden Profiles in Online.pdf} +} + +@inproceedings{teblunthuis_density_2017, + title = {Density Dependence without Resource Partitioning: Population Ecology on {{Change}}.Org}, + shorttitle = {Density {{Dependence Without Resource Partitioning}}}, + booktitle = {Companion of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + year = {2017}, + series = {{{CSCW}} '17 {{Companion}}}, + pages = {323--326}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {E-petitioning is a prominent form of Internet-based collective action. We apply theories from organizational population ecology to investigate whether similar petitions compete for signatures. We use latent Dirichlet allocation (LDA) topic modeling to identify topical niches. Using these niches, we test two theories from population ecology on 442,109 Change.org petitions. First, we find evidence for density dependence, an inverse-U-shaped relationship between the density of a petition's niche and the number of signatures the petition obtains. This suggests e-petitioning is competitive and that e-petitions draw on overlapping resource pools. Second, although resource partitioning theory predicts that topically specialized petitions will obtain more signatures in concentrated populations, we find no evidence of this. 
This suggests that specialists struggle to avoid competition with generalists.}, + isbn = {978-1-4503-4688-7}, + file = {/home/nathante/Zotero/storage/54585RCP/TeBlunthuis et al. - 2017 - Density dependence without resource partitioning .pdf} +} + +@article{teblunthuis_identifying_2021, + ids = {teblunthuis_community_2021,teblunthuis_community_2021-1,teblunthuis_identifying_2021-1}, + title = {Identifying {{Competition}} and {{Mutualism Between Online Groups}}}, + author = {TeBlunthuis, Nathan and Hill, Benjamin Mako}, + year = {2021}, + month = jul, + journal = {arXiv:2107.06970 [cs]}, + eprint = {2107.06970}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Platforms often host multiple online groups with highly overlapping topics and members. How can researchers and designers understand how interactions between related groups affect measures of group health? Inspired by population ecology, prior social computing research has studied competition and mutualism among related groups by correlating group size with degrees of overlap in content and membership. The resulting body of evidence is puzzling as overlaps seem sometimes to help and other times to hurt. We suggest that this confusion results from aggregating inter-group relationships into an overall environmental effect instead of focusing on networks of competition and mutualism among groups. We propose a theoretical framework based on community ecology and a method for inferring competitive and mutualistic interactions from time series participation data. 
We compare population and community ecology analyses of online community growth by analyzing clusters of subreddits with high user overlap but varying degrees of competition and mutualism.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/3NW96WBR/TeBlunthuis_Hill_2021_Identifying Competition and Mutualism Between Online Groups.pdf;/home/nathante/Zotero/storage/XRLZFVHD/TeBlunthuis_Hill_2021_Identifying Competition and Mutualism Between Online Groups.pdf;/home/nathante/Zotero/storage/ZTDDJ9KW/TeBlunthuis and Hill - 2018 - A Community Ecology Approach for Identifying Compe.pdf;/home/nathante/Zotero/storage/MJH368X5/2107.html;/home/nathante/Zotero/storage/VK77YHAC/2107.html} +} + +@inproceedings{teblunthuis_revisiting_2018, + title = {Revisiting ``{{The}} Rise and Decline'' in a Population of Peer Production Projects}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '18)}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + year = {2018}, + pages = {355:1--355:7}, + publisher = {{ACM}}, + address = {{New York, NY}}, + abstract = {Do patterns of growth and stabilization found in large peer production systems such as Wikipedia occur in other communities? This study assesses the generalizability of Halfaker et al.'s influential 2013 paper on "The Rise and Decline of an Open Collaboration System." We replicate its tests of several theories related to newcomer retention and norm entrenchment using a dataset of hundreds of active peer production wikis from Wikia. We reproduce the subset of the findings from Halfaker and colleagues that we are able to test, comparing both the estimated signs and magnitudes of our models. 
Our results support the external validity of Halfaker et al.'s claims that quality control systems may limit the growth of peer production communities by deterring new contributors and that norms tend to become entrenched over time.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/7YEVSVQM/TeBlunthuis et al. - 2018 - Revisiting The Rise and Decline in a Population .pdf} +} + +@article{triggs_context_2019, + ids = {triggs_context_2021}, + title = {Context Collapse and Anonymity among Queer {{Reddit}} Users}, + author = {Triggs, Anthony Henry and M{\o}ller, Kristian and Neumayer, Christina}, + year = {2019}, + month = nov, + journal = {New Media \& Society}, + volume = {23}, + number = {1}, + pages = {5--21}, + publisher = {{SAGE Publications}}, + issn = {1461-4448, 1461-7315}, + abstract = {This article maps out how people in queer communities on Reddit navigate context collapse. Drawing upon data from interviews with queer Reddit users and insights from other studies of context collapse in digital media, we argue that context collapse also occurs in anonymity-based social media. The interviews reveal queer Reddit users' practices of context differentiation, occurring at four levels: somatic, system, inter-platform and intra-platform. We use these levels to map out how lesbian, gay, bisexual, transgender and queer or questioning (LGBTQ) people express their identities and find community on Reddit while seeking to minimize the risks imposed by multiple impending context collapses. Because living an authentic queer life can make subjects vulnerable, we find that despite Reddit's anonymity, sophisticated practices of context differentiation are developed and maintained. 
We argue that context collapse in an era of big data and social media platforms operates beyond the control of any one user, which causes problems, particularly for queer people.}, + language = {en}, + keywords = {Anonymity,bisexual,context collapse,gay,lesbian,Reddit,risk,transgender and queer or questioning}, + file = {/home/nathante/Zotero/storage/LSEXQYFM/Triggs et al. - 2021 - Context collapse and anonymity among queer Reddit .pdf} +} + +@article{tufekci_not_2013-1, + title = {"{{Not}} This One": Social Movements, the Attention Economy, and Microcelebrity Networked Activism}, + shorttitle = {" {{Not}} This One": Social Movements, the Attention Economy, and Microcelebrity Networked Activism}, + author = {Tufekci, Zeynep}, + year = {2013}, + journal = {American Behavioral Scientist}, + pages = {0002764213479369}, + issn = {0002-7642}, + file = {/home/nathante/Zotero/storage/URM9ESR8/Tufekci_2013_ Not This One.pdf;/home/nathante/Zotero/storage/ZBQFHXMF/Tufekci_2013_ Not This One.pdf} +} + +@article{turner_where_2005, + title = {Where the {{Counterculture Met}} the {{New Economy}}: {{The WELL}} and the {{Origins}} of {{Virtual Community}}}, + shorttitle = {Where the {{Counterculture Met}} the {{New Economy}}}, + author = {Turner, Fred}, + year = {2005}, + journal = {Technology and Culture}, + volume = {46}, + number = {3}, + pages = {485--512}, + issn = {1097-3729}, + abstract = {In lieu of an abstract, here is a brief excerpt of the content: Technology and Culture 46.3 (2005) 485-512 The WELL and the Origins of Virtual Community Fred Turner In 1993, freelance journalist Howard Rheingold published The Virtual Community: Homesteading on the Electronic Frontier and with it defined a new form of technologically enabled social life: virtual community. For the last eight years, he explained, he had been dialing in to a San Francisco Bay\textendash area bulletin-board system (BBS) known as the Whole Earth 'Lectronic Link, or the WELL. 
In the WELL's text-only environment, he conversed with friends and colleagues, met new people, and over time built up relationships of startling intimacy. For Rheingold, these relationships formed an emotional bulwark against the loneliness of a highly technologized material world. As he explained, computer networks like the WELL allowed us "to recapture the sense of cooperative spirit that so many people seemed to lose when we gained all this technology." In the disembodied precincts of cyberspace, we could connect with one another practically and emotionally and "rediscover the power of cooperation, turning cooperation into a game, a way of life\textemdash a merger of knowledge capital, social capital, and communion." In the years since Rheingold's book appeared, the Internet and the Worldwide Web have swung into public view, and both the WELL and Rheingold's notion of virtual community have become touchstones for studies of the social implications of computer networking. Yet, despite the WELL's prominence, few have rigorously explored its roots in the American counterculture of the 1960s. As its name suggests, the Whole Earth 'Lectronic Link took shape within a network of individuals and publications that first came together long before the advent of ubiquitous computer networking, with the publication of the Whole Earth Catalog. In the spring of 1968, Stewart Brand, a former Merry Prankster and coproducer of the Trips Festival that helped spark the Haight-Ashbury psychedelic scene, noticed that many of his friends had begun to leave the city for the wilds of New Mexico and Northern California. As sociologists and journalists would soon explain, these migrants marked the leading edge of what would become the largest wave of communalization in American history. 
Brand had just inherited a hundred thousand dollars in stock and, as he recalled several years later, imagining his friends "starting their own civilization hither and yon in the sticks" got him thinking about the L.L.Bean catalog. This in turn led him to fantasize something he called the "Access Mobile" that would offer "all manner of access materials and advice for sale cheap," including books, camping gear, blueprints for houses and machines, and subscriptions to magazines. The publication that grew out of that fantasy would quickly become one of the defining documents of the American counterculture. Sized somewhere between a tabloid newspaper and a glossy magazine, the sixty-one-page first Whole Earth Catalog presented reviews of hand tools, books, and magazines arrayed in seven thematic categories: understanding whole systems, shelter and land use, industry and craft, communications, community, nomadics, and learning. Over the next four years, in a series of biannual issues, the Catalog ballooned to more than four hundred pages, sold more than a million-and-a-half copies, won a National Book Award, and spawned dozens of imitators. It also established a relationship between information technology, economic activity, and alternative forms of community that would outlast the counterculture itself and become a key feature of the digital world. Like other members of the counterculture, those who headed back to the land suffered a deep ambivalence toward technology. On the one hand, like their counterparts on the New Left they saw the large-scale weapons technologies of the cold war and the organizations that produced them as emblems of a malevolent and ubiquitous technological bureaucracy. On the other, as they played their stereos and dropped LSD many came to believe that small-scale technologies could help bring about an alternative to that world. 
Dancing at the Trips Festival or simply sitting around getting high with friends, many experienced a sense of spiritual interconnection. By the late 1960s, social theorists such as Charles Reich and Theodore Roszak had begun to argue that this interconnection could become the...} +} + +@inproceedings{vasilescu_how_2014, + ids = {vasilescu_how_2014-1}, + title = {How Social {{Q}}\&{{A}} Sites Are Changing Knowledge Sharing in Open Source Software Communities}, + booktitle = {Proceedings of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing - {{CSCW}} '14}, + author = {Vasilescu, Bogdan and Serebrenik, Alexander and Devanbu, Prem and Filkov, Vladimir}, + year = {2014}, + pages = {342--354}, + publisher = {{ACM Press}}, + address = {{Baltimore, Maryland, USA}}, + abstract = {Historically, mailing lists have been the preferred means for coordinating development and user support activities. With the emergence and popularity growth of social Q\&A sites such as the StackExchange network (e.g., StackOverflow), this is beginning to change. Such sites offer different sociotechnical incentives to their participants than mailing lists do, e.g., rich web environments to store and manage content collaboratively, or a place to showcase their knowledge and expertise more vividly to peers or potential recruiters. A key difference between StackExchange and mailing lists is gamification, i.e., StackExchange participants compete to obtain reputation points and badges. In this paper, we use a case study of R (a widely-used tool for data analysis) to investigate how mailing list participation has evolved since the launch of StackExchange. Our main contribution is the assembly of a joint data set from the two sources, in which participants in both the r-help mailing list and StackExchange are identifiable. This permits their activities to be linked across the two resources and also over time. 
With this data set we found that user support activities show a strong shift away from r-help. In particular, mailing list experts are migrating to StackExchange, where their behaviour is different. First, participants active both on r-help and on StackExchange are more active than those who focus exclusively on only one of the two. Second, they provide faster answers on StackExchange than on r-help, suggesting they are motivated by the gamified environment. To our knowledge, our study is the first to directly chart the changes in behaviour of specific contributors as they migrate into gamified environments, and has important implications for knowledge management in software engineering.}, + isbn = {978-1-4503-2540-0}, + language = {en}, + keywords = {crowdsourced knowledge,gamification.,mailing lists,open source,social q\&a}, + file = {/home/nathante/Zotero/storage/6DLS9FTI/Vasilescu et al. - 2014 - How social Q&\;A sites are changing knowledge sh.pdf;/home/nathante/Zotero/storage/MNHPJRT3/Vasilescu et al. - 2014 - How social Q&A sites are changing knowledge sharin.pdf} +} + +@book{verhoef_community_2010, + title = {Community Ecology: Processes, Models, and Applications}, + shorttitle = {Community Ecology}, + author = {Verhoef, Herman A and Morin, Peter J}, + year = {2010}, + publisher = {{Oxford University Press}}, + address = {{Oxford}}, + isbn = {978-0-19-922897-3 978-0-19-922898-0}, + language = {English}, + annotation = {OCLC: 876676566} +} + +@book{von_hippel_democratizing_2006, + title = {Democratizing Innovation}, + author = {{von Hippel}, Eric}, + year = {2006}, + publisher = {{The MIT Press}}, + abstract = {Innovation is rapidly becoming democratized. Users, aided by improvements in computer and communications technology, increasingly can develop their own new products and services. 
These innovating users\textemdash both individuals and firms\textemdash often freely share their innovations with others, creating user-innovation communities and a rich intellectual commons. In Democratizing Innovation, Eric von Hippel looks closely at this emerging system of user-centered innovation. He explains why and when users find it profitable to develop new products and services for themselves, and why it often pays users to reveal their innovations freely for the use of all. The trend toward democratized innovation can be seen in software and information products\textemdash most notably in the free and open-source software movement\textemdash but also in physical products. Von Hippel's many examples of user innovation in action range from surgical equipment to surfboards to software security features. He shows that product and service development is concentrated among "lead users," who are ahead on marketplace trends and whose innovations are often commercially attractive. Von Hippel argues that manufacturers should redesign their innovation processes and that they should systematically seek out innovations developed by users. He points to businesses\textemdash the custom semiconductor industry is one example\textemdash that have learned to assist user-innovators by providing them with toolkits for developing new products. User innovation has a positive impact on social welfare, and von Hippel proposes that government policies, including R\&D subsidies and tax credits, should be realigned to eliminate biases against it. 
The goal of a democratized user-centered innovation system, says von Hippel, is well worth striving for.}, + copyright = {http://creativecommons.org/licenses/by-nc-nd/4.0}, + isbn = {978-0-262-72047-2 978-0-262-00274-5}, + language = {en}, + keywords = {innovation,org theory}, + file = {/home/nathante/Zotero/storage/ZK5N3JLA/search.html} +} + +@book{von_hippel_free_2016, + title = {Free Innovation}, + author = {{von Hippel}, Eric}, + year = {2016}, + month = nov, + edition = {1 edition}, + publisher = {{The MIT Press}}, + address = {{Cambridge, MA}}, + abstract = {A leading innovation scholar explains the growing phenomenon and impact of free innovation, in which innovations developed by consumers and given away ``for free.'' In this book, Eric von Hippel, author of the influential Democratizing Innovation, integrates new theory and research findings into the framework of a ``free innovation paradigm.'' Free innovation, as he defines it, involves innovations developed by consumers who are self-rewarded for their efforts, and who give their designs away ``for free.'' It is an inherently simple grassroots innovation process, unencumbered by compensated transactions and intellectual property rights. Free innovation is already widespread in national economies and is steadily increasing in both scale and scope. Today, tens of millions of consumers are collectively spending tens of billions of dollars annually on innovation development. However, because free innovations are developed during consumers' unpaid, discretionary time and are given away rather than sold, their collective impact and value have until very recently been hidden from view. This has caused researchers, governments, and firms to focus too much on the Schumpeterian idea of innovation as a producer-dominated activity. Free innovation has both advantages and drawbacks. 
Because free innovators are self-rewarded by such factors as personal utility, learning, and fun, they often pioneer new areas before producers see commercial potential. At the same time, because they give away their innovations, free innovators generally have very little incentive to invest in diffusing what they create, which reduces the social value of their efforts. The best solution, von Hippel and his colleagues argue, is a division of labor between free innovators and producers, enabling each to do what they do best. The result will be both increased producer profits and increased social welfare\textemdash a gain for all.}, + isbn = {978-0-262-03521-7}, + language = {English} +} + +@article{von_hippel_sticky_1994, + title = {"{{Sticky}} Information" and the Locus of Problem Solving: Implications for Innovation}, + shorttitle = {"{{Sticky Information}}" and the {{Locus}} of {{Problem Solving}}}, + author = {{von Hippel}, Eric}, + year = {1994}, + journal = {Management Science}, + volume = {40}, + number = {4}, + pages = {429--439}, + issn = {0025-1909}, + abstract = {To solve a problem, needed information and problem-solving capabilities must be brought together. Often the information used in technical problem solving is costly to acquire, transfer, and use in a new location---is, in our terms, "sticky." In this paper we explore the impact of information stickiness on the locus of innovation-related problem solving. We find, first, that when sticky information needed by problem solvers is held at one site only, problem solving will be carried out at that locus, other things being equal. Second, when more than one locus of sticky information is called upon by problem solvers, the locus of problem solving may iterate among these sites as problem solving proceeds. 
When the costs of such iteration are high, then, third, problems that draw upon multiple sites of sticky information will sometimes be "task partitioned" into subproblems that each draw on only one such locus, and/or, fourth, investments will be made to reduce the stickiness of information at some locations. Information stickiness appears to affect a number of issues of importance to researchers and practitioners. Among these are patterns in the diffusion of information, the specialization of firms, the locus of innovation, and the nature of problems selected by problem solvers.}, + file = {/home/nathante/Zotero/storage/VJT3KFVS/von Hippel - 1994 - Sticky information and the locus of problem solv.pdf;/home/nathante/Zotero/storage/N5WSWBCN/v_3a40_3ay_3a1994_3ai_3a4_3ap_3a429-439.html} +} + +@inproceedings{waller_generalists_2019, + title = {Generalists and {{Specialists}}: {{Using Community Embeddings}} to {{Quantify Activity Diversity}} in {{Online Platforms}}}, + shorttitle = {Generalists and {{Specialists}}}, + booktitle = {The {{World Wide Web Conference}} on - {{WWW}} '19}, + author = {Waller, Isaac and Anderson, Ashton}, + year = {2019}, + pages = {1954--1964}, + publisher = {{ACM Press}}, + address = {{San Francisco, CA, USA}}, + abstract = {In many online platforms, people must choose how broadly to allocate their energy. Should one concentrate on a narrow area of focus, and become a specialist, or apply oneself more broadly, and become a generalist? In this work, we propose a principled measure of how generalist or specialist a user is, and study behavior in online platforms through this lens. To do this, we construct highly accurate community embeddings that represent communities in a high-dimensional space. We develop sets of community analogies and use them to optimize our embeddings so that they encode community relationships extremely well. Based on these embeddings, we introduce a natural measure of activity diversity, the GS-score. 
Applying our embedding-based measure to online platforms, we observe a broad spectrum of user activity styles, from extreme specialists to extreme generalists, in both community membership on Reddit and programming contributions on GitHub. We find that activity diversity is related to many important phenomena of user behavior. For example, specialists are much more likely to stay in communities they contribute to, but generalists are much more likely to remain on platforms as a whole. We also find that generalists engage with significantly more diverse sets of users than specialists do. Furthermore, our methodology leads to a simple algorithm for community recommendation, matching state-of-the-art methods like collaborative filtering. Our methods and results introduce an important new dimension of online user behavior and shed light on many aspects of online platform use.}, + isbn = {978-1-4503-6674-8}, + language = {en}, + keywords = {activity diversity,community embeddings,community recommendation,generalist and specialists}, + file = {/home/nathante/Zotero/storage/5F77953J/Waller and Anderson - 2019 - Generalists and Specialists Using Community Embed.pdf;/home/nathante/Zotero/storage/PK32L55Y/Waller and Anderson - 2019 - Generalists and Specialists Using Community Embed.pdf} +} + +@article{wang_impact_2012, + ids = {wang_impact_2013}, + title = {The Impact of Membership Overlap on Growth: {{An}} Ecological Competition View of Online Groups}, + shorttitle = {The Impact of Membership Overlap on Growth}, + author = {Wang, Xiaoqing and Butler, Brian S. and Ren, Yuqing}, + year = {2012}, + month = jun, + journal = {Organization Science}, + volume = {24}, + number = {2}, + pages = {414--431}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {The dominant narrative of the Internet has been one of unconstrained growth, abundance, and plenitude. It is in this context that new forms of organizing, such as online groups, have emerged. 
However, the same factors that underlie the utopian narrative of Internet life also give rise to numerous online groups, many of which fail to attract participants or to provide significant value. This suggests that despite the potential transformative nature of modern information technology, issues of scarcity, competition, and context may remain critical to the performance and functioning of online groups. In this paper, we draw from organizational ecology theories to develop an ecological view of online groups to explain how overlapping membership among online groups causes intergroup competition for member attention and affects a group's ability to grow. Hypotheses regarding the effects of group size, age, and membership overlap on growth are proposed and tested with data from a 64-month, longitudinal sample of 240 online discussion groups. The analysis shows that sharing members with other groups reduced future growth rates, suggesting that membership overlap puts competitive pressure on online groups. Our results also suggest that, compared with smaller and younger groups, larger and older groups experience greater difficulty in growing their membership. In addition, larger groups were more vulnerable to competitive pressure than smaller groups: larger groups experienced greater difficulty in growing their membership than smaller groups as competition intensified. Overall, our findings show how an abundance of opportunities afforded by technologies can create scarcity in user time and effort, which increases competitive pressure on online groups. Our ecological view extends organizational ecology theory to new organizational forms online and highlights the importance of studying the competitive environment of online groups.}, + file = {/home/nathante/Zotero/storage/3WI37Y9S/Wang et al. - 2013 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/D7GAZURV/Wang et al. 
- 2012 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/EQSW25XD/Wang et al. - 2012 - The impact of membership overlap on growth An eco.pdf;/home/nathante/Zotero/storage/8QDPVTSM/orsc.1120.html;/home/nathante/Zotero/storage/IK6SB3L8/orsc.1120.html} +} + +@inproceedings{wang_searching_2012, + ids = {wang_searching_2012-1}, + title = {Searching for the Goldilocks Zone: Trade-Offs in Managing Online Volunteer Groups}, + shorttitle = {Searching for the Goldilocks Zone}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work}}}, + author = {Wang, Loxley Sijia and Chen, Jilin and Ren, Yuqing and Riedl, John}, + year = {2012}, + series = {{{CSCW}} '12}, + pages = {989--998}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Dedicated and productive members who actively contribute to community efforts are crucial to the success of online volunteer groups such as Wikipedia. What predicts member productivity? Do productive members stay longer? How does involvement in multiple projects affect member contribution to the community? In this paper, we analyze data from 648 WikiProjects to address these questions. Our results reveal two critical trade-offs in managing online volunteer groups. First, factors that increase member productivity, measured by the number of edits on Wikipedia articles, also increase likelihood of withdrawal from contributing, perhaps due to feelings of mission accomplished or burnout. Second, individual membership in multiple projects has mixed effects. It decreases the amount of work editors contribute to both the individual projects and Wikipedia as a whole. It increases withdrawal for each individual project yet reduces withdrawal from Wikipedia. 
We discuss how our findings expand existing theories to fit the online context and inform the design of new tools to improve online volunteer work.}, + isbn = {978-1-4503-1086-4}, + keywords = {online volunteer group,productivity,trade-off,wikipedia,withdrawal}, + file = {/home/nathante/Zotero/storage/7CKH7QT7/Wang et al. - 2012 - Searching for the goldilocks zone trade-offs in m.pdf;/home/nathante/Zotero/storage/R8ALMDFI/Wang et al. - 2012 - Searching for the goldilocks zone trade-offs in m.pdf;/home/nathante/Zotero/storage/Z28IT3FH/Wang et al. - 2012 - Searching for the goldilocks zone trade-offs in m.pdf} +} + +@incollection{white_effects_2011, + title = {Effects of Community Size and Contact Rate in Synchronous Social Q\&a}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {White, Ryen W. and Richardson, Matthew and Liu, Yandong}, + year = {2011}, + month = may, + pages = {2837--2846}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Social question-and-answer (Q\&A) involves the location of answers to questions through communication with people. Social Q\&A systems, such as mailing lists and Web forums are popular, but their asynchronous nature can lead to high answer latency. Synchronous Q\&A systems facilitate real-time dialog, usually via instant messaging, but face challenges with interruption costs and the availability of knowledgeable answerers at question time. We ran a longitudinal study of a synchronous social Q\&A system to investigate the effects of the rate with which potential answerers were contacted (trading off time-to-answer against interruption cost) and community size (varying total number of members). We found important differences in subjective and objective measures of system performance with these variations. 
Our findings help us understand the costs and benefits of varying contact rate and community size in synchronous social Q\&A, and inform system design for social Q\&A.}, + isbn = {978-1-4503-0228-9}, + keywords = {community size,contact rate,synchronous social q\&a}, + file = {/home/nathante/Zotero/storage/YTF5HY6W/White et al. - 2011 - Effects of community size and contact rate in sync.pdf} +} + +@article{wu_estimating_2019, + title = {Estimating {{Attention Flow}} in {{Online Video Networks}}}, + author = {Wu, Siqi and Rizoiu, Marian-Andrei and Xie, Lexing}, + year = {2019}, + month = nov, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {3}, + number = {CSCW}, + pages = {183:1--183:25}, + abstract = {Online videos have shown tremendous increase in Internet traffic. Most video hosting sites implement recommender systems, which connect the videos into a directed network and conceptually act as a source of pathways for users to navigate. At present, little is known about how human attention is allocated over such large-scale networks, and about the impacts of the recommender systems. In this paper, we first construct the Vevo network -- a YouTube video network with 60,740 music videos interconnected by the recommendation links, and we collect their associated viewing dynamics. This results in a total of 310 million views every day over a period of 9 weeks. Next, we present large-scale measurements that connect the structure of the recommendation network and the video attention dynamics. We use the bow-tie structure to characterize the Vevo network and we find that its core component (23.1\% of the videos), which occupies most of the attention (82.6\% of the views), is made out of videos that are mainly recommended among themselves. This is indicative of the links between video recommendation and the inequality of attention allocation. Finally, we address the task of estimating the attention flow in the video recommendation network. 
We propose a model that accounts for the network effects for predicting video popularity, and we show it consistently outperforms the baselines. This model also identifies a group of artists gaining attention because of the recommendation network. Altogether, our observations and our models provide a new set of tools to better understand the impacts of recommender systems on collective social attention.}, + keywords = {empirical measurement,network effects,online attention,popularity prediction,recommender system,youtube}, + file = {/home/nathante/Zotero/storage/QEZJWR7U/Wu et al_2019_Estimating Attention Flow in Online Video Networks.pdf} +} + +@article{xigen_li_factors_2011, + title = {Factors Influencing the Willingness to Contribute Information to Online Communities}, + author = {{Xigen Li}}, + year = {2011}, + month = mar, + journal = {New Media \& Society}, + volume = {13}, + number = {2}, + pages = {279--296}, + issn = {1461-4448, 1461-7315}, + abstract = {This study examines the factors that influence the willingness to contribute information to online communities from the perspectives of the discretionary database and expectancy theory. The study identified four groups of variables and tested their predictive value on the willingness to contribute information to online communities. The findings confirmed the effect of the perceived value of contributing and the likelihood of getting a reward for the willingness to contribute. Cost of contribution was not a significant predictor of the willingness to contribute information. Benefit from, and interest in, the community were significant predictors, but community affinity was not. 
Among the four groups of variables, social approval was the strongest predictor of the willingness to contribute.}, + language = {en} +} + +@article{xu_evolution_2021, + title = {Evolution of Audience Duplication Networks among Social Networking Sites: {{Exploring}} the Influences of Preferential Attachment, Audience Size, and Niche Width}, + shorttitle = {Evolution of Audience Duplication Networks among Social Networking Sites}, + author = {Xu, Yu}, + year = {2021}, + month = feb, + journal = {New Media \& Society}, + pages = {1461444821993048}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study examines the evolution of social networking sites (SNSs) from a networked audience duplication perspective. Guided by social network theory, the theory of double jeopardy, and niche theory, this study proposes an integrated framework to explain the evolution of SNS choices of the US audience between 2016 and 2019. Shared traffic data were retrieved from comScore's Media Metrix Multi-Platform database. The empirical results of the separable temporal exponential random graph model (STERGM) confirm that preferential attachment, audience size, and niche width significantly drive the likelihood of tie formation and dissolution in the evolving audience duplication network. These effects hold true even when other endogenous structural features and exogenous nodal attributes are taken into account. 
Theoretical implications for the networked media landscape are discussed.}, + language = {en}, + keywords = {Audience duplication,evolution,network analysis,organizational ecology,social media}, + file = {/home/nathante/Zotero/storage/94TAHIW3/Xu - 2021 - Evolution of audience duplication networks among s.pdf} +} + +@article{zhang_configuring_2020, + title = {Configuring {{Audiences}}: {{A Case Study}} of {{Email Communication}}}, + shorttitle = {Configuring {{Audiences}}}, + author = {Zhang, Justine and Pennebaker, James and Dumais, Susan and Horvitz, Eric}, + year = {2020}, + month = may, + journal = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {4}, + number = {CSCW1}, + pages = {062:1--062:26}, + abstract = {When people communicate with each other, their choice of what to say is tied to their perceptions of the audience. For many communication channels, people have some ability to explicitly specify their audience members and the different roles they can play. While existing accounts of communication behavior have largely focused on how people tailor the content of their messages, we focus on the configuring of the audience as a complementary family of decisions in communication. We formulate a general description of audience configuration choices, highlighting key aspects of the audience that people could configure to reflect a range of communicative goals. We then illustrate these ideas via a case study of email usage-a realistic domain where audience configuration choices are particularly fine-grained and explicit in how email senders fill the To and Cc address fields. In a large collection of enterprise emails, we explore how people configure their audiences, finding salient patterns relating a sender's choice of configuration to the types of participants in the email exchange, the content of the message, and the nature of the subsequent interactions. 
Our formulation and findings show how analyzing audience configurations can enrich and extend existing accounts of communication behavior, and frame research directions on audience configuration decisions in communication and collaboration.}, + keywords = {audience,email,social interaction} +} + +@article{zhang_group_2011, + title = {Group Size and Incentives to Contribute: A Natural Experiment at Chinese Wikipedia}, + shorttitle = {Group Size and Incentives to Contribute}, + author = {Zhang, Xiaoquan (Michael) and Zhu, Feng}, + year = {2011}, + month = jun, + journal = {American Economic Review}, + volume = {101}, + number = {4}, + pages = {1601--1615}, + issn = {0002-8282}, + abstract = {The literature on the private provision of public goods suggests an inverse relationship between incentives to contribute and group size. We find, however, that after an exogenous reduction of group size at Chinese Wikipedia, the nonblocked contributors decrease their contributions by 42.8 percent on average. We attribute the cause to social effects: contributors receive social benefits that increase with both the amount of their contributions and group size, and the shrinking group size weakens these social benefits. Consistent with our explanation, we find that the more contributors value social benefits, the more they reduce their contributions after the block. 
(JEL H41, L17, L82)}, + language = {en}, + keywords = {Media,Public Goods; Open Source Products and Markets; Entertainment}, + file = {/home/nathante/Zotero/storage/63JBCUER/Zhang and Zhu - 2011 - Group Size and Incentives to Contribute A Natural.pdf;/home/nathante/Zotero/storage/BWMQ96PV/articles.html} +} + +@article{zhang_understanding_2021, + title = {Understanding the {{Diverging User Trajectories}} in {{Highly}}-{{Related Online Communities During}} the {{Covid}}-19 {{Pandemic}}}, + author = {Zhang, Jason Shuo and Keegan, Brian and Lv, Qin and Tan, Chenhao}, + year = {2021}, + journal = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {5}, + eprint = {2006.04816}, + eprinttype = {arxiv}, + pages = {12}, + abstract = {As the COVID-19 pandemic is disrupting life worldwide, related online communities are popping up. In particular, two ``new'' communities, /r/China\_flu and /r/Coronavirus, emerged on Reddit and have been dedicated to COVID-related discussions from the very beginning of this pandemic. With /r/Coronavirus promoted as the official community on Reddit, it remains an open question how users choose between these two highly-related communities. In this paper, we characterize user trajectories in these two communities from the beginning of COVID-19 to the end of September 2020. We show that new users of /r/China\_flu and /r/Coronavirus were similar from January to March. After that, their differences steadily increase, evidenced by both language distance and membership prediction, as the pandemic continues to unfold. Furthermore, users who started at /r/China\_flu from January to March were more likely to leave, while those who started in later months tend to remain highly ``loyal''. 
To understand this difference, we develop a movement analysis framework to understand membership changes in these two communities and identify a significant proportion of /r/China\_flu members (around 50\%) that moved to /r/Coronavirus in February. This movement turns out to be highly predictable based on other subreddits that users were previously active in. Our work demonstrates how two highly related communities emerge and develop their own identity in a crisis, and highlights the important role of existing communities in understanding such an emergence.}, + archiveprefix = {arXiv}, + language = {en}, + keywords = {Computer Science - Computers and Society,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/3HZBRY3S/Zhang et al. - Understanding the Diverging User Trajectories in H.pdf;/home/nathante/Zotero/storage/V3QR9ASE/Zhang et al. - 2021 - Understanding the Diverging User Trajectories in H.pdf} +} + +@incollection{zhao_social_2016, + title = {The {{Social Media Ecology}}: {{User Perceptions}}, {{Strategies}} and {{Challenges}}}, + shorttitle = {The {{Social Media Ecology}}}, + booktitle = {Proceedings of the 2016 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhao, Xuan and Lampe, Cliff and Ellison, Nicole B.}, + year = {2016}, + month = may, + pages = {89--100}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {Many existing studies of social media focus on only one platform, but the reality of users' lived experiences is that most users incorporate multiple platforms into their communication practices in order to access the people and networks they desire to influence. In order to better understand how people make sharing decisions across multiple sites, we asked our participants (N=29) to categorize all modes of communication they used, with the goal of surfacing their mental models about managing sharing across platforms. 
Our interview data suggest that people simultaneously consider "audience" and "content" when sharing and these needs sometimes compete with one another; that they have the strong desire to both maintain boundaries between platforms as well as allowing content and audience to permeate across these boundaries; and that they strive to stabilize their own communication ecosystem yet need to respond to changes necessitated by the emergence of new tools, practices, and contacts. We unpack the implications of these tensions and suggest future design possibilities.}, + isbn = {978-1-4503-3362-7}, + keywords = {boundary management,content sharing,media ecology,social media}, + file = {/home/nathante/Zotero/storage/44Z9658S/Zhao et al_2016_The Social Media Ecology.pdf} +} + +@inproceedings{zhu_impact_2014, + title = {The Impact of Membership Overlap on the Survival of Online Communities}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Kraut, Robert E. and Kittur, Aniket}, + year = {2014}, + month = apr, + series = {{{CHI}} '14}, + pages = {281--290}, + publisher = {{Association for Computing Machinery}}, + address = {{New York, NY, USA}}, + abstract = {If the people belong to multiple online communities, their joint membership can influence the survival of each of the communities to which they belong. Communities with many joint memberships may struggle to get enough of their members' time and attention, but find it easy to import best practices from other communities. In this paper, we study the effects of membership overlap on the survival of online communities. By analyzing the historical data of 5673 Wikia communities, we find that higher levels of membership overlap are positively associated with higher survival rates of online communities. Furthermore, we find that it is beneficial for young communities to have shared members who play a central role in other mature communities. 
Our contributions are two-fold. Theoretically, by examining the impact of membership overlap on the survival of online communities we identified an important mechanism underlying the success of online communities. Practically, our findings may guide community creators on how to effectively manage their members, and tool designers on how to support this task.}, + isbn = {978-1-4503-2473-1}, + keywords = {membership overlap,online communities,survival analysis}, + file = {/home/nathante/Zotero/storage/GV2D7ZKS/Zhu et al. - 2014 - The Impact of Membership Overlap on the Survival o.pdf;/home/nathante/Zotero/storage/IY4RTSGD/Zhu et al. - 2014 - The impact of membership overlap on the survival o.pdf;/home/nathante/Zotero/storage/JZE5JGAZ/Zhu et al. - 2014 - The impact of membership overlap on the survival o.pdf} +} + +@inproceedings{zhu_selecting_2014, + title = {Selecting an Effective Niche: {{An}} Ecological View of the Success of Online Communities}, + shorttitle = {Selecting an Effective Niche}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Chen, Jilin and Matthews, Tara and Pal, Aditya and Badenes, Hernan and Kraut, Robert E.}, + year = {2014}, + series = {{{CHI}} '14}, + pages = {301--310}, + publisher = {{ACM}}, + address = {{New York, NY, USA}}, + abstract = {Online communities serve various important functions, but many fail to thrive. Research on community success has traditionally focused on internal factors. In contrast, we take an ecological view to understand how the success of a community is influenced by other communities. We measured a community's relationship with other communities - its "niche" - through four dimensions: topic overlap, shared members, content linking, and shared offline organizational affiliation. We used a mixed-method approach, combining the quantitative analysis of 9495 online enterprise communities and interviews with community members. 
Our results show that too little or too much overlap in topic with other communities causes a community's activity to suffer. We also show that this main result is moderated in predictable ways by whether the community shares members with, links to content in, or shares an organizational affiliation with other communities. These findings provide new insight on community success, guiding online community designers on how to effectively position their community in relation to others.}, + isbn = {978-1-4503-2473-1}, + keywords = {online communities,success,topic overlap,workplace}, + file = {/home/nathante/Zotero/storage/FNS9RSWC/Zhu et al. - 2014 - Selecting an Effective Niche An Ecological View o.pdf;/home/nathante/Zotero/storage/KIHWVKUQ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf;/home/nathante/Zotero/storage/RFMX2CBJ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf} +} + + diff --git a/dissertations/nathante_uw_2021/refs.bib b/dissertations/nathante_uw_2021/refs.bib new file mode 100644 index 0000000..80761c4 --- /dev/null +++ b/dissertations/nathante_uw_2021/refs.bib @@ -0,0 +1,6098 @@ + +@inproceedings{ackerman_answer_1990, + title = {Answer {{Garden}}: A {{Tool}} for {{Growing Organizational Memory}}}, + shorttitle = {Answer {{Garden}}}, + booktitle = {Proceedings of the {{ACM SIGOIS}} and {{IEEE CS TC}}-{{OA Conference}} on {{Office Information Systems}}}, + author = {Ackerman, M. S. and Malone, T. W.}, + date = {1990}, + series = {{{COCS}} '90}, + pages = {31--39}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Answer Garden allows organizations to develop databases of commonly asked questions that grow “organically” as new questions arise and are answered. 
It is designed to help in situations (such as field service organizations and customer “hot lines”) where there is a continuing stream of questions, many of which occur over and over, but some of which the organization has never seen before. The system includes a branching network of diagnostic questions that helps users find the answers they want. If the answer is not present, the system automatically sends the question to the appropriate expert, and the answer is returned to the user as well as inserted into the branching network. Experts can also modify this network in response to users' problems. Our initial Answer Garden database contains questions and answers about how to use the X Window System.}, + isbn = {978-0-89791-358-4}, + file = {/home/nathante/Zotero/storage/Q6XN2KED/Ackerman and Malone - 1990 - Answer Garden A Tool for Growing Organizational M.pdf} +} + +@article{ackerman_intellectual_2000, + title = {The {{Intellectual Challenge}} of {{CSCW}}: The {{Gap Between Social Requirements}} and {{Technical Feasibility}}}, + shorttitle = {The {{Intellectual Challenge}} of {{CSCW}}}, + author = {Ackerman, Mark S.}, + date = {2000-09-01}, + journaltitle = {Human–Computer Interaction}, + volume = {15}, + number = {2-3}, + pages = {179--203}, + publisher = {{Taylor \& Francis}}, + issn = {0737-0024}, + abstract = {Over the last 10 years, Computer-Supported Cooperative Work (CSCW) has identified a base set of findings. These findings are taken almost as assumptions within the field. In summary, they argue that human activity is highly flexible, nuanced, and contextualized and that computational entities such as information sharing, roles, and social norms need to be similarly flexible, nuanced, and contextualized. However, current systems cannot fully support the social world uncovered by these findings. In this article I argue that there is an inherent gap between the social requirements of CSCW and its technical mechanisms. 
The social-technical gap is the divide between what we know we must support socially and what we can support technically. Exploring, understanding, and hopefully ameliorating this social-technical gap is the central challenge for CSCW as a field and one of the central problems for human-computer interaction. Indeed, merely attesting the continued centrality of this gap could be one of the important intellectual contributions of CSCW. I also argue that the challenge of the social-technical gap creates an opportunity to refocus CSCW.}, + keywords = {essay,overview,social computing,theory}, + annotation = {\_eprint: https://doi.org/10.1207/S15327051HCI1523\_5}, + file = {/home/nathante/Zotero/storage/6SR5GJPQ/Ackerman - 2000 - The Intellectual Challenge of CSCW The Gap Betwee.pdf;/home/nathante/Zotero/storage/E3NAR7N8/Ackerman - 2000 - The Intellectual Challenge of CSCW The Gap Betwee.pdf;/home/nathante/Zotero/storage/GCVP7ANI/S15327051HCI1523_5.html} +} + +@article{ackerman_sharing_2013, + title = {Sharing {{Knowledge}} and {{Expertise}}: The {{CSCW View}} of {{Knowledge Management}}}, + shorttitle = {Sharing {{Knowledge}} and {{Expertise}}}, + author = {Ackerman, Mark S. and Dachtera, Juri and Pipek, Volkmar and Wulf, Volker}, + date = {2013-08-21}, + journaltitle = {Computer Supported Cooperative Work (CSCW)}, + shortjournal = {Comput Supported Coop Work}, + volume = {22}, + number = {4-6}, + pages = {531--573}, + issn = {0925-9724, 1573-7551}, + abstract = {Knowledge Management (KM) is a diffuse and controversial term, which has been used by a large number of research disciplines. CSCW, over the last 20 years, has taken a critical stance towards most of these approaches, and instead, CSCW shifted the focus towards a practice-based perspective. This paper surveys CSCW researchers’ viewpoints on what has become called ‘knowledge sharing’ and ‘expertise sharing’. 
These are based in an understanding of the social contexts of knowledge work and practices, as well as in an emphasis on communication among knowledgeable humans. The paper provides a summary and overview of the two strands of knowledge and expertise sharing in CSCW, which, from an analytical standpoint, roughly represent ‘generations’ of research: an ‘object-centric’ and a ‘people-centric’ view. We also survey the challenges and opportunities ahead.}, + langid = {english} +} + +@inproceedings{adamic_knowledge_2008, + title = {Knowledge Sharing and Yahoo Answers: Everyone Knows Something}, + shorttitle = {Knowledge Sharing and Yahoo Answers}, + booktitle = {Proceedings of the 17th International Conference on {{World Wide Web}}}, + author = {Adamic, Lada A. and Zhang, Jun and Bakshy, Eytan and Ackerman, Mark S.}, + date = {2008-04-21}, + series = {{{WWW}} '08}, + pages = {665--674}, + publisher = {{Association for Computing Machinery}}, + location = {{Beijing, China}}, + abstract = {Yahoo Answers (YA) is a large and diverse question-answer forum, acting not only as a medium for sharing technical knowledge, but as a place where one can seek advice, gather opinions, and satisfy one's curiosity about a countless number of things. In this paper, we seek to understand YA's knowledge sharing and activity. We analyze the forum categories and cluster them according to content characteristics and patterns of interaction among the users. While interactions in some categories resemble expertise sharing forums, others incorporate discussion, everyday advice, and support. With such a diversity of categories in which one can participate, we find that some users focus narrowly on specific topics, while others participate across categories. This not only allows us to map related categories, but to characterize the entropy of the users' interests. 
We find that lower entropy correlates with receiving higher answer ratings, but only for categories where factual expertise is primarily sought after. We combine both user attributes and answer characteristics to predict, within a given category, whether a particular answer will be chosen as the best answer by the asker.}, + isbn = {978-1-60558-085-2}, + file = {/home/nathante/Zotero/storage/W97ZJFJS/Adamic et al_2008_Knowledge sharing and yahoo answers.pdf} +} + +@book{aldrich_organizations_2006, + title = {Organizations {{Evolving}}}, + author = {Aldrich, H.E. and Ruef, M.}, + date = {2006}, + edition = {2}, + publisher = {{SAGE Publications}}, + location = {{Thousand Oaks, CA}}, + isbn = {978-1-4129-1047-7} +} + +@inproceedings{arazy_determinants_2010, + ids = {arazy_determinants_2010-1}, + title = {Determinants of {{Wikipedia}} Quality: The Roles of Global and Local Contribution Inequality}, + shorttitle = {Determinants of {{Wikipedia Quality}}}, + booktitle = {Proceedings of the 2010 {{ACM Conference}} on {{Computer Supported Cooperative Work}} ({{CSCW}} '10)}, + author = {Arazy, Ofer and Nov, Oded}, + date = {2010}, + pages = {233--236}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {The success of Wikipedia and the relative high quality of its articles seem to contradict conventional wisdom. Recent studies have begun shedding light on the processes contributing to Wikipedia's success, highlighting the role of coordination and contribution inequality. In this study, we expand on these works in two ways. First, we make a distinction between global (Wikipedia-wide) and local (article-specific) inequality and investigate both constructs. Second, we explore both direct and indirect effects of these inequalities, exposing the intricate relationships between global inequality, local inequality, coordination, and article quality. 
We tested our hypotheses on a sample of Wikipedia articles using structural equation modeling and found that global inequality exerts significant positive impact on article quality, while the effect of local inequality is indirect and is mediated by coordination.}, + isbn = {978-1-60558-795-0}, + keywords = {contribution inequality,coordination,global inequality,information quality,local inequality,wikipedia}, + file = {/home/nathante/Zotero/storage/D9KXITIH/Arazy and Nov - 2010 - Determinants of Wikipedia Quality The Roles of Gl.pdf;/home/nathante/Zotero/storage/LJQVFJIK/Arazy_Nov_2010_Determinants of wikipedia quality.pdf} +} + +@article{arazy_evolutionary_2019, + title = {The Evolutionary Trajectories of Peer-Produced Artifacts: Group Composition, the Trajectories' Exploration, and the Quality of Artifacts}, + shorttitle = {The Evolutionary Trajectories of Peer-Produced Artifacts}, + author = {Arazy, Ofer and Lindberg, Aron and Rezaei, Mostafa and Samorani, Michele}, + date = {2019-12-10}, + journaltitle = {MIS Quarterly}, + shortjournal = {MIS Quarterly}, + abstract = {Members of an online community peer-produce digital artifacts by negotiating different perspectives and personal knowledge bases. These negotiations are manifested in the temporal evolution of the peer-produced artifact. In this study we conceptualize the evolution of a digital artifact as a trajectory in a feature space. Our theoretical frame suggests that through negotiations contributors' actions "pull" the trajectory and shape its movement in the feature space. We hypothesize that the type of contributors that work on a focal article influences the extent to which that article's trajectory explores alternative positions within that space, and that the trajectory's exploration is, in turn, associated with the artifact's quality. 
To test these hypotheses, we analyzed the trajectories of wiki articles drawn from two peer-production communities: Wikipedia and Wikia, tracking the evolution of 242 paired articles for over a decade during which the articles went through 536,745 revisions. We found that the contributors who are the most likely to increase the trajectory's exploration are those that (a) return to work on the focal artifact and (b) are unregistered members in the broader online community. Further, our results show that the trajectory's exploration has a curvilinear association with article quality, indicating that exploration contributes positively to quality, but that the effect is reversed when exploration exceeds a certain level. The insights derived from this study highlight the value of an artifact-centric approach to increasing our understanding of the dynamics underlying peer-production.}, + keywords = {peer production,wikia}, + file = {/home/nathante/Zotero/storage/N79K2LTM/Arazy et al. - 2019 - The evolutionary trajectories of peer-produced art.pdf} +} + +@inproceedings{arazy_functional_2015, + title = {Functional Roles and Career Paths in {{Wikipedia}}}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Arazy, Ofer and Ortega, Felipe and Nov, Oded and Yeo, Lisa and Balila, Adam}, + date = {2015}, + series = {{{CSCW}} '15}, + pages = {1092--1105}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {An understanding of participation dynamics within online production communities requires an examination of the roles assumed by participants. Recent studies have established that the organizational structure of such communities is not flat; rather, participants can take on a variety of well-defined functional roles. What is the nature of functional roles? How have they evolved? And how do participants assume these functions? 
Prior studies focused primarily on participants' activities, rather than functional roles. Further, extant conceptualizations of role transitions in production communities, such as the Reader to Leader framework, emphasize a single dimension: organizational power, overlooking distinctions between functions. In contrast, in this paper we empirically study the nature and structure of functional roles within Wikipedia, seeking to validate existing theoretical frameworks. The analysis sheds new light on the nature of functional roles, revealing the intricate "career paths" resulting from participants' role transitions.}, + isbn = {978-1-4503-2922-4}, + file = {/home/nathante/Zotero/storage/ZRNAAPUH/Arazy et al. - 2015 - Functional roles and career paths in Wikipedia.pdf} +} + +@inproceedings{arazy_how_2017, + ids = {arazy2017and}, + title = {On the "How" and "Why" of Emergent Role Behaviors in {{Wikipedia}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}} - {{CSCW}} '17}, + author = {Arazy, Ofer and Lifshitz-Assaf, Hila and Nov, Oded and Daxenberger, Johannes and Balestra, Martina and Cheshire, Coye}, + date = {2017}, + pages = {2039--2051}, + publisher = {{ACM Press}}, + location = {{Portland, Oregon, USA}}, + abstract = {Research on peer-production suggests that as participants choose what actions to perform, prototypical activity patterns emerge. Recent work characterized these patterns and demonstrated that informal emergent roles are highly stable. Nonetheless, we know little about the ways in which contributors take on and shed emergent roles. The objectives of this study are to: (a) delineate the temporal dynamics of participants’ emergent role taking behaviors, and (b) identify the motivations driving role-transition behaviors. Our study links motivation to role-transition behaviors within Wikipedia. 
Our first sample covered eleven years and 222,119 contributors, and was used to identify four categories of temporal role-taking behaviors, that differ in their mobility between emergent roles and across Wikipedia articles. Our second examination linked the motivations of 175 new participants to their subsequent role-taking activity over 14 months. Together, the two analyses reveal that role-taking categories can be distinguished based on participants’ motivational orientation (intrinsic/extrinsic and self/others-oriented).}, + eventtitle = {The 2017 {{ACM Conference}}}, + isbn = {978-1-4503-4335-0}, + langid = {english}, + file = {/home/nathante/Zotero/storage/ZJ25SYGV/Arazy et al. - 2017 - On the How and Why of Emergent Role Behaviors .pdf} +} + +@article{arazy_turbulent_2016, + title = {Turbulent {{Stability}} of {{Emergent Roles}}: The {{Dualistic Nature}} of {{Self}}-{{Organizing Knowledge Coproduction}}}, + shorttitle = {Turbulent {{Stability}} of {{Emergent Roles}}}, + author = {Arazy, Ofer and Daxenberger, Johannes and Lifshitz-Assaf, Hila and Nov, Oded and Gurevych, Iryna}, + date = {2016-12}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {27}, + number = {4}, + pages = {792--812}, + issn = {1047-7047, 1526-5536}, + langid = {english}, + file = {/home/nathante/Zotero/storage/GJBJ39Q9/Arazy et al. - 2016 - Turbulent Stability of Emergent Roles The Dualist.pdf} +} + +@article{armstrong_competitive_1980, + ids = {armstrong_competitive_1980-1}, + title = {Competitive {{Exclusion}}}, + author = {Armstrong, Robert A. 
and McGehee, Richard}, + date = {1980-02-01}, + journaltitle = {The American Naturalist}, + shortjournal = {The American Naturalist}, + volume = {115}, + number = {2}, + pages = {151--170}, + publisher = {{The University of Chicago Press}}, + issn = {0003-0147}, + abstract = {Recent developments in the mathematical theory of competitive exclusion are discussed and placed in historical perspective. The models which have been used in theoretical investigations of competitive exclusion are classified into two groups: those in which the resources regenerate according to an algebraic relationship (abiotic resource models), and those in which resource regeneration is governed by differential equations (biotic resource models). We then propose a mathematical framework for considering problems of competitive exclusion, and provide examples in which n competitors can coexist on k {$<$} n resources (both biotic and abiotic). These systems persist because of internally generated cyclic behavior. We conclude that the competitive exclusion principle applies in general only to coexistence at fixed densities.}, + file = {/home/nathante/Zotero/storage/WY46EPM3/Nat - 2021 - Competitive Exclusion.pdf;/home/nathante/Zotero/storage/6RRFPS4Z/283553.html} +} + +@article{aronow_clusterrobust_2015, + title = {Cluster–{{Robust Variance Estimation}} for {{Dyadic Data}}}, + author = {Aronow, Peter M. and Samii, Cyrus and Assenova, Valentina A.}, + date = {2015}, + journaltitle = {Political Analysis}, + volume = {23}, + number = {4}, + pages = {564--577}, + publisher = {{Cambridge University Press}}, + issn = {1047-1987, 1476-4989}, + abstract = {Dyadic data are common in the social sciences, although inference for such settings involves accounting for a complex clustering structure. Many analyses in the social sciences fail to account for the fact that multiple dyads share a member, and that errors are thus likely correlated across these dyads. 
We propose a non-parametric, sandwich-type robust variance estimator for linear regression to account for such clustering in dyadic data. We enumerate conditions for estimator consistency. We also extend our results to repeated and weighted observations, including directed dyads and longitudinal data, and provide an implementation for generalized linear models such as logistic regression. We examine empirical performance with simulations and an application to interstate disputes.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/T3EKTWJY/Aronow et al. - Cluster–Robust Variance Estimation for Dyadic Data.pdf;/home/nathante/Zotero/storage/ZWVDRAYS/D43E12BF35240100C7A4ED3C28912C95.html} +} + +@article{asthana_few_2018, + title = {With {{Few Eyes}}, {{All Hoaxes Are Deep}}}, + author = {Asthana, Sumit and Halfaker, Aaron}, + date = {2018-11}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + pages = {21:1--21:18}, + issn = {2573-0142}, + abstract = {Quality control is critical to open production communities like Wikipedia. Wikipedia editors enact border quality control with edits (counter-vandalism) and new article creations (new page patrolling) shortly after they are saved. In this paper, we describe a long-standing set of inefficiencies that have plagued new page patrolling by drawing a contrast to the more efficient, distributed processes for counter-vandalism. Further, to address this issue, we demonstrate an effective automated topic model based on a labeling strategy that leverages a folksonomy developed by subject specific working groups in Wikipedia (WikiProject tags) and a flexible ontology (WikiProjects Directory) to arrive at a hierarchical and uniform label set. We are able to attain very high fitness measures (macro ROC-AUC: 95.2\%, macro PR-AUC: 74.5\%) and real-time performance using word2vec-based features. 
Finally, we present a proposal for how incorporating this model into current tools will shift the dynamics of new article review positively.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/CV6DS2XT/Asthana and Halfaker - 2018 - With Few Eyes, All Hoaxes Are Deep.pdf} +} + +@article{astley_two_1985, + title = {The {{Two Ecologies}}: Population and {{Community Perspectives}} on {{Organizational Evolution}}}, + shorttitle = {The {{Two Ecologies}}}, + author = {Astley, W. Graham}, + date = {1985}, + journaltitle = {Administrative Science Quarterly}, + volume = {30}, + number = {2}, + eprint = {2393106}, + eprinttype = {jstor}, + pages = {224--241}, + issn = {0001-8392}, + abstract = {This paper distinguishes between two ecological perspectives on organizational evolution: population ecology and community ecology. The perspectives adopt different levels of analysis and produce contrasting views of the characteristic mode and tempo of organizational evolution. Population ecology limits investigation to evolutionary change unfolding within established populations, emphasizing factors that homogenize organizational forms and maintain population stability. Population ecology thus fails to explain how populations originate in the first place or how evolutionary change occurs through the proliferation of heterogeneous organizational types. Community ecology overcomes these limitations: it focuses on the rise and fall of populations as basic units of evolutionary change, simultaneously explaining forces that produce homogeneity and stability within populations and heterogeneity between them.}, + file = {/home/nathante/Zotero/storage/4Q76BREE/Astley - 1985 - The Two Ecologies Population and Community Perspe.pdf} +} + +@article{axelrod_evolution_1981, + title = {The Evolution of Cooperation}, + author = {Axelrod, R. and Hamilton, W. 
D.}, + date = {1981-03-27}, + journaltitle = {Science}, + volume = {211}, + number = {4489}, + pages = {1390--1396}, + issn = {0036-8075, 1095-9203}, + abstract = {Cooperation in organisms, whether bacteria or primates, has been a difficulty for evolutionary theory since Darwin. On the assumption that interactions between pairs of individuals occur on a probabilistic basis, a model is developed based on the concept of an evolutionarily stable strategy in the context of the Prisoner's Dilemma game. Deductions from the model, and the results of a computer tournament show how cooperation based on reciprocity can get started in an asocial world, can thrive while interacting with a wide range of other strategies, and can resist invasion once fully established. Potential applications include specific aspects of territoriality, mating, and disease.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/5W7KPW9P/1390.html} +} + +@inproceedings{balestra_investigating_2017, + title = {Investigating the {{Motivational Paths}} of {{Peer Production Newcomers}}}, + booktitle = {Proceedings of the 2017 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Balestra, Martina and Cheshire, Coye and Arazy, Ofer and Nov, Oded}, + date = {2017}, + series = {{{CHI}} '17}, + pages = {6381--6385}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Maintaining participation beyond the initial period of engagement is critical for peer production systems. Theory suggests that an increase in motivation is expected with contributors' movement from the community periphery to the core. Less is known, however, about how specific motivations change over time. We fill this gap by focusing on individual motivational paths in the formative periods of engagement, exploring which motivations change and how. We collected data on various instrumental and non-instrumental motivations at two points in study participants' 
Wikipedia career: when they started editing and again after six months. We found that non-instrumental motivations (including collective and intrinsic motives) decreased significantly over time, in contrast with socially-driven motivations such as norm-oriented motivates which did not change and social motives which increased marginally. The findings offer new insights into newcomers' evolving motivations, with implications for designing and managing peer-production systems.}, + isbn = {978-1-4503-4655-9}, + file = {/home/nathante/Zotero/storage/2E3UFPMA/Balestra et al. - 2017 - Investigating the Motivational Paths of Peer Produ.pdf} +} + +@article{banbura_large_2010, + title = {Large {{Bayesian}} Vector Auto Regressions}, + author = {Bańbura, Marta and Giannone, Domenico and Reichlin, Lucrezia}, + date = {2010}, + journaltitle = {Journal of Applied Econometrics}, + volume = {25}, + number = {1}, + pages = {71--92}, + issn = {1099-1255}, + abstract = {This paper shows that vector auto regression (VAR) with Bayesian shrinkage is an appropriate tool for large dynamic models. We build on the results of De Mol and co-workers (2008) and show that, when the degree of shrinkage is set in relation to the cross-sectional dimension, the forecasting performance of small monetary VARs can be improved by adding additional macroeconomic variables and sectoral information. In addition, we show that large VARs with shrinkage produce credible impulse responses and are suitable for structural analysis. 
Copyright © 2009 John Wiley \& Sons, Ltd.}, + langid = {english}, + annotation = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/jae.1137}, + file = {/home/nathante/Zotero/storage/BJPRR8SM/Bańbura et al_2010_Large Bayesian vector auto regressions.pdf;/home/nathante/Zotero/storage/8WJXYLQS/jae.html} +} + +@report{band_wikipedias_2013, + type = {SSRN Scholarly Paper}, + title = {Wikipedia's {{Economic Value}}}, + author = {Band, Jonathan and Gerafi, Jonathan}, + date = {2013-10-07}, + institution = {{Social Science Research Network}}, + location = {{Rochester, NY}}, + abstract = {In the copyright policy debate, proponents of strong copyright protection tend to be dismissive of the quality of freely available content. In response to counter-examples such as open access scholarly publications and advertising-supported business models (e.g., newspaper websites and the over-the-air television broadcasts viewed by 50 million Americans), the strong copyright proponents center their attack on amateur content. In this narrative, YouTube is for cat videos and Wikipedia is a wildly unreliable source of information.}, + langid = {english}, + keywords = {Jonathan Band,Jonathan Gerafi,SSRN,Wikipedia's Economic Value}, + file = {/home/nathante/Zotero/storage/SIEJTWL2/Band and Gerafi - 2013 - Wikipedia's Economic Value.pdf;/home/nathante/Zotero/storage/HFS9JSGL/papers.html} +} + +@article{barigozzi_nets:_2019-2, + title = {{{NETS}}: Network Estimation for Time Series}, + shorttitle = {{{NETS}}}, + author = {Barigozzi, Matteo and Brownlees, Christian}, + date = {2019}, + journaltitle = {Journal of Applied Econometrics}, + volume = {34}, + number = {3}, + pages = {347--364}, + issn = {1099-1255}, + abstract = {We model a large panel of time series as a vector autoregression where the autoregressive matrices and the inverse covariance matrix of the system innovations are assumed to be sparse. 
The system has a network representation in terms of a directed graph representing predictive Granger relations and an undirected graph representing contemporaneous partial correlations. A LASSO algorithm called NETS is introduced to estimate the model. We apply the methodology to analyze a panel of volatility measures of 90 blue chips. The model captures an important fraction of total variability, on top of what is explained by volatility factors, and improves out-of-sample forecasting.}, + langid = {english} +} + +@article{barnett_competition_1987, + title = {Competition and Mutualism among Early Telephone Companies}, + author = {Barnett, William P. and Carroll, Glenn R.}, + date = {1987}, + journaltitle = {Administrative Science Quarterly}, + volume = {32}, + number = {3}, + eprint = {2392912}, + eprinttype = {jstor}, + pages = {400--421}, + issn = {0001-8392}, + abstract = {In an exploratory study of the early telephone industry, we search for evidence of competition and mutualism between legally autonomous companies. Neighboring companies are found to have both types of interdependencies, although their exact nature depends on organizational form. Companies in separate geographical locations are found to be competitive with each other, regardless of organizational form. The two prevalent organizational forms in the industry at this time each apparently flourished in distinct niches and were symbiotically related. 
The findings are interpreted within a community ecology framework.} +} + +@article{barnett_predicting_2017, + title = {Predicting International {{Facebook}} Ties through Cultural Homophily and Other Factors}, + author = {Barnett, George A and Benefield, Grace A}, + date = {2017-02-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {19}, + number = {2}, + pages = {217--239}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study describes the structure of the international Facebook friendship network and its determinants using various predictors, including physical proximity, cultural homophily, and communication. Network analysis resulted in one group of nations, with countries that bridge geographic and linguistic clusters (France, Spain, United Kingdom, and United Arab Emirates) being the most central. Countries with international Facebook friendship ties tended to share borders, language, civilization, and migration. Physical distance, shared hyperlinks, use of common websites, telephone traffic, cultural similarity, and international student exchange were either weakly or not significantly related to international Facebook friendships.}, + langid = {english}, + keywords = {Communication network analysis,cultural homophily,Facebook,international friendship,social media (SNS)}, + file = {/home/nathante/Zotero/storage/LPCY3MMC/Barnett and Benefield - 2017 - Predicting international Facebook ties through cul.pdf} +} + +@article{baronchelli_emergence_2018, + title = {The Emergence of Consensus: A Primer}, + shorttitle = {The Emergence of Consensus}, + author = {Baronchelli, Andrea}, + date = {2018-02-01}, + journaltitle = {Open Science}, + volume = {5}, + number = {2}, + pages = {172189}, + issn = {2054-5703}, + abstract = {The origin of population-scale coordination has puzzled philosophers and scientists for centuries. 
Recently, game theory, evolutionary approaches and complex systems science have provided quantitative insights on the mechanisms of social consensus. However, the literature is vast and widely scattered across fields, making it hard for the single researcher to navigate it. This short review aims to provide a compact overview of the main dimensions over which the debate has unfolded and to discuss some representative examples. It focuses on those situations in which consensus emerges ‘spontaneously’ in the absence of centralized institutions and covers topics that include the macroscopic consequences of the different microscopic rules of behavioural contagion, the role of social networks and the mechanisms that prevent the formation of a consensus or alter it after it has emerged. Special attention is devoted to the recent wave of experiments on the emergence of consensus in social systems.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/BCQ4892J/Baronchelli - 2018 - The emergence of consensus a primer.pdf;/home/nathante/Zotero/storage/WPXC9FJ7/172189.html} +} + +@incollection{baum_ecological_2006, + title = {Ecological Approaches to Organizations}, + booktitle = {Sage {{Handbook}} for {{Organization Studies}}}, + author = {Baum, Joel A. C. and Shipilov, Andrew V.}, + date = {2006}, + pages = {55--110}, + publisher = {{Sage}}, + location = {{Rochester, NY}}, + abstract = {Our goal is to assess and consolidate the current state-of-the-art in organizational ecology. To accomplish this we review major theoretical statements, empirical studies, and arguments that are now being made. 
Although we attempt to survey ecological approaches to organizations comprehensively, because ecological research now constitutes a very large body of work, and because other extensive reviews are available (Aldrich \& Wiedenmayer, 1993; Barnett \& Carroll, 1995; Baum, 1996; Baum \& Amburgey, 2002; Baum \& Rao, 2004; Carroll, Dobrev \& Swaminathan, 2002; Galunic \& Weeks 2002; Rao, 2002; Singh \& Lumsden, 1990), we emphasize recent work that challenges and extends established theory and highlight new and emerging directions for future research that appear promising. Our appraisal focuses on two main themes - demographic processes and ecological processes.}, + file = {/home/nathante/Zotero/storage/EGQC2W5I/Baum and Shipilov - 2006 - Ecological approaches to organizations.pdf;/home/nathante/Zotero/storage/38MBRGMQ/papers.html} +} + +@article{baum_organizational_1994, + title = {Organizational {{Niches}} and the {{Dynamics}} of {{Organizational Founding}}}, + author = {Baum, Joel A. C. and Singh, Jitendra V.}, + date = {1994}, + journaltitle = {Organization Science}, + volume = {5}, + number = {4}, + eprint = {2635178}, + eprinttype = {jstor}, + pages = {483--501}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {In this paper we argue that patterns of organizational niche overlap and nonoverlap influence the organizational niches in which entrepreneurs create organizations. Organizational niches characterize the different resource requirements and productive capacities of individual organizations in a population. Depending on which organizational niches are targeted, entrepreneurs will face different competitive landscapes. For a population of day care centers (DCCs), we measure organizational niches and compute organizational niche overlaps in terms of the ages of children they are licensed to enroll. 
Using weights based on organizational niche overlaps, we disaggregate population density (i.e., the number of DCCs) into overlap density and nonoverlap density to measure the potential for competition and cooperation among DCCs. The overlap density of an organizational niche is equal to population density weighted by the overlaps of the focal organizational niche with all other organizational niches. Conversely, non-overlap density is equal to population density weighted by the absence of overlaps of a focal organizational niche with all other organizational niches. We hypothesize that overlap density will be negatively related to the founding rate. We expect entrepreneurs will be much less likely to target or be capable of founding organizations in crowded parts of the resource space than parts that are less densely populated. We also hypothesize that nonoverlap density will be positively related to the founding rate. This is because differentiated DCCs do not compete directly for resources, and, at the same time, their presence can have facilitative influences through complementary demand enhancement and widening social acceptance of the organization form. Supporting these predictions, a dynamic analysis showed that overlap density had a competitive effect on the founding rate, while nonoverlap density had a positive effect. Parallel effects were obtained when overlap and nonoverlap densities were further disaggregated on the basis of geographic proximity into local and diffuse components. Overall, our findings are consistent with earlier research on organizational founding at the population level, but reveal intrapopulation patterns of mutualism and competition that influence the likelihood of organizations being established in different organizational niches. 
The key result of this study, that location in a multidimensional resource space, together with the distribution of other competitors and noncompetitors, has a significant impact on founding probabilities serves to illuminate some of the underlying dynamics of competition and mutualism that impact strategic and entrepreneurial processes.}, + file = {/home/nathante/Zotero/storage/E2AGCRNI/Baum and Singh - 1994 - Organizational Niches and the Dynamics of Organiza.pdf} +} + +@article{baum_organizational_1994-2, + title = {Organizational {{Niches}} and the {{Dynamics}} of {{Organizational Mortality}}}, + author = {Baum, Joel A. C. and Singh, Jitendra V.}, + date = {1994}, + journaltitle = {American Journal of Sociology}, + volume = {100}, + number = {2}, + eprint = {2782073}, + eprinttype = {jstor}, + pages = {346--380}, + publisher = {{University of Chicago Press}}, + issn = {0002-9602}, + abstract = {Departing from the population-level emphasis of density dependence research in organizational ecology, the authors examine how organizational niches within populations influence patterns of competition and mutualism. Organizational niches characterize intrapopulation variation in productive capacities and resource requirements and are operationalized for a population of day care centers (DCCs) based on the ages of children they are licensed to enroll. The authors find competitive effects of overlap density, the aggregate overlap of a DCC's organizational niche with those of all others, and mutualistic effects of nonoverlap density, the aggregate nonoverlap, which are strongest among neighboring DCCs. The authors discuss the implications of their findings for studying organizational population dynamics.} +} + +@article{baumgartner_punctuated_2009, + title = {Punctuated {{Equilibrium}} in {{Comparative Perspective}}}, + author = {Baumgartner, Frank R. and Breunig, Christian and Green‐Pedersen, Christoffer and Jones, Bryan D. and Mortensen, Peter B. 
and Nuytemans, Michiel and Walgrave, Stefaan}, + date = {2009-07-01}, + journaltitle = {American Journal of Political Science}, + volume = {53}, + number = {3}, + pages = {603--620}, + issn = {1540-5907}, + abstract = {We explore the impact of institutional design on the distribution of changes in outputs of governmental processes in the United States, Belgium, and Denmark. Using comprehensive indicators of governmental actions over several decades, we show that in each country the level of institutional friction increases as we look at processes further along the policy cycle. Assessing multiple policymaking institutions in each country allows us to control for the nature of the policy inputs, as all the institutions we consider cover the full range of social and political issues in the country. We find that all distributions exhibit high kurtosis values, significantly higher than the Normal distribution which would be expected if changes in government attention and activities were proportionate to changes in social inputs. Further, in each country, those institutions that impose higher decision-making costs show progressively higher kurtosis values. The results suggest general patterns that we hypothesize to be related to boundedly rational behavior in a complex social environment.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/MI3L3WCA/Baumgartner et al. 
- 2009 - Punctuated Equilibrium in Comparative Perspective.pdf;/home/nathante/Zotero/storage/H8BBFG97/j.1540-5907.2009.00389.html} +} + +@article{baumgartner_pushshift_2020, + title = {The {{Pushshift Reddit}} Dataset}, + author = {Baumgartner, Jason and Zannettou, Savvas and Keegan, Brian and Squire, Megan and Blackburn, Jeremy}, + date = {2020-05-26}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + shortjournal = {ICWSM}, + volume = {14}, + pages = {830--839}, + issn = {2334-0770}, + langid = {english}, + keywords = {pushift,reddit}, + file = {/home/nathante/Zotero/storage/DHRFJ58I/Baumgartner et al. - 2020 - The Pushshift Reddit Dataset.pdf;/home/nathante/Zotero/storage/G5E8SQFN/Baumgartner et al_2020_The Pushshift Reddit Dataset.pdf;/home/nathante/Zotero/storage/A8X5UY9R/2001.html;/home/nathante/Zotero/storage/B9FRQR94/7347.html} +} + +@article{becker_theory_1965, + title = {A {{Theory}} of the {{Allocation}} of {{Time}}}, + author = {Becker, Gary S.}, + date = {1965-09}, + journaltitle = {The Economic Journal}, + shortjournal = {The Economic Journal}, + volume = {75}, + number = {299}, + pages = {493}, + issn = {00130133}, + langid = {english}, + file = {/home/nathante/Zotero/storage/82WK59JA/Becker - 1965 - A Theory of the Allocation of Time.pdf} +} + +@article{belmonte_hierarchical_2014, + title = {Hierarchical {{Shrinkage}} in {{Time}}-{{Varying Parameter Models}}}, + author = {Belmonte, Miguel A. G. and Koop, Gary and Korobilis, Dimitris}, + date = {2014}, + journaltitle = {Journal of Forecasting}, + volume = {33}, + number = {1}, + pages = {80--94}, + issn = {1099-131X}, + abstract = {In this paper, we forecast EU area inflation with many predictors using time-varying parameter models. The facts that time-varying parameter models are parameter rich and the time span of our data is relatively short motivate a desire for shrinkage. 
In constant coefficient regression models, the Bayesian Lasso is gaining increasing popularity as an effective tool for achieving such shrinkage. In this paper, we develop econometric methods for using the Bayesian Lasso with time-varying parameter models. Our approach allows for the coefficient on each predictor to be: (i) time varying; (ii) constant over time; or (iii) shrunk to zero. The econometric methodology decides automatically to which category each coefficient belongs. Our empirical results indicate the benefits of such an approach. Copyright © 2013 John Wiley \& Sons, Ltd.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/U7CB9Y87/Belmonte et al. - 2014 - Hierarchical Shrinkage in Time-Varying Parameter M.pdf;/home/nathante/Zotero/storage/CAU9FJGR/for.html} +} + +@article{benkler_coases_2002, + title = {Coase's Penguin, or, {{Linux}} and `{{The}} Nature of the Firm'}, + author = {Benkler, Yochai}, + date = {2002-12}, + journaltitle = {The Yale Law Journal}, + volume = {112}, + number = {3}, + pages = {369}, + keywords = {Advantages,Economics,FOSS,Internet,Law,Legal Studies,Open source software,Production cooperatives,Socioeconomic factors} +} + +@incollection{benkler_peer_2015, + title = {Peer Production: A Form of Collective Intelligence}, + booktitle = {Handbook of {{Collective Intelligence}}}, + author = {Benkler, Yochai and Shaw, Aaron and Hill, Benjamin Mako}, + editor = {Malone, Thomas W. 
and Bernstein, Michael S.}, + date = {2015}, + pages = {175--204}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-02981-0}, + langid = {english} +} + +@report{benkler_social_2013, + type = {SSRN Scholarly Paper}, + title = {Social {{Mobilization}} and the {{Networked Public Sphere}}: Mapping the {{SOPA}}-{{PIPA Debate}}}, + shorttitle = {Social {{Mobilization}} and the {{Networked Public Sphere}}}, + author = {Benkler, Yochai and Roberts, Hal and Faris, Robert and Solow-Niederman, Alicia and Etling, Bruce}, + date = {2013}, + number = {ID 2295953}, + institution = {{Social Science Research Network}}, + location = {{Rochester, NY}}, + abstract = {This paper uses a new set of online research tools to develop a detailed study of the public debate over proposed legislation in the United States designed to give prosecutors and copyright holders new tools to pursue suspected online copyright violations. For this study, we compiled, mapped, and analyzed a set of 9,757 stories relevant to the COICA-SOPA-PIPA debate from September 2010 through the end of January 2012 using Media Cloud, an open source tool created at the Berkman Center to allow quantitative analysis of a large number of online media sources. This study applies a mixed-methods approach by combining text and link analysis with human coding and informal interviews to map the evolution of the controversy over time and to analyze the mobilization, roles, and interactions of various actors. This novel, data-driven perspective on the dynamics of the networked public sphere supports an optimistic view of the potential for networked democratic participation, and offers a view of a vibrant, diverse, and decentralized networked public sphere that exhibited broad participation, leveraged topical expertise, and focused public sentiment to shape national public policy. 
We find that the fourth estate function was fulfilled by a network of small-scale commercial tech media, standing non-media NGOs, and individuals, whose work was then amplified by traditional media. Mobilization was effective, and involved substantial experimentation and rapid development. We observe the rise to public awareness of an agenda originating in the networked public sphere and its framing in the teeth of substantial sums of money spent to shape the mass media narrative in favor of the legislation. Moreover, we witness what we call an attention backbone, in which more trafficked sites amplify less-visible individual voices on specific subjects. Some aspects of the events suggest that they may be particularly susceptible to these kinds of democratic features, and may not be generalizable. Nonetheless, the data suggest that, at least in this case, the networked public sphere enabled a dynamic public discourse that involved both individual and organizational participants and offered substantive discussion of complex issues contributing to affirmative political action. Find more information about the paper, including raw data available for download and an interactive visualization of the maps included in this paper, on the Berkman Center website.}, + file = {/home/nathante/Zotero/storage/P9M6MASA/Benkler et al. 
- 2013 - Social Mobilization and the Networked Public Spher.pdf} +} + +@book{benkler_wealth_2006, + title = {The Wealth of Networks: How Social Production Transforms Markets and Freedom}, + author = {Benkler, Yochai}, + date = {2006}, + publisher = {{Yale University Press}}, + location = {{New Haven, CT}}, + pagetotal = {528}, + keywords = {bookReview,Economics,FOSS,foundations of social computing,import,Innovation,Legal Studies,peer production} +} + +@incollection{bernstein_quantifying_2013, + title = {Quantifying the Invisible Audience in Social Networks}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Bernstein, Michael S. and Bakshy, Eytan and Burke, Moira and Karrer, Brian}, + date = {2013-04-27}, + pages = {21--30}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {When you share content in an online social network, who is listening? Users have scarce information about who actually sees their content, making their audience seem invisible and difficult to estimate. However, understanding this invisible audience can impact both science and design, since perceived audiences influence content production and self-presentation online. In this paper, we combine survey and large-scale log data to examine how well users' perceptions of their audience match their actual audience on Facebook. We find that social media users consistently underestimate their audience size for their posts, guessing that their audience is just 27\% of its true size. Qualitative coding of survey responses reveals folk theories that attempt to reverse-engineer audience size using feedback and friend count, though none of these approaches are particularly accurate. 
We analyze audience logs for 222,000 Facebook users' posts over the course of one month and find that publicly visible signals --- friend count, likes, and comments --- vary widely and do not strongly indicate the audience of a single post. Despite the variation, users typically reach 61\% of their friends each month. Together, our results begin to reveal the invisible undercurrents of audience attention and behavior in online social networks.}, + isbn = {978-1-4503-1899-0}, + keywords = {audience,information distribution,social networks} +} + +@article{bilgrei_broscience_2018, + title = {Broscience: Creating Trust in Online Drug Communities}, + shorttitle = {Broscience}, + author = {Bilgrei, Ola Røed}, + date = {2018-08-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {20}, + number = {8}, + pages = {2712--2727}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study explores the social mechanisms involved in online community trust. Drawing on interviews with members from two Norwegian Internet drug forums, the article illustrates how forum members evaluate the trustworthiness of online user-generated drug content, referred to as ‘broscience’. First, the shared narratives and boundaries within the forums generated a sense of collective identity, where members defined their online surroundings in terms of community trust and collaboration. Second, the subcultural argot within the forums helped members express a level of subcultural competence and authenticity, in which they were able to assess their credibility and initial trustworthiness. Third, the reputation linked to online identities created expectations and predictability as a basis for evaluating members’ trustworthiness. 
These findings touch upon the ambivalence of trust in an online setting and highlight the communal process that caused their ambivalence to be suspended, thereby enabling online community trust.}, + langid = {english}, + keywords = {Broscience,drugs,Internet subculture,online community,trust}, + file = {/home/nathante/Zotero/storage/WBMSUCSH/Bilgrei - 2018 - Broscience Creating trust in online drug communit.pdf} +} + +@book{bimber_collective_2012, + ids = {bimber_collective_2012-1}, + title = {Collective Action in Organizations: Interaction and Engagement in an Era of Technological Change}, + shorttitle = {Collective Action in Organizations}, + author = {Bimber, Bruce A. and Flanagin, Andrew J. and Stohl, Cynthia}, + date = {2012}, + publisher = {{Cambridge University Press}}, + location = {{New York, NY}}, + abstract = {"This book explores how people participate in public life through organizations. The authors examine The American Legion, AARP, and MoveOn, and show surprising similarities across these three organizations"--Provided by publisher. "This book offers a new theory of collective action for the age of digital media, attesting to the continued relevance of formal organizations in a time when digital media can make it seem that organizations are outdated. The authors examine the dynamics of membership in three distinctive organizations: The American Legion, AARP, and MoveOn. They develop the theory of Collective Action Space to demonstrate the important dimensions of membership and use survey and interview data to explore commonalities across the organizations, each of which exhibits four 'participatory styles.' The book shows that predictors of participation vary greatly across participatory styles, and rather little across organizations. The book wrestles with a crucial feature of contemporary collective action, wherein technology does not necessarily make people participate more, but people consistently use technology when they participate. 
The result is a theoretically rich and empirically fresh portrait of collective action, organization, and technology"--Provided by publisher.}, + isbn = {978-0-521-19172-2}, + langid = {english}, + pagetotal = {224} +} + +@article{blei_latent_2003, + title = {Latent Dirichlet Allocation}, + author = {Blei, David M. and Ng, Andrew Y. and Jordan, Michael I.}, + date = {2003}, + journaltitle = {The Journal of Machine Learning Research}, + volume = {3}, + pages = {993--1022}, + file = {/home/nathante/Zotero/storage/2K3E7TJH/Blei et al. - 2003 - Latent dirichlet allocation.pdf} +} + +@inproceedings{blevis_ecological_2015, + title = {Ecological {{Perspectives}} in {{HCI}}: Promise, {{Problems}}, and {{Potential}}}, + shorttitle = {Ecological {{Perspectives}} in {{HCI}}}, + booktitle = {Proceedings of the 33rd {{Annual ACM Conference Extended Abstracts}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Blevis, Eli and Bødker, Susanne and Flach, John and Forlizzi, Jodi and Jung, Heekyoung and Kaptelinin, Victor and Nardi, Bonnie and Rizzo, Antonio}, + date = {2015-04-18}, + series = {{{CHI EA}} '15}, + pages = {2401--2404}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {The aim of the workshop is to provide a forum for researchers and practitioners to discuss the present and future of ecological perspectives in HCI. The participants will reflect on the current uses and interpretations of "ecology" and related concepts in the field. 
The workshop will assess the potential of ecological perspectives in HCI for supporting rich and meaningful analysis, as well as innovative design, of interactive technologies in real-life contexts.}, + isbn = {978-1-4503-3146-3}, + keywords = {affordances,artifact ecologies,ecological psychology,ecology,habitat,information ecologies,social ecology,sustainability} +} + +@article{bowker_bonnie_2001, + title = {Bonnie {{Nardi}} and {{Vicki O}}'{{Day}}, {{Information Ecologies}}: Using {{Technology}} with {{Heart}}}, + shorttitle = {Bonnie {{Nardi}} and {{Vicki O}}'{{Day}}, {{Information Ecologies}}}, + author = {Bowker, Geoffrey C.}, + date = {2001-03}, + journaltitle = {Computer Supported Cooperative Work (CSCW)}, + shortjournal = {Computer Supported Cooperative Work (CSCW)}, + volume = {10}, + number = {1}, + pages = {143--145}, + issn = {0925-9724, 1573-7551}, + langid = {english} +} + +@book{box-steffensmeier_time_2014, + title = {Time Series Analysis for the Social Sciences}, + author = {Box-Steffensmeier, Janet M}, + date = {2014}, + abstract = {"Time-series, or longitudinal, data are ubiquitous in the social sciences. Unfortunately, analysts often treat the time-series properties of their data as a nuisance rather than a substantively meaningful dynamic process to be modeled and interpreted. Time-Series Analysis for Social Sciences provides accessible, up-to-date instruction and examples of the core methods in time-series econometrics. Janet M. Box-Steffensmeier, John R. Freeman, Jon C. Pevehouse, and Matthew P. Hitt cover a wide range of topics including ARIMA models, time-series regression, unit-root diagnosis, vector autoregressive models, error-correction models, intervention models, fractional integration, ARCH models, structural breaks, and forecasting. This book is aimed at researchers and graduate students who have taken at least one course in multivariate regression. 
Examples are drawn from several areas of social science, including political behavior, elections, international conflict, criminology, and comparative political economy"--}, + isbn = {978-0-521-87116-7 978-0-521-69155-0}, + langid = {english}, + annotation = {OCLC: 879601718} +} + +@article{boyd_social_2007, + title = {Social {{Network Sites}}: Definition, {{History}}, and {{Scholarship}}}, + shorttitle = {Social {{Network Sites}}}, + author = {Boyd, Danah M and Ellison, Nicole B.}, + date = {2007-10-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {13}, + number = {1}, + pages = {210--230}, + publisher = {{Oxford Academic}}, + abstract = {Social network sites (SNSs) are increasingly attracting the attention of academic and industry researchers intrigued by their affordances and reach. This special theme section of the Journal of Computer-Mediated Communication brings together scholarship on these emergent phenomena. In this introductory article, we describe features of SNSs and propose a comprehensive definition. We then present one perspective on the history of such sites, discussing key changes and developments. After briefly summarizing existing scholarship concerning SNSs, we discuss the articles in this special section and conclude with considerations for future research.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6BMGYUAE/Boyd and Ellison - 2007 - Social Network Sites Definition, History, and Sch.pdf;/home/nathante/Zotero/storage/JK59CLHH/4583062.html} +} + +@article{brandt_bayesian_2012, + title = {A {{Bayesian Poisson Vector Autoregression Model}}}, + author = {Brandt, Patrick T. and Sandler, Todd}, + date = {2012}, + journaltitle = {Political Analysis}, + shortjournal = {Polit. 
anal.}, + volume = {20}, + number = {3}, + pages = {292--315}, + issn = {1047-1987, 1476-4989}, + abstract = {Multivariate count models are rare in political science, despite the presence of many count time series. This article develops a new Bayesian Poisson vector autoregression (BaP-VAR) model that can characterize endogenous dynamic counts with no restrictions on the contemporaneous correlations. Impulse responses, decomposition of the forecast errors, and dynamic multiplier methods for the effects of exogenous covariate shocks are illustrated for the model. Two full illustrations of the model, its interpretations, and results are presented. The first example is a dynamic model that reanalyzes the patterns and predictors of superpower rivalry events. The second example applies the model to analyze the dynamics of transnational terrorist targeting decisions between 1968 and 2008. The latter example’s results have direct implications for contemporary policy about terrorists’ targeting that are both novel and innovative in the study of terrorism.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/FXWYBXR7/Brandt and Sandler - 2012 - A Bayesian Poisson Vector Autoregression Model.pdf} +} + +@inproceedings{brandtzaeg_user_2008, + title = {User {{Loyalty}} and {{Online Communities}}: Why {{Members}} of {{Online Communities}} Are Not {{Faithful}}}, + shorttitle = {User {{Loyalty}} and {{Online Communities}}}, + booktitle = {Proceedings of the 2nd {{International Conference}} on {{INtelligent TEchnologies}} for Interactive {{enterTAINment}}}, + author = {Brandtzæg, Petter Bae and Heim, Jan}, + date = {2008}, + publisher = {{ICST}}, + location = {{Cancun, Mexico}}, + abstract = {Online communities are getting increasingly important for several different user groups; at the same time, community members seem to lack loyalty, as they often change from one community to another or use their community less over time. 
To survive and thrive, online communities must meet members’ needs. By using qualitative data are from an extensive online survey of online community users and a representative sample of Internet users, 200 responses to an open question regarding community-loyalty was analyzed. Results show that there are 9 main reasons why community-users decrease in their participation over time or, in simple terms, stop using their online community: 1) Lack of interesting people/friends attending, 2) Low quality content, 3) Low usability, 4) Harassment and bullying 5) Timeconsuming/isolating, 6) Low trust, 7) Over-commercialized, 8) Dissatisfaction with moderators and 9) Unspecified boring. The results, design implications and future research are discussed.}, + eventtitle = {2nd {{International Conference}} on {{INtelligent TEchnologies}} for Interactive {{enterTAINment}}}, + isbn = {978-963-9799-13-4}, + langid = {english}, + file = {/home/nathante/Zotero/storage/2KNF5QHS/Brandtzæg and Heim - 2008 - User Loyalty and Online Communities Why Members o.pdf} +} + +@article{brown_social_1987, + title = {Social {{Ties}} and {{Word}}-of-{{Mouth Referral Behavior}}}, + author = {Brown, Jacqueline Johnson and Reingen, Peter H.}, + date = {1987}, + journaltitle = {Journal of Consumer Research}, + volume = {14}, + number = {3}, + eprint = {2489496}, + eprinttype = {jstor}, + pages = {350--362}, + publisher = {{Oxford University Press}}, + issn = {0093-5301}, + abstract = {This article presents a network analysis of word-of-mouth referral behavior in a natural environment. The relational properties of tie strength and homophily were employed to examine referral behavior at micro and macro levels of inquiry. The study demonstrates different roles played by weak and strong social ties. At the macro level, weak ties displayed an important bridging function, allowing information to travel from one distinct subgroup of referral actors to another subgroup in the broader social system. 
At the micro level, strong and homophilous ties were more likely to be activated for the flow of referral information. Strong ties were also perceived as more influential than weak ties, and they were more likely to be utilized as sources of information for related goods.} +} + +@inproceedings{bryant_becoming_2005, + title = {Becoming {{Wikipedian}}: Transformation of Participation in a Collaborative Online Encyclopedia}, + shorttitle = {Becoming {{Wikipedian}}}, + booktitle = {Proceedings of the 2005 {{International ACM SIGGROUP Conference}} on {{Supporting Group Work}}}, + author = {Bryant, Susan L. and Forte, Andrea and Bruckman, Amy}, + date = {2005}, + series = {{{GROUP}} '05}, + pages = {1--10}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Traditional activities change in surprising ways when computer-mediated communication becomes a component of the activity system. In this descriptive study, we leverage two perspectives on social activity to understand the experiences of individuals who became active collaborators in Wikipedia, a prolific, cooperatively-authored online encyclopedia. Legitimate peripheral participation provides a lens for understanding participation in a community as an adaptable process that evolves over time. We use ideas from activity theory as a framework to describe our results. Finally, we describe how activity on the Wikipedia stands in striking contrast to traditional publishing and suggests a new paradigm for collaborative systems.}, + isbn = {1-59593-223-2}, + keywords = {activity theory,community,legitimate peripheral participation,qualitative,Wiki,wikipedia}, + file = {/home/nathante/Zotero/storage/VJXQFTDD/Bryant et al. 
- 2005 - Becoming Wikipedian transformation of participati.pdf} +} + +@article{burgelman_intraorganizational_1991, + title = {Intraorganizational {{Ecology}} of {{Strategy Making}} and {{Organizational Adaptation}}: Theory and {{Field Research}}}, + shorttitle = {Intraorganizational {{Ecology}} of {{Strategy Making}} and {{Organizational Adaptation}}}, + author = {Burgelman, Robert A.}, + date = {1991-08-01}, + journaltitle = {Organization Science}, + volume = {2}, + number = {3}, + pages = {239--262}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {This paper presents an intraorganizational ecological perspective on strategy making, and examines how internal selection may combine with external selection to explain organizational change and survival. The perspective serves to illuminate data from a field study of the evolution of Intel Corporation's corporate strategy. The data, in turn, are used to refine and deepen the conceptual framework. Relationships between induced and autonomous strategic processes and four modes of organizational adaptation are discussed. Apparent paradoxes associated with structural inertia and strategic reorientation arguments are elucidated and several new propositions derived. 
The paper proposes that consistently successful organizations are characterized by top managements who spend efforts on building the induced and autonomous strategic processes, as well as concerning themselves with the content of strategy; that such organizations simultaneously exercise induced and autonomous processes; and that successful reorientations in organizations are likely to have been preceded by internal experimentation and selection processes effected through the autonomous process.}, + keywords = {corporate strategy,evolutionary management,organizational ecology,selection and adaptation} +} + +@inbook{burgess_computational_2018, + ids = {foote_computational_2017}, + title = {A Computational Analysis of Social Media Scholarship}, + booktitle = {The {{SAGE Handbook}} of {{Social Media}}}, + author = {Foote, Jeremy and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2018}, + pages = {111--134}, + publisher = {{SAGE Publications Ltd}}, + location = {{1 Oliver's Yard,~55 City Road~London~EC1Y 1SP}}, + abstract = {Data from social media platforms and online communities have fueled the growth of computational social science. In this chapter, we use computational analysis to characterize the state of research on social media and demonstrate the utility of such methods. First, we discuss how to obtain datasets from the APIs published by many social media platforms. Then, we perform some of the most widely used computational analyses on a dataset of social media scholarship we extract from the Scopus bibliographic database’s API. We apply three methods: network analysis, topic modeling using latent Dirichlet allocation, and statistical prediction using machine learning. For each technique, we explain the method and demonstrate how it can be used to draw insights from our dataset. Our analyses reveal overlapping scholarly communities studying social media. 
We find that early social media research applied social network analysis and quantitative methods, but the most cited and influential work has come from marketing and medical research. We also find that publication venue and, to a lesser degree, textual features of papers explain the largest variation in incoming citations. We conclude with some consideration of the limitations of computational research and future directions.}, + bookauthor = {Burgess, Jean and Marwick, Alice and Poell, Thomas}, + isbn = {978-1-4129-6229-2 978-1-4739-8406-6}, + langid = {english}, + file = {/home/nathante/Zotero/storage/W8C4ULRU/Foote et al. - 2018 - A Computational Analysis of Social Media Scholarsh.pdf} +} + +@article{burnett_information_2004, + title = {Information {{Exchange}} in {{Virtual Communities}}: A {{Comparative Study}}}, + shorttitle = {Information {{Exchange}} in {{Virtual Communities}}}, + author = {Burnett, Gary and Buerkle, Harry}, + date = {2004-01-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {9}, + issn = {1083-6101}, + abstract = {Burnett's (2000) typology of information exchange in virtual communities attempts to provide a framework for examining the range of activities undertaken by participants in such communities. This study is the first in a series to apply the typology to specific virtual communities, in an effort to assess its accuracy against the day-to-day interactions to be found in two online communities. Through a comparison of these two communities using the typology, revisions to the typology are proposed which will allow it to reflect more accurately activities found within the communities. 
By providing a metric through which to address such questions, the revised typology will allow a richer understanding of virtual communities as social information environments.}, + issue = {JCMC922}, + file = {/home/nathante/Zotero/storage/39C7RSD8/4614481.html} +} + +@article{butler_attraction-selection-attrition_2014, + title = {An Attraction-Selection-Attrition Theory of Online Community Size and Resilience}, + author = {Butler, Brian S. and Bateman, Patrick J. and Gray, Peter H. and Diamant, E. Ilana}, + date = {2014-09}, + journaltitle = {MIS Q.}, + volume = {38}, + number = {3}, + pages = {699--728}, + issn = {0276-7783}, + abstract = {Online discussion communities play an important role in the development of relationships and the transfer of knowledge within and across organizations. Their underlying technologies enhance these processes by providing infrastructures through which group-based communication can occur. Community administrators often make decisions about technologies with the goal of enhancing the user experience, but the impact of such decisions on how a community develops must also be considered. To shed light on this complex and under-researched phenomenon, we offer a model of key latent constructs influenced by technology choices and possible causal paths by which they have dynamic effects on communities. Two important community characteristics that can be impacted are community size (number of members) and community resilience (membership that is willing to remain involved with the community in spite of variability and change in the topics discussed). To model community development, we build on attraction-selection-attrition (ASA) theory, introducing two new concepts: participation costs (how much time and effort are required to engage with content provided in a community) and topic consistency cues (how strongly a community signals that topics that may appear in the future will be consistent with what it has hosted in the past). 
We use the proposed ASA theory of online communities (OCASA) to develop a simulation model of community size and resilience that affirms some conventional wisdom and also has novel and counterintuitive implications. Analysis of the model leads to testable new propositions about the causal paths by which technology choices affect the emergence of community size and community resilience, and associated implications for community sustainability.}, + file = {/home/nathante/Zotero/storage/292C8XTF/Butler et al. - 2014 - An Attraction-selection-attrition Theory of Online.pdf} +} + +@article{butler_cross-purposes_2011, + title = {The Cross-Purposes of Cross-Posting: Boundary Reshaping Behavior in Online Discussion Communities}, + shorttitle = {The Cross-Purposes of Cross-Posting}, + author = {Butler, Brian S. and Wang, Xiaoqing}, + date = {2011-09-15}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {23}, + pages = {993--1010}, + issn = {1047-7047}, + abstract = {Increasingly, online discussion communities are used to support activities ranging from software development to political campaigns. An important feature of an online discussion community is its content boundaries, which are individual perceptions of what materials and discussions are part of the community and what are not, and how that community is related to others within a larger system. Yet in spite of its importance, many community infrastructures allow individual participants to reshape content boundaries by simultaneously associating their contributions with multiple online discussion communities. This reshaping behavior is a controversial aspect of the creation and management of many types of online discussion communities. On one hand, many communities explicitly discourage boundary reshaping behaviors in their frequently asked questions or terms-of-use document. 
On the other hand, community infrastructures continue to allow such reshaping behaviors. To explain this controversy, we theorize how the extent of boundary reshaping in an online discussion community has simultaneously positive and negative effects on its member dynamics and responsiveness. We test predictions about the conflicting effects of reshaping behaviors with 60 months of longitudinal data from 140 USENET newsgroups, focusing on cross-posting activities as a form of reshaping behavior. Empirical results are consistent with the proposed hypotheses that reshaping behaviors within a discussion community affect member dynamics and community responsiveness in both positive and negative ways. Taken together, the findings highlight the boundary-related design challenges faced by managers seeking to support ongoing activity within online discussion communities.}, + issue = {3-part-2}, + file = {/home/nathante/Zotero/storage/MHIHVXMA/Butler and Wang - 2012 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/ZDTPFJP3/Butler and Wang - 2011 - The Cross-Purposes of Cross-Posting Boundary Resh.pdf;/home/nathante/Zotero/storage/5XCPFJS9/isre.1110.html} +} + +@article{butler_membership_2001, + title = {Membership Size, Communication Activity, and Sustainability: A Resource-Based Model of Online Social Structures}, + shorttitle = {Membership {{Size}}, {{Communication Activity}}, and {{Sustainability}}}, + author = {Butler, Brian S.}, + date = {2001}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {12}, + number = {4}, + eprint = {23011457}, + eprinttype = {jstor}, + pages = {346--362}, + issn = {1047-7047}, + abstract = {As telecommunication networks become more common, there is an increasing interest in the factors underlying the development of online social structures. 
It has been proposed that these structures are new forms of organizing which are not subject to the same constraints as traditional social structures. However, from anecdotal evidence and case studies it is difficult to evaluate whether online social structures are subject to the same problems as traditional social structures. Drawing from prior studies of traditional social structures and empirical analyses of longitudinal data from a sample of Internet-based groups, this exploratory work considers the role of size and communication activity in sustainable online social structures. A resource-based theory of sustainable social structures is presented. Members contribute time, energy, and other resources, enabling a social structure to provide benefits for individuals. These benefits, which include information, influence, and social support, are the basis for a social structure's ability to attract and retain members. This model focuses on the system of opposing forces that link membership size as a component of resource availability and communication activity as an aspect of benefit provision to the sustainability of an online social structure. Analyses of data from a random sample of e-mail-based Internet social structures (listservs) indicate that communication activity and size have both positive and negative effects on a structure's sustainability. 
These results suggest that while the use of networked communication technologies may alter the form of communication, balancing the opposing impacts of membership size and communication activity in order to maintain resource availability and provide benefits for current members remains a fundamental problem underlying the development of sustainable online social structures.}, + file = {/home/nathante/Zotero/storage/4ENNLMAH/Butler - 2001 - Membership Size, Communication Activity, and Susta.pdf;/home/nathante/Zotero/storage/U7AUNAZT/Butler-2001-ISR-Membership_size_communication_activitiy_sustainability.pdf} +} + +@inproceedings{campbell_thousands_2016, + title = {Thousands of {{Positive Reviews}}: Distributed {{Mentoring}} in {{Online Fan Communities}}}, + shorttitle = {Thousands of {{Positive Reviews}}}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer}}-{{Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Campbell, Julie and Aragon, Cecilia and Davis, Katie and Evans, Sarah and Evans, Abigail and Randall, David}, + date = {2016-02-27}, + series = {{{CSCW}} '16}, + pages = {691--704}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Young people worldwide are participating in ever-increasing numbers in online fan communities. Far from mere shallow repositories of pop culture, these sites are accumulating significant evidence that sophisticated informal learning is taking place online in novel and unexpected ways. In order to understand and analyze in more detail how learning might be occurring, we conducted an in-depth nine-month ethnographic investigation of online fanfiction communities, including participant observation and fanfiction author interviews. Our observations led to the development of a theory we term distributed mentoring, which we present in detail in this paper. 
Distributed mentoring exemplifies one instance of how networked technology affords new extensions of behaviors that were previously bounded by time and space. Distributed mentoring holds potential for application beyond the spontaneous mentoring observed in this investigation and may help students receive diverse, thoughtful feedback in formal learning environments as well.}, + isbn = {978-1-4503-3592-8}, + keywords = {digital youth.,distributed mentoring,fanfiction,informal learning,Mentoring,online communities}, + file = {/home/nathante/Zotero/storage/D9ZM58VV/Campbell et al. - 2016 - Thousands of Positive Reviews Distributed Mentori.pdf} +} + +@incollection{campbell_variation_1965, + title = {Variation and Selective Retention in Socio-Cultural Evolution}, + booktitle = {Social {{Change In Developing Areas}}}, + author = {Campbell, Don T}, + editor = {Barringer, Herbert R. and Blanksten, George I. and Mack, Raymond W.}, + date = {1965}, + publisher = {{Schenkman Publishing Company}}, + location = {{Cambridge Mass.}}, + file = {/home/nathante/Zotero/storage/MT3K69NQ/campbell_sociocultural_evoluation_ocr.pdf} +} + +@incollection{canova_bayesian_2007, + title = {Bayesian {{VARs}}}, + booktitle = {Methods for {{Applied Macroeconomic Research}}}, + author = {Canova, Fabio}, + date = {2007}, + eprint = {j.ctvcm4hrv.13}, + eprinttype = {jstor}, + pages = {373--417}, + publisher = {{Princeton University Press}}, + abstract = {We saw in chapter 4 that VAR models can be used to characterize any vector of time series under a minimal set of conditions. We have also seen that, since VARs are reduced-form models, identification restrictions, motivated by economic theory, are needed to conduct meaningful policy analyses. Reduced-form VARs are also typically unsuitable for out-of-sample forecasting. To reasonably approximate the Wold representation, it is in fact necessary to have a VAR with long lags. 
A generous parametrization means that unrestricted VARs are not operational alternatives to either standard macroeconometric models, where insignificant coefficients are purged out of the specification, or}, + isbn = {978-0-691-11504-7}, + file = {/home/nathante/Zotero/storage/PGQG5UX9/Canova - 2007 - Bayesian VARs.pdf} +} + +@book{canova_methods_2011, + title = {Methods for {{Applied Macroeconomic Research}}}, + author = {Canova, Fabio}, + date = {2011-09-19}, + eprint = {WGSHNRj_DwcC}, + eprinttype = {googlebooks}, + publisher = {{Princeton University Press}}, + abstract = {The last twenty years have witnessed tremendous advances in the mathematical, statistical, and computational tools available to applied macroeconomists. This rapidly evolving field has redefined how researchers test models and validate theories. Yet until now there has been no textbook that unites the latest methods and bridges the divide between theoretical and applied work. Fabio Canova brings together dynamic equilibrium theory, data analysis, and advanced econometric and computational methods to provide the first comprehensive set of techniques for use by academic economists as well as professional macroeconomists in banking and finance, industry, and government. This graduate-level textbook is for readers knowledgeable in modern macroeconomic theory, econometrics, and computational programming using RATS, MATLAB, or Gauss. Inevitably a modern treatment of such a complex topic requires a quantitative perspective, a solid dynamic theory background, and the development of empirical and numerical methods--which is where Canova's book differs from typical graduate textbooks in macroeconomics and econometrics. Rather than list a series of estimators and their properties, Canova starts from a class of DSGE models, finds an approximate linear representation for the decision rules, and describes methods needed to estimate their parameters, examining their fit to the data. 
The book is complete with numerous examples and exercises. Today's economic analysts need a strong foundation in both theory and application. Methods for Applied Macroeconomic Research offers the essential tools for the next generation of macroeconomists.}, + isbn = {978-1-4008-4102-8}, + langid = {english}, + pagetotal = {509}, + file = {/home/nathante/Zotero/storage/TGBFQNPZ/64846.html} +} + +@incollection{canova_var_2007, + title = {{{VAR Models}}}, + booktitle = {Methods for {{Applied Macroeconomic Research}}}, + author = {Canova, Fabio}, + date = {2007}, + eprint = {j.ctvcm4hrv.7}, + eprinttype = {jstor}, + pages = {111--164}, + publisher = {{Princeton University Press}}, + abstract = {This chapter describes a set of techniques which stand apart from those considered in the next three chapters, in the sense that economic theory is only minimally used in the inferential process. VAR models, pioneered by Chris Sims about 25 years ago, have acquired a permanent place in the toolkit of applied macroeconomists, both to summarize the information contained in the data and to conduct certain types of policy experiments. VAR models are well-suited to the first purpose: the Wold theorem ensures that any vector of time series has a VAR representation under mild regularity conditions and this makes them}, + isbn = {978-0-691-11504-7}, + file = {/home/nathante/Zotero/storage/ZQYCMAPQ/Canova - 2007 - VAR Models.pdf} +} + +@article{carpenter_stan:_2016, + title = {Stan: A Probabilistic Programming Language}, + shorttitle = {Stan}, + author = {Carpenter, Bob and Gelman, Andrew and Hoffman, Matt and Lee, Daniel and Goodrich, Ben and Betancourt, Michael and Brubaker, Michael A. 
and Guo, Jiqiang and Li, Peter and Riddell, Allen}, + date = {2016}, + journaltitle = {Journal of Statistical Software}, + volume = {20}, + number = {2}, + pages = {1--37}, + file = {/home/nathante/Zotero/storage/2L4LAHJ2/Stan - Probabilistic Programming Language.pdf} +} + +@article{carriero_bayesian_2015, + title = {Bayesian {{VARs}}: Specification {{Choices}} and {{Forecast Accuracy}}}, + shorttitle = {Bayesian {{VARs}}}, + author = {Carriero, Andrea and Clark, Todd E. and Marcellino, Massimiliano}, + date = {2015}, + journaltitle = {Journal of Applied Econometrics}, + volume = {30}, + number = {1}, + pages = {46--73}, + issn = {1099-1255}, + abstract = {In this paper we discuss how the point and density forecasting performance of Bayesian vector autoregressions (BVARs) is affected by a number of specification choices. We adopt as a benchmark a common specification in the literature, a BVAR with variables entering in levels and a prior modeled along the lines of Sims and Zha (International Economic Review 1998; 39: 949–968). We then consider optimal choice of the tightness, of the lag length and of both; evaluate the relative merits of modeling in levels or growth rates; compare alternative approaches to h-step-ahead forecasting (direct, iterated and pseudo-iterated); discuss the treatment of the error variance and of cross-variable shrinkage; and assess rolling versus recursive estimation. Finally, we analyze the robustness of the results to the VAR size and composition (using also data for France, Canada and the UK, while the main analysis is for the USA). We obtain a large set of empirical results, but the overall message is that we find very small losses (and sometimes even gains) from the adoption of specification choices that make BVAR modeling quick and easy, in particular for point forecasting. This finding could therefore further enhance the diffusion of the BVAR as an econometric tool for a vast range of applications. 
Copyright © 2013 John Wiley \& Sons, Ltd.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/DIVRTRXW/Carriero et al. - 2015 - Bayesian VARs Specification Choices and Forecast .pdf;/home/nathante/Zotero/storage/J44RGYG2/jae.html} +} + +@article{carroll_concentration_1985, + title = {Concentration and Specialization: Dynamics of Niche Width in Populations of Organizations}, + shorttitle = {Concentration and {{Specialization}}}, + author = {Carroll, Glenn R.}, + date = {1985-05-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {90}, + number = {6}, + pages = {1262--1283}, + issn = {0002-9602}, + abstract = {This paper departs from the common practice of focusing on large, generalist organizations and shows that new organizational insights are obtined by adopting a broader, ecological perspective. The newspaper publishing industry is examined as an illustration. The ecological focus shows that many small, specialized organizations operate successfully in this industry, despite apparently high levels of local concentration. A resource-partitioning model is advanced to explain the interorganizational relationships between generalist and specialist organizations. Statistical tests of the model using historical data on 2,808 American local newspaper organizations show the merit of using the ecological perspective for analyzing industries.}, + file = {/home/nathante/Zotero/storage/G38AK5SZ/Carroll - 1985 - Concentration and specialization Dynamics of nich.pdf;/home/nathante/Zotero/storage/8PG3QCP3/228210.html} +} + +@article{carroll_density_1989, + title = {Density Dependence in the Evolution of Populations of Newspaper Organizations}, + author = {Carroll, Glenn R. 
and Hannan, Michael T.}, + date = {1989-08}, + journaltitle = {American Sociological Review}, + volume = {54}, + number = {4}, + eprint = {2095875}, + eprinttype = {jstor}, + pages = {524}, + issn = {00031224}, + file = {/home/nathante/Zotero/storage/TCCRW99U/DensitiyDependenceInNewspaperOrg_Carroll_Hannan_1989.pdf} +} + +@article{carroll_stochastic_1983, + ids = {carroll_stochastic_1983-1}, + title = {A Stochastic Model of Organizational Mortality: Review and Reanalysis}, + shorttitle = {A Stochastic Model of Organizational Mortality}, + author = {Carroll, Glenn R}, + date = {1983-12-01}, + journaltitle = {Social Science Research}, + shortjournal = {Social Science Research}, + volume = {12}, + number = {4}, + pages = {303--329}, + issn = {0049-089X}, + abstract = {An effort is made to integrate the research literatures of business policy and organizational sociology as they concern organizational mortality. The previous empirical studies of organizational mortality are reviewed and considered in light of current theoretical arguments. Three stochastic models are developed to test hypotheses concerning organizational mortality: the constant rate model, the Gompertz model, and Makeham's Law. The parameters of these models are estimated for 52 sets of data on organizational mortality. The findings show that Makeham's Law is the best-fitting model, although its estimation requires data with low levels of censoring. Substantively, the findings show strong support for Stinchombe's liability-of-newness hypothesis [A. L. Stinchcombe (1965), “Organizations and social structure,” in Handbook of Organizations (J. G. March, Ed.), pp. 
153–193, Rand McNally, Chicago].}, + langid = {english}, + keywords = {obscolescence}, + file = {/home/nathante/Zotero/storage/YVBBBKIN/Carroll_1983_A stochastic model of organizational mortality.pdf;/home/nathante/Zotero/storage/2T6Z5LPV/0049089X83900224.html;/home/nathante/Zotero/storage/BK75HWEF/0049089X83900224.html} +} + +@article{carroll_why_2000, + title = {Why the Microbrewery Movement? Organizational Dynamics of Resource Partitioning in the {{U}}.{{S}}. Brewing Industry}, + shorttitle = {Why the {{Microbrewery Movement}}?}, + author = {Carroll, Glenn R. and Swaminathan, Anand}, + date = {2000}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {106}, + number = {3}, + eprint = {10.1086/318962}, + eprinttype = {jstor}, + pages = {715--762}, + issn = {0002-9602}, + abstract = {The number of small specialty brewers in the U.S. beer brewing industry has increased dramatically in recent decades, even as the market for beer became increasingly dominated by mass‐production brewing companies. Using the resource‐partitioning model of organizational ecology, this article shows that these two apparently contradictory trends are fundamentally interrelated. Hypotheses developed here refine the way scale competition among generalist organizations is modeled and improve the theoretical development of the sociological bases for the appeal of specialist organizations' products, especially those related to organizational identity. Evidence drawn from qualitative and quantitative research provides strong support for the theory. 
The article offers a brief discussion of the theoretical and substantive issues involved in application of the model to other industries and to other cultures.}, + file = {/home/nathante/Zotero/storage/X2ITSCRL/Carroll and Swaminathan - 2000 - Why the microbrewery movement Organizational dyna.pdf} +} + +@book{castells_rise_1996, + title = {Rise of {{The Network Society}} ({{Information Age Series}})}, + author = {Castells, Manuel}, + date = {1996}, + edition = {1}, + publisher = {{Wiley-Blackwell}}, + isbn = {1-55786-617-1}, + pagetotal = {481} +} + +@article{cenci_assessing_2020, + title = {Assessing the Predictability of Nonlinear Dynamics under Smooth Parameter Changes}, + author = {Cenci, Simone and Medeiros, Lucas P. and Sugihara, George and Saavedra, Serguei}, + date = {2020-01-29}, + journaltitle = {Journal of The Royal Society Interface}, + shortjournal = {Journal of The Royal Society Interface}, + volume = {17}, + number = {162}, + pages = {20190627}, + publisher = {{Royal Society}}, + abstract = {Short-term forecasts of nonlinear dynamics are important for risk-assessment studies and to inform sustainable decision-making for physical, biological and financial problems, among others. Generally, the accuracy of short-term forecasts depends upon two main factors: the capacity of learning algorithms to generalize well on unseen data and the intrinsic predictability of the dynamics. While generalization skills of learning algorithms can be assessed with well-established methods, estimating the predictability of the underlying nonlinear generating process from empirical time series remains a big challenge. Here, we show that, in changing environments, the predictability of nonlinear dynamics can be associated with the time-varying stability of the system with respect to smooth changes in model parameters, i.e. its local structural stability. 
Using synthetic data, we demonstrate that forecasts from locally structurally unstable states in smoothly changing environments can produce significantly large prediction errors, and we provide a systematic methodology to identify these states from data. Finally, we illustrate the practical applicability of our results using an empirical dataset. Overall, this study provides a framework to associate an uncertainty level with short-term forecasts made in smoothly changing environments.}, + file = {/home/nathante/Zotero/storage/7GP4IHYY/Cenci et al_2020_Assessing the predictability of nonlinear dynamics under smooth parameter.pdf;/home/nathante/Zotero/storage/XYD4DTBB/rsif.2019.html} +} + +@article{cenci_non-parametric_2019, + title = {Non-Parametric Estimation of the Structural Stability of Non-Equilibrium Community Dynamics}, + author = {Cenci, Simone and Saavedra, Serguei}, + date = {2019-06}, + journaltitle = {Nature Ecology \& Evolution}, + volume = {3}, + number = {6}, + pages = {912--918}, + publisher = {{Nature Publishing Group}}, + issn = {2397-334X}, + abstract = {Environmental factors are important drivers of community dynamics. Yet, despite extensive research, it is still extremely challenging to predict the effect of environmental changes on the dynamics of ecological communities. Equilibrium- and model-based approaches have provided a theoretical framework with which to investigate this problem systematically. However, the applicability of this framework to empirical data has been limited because equilibrium dynamics of populations within communities are seldom observed in nature and exact equations for community dynamics are rarely known. To overcome these limitations, here we develop a data-driven non-parametric framework to estimate the tolerance of non-equilibrium community dynamics to environmental perturbations (that is, their structural stability). 
Following our approach, we show that in non-equilibrium systems, structural stability can vary significantly across time. As a case study, we investigate the structural stability of a rocky intertidal community with dynamics at the edge of chaos. The structural stability of the community as a whole exhibited a clear seasonal pattern, despite the persistent chaotic dynamics of individual populations. Importantly, we show that this seasonal pattern of structural stability is causally driven by sea temperature. Overall, our approach provides novel opportunities for estimating the tolerance of ecological communities to environmental changes within a non-parametric framework.}, + issue = {6}, + langid = {english}, + keywords = {_tablet}, + file = {/home/nathante/Zotero/storage/IVN95DQL/Cenci_Saavedra_2019_Non-parametric estimation of the structural stability of non-equilibrium.pdf;/home/nathante/Zotero/storage/YVUB966N/Cenci_Saavedra_2019_Non-parametric estimation of the structural stability of non-equilibrium.pdf;/home/nathante/Zotero/storage/KXS6YBEH/s41559-019-0879-1.html;/home/nathante/Zotero/storage/UEDZDI82/s41559-019-0879-1.html} +} + +@article{cenci_regularized_2019, + title = {Regularized {{S}}-Map for Inference and Forecasting with Noisy Ecological Time Series}, + author = {Cenci, Simone and Sugihara, George and Saavedra, Serguei}, + date = {2019}, + journaltitle = {Methods in Ecology and Evolution}, + volume = {10}, + number = {5}, + pages = {650--660}, + issn = {2041-210X}, + abstract = {It is well known that fluctuations of species abundances observed in ecological time series emerge from an interplay between deterministic nonlinear dynamics and stochastic forces. Importantly, nonlinearity and stochasticity introduce significant challenges to the analysis of ecological time series, such as the inference of the effect of species interactions on community dynamics and forecasting of species abundances. 
Local linear fits with state-space-dependent kernel functions, known as S-maps, provide an efficient method to infer Jacobian coefficients (a proxy for the local effect of species interactions) and to make reliable forecasts from nonlinear time series. Yet, while it has been shown that the S-map outperforms existing methods for nonparametric inference and forecasting, the methodology is sensitive to process noise. To overcome this limitation, we integrate the S-map with different regularization schemes. To validate our approach, we test our methodology against different levels of noise and nonlinearity using three standard population dynamics models. We show that an appropriate choice of the regularization scheme, alongside an accurate choice of the kernel functions, can significantly improve the in-sample inference of Jacobian coefficients and the out-of-sample forecast of species abundances in the presence of process noise. We further validate our methodology using two empirical time series of marine microbial communities. Our results illustrate that the regularized S-map is an efficient method for nonparametric inference and forecasting from noisy, nonlinear, ecological time series. 
Yet, attention must be paid on the regularization scheme and the structure of the kernel for whether inference or forecasting is the ultimate goal of a research study.}, + langid = {english}, + keywords = {_tablet,nonlinear time series,out-of-sample forecast,parameter inference,process noise,regularization,S-map}, + annotation = {\_eprint: https://besjournals.onlinelibrary.wiley.com/doi/pdf/10.1111/2041-210X.13150}, + file = {/home/nathante/Zotero/storage/J9VXK8CH/Cenci et al_2019_Regularized S-map for inference and forecasting with noisy ecological time.pdf;/home/nathante/Zotero/storage/WTD6LD6D/2041-210X.html} +} + +@article{cenci_uncertainty_2018, + title = {Uncertainty Quantification of the Effects of Biotic Interactions on Community Dynamics from Nonlinear Time-Series Data}, + author = {Cenci, Simone and Saavedra, Serguei}, + date = {2018-10-31}, + journaltitle = {Journal of The Royal Society Interface}, + shortjournal = {Journal of The Royal Society Interface}, + volume = {15}, + number = {147}, + pages = {20180695}, + publisher = {{Royal Society}}, + abstract = {Biotic interactions are expected to play a major role in shaping the dynamics of ecological systems. Yet, quantifying the effects of biotic interactions has been challenging due to a lack of appropriate methods to extract accurate measurements of interaction parameters from experimental data. One of the main limitations of existing methods is that the parameters inferred from noisy, sparsely sampled, nonlinear data are seldom uniquely identifiable. That is, many different parameters can be compatible with the same dataset and can generalize to independent data equally well. Hence, it is difficult to justify conclusive assertions about the effect of biotic interactions without information about their associated uncertainty. 
Here, we develop an ensemble method based on model averaging to quantify the uncertainty associated with the effect of biotic interactions on community dynamics from non-equilibrium ecological time-series data. Our method is able to detect the most informative time intervals for each biotic interaction within a multivariate time series and can be easily adapted to different regression schemes. Overall, this novel approach can be used to associate a time-dependent uncertainty with the effect of biotic interactions. Moreover, because we quantify uncertainty with minimal assumptions about the data-generating process, our approach can be applied to any data for which interactions among variables strongly affect the overall dynamics of the system.}, + file = {/home/nathante/Zotero/storage/HZYK4XGH/Cenci_Saavedra_2018_Uncertainty quantification of the effects of biotic interactions on community.pdf;/home/nathante/Zotero/storage/DTGV6Y3C/rsif.2018.html} +} + +@article{certain_how_2018, + title = {How Do {{MAR}}(1) Models Cope with Hidden Nonlinearities in Ecological Dynamics?}, + author = {Certain, Grégoire and Barraquand, Frédéric and Gårdmark, Anna}, + date = {2018-09-01}, + journaltitle = {Methods in Ecology and Evolution}, + shortjournal = {Methods in Ecology and Evolution}, + volume = {9}, + number = {9}, + pages = {1975--1995}, + issn = {2041-210X}, + abstract = {Multivariate autoregressive (MAR) models are an increasingly popular technique to infer interaction strengths between species in a community and to predict the community response to environmental change. The most commonly employed MAR(1) models, with one time lag, can be viewed either as multispecies competition models with Gompertz density dependence or, more generally, as a linear approximation of more complex, nonlinear dynamics around stable equilibria.
This latter interpretation allows for broader applicability, but may come at a cost in terms of interpretation of estimates and reliability of both short- and long-term predictions. We investigate what these costs might be by fitting MAR(1) models to simulated 2-species competition, consumer-resource and host-parasitoid systems, as well as a larger food web influenced by the environment. We review how MAR(1) coefficients can be interpreted and evaluate how reliable are estimates of interaction strength, rank, or sign; accuracy of short-term forecasts; as well as the ability of MAR(1) models to predict the long-term responses of communities submitted to environmental change such as PRESS perturbations. The net effects of species j on species i are usually (90\%-95\%) well recovered in terms of sign or rank, with the notable exception of overcompensatory dynamics. In actual values, net effects of species j on species i are not well recovered when the underlying dynamics are nonlinear. MAR(1) models are better at making short-term qualitative forecasts (next point going up or down) than at predicting long-term responses to environmental perturbations, which can be severely over- as well as underestimated. We conclude that when applying MAR(1) models to ecological data, inferences on net effects among species should be limited to signs, or the Gompertz assumption should be tested and discussed. This particular assumption on density-dependence (log-linearity) is also required for unbiased long-term predictions.
Overall, we think that MAR(1) models are highly useful tools to resolve and characterize community dynamics, but we recommend to use them in conjunction with alternative, nonlinear models resembling the ecological context in order to improve their interpretation in specific applications.}, + file = {/home/nathante/Zotero/storage/PHIQKS3T/2041-210X.html} +} + +@article{champion_underproduction_2021, + title = {Underproduction: An Approach for Measuring Risk in Open Source Software}, + author = {Champion, Kaylea and Hill, Benjamin Mako}, + date = {2021-02-27}, + journaltitle = {IEEE International Conference on Software Analysis, Evolution and Reengineering}, + shortjournal = {IEEE SANER}, + eprint = {2103.00352}, + eprinttype = {arxiv}, + primaryclass = {cs.SE}, + abstract = {The widespread adoption of Free/Libre and Open Source Software (FLOSS) means that the ongoing maintenance of many widely used software components relies on the collaborative effort of volunteers who set their own priorities and choose their own tasks. We argue that this has created a new form of risk that we call 'underproduction' which occurs when the supply of software engineering labor becomes out of alignment with the demand of people who rely on the software produced. We present a conceptual framework for identifying relative underproduction in software as well as a statistical method for applying our framework to a comprehensive dataset from the Debian GNU/Linux distribution that includes 21,902 source packages and the full history of 461,656 bugs. We draw on this application to present two experiments: (1) a demonstration of how our technique can be used to identify at-risk software packages in a large FLOSS repository and (2) a validation of these results using an alternate indicator of package risk. 
Our analysis demonstrates both the utility of our approach and reveals the existence of widespread underproduction in a range of widely-installed software components in Debian.}, + archiveprefix = {arXiv} +} + +@inproceedings{chancellor_norms_2018, + title = {Norms {{Matter}}: Contrasting {{Social Support Around Behavior Change}} in {{Online Weight Loss Communities}}}, + shorttitle = {Norms {{Matter}}}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Chancellor, Stevie and Hu, Andrea and De Choudhury, Munmun}, + date = {2018-04-21}, + series = {{{CHI}} '18}, + pages = {1--14}, + publisher = {{Association for Computing Machinery}}, + location = {{Montreal QC, Canada}}, + abstract = {Online health communities (OHCs) provide support across conditions; for weight loss, OHCs offer support to foster positive behavior change. However, weight loss behaviors can also be subverted on OHCs to promote disordered eating practices. Using comments as proxies for support, we use computational linguistic methods to juxtapose similarities and differences in two Reddit weight loss communities, r/proED and r/loseit. We employ language modeling and find that word use in both communities is largely similar. Then, by building a word embedding model, specifically a deep neural network on comment words, we contrast the context of word use and find differences that imply different behavior change goals in these OHCs. Finally, these content and context norms predict whether a comment comes from r/proED or r/loseit. We show that norms matter in understanding how different OHCs provision support to promote behavior change and discuss the implications for design and moderation of OHCs.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/77YDPVB6/Chancellor et al. 
- 2018 - Norms Matter Contrasting Social Support Around Be.pdf} +} + +@article{chandrasekharan_crossmod:_2019, + title = {Crossmod: A Cross-Community Learning-Based System to Assist Reddit Moderators}, + shorttitle = {Crossmod}, + author = {Chandrasekharan, Eshwar and Gandhi, Chaitrali and Mustelier, Matthew Wortley and Gilbert, Eric}, + date = {2019-11-07}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {3}, + pages = {1--30}, + issn = {2573-0142}, + issue = {CSCW}, + langid = {english}, + file = {/home/nathante/Zotero/storage/HLXKLJYX/Chandrasekharan et al. - Crossmod A Cross-Community Learning-based System .pdf;/home/nathante/Zotero/storage/YXMAQZAG/Chandrasekharan et al. - 2019 - Crossmod A Cross-Community Learning-based System .pdf} +} + +@article{chandrasekharan_internets_2018, + title = {The Internet's Hidden Rules: An Empirical Study of Reddit Norm Violations at Micro, Meso, and Macro Scales}, + shorttitle = {The {{Internet}}'s {{Hidden Rules}}}, + author = {Chandrasekharan, Eshwar and Samory, Mattia and Jhaver, Shagun and Charvat, Hunter and Bruckman, Amy and Lampe, Cliff and Eisenstein, Jacob and Gilbert, Eric}, + date = {2018}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + pages = {32:1--32:25}, + issn = {2573-0142}, + abstract = {Norms are central to how online communities are governed. Yet, norms are also emergent, arise from interaction, and can vary significantly between communities---making them challenging to study at scale. In this paper, we study community norms on Reddit in a large-scale, empirical manner. 
Via 2.8M comments removed by moderators of 100 top subreddits over 10 months, we use both computational and qualitative methods to identify three types of norms: macro norms that are universal to most parts of Reddit; meso norms that are shared across certain groups of subreddits; and micro norms that are specific to individual, relatively unique subreddits. Given the size of Reddit's user base---and the wide range of topics covered by different subreddits---we argue this represents the first large-scale census of the norms in broader internet culture. In other words, these findings shed light on what Reddit values, and how widely-held those values are. We conclude by discussing implications for the design of new and existing online communities.}, + issue = {CSCW}, + keywords = {community norms,mixed methods.,moderation,online communities}, + file = {/home/nathante/Zotero/storage/2CA9ZVFB/Chandrasekharan et al. - 2018 - The Internet's Hidden Rules An Empirical Study of.pdf;/home/nathante/Zotero/storage/HUP7XT5H/Chandrasekharan et al_2018_The Internet's Hidden Rules.pdf} +} + +@online{chandrasekharan_quarantined_2020, + title = {Quarantined! {{Examining}} the {{Effects}} of a {{Community}}-{{Wide Moderation Intervention}} on {{Reddit}}}, + author = {Chandrasekharan, Eshwar and Jhaver, Shagun and Bruckman, Amy and Gilbert, Eric}, + date = {2020-09-24}, + eprint = {2009.11483}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Should social media platforms intervene when communities repeatedly break rules? What actions can they consider? In light of this hotly debated issue, platforms have begun experimenting with softer alternatives to outright bans. We examine one such intervention called quarantining, that impedes direct access to and promotion of controversial communities. Specifically, we present two case studies of what happened when Reddit quarantined the influential communities r/TheRedPill (TRP) and r/The\_Donald (TD). 
Working with over 85M Reddit posts, we apply causal inference methods to examine the quarantine’s effects on TRP and TD. We find that the quarantine made it more difficult to recruit new members: new user influx to TRP and TD decreased by 79.5\% and 58\%, respectively. Despite quarantining, existing users’ misogyny and racism levels remained unaffected. We conclude by reflecting on the effectiveness of this design friction in limiting the influence of toxic communities and discuss broader implications for content moderation.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/CB26SNVJ/Chandrasekharan et al. - 2020 - Quarantined! Examining the Effects of a Community-.pdf} +} + +@article{chandrasekharan_you_2017, + ids = {chandrasekharan_you_2017-1}, + title = {You Can't Stay Here: The Efficacy of Reddit's 2015 Ban Examined through Hate Speech}, + shorttitle = {You Can't Stay Here}, + author = {Chandrasekharan, Eshwar and Pavalanathan, Umashanthi and Srinivasan, Anirudh and Glynn, Adam and Eisenstein, Jacob and Gilbert, Eric}, + date = {2017-12}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {31:1--31:22}, + issn = {2573-0142}, + abstract = {In 2015, Reddit closed several subreddits-foremost among them r/fatpeoplehate and r/CoonTown-due to violations of Reddit's anti-harassment policy. However, the effectiveness of banning as a moderation approach remains unclear: banning might diminish hateful behavior, or it may relocate such behavior to different parts of the site. We study the ban of r/fatpeoplehate and r/CoonTown in terms of its effect on both participating users and affected subreddits. Working from over 100M Reddit posts and comments, we generate hate speech lexicons to examine variations in hate speech usage via causal inference methods. We find that the ban worked for Reddit. 
More accounts than expected discontinued using the site; those that stayed drastically decreased their hate speech usage-by at least 80\%. Though many subreddits saw an influx of r/fatpeoplehate and r/CoonTown "migrants," those subreddits saw no significant changes in hate speech usage. In other words, other subreddits did not inherit the problem. We conclude by reflecting on the apparent success of the ban, discussing implications for online moderation, Reddit and internet communities more broadly.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/5Z8CCRM2/Chandrasekharan et al. - 2017 - You Can'T Stay Here The Efficacy of Reddit's 2015.pdf} +} + +@inproceedings{chang_specialization_2014, + title = {Specialization, Homophily, and Gender in a Social Curation Site: Findings from Pinterest}, + shorttitle = {Specialization, Homophily, and Gender in a Social Curation Site}, + booktitle = {Proceedings of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing}, + author = {Chang, Shuo and Kumar, Vikas and Gilbert, Eric and Terveen, Loren G.}, + date = {2014-02-15}, + series = {{{CSCW}} '14}, + pages = {674--686}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Pinterest is a popular social curation site where people collect, organize, and share pictures of items. We studied a fundamental issue for such sites: what patterns of activity attract attention (audience and content reposting)? We organized our studies around two key factors: the extent to which users specialize in particular topics, and homophily among users. We also considered the existence of differences between female and male users.
We found: (a) women and men differed in the types of content they collected and the degree to which they specialized; male Pinterest users were not particularly interested in stereotypically male topics; (b) sharing diverse types of content increases your following, but only up to a certain point; (c) homophily drives repinning: people repin content from other users who share their interests; homophily also affects following, but to a lesser extent. Our findings suggest strategies both for users (e.g., strategies to attract an audience) and maintainers (e.g., content recommendation methods) of social curation sites.}, + isbn = {978-1-4503-2540-0}, + keywords = {data analysis,social network,topic detection,user profiling}, + file = {/home/nathante/Zotero/storage/RVP6MZ6S/Chang et al. - 2014 - Specialization, homophily, and gender in a social .pdf} +} + +@book{charmaz_constructing_2015, + ids = {charmaz_constructing_2014}, + title = {Constructing Grounded Theory: A Practical Guide through Qualitative Analysis}, + shorttitle = {Constructing {{Grounded Theory}}}, + author = {Charmaz, Kathy}, + date = {2015}, + edition = {2}, + publisher = {{SAGE}}, + location = {{Thousand Oaks, California}}, + isbn = {0-7619-7352-4} +} + +@article{chen_impact_2019, + title = {The {{Impact}} of {{Media Censorship}}: 1984 or {{Brave New World}}?}, + shorttitle = {The {{Impact}} of {{Media Censorship}}}, + author = {Chen, Yuyu and Yang, David Y.}, + date = {2019-06}, + journaltitle = {American Economic Review}, + volume = {109}, + number = {6}, + pages = {2294--2332}, + issn = {0002-8282}, + abstract = {Media censorship is a hallmark of authoritarian regimes. We conduct a field experiment in China to measure the effects of providing citizens with access to an uncensored internet. We track subjects' media consumption, beliefs regarding the media, economic beliefs, political attitudes, and behaviors over 18 months. 
We find four main results: (i) free access alone does not induce subjects to acquire politically sensitive information; (ii) temporary encouragement leads to a persistent increase in acquisition, indicating that demand is not permanently low; (iii) acquisition brings broad, substantial, and persistent changes to knowledge, beliefs, attitudes, and intended behaviors; and (iv) social transmission of information is statistically significant but small in magnitude. We calibrate a simple model to show that the combination of low demand for uncensored information and the moderate social transmission means China's censorship apparatus may remain robust to a large number of citizens receiving access to an uncensored internet.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/DI644H6E/Chen and Yang - 2019 - The Impact of Media Censorship 1984 or Brave New .pdf;/home/nathante/Zotero/storage/FZC97WCG/Chen and Yang - 2019 - The Impact of Media Censorship 1984 or Brave New .pdf;/home/nathante/Zotero/storage/95EW4R3G/articles.html} +} + +@article{chesney_other_2004, + title = {“Other People Benefit. {{I}} Benefit from Their Work.” {{Sharing Guitar Tabs Online}}}, + author = {Chesney, Thomas}, + date = {2004-11-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {10}, + issn = {1083-6101}, + abstract = {This paper reports the results of a study into a public space Internet portal which publishes guitar tabs (tablature) online, to examine what motivates people to participate in this activity and what benefits they get from doing so. A guitar tab is essentially sheet music for guitarists. The study examines why people contribute when it is easier for them not to publish their tabs and simply use the tabs that other people have posted. Answers to this will have implications for businesses wanting to encourage their employees to share their knowledge.
An open ended questionnaire was sent to 183 tab publishers with a usable response rate of 39\%, which is considered high for surveys. The questionnaire sought to gather data on motivations, benefits and community interaction. The paper begins with a review of relevant theories of knowledge sharing and publishing, in particular the private-collective model of innovation (von Hippel \& von Krogh, 2003) which is used to analyze the results. Motivations are listed as under two categories, self and altruistic, with the most popular motivation being to share the songs with others, which is from the altruistic category. The most common benefit is personal satisfaction. The results show tab publishing fits with the private-collective model of innovation which means that a tab published online can be seen as a public good, as it is available to all, that has significant private elements. These private elements are the benefits that tab publishers get which the people who only use tabs without contributing their own, do not. The implications of the work are as follows. Enjoyment of the domain seems to be an important factor in motivating knowledge sharing. People who feel like they are part of a community and get satisfaction from being part of a community, will be more likely to contribute. The act of sharing knowledge should be as close to effortless as possible to encourage contributions. The act of preparing (collecting, collating etc.) the material to be shared should have meaning in itself for the person who is preparing it. If the act of sharing leads to increased status in the community people will be more likely to contribute. To encourage knowledge sharing, those who make use of the shared knowledge should be encouraged to give positive feedback to the person who shared it. 
To date, there has been little empirical work examining online posting forums.}, + issue = {JCMC1012}, + file = {/home/nathante/Zotero/storage/JWW5X2DI/4614460.html} +} + +@online{choi_spontaneous_2020, + title = {Spontaneous versus Interaction-Driven Burstiness in Human Dynamics: The Case of {{Wikipedia}} Edit History}, + shorttitle = {Spontaneous versus Interaction-Driven Burstiness in Human Dynamics}, + author = {Choi, Jeehye and Hiraoka, Takayuki and Jo, Hang-Hyun}, + date = {2020-11-03}, + eprint = {2011.01562}, + eprinttype = {arxiv}, + primaryclass = {physics}, + abstract = {The origin of non-Poissonian or bursty temporal patterns observed in various datasets for human social dynamics has been extensively studied, yet its understanding still remains incomplete. Considering the fact that humans are social beings, a fundamental question arises: Is the bursty human dynamics dominated by individual characteristics or by interaction between individuals? In this paper we address this question by analyzing the Wikipedia edit history to see how spontaneous individual editors are in initiating bursty periods of editing, i.e., spontaneous burstiness, and to what extent individual behaviors are driven by interaction with other editors in those periods, i.e., interaction-driven burstiness. We quantify the degree of initiative (DOI) of an editor of interest in each Wikipedia article by using the statistics of bursty periods containing the editor's edits. The integrated value of the DOI over all relevant timescales reveals which is dominant between spontaneous and interaction-driven burstiness. We empirically find that this value tends to be larger for weaker temporal correlations in the editor's editing behavior and/or stronger editorial correlations. These empirical findings are successfully confirmed by deriving an analytic form of the DOI from a model capturing the essential features of the edit sequence. 
Thus our approach provides a deeper insight into the origin and underlying mechanisms of bursts in human social dynamics.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Physics - Physics and Society}, + file = {/home/nathante/Zotero/storage/PVEY34PE/Choi et al. - 2020 - Spontaneous versus interaction-driven burstiness i.pdf} +} + +@inproceedings{choudhury_social_2016, + title = {Social {{Media Participation}} in an {{Activist Movement}} for {{Racial Equality}}}, + booktitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Choudhury, Munmun De and Jhaver, Shagun and Sugar, Benjamin and Weber, Ingmar}, + date = {2016-03-31}, + abstract = {From the Arab Spring to the Occupy Movement, social media has been instrumental in driving and supporting socio-political movements throughout the world. In this paper, we present one of the first social media investigations of an activist movement around racial discrimination and police violence, known as “Black Lives Matter”. Considering Twitter as a sensor for the broader community’s perception of the events related to the movement, we study participation over time, the geographical differences in this participation, and its relationship to protests that unfolded on the ground. We find evidence for continued participation across four temporally separated events related to the movement, with notable changes in engagement and language over time. We also find that participants from regions of historically high rates of black victimization due to police violence tend to express greater negativity and make more references to loss of life. Finally, we observe that social media attributes of affect, behavior and language can predict future protest participation on the ground. 
We discuss the role of social media in enabling collective action around this unique movement and how social media platforms may help understand perceptions on a socially contested and sensitive issue like race.}, + eventtitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + langid = {english}, + file = {/home/nathante/Zotero/storage/FF8RRANF/De Choudhury et al_2016_Social Media Participation in an Activist Movement for Racial Equality.pdf;/home/nathante/Zotero/storage/PIRFXX7F/Choudhury et al_2016_Social Media Participation in an Activist Movement for Racial Equality.pdf;/home/nathante/Zotero/storage/ZBVVGIXA/De Choudhury et al_2016_Social Media Participation in an Activist Movement for Racial Equality.pdf;/home/nathante/Zotero/storage/6XM9W7ZH/13168.html;/home/nathante/Zotero/storage/HIXCPVI3/13168.html;/home/nathante/Zotero/storage/SF7VFSH4/13168.html} +} + +@article{ciampaglia_production_2015, + title = {The Production of Information in the Attention Economy}, + author = {Ciampaglia, Giovanni Luca and Flammini, Alessandro and Menczer, Filippo}, + date = {2015-05-19}, + journaltitle = {Scientific Reports}, + volume = {5}, + pages = {9452}, + issn = {2045-2322}, + file = {/home/nathante/Zotero/storage/Z5SM58N9/srep09452.pdf} +} + +@book{coleman_foundations_1990, + title = {Foundations of Social Theory}, + author = {Coleman, James Samuel}, + date = {1990}, + publisher = {{The Belknap Press of Harvard University Press}}, + location = {{Cambridge, Mass.}}, + isbn = {978-0-674-31226-5}, + langid = {english} +} + +@article{coleman_social_1988, + title = {Social {{Capital}} in the {{Creation}} of {{Human Capital}}}, + author = {Coleman, James S.}, + date = {1988}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {94}, + eprint = {2780243}, + eprinttype = {jstor}, + pages = {S95-S120}, + issn = {0002-9602}, + abstract = {In this paper, the concept of social capital is introduced 
and illustrated, its forms are described, the social structural conditions under which it arises are examined, and it is used in an analysis of dropouts from high school. Use of the concept of social capital is part of a general theoretical strategy discussed in the paper: taking rational action as a starting point but rejecting the extreme individualistic premises that often accompany it. The conception of social capital as a resource for action is one way of introducing social structure into the rational action paradigm. Three forms of social capital are examined: obligations and expectations, information channels, and social norms. The role of closure in the social structure in facilitating the first and third of these forms of social capital is described. An analysis of the effect of the lack of social capital available to high school sophomores on dropping out of school before graduation is carried out. The effect of social capital within the family and in the community outside the family is examined.}, + file = {/home/nathante/Zotero/storage/8B8X2LBV/Coleman - 1988 - Social Capital in the Creation of Human Capital.pdf;/home/nathante/Zotero/storage/83B63Z3Y/Coleman - 1988 - Social Capital in the Creation of Human Capital.html} +} + +@inproceedings{cook_contribution_2009, + title = {Contribution, Commercialization \& Audience: Understanding Participation in an Online Creative Community}, + shorttitle = {Contribution, Commercialization \& Audience}, + booktitle = {Proceedings of the {{ACM}} 2009 International Conference on {{Supporting}} Group Work}, + author = {Cook, Eric and Teasley, Stephanie D. and Ackerman, Mark S.}, + date = {2009-05-10}, + series = {{{GROUP}} '09}, + pages = {41--50}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {This paper presents a qualitative study of attitudes towards participation and contribution in an online creative community. 
The setting of the work is an online community of practice focused on the use and development of a user-customizable music software package called Reaktor. Findings from the study highlight four emergent topics in the discourse related to user contributions to the community: contribution assessment, support for learning, perceptions of audience and tensions about commercialization. Our analysis of these topics frames discussion about the value and challenges of attending to amateur and professional users in online creative communities.}, + isbn = {978-1-60558-500-0}, + keywords = {amateurs,audiences,commercialization,community of practice,creativity,learning,online community,professionals,user-generated content} +} + +@article{copland_reddit_2020, + title = {Reddit Quarantined: Can Changing Platform Affordances Reduce Hateful Material Online?}, + shorttitle = {Reddit Quarantined}, + author = {Copland, Simon}, + date = {2020-10-21}, + journaltitle = {Internet Policy Review}, + volume = {9}, + number = {4}, + publisher = {{Berlin: Alexander von Humboldt Institute for Internet and Society}}, + issn = {2197-6775}, + abstract = {Can we reduce hateful material online through changing platform affordances? Studying Reddit’s quarantine function, this paper argues the results of this approach are mixed.}, + file = {/home/nathante/Zotero/storage/KY4RZWR4/Copland_2020_Reddit quarantined.pdf;/home/nathante/Zotero/storage/SZWA55IE/Copland_2020_Reddit quarantined.pdf;/home/nathante/Zotero/storage/9KXC37K7/225653.html;/home/nathante/Zotero/storage/M6NKY3K2/reddit-quarantined-can-changing-platform-affordances-reduce-hateful-material.html} +} + +@article{cress_competition_1997, + title = {Competition and {{Commitment}} in {{Voluntary Memberships}}: The {{Paradox}} of {{Persistence}} and {{Participation}}}, + shorttitle = {Competition and {{Commitment}} in {{Voluntary Memberships}}}, + author = {Cress, Daniel M. and McPherson, J. 
Miller and Rotolo, Thomas}, + date = {1997-03-01}, + journaltitle = {Sociological Perspectives}, + shortjournal = {Sociological Perspectives}, + volume = {40}, + number = {1}, + pages = {61--79}, + issn = {0731-1214}, + abstract = {Much of the research on voluntary associations has argued that commitment to the group determines member participation and persistence. In this framework, highly committed members participate to a greater degree than less committed members, and maintain their connection with the group over longer periods of time. Less committed members, on the other hand, participate sporadically, and tend to drop their memberships easily. This commitment thesis implies a positive relationship between participation and persistence: the more the member participates, the longer the duration of membership. We argue that this individual level thesis should be supplanted by a system level understanding, in which the competition among social groups for individual resources determines persistence and participation. This competition thesis predicts a negative relationship between persistence of membership and participation in group activities: the more the member participates, the shorter the average duration of membership. We use event history analysis to test these opposing hypotheses on a sample of 1587 membership spells covering a fifteen year time period. We find strong and consistent support for the competition thesis.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/VWW9ZVQI/Cress et al. - 1997 - Competition and Commitment in Voluntary Membership.pdf} +} + +@inproceedings{cunha_are_2019, + ids = {cunha_are_2019-1,cunha_are_2019-2}, + title = {Are All Successful Communities Alike? 
Characterizing and Predicting the Success of Online Communities}, + shorttitle = {Are All Successful Communities Alike?}, + booktitle = {The {{World Wide Web Conference}}}, + author = {Cunha, Tiago and Jurgens, David and Tan, Chenhao and Romero, Daniel}, + date = {2019-05-13}, + series = {{{WWW}} '19}, + pages = {318--328}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {The proliferation of online communities has created exciting opportunities to study the mechanisms that explain group success. While a growing body of research investigates community success through a single measure - typically, the number of members - we argue that there are multiple ways of measuring success. Here, we present a systematic study to understand the relations between these success definitions and test how well they can be predicted based on community properties and behaviors from the earliest period of a community's lifetime. We identify four success measures that are desirable for most communities: (i) growth in the number of members; (ii) retention of members; (iii) long term survival of the community; and (iv) volume of activities within the community. Surprisingly, we find that our measures do not exhibit very high correlations, suggesting that they capture different types of success. Additionally, we find that different success measures are predicted by different attributes of online communities, suggesting that success can be achieved through different behaviors. Our work sheds light on the basic understanding on what success represents in online communities and what predicts it. Our results suggest that success is multi-faceted and cannot be measured nor predicted by a single measurement. 
This insight has practical implications for the creation of new online communities and the design of platforms that facilitate such communities.}, + isbn = {978-1-4503-6674-8}, + keywords = {Group Dynamics,Online Communities,Reddit,Success}, + file = {/home/nathante/Zotero/storage/CGBFCUGX/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/IYW3WKHV/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/PFS6682S/Cunha et al_2019_Are All Successful Communities Alike.pdf;/home/nathante/Zotero/storage/SMX88EL3/Cunha et al. - 2019 - Are All Successful Communities Alike Characterizi.pdf} +} + +@inproceedings{dabbish_fresh_2012, + ids = {dabbish_fresh_2012-1}, + title = {Fresh Faces in the Crowd: Turnover, Identity, and Commitment in Online Groups}, + shorttitle = {Fresh Faces in the Crowd}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work}}}, + author = {Dabbish, Laura and Farzan, Rosta and Kraut, Robert and Postmes, Tom}, + date = {2012-02-11}, + series = {{{CSCW}} '12}, + pages = {245--248}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Turnover is commonplace in many online groups because of low barriers of entry and exit. In offline settings, turnover can have a negative impact because of reduced attachment to the group as an entity. However, in an online setting, turnover in terms of changes in the visible membership of a group may have a very different impact. Online only a limited amount of information about members and their activities is observable; in particular, it is easier to see the behavior of the subset of members who are active than the potentially larger set who are not. In this paper, we describe an experiment examining the influence of visible membership turnover on commitment to an online group. 
Our results suggest that increased turnover in an online group may increase social presence, creating perceptions of liveness, in turn leading to increased levels of participation in the group. However, this result holds primarily for groups with a common identity, suggesting that attention to behavior of others may be stronger when people share an identity with those others. Our results extend understandings of attachment in an online setting as well as theory about social tuning.}, + isbn = {978-1-4503-1086-4}, + keywords = {attachment,commitment,identity.,online groups,turnover}, + file = {/home/nathante/Zotero/storage/3IQQP4JM/Dabbish et al. - 2012 - Fresh faces in the crowd turnover, identity, and .pdf;/home/nathante/Zotero/storage/GEVF3A53/Dabbish et al. - 2012 - Fresh faces in the crowd turnover, identity, and .pdf} +} + +@inproceedings{danescu-niculescu-mizil_no_2013, + ids = {danescu-niculescu-mizil_no_2013-1}, + title = {No Country for Old Members: User Lifecycle and Linguistic Change in Online Communities}, + shorttitle = {No Country for Old Members}, + booktitle = {Proceedings of the 22nd International Conference on {{World Wide Web}} - {{WWW}} '13}, + author = {Danescu-Niculescu-Mizil, Cristian and West, Robert and Jurafsky, Dan and Leskovec, Jure and Potts, Christopher}, + date = {2013}, + pages = {307--318}, + publisher = {{ACM Press}}, + location = {{Rio de Janeiro, Brazil}}, + abstract = {Vibrant online communities are in constant flux. As members join and depart, the interactional norms evolve, stimulating further changes to the membership and its social dynamics. Linguistic change—in the sense of innovation that becomes accepted as the norm—is essential to this dynamic process: it both facilitates individual expression and fosters the emergence of a collective identity. We propose a framework for tracking linguistic change as it happens and for understanding how specific users react to these evolving norms. 
By applying this framework to two large online communities we show that users follow a determined two-stage lifecycle with respect to their susceptibility to linguistic change: a linguistically innovative learning phase in which users adopt the language of the community followed by a conservative phase in which users stop changing and the evolving community norms pass them by.}, + eventtitle = {The 22nd International Conference}, + isbn = {978-1-4503-2035-1}, + langid = {english}, + venue = {Rio de Janeiro, Brazil}, + file = {/home/nathante/Zotero/storage/L532IPRV/Danescu-Niculescu-Mizil et al. - 2013 - No Country for Old Members User Lifecycle and Lin.pdf;/home/nathante/Zotero/storage/LWECW2QM/Danescu-Niculescu-Mizil et al. - 2013 - No country for old members user lifecycle and lin.pdf} +} + +@article{datta_extracting_2019, + title = {Extracting {{Inter}}-{{Community Conflicts}} in {{Reddit}}}, + author = {Datta, Srayan and Adar, Eytan}, + date = {2019-07-06}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {13}, + pages = {146--157}, + issn = {2334-0770}, + abstract = {Anti-social behaviors in social media can happen both at user and community levels. While a great deal of attention is on the individual as an ‘aggressor,’ the banning of entire Reddit subcommunities (i.e., subreddits) demonstrates that this is a multi-layer concern. Existing research on inter-community conflict has largely focused on specific subcommunities or ideological opponents. However, antagonistic behaviors may be more pervasive and integrate into the broader network. In this work, we study the landscape of conflicts among subreddits by deriving higher-level (community) behaviors from the way individuals are sanctioned and rewarded. 
By constructing a conflict network, we characterize different patterns in subreddit-to-subreddit conflicts as well as communities of ‘co-targeted’ subreddits. The dynamics of these interactions also reveals a shift in conflict focus over time.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6IA9VN8K/Datta_Adar_2019_Extracting Inter-Community Conflicts in Reddit.pdf;/home/nathante/Zotero/storage/F3MHZ7Z6/3217.html} +} + +@article{datta_identifying_2017, + title = {Identifying {{Misaligned Inter}}-{{Group Links}} and {{Communities}}}, + author = {Datta, Srayan and Phelan, Chanda and Adar, Eytan}, + date = {2017-12-06}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {37:1--37:23}, + abstract = {Many social media systems explicitly connect individuals (e.g., Facebook or Twitter); as a result, they are the targets of most research on social networks. However, many systems do not emphasize or support explicit linking between people (e.g., Wikipedia or Reddit), and even fewer explicitly link communities. Instead, network analysis is performed through inference on implicit connections, such as co-authorship or text similarity. Depending on how inference is done and what data drove it, different networks may emerge. While correlated structures often indicate stability, in this work we demonstrate that differences, or misalignment, between inferred networks also capture interesting behavioral patterns. For example, high-text but low-author similarity often reveals communities "at war" with each other over an issue or high-author but low-text similarity can suggest community fragmentation. Because we are able to model edge direction, we also find that asymmetry in degree (in-versus-out) co-occurs with marginalized identities (subreddits related to women, people of color, LGBTQ, etc.). 
In this work, we provide algorithms that can identify misaligned links, network structures and communities. We then apply these techniques to Reddit to demonstrate how these algorithms can be used to decipher inter-group dynamics in social media.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/52FT8LT8/Datta et al. - 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf;/home/nathante/Zotero/storage/WKCJHV6R/Datta et al. - 2017 - Identifying Misaligned Inter-Group Links and Commu.pdf} +} + +@article{de_choudhury_mental_2014, + title = {Mental Health Discourse on Reddit: Self-Disclosure, Social Support, and Anonymity}, + shorttitle = {Mental Health Discourse on Reddit}, + author = {De Choudhury, Munmun and De, Sushovan}, + date = {2014-05-16}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + shortjournal = {ICWSM}, + volume = {8}, + number = {1}, + pages = {71--80}, + issn = {2334-0770}, + issue = {1}, + langid = {english}, + keywords = {disinhibition}, + file = {/home/nathante/Zotero/storage/KNC2AQLL/Choudhury and De - 2014 - Mental Health Discourse on reddit Self-Disclosure.pdf;/home/nathante/Zotero/storage/GE2HKJ48/14526.html} +} + +@online{del_tredici_semantic_2018, + title = {Semantic {{Variation}} in {{Online Communities}} of {{Practice}}}, + author = {Del Tredici, Marco and Fernández, Raquel}, + date = {2018-06-15}, + eprint = {1806.05847}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {We introduce a framework for quantifying semantic variation of common words in Communities of Practice and in sets of topic-related communities. We show that while some meaning shifts are shared across related communities, others are community-specific, and therefore independent from the discussed topic. We propose such findings as evidence in favour of sociolinguistic theories of socially-driven semantic variation. Results are evaluated using an independent language modelling task. 
Furthermore, we investigate extralinguistic features and show that factors such as prominence and dissemination of words are related to semantic variation.}, + archiveprefix = {arXiv}, + langid = {english}, + file = {/home/nathante/Zotero/storage/E62NF57M/Del Tredici and Fernández - 2018 - Semantic Variation in Online Communities of Practi.pdf} +} + +@article{dellaposta_why_2015, + title = {Why {{Do Liberals Drink Lattes}}?}, + author = {DellaPosta, Daniel and Shi, Yongren and Macy, Michael}, + date = {2015-03}, + journaltitle = {American Journal of Sociology}, + volume = {120}, + number = {5}, + pages = {1473--1511}, + issn = {0002-9602, 1537-5390}, + langid = {english}, + file = {/home/nathante/Zotero/storage/LMVF2MJ5/DellaPosta et al_2015_Why Do Liberals Drink Lattes.pdf} +} + +@article{deyle_tracking_2016, + title = {Tracking and Forecasting Ecosystem Interactions in Real Time}, + author = {Deyle, Ethan R. and May, Robert M. and Munch, Stephan B. and Sugihara, George}, + date = {2016-01-13}, + journaltitle = {Proceedings of the Royal Society B: Biological Sciences}, + shortjournal = {Proc. R. Soc. B.}, + volume = {283}, + number = {1822}, + pages = {20152258}, + issn = {0962-8452, 1471-2954}, + abstract = {Evidence shows that species interactions are not constant but change as the ecosystem shifts to new states. Although controlled experiments and model investigations demonstrate how nonlinear interactions can arise in principle, empirical tools to track and predict them in nature are lacking. Here we present a practical method, using available time-series data, to measure and forecast changing interactions in real systems, and identify the underlying mechanisms. The method is illustrated with model data from a marine mesocosm experiment and limnologic field data from Sparkling Lake, WI, USA. 
From simple to complex, these examples demonstrate the feasibility of quantifying, predicting and understanding state-dependent, nonlinear interactions as they occur in situ and in real time—a requirement for managing resources in a nonlinear, non-equilibrium world.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/QJ4YSNWC/Deyle et al_2016_Tracking and forecasting ecosystem interactions in real time.pdf} +} + +@article{dimaggio_iron_1983, + title = {The {{Iron Cage Revisited}}: Institutional {{Isomorphism}} and {{Collective Rationality}} in {{Organizational Fields}}}, + shorttitle = {The {{Iron Cage Revisited}}}, + author = {DiMaggio, Paul J. and Powell, Walter W.}, + date = {1983}, + journaltitle = {American Sociological Review}, + volume = {48}, + number = {2}, + eprint = {2095101}, + eprinttype = {jstor}, + pages = {147--160}, + issn = {0003-1224}, + abstract = {[What makes organizations so similar? We contend that the engine of rationalization and bureaucratization has moved from the competitive marketplace to the state and the professions. Once a set of organizations emerges as a field, a paradox arises: rational actors make their organizations increasingly similar as they try to change them. We describe three isomorphic processes--coercive, mimetic, and normative--leading to this outcome. We then specify hypotheses about the impact of resource centralization and dependency, goal ambiguity and technical uncertainty, and professionalization and structuration on isomorphic change. Finally, we suggest implications for theories of organizations and social change.]}, + keywords = {Organization Behavior,Sociology}, + file = {/home/nathante/Zotero/storage/9A5PXKRT/DiMaggio and Powell - 1983 - The iron cage revisited Institutional isomorphism.pdf;/home/nathante/Zotero/storage/AQWAT6RA/2095101.html} +} + +@article{dimaggio_social_2001, + title = {Social Implications of the {{Internet}}}, + author = {DiMaggio, Paul and Hargittai, Eszter and Neuman, W. 
Russell and Robinson, John P.}, + date = {2001-08}, + journaltitle = {Annual Review of Sociology}, + volume = {27}, + number = {1}, + pages = {307--336}, + abstract = {The Internet is a critically important research site for sociologists testing theories of technology diffusion and media effects, particularly because it is a medium uniquely capable of integrating modes of communication and forms of content. Current research tends to focus on the Internet's implications in five domains: 1) inequality (the “digital divide”); 2) community and social capital; 3) political participation; 4) organizations and other economic institutions; and 5) cultural participation and cultural diversity. A recurrent theme across domains is that the Internet tends to complement rather than displace existing media and patterns of behavior. Thus in each domain, utopian claims and dystopic warnings based on extrapolations from technical possibilities have given way to more nuanced and circumscribed understandings of how Internet use adapts to existing patterns, permits certain innovations, and reinforces particular kinds of change. Moreover, in each domain the ultimate social implications of this new technology depend on economic, legal, and policy decisions that are shaping the Internet as it becomes institutionalized. Sociologists need to study the Internet more actively and, particularly, to synthesize research findings on individual user behavior with macroscopic analyses of institutional and political-economic factors that constrain that behavior.}, + file = {/home/nathante/Zotero/storage/DQUKUVBM/DiMaggio et al. 
- 2001 - Social implications of the internet.pdf} +} + +@article{dimmick_theory_1984, + title = {The {{Theory}} of the {{Niche}}: Quantifying {{Competition Among Media Industries}}}, + shorttitle = {The {{Theory}} of the {{Niche}}}, + author = {Dimmick, John and Rothenbuhler, Eric}, + date = {1984-03-01}, + journaltitle = {Journal of Communication}, + volume = {34}, + number = {1}, + pages = {103--119}, + issn = {1460-2466}, + langid = {english}, + file = {/home/nathante/Zotero/storage/GDM85NW7/Dimmick and Rothenbuhler - 1984 - The Theory of the Niche Quantifying Competition A.pdf;/home/nathante/Zotero/storage/3RUMQPRP/abstract.html} +} + +@article{dobrev_dynamics_2001, + title = {Dynamics of Niche Width and Resource Partitioning}, + author = {Dobrev, Stanislav D. and Kim, Tai‐Young and Hannan, Michael T.}, + date = {2001}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {106}, + number = {5}, + eprint = {10.1086/320821}, + eprinttype = {jstor}, + pages = {1299--1337}, + issn = {0002-9602}, + abstract = {This article examines the effects of crowding in a market center on rates of change in organizational niche width and on organizational mortality. It proposes that, although firms with wide niches benefit from risk spreading and economies of scale, they are simultaneously exposed to intense competition. An analysis of organizational dynamics in automobile manufacturing firms in France, Germany, and Great Britain shows that competitive pressure not only increases the hazard of disbanding but also prompts organizational transformations that give rise to processes of resource partitioning. Emphasizing the content/process distinction in conceptualizing organizational change, the article finds that the process effect of changes in niche width and position increases mortality hazards. 
We discuss our findings in light of the processes investigated by the ecological theories of density dependence, resource partitioning, and structural inertia, and point to the theoretical links that help to integrate these theories.}, + file = {/home/nathante/Zotero/storage/7HQIXSCS/Dobrev et al. - 2001 - Dynamics of niche width and resource partitioning.pdf} +} + +@article{dobrev_evolution_2002, + title = {The {{Evolution}} of {{Organizational Niches}}: U.{{S}}. {{Automobile Manufacturers}}, 1885–1981}, + shorttitle = {The {{Evolution}} of {{Organizational Niches}}}, + author = {Dobrev, Stanislav D. and Kim, Tai-Young and Carroll, Glenn R.}, + date = {2002-06-01}, + journaltitle = {Administrative Science Quarterly}, + shortjournal = {Administrative Science Quarterly}, + volume = {47}, + number = {2}, + pages = {233--264}, + publisher = {{SAGE Publications Inc}}, + issn = {0001-8392}, + abstract = {Although the niche figures prominently in contemporary theories of organization, analysts often fail to tie micro processes within the niche to long-term changes in the broader environment. In this paper, we advance arguments about the relationship between an organization's niche and evolution in the structure of its organizational population over time. We focus on the technological niche and processes of positioning and crowding among firms in the niche space, relating them to the level of concentration among all firms in the market. Building on previous empirical studies in organizational ecology, we study the evolution of concentration in the American automobile industry from 1885 to 1981 and estimate models of the hazard of exit of individual producers from the market. 
The findings show that niche and concentration interact in complex ways, yielding a more unified depiction of organizational evolution than typically described or reported.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/B6XNLXEX/Dobrev et al_2002_The Evolution of Organizational Niches.pdf} +} + +@article{dobrev_shifting_2003, + ids = {dobrev_shifting_2003-1}, + title = {Shifting {{Gears}}, {{Shifting Niches}}: Organizational {{Inertia}} and {{Change}} in the {{Evolution}} of the {{U}}.{{S}}. {{Automobile Industry}}, 1885-1981}, + shorttitle = {Shifting {{Gears}}, {{Shifting Niches}}}, + author = {Dobrev, Stanislav D. and Kim, Tai-Young and Carroll, Glenn R.}, + date = {2003}, + journaltitle = {Organization Science}, + volume = {14}, + number = {3}, + eprint = {4135136}, + eprinttype = {jstor}, + pages = {264--282}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {We examine how experiential learning affects organizational change and its consequences on firm mortality. We develop hypotheses about the interactions of experiences with a specific type of organizational change on the one hand, and environmental stability, organizational size, and organizational niche width on the other hand. Our findings draw from analysis of the U.S. automobile industry between 1885 and 1981 and support the general prediction that "process" effects of change in the organizational core elevate the hazard of failure. 
We also find that a dynamic interpretation of organizational environments as comprised of other organizations helps to explicate the interplay between organization and environmental forces that shape the occurrence and outcome of transformation.}, + file = {/home/nathante/Zotero/storage/TJUKWSQJ/Dobrev et al_2003_Shifting Gears, Shifting Niches.pdf} +} + +@article{doerfel_evolutionary_2010, + title = {The Evolutionary Role of Interorganizational Communication: Modeling Social Capital in Disaster Contexts}, + shorttitle = {The {{Evolutionary Role}} of {{Interorganizational Communication}}}, + author = {Doerfel, Marya L. and Lai, Chih-Hui and Chewning, Lisa V.}, + date = {2010-04-01}, + journaltitle = {Human Communication Research}, + shortjournal = {HCR}, + volume = {36}, + number = {2}, + pages = {125--162}, + issn = {1468-2958}, + abstract = {Employing a community ecology perspective, this study examines how interorganizational (IO) communication and social capital (SC) facilitated organizational recovery after Hurricane Katrina. In-depth interviews with 56 New Orleans organizations enabled longitudinal analysis and a grounded theory model that illustrates how communication differentiated four phases of recovery: personal emergency, professional emergency, transition, rebuilding. Communicative action taking place across phases corresponds with the evolutionary mechanisms. Most organizations did not turn to interorganizational relationships (IORs) until the transitional phase, during which indirect ties were critical and incoming versus outgoing communication was substantively different. Organizations did not consistently use IO SC until the last phase. This study underlines the fact that organizations and their systems are fundamentally human and (re)constructed through communicative action.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/8V785QCD/Doerfel et al. 
- 2010 - The Evolutionary Role of Interorganizational Commu.pdf;/home/nathante/Zotero/storage/YXGI7HM5/4107472.html} +} + +@article{dormann_method_2014, + title = {A Method for Detecting Modules in Quantitative Bipartite Networks}, + author = {Dormann, Carsten F. and Strauss, Rouven}, + date = {2014}, + journaltitle = {Methods in Ecology and Evolution}, + volume = {5}, + number = {1}, + pages = {90--98}, + issn = {2041-210X}, + abstract = {Ecological networks are often composed of different subcommunities (often referred to as modules). Identifying such modules has the potential to develop a better understanding of the assembly of ecological communities and to investigate functional overlap or specialization. The most informative form of networks are quantitative or weighted networks. Here, we introduce an algorithm to identify modules in quantitative bipartite (or two-mode) networks. It is based on the hierarchical random graphs concept of Clauset et al. (2008 Nature 453: 98–101) and is extended to include quantitative information and adapted to work with bipartite graphs. We define the algorithm, which we call QuanBiMo, sketch its performance on simulated data and illustrate its potential usefulness with a case study. Modules are detected with a higher accuracy in simulated quantitative networks than in their binary counterparts. Even at high levels of noise, QuanBiMo still classifies 70\% of links correctly as within- or between-modules. Recursively applying the algorithm results in additional information of within-module organization of the network. The algorithm introduced here must be seen as a considerable improvement over the current standard of algorithms for binary networks. 
Due to its higher sensitivity, it is likely to be useful for detecting modules in the typically noisy data of ecological networks.}, + langid = {english}, + keywords = {compartments,groups,modularity,null model,pollination networks,weighted networks}, + annotation = {\_eprint: https://besjournals.onlinelibrary.wiley.com/doi/pdf/10.1111/2041-210X.12139}, + file = {/home/nathante/Zotero/storage/UI392VVJ/Dormann_Strauss_2014_A method for detecting modules in quantitative bipartite networks.pdf;/home/nathante/Zotero/storage/WDLTVHCF/2041-210X.html} +} + +@thesis{driscoll_hobbyist_2014, + title = {Hobbyist Inter-Networking and the Popular Internet Imaginary: Forgotten Histories of Networked Personal Computing, 1978--1998}, + shorttitle = {Hobbyist Inter-Networking and the Popular Internet Imaginary}, + author = {Driscoll, Kevin}, + date = {2014}, + institution = {{ProQuest Dissertations Publishing}}, + abstract = {Popular social computing began in the late-1970s with the emergence of dial-up bulletin-board systems (BBS). For nearly two decades, tens of thousands of dial-up computer networks were run out of the homes and offices of hobbyists, volunteers, and entrepreneurs throughout North America. It was on these bulletin board systems that personal computer owners first began to use their machines for popular communication. The history of BBSing portrays amateurs, hobbyists, and enthusiasts as key agents in the development and diffusion of social computing. Indeed, the users and administrators of early BBSes were the first to confront the fundamental challenges of living and working in online communities. Their experiences and experiments with anonymity, identity, privacy, sexuality, and trust established norms and values that were reproduced in the commercial services and social media systems to follow.
Restoring the popular memory of the BBS movement confers legitimacy on amateur users to speak with authority about the present and future of internet technology and policy.}, + editora = {Jenkins, Henry and Bar, Francois and Trope, Alison}, + editoratype = {collaborator}, + langid = {english} +} + +@inproceedings{ducheneaut_alone_2006, + title = {"{{Alone}} Together?": Exploring the Social Dynamics of Massively Multiplayer Online Games}, + shorttitle = {"{{Alone Together}}?}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Ducheneaut, Nicolas and Yee, Nicholas and Nickell, Eric and Moore, Robert J.}, + date = {2006}, + series = {{{CHI}} '06}, + pages = {407--416}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Massively Multiplayer Online Games (MMOGs) routinely attract millions of players but little empirical data is available to assess their players' social experiences. In this paper, we use longitudinal data collected directly from the game to examine play and grouping patterns in one of the largest MMOGs: World of Warcraft. Our observations show that the prevalence and extent of social activities in MMOGs might have been previously over-estimated, and that gaming communities face important challenges affecting their cohesion and eventual longevity. We discuss the implications of our findings for the design of future games and other online social spaces.}, + isbn = {978-1-59593-372-0}, + keywords = {activity metrics,massively multiplayer online games,MUDs,Online Communities,quantitative,social dynamics}, + file = {/home/nathante/Zotero/storage/SXB825WY/Ducheneaut et al. - 2006 - Alone together exploring the social dynamics o.pdf;/home/nathante/Zotero/storage/ZMRDXAKE/Ducheneaut et al. 
- 2006 - Alone Together Exploring the Social Dynamics o.pdf} +} + +@article{dumais_latent_2004, + title = {Latent Semantic Analysis}, + author = {Dumais, Susan T.}, + date = {2004}, + journaltitle = {Annual Review of Information Science and Technology}, + volume = {38}, + number = {1}, + pages = {188--230}, + issn = {1550-8382}, + langid = {english}, + annotation = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1002/aris.1440380105}, + file = {/home/nathante/Zotero/storage/FJRA3VVC/Dumais_2004_Latent semantic analysis.pdf;/home/nathante/Zotero/storage/Y6WQY5QL/aris.html} +} + +@article{dvir-gvirsman_media_2017, + title = {Media Audience Homophily: Partisan Websites, Audience Identity and Polarization Processes}, + shorttitle = {Media Audience Homophily}, + author = {Dvir-Gvirsman, Shira}, + date = {2017-07-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {19}, + number = {7}, + pages = {1072--1091}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {The study suggests that media consumers favor certain websites not only due to their content but also due to their audience. A new concept is introduced: “audience homophily,” which describes one’s preference for partisan media websites catering to a homogeneous, likeminded consumership. This attraction is explained in terms of the need for self-consistency, and I suggest that over time such behavior will polarize political identity through a spiral of reinforcement. 
Based on both a survey-experiment (N\,=\,300) and a panel study combined with web-tracking technology that recorded online-exposure behavior (N\,=\,397), it was found that individuals with more extreme ideology present higher levels of audience homophily and that, longitudinally, audience homophily is somewhat associated with ideological polarization, intolerance, and accessibility of political self-definition.}, + langid = {english}, + keywords = {Homophily,network analysis,partisan media,reinforcing-spiral model,selective exposure}, + file = {/home/nathante/Zotero/storage/WEQEAEJ4/Dvir-Gvirsman - 2017 - Media audience homophily Partisan websites, audie.pdf} +} + +@incollection{eldredge_punctuated_1972, + title = {Punctuated Equilibria: An Alternative to Phyletic Gradualism}, + booktitle = {Models in {{Paleobiology}}}, + author = {Eldredge, Niles and Gould, Stephen Jay}, + editor = {Schopf, Thomas J.M.}, + date = {1972}, + eprint = {3ULyAgAAQBAJ}, + eprinttype = {googlebooks}, + pages = {82--115}, + publisher = {{Freeman, Cooper and Company}}, + location = {{San Francisco}}, + abstract = {Evolutionary biology is one of the most fascinating and controversial fields of science, and its principles are fundamental to science as a whole. Presented here are 48 classic papers, selected and introduced by two of the world’s most distinguished evolutionary biologists, Francisco J. Ayala and John C. Avise. The volume reveals, in chronological order, 150 years of evolutionary biology, from the field’s origins to recent discoveries and reinterpretations based on new theory and evidence. A perfect book for seminar courses in biology, zoology, botany, ecology or evolution, this comprehensive tour of landmark publications traces scholarly thought from the foggy nineteenth-century birth of evolutionary biology to the mapping of the human genome.
Each selection is preceded by a short essay that explains its significance.The papers represent hallmark publications by seminal thinkers in the field such as Charlesworth, Dobzhansky, Ehrlich and Raven, Gould and Lewontin, Hamilton, Hardy, Hillis, Margulis, Maynard Smith, Mayr, MacArthur and Wilson, McClintock, Simpson, Trivers, Watson and Crick, West-Eberhard, and Wright. Subjects include natural selection, adaptation, and complex design, as well as mutation, chromosome speciation, and pseudogenes.In short, Essential Readings in Evolutionary Biology provides a captivating history of the foundation and growth of biology’s central discipline.}, + isbn = {978-1-4214-1305-1}, + langid = {english}, + file = {/home/nathante/Zotero/storage/2M6QZZV6/Eldredge and Gould - 1972 - Punctuated equilibria an alternative to phyletic .pdf} +} + +@article{ellison_benefits_2007, + ids = {ellison_benefits_2007-1}, + title = {The {{Benefits}} of {{Facebook}} “{{Friends}}:” {{Social Capital}} and {{College Students}}’ {{Use}} of {{Online Social Network Sites}}}, + shorttitle = {The {{Benefits}} of {{Facebook}} “{{Friends}}}, + author = {Ellison, Nicole B. and Steinfield, Charles and Lampe, Cliff}, + date = {2007-07-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + volume = {12}, + number = {4}, + pages = {1143--1168}, + publisher = {{Oxford Academic}}, + issn = {1083-6101}, + abstract = {This study examines the relationship between use of Facebook, a popular online social network site, and the formation and maintenance of social capital. In addition to assessing bonding and bridging social capital, we explore a dimension of social capital that assesses one’s ability to stay connected with members of a previously inhabited community, which we call maintained social capital. 
Regression analyses conducted on results from a survey of undergraduate students (N = 286) suggest a strong association between use of Facebook and the three types of social capital, with the strongest relationship being to bridging social capital. In addition, Facebook usage was found to interact with measures of psychological well-being, suggesting that it might provide greater benefits for users experiencing low self-esteem and low life satisfaction.}, + langid = {english}, + keywords = {CMC,quantitative,SNS,Social capital,survey}, + file = {/home/nathante/Zotero/storage/C6PUU2LZ/Ellison et al. - 2007 - The Benefits of Facebook “Friends” Social Capital.pdf;/home/nathante/Zotero/storage/I5D8LMF3/Ellison et al. - 2007 - The Benefits of Facebook “Friends” Social Capital.pdf;/home/nathante/Zotero/storage/CFMJSBYE/4582961.html;/home/nathante/Zotero/storage/YZWIMZS9/abstract.html} +} + +@article{faraj_online_2016, + ids = {faraj_special_2016}, + title = {Online Community as Space for Knowledge Flows}, + author = {Faraj, Samer and von Krogh, Georg and Monteiro, Eric and Lakhani, Karim R.}, + options = {useprefix=true}, + date = {2016-12-01}, + journaltitle = {Information Systems Research}, + shortjournal = {INFORMS}, + volume = {27}, + number = {4}, + pages = {668--684}, + issn = {1047-7047}, + abstract = {Online communities frequently create significant economic and relational value for community participants and beyond. It is widely accepted that the underlying source of such value is the collective flow of knowledge among community participants. We distinguish the conditions for flows of tacit and explicit knowledge in online communities and advance an unconventional theoretical conjecture: Online communities give rise to tacit knowledge flows between participants. 
The crucial condition for these flows is not the advent of novel, digital technology as often portrayed in the literature, but instead the technology’s domestication by humanity and the sociality it affords. This conjecture holds profound implications for theory and research in the study of management and organization, as well as their relation to information technology.}, + file = {/home/nathante/Zotero/storage/4TH94S6Q/Faraj et al. - 2016 - Online Community as Space for Knowledge Flows.pdf;/home/nathante/Zotero/storage/NCY7A6S4/Faraj et al. - 2016 - Special Section Introduction—Online Community as S.pdf} +} + +@inproceedings{fiesler_growing_2017, + ids = {fiesler_growing_2017-1}, + title = {Growing {{Their Own}}: Legitimate {{Peripheral Participation}} for {{Computational Learning}} in an {{Online Fandom Community}}}, + shorttitle = {Growing {{Their Own}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Fiesler, Casey and Morrison, Shannon and Shapiro, R. Benjamin and Bruckman, Amy S.}, + date = {2017-02-25}, + series = {{{CSCW}} '17}, + pages = {1375--1386}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Online communities dedicated to the creation of fanworks (e.g., fiction or art inspired by media such as books or television shows) often serve as communities of practice for learning communication, artistic, and technical skills. In studying one successful fan fiction archive that was designed and built entirely by (predominantly women) fans, we observed processes of legitimate peripheral participation (LPP) in which some of these fans began in peripheral roles and came to be more involved in the technical aspects of the archive over time. In addition to outlining positive outcomes, we discuss the challenges of supporting learning within this CoP, particularly with respect to the burden on experts. 
We discuss potential implications and solutions for the problem of expert scarcity in CoPs, and propose that LPP within fan communities can be leveraged for broadening participation in computing among women.}, + isbn = {978-1-4503-4335-0}, + keywords = {broadening participation in computing,communities of practice,computing education,fandom,fanfiction,learning,legitimate peripheral participation,online communities,open source}, + file = {/home/nathante/Zotero/storage/QUSETR8Z/Fiesler et al. - 2017 - Growing Their Own Legitimate Peripheral Participa.pdf;/home/nathante/Zotero/storage/VRDFMKHZ/Fiesler et al_2017_Growing Their Own.pdf} +} + +@article{fiesler_moving_2020, + ids = {fiesler_moving_2020-1,fiesler_moving_2020-2}, + title = {Moving {{Across Lands}}: Online {{Platform Migration}} in {{Fandom Communities}}}, + shorttitle = {Moving {{Across Lands}}}, + author = {Fiesler, Casey and Dym, Brianna}, + date = {2020-05-28}, + journaltitle = {Proc. ACM Hum.-Comput. Interact}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {4}, + pages = {042:1--042:25}, + abstract = {When online platforms rise and fall, sometimes communities fade away, and sometimes they pack their bags and relocate to a new home. To explore the causes and effects of online community migration, we examine transformative fandom, a longstanding, technology-agnostic community surrounding the creation, sharing, and discussion of creative works based on existing media. For over three decades, community members have left and joined many different online spaces, from Usenet to Tumblr to platforms of their own design. Through analysis of 28 in-depth interviews and 1,886 survey responses from fandom participants, we traced these migrations, the reasons behind them, and their impact on the community. Our findings highlight catalysts for migration that provide insights into factors that contribute to success and failure of platforms, including issues surrounding policy, design, and community. 
Further insights into the disruptive consequences of migrations (such as social fragmentation and lost content) suggest ways that platforms might both support commitment and better support migration when it occurs.}, + issue = {CSCW1}, + file = {/home/nathante/Zotero/storage/ER8P5AJ2/Fiesler_Dym_2020_Moving Across Lands.pdf;/home/nathante/Zotero/storage/JHDILSYU/Fiesler and Dym - 2020 - Moving Across Lands Online Platform Migration in .pdf} +} + +@inproceedings{fiesler_reddit_2018, + title = {Reddit Rules! {{Characterizing}} an Ecosystem of Governance.}, + booktitle = {Proceedings of the {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Fiesler, Casey and Jiang, Jialun" Aaron" and McCann, Joshua and Frye, Kyle and Brubaker, Jed R.}, + date = {2018}, + pages = {72--81}, + publisher = {{AAAI}}, + location = {{Stanford, CA}}, + eventtitle = {{{ICWSM}}}, + file = {/home/nathante/Zotero/storage/34TYXTGB/Fiesler - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/G9VFI2L7/Fiesler et al. - Reddit Rules! Characterizing an Ecosystem of Gover.pdf;/home/nathante/Zotero/storage/KT7KNG3J/Fiesler et al. - 2018 - Reddit rules! Characterizing an ecosystem of gover.pdf} +} + +@article{figeac_how_2021, + title = {How Behavioral Homophily on Social Media Influences the Perception of Tie-Strengthening within Young Adults’ Personal Networks}, + author = {Figeac, Julien and Favre, Guillaume}, + date = {2021-06-25}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {14614448211020691}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study examines how social media and information-sharing behavior can influence young adults’ perceptions of changes in tie strength within their own personal networks. 
By focusing on the extended personal networks (27.56 relationships) of young adults, we show that social media leads them to feel closer to their “friends” whom they think of as exhibiting online behaviors similar to their own. This behavioral homophily mainly stems from frequent reactions between friends, when they like or comment upon each other’s posts. Such homophily is also related to the sharing of political news and entertaining content, which constitute a salient affordance in the “pervasive awareness” of social media and lead users to feel closer to those exhibiting similar content-sharing behavior. This similarity reveals how social media platforms help to shape personal networks over time, particularly by influencing user relationships with weak ties who share similar online behavior.}, + langid = {english}, + keywords = {Entertaining content,homophily,information-sharing,personal networks,pervasive awareness,political news,social media,weak ties}, + file = {/home/nathante/Zotero/storage/YAKLRLVE/Figeac and Favre - 2021 - How behavioral homophily on social media influence.pdf} +} + +@unpublished{foote_agent-based_2018, + title = {An {{Agent}}-{{Based Model}} of {{Online Community Joining}}}, + author = {Foote, Jeremy}, + date = {2018-07}, + editora = {TeBlunthuis, Nathan and Hill, Benjamin Mako and Shaw, Aaron}, + editoratype = {collaborator}, + eventtitle = {International {{Conference}} on {{Computational Social Science}} ({{IC2S2}})}, + venue = {{Evanston, IL}} +} + +@inproceedings{foote_behavior_2018, + title = {The Behavior and Network Position of Peer Production Founders}, + booktitle = {{{iConference}} 2018: Transforming {{Digital Worlds}}}, + author = {Foote, Jeremy and Contractor, Noshir}, + editor = {Chowdhury, Gobinda and McLeod, Julie and Gillet, Val and Willett, Peter}, + date = {2018}, + series = {Lecture {{Notes}} in {{Computer Science}}}, + pages = {99--106}, + publisher = {{Springer}}, + abstract = {Online peer production projects, such 
as Wikipedia and open-source software, have become important producers of cultural and technological goods. While much research has been done on the way that large existing projects work, little is known about how projects get started or who starts them. Nor is it clear how much influence founders have on the future trajectory of a community. We measure the behavior and social networks of 60,959 users on Wikia.com over a two month period. We compare the activity, local network positions, and global network positions of future founders and non-founders. We then explore the relationship between these measures and the relative growth of a founder’s wikis. We suggest hypotheses for future research based on this exploratory analysis.}, + isbn = {978-3-319-78105-1}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6I8T7IER/Foote and Contractor - 2018 - The Behavior and Network Position of Peer Producti.pdf;/home/nathante/Zotero/storage/QW9VAHSU/10.html} +} + +@thesis{foote_formation_2019, + type = {PhD dissertation}, + title = {The Formation and Growth of Collaborative Online Organizations}, + author = {Foote, Jeremy}, + date = {2019}, + institution = {{Northwestern University}}, + location = {{Evanston, IL}}, + abstract = {Explore millions of resources from scholarly journals, books, newspapers, videos and more, on the ProQuest Platform.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/FATUNJ49/2.html} +} + +@online{foote_how_2020, + title = {How Individual Behaviors Drive Inequality in Online Community Sizes: An Agent-Based Simulation}, + shorttitle = {How Individual Behaviors Drive Inequality in Online Community Sizes}, + author = {Foote, Jeremy and TeBlunthuis, Nathan and Hill, Benjamin Mako and Shaw, Aaron}, + date = {2020-06-04}, + eprint = {2006.03119}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Why are online community sizes so extremely unequal? 
Most answers to this question have pointed to general mathematical processes drawn from physics like cumulative advantage. These explanations provide little insight into specific social dynamics or decisions that individuals make when joining and leaving communities. In addition, explanations in terms of cumulative advantage do not draw from the enormous body of social computing research that studies individual behavior. Our work bridges this divide by testing whether two influential social mechanisms used to explain community joining can also explain the distribution of community sizes. Using agent-based simulations, we evaluate how well individual-level processes of social exposure and decisions based on individual expected benefits reproduce empirical community size data from Reddit. Our simulations contribute to social computing theory by providing evidence that both processes together---but neither alone---generate realistic distributions of community sizes. Our results also illustrate the potential value of agent-based simulation to online community researchers to both evaluate and bridge individual and group-level theories.}, + archiveprefix = {arXiv}, + file = {/home/nathante/Zotero/storage/PMZDH4B2/Foote et al_2020_How individual behaviors drive inequality in online community sizes.pdf;/home/nathante/Zotero/storage/D57HFTGF/2006.html} +} + +@dataset{foote_replication_2017, + title = {Replication Data for: Starting Online Communities: Motivations and Goals of Wiki Founders}, + shorttitle = {Replication {{Data}} For}, + author = {Foote, Jeremy and Gergle, Darren and Shaw, Aaron}, + date = {2017-05-12}, + journaltitle = {Harvard Dataverse}, + abstract = {Anonymized survey data from our CHI 2017 Note: Starting Online Communities: Motivations and Goals of Wiki Founders}, + langid = {english} +} + +@inproceedings{foote_starting_2017, + title = {Starting Online Communities: Motivations and Goals of Wiki Founders}, + shorttitle = {Starting {{Online Communities}}}, 
+ booktitle = {Proceedings of the 2017 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '17)}, + author = {Foote, Jeremy and Gergle, Darren and Shaw, Aaron}, + date = {2017}, + pages = {6376--6380}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Why do people start new online communities? Previous research has studied what helps communities to grow and what motivates contributors, but the reasons that people create new communities in the first place remain unclear. We present the results of a survey of over 300 founders of new communities on the online wiki hosting site Wikia.com. We analyze the motivations and goals of wiki creators, finding that founders have diverse reasons for starting wikis and diverse ways of defining their success. Many founders see their communities as occupying narrow topics, and neither seek nor expect a large group of contributors. We also find that founders with differing goals approach community building differently. We argue that community platform designers can create interfaces that support the diverse goals of founders more effectively.}, + isbn = {978-1-4503-4655-9}, + keywords = {peer production,survey,wikis}, + file = {/home/nathante/Zotero/storage/BWAIBPUK/Foote et al. - 2017 - Starting Online Communities Motivations and Goals.pdf} +} + +@article{fortunato_community_2010, + title = {Community Detection in Graphs}, + author = {Fortunato, Santo}, + date = {2010-02}, + journaltitle = {Physics Reports}, + shortjournal = {Physics Reports}, + volume = {486}, + number = {3-5}, + eprint = {0906.0612}, + eprinttype = {arxiv}, + pages = {75--174}, + issn = {03701573}, + abstract = {The modern science of networks has brought significant advances to our understanding of complex systems. One of the most relevant features of graphs representing real systems is community structure, or clustering, i. e. 
the organization of vertices in clusters, with many edges joining vertices of the same cluster and comparatively few edges joining vertices of different clusters. Such clusters, or communities, can be considered as fairly independent compartments of a graph, playing a similar role like, e. g., the tissues or the organs in the human body. Detecting communities is of great importance in sociology, biology and computer science, disciplines where systems are often represented as graphs. This problem is very hard and not yet satisfactorily solved, despite the huge effort of a large interdisciplinary community of scientists working on it over the past few years. We will attempt a thorough exposition of the topic, from the definition of the main elements of the problem, to the presentation of most methods developed, with a special focus on techniques designed by statistical physicists, from the discussion of crucial issues like the significance of clustering and how methods should be tested and compared against each other, to the description of applications to real networks.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Information Retrieval,Condensed Matter - Statistical Mechanics,Physics - Biological Physics,Physics - Computational Physics,Physics - Physics and Society,Quantitative Biology - Quantitative Methods}, + file = {/home/nathante/Zotero/storage/TV2TW34A/Fortunato_2010_Community detection in graphs.pdf;/home/nathante/Zotero/storage/9U2MCQYD/0906.html} +} + +@article{fortunato_community_2016, + title = {Community Detection in Networks: A User Guide}, + shorttitle = {Community Detection in Networks}, + author = {Fortunato, Santo and Hric, Darko}, + date = {2016-11}, + journaltitle = {Physics Reports}, + shortjournal = {Physics Reports}, + volume = {659}, + eprint = {1608.00163}, + eprinttype = {arxiv}, + pages = {1--44}, + issn = {03701573}, + abstract = {Community detection in networks is one of the most popular topics of modern network science. 
Communities, or clusters, are usually groups of vertices having higher probability of being connected to each other than to members of other groups, though other patterns are possible. Identifying communities is an ill-defined problem. There are no universal protocols on the fundamental ingredients, like the definition of community itself, nor on other crucial issues, like the validation of algorithms and the comparison of their performances. This has generated a number of confusions and misconceptions, which undermine the progress in the field. We offer a guided tour through the main aspects of the problem. We also point out strengths and weaknesses of popular methods, and give directions to their use.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Information Retrieval,Computer Science - Social and Information Networks,Physics - Physics and Society}, + file = {/home/nathante/Zotero/storage/VTZJUPV4/Fortunato_Hric_2016_Community detection in networks.pdf;/home/nathante/Zotero/storage/5FILHBS2/1608.html} +} + +@article{fotouhi_babak_evolution_2019-1, + title = {Evolution of Cooperation on Large Networks with Community Structure}, + author = {Fotouhi, Babak and Momeni, Naghmeh and Allen, Benjamin and Nowak, Martin A.}, + date = {2019-03-29}, + journaltitle = {Journal of The Royal Society Interface}, + shortjournal = {Journal of The Royal Society Interface}, + volume = {16}, + number = {152}, + pages = {20180677}, + abstract = {Cooperation is a major factor in the evolution of human societies. The structure of social networks, which affects the dynamics of cooperation and other interpersonal phenomena, have common structural signatures. One of these signatures is the tendency to organize as groups. This tendency gives rise to networks with community structure, which are composed of distinct modules. In this paper, we study analytically the evolutionary game dynamics on large modular networks in the limit of weak selection.
We obtain novel analytical conditions such that natural selection favours cooperation over defection. We calculate the transition point for each community to favour cooperation. We find that a critical inter-community link creation probability exists for given group density, such that the overall network supports cooperation even if individual communities inhibit it. As a byproduct, we present solutions for the critical benefit-to-cost ratio which perform with remarkable accuracy for diverse generative network models, including those with community structure and heavy-tailed degree distributions. We also demonstrate the generalizability of the results to arbitrary two-player games.} +} + +@article{freeman_community_2006, + ids = {freeman_community_2006-1}, + title = {Community Ecology and the Sociology of Organizations}, + author = {Freeman, John H. and Audia, Pino G.}, + date = {2006}, + journaltitle = {Annual Review of Sociology}, + shortjournal = {Annual Review of Sociology}, + volume = {32}, + eprint = {29737735}, + eprinttype = {jstor}, + pages = {145--169}, + issn = {0360-0572}, + abstract = {Research on organizations is increasingly informed by analysis of community context. Community can be conceptualized as sets of relations between organizational forms or as places where organizations are located in resource space or in geography. In both modes, organizations operate interdependently with social institutions and with other units of social structure. Because such relationships channel flows of resources, opportunities are granted or withheld from social actors depending in part on their organization connections. 
Such considerations encourage analyses of organizations in ways that spread the relevance of results beyond organizationally defined research problem areas.}, + file = {/home/nathante/Zotero/storage/UT6RXR39/Freeman_Audia_2006_Community Ecology and the Sociology of Organizations.pdf} +} + +@article{freeman_liability_1983, + title = {The {{Liability}} of {{Newness}}: Age {{Dependence}} in {{Organizational Death Rates}}}, + shorttitle = {The {{Liability}} of {{Newness}}}, + author = {Freeman, John and Carroll, Glenn R. and Hannan, Michael T.}, + date = {1983}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {48}, + number = {5}, + eprint = {2094928}, + eprinttype = {jstor}, + pages = {692--710}, + issn = {0003-1224}, + abstract = {Age dependence in organizational death rates is studied using data on three populations of organizations: national labor unions, semiconductor electronics manufacturers, and newspaper publishing companies. There is a liability of newness in each of these populations but it differs depending on whether death occurs through dissolution or by absorption through merger. Liabilities of smallness and bigness are also identified but controlling for them does not eliminate age dependence.}, + file = {/home/nathante/Zotero/storage/CT32HPF2/Freeman et al. - 1983 - The Liability of Newness Age Dependence in Organi.pdf} +} + +@article{freeman_niche_1983, + title = {Niche Width and the Dynamics of Organizational Populations}, + author = {Freeman, John and Hannan, Michael T.}, + date = {1983}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {88}, + number = {6}, + eprint = {2778966}, + eprinttype = {jstor}, + pages = {1116--1145}, + issn = {0002-9602}, + abstract = {This paper explores the effects of environmental variability and grain on the niche width of organizational populations. 
It develops a model of the manner in which environmental variations affect the life chances of specialist and generalist organizations. This model predicts that death rates of generalists exceed those of specialists in fine-grained environments, regardless of the level of variability, but that generalists have lower death rates when environmental variation is both coarse grained and large. The model is applied to a sample of restaurant organizations in 18 California cities. Maximum likelihood estimates and tests confirm the major predictions of the model.}, + file = {/home/nathante/Zotero/storage/6UL6YJ3Y/Freeman and Hannan - 1983 - Niche Width and the Dynamics of Organizational Pop.pdf} +} + +@article{frey_clustering_2007, + title = {Clustering by {{Passing Messages Between Data Points}}}, + author = {Frey, Brendan J. and Dueck, Delbert}, + date = {2007-02-16}, + journaltitle = {Science}, + volume = {315}, + number = {5814}, + eprint = {17218491}, + eprinttype = {pmid}, + pages = {972--976}, + publisher = {{American Association for the Advancement of Science}}, + issn = {0036-8075, 1095-9203}, + abstract = {Clustering data by identifying a subset of representative examples is important for processing sensory signals and detecting patterns in data. Such “exemplars” can be found by randomly choosing an initial subset of data points and then iteratively refining it, but this works well only if that initial choice is close to a good solution. We devised a method called “affinity propagation,” which takes as input measures of similarity between pairs of data points. Real-valued messages are exchanged between data points until a high-quality set of exemplars and corresponding clusters gradually emerges. We used affinity propagation to cluster images of faces, detect genes in microarray data, identify representative sentences in this manuscript, and identify cities that are efficiently accessed by airline travel.
Affinity propagation found clusters with much lower error than other methods, and it did so in less than one-hundredth the amount of time. An algorithm that exchanges messages about the similarity of pairs of data points speeds identification of representative examples in a complex data set, such as genes in DNA data.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/PVGJU5KN/Frey_Dueck_2007_Clustering by Passing Messages Between Data Points.pdf;/home/nathante/Zotero/storage/ERM5BMQT/972.html} +} + +@unpublished{frey_designing_2019, + type = {Preprint}, + title = {Designing for Participation and Change in Digital Institutions}, + author = {Frey, Seth and Keegan, Brian and Krafft, Peter}, + date = {2019-02-22}, + eprint = {1902.08728}, + eprinttype = {arxiv}, + abstract = {Whether we recognize it or not, the Internet is rife with exciting and original institutional forms that are transforming social organization on and offline. Issues of governance in these Internet platforms and other digital institutions have posed a challenge for software engineers, many of whom have little exposure to the relevant history or theory of institutional design. Here, we offer one useful framework with an aim to stimulate dialogue between computer scientists and political scientists. The dominant guiding practices for the design of digital institutions to date in human-computer interaction, computer-supported cooperative work, and the tech industry at large have been an incentive-focused behavioral engineering paradigm, a collection of atheoretical approaches such as A/B-testing, and incremental issue-driven software engineering.
One institutional analysis framework that has been useful in the design of traditional institutions is the body of resource governance literature known as the “Ostrom Workshop”. A key finding of this literature that has yet to be broadly incorporated in the design of many digital institutions is the importance of including participatory change process mechanisms in what is called a “constitutional layer” of institutional design—in other words, defining rules that allow and facilitate diverse stakeholder participation in the ongoing process of institutional design change. We explore to what extent this consideration is met or could be better met in three varied cases of digital institutions: cryptocurrencies, cannabis informatics, and amateur Minecraft server governance. Examining such highly varied cases allows us to demonstrate the broad relevance of constitutional layers in many different types of digital institutions.}, + archiveprefix = {arXiv}, + howpublished = {Preprint}, + langid = {english}, + keywords = {Computer Science - Social and Information Networks,H.5.3,J.4,K.4.3}, + file = {/home/nathante/Zotero/storage/7BNDR3M8/Krafft et al. - 2019 - Designing for Participation and Change in Digital .pdf} +} + +@article{frey_emergence_2019, + title = {Emergence of Integrated Institutions in a Large Population of Self-Governing Communities}, + author = {Frey, Seth and Sumner, Robert W.}, + date = {2019-07-11}, + journaltitle = {PLOS ONE}, + shortjournal = {PLOS ONE}, + volume = {14}, + number = {7}, + pages = {e0216335}, + publisher = {{Public Library of Science}}, + issn = {1932-6203}, + abstract = {Most aspects of our lives are governed by large, highly developed institutions that integrate several governance tasks under one authority structure. But theorists differ as to the mechanisms that drive the development of such concentrated governance systems from rudimentary beginnings. 
Is the emergence of integrated governance schemes a symptom of consolidation of authority by small status groups? Or does integration occur because a complex institution has more potential responses to a complex environment? Here we examine the emergence of complex governance regimes in 5,000 sovereign, resource-constrained, self-governing online communities, ranging in scale from one to thousands of users. Each community begins with no community members and no governance infrastructure. As communities grow, they are subject to selection pressures that keep better managed servers better populated. We identify predictors of community success and test the hypothesis that governance complexity can enhance community fitness. We find that what predicts success depends on size: changes in complexity predict increased success with larger population servers. Specifically, governance rules in a large successful community are more numerous and broader in scope. They also tend to rely more on rules that concentrate power in administrators, and on rules that manage bad behavior and limited server resources. 
Overall, this work is consistent with theories that formal integrated governance systems emerge to organize collective responses to interdependent resource management problems, especially as factors such as population size exacerbate those problems.}, + langid = {english}, + keywords = {Community ecology,Computer software,Forests,Games,Internet,Online encyclopedias,Political theory,Resource management,Social psychology,Video games}, + file = {/home/nathante/Zotero/storage/AXDJPNKE/Frey and Sumner - 2019 - Emergence of integrated institutions in a large po.pdf;/home/nathante/Zotero/storage/DA5HAVLH/Frey_Sumner_2019_Emergence of integrated institutions in a large population of self-governing.pdf;/home/nathante/Zotero/storage/Q3FI9DBS/Frey and Sumner - 2019 - Emergence of integrated institutions in a large po.pdf;/home/nathante/Zotero/storage/4B26ZMHH/article.html;/home/nathante/Zotero/storage/4CRK5UUM/article.html;/home/nathante/Zotero/storage/8XFADRSX/article.html} +} + +@article{fulk_connective_1996, + title = {Connective and Communal Public Goods in Interactive Communication Systems}, + author = {Fulk, Janet and Flanagin, Andrew J. and Kalman, Michael E. and Monge, Peter R. and Ryan, Timothy}, + date = {1996}, + journaltitle = {Communication Theory}, + volume = {6}, + number = {1}, + pages = {60--87}, + issn = {1468-2885}, + abstract = {This paper extends theories of public goods to interactive communication systems. Two key public communication goods are identified. Connectivity provides point-to-point communication, and communality links members through commonly held information, such as that often found in databases. These extensions are important, we argue, because communication public goods operate differently from traditional material public goods. These differences have important implications for costs, benefits, and the realization of a critical mass of users that is necessary for realization of the good. 
We also explore multifunctional goods that combine various features and hybrid goods that link private goods to public ones. We examine the applicability of two key assumptions of public goods theory to interactive communication systems. First, jointness of supply specifies that consumption of a public good does not diminish its availability to others. Second, impossibility of exclusion stipulates that all members of the public have access to the good. We conclude with suggestions for further theoretical development.}, + langid = {english}, + keywords = {maintaining public goods}, + file = {/home/nathante/Zotero/storage/ZJVU4TGW/Fulk et al. - 1996 - Connective and communal public goods in interactiv.pdf;/home/nathante/Zotero/storage/8J5CPWLV/4259000.html} +} + +@article{gan_gender_2018, + title = {Gender, Feedback, and Learners' Decisions to Share Their Creative Computing Projects}, + author = {Gan, Emilia F. and Hill, Benjamin Mako and Dasgupta, Sayamindu}, + date = {2018-11}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {2}, + pages = {54:1-54:23}, + abstract = {Although informal online learning communities are made possible by users' decisions to share their creations, participation by females and other marginalized groups remains stubbornly low in technical communities. Using descriptive statistics and a unique dataset of shared and unshared projects from over 1.1 million users of Scratch—a collaborative programming community for young people—we show that while girls share less initially, this trend flips among experienced users. Using Bayesian regression analyses, we show that this relationship can largely be attributed to differences in the way boys and girls participate. We also find that while prior positive feedback is correlated with increased sharing among inexperienced users, this effect also reverses with experience or with the addition of controls.
Our findings provide a description of the dynamics behind online learners' decisions to share, open new research questions, and point to several lessons for system designers.}, + issue = {CSCW}, + langid = {english}, + keywords = {broadening participation,computer mediated communication,creative learning,gender differences,online communities,scratch,social computing and social navigation,social learning}, + file = {/home/nathante/Zotero/storage/II3Z28KL/Gan et al. - 2018 - Gender, feedback, and learners' decisions to share.pdf} +} + +@inproceedings{geiger_using_2013, + title = {Using Edit Sessions to Measure Participation in {{Wikipedia}}}, + booktitle = {Proceedings of the 2013 Conference on {{Computer}} Supported Cooperative Work}, + author = {Geiger, R. Stuart and Halfaker, Aaron}, + date = {2013}, + pages = {861--870}, + publisher = {{ACM}}, + keywords = {activity,labor,labor-hours,peer production,quantitative methods,sessions,wikipedia,work,work practices}, + file = {/home/nathante/Zotero/storage/6EZ7WJ4T/Geiger and Halfaker - 2013 - Using edit sessions to measure participation in Wi.pdf;/home/nathante/Zotero/storage/9Z6ATSSC/cscw-sessions.pdf;/home/nathante/Zotero/storage/MFHWZS8R/Geiger and Halfaker - 2013 - Using Edit Sessions to Measure Participation in Wi.pdf;/home/nathante/Zotero/storage/VZ7BVKRW/citation.html} +} + +@article{gelman_why_2012, + title = {Why We (Usually) Don't Have to Worry about Multiple Comparisons}, + author = {Gelman, Andrew and Hill, Jennifer and Yajima, Masanao}, + date = {2012-04-01}, + journaltitle = {Journal of Research on Educational Effectiveness}, + volume = {5}, + number = {2}, + pages = {189--211}, + publisher = {{Routledge}}, + issn = {1934-5747}, + abstract = {Applied researchers often find themselves making statistical inferences in settings that would seem to require multiple comparisons adjustments. We challenge the Type I error paradigm that underlies these corrections. 
Moreover we posit that the problem of multiple comparisons can disappear entirely when viewed from a hierarchical Bayesian perspective. We propose building multilevel models in the settings where multiple comparisons arise. Multilevel models perform partial pooling (shifting estimates toward each other), whereas classical procedures typically keep the centers of intervals stationary, adjusting for multiple comparisons by making the intervals wider (or, equivalently, adjusting the p values corresponding to intervals of fixed width). Thus, multilevel models address the multiple comparisons problem and also yield more efficient estimates, especially in settings with low group-level variation, which is where multiple comparisons are a particular concern.}, + keywords = {Bayesian inference,hierarchical modeling,multiple comparisons,statistical significance,Type S error}, + annotation = {\_eprint: https://doi.org/10.1080/19345747.2011.618213} +} + +@article{gneiting_strictly_2007, + title = {Strictly {{Proper Scoring Rules}}, {{Prediction}}, and {{Estimation}}}, + author = {Gneiting, Tilmann and Raftery, Adrian E.}, + date = {2007-03-01}, + journaltitle = {Journal of the American Statistical Association}, + volume = {102}, + number = {477}, + pages = {359--378}, + publisher = {{Taylor \& Francis}}, + issn = {0162-1459}, + abstract = {Scoring rules assess the quality of probabilistic forecasts, by assigning a numerical score based on the predictive distribution and on the event or value that materializes. A scoring rule is proper if the forecaster maximizes the expected score for an observation drawn from the distribution F if he or she issues the probabilistic forecast F, rather than G ≠ F. It is strictly proper if the maximum is unique. In prediction problems, proper scoring rules encourage the forecaster to make careful assessments and to be honest.
In estimation problems, strictly proper scoring rules provide attractive loss and utility functions that can be tailored to the problem at hand. This article reviews and develops the theory of proper scoring rules on general probability spaces, and proposes and discusses examples thereof. Proper scoring rules derive from convex functions and relate to information measures, entropy functions, and Bregman divergences. In the case of categorical variables, we prove a rigorous version of the Savage representation. Examples of scoring rules for probabilistic forecasts in the form of predictive densities include the logarithmic, spherical, pseudospherical, and quadratic scores. The continuous ranked probability score applies to probabilistic forecasts that take the form of predictive cumulative distribution functions. It generalizes the absolute error and forms a special case of a new and very general type of score, the energy score. Like many other scoring rules, the energy score admits a kernel representation in terms of negative definite functions, with links to inequalities of Hoeffding type, in both univariate and multivariate settings. Proper scoring rules for quantile and interval forecasts are also discussed. We relate proper scoring rules to Bayes factors and to cross-validation, and propose a novel form of cross-validation known as random-fold cross-validation. A case study on probabilistic weather forecasts in the North American Pacific Northwest illustrates the importance of propriety. 
We note optimum score approaches to point and quantile estimation, and propose the intuitively appealing interval score as a utility function in interval estimation that addresses width as well as coverage.}, + keywords = {Bayes factor,Bregman divergence,Brier score,Coherent,Continuous ranked probability score,Cross-validation,Entropy,Kernel score,Loss function,Minimum contrast estimation,Negative definite function,Prediction interval,Predictive distribution,Quantile forecast,Scoring rule,Skill score,Strictly proper,Utility function}, + annotation = {\_eprint: https://doi.org/10.1198/016214506000001437}, + file = {/home/nathante/Zotero/storage/ZDL34XF9/Gneiting_Raftery_2007_Strictly Proper Scoring Rules, Prediction, and Estimation.pdf;/home/nathante/Zotero/storage/6DRGUBQI/016214506000001437.html} +} + +@inproceedings{gorbatai_exploring_2011, + title = {Exploring {{Underproduction}} in {{Wikipedia}}}, + booktitle = {Proceedings of the 7th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Gorbatai, Andreea D.}, + date = {2011}, + series = {{{WikiSym}} '11}, + pages = {205--206}, + abstract = {Researchers have used Wikipedia data to identify a wide range of antecedents to success in collective production. But we have not yet inquired whether collective production creates those public goods which bring most value-add from a social perspective. In this poster I explore two key circumstances in which collective production can fail to respond to social need: when goods fail to attain high quality despite (1) high demand or (2) explicit designation by producers as highly important. In the context of Wikipedia, I propose first to examine articles that remain low quality, or underproduced, despite the fact they are viewed often; and second, to examine articles that remain low quality despite the fact that they were identified as important by Wikipedia contributors.
This research highlights the fact that collective production needs to be examined not only by itself but also in the context of a market for goods in order to ascertain the benefits of this production form. The final version of this study will integrate data on underproduced articles with data on knowledge categories to uncover systematic patterns of underproduction at the category level and predict which categories are most in need of quality improvement. Additionally I will use in-depth qualitative methods to examine the mechanisms through which underproduction occurs in select knowledge categories to distill practical recommendations for collective production improvement.}, + isbn = {978-1-4503-0909-7}, + keywords = {collective production,social goods,underproduction} +} + +@article{graeff_battle_2014, + title = {The Battle for ‘{{Trayvon Martin}}’: Mapping a Media Controversy Online and off-Line}, + shorttitle = {The Battle for ‘{{Trayvon Martin}}’}, + author = {Graeff, Erhardt and Stempeck, Matt and Zuckerman, Ethan}, + date = {2014-01}, + journaltitle = {First Monday}, + volume = {19}, + number = {2}, + issn = {13960466}, + langid = {english}, + keywords = {controversy mapping,media cloud,networked gatekeeping,political networks,quantitative media analysis}, + file = {/home/nathante/Zotero/storage/EXNM66WB/Graeff et al. 
- 2014 - The battle for ‘Trayvon Martin’ Mapping a media c.pdf;/home/nathante/Zotero/storage/BW5KPRPA/4947.html;/home/nathante/Zotero/storage/T7J9BSVG/3821.html} +} + +@article{graham_boundary_2019, + title = {Boundary Maintenance and the Origins of Trolling}, + author = {Graham, Elyse}, + date = {2019-09-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {21}, + number = {9}, + pages = {2029--2047}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This article presents a new social framework for understanding the origins of trolling and its expansion from an obscure practice, limited to a handful of boards on Usenet, to a pervasive component of Internet culture. I argue that trolling originated, in the term of sociologists, as a form of boundary maintenance that served to distinguish communities of self-identified online insiders from others beyond the boundaries of their community and to drive outsiders away from their spaces. This framework can help us to better understand the transformations that trolling has undergone in the decades since its inception, as well as the persistence of misogyny and prejudice throughout the history of the practice.}, + langid = {english}, + keywords = {Boundary maintenance,Internet communities,Internet history,online harassment,politics of cyberspace,trolling}, + file = {/home/nathante/Zotero/storage/6IN6XJWV/Graham - 2019 - Boundary maintenance and the origins of trolling.pdf} +} + +@online{graham_dyadic_2019, + title = {Dyadic {{Regression}}}, + author = {Graham, Bryan S.}, + date = {2019-08-23}, + eprint = {1908.09029}, + eprinttype = {arxiv}, + primaryclass = {econ, stat}, + abstract = {Dyadic data, where outcomes reflecting pairwise interaction among sampled units are of primary interest, arise frequently in social science research. Regression analyses with such data feature prominently in many research literatures (e.g., gravity models of trade). 
The dependence structure associated with dyadic data raises special estimation and, especially, inference issues. This chapter reviews currently available methods for (parametric) dyadic regression analysis and presents guidelines for empirical researchers.}, + archiveprefix = {arXiv}, + keywords = {62F12,Economics - Econometrics,Statistics - Applications}, + file = {/home/nathante/Zotero/storage/BXRL9YEI/Graham - 2019 - Dyadic Regression.pdf;/home/nathante/Zotero/storage/FVV64SD8/1908.html} +} + +@article{granovetter_strength_1973, + title = {The {{Strength}} of {{Weak Ties}}}, + author = {Granovetter, Mark S.}, + date = {1973-05-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {78}, + number = {6}, + pages = {1360--1380}, + issn = {0002-9602}, + abstract = {Analysis of social networks is suggested as a tool for linking micro and macro levels of sociological theory. The procedure is illustrated by elaboration of the macro implications of one aspect of small-scale interaction: the strength of dyadic ties. It is argued that the degree of overlap of two individuals' friendship networks varies directly with the strength of their tie to one another. The impact of this principle on diffusion of influence and information, mobility opportunity, and community organization is explored. Stress is laid on the cohesive power of weak ties. Most network models deal, implicitly, with strong ties, thus confining their applicability to small, well-defined groups. 
Emphasis on weak ties lends itself to discussion of relations between groups and to analysis of segments of social structure not easily defined in terms of primary groups.}, + file = {/home/nathante/Zotero/storage/GM6GICWI/225469.html} +} + +@thesis{graves_open_2013, + type = {Thesis}, + ids = {graves_open_2013-1}, + title = {Open Source Software Development as a Complex System}, + author = {Graves, John David Nicholas}, + date = {2013}, + institution = {{Auckland University of Technology}}, + abstract = {Open Source Software Development is an approach to software development involving open, public exposure of the source code of a computer program under development (hence, ‘open source’). Each open source program is shared online as a project in a source code repository. The so-called ‘open source community’ is the system which coordinates the work of software developers on the code in the repositories. This research explored the growth dynamics of this system, first by launching open source projects and then via simulation. Following (Barabasi \& Albert, 1999) and a biodiversity model (Hubbell, 2001), simulations of a complex system driven by preferential attachment, where popular projects attract more developers and grow (subject to some attrition), provided a systematic explanation for the lack of growth typical of single-developer projects. 
In this multi-methodological study, the lack of growth in the research projects empirically demonstrated the need for a theoretical understanding of open source project initiation and growth while the subsequent simulation results showed how the pattern of no growth (one developer) projects could be explained by a simple model.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/PZVK297T/Graves - 2013 - Open source software development as a complex syst.pdf;/home/nathante/Zotero/storage/TDXFC3JV/5729.html} +} + +@article{greve_jumping_1995, + title = {Jumping {{Ship}}: The {{Diffusion}} of {{Strategy Abandonment}}}, + shorttitle = {Jumping {{Ship}}}, + author = {Greve, Henrich R.}, + date = {1995}, + journaltitle = {Administrative Science Quarterly}, + volume = {40}, + number = {3}, + eprint = {2393793}, + eprinttype = {jstor}, + pages = {444--473}, + publisher = {{[Sage Publications, Inc., Johnson Graduate School of Management, Cornell University]}}, + issn = {0001-8392}, + abstract = {This paper focuses on organizations' abandonment of strategy, which may be driven jointly by contagion and competition from other organizations. This paper treats both explanations but emphasizes contagion. I argue that strategy abandonment is contagious because the future performance of current and alternative strategies is highly uncertain, causing decision makers to examine the actions of other organizations in the industry for clues to the correct action. Contagion from organizations easily observed by the focal organization is stronger than contagion from other organizations, causing corporate links across markets to become important routes for the contagion of strategy abandonment. 
This theory is tested on a sample of radio stations abandoning a strategy and is supported by evidence that contagion of abandonment occurs through the influence of an organization's social reference groups.} +} + +@inproceedings{grevet_combating_2013, + title = {Combating Homophily through Design}, + booktitle = {Proceedings of the 2013 Conference on {{Computer}} Supported Cooperative Work Companion}, + author = {Grevet, Catherine}, + date = {2013-02-23}, + series = {{{CSCW}} '13}, + pages = {57--60}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Social networking has allowed us to be in constant contact with friends from many different backgrounds, yet we are unaware of many of our friends' perspectives and opinions. Networks are highly homophilous, meaning that people tend to associate with others similar to them. This leads to homogenous clusters. How should we design social media to facilitate constructive exchanges rather than polarize individuals? In my work, I propose to look at whether users are currently aware of the homophily phenomenon in their online networks and exploring social network designs to break homophily.}, + isbn = {978-1-4503-1332-2}, + keywords = {awareness,homophily,social networks,tie strength}, + file = {/home/nathante/Zotero/storage/XFJCI35Y/Grevet - 2013 - Combating homophily through design.pdf} +} + +@inproceedings{grevet_managing_2014, + title = {Managing Political Differences in Social Media}, + booktitle = {Proceedings of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing}, + author = {Grevet, Catherine and Terveen, Loren G. and Gilbert, Eric}, + date = {2014-02-15}, + series = {{{CSCW}} '14}, + pages = {1400--1408}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Most people associate with people like themselves, a process called homophily. 
Exposure to diversity, however, makes us more informed as individuals and as a society. In this paper, we investigate political disagreements on Facebook to explore the conditions under which diverse opinions can coexist online. Via a mixed methods approach comprising 103 survey responses and 13 interviews with politically engaged American social media users, we found that participants who perceived more differences with their friends engaged less on Facebook than those who perceived more homogeneity. Weak ties were particularly brittle to political disagreements, despite being the ties most likely to offer diversity. Finally, based on our findings we suggest potential design opportunities to bridge across ideological difference: 1) support exposure to weak ties; and 2) make common ground visible while friends converse.}, + isbn = {978-1-4503-2540-0}, + keywords = {facebook,homophily,politics,relationship management,self-censorship,social media,tie strength}, + file = {/home/nathante/Zotero/storage/8VK4PWVX/Grevet et al. - 2014 - Managing political differences in social media.pdf} +} + +@article{gu_competition_2007, + ids = {gu_competition_2007-1}, + title = {Competition among Virtual Communities and User Valuation: The Case of Investing-Related Communities}, + shorttitle = {Competition {{Among Virtual Communities}} and {{User Valuation}}}, + author = {Gu, Bin and Konana, Prabhudev and Rajagopalan, Balaji and Chen, Hsuan-Wei Michelle}, + date = {2007}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + volume = {18}, + number = {1}, + eprint = {23211832}, + eprinttype = {jstor}, + pages = {68--85}, + issn = {1047-7047}, + abstract = {Virtual communities are a significant source of information for consumers and businesses. This research examines how users value virtual communities and how virtual communities differ in their value propositions.
In particular, this research examines the nature of trade-offs between information quantity and quality, and explores the sources of positive and negative externalities in virtual communities. The analyses are based on more than 500,000 postings collected from three large virtual investing-related communities (VICs) for 14 different stocks over a period of four years. The findings suggest that the VICs engage in differentiated competition as they face trade-offs between information quantity and quality. This differentiation among VICs, in turn, attracts users with different characteristics. We find both positive and negative externalities at work in virtual communities. We propose and validate that the key factor that determines the direction of network externalities is posting quality. The contributions of the study include the extension of our understanding of the virtual community evaluation by users, the exposition of competition between virtual communities, the role of network externalities in virtual communities, and the development of an algorithmic methodology to evaluate the quality (noise or signal) of textual data. The insights from the study provide useful guidance for design and management of VICs.}, + file = {/home/nathante/Zotero/storage/ACBCQ93N/Gu et al. - 2007 - Competition Among Virtual Communities and User Val.pdf;/home/nathante/Zotero/storage/KJZXB8P6/Gu et al. - 2007 - Competition Among Virtual Communities and User Val.pdf} +} + +@inproceedings{guha_birds_2015, + title = {Do {{Birds}} of a {{Feather Watch Each Other}}? 
Homophily and {{Social Surveillance}} in {{Location Based Social Networks}}}, + shorttitle = {Do {{Birds}} of a {{Feather Watch Each Other}}?}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Guha, Shion and Wicker, Stephen B.}, + date = {2015-02-28}, + series = {{{CSCW}} '15}, + pages = {1010--1020}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Location sharing applications (LSA) have proliferated in recent years. Current research principally focuses on egocentric privacy issues and design but has historically not explored the impact of surveillance on location sharing behavior. In this paper, we examine homophily in friendship and surveillance networks for 65 foursquare users. Our results indicate that location surveillance networks are strongly homophilous along the lines of race and gender while friendship networks are weakly homophilous on income. Qualitatively, an analysis of comments and interviews provides support for a discourse around location surveillance, which is mainly social, collaborative, positive and participatory. 
We relate these findings with prior literature on surveillance, self-presentation and homophily and situate this study in existing HCI/CSCW scholarship.}, + isbn = {978-1-4503-2922-4}, + keywords = {foursquare,homophily,privacy,surveillance,visibility,vision}, + file = {/home/nathante/Zotero/storage/4G3RN2C5/Guha and Wicker - 2015 - Do Birds of a Feather Watch Each Other Homophily .pdf} +} + +@inproceedings{hale_cross-language_2015, + title = {Cross-Language {{Wikipedia Editing}} of {{Okinawa}}, {{Japan}}}, + booktitle = {Proceedings of the 33rd {{Annual ACM Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Hale, Scott A.}, + date = {2015}, + pages = {183--192}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {This article analyzes users who edit Wikipedia articles about Okinawa, Japan, in English and Japanese. It finds these users are among the most active and dedicated users in their primary languages, where they make many large, high-quality edits. However, when these users edit in their non-primary languages, they tend to make edits of a different type that are overall smaller in size and more often restricted to the narrow set of articles that exist in both languages. Design changes to motivate wider contributions from users in their non-primary languages and to encourage multilingual users to transfer more information across language divides are presented.}, + isbn = {978-1-4503-3145-6}, + file = {/home/nathante/Zotero/storage/WL3BSR4A/Hale - 2015 - Cross-language Wikipedia Editing of Okinawa, Japan.pdf} +} + +@article{halfaker_rise_2013, + title = {The Rise and Decline of an Open Collaboration System: How {{Wikipedia}}'s Reaction to Popularity Is Causing Its Decline}, + shorttitle = {The {{Rise}} and {{Decline}} of an {{Open Collaboration System}}}, + author = {Halfaker, Aaron and Geiger, R. Stuart and Morgan, Jonathan T. 
and Riedl, John}, + date = {2013-05-01}, + journaltitle = {American Behavioral Scientist}, + shortjournal = {American Behavioral Scientist}, + volume = {57}, + number = {5}, + pages = {664--688}, + issn = {0002-7642}, + abstract = {Open collaboration systems, such as Wikipedia, need to maintain a pool of volunteer contributors to remain relevant. Wikipedia was created through a tremendous number of contributions by millions of contributors. However, recent research has shown that the number of active contributors in Wikipedia has been declining steadily for years and suggests that a sharp decline in the retention of newcomers is the cause. This article presents data that show how several changes the Wikipedia community made to manage quality and consistency in the face of a massive growth in participation have ironically crippled the very growth they were designed to manage. Specifically, the restrictiveness of the encyclopedia’s primary quality control mechanism and the algorithmic tools used to reject contributions are implicated as key causes of decreased newcomer retention. Furthermore, the community’s formal mechanisms for norm articulation are shown to have calcified against changes—especially changes proposed by newer editors.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/7B7AFK58/Halfaker et al. - 2013 - The rise and decline of an open collaboration syst.pdf;/home/nathante/Zotero/storage/Y9676KNV/The Rise and Decline of an Open Collaboration Syst.pdf} +} + +@online{hamilton_loyalty_2017, + title = {Loyalty in Online Communities}, + author = {Hamilton, William L. and Zhang, Justine and Danescu-Niculescu-Mizil, Cristian and Jurafsky, Dan and Leskovec, Jure}, + date = {2017-05-24}, + eprint = {1703.03386}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Loyalty is an essential component of multi-community engagement. 
When users have the choice to engage with a variety of different communities, they often become loyal to just one, focusing on that community at the expense of others. However, it is unclear how loyalty is manifested in user behavior, or whether loyalty is encouraged by certain community characteristics. In this paper we operationalize loyalty as a user-community relation: users loyal to a community consistently prefer it over all others; loyal communities retain their loyal users over time. By exploring this relation using a large dataset of discussion communities from Reddit, we reveal that loyalty is manifested in remarkably consistent behaviors across a wide spectrum of communities. Loyal users employ language that signals collective identity and engage with more esoteric, less popular content, indicating they may play a curational role in surfacing new material. Loyal communities have denser user-user interaction networks and lower rates of triadic closure, suggesting that community-level loyalty is associated with more cohesive interactions and less fragmentation into subgroups. We exploit these general patterns to predict future rates of loyalty. Our results show that a user's propensity to become loyal is apparent from their first interactions with a community, suggesting that some users are intrinsically loyal from the very beginning.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computation and Language,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/HQQUMCBD/Hamilton et al_2017_Loyalty in Online Communities.pdf;/home/nathante/Zotero/storage/5RE84JF9/1703.html} +} + +@book{hannan_concepts_2019, + title = {Concepts and Categories: Foundations for Sociological and Cultural Analysis}, + shorttitle = {Concepts and Categories}, + author = {Hannan, Michael T}, + date = {2019}, + abstract = {Why do people like books, music, or movies that adhere consistently to genre conventions? 
Why is it hard for politicians to take positions that cross ideological boundaries? Why do we have dramatically different expectations of companies that are categorized as social media platforms as opposed to news media sites? The answers to these questions require an understanding of how people use basic concepts in their everyday lives to give meaning to objects, other people, and social situations and actions. In this book, a team of sociologists presents a groundbreaking model of concepts and categorization that can guide sociological and cultural analysis of a wide variety of social situations. Drawing on research in various fields, including cognitive science, computational linguistics, and psychology, the book develops an innovative view of concepts. It argues that concepts have meanings that are probabilistic rather than sharp, occupying fuzzy, overlapping positions in a "conceptual space." Measurements of distances in this space reveal our mental representations of categories. Using this model, important yet commonplace phenomena such as our routine buying decisions can be quantified in terms of the cognitive distance between concepts. Concepts and Categories provides an essential set of formal theoretical tools and illustrates their application using an eclectic set of methodologies, from micro-level controlled experiments to macro-level language processing. 
It illuminates how explicit attention to concepts and categories can give us a new understanding of everyday situations and interactions.}, + isbn = {978-0-231-19272-9}, + langid = {english}, + annotation = {OCLC: 1083703599} +} + +@book{hannan_logics_2007, + ids = {hannan_logics_2012}, + title = {Logics of Organization Theory: Audiences, Codes, and Ecologies}, + shorttitle = {Logics of Organization Theory}, + author = {Hannan, Michael T and Pólos, László and Carroll, Glenn}, + date = {2007}, + publisher = {{Princeton University Press}}, + location = {{Princeton, N.J.}}, + abstract = {"Building theories of organizations is challenging: theories are partial and "folk" categories are fuzzy. The commonly used tools--first-order logic and its foundational set theory--are ill-suited for handling these complications. Here, three leading authorities rethink organization theory. Logics of Organization Theory sets forth and applies a new language for theory building based on a nonmonotonic logic and fuzzy set theory. In doing so, not only does it mark a major advance in organizational theory, but it also draws lessons for theory building elsewhere in the social sciences. Organizational research typically analyzes organizations in categories such as "bank," "hospital," or "university." These categories have been treated as crisp analytical constructs designed by researchers. But sociologists increasingly view categories as constructed by audiences. This book builds on cognitive psychology and anthropology to develop an audience-based theory of organizational categories. It applies this framework and the new language of theory building to organizational ecology. 
It reconstructs and integrates four central theory fragments, and in so doing reveals unexpected connections and new insights."--Publisher description.}, + isbn = {978-1-4008-4301-5}, + langid = {english}, + annotation = {OCLC: 646517503} +} + +@book{hannan_organizational_1989, + title = {Organizational Ecology}, + author = {Hannan, Michael T. and Freeman, John}, + date = {1989}, + edition = {1}, + publisher = {{Harvard University Press}}, + location = {{Cambridge, MA}} +} + +@article{hannan_organizational_2003, + title = {The {{Organizational Niche}}}, + author = {Hannan, Michael T. and Carroll, Glenn R. and Pólos, László}, + date = {2003}, + journaltitle = {Sociological Theory}, + volume = {21}, + number = {4}, + pages = {309--340}, + issn = {1467-9558}, + abstract = {Although the concept of niche has been extremely useful in sociological theory and research, some aspects of the concept have not been clearly developed. This article advances a theoretical reconstruction of the concept of niche, with special application to organizations. The proposed formal model unifies several active lines of sociological theory. It also extends the notion of the niche from the realm of behaviors to apply to the rules coding social identities and organizational forms. The reconstruction gives deeper insight into the niche of an organizational population as well as individual organizations. 
Finally, the model analyzes the (thus far) tacit assumption that niches are convex, examines the implications of convexity for commonly used measures of niche width, and provides a general sociological argument for the predominance of convex niches.}, + langid = {english}, + annotation = {\_eprint: https://onlinelibrary.wiley.com/doi/pdf/10.1046/j.1467-9558.2003.00192.x}, + file = {/home/nathante/Zotero/storage/QVMN4EMC/Hannan et al_2003_The Organizational Niche.pdf;/home/nathante/Zotero/storage/QPAD2LBR/j.1467-9558.2003.00192.html} +} + +@article{hannan_population_1977, + title = {The Population Ecology of Organizations}, + author = {Hannan, Michael T. and Freeman, John}, + date = {1977}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {82}, + number = {5}, + eprint = {2777807}, + eprinttype = {jstor}, + pages = {929--964}, + issn = {0002-9602}, + abstract = {A population ecology perspective on organization-environment relations is proposed as an alternative to the dominant adaptation perspective. The strength of inertial pressures on organizational structure suggests the application of models that depend on competition and selection in populations of organizations. Several such models as well as issues that arise in attempts to apply them to the organization-environment problem are discussed.}, + file = {/home/nathante/Zotero/storage/TVD48Q77/Hannan and Freeman - 1977 - The Population Ecology of Organizations.pdf} +} + +@article{hannan_structural_1984, + title = {Structural Inertia and Organizational Change}, + author = {Hannan, Michael T. 
and Freeman, John}, + date = {1984-04}, + journaltitle = {American Sociological Review}, + volume = {49}, + number = {2}, + eprint = {2095567}, + eprinttype = {jstor}, + pages = {149--164}, + issn = {0003-1224}, + file = {/home/nathante/Zotero/storage/DRMDTJYH/Hannan and Freeman - 1984 - Structural inertia and organizational change.pdf} +} + +@article{hardin_competitive_1960, + title = {The {{Competitive Exclusion Principle}}}, + author = {Hardin, Garrett}, + date = {1960}, + journaltitle = {Science}, + volume = {131}, + number = {3409}, + eprint = {1705965}, + eprinttype = {jstor}, + pages = {1292--1297}, + publisher = {{American Association for the Advancement of Science}}, + issn = {0036-8075} +} + +@article{hargittai_whose_2007, + title = {Whose {{Space}}? Differences among {{Users}} and {{Non}}-{{Users}} of {{Social Network Sites}}}, + shorttitle = {Whose {{Space}}?}, + author = {Hargittai, Eszter}, + date = {2007-10-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {13}, + number = {1}, + pages = {276--297}, + publisher = {{Oxford Academic}}, + abstract = {Are there systematic differences between people who use social network sites and those who stay away, despite a familiarity with them? Based on data from a survey administered to a diverse group of young adults, this article looks at the predictors of SNS usage, with particular focus on Facebook, MySpace, Xanga, and Friendster. Findings suggest that use of such sites is not randomly distributed across a group of highly wired users. A person’s gender, race and ethnicity, and parental educational background are all associated with use, but in most cases only when the aggregate concept of social network sites is disaggregated by service. Additionally, people with more experience and autonomy of use are more likely to be users of such sites. 
Unequal participation based on user background suggests that differential adoption of such services may be contributing to digital inequality.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/WVFZWUGF/Hargittai - 2007 - Whose Space Differences among Users and Non-Users.pdf;/home/nathante/Zotero/storage/C5TFC2YY/4583068.html} +} + +@article{haveman_follow_1993, + title = {Follow the {{Leader}}: Mimetic {{Isomorphism}} and {{Entry Into New Markets}}}, + shorttitle = {Follow the {{Leader}}}, + author = {Haveman, Heather A.}, + date = {1993}, + journaltitle = {Administrative Science Quarterly}, + volume = {38}, + number = {4}, + eprint = {2393338}, + eprinttype = {jstor}, + pages = {593--627}, + publisher = {{[Sage Publications, Inc., Johnson Graduate School of Management, Cornell University]}}, + issn = {0001-8392}, + abstract = {This paper combines organizational ecology and neoinstitutional theory to explain the process of diversification, specifically, how the structure of markets affects rates of market entry. I extend the density-dependence model of competition and legitimation, which has been used to study organizational founding and failure, to the process of organizational change through entry into new markets. I argue that the number of organizations operating in a particular market will have an inverted-U-shaped relationship with the rate of entry into that market. I also examine propositions, drawn from neoinstitutional theory, that organizations will follow similar and successful organizations into new markets. I assess the link between entry into new markets and (1) the number of organizations operating in those markets similar to a potential entrant and (2) the number of successful organizations in those markets. I also explore whether these two mimetic processes act in concert by examining whether successful potential entrants to a market are influenced by the presence of other successful organizations. 
I test these hypotheses on a population of savings and loan associations. I find that these firms imitate large and profitable organizations, but I find only limited evidence of imitation of similarly sized organizations, as large organizations copy the actions of other large organizations.}, + file = {/home/nathante/Zotero/storage/UDA8NLIN/Haveman_1993_Follow the Leader.pdf} +} + +@book{hawley_human_1986, + title = {Human Ecology: A Theoretical Essay}, + shorttitle = {Human Ecology}, + author = {Hawley, Amos Henry}, + date = {1986}, + publisher = {{University of Chicago Press}}, + location = {{Chicago; London}}, + isbn = {978-0-226-31983-4 978-0-226-31984-1}, + langid = {english}, + annotation = {OCLC: 993363851} +} + +@unpublished{healy_ecology_2003, + type = {Working Paper}, + title = {The Ecology of Open-Source Software Development}, + author = {Healy, Kieran and Schussman, Alan}, + date = {2003}, + abstract = {Open Source Software (OSS) is an innovative method of developing software applications that has been very successful over the past eight to ten years. A number of theories have emerged to explain its success, mainly from economics and law. We analyze a very large sample of OSS projects and find striking patterns in the overall structure of the development community. The distribution of projects on a range of activity measures is spectacularly skewed, with only a relatively tiny number of projects showing evidence of the strong collaborative activity which is supposed to characterize OSS. Our findings are consistent with prior, smaller-scale empirical research. We argue that these findings pose problems for the dominant accounts of OSS. We suggest that the gulf between active and inactive projects may be explained by social-structural features of the community which have received little attention in the existing literature. 
We suggest some hypotheses that might better predict the observed ecology of projects.}, + howpublished = {Working Paper}, + keywords = {Do Not Cite,FOSS}, + file = {/home/nathante/Zotero/storage/6VRGKZI6/Healy and Schussman - 2003 - The ecology of open-source software development.pdf} +} + +@online{heaps_enforcing_2020, + title = {Enforcing Stationarity through the Prior in Vector Autoregressions}, + author = {Heaps, Sarah E.}, + date = {2020-04-20}, + eprint = {2004.09455}, + eprinttype = {arxiv}, + primaryclass = {stat}, + abstract = {Stationarity is a very common assumption in time series analysis. A vector autoregressive (VAR) process is stationary if and only if the roots of its characteristic equation lie outside the unit circle, constraining the autoregressive coefficient matrices to lie in the stationary region. However, the stationary region has a highly complex geometry which impedes specification of a prior distribution. In this work, an unconstrained reparameterisation of a stationary VAR model is presented. The new parameters are based on partial autocorrelation matrices, which are interpretable, and can be transformed bijectively to the space of unconstrained square matrices. This transformation preserves various structural forms of the partial autocorrelation matrices and readily facilitates specification of a prior. Properties of this prior are described along with an important special case which is exchangeable with respect to the order of the elements in the observation vector. Posterior inference and computation are described and implemented using Hamiltonian Monte Carlo via Stan. 
The prior and inferential procedures are illustrated with an application to a macroeconomic time series which highlights the benefits of enforcing stationarity.}, + archiveprefix = {arXiv}, + version = {1}, + keywords = {_tablet,VAR}, + file = {/home/nathante/Zotero/storage/VNW4X7ZM/Heaps_2020_Enforcing stationarity through the prior in vector autoregressions.pdf;/home/nathante/Zotero/storage/AKKHZYXS/2004.html} +} + +@article{helland_diaspora_2007, + title = {Diaspora on the {{Electronic Frontier}}: Developing {{Virtual Connections}} with {{Sacred Homelands}}}, + shorttitle = {Diaspora on the {{Electronic Frontier}}}, + author = {Helland, Christopher}, + date = {2007-04-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {12}, + number = {3}, + pages = {956--976}, + publisher = {{Oxford Academic}}, + abstract = {This study demonstrates how diaspora religious traditions utilized the Internet to develop significant network connections among each other and also to their place of origins. By examining the early Usenet system, I argue that the religious beliefs and practices of diaspora religious traditions were a motivating factor for developing Usenet groups where geographically dispersed individuals could connect with each other in safe, supportive, and religiously tolerant environments. This article explores the new forms of religious practices that began to occur on these sites, focusing on the manner in which Internet technology and the World Wide Web were utilized for activities such as long-distance ritual practice, cyber pilgrimage, and other religiously-motivated undertakings. 
Through these new online religious activities, diaspora groups have been able to develop significant connections not only among people, but also between people and the sacred homeland itself.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/QAMFAZAW/Helland - 2007 - Diaspora on the Electronic Frontier Developing Vi.pdf;/home/nathante/Zotero/storage/WNQX9GUY/4583017.html} +} + +@inproceedings{hemetsberger_sharing_2004, + title = {Sharing and Creating Knowledge in Open-Source Communities: The Case of {{KDE}}}, + booktitle = {Paper for {{Fifth European Conference}} on {{Organizational Knowledge}}, {{Learning}}, and {{Capabilities}}, {{Innsbruck}}}, + author = {Hemetsberger, Andrea and Reinhardt, Christian}, + date = {2004} +} + +@article{hertling_dbkwik:_nodate-1, + title = {{{DBkWik}}: A {{Consolidated Knowledge Graph}} from {{Thousands}} of {{Wikis}}}, + author = {Hertling, Sven and Paulheim, Heiko}, + pages = {8}, + abstract = {Popular knowledge graphs such as DBpedia and YAGO are built from Wikipedia, and therefore similar in coverage. In contrast, Wikifarms like Fandom contain Wikis for specific topics, which are often complementary to the information contained in Wikipedia, and thus DBpedia and YAGO. Extracting these Wikis with the DBpedia extraction framework is possible, but results in many isolated knowledge graphs. In this paper, we show how to create one consolidated knowledge graph, called DBkWik, from thousands of Wikis. 
We perform entity resolution and schema matching, and show that the resulting large-scale knowledge graph is complementary to DBpedia.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/JL9J56EN/Hertling and Paulheim - DBkWik A Consolidated Knowledge Graph from Thousa.pdf} +} + +@inproceedings{hessel_science_2016, + ids = {hessel_science_2016-1}, + title = {Science, Askscience, and Badscience: On the Coexistence of Highly Related Communities}, + shorttitle = {Science, Askscience, and Badscience}, + booktitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Hessel, Jack and Tan, Chenhao and Lee, Lillian}, + date = {2016-03-31}, + eprint = {1612.07487}, + eprinttype = {arxiv}, + pages = {11}, + abstract = {When large social-media platforms allow users to easily form and self-organize into interest groups, highly related communities can arise. For example, the Reddit site hosts not just a group called food, but also HealthyFood, foodhacks, foodporn, and cooking, among others. Are these highly related communities created for similar classes of reasons (e.g., to focus on a subtopic, to create a place for allegedly more “high-minded” discourse, etc.)? How do users allocate attention between such close alternatives when they are available or emerge over time? Are there different types of relations between close alternatives such as sharing many users vs. a new community drawing away members of an older one vs. a splinter group failing to cohere into a viable separate community? We investigate the interactions between highly related communities using data from reddit.com consisting of 975M posts and comments spanning an 8-year period. We identify a set of typical affixes that users adopt to create highly related communities and build a taxonomy of affixes. 
One interesting finding regarding users’ behavior is: after a newer community is created, for several types of highly-related community pairs, users that engage in a newer community tend to be more active in their original community than users that do not explore, even when controlling for previous level of engagement.}, + archiveprefix = {arXiv}, + eventtitle = {Tenth {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + langid = {english}, + keywords = {Computer Science - Social and Information Networks,Physics - Physics and Society}, + file = {/home/nathante/Zotero/storage/2W6YBUBD/Hessel et al_2016_Science, AskScience, and BadScience.pdf;/home/nathante/Zotero/storage/4FLLXNV9/Hessel et al. - 2016 - Science, AskScience, and BadScience On the Coexis.pdf;/home/nathante/Zotero/storage/WS6TW26Q/Hessel et al. - 2016 - Science, AskScience, and BadScience On the Coexis.pdf;/home/nathante/Zotero/storage/3NHVFA3U/1612.html;/home/nathante/Zotero/storage/DXX4CJ7T/14739.html;/home/nathante/Zotero/storage/YSX2WN2J/13106.html} +} + +@book{heyes_cognitive_2018, + title = {Cognitive Gadgets: The Cultural Evolution of Thinking}, + author = {Heyes, Cecilia}, + date = {2018}, + publisher = {{Harvard University Press}}, + isbn = {978-0-674-98515-5}, + langid = {english}, + annotation = {OCLC: 8162788163} +} + +@inproceedings{hill_almost_2011, + title = {Almost {{Wikipedia}}: What {{Eight Collaborative Encyclopedia Projects Reveal About Mechanisms}} of {{Collective Action}}}, + shorttitle = {Almost {{Wikipedia}}}, + author = {Hill, Benjamin Mako}, + date = {2011}, + location = {{Harvard University, Cambridge, MA}}, + eventtitle = {Berkman {{Center}} for {{Internet}} and {{Society Luncheon Series Presentation}}} +} + +@incollection{hill_almost_2013, + title = {Almost {{Wikipedia}}: What Eight Early Online Collaborative Encyclopedia Projects Reveal about the Mechanisms of Collective Action.}, + booktitle = {Essays on Volunteer Mobilization in Peer 
Production}, + author = {Hill, Benjamin Mako}, + date = {2013}, + publisher = {{Massachusetts Institute of Technology}}, + location = {{Cambridge, Massachusetts}}, + annotation = {PhD Dissertation} +} + +@inproceedings{hill_consider_2014, + title = {Consider the Redirect: A Missing Dimension of {{Wikipedia}} Research}, + shorttitle = {Consider the {{Redirect}}}, + booktitle = {Proceedings of {{The International Symposium}} on {{Open Collaboration}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + date = {2014}, + series = {{{OpenSym}} '14}, + pages = {28:1--28:4}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Redirects are special pages in wikis that silently transport visitors to other pages. Although redirects make up a majority of all article pages in English Wikipedia, they have attracted very little attention and are rarely taken into account by researchers. This note describes redirects and illustrates why they play an important role in shaping activity in Wikipedia. We also present a novel longitudinal dataset of redirects for English Wikipedia and the software used to produce it. 
Using this dataset, we revisit several important published findings about Wikipedia to show that accounting for redirects can have important effects on research.}, + isbn = {978-1-4503-3016-9}, + file = {/home/nathante/Zotero/storage/QBK2TIWQ/Hill and Shaw - 2014 - Consider the Redirect A Missing Dimension of Wiki.pdf} +} + +@book{hill_debian_2005, + title = {Debian {{GNU}}/{{Linux}} 3.1 {{Bible}}}, + author = {Hill, Benjamin Mako}, + date = {2005}, + publisher = {{Wiley Pub}}, + location = {{Indianapolis, Ind}}, + editora = {Harris, David B}, + editoratype = {collaborator}, + keywords = {FOSS} +} + +@software{hill_mediawiki_2018, + title = {Mediawiki Dump Tools}, + author = {Hill, Benjamin Mako and TeBlunthuis, Nathan}, + date = {2018-09-03}, + version = {a4e60a9f} +} + +@book{hill_official_2008, + title = {Official {{Ubuntu}} Book}, + author = {Hill, Benjamin Mako and Burger, Corey and Jesse, Jonathan and Bacon, Jono}, + date = {2008}, + edition = {3}, + publisher = {{Prentice Hall}}, + isbn = {0-13-713668-4}, + keywords = {FOSS} +} + +@inproceedings{hill_page_2015, + title = {Page Protection: Another Missing Dimension of {{Wikipedia}} Research}, + shorttitle = {Page {{Protection}}}, + booktitle = {Proceedings of the 11th {{International Symposium}} on {{Open Collaboration}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + date = {2015}, + series = {{{OpenSym}} '15}, + pages = {15:1--15:4}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Page protection is a feature of wiki software that allows administrators to restrict contributions to particular pages. For example, pages are frequently protected so that they can only be edited by administrators. Page protection affects tens of thousands of pages in English Wikipedia and renders many of Wikipedia's most visible pages uneditable by the vast majority of visitors. That said, page protection has attracted very little attention and is rarely taken into account by researchers. 
This note describes page protection and illustrates why it plays an important role in shaping user behavior on wikis. We also present a new longitudinal dataset of page protection events for English Wikipedia, the software used to produce it, and results from tests that support both the validity of the dataset and the impact of page protection on patterns of editing.}, + isbn = {978-1-4503-3666-6}, + keywords = {page protection,wikipedia}, + file = {/home/nathante/Zotero/storage/VH9BNJVA/Hill and Shaw - 2015 - Page Protection Another Missing Dimension of Wiki.pdf} +} + +@incollection{hill_studying_2019, + title = {Studying Populations of Online Communities}, + booktitle = {The {{Oxford Handbook}} of {{Networked Communication}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + editor = {Foucault Welles, Brooke and González-Bailón, Sandra}, + date = {2019-09}, + pages = {173--193}, + publisher = {{Oxford University Press}}, + location = {{Oxford, UK}}, + abstract = {While the large majority of published research on online communities consists of analyses conducted entirely within individual communities, this chapter argues for a population-based approach, in which researchers study groups of similar communities. For example, although there have been thousands of papers published about Wikipedia, a population-based approach might compare all wikis on a particular topic. Using examples from published empirical studies, the chapter describes five key benefits of this approach. First, it argues that population-level research increases the generalizability of findings. Next, it describes four processes and dynamics that are only possible to study using populations: community-level variables, information diffusion processes across communities, ecological dynamics, and multilevel community processes. 
The chapter concludes with a discussion of a series of limitations and challenges.}, + isbn = {978-0-19-046051-8}, + langid = {english}, + file = {/home/nathante/Zotero/storage/39ZWGGYN/Hill and Shaw - 2019 - Studying Populations of Online Communities.pdf;/home/nathante/Zotero/storage/BTB3AQGV/oxfordhb-9780190460518-e-8.html} +} + +@incollection{hill_whither_2018, + title = {Whither Peer Production}, + booktitle = {Decentralizing the {{Commons}}}, + author = {Hill, Benjamin Mako}, + editor = {Hassan, Samer and De Filippi, Primavera}, + date = {2018}, + publisher = {{Institute of Network Cultures}}, + location = {{Amsterdam, The Netherlands}}, + annotation = {Forthcoming} +} + +@article{hill_wikipedia_2013, + ids = {hill_wikipedia_2013-1}, + title = {The {{Wikipedia}} Gender Gap Revisited: Characterizing Survey Response Bias with Propensity Score Estimation}, + shorttitle = {The {{Wikipedia Gender Gap Revisited}}}, + author = {Hill, Benjamin Mako and Shaw, Aaron}, + date = {2013-06-26}, + journaltitle = {PLoS ONE}, + shortjournal = {PLoS ONE}, + volume = {8}, + number = {6}, + pages = {e65782}, + publisher = {{Public Library of Science}}, + abstract = {Opt-in surveys are the most widespread method used to study participation in online communities, but produce biased results in the absence of adjustments for non-response. A 2008 survey conducted by the Wikimedia Foundation and United Nations University at Maastricht is the source of a frequently cited statistic that less than 13\% of Wikipedia contributors are female. However, the same study suggested that only 39.9\% of Wikipedia readers in the US were female – a finding contradicted by a representative survey of American adults by the Pew Research Center conducted less than two months later. 
Combining these two datasets through an application and extension of a propensity score estimation technique used to model survey non-response bias, we construct revised estimates, contingent on explicit assumptions, for several of the Wikimedia Foundation and United Nations University at Maastricht claims about Wikipedia editors. We estimate that the proportion of female US adult editors was 27.5\% higher than the original study reported (22.7\%, versus 17.8\%), and that the total proportion of female editors was 26.8\% higher (16.1\%, versus 12.7\%).}, + keywords = {Internet,Language,Online encyclopedias,Schools,Survey research,Surveys,United States,Universities}, + file = {/home/nathante/Zotero/storage/WWED7HE2/Hill and Shaw - 2013 - The Wikipedia Gender Gap Revisited Characterizing.pdf;/home/nathante/Zotero/storage/BGLYPWPW/article.html} +} + +@inproceedings{hillman_alksjdflksfd_2014, + title = {'alksjdf;{{Lksfd}}': Tumblr and the Fandom User Experience}, + shorttitle = {'alksjdf;{{Lksfd}}'}, + booktitle = {Proceedings of the 2014 Conference on {{Designing}} Interactive Systems}, + author = {Hillman, Serena and Procyk, Jason and Neustaedter, Carman}, + date = {2014-06-21}, + series = {{{DIS}} '14}, + pages = {775--784}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {A growing trend is the participation in online fandom communities through the support of the blogging platform Tumblr. While past research has investigated backchannels—chatter related to live entertainment on micro-blogging sites such as Twitter—there is a lack of research on the behaviours and motivations of Tumblr users. In our study, we investigate why fandom users chose Tumblr over other social networking sites, their motivations behind participating in fandoms, and how they interact within the Tumblr community. 
Our findings show that users face many user interface challenges when participating in Tumblr fandoms, especially initially; yet, despite this, Tumblr fandom communities thrive with a common sense of social purpose and exclusivity where users feel they can present a more authentic reflection of themselves to those sharing similar experiences and interests. We describe how this suggests design directions for social networking and blogging sites in order to promote communities of users.}, + isbn = {978-1-4503-2902-6}, + keywords = {backchannels,entertainment,fandoms,fanfiction,micro-blogging,social networking,television,Tumblr}, + file = {/home/nathante/Zotero/storage/HZCLCKCG/Hillman et al. - 2014 - 'alksjdf\;Lksfd' tumblr and the fandom user experi.pdf} +} + +@article{himelboim_valence-based_2016, + title = {Valence-Based Homophily on {{Twitter}}: Network {{Analysis}} of {{Emotions}} and {{Political Talk}} in the 2012 {{Presidential Election}}}, + shorttitle = {Valence-Based Homophily on {{Twitter}}}, + author = {Himelboim, Itai and Sweetser, Kaye D and Tinkham, Spencer F and Cameron, Kristen and Danelo, Matthew and West, Kate}, + date = {2016-08-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {18}, + number = {7}, + pages = {1382--1400}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study integrates network and content analyses to examine valence-based homophily on Twitter or the tendency for individuals to interact with those expressing similar valence. During the 2012 federal election cycle, we collected Twitter conversations about 10 controversial political topics and mapped their network ties. Using network analysis, we discovered clusters—subgroups of highly self-connected users—and coded messages in each cluster for their expressed positive-to-negative emotional valence, level of support or opposition, and political leaning. 
We found that valence-based homophily successfully explained the selection of user interactions on Twitter, in terms of expressed emotional valence in their tweets or support versus criticism to an issue. It also finds conservative voices to be associated with negatively valenced clusters and vice versa. This study expands the theory of homophily beyond its traditional conceptualization and provides a new understanding of political-issue interactions in a social media context.}, + langid = {english}, + keywords = {2012 Election,emotional valence,homophily,political talk,social networks,Twitter}, + file = {/home/nathante/Zotero/storage/QUK4ID26/Himelboim et al. - 2016 - Valence-based homophily on Twitter Network Analys.pdf} +} + +@article{hirsch_sacrifice_1990-1, + title = {Sacrifice for the {{Cause}}: Group {{Processes}}, {{Recruitment}}, and {{Commitment}} in a {{Student Social Movement}}}, + shorttitle = {Sacrifice for the {{Cause}}}, + author = {Hirsch, Eric L.}, + date = {1990}, + journaltitle = {American Sociological Review}, + volume = {55}, + number = {2}, + eprint = {2095630}, + eprinttype = {jstor}, + pages = {243--254}, + issn = {0003-1224}, + abstract = {[Recruitment and commitment in protest movements are best explained by analyzing group-level political processes such as consciousness-raising, collective empowerment, polarization, and collective decision-making. Such processes increase protesters' political solidarity--their commitment to the cause and their belief in the non-institutional tactics that further that cause. Other frameworks, such as the rational choice and collective behavior approaches, are less adequate in accounting for recruitment and commitment. Rational choice perspectives neglect group processes by suggesting that decisions about whether to join or stay at a protest are based largely on isolated individual cost/benefit calculations. 
The collective behavior view that protests are spawned by confused and insecure individuals in situations of social unrest cannot be reconciled with the fact that most protests originate among close-knit groups of politically committed activists using carefully planned strategies and tactics. These conclusions are based on a study of the 1985 Columbia University divestment protest.]}, + file = {/home/nathante/Zotero/storage/3IITPEFE/Hirsch - 1990 - Sacrifice for the Cause Group Processes, Recruitm.pdf} +} + +@book{hirschman_exit_1970, + title = {Exit, {{Voice}}, and {{Loyalty}}: Responses to {{Decline}} in {{Firms}}, {{Organizations}}, and {{States}}}, + shorttitle = {Exit, {{Voice}}, and {{Loyalty}}}, + author = {Hirschman, Albert O.}, + date = {1970}, + publisher = {{Harvard University Press}}, + abstract = {An innovator in contemporary thought on economic and political development looks here at decline rather than growth. Albert O. Hirschman makes a basic distinction between alternative ways of reacting to deterioration in business firms and, in general, to dissatisfaction with organizations: one, “exit,” is for the member to quit the organization or for the customer to switch to the competing product, and the other, “voice,” is for members or customers to agitate and exert influence for change “from within.” The efficiency of the competitive mechanism, with its total reliance on exit, is questioned for certain important situations. As exit often undercuts voice while being unable to counteract decline, loyalty is seen in the function of retarding exit and of permitting voice to play its proper role. The interplay of the three concepts turns out to illuminate a wide range of economic, social, and political phenomena. 
As the author states in the preface, “having found my own unifying way of looking at issues as diverse as competition and the two-party system, divorce and the American character, black power and the failure of 'unhappy' top officials to resign over Vietnam, I decided to let myself go a little.”}, + isbn = {978-0-674-27660-4}, + langid = {english}, + pagetotal = {180}, + file = {/home/nathante/Zotero/storage/87VQQN7Z/Hirschman - 1970 - Exit, Voice, and Loyalty Responses to Decline in .pdf} +} + +@article{hofman_prediction_2017, + title = {Prediction and Explanation in Social Systems}, + author = {Hofman, Jake M. and Sharma, Amit and Watts, Duncan J.}, + date = {2017}, + journaltitle = {Science}, + volume = {355}, + number = {6324}, + eprint = {28154051}, + eprinttype = {pmid}, + pages = {486--488}, + issn = {0036-8075, 1095-9203}, + abstract = {Historically, social scientists have sought out explanations of human and social phenomena that provide interpretable causal mechanisms, while often ignoring their predictive accuracy. We argue that the increasingly computational nature of social science is beginning to reverse this traditional bias against prediction; however, it has also highlighted three important issues that require resolution. First, current practices for evaluating predictions must be better standardized. Second, theoretical limits to predictive accuracy in complex social systems must be better characterized, thereby setting expectations for what can be predicted or explained. Third, predictive accuracy and interpretability must be recognized as complements, not substitutes, when evaluating explanations. Resolving these three issues will lead to better, more replicable, and more useful social science.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/ISWU5DEQ/Hofman et al. 
- 2017 - Prediction and explanation in social systems.pdf;/home/nathante/Zotero/storage/TSUJV7Y3/486.html} +} + +@incollection{hollingshead_fostering_2002, + ids = {hollingshead_fostering_2002-1}, + title = {Fostering Intranet Knowledge Sharing: An Integration of Transactive Memory and Public Goods Approaches}, + shorttitle = {Fostering Intranet Knowledge Sharing}, + booktitle = {Distributed Work}, + author = {Hollingshead, Andrea B. and Fulk, Janet and Monge, Peter}, + date = {2002}, + pages = {335--355}, + publisher = {{Boston Review}}, + location = {{Cambridge, MA, US}}, + abstract = {Intranets--company Web sites designed for internal use--are an important technological innovation in many organizations that can aid in knowledge management, expertise recognition, and communication. This chapter identifies the conditions under which members of work groups are more likely to contribute to the development of intranets and the conditions under which intranets are more likely to result in more efficient and effective knowledge acquisition and dissemination. To that end, two theories developed to examine nontechnological systems are integrated and extended to intranets and computer-based knowledge systems: the theory of transactive memory and the public goods theory of collective action. Transactive memory theory is useful for predicting how organizational members use intranets to acquire, store, and retrieve knowledge. Public goods theory is useful for predicting which, how much, and when members will contribute and retrieve knowledge on intranets. (PsycInfo Database Record (c) 2020 APA, all rights reserved)}, + isbn = {978-0-262-08305-8}, + keywords = {Electronic Communication,Expert Systems,Information Systems,Organizational Effectiveness,Theories,Work Teams,Working Conditions}, + file = {/home/nathante/Zotero/storage/D34UXRQE/Hollingshead et al. 
- Fostering Intranet Knowledge Sharing An Integrati.pdf;/home/nathante/Zotero/storage/3A3Y658C/2002-17012-014.html} +} + +@online{hollister_twitter_2021, + title = {Twitter Is Deleting {{Trump}}’s Attempts to Circumvent Ban}, + author = {Hollister, Sean}, + date = {2021-01-08T20:45:51-05:00}, + abstract = {He suggested he would build his own platform in now-deleted messages.}, + langid = {english}, + organization = {{The Verge}}, + file = {/home/nathante/Zotero/storage/A7QDJJ3Y/trump-tried-to-evade-his-ban-with-potus-but-those-tweets-were-instantly-deleted.html} +} + +@inproceedings{hwang_why_2021, + title = {Why Do People Participate in Small Online Communities?}, + booktitle = {Proceedings of the {{ACM}} on {{Human}}-{{Computer Interaction}}}, + author = {Hwang, Sohyeon and Foote, Jeremy D.}, + date = {2021}, + eventtitle = {{{CSCW}}}, + keywords = {Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/H4FXQNBH/Hwang and Foote - 2021 - Why do people participate in small online communit.pdf;/home/nathante/Zotero/storage/UQYVIDWS/Hwang and Foote - 2021 - Why do people participate in small online communit.pdf} +} + +@article{iriberri_life-cycle_2009, + title = {A Life-Cycle Perspective on Online Community Success}, + author = {Iriberri, Alicia and Leroy, Gondy}, + date = {2009-02}, + journaltitle = {ACM Computing Surveys}, + shortjournal = {ACM Comput. Surv.}, + volume = {41}, + number = {2}, + pages = {1--29}, + issn = {0360-0300, 1557-7341}, + abstract = {Using the information systems lifecycle as a unifying framework, we review online communities research and propose a sequence for incorporating success conditions during initiation and development to increase their chances of becoming a successful community, one in which members participate actively and develop lasting relationships. 
Online communities evolve following distinctive lifecycle stages and recommendations for success are more or less relevant depending on the developmental stage of the online community. In addition, the goal of the online community under study determines the components to include in the development of a successful online community. Online community builders and researchers will benefit from this review of the conditions that help online communities succeed.}, + langid = {english}, + keywords = {lifecycle,literature review,Online communities,success factors}, + file = {/home/nathante/Zotero/storage/3BRDSVKE/Iriberri and Leroy - 2009 - A life-cycle perspective on online community succe.pdf;/home/nathante/Zotero/storage/3V8BAWQT/Iriberri and Leroy - 2009 - A life-cycle perspective on online community succe.pdf} +} + +@article{ives_estimating_2003, + title = {Estimating {{Community Stability}} and {{Ecological Interactions}} from {{Time}}-{{Series Data}}}, + author = {Ives, A. R. and Dennis, B. and Cottingham, K. L. and Carpenter, S. R.}, + date = {2003-05}, + journaltitle = {Ecological Monographs}, + shortjournal = {Ecological Monographs}, + volume = {73}, + number = {2}, + pages = {301--330}, + issn = {0012-9615}, + abstract = {Natural ecological communities are continuously buffeted by a varying environment, often making it difficult to measure the stability of communities using concepts requiring the existence of an equilibrium point. Instead of an equilibrium point, the equilibrial state of communities subject to environmental stochasticity is a stationary distribution, which is characterized by means, variances, and other statistical moments. Here, we derive three properties of stochastic multispecies communities that measure different characteristics associated with community stability. These properties can be estimated from multispecies time-series data using first-order multivariate autoregressive (MAR(1)) models. 
We demonstrate how to estimate the parameters of MAR(1) models and obtain confidence intervals for both parameters and the measures of stability. We also address the problem of estimation when there is observation (measurement) error. To illustrate these methods, we compare the stability of the planktonic communities in three lakes in which nutrient loading and planktivorous fish abundance were experimentally manipulated. MAR(1) models and the statistical methods we present can be used to identify dynamically important interactions between species and to test hypotheses about stability and other dynamical properties of naturally varying ecological communities. Thus, they can be used to integrate theoretical and empirical studies of community dynamics.}, + langid = {english}, + keywords = {_tablet}, + file = {/home/nathante/Zotero/storage/S394LE96/Ives et al_2003_Estimating Community Stability and Ecological Interactions from Time-Series Data.pdf} +} + +@article{jarvenpaa_communication_1998, + ids = {jarvenpaa_communication_1998-1}, + title = {Communication and Trust in Global Virtual Teams}, + author = {Jarvenpaa, Sirkka L. and Leidner, Dorothy E.}, + date = {1998-06-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + volume = {3}, + number = {4}, + pages = {0--0}, + issn = {1083-6101}, + abstract = {This paper explores the challenges of creating and maintaining trust in a global virtual team whose members transcend time, space, and culture. The challenges are highlighted by integrating recent literature on work teams, computer-mediated communication groups, cross-cultural communication, and interpersonal and organizational trust. 
To explore these challenges empirically, we report on a series of descriptive case studies on global virtual teams whose members were separated by location and culture, were challenged by a common collaborative project, and for whom the only economically and practically viable communication medium was asynchronous and synchronous computer-mediated communication. The results suggest that global virtual teams may experience a form of ‘swift’ trust but such trust appears to be very fragile and temporal. The study raises a number of issues to be explored and debated by future research. Pragmatically, the study describes communication behaviors that might facilitate trust in global virtual teams.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/CULRNXBT/abstract.html;/home/nathante/Zotero/storage/VMME55NA/4584374.html} +} + +@article{jiang_linguistic_2018, + title = {Linguistic {{Signals}} under {{Misinformation}} and {{Fact}}-{{Checking}}: Evidence from {{User Comments}} on {{Social Media}}}, + shorttitle = {Linguistic {{Signals}} under {{Misinformation}} and {{Fact}}-{{Checking}}}, + author = {Jiang, Shan and Wilson, Christo}, + date = {2018-11-01}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + pages = {82:1--82:23}, + abstract = {Misinformation and fact-checking are opposite forces in the news environment: the former creates inaccuracies to mislead people, while the latter provides evidence to rebut the former. These news articles are often posted on social media and attract user engagement in the form of comments. In this paper, we investigate linguistic (especially emotional and topical) signals expressed in user comments in the presence of misinformation and fact-checking. 
We collect and analyze a dataset of 5,303 social media posts with 2,614,374 user comments from Facebook, Twitter, and YouTube, and associate these posts to fact-check articles from Snopes and PolitiFact for veracity rulings (i.e., from true to false). We find that linguistic signals in user comments vary significantly with the veracity of posts, e.g., we observe more misinformation-awareness signals and extensive emoji and swear word usage with falser posts. We further show that these signals can help to detect misinformation. In addition, we find that while there are signals indicating positive effects after fact-checking, there are also signals indicating potential "backfire" effects.}, + issue = {CSCW}, + keywords = {"fake news",fact-checking,misinformation,social computing,social media}, + file = {/home/nathante/Zotero/storage/6IZA4RDR/Jiang_Wilson_2018_Linguistic Signals under Misinformation and Fact-Checking.pdf} +} + +@article{jiang_moderation_2019, + title = {Moderation Challenges in Voice-Based Online Communities on {{Discord}}}, + author = {Jiang, Jialun "Aaron" and Kiene, Charles and Middler, Skyler and Brubaker, Jed R. and Fiesler, Casey}, + date = {2019}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + series = {{{CSCW}} '19}, + volume = {3}, + pages = {23}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/ZLSXRJ5J/Jiang et al. - 2019 - Moderation challenges in voice-based online commun.pdf} +} + +@online{jing_sameness_2019, + ids = {jing_sameness_2019-1}, + title = {Sameness {{Attracts}}, {{Novelty Disturbs}}, but {{Outliers Flourish}} in {{Fanfiction Online}}}, + author = {Jing, Elise and DeDeo, Simon and Ahn, Yong-Yeol}, + date = {2019-04-16}, + eprint = {1904.07741}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {The nature of what people enjoy is not just a central question for the creative industry, it is a driving force of cultural evolution. 
It is widely believed that successful cultural products balance novelty and conventionality: they provide something familiar but at least somewhat divergent from what has come before, and occupy a satisfying middle ground between "more of the same" and "too strange". We test this belief using a large dataset of over half a million works of fanfiction from the website Archive of Our Own (AO3), looking at how the recognition a work receives varies with its novelty. We quantify the novelty through a term-based language model, and a topic model, in the context of existing works within the same fandom. Contrary to the balance theory, we find that the lowest-novelty are the most popular and that popularity declines monotonically with novelty. A few exceptions can be found: extremely popular works that are among the highest novelty within the fandom. Taken together, our findings not only challenge the traditional theory of the hedonic value of novelty, they invert it: people prefer the least novel things, are repelled by the middle ground, and have an occasional enthusiasm for extreme outliers. It suggests that cultural evolution must work against inertia --- the appetite people have to continually reconsume the familiar, and may resemble a punctuated equilibrium rather than a smooth evolution.}, + archiveprefix = {arXiv}, + keywords = {cultural evolution,novelty}, + file = {/home/nathante/Zotero/storage/PU4D24ZW/Jing et al. 
- 2019 - Sameness Attracts, Novelty Disturbs, but Outliers .pdf;/home/nathante/Zotero/storage/SJNNBUWE/Jing et al_2019_Sameness Attracts, Novelty Disturbs, but Outliers Flourish in Fanfiction Online.pdf;/home/nathante/Zotero/storage/ENUI7ANA/1904.html;/home/nathante/Zotero/storage/HWUJ5XXT/1904.html} +} + +@article{johnson_communication_2009, + title = {Communication {{Communities}} or “{{CyberGhettos}}?”: A {{Path Analysis Model Examining Factors}} That {{Explain Selective Exposure}} to {{Blogs}}}, + shorttitle = {Communication {{Communities}} or “{{CyberGhettos}}?}, + author = {Johnson, Thomas J. and Bichard, Shannon L. and Zhang, Weiwu}, + date = {2009-10-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {15}, + number = {1}, + pages = {60--82}, + publisher = {{Oxford Academic}}, + abstract = {This study used an online panel of Internet users to examine the degree to which blog users practice selective exposure when seeking political information. The research employed a path analysis model to explore the extent to which exposure to offline and online discussion of political issues, and offline and online media use, as well as political variables and demographic factors, predict an individual's likelihood to engage in selective exposure to blogs. The findings indicate that respondents did practice selective exposure to blogs, predominantly those who are heavy blog users, politically active both online and offline, partisan, and highly educated.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/VXJLUSI9/Johnson et al. 
- 2009 - Communication Communities or “CyberGhettos” A Pa.pdf;/home/nathante/Zotero/storage/R9C73297/4064810.html} +} + +@article{johnson_emergence_2014, + title = {Emergence of Power Laws in Online Communities: The Role of Social Mechanisms and Preferential Attachment.}, + shorttitle = {Emergence of {{Power Laws}} in {{Online Communities}}}, + author = {Johnson, Steven L. and Faraj, Samer and Kudaravalli, Srinivas}, + date = {2014}, + journaltitle = {Management Information Systems Quarterly}, + volume = {38}, + number = {3}, + pages = {795--808}, + file = {/home/nathante/Zotero/storage/MPZJHWCB/Johnson et al. - 2014 - Emergence of power laws in online communities The.pdf;/home/nathante/Zotero/storage/525WPBUV/10.html} +} + +@inproceedings{jones_rscience_2019, + title = {R/Science: Challenges and {{Opportunities}} in {{Online Science Communication}}}, + shorttitle = {R/Science}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Jones, Ridley and Colusso, Lucas and Reinecke, Katharina and Hsieh, Gary}, + date = {2019-05-02}, + series = {{{CHI}} '19}, + pages = {1--14}, + publisher = {{Association for Computing Machinery}}, + location = {{Glasgow, Scotland Uk}}, + abstract = {Online discussion websites, such as Reddit's r/science forum, have the potential to foster science communication between researchers and the general public. However, little is known about who participates, what is discussed, and whether such websites are successful in achieving meaningful science discussions. To find out, we conducted a mixed-methods study analyzing 11,859 r/science posts and conducting interviews with 18 community members. Our results show that r/science facilitates rich information exchange and that the comments section provides a unique science communication document that guides engagement with scientific research. 
However, this community-sourced science communication comes largely from a knowledgeable public. We conclude with design suggestions for a number of critical problems that we uncovered: addressing the problem of topic newsworthiness and balancing broader participation and rigor.}, + isbn = {978-1-4503-5970-2}, + file = {/home/nathante/Zotero/storage/QJKUMC2A/Jones et al. - 2019 - rscience Challenges and Opportunities in Online .pdf} +} + +@article{jordan_evaluating_2019, + title = {Evaluating {{Probabilistic Forecasts}} with {{scoringRules}}}, + author = {Jordan, Alexander and Krüger, Fabian and Lerch, Sebastian}, + date = {2019-08-21}, + journaltitle = {Journal of Statistical Software}, + volume = {90}, + number = {1}, + pages = {1--37}, + issn = {1548-7660}, + issue = {1}, + langid = {english}, + keywords = {comparative evaluation,ensemble forecasts,out-of-sample evaluation,predictive distributions,proper scoring rules,R,score computation}, + file = {/home/nathante/Zotero/storage/4FH4NMHR/Jordan et al_2019_Evaluating Probabilistic Forecasts with scoringRules.pdf} +} + +@article{joyce_predicting_2006, + title = {Predicting {{Continued Participation}} in {{Newsgroups}}}, + author = {Joyce, Elisabeth and Kraut, Robert E.}, + date = {2006-04-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {11}, + number = {3}, + pages = {723--747}, + issn = {1083-6101}, + abstract = {Turnover in online communities is very high, with most people who initially post a message to an online community never contributing again. In this paper, we test whether the responses that newcomers receive to their first posts influence the extent to which they continue to participate. The data come from initial posts made by 2,777 newcomers to six public newsgroups. 
We coded the content and valence of the initial post and its first response, if it received one, to see if these factors influenced newcomers’ likelihood of posting again. Approximately 61\% of newcomers received a reply to their initial post, and those who got a reply were 12\% more likely to post to the community again; their probability of posting again increased from 44\% to 56\%. They were more likely to receive a response if they asked a question or wrote a longer post. Surprisingly, the quality of the response they received—its emotional tone and whether it answered a newcomer’s question—did not influence the likelihood of the newcomer’s posting again.}, + file = {/home/nathante/Zotero/storage/KR2VSCNN/Joyce and Kraut - 2006 - Predicting Continued Participation in Newsgroups.pdf;/home/nathante/Zotero/storage/ZVL66I3I/Joyce and Kraut - 2006 - Predicting Continued Participation in Newsgroups.pdf;/home/nathante/Zotero/storage/VK44NCYI/4617705.html;/home/nathante/Zotero/storage/YXZPKK8E/Joyce and Kraut - 2006 - Predicting Continued Participation in Newsgroups.html} +} + +@inproceedings{kairam_life_2012, + title = {The Life and Death of Online Groups: Predicting Group Growth and Longevity}, + shorttitle = {The Life and Death of Online Groups}, + booktitle = {Proceedings of the Fifth {{ACM}} International Conference on {{Web}} Search and Data Mining}, + author = {Kairam, Sanjay Ram and Wang, Dan J. and Leskovec, Jure}, + date = {2012-02-08}, + series = {{{WSDM}} '12}, + pages = {673--682}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {We pose a fundamental question in understanding how to identify and design successful communities: What factors predict whether a community will grow and survive in the long term? 
Social scientists have addressed this question extensively by analyzing offline groups which endeavor to attract new members, such as social movements, finding that new individuals are influenced strongly by their ties to members of the group. As a result, prior work on the growth of communities has treated growth primarily as a diffusion processes, leading to findings about group evolution which can be difficult to explain. The proliferation of online social networks and communities, however, has created new opportunities to study, at a large scale and with very fine resolution, the mechanisms which lead to the formation, growth, and demise of online groups. In this paper, we analyze data from several thousand online social networks built on the Ning platform with the goal of understanding the factors contributing to the growth and longevity of groups within these networks. Specifically, we investigate the role that two types of growth (growth through diffusion and growth by other means) play during a group's formative stages from the perspectives of both the individual member and the group. Applying these insights to a population of groups of different ages and sizes, we build a model to classify groups which will grow rapidly over the short-term and long-term. Our model achieves over 79\% accuracy in predicting group growth over the following two months and over 78\% accuracy in predictions over the following two years. We utilize a similar approach to predict which groups will die within a year. The results of our combined analysis provide insight into how both early non-diffusion growth and a complex set of network constraints appear to contribute to the initial and continued growth and success of groups within social networks. 
Finally we discuss implications of this work for the design, maintenance, and analysis of online communities.}, + isbn = {978-1-4503-0747-5}, + keywords = {group formation,information diffusion,online communities,social networks}, + file = {/home/nathante/Zotero/storage/NS675EXH/Kairam et al_The Life and Death of Online Groups.pdf;/home/nathante/Zotero/storage/QZR8T2QH/Kairam et al_2012_The life and death of online groups.pdf} +} + +@book{kantz_nonlinear_2003, + title = {Nonlinear {{Time Series Analysis}}}, + author = {Kantz, Holger and Schreiber, Thomas}, + date = {2003}, + edition = {2}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge}}, + abstract = {The paradigm of deterministic chaos has influenced thinking in many fields of science. Chaotic systems show rich and surprising mathematical structures. In the applied sciences, deterministic chaos provides a striking explanation for irregular behaviour and anomalies in systems which do not seem to be inherently stochastic. The most direct link between chaos theory and the real world is the analysis of time series from real systems in terms of nonlinear dynamics. Experimental technique and data analysis have seen such dramatic progress that, by now, most fundamental properties of nonlinear dynamical systems have been observed in the laboratory. Great efforts are being made to exploit ideas from chaos theory wherever the data displays more structure than can be captured by traditional methods. 
Problems of this kind are typical in biology and physiology but also in geophysics, economics, and many other sciences.}, + isbn = {978-0-521-52902-0}, + file = {/home/nathante/Zotero/storage/BQVXZ6AD/519783E4E8A2C3DCD4641E42765309C7.html} +} + +@incollection{karumur_content_2018, + title = {Content Is {{King}}, {{Leadership Lags}}: Effects of {{Prior Experience}} on {{Newcomer Retention}} and {{Productivity}} in {{Online Production Groups}}}, + shorttitle = {Content Is {{King}}, {{Leadership Lags}}}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Karumur, Raghav Pavan and Yu, Bowen and Zhu, Haiyi and Konstan, Joseph A.}, + date = {2018-04-21}, + pages = {1--13}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Organizers of online groups often struggle to recruit members who can most effectively carry out the group's activities and remain part of the group over time. In a study of a sample of 30,000 new editors belonging to 1,054 English WikiProjects, we empirically examine the effects of generalized prior work-productivity experience (measured by overall prior article edits), prior leadership experience (measured by overall prior project edits), and localized prior work-productivity experience (measured by pre-joining article edits on a project) on early retention and productivity. We find that (1)generalized prior work-productivity experience is positively associated with retention, but negatively associated with productivity (2) prior leadership experience is negatively associated with both retention and productivity, and (3) localized prior work-productivity experience is positively associated with both retention and productivity within that focal project. 
We then discuss implications to inform the designs of early interventions aimed at group success.}, + isbn = {978-1-4503-5620-6}, + keywords = {learning transfer,newcomers,online communities,online groups,peer production,prior experience,productivity,resocialization,retention,subgroups,wikipedia,wikiprojects,withdrawal}, + file = {/home/nathante/Zotero/storage/YANJLZCB/Karumur et al. - 2018 - Content is King, Leadership Lags Effects of Prior.pdf} +} + +@article{katz_network_1985, + title = {Network {{Externalities}}, {{Competition}}, and {{Compatibility}}}, + author = {Katz, Michael L. and Shapiro, Carl}, + date = {1985}, + journaltitle = {The American Economic Review}, + volume = {75}, + number = {3}, + eprint = {1814809}, + eprinttype = {jstor}, + pages = {424--440}, + publisher = {{American Economic Association}}, + issn = {0002-8282}, + file = {/home/nathante/Zotero/storage/FPC475A5/Katz_Shapiro_1985_Network Externalities, Competition, and Compatibility.pdf} +} + +@article{kavanaugh_community_2005, + title = {Community {{Networks}}: Where {{Offline Communities Meet Online}}}, + shorttitle = {Community {{Networks}}}, + author = {Kavanaugh, Andrea and Carroll, John M. and Rosson, Mary Beth and Zin, Than Than and Reese, Debbie Denise}, + date = {2005-07-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {10}, + issn = {1083-6101}, + abstract = {This study explores the design and practice of the Blacksburg Electronic Village (BEV), a mature networked community. We describe findings from longitudinal survey data on the use and social impact of community computer networking. The survey data show that increased involvement with people, issues and community since going online is explained by education, extroversion and age. 
Using path models, we show that a person's sense of belonging and collective efficacy, group memberships, activism and social use of the Internet act as mediating variables. These findings extend evidence in support of the argument that Internet use can strengthen social contact, community engagement and attachment. Conversely, it underlines concern about the impact of computer networking on people with lower levels of education, extroversion, efficacy, and community belonging. We suggest design strategies and innovative tools for non-experts that might increase social interaction and improve usability for disadvantaged and underrepresented individuals and groups.}, + issue = {JCMC10417}, + file = {/home/nathante/Zotero/storage/IWBLRSS4/4614510.html} +} + +@inproceedings{keegan_analyzing_2016, + title = {Analyzing {{Organizational Routines}} in {{Online Knowledge Collaborations}}: A {{Case}} for {{Sequence Analysis}} in {{CSCW}}}, + shorttitle = {Analyzing {{Organizational Routines}} in {{Online Knowledge Collaborations}}}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer}}-{{Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Keegan, Brian and Lev, Shakked and Arazy, Ofer}, + date = {2016}, + series = {{{CSCW}} '16}, + pages = {1065--1079}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Research into socio-technical systems like Wikipedia has overlooked important structural patterns in the coordination of distributed work. This paper argues for a conceptual reorientation towards sequences as a fundamental unit of analysis for understanding work routines in online knowledge collaboration. We outline a research agenda for researchers in computer-supported cooperative work (CSCW) to understand the relationships, patterns, antecedents, and consequences of sequential behavior using methods already developed in fields like bio-informatics. 
Using a data set of 37,515 revisions from 16,616 unique editors to 96 Wikipedia articles as a case study, we analyze the prevalence and significance of different sequences of editing patterns. We illustrate the mixed method potential of sequence approaches by interpreting the frequent patterns as general classes of behavioral motifs. We conclude by discussing the methodological opportunities for using sequence analysis for expanding existing approaches to analyzing and theorizing about co-production routines in online knowledge collaboration.}, + isbn = {978-1-4503-3592-8}, + file = {/home/nathante/Zotero/storage/9AK33B8M/Keegan et al. - 2016 - Analyzing Organizational Routines in Online Knowle.pdf} +} + +@article{kiene_managing_2018, + title = {Managing Organizational Culture in Online Group Mergers}, + author = {Kiene, Charles and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2018}, + journaltitle = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + pages = {89:1--89:21}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/NV8YEK8W/Kiene et al. - 2018 - Managing organizational culture in online group me.pdf} +} + +@inproceedings{kiene_surviving_2016, + title = {Surviving an “{{Eternal September}}”: How an Online Community Managed a Surge of Newcomers}, + shorttitle = {Surviving an "{{Eternal September}}"}, + booktitle = {Proceedings of the 2016 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Kiene, Charles and Monroy-Hernández, Andrés and Hill, Benjamin Mako}, + date = {2016}, + pages = {1152--1156}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {We present a qualitative analysis of interviews with participants in the NoSleep community within Reddit where millions of fans and writers of horror fiction congregate. We explore how the community handled a massive, sudden, and sustained increase in new members. 
Although existing theory and stories like Usenet's infamous "Eternal September" suggest that large influxes of newcomers can hurt online communities, our interviews suggest that NoSleep survived without major incident. We propose that three features of NoSleep allowed it to manage the rapid influx of newcomers gracefully: (1) an active and well-coordinated group of administrators, (2) a shared sense of community which facilitated community moderation, and (3) technological systems that mitigated norm violations. We also point to several important trade-offs and limitations.}, + isbn = {978-1-4503-3362-7}, + keywords = {newcomers,norms and governance,online communities,peer production,qualitative methods}, + file = {/home/nathante/Zotero/storage/2YPT6BUL/Kiene et al. - 2016 - Surviving an Eternal September How an Online Co.pdf;/home/nathante/Zotero/storage/S9JX8XE5/Kiene et al. - 2016 - Surviving an “Eternal September” How an online co.pdf} +} + +@article{kiene_technological_2019, + title = {Technological Frames and User Innovation: Exploring Technological Change in Community Moderation Teams}, + shorttitle = {Technological Frames and User Innovation}, + author = {Kiene, Charles and Jiang, Jialun "Aaron" and Hill, Benjamin Mako}, + date = {2019-11-07}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {3}, + pages = {44:1--44:23}, + abstract = {Management of technological change in organizations is one of the most enduring topics in the literature on computer-supported cooperative work. The successful navigation of technological change is both more challenging and more critical in online communities that are entirely mediated by technology than it is in traditional organizations. This paper presents an analysis of 14 in-depth interviews with moderators of subcommunities of one technological platform (Reddit) that added communities on a new technological platform (Discord). 
Moderation teams experienced several problems related to moderating content at scale as well as a disconnect between the affordances of Discord and their assumptions based on their experiences on Reddit. We found that moderation teams used Discord's API to create scripts and bots that augmented Discord to make the platform work more like tools on Reddit. These tools were particularly important in communities struggling with scale. Our findings suggest that increasingly widespread end user programming allow users of social computing systems to innovate and deploy solutions to unanticipated design problems by transforming new technological platforms to align with their past expectations.}, + issue = {CSCW}, + keywords = {API,bots,chat,computer-mediated communication,discord,moderation,online communities,reddit,social computing,technological change}, + file = {/home/nathante/Zotero/storage/E2PDCY58/Kiene et al. - 2019 - Technological frames and user innovation explorin.pdf;/home/nathante/Zotero/storage/U7M6IZY4/Kiene et al. - 2019 - Technological Frames and User Innovation Explorin.pdf} +} + +@unpublished{kiene_why_2021, + title = {Why {{These Rules}}? Measuring {{How Adaptation}} and {{Leadership Shapes Online Community Governance}}}, + author = {Kiene, Charles and TeBlunthuis, Nathan and Hill, Benjamin Mako}, + date = {2021} +} + +@article{klein_quality_2017, + title = {Quality Standards, Service Orientation, and Power in {{Airbnb}} and {{Couchsurfing}}}, + author = {Klein, Maximilian and Zhao, Jinhao and Ni, Jiajun and Johnson, Isaac and Hill, Benjamin Mako and Zhu, Haiyi}, + date = {2017}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. 
Interact.}, + volume = {1}, + pages = {58:1--58:21}, + issn = {2573-0142}, + abstract = {Although Couchsurfing and Airbnb are both online communities that help users host strangers in their homes, they differ in an important sense: Couchsurfing prohibits monetary payment while Airbnb is built around it. We conducted interviews with users experienced on both Couchsurfing and Airbnb ("dual-users") to better understand systemic differences between the platforms. Based on these interviews we propose that, compared to Couchsurfing, Airbnb: (1) appears to require higher quality services, (2) places more emphasis on places over people, and (3) shifts social power from hosts to guests. Using public profiles from both platforms, we present analyses exploring each theme. Finally, we present evidence showing that Airbnb's growth has coincided with a decline in Couchsurfing. Taken together, our findings paint a complex picture of the changing character of network hospitality.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/WQS43NPP/Klein et al. - 2017 - Quality Standards, Service Orientation, and Power .pdf} +} + +@article{koh_encouraging_2007, + title = {Encouraging Participation in Virtual Communities}, + author = {Koh, Joon and Kim, Young-Gul and Butler, Brian and Bock, Gee-Woo}, + date = {2007-02-01}, + journaltitle = {Communications of the ACM}, + shortjournal = {Commun. ACM}, + volume = {50}, + number = {2}, + pages = {68--73}, + issn = {0001-0782}, + langid = {english}, + file = {/home/nathante/Zotero/storage/TP9FPWMG/Koh et al. 
- 2007 - Encouraging participation in virtual communities.pdf} +} + +@article{koop_large_2013, + title = {Large Time-Varying Parameter {{VARs}}}, + author = {Koop, Gary and Korobilis, Dimitris}, + date = {2013-12-01}, + journaltitle = {Journal of Econometrics}, + shortjournal = {Journal of Econometrics}, + series = {Dynamic {{Econometric Modeling}} and {{Forecasting}}}, + volume = {177}, + number = {2}, + pages = {185--198}, + issn = {0304-4076}, + abstract = {In this paper, we develop methods for estimation and forecasting in large time-varying parameter vector autoregressive models (TVP-VARs). To overcome computational constraints, we draw on ideas from the dynamic model averaging literature which achieve reductions in the computational burden through the use of forgetting factors. We then extend the TVP-VAR~so that its dimension can change over time. For instance, we can have a large TVP-VAR as the forecasting model at some points in time, but a smaller TVP-VAR at others. A final extension lies in the development of a new method for estimating, in a time-varying manner, the parameter(s)~of the shrinkage priors commonly-used with large VARs. These extensions are operationalized through the use of forgetting factor methods and are, thus, computationally simple. An empirical application involving forecasting inflation, real output and interest rates demonstrates the feasibility and usefulness of our approach.}, + file = {/home/nathante/Zotero/storage/3UP4CT6P/Koop and Korobilis - 2013 - Large time-varying parameter VARs.pdf;/home/nathante/Zotero/storage/9SXVW4A8/S0304407613000845.html} +} + +@article{kou_understanding_2018, + title = {Understanding {{Social Roles}} in an {{Online Community}} of {{Volatile Practice}}: A {{Study}} of {{User Experience Practitioners}} on {{Reddit}}}, + shorttitle = {Understanding {{Social Roles}} in an {{Online Community}} of {{Volatile Practice}}}, + author = {Kou, Yubo and Gray, Colin M. and Toombs, Austin L. 
and Adams, Robin S.}, + date = {2018-12-21}, + journaltitle = {ACM Transactions on Social Computing}, + shortjournal = {Trans. Soc. Comput.}, + volume = {1}, + number = {4}, + pages = {17:1--17:22}, + issn = {2469-7818}, + abstract = {Community of practice (CoP) is a primary framework in social computing research that addresses learning and organizing specific practices in online communities. However, the classic CoP theory does not provide a detailed account for how practices change or evolve. Against the backdrop of a rapidly changing occupational landscape, it is crucial to understand how people participate in online communities focused on practices that have a volatile nature, as well as how social computing tools can best support them. In this article, we examine user experience (UX) design as a volatile practice that has no coherent body of knowledge and lacks a concrete path for newcomers to become a UX professional. Our study site is the “/r/userexperience” subreddit, an online UX community where practitioners socialize and learn. Using a mixed-methods approach, we identified five distinct social roles in relation to knowledge production and dissemination in the online community of volatile practice. We demonstrate that knowledge production is highly distributed, involving the participation and sensemaking of community members of varied levels of experience. We discuss how online platforms support online community of volatile practice and how our findings contribute to the CoP literature.}, + file = {/home/nathante/Zotero/storage/NWK464BS/Kou et al. - 2018 - Understanding Social Roles in an Online Community .pdf} +} + +@article{krafft_disinformation_2020, + title = {Disinformation by {{Design}}: The {{Use}} of {{Evidence Collages}} and {{Platform Filtering}} in a {{Media Manipulation Campaign}}}, + shorttitle = {Disinformation by {{Design}}}, + author = {Krafft, P. M. 
and Donovan, Joan}, + date = {2020-03-03}, + journaltitle = {Political Communication}, + volume = {37}, + number = {2}, + pages = {194--214}, + publisher = {{Routledge}}, + issn = {1058-4609}, + abstract = {Disinformation campaigns such as those perpetrated by far-right groups in the United States seek to erode democratic social institutions. Looking to understand these phenomena, previous models of disinformation have emphasized identity-confirmation and misleading presentation of facts to explain why such disinformation is shared. A risk of these accounts, which conjure images of echo chambers and filter bubbles, is portraying people who accept disinformation as relatively passive recipients or conduits. Here we conduct a case study of tactics of disinformation to show how platform design and decentralized communication contribute to advancing the spread of disinformation even when that disinformation is continuously and actively challenged where it appears. Contrary to a view of disinformation flowing within homogeneous echo chambers, in our case study we observe substantial skepticism against disinformation narratives as they form. To examine how disinformation spreads amidst skepticism in this case, we employ a document-driven multi-site trace ethnography to analyze a contested rumor that crossed anonymous message boards, the conservative media ecosystem, and other platforms. We identify two important factors that filtered out skepticism and contested explanations, which facilitated the transformation of this rumor into a disinformation campaign: (1) the aggregation of information into evidence collages—image files that aggregate positive evidence—and (2) platform filtering—the decontextualization of information as these claims crossed platforms. 
Our findings provide an elucidation of “trading up the chain” dynamics explored by previous researchers and a counterpoint to the relatively mechanistic accounts of passive disinformation propagation that dominate the quantitative literature. We conclude with a discussion of how these factors relate to the communication power available to disparate groups at different times, as well as practical implications for inferring intent from social media traces and practical implications for the design of social media platforms.}, + keywords = {4chan,Alt-right,disinformation,media manipulation,tactics}, + annotation = {\_eprint: https://doi.org/10.1080/10584609.2019.1686094}, + file = {/home/nathante/Zotero/storage/3EQB8KSG/Krafft_Donovan_2020_Disinformation by Design.pdf;/home/nathante/Zotero/storage/MRRVEJWU/10584609.2019.html} +} + +@book{kraut_building_2012, + ids = {kraut2012building,kraut_building_2012-1}, + title = {Building Successful Online Communities: Evidence-Based Social Design}, + author = {Kraut, Robert E. 
and Resnick, Paul and Kiesler, Sara}, + date = {2012}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + abstract = {Uses insights from social science, psychology, and economics to offer advice on planning and managing an online community.}, + isbn = {978-0-262-29831-5}, + langid = {english}, + keywords = {design,foundations of social computing}, + file = {/home/nathante/Zotero/storage/B4XSKAVW/04-kraut10-Newcomers-current.pdf;/home/nathante/Zotero/storage/CX4KDC3G/01-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/IJCEWA6L/06-Resnick10-Startup-current.pdf;/home/nathante/Zotero/storage/JEWAVXHG/02-Resnick10-Intro-current.pdf;/home/nathante/Zotero/storage/RIM4D9KS/05-kiesler10-Regulation-current.pdf;/home/nathante/Zotero/storage/S6Z28BBS/03-Ren10-Commitment-current.pdf} +} + +@inproceedings{kraut_role_2014, + ids = {kraut_role_2014-1}, + title = {The {{Role}} of {{Founders}} in {{Building Online Groups}}}, + booktitle = {Proceedings of the 17th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Kraut, Robert E. and Fiore, Andrew T.}, + date = {2014}, + series = {{{CSCW}} '14}, + pages = {722--732}, + publisher = {{ACM}}, + location = {{Baltimore, Maryland, USA}}, + abstract = {As a class, online groups are popular, but many die before they become successful. This research traced the fate of 472,231 new online groups. By the end of a 3-month observation period, 57\% of the groups had died, ceasing to post new content. Founders' human and social capital before the group was formed, the decisions they made when they created the group and their behavior in the group during its first week all predicted group survival. Many of the results suggest that founders create more successful groups if they have more resources (e.g., more online friends) and opportunities for acquiring relevant skills (e.g., more experience with online groups) and are more active in their group. 
However, founders who are too controlling seem to present a threat to their groups. Their groups are more likely to fail if they are the only group administrator, if they have ties to all group members and if they were responsible for adding all group members.}, + isbn = {978-1-4503-2540-0}, + keywords = {birth,death,facebook,founder,online groups}, + file = {/home/nathante/Zotero/storage/EXXNCL5Q/Kraut_Fiore_2014_The role of founders in building online groups.pdf;/home/nathante/Zotero/storage/Z25RMXV6/Kraut and Fiore - 2014 - The Role of Founders in Building Online Groups.pdf;/home/nathante/Zotero/storage/BPWDS3GX/citation.html} +} + +@book{kropotkin_mutual_2012, + title = {Mutual Aid: A Factor of Evolution}, + shorttitle = {Mutual {{Aid}}}, + author = {Kropotkin, Peter}, + date = {2012-05-02}, + origdate = {1902}, + publisher = {{Courier Corporation}}, + abstract = {In this cornerstone of modern liberal social theory, Peter Kropotkin states that the most effective human and animal communities are essentially cooperative, rather than competitive. Kropotkin based this classic on his observations of natural phenomena and history, forming a work of stunning and well-reasoned scholarship. Essential to the understanding of human evolution as well as social organization, it offers a powerful counterpoint to the tenets of Social Darwinism. It also cites persuasive evidence of human nature's innate compatibility with anarchist society. "Kropotkin's basic argument is correct," noted evolutionary biologist Stephen Jay Gould. "Struggle does occur in many modes, and some lead to cooperation among members of a species as the best pathway to advantage for individuals." Anthropologist Ashley Montagu declared that "Mutual Aid will never be any more out of date than will the Declaration of Independence. 
New facts may increasingly become available, but we can already see that they will serve largely to support Kropotkin's conclusion that 'in the ethical progress of man, mutual support—not mutual struggle—has had the leading part.'" Physician and author Alex Comfort asserted that "Kropotkin profoundly influenced human biology by his theory of Mutual Aid. . . . He was one of the first systematic students of animal communities, and may be regarded as the founder of modern social ecology."}, + isbn = {978-0-486-12153-6}, + langid = {english}, + pagetotal = {338} +} + +@article{kubiszewski_production_2010, + title = {The Production and Allocation of Information as a Good That Is Enhanced with Increased Use}, + author = {Kubiszewski, Ida and Farley, Joshua and Costanza, Robert}, + date = {2010-04-01}, + journaltitle = {Ecological Economics}, + shortjournal = {Ecological Economics}, + series = {Special {{Section}} - {{Payments}} for {{Environmental Services}}: Reconciling {{Theory}} and {{Practice}}}, + volume = {69}, + number = {6}, + pages = {1344--1354}, + issn = {0921-8009}, + abstract = {Information has some unique characteristics. Unlike most other goods and services, it is neither rival (use by one prevents use by others) nor non-rival (use by one does not affect use by others), but is enhanced with increased use, or ‘additive’. Therefore a unique allocation system for both the production and consumption of information is needed. Under the current market-based allocation system, production of information is often limited through the exclusive rights produced by patents and copyrights. This limits scientists' ability to share and build on each other's knowledge. We break the problem down into three separate questions: (1) do markets generate the type of information most important for modern society? (2) are markets the most appropriate institution for producing that information? 
and (3) once information is produced, are markets the most effective way of maximizing the social value of that information? We conclude that systematic market failures make it unlikely that markets will generate the most important types of information, while the unique characteristics of information reduce the cost-effectiveness of markets in generating information and in maximizing its social value. We then discuss alternative methods that do not have these shortcomings, and that would lead to greater overall economic efficiency, social justice and ecological sustainability. These methods include monetary prizes, publicly funded research from which the produced information is released into the public domain, and status driven incentive structures like those in academia and the “open-source” community.}, + langid = {english}, + keywords = {Allocation,Anti-rival,Copyrights,Information,Intellectual property rights,Knowledge,Market failure,Patents}, + file = {/home/nathante/Zotero/storage/DX84YZM7/S092180091000039X.html} +} + +@book{kuhn_structure_1970, + ids = {kuhn_structure_2015}, + title = {The Structure of Scientific Revolutions}, + author = {Kuhn, Thomas S}, + date = {1970}, + publisher = {{University of Chicago Press}}, + langid = {english}, + annotation = {OCLC: 959412835} +} + +@inproceedings{kumar_community_2018, + ids = {kumar_community_2018-1}, + title = {Community {{Interaction}} and {{Conflict}} on the {{Web}}}, + booktitle = {Proceedings of the 2018 {{World Wide Web Conference}}}, + author = {Kumar, Srijan and Hamilton, William L. and Leskovec, Jure and Jurafsky, Dan}, + date = {2018-04-23}, + series = {{{WWW}} '18}, + pages = {933--943}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + location = {{Lyon, France}}, + abstract = {Users organize themselves into communities on web platforms. These communities can interact with one another, often leading to conflicts and toxic interactions. 
However, little is known about the mechanisms of interactions between communities and how they impact users. Here we study intercommunity interactions across 36,000 communities on Reddit, examining cases where users of one community are mobilized by negative sentiment to comment in another community. We show that such conflicts tend to be initiated by a handful of communities---less than 1\% of communities start 74\% of conflicts. While conflicts tend to be initiated by highly active community members, they are carried out by significantly less active members. We find that conflicts are marked by formation of echo chambers, where users primarily talk to other users from their own community. In the long-term, conflicts have adverse effects and reduce the overall activity of users in the targeted communities. Our analysis of user interactions also suggests strategies for mitigating the negative impact of conflicts---such as increasing direct engagement between attackers and defenders. Further, we accurately predict whether a conflict will occur by creating a novel LSTM model that combines graph embeddings, user, community, and text features. This model can be used to create an early-warning system for community moderators to prevent conflicts. Altogether, this work presents a data-driven view of community interactions and conflict, and paves the way towards healthier online communities.}, + isbn = {978-1-4503-5639-8}, + keywords = {antisocial behavior,community,conflict,interaction,intercommunity,society,web}, + file = {/home/nathante/Zotero/storage/3R7J48EQ/Kumar et al_2018_Community Interaction and Conflict on the Web.pdf;/home/nathante/Zotero/storage/FPJ44933/Kumar et al. - 2018 - Community Interaction and Conflict on the Web.pdf;/home/nathante/Zotero/storage/U6GYGZDS/Kumar_et_al-2018-Community_interaction_conflict-WWW.pdf} +} + +@article{lai_can_2014, + title = {Can {{Our Group Survive}}? 
An {{Investigation}} of the {{Evolution}} of {{Mixed}}-{{Mode Groups}}}, + shorttitle = {Can {{Our Group Survive}}?}, + author = {Lai, Chih-Hui}, + date = {2014-07-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {19}, + number = {4}, + pages = {839--854}, + issn = {1083-6101}, + abstract = {Applying an ecological and evolutionary perspective, this study examines the evolution and the sustainability of “mixed-mode groups,” a type of voluntary association created and organized online to interact physically in geographically defined ways. Meetup.com is a website that facilitates the creation and coordination of mixed-mode groups. Analysis of interviews with 34 Meetup group organizers and a longitudinal analysis of 100 randomly selected Meetup groups revealed the evolutionary processes at the group and population level, respectively. Specifically, the ecological factor of population density, the demographic factor of group age, the group's profit orientation, experience of leadership change and shared leadership, and external ties played decisive roles in predicting group survival. Implications of the findings for theoretical and practical contributions are discussed.}, + file = {/home/nathante/Zotero/storage/N3RD6DF2/Lai_2014_Can Our Group Survive.pdf;/home/nathante/Zotero/storage/9UKKZT3W/4067560.html} +} + +@article{lai_understanding_2014, + ids = {lai_understanding_2014-1}, + title = {Understanding the Evolution of Bona Fide Mixed-Mode Groups: An Example of {{Meetup}} Groups}, + shorttitle = {Understanding the Evolution of Bona Fide Mixed-Mode Groups}, + author = {Lai, Chih-Hui}, + date = {2014}, + journaltitle = {First Monday}, + issn = {1396-0466}, + abstract = {This study examines the evolution of an emerging form of social organization: mixed–mode groups. These are Internet–established but operate as in–person voluntary associations. 
Through longitudinal observations and interviews with 34 group organizers of Meetup.com, a good example of mixed–mode groups, findings of this study revealed the iterative and simultaneous variation–selection–retention (V–S–R) mechanisms enacted by groups as they evolved. Building on permeable boundaries and multiple memberships, these bona fide groups also exhibited different forms of interaction with other groups as well as the local community.}, + langid = {english}, + keywords = {bona fide groups,ecology and evolutionary,mixed modalities,social network}, + file = {/home/nathante/Zotero/storage/4Z2ZDLTZ/4681.html;/home/nathante/Zotero/storage/KRN67LWA/4681.html} +} + +@article{lakhani_how_2003, + title = {How Open Source Software Works: "{{Free}}" User-to-User Assistance}, + shorttitle = {How Open Source Software Works}, + author = {Lakhani, Karim R. and von Hippel, Eric}, + options = {useprefix=true}, + date = {2003}, + journaltitle = {Research Policy}, + volume = {32}, + number = {6}, + pages = {923--943}, + abstract = {Research into free and open source software development projects has so far largely focused on how the major tasks of software development are organized and motivated. But a complete project requires the execution of "mundane but necessary" tasks as well. In this paper, we explore how the mundane but necessary task of field support is organized in the case of Apache web server software, and why some project participants are motivated to provide this service gratis to others. We find that the Apache field support system functions effectively. We also find that, when we partition the help system into its component tasks, 98\% of the effort expended by information providers in fact returns direct learning benefits to those providers. This finding considerably reduces the puzzle of why information providers are willing to perform this task "for free." 
Implications are discussed.}, + keywords = {Econometrics,FOSS,Innovation}, + file = {/home/nathante/Zotero/storage/TZST9JHU/Lakhani and von Hippel - 2003 - How open source software works.pdf} +} + +@inproceedings{lam_wp:clubhouse?:_2011, + title = {{{WP}}:Clubhouse?: An {{Exploration}} of {{Wikipedia}}'s {{Gender Imbalance}}}, + shorttitle = {{{WP}}}, + booktitle = {Proceedings of the 7th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Lam, Shyong (Tony) K. and Uduwage, Anuradha and Dong, Zhenhua and Sen, Shilad and Musicant, David R. and Terveen, Loren and Riedl, John}, + date = {2011}, + series = {{{WikiSym}} '11}, + pages = {1--10}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Wikipedia has rapidly become an invaluable destination for millions of information-seeking users. However, media reports suggest an important challenge: only a small fraction of Wikipedia's legion of volunteer editors are female. In the current work, we present a scientific exploration of the gender imbalance in the English Wikipedia's population of editors. We look at the nature of the imbalance itself, its effects on the quality of the encyclopedia, and several conflict-related factors that may be contributing to the gender gap. Our findings confirm the presence of a large gender gap among editors and a corresponding gender-oriented disparity in the content of Wikipedia's articles. Further, we find evidence hinting at a culture that may be resistant to female participation.}, + isbn = {978-1-4503-0909-7}, + file = {/home/nathante/Zotero/storage/EUWCPP57/Lam et al. 
- 2011 - WPClubhouse An Exploration of Wikipedia's Gende.pdf;/home/nathante/Zotero/storage/KR457VCD/p1-lam.pdf} +} + +@inproceedings{lampe_follow_2005, + title = {Follow the (Slash) Dot: Effects of Feedback on New Members in an Online Community}, + shorttitle = {Follow the (Slash) Dot}, + booktitle = {Proceedings of the 2005 International {{ACM SIGGROUP}} Conference on {{Supporting}} Group Work}, + author = {Lampe, Cliff and Johnston, Erik}, + date = {2005}, + pages = {11--20}, + publisher = {{ACM}}, + location = {{Sanibel Island, Florida, USA}}, + eventtitle = {{{GROUP}}}, + file = {/home/nathante/Zotero/storage/YHZ4RP4U/p11-lampe.pdf} +} + +@inproceedings{lampe_motivations_2010, + title = {Motivations to Participate in Online Communities}, + booktitle = {Proceedings of the 28th International Conference on {{Human}} Factors in Computing Systems}, + author = {Lampe, Cliff and Wash, Rick and Velasquez, Alcides and Ozkaya, Elif}, + date = {2010}, + pages = {1927--1936}, + publisher = {{ACM}}, + location = {{Atlanta, Georgia, USA}}, + abstract = {A consistent theoretical and practical challenge in the design of socio-technical systems is that of motivating users to participate in and contribute to them. This study examines the case of Everything2.com users from the theoretical perspectives of Uses and Gratifications and Organizational Commitment to compare individual versus organizational motivations in user participation. We find evidence that users may continue to participate in a site for different reasons than those that led them to the site. Feelings of belonging to a site are important for both anonymous and registered users across different types of uses. Long-term users felt more dissatisfied with the site than anonymous users. Social and cognitive factors seem to be more important than issues of usability in predicting contribution to the site.}, + isbn = {978-1-60558-929-9}, + file = {/home/nathante/Zotero/storage/7NIQDKFR/Lampe et al. 
- 2010 - Motivations to participate in online communities.pdf} +} + +@inproceedings{lampe_slashdot_2004, + title = {Slash(Dot) and Burn: Distributed Moderation in a Large Online Conversation Space}, + shorttitle = {Slash(Dot) and Burn}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Lampe, Cliff and Resnick, Paul}, + date = {2004}, + series = {{{CHI}} '04}, + pages = {543--550}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Can a system of distributed moderation quickly and consistently separate high and low quality comments in an online conversation? Analysis of the site Slashdot.org suggests that the answer is a qualified yes, but that important challenges remain for designers of such systems. Thousands of users act as moderators. Final scores for comments are reasonably dispersed and the community generally agrees that moderations are fair. On the other hand, much of a conversation can pass before the best and worst comments are identified. Of those moderations that were judged unfair, only about half were subsequently counterbalanced by a moderation in the other direction. 
And comments with low scores, not at top-level, or posted late in a conversation were more likely to be overlooked by moderators.}, + isbn = {978-1-58113-702-6}, + keywords = {collaborative filtering,computer-mediated communication,recommender systems}, + file = {/home/nathante/Zotero/storage/J4ALSW7H/Lampe and Resnick - 2004 - Slash(dot) and burn distributed moderation in a l.pdf} +} + +@inproceedings{lanzara_knowledge_2003, + title = {The Knowledge Ecology of Open-Source Software Projects}, + booktitle = {19th {{EGOS Colloquium}}, {{Copenhagen}}}, + author = {Lanzara, Giovan Francesco and Morner, Michele and others}, + date = {2003}, + annotation = {00082}, + file = {/home/nathante/Zotero/storage/MY6MJGIC/Lanzara et al_2003_The knowledge ecology of open-source software projects.pdf} +} + +@book{lave_situated_1991, + title = {Situated Learning: Legitimate Peripheral Participation}, + shorttitle = {Situated {{Learning}}}, + author = {Lave, Jean and Wenger, Etienne}, + date = {1991}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge, UK}}, + abstract = {In this important theoretical treatise, Jean Lave, anthropologist, and Etienne Wenger, computer scientist, push forward the notion of situated learning–that learning is fundamentally a social process and not solely in the learner's head. The authors maintain that learning viewed as situated activity has as its central defining characteristic a process they call legitimate peripheral participation. Learners participate in communities of practitioners, moving toward full participation in the sociocultural practices of a community. Legitimate peripheral participation provides a way to speak about crucial relations between newcomers and oldtimers and about their activities, identities, artifacts, knowledge and practice. 
The communities discussed in the book are midwives, tailors, quartermasters, butchers, and recovering alcoholics, however, the process by which participants in those communities learn can be generalized to other social groups.}, + isbn = {978-0-521-42374-8}, + langid = {english}, + keywords = {Education / Educational Psychology,Psychology / Cognitive Psychology & Cognition,Psychology / Developmental / General,Psychology / General,Psychology / Personality} +} + +@incollection{lazarsfeld_friendship_1954, + title = {Friendship as a Social Process: A Substantive and Methodological Analysis}, + booktitle = {Freedom and Control in Modern Society}, + author = {Lazarsfeld, Paul F. and Merton, Robert K.}, + editor = {Berger, Morroe and Abel, Theodore and Page, Charles H.}, + date = {1954}, + pages = {18--66}, + publisher = {{Van Nostrand}}, + location = {{New York}}, + abstract = {Page} +} + +@article{lazer_network_2007, + title = {The {{Network Structure}} of {{Exploration}} and {{Exploitation}}}, + author = {Lazer, David and Friedman, Allan}, + date = {2007-12-01}, + journaltitle = {Administrative Science Quarterly}, + shortjournal = {Administrative Science Quarterly}, + volume = {52}, + number = {4}, + pages = {667--694}, + issn = {0001-8392}, + abstract = {Whether as team members brainstorming or cultures experimenting with new technologies, problem solvers communicate and share ideas. This paper examines how the structure of communication networks among actors can affect system-level performance. We present an agent-based computer simulation model of information sharing in which the less successful emulate the more successful. Results suggest that when agents are dealing with a complex problem, the more efficient the network at disseminating information, the better the short-run but the lower the long-run performance of the system. 
The dynamic underlying this result is that an inefficient network maintains diversity in the system and is thus better for exploration than an efficient network, supporting a more thorough search for solutions in the long run. For intermediate time frames, there is an inverted-U relationship between connectedness and performance, in which both poorly and well-connected systems perform badly, and moderately connected systems perform best. This curvilinear relationship between connectivity and group performance can be seen in several diverse instances of organizational and social behavior.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/CQCKNER7/Lazer and Friedman - 2007 - The Network Structure of Exploration and Exploitat.pdf} +} + +@article{lazer_studying_2020, + title = {Studying Human Attention on the {{Internet}}}, + author = {Lazer, David}, + date = {2020-01-07}, + journaltitle = {Proceedings of the National Academy of Sciences}, + shortjournal = {Proc Natl Acad Sci USA}, + volume = {117}, + number = {1}, + pages = {21--22}, + issn = {0027-8424, 1091-6490}, + langid = {english}, + file = {/home/nathante/Zotero/storage/T8C43YAK/Lazer - 2020 - Studying human attention on the Internet.pdf} +} + +@inproceedings{leavitt_role_2017, + title = {The Role of Information Visibility in Network Gatekeeping: Information Aggregation on Reddit during Crisis Events}, + shorttitle = {The Role of Information Visibility in Network Gatekeeping}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Leavitt, Alex and Robinson, John J.}, + date = {2017-02-25}, + series = {{{CSCW}} '17}, + pages = {1246--1261}, + publisher = {{Association for Computing Machinery}}, + location = {{Portland, Oregon, USA}}, + abstract = {As social media platforms witness more and more contributions from participants during developing crisis events, some platforms provide affordances that support 
visibility for specific pieces of information. However, the design of information visibility, especially in the context of controlling information flows (through gatekeeping), may shape how participants collect and share up-to-date information in these systems. This paper looks at the field site of reddit.com through trace ethnography methods to understand how the design of reddit's platform (from algorithms to user roles) impacts the visibility of information and subsequently how participants aggregate information in response to ongoing events. Through trace ethnographic analysis, we illustrate three themes related to tensions around visibility - behavioral, structural, and relational - and show how visibility shapes the work of producing information about crises in social news sites.}, + isbn = {978-1-4503-4335-0}, + file = {/home/nathante/Zotero/storage/6PIBDNTW/Leavitt and Robinson - 2017 - The Role of Information Visibility in Network Gate.pdf} +} + +@inproceedings{leavitt_this_2015, + title = {"{{This}} Is a Throwaway Account": Temporary Technical Identities and Perceptions of Anonymity in a Massive Online Community}, + shorttitle = {"{{This}} Is a Throwaway Account"}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Leavitt, Alex}, + date = {2015-02-28}, + series = {{{CSCW}} '15}, + pages = {317--327}, + publisher = {{Association for Computing Machinery}}, + location = {{Vancouver, BC, Canada}}, + abstract = {This paper explores temporary identities on social media platforms and individuals' uses of these identities with respect to their perceptions of anonymity. Given the research on multiple profile maintenance, little research has examined the role that some social media platforms play in affording users with temporary identities. Further, most of the research on anonymity stops short of the concept of varying perceptions of anonymity. 
This paper builds on these research areas by describing the phenomenon of temporary "throwaway accounts" and their uses on reddit.com, a popular social news site. In addition to ethnographic trace analysis to examine the contexts in which throwaway accounts are adopted, this paper presents a predictive model that suggests that perceptions of anonymity significantly shape the potential uses of throwaway accounts and that women are much more likely to adopt temporary identities than men.}, + isbn = {978-1-4503-2922-4}, + file = {/home/nathante/Zotero/storage/7ITF227V/Leavitt - 2015 - This is a Throwaway Account Temporary Technical.pdf} +} + +@article{leavitt_upvote_2017, + title = {Upvote My News: The Practices of Peer Information Aggregation for Breaking News on Reddit.Com}, + shorttitle = {Upvote My News}, + author = {Leavitt, Alex and Robinson, John J.}, + date = {2017-12-06}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {1}, + pages = {65:1--65:18}, + abstract = {Citizen participation in crisis communication increasingly occurs in social media contexts. As some platforms -- e.g., social news sites -- evolve around collaborative voting, filtering, and information sharing, the aggregation of breaking news information during crisis situations appears more often as an emergent practice in these online communities. Drawing from 53 interviews and descriptive quantitative analysis of reddit posts and comments, this paper presents a qualitative case study examining reddit.com members aggregate information during crisis events within the context of reddit's post/comment structure, crowd voting, and ranking algorithms. 
Using the lens of network gatekeeping, the paper shows how participants evaluate sources, organize information, and verify details to demonstrate how different affordances and limitations of information production allow or restrict particular types of network gatekeeping.}, + issue = {CSCW}, + file = {/home/nathante/Zotero/storage/TW846G2K/Leavitt and Robinson - 2017 - Upvote My News The Practices of Peer Information .pdf} +} + +@article{lee_coevolution_2011, + title = {The Coevolution of Multiplex Communication Networks in Organizational Communities}, + author = {Lee, Seungyoon and Monge, Peter}, + date = {2011}, + journaltitle = {Journal of Communication}, + volume = {61}, + number = {4}, + pages = {758--779}, + issn = {1460-2466}, + abstract = {This research examines the evolutionary patterns and determinants of multiplex organizational communication networks. Based on the data between 1997 and 2005 collected from the records of development projects in the field of Information and Communication Technology for Development, the study demonstrates that dynamics in one network are significant drivers of tie formation in the other network at both dyadic and triadic levels. In particular, results show that the effects of common third-party ties and structural embeddedness exist across multiplex networks. Further, the study suggests that resource similarity of organizational dyads, resource width, and organizational centrality have positive effects on the propensity for multiplex ties. 
These results have implications for organizations' communication networking strategies in a wide variety of organizational communities.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/NDMFP6LL/Lee and Monge - 2011 - The Coevolution of Multiplex Communication Network.pdf;/home/nathante/Zotero/storage/URK65X23/Lee and Monge - 2011 - The Coevolution of Multiplex Communication Network.pdf;/home/nathante/Zotero/storage/23W6FUJ5/abstract.html;/home/nathante/Zotero/storage/VHYAREB8/j.1460-2466.2011.01566.html} +} + +@article{leimeister_evaluation_2005, + title = {Evaluation of a {{Systematic Design}} for a {{Virtual Patient Community}}}, + author = {Leimeister, Jan Marco and Krcmar, Helmut}, + date = {2005-07-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {10}, + issn = {1083-6101}, + abstract = {Virtual Communities (VCs) offer ubiquitous access to information and exchange possibilities for people in similar situations, which is especially valuable for patients with chronic / life-threatening diseases. However, it is seldom considered possible to create VCs systematically. This article describes the evaluation of the design elements and factors that contributed to the success of the VC krebsgemeinschaft.de (a VC for cancer patients in the German-speaking internet), by assessing user acceptance and usage. Additionally, the existence of trust (a constituent element of working VCs) in krebsgemeinschaft.de is addressed. 
Based on these criteria, we empirically verify the chosen design components and generate insights into the systematic development and operation of VCs in general and VCs for patients in the German healthcare system in particular.}, + issue = {JCMC1041}, + file = {/home/nathante/Zotero/storage/BI7E4R6W/Leimeister and Krcmar - 2005 - Evaluation of a Systematic Design for a Virtual Pa.pdf;/home/nathante/Zotero/storage/G39U4C3F/4614530.html} +} + +@article{levin_community_1970, + title = {Community {{Equilibria}} and {{Stability}}, and an {{Extension}} of the {{Competitive Exclusion Principle}}}, + author = {Levin, Simon A.}, + date = {1970-09}, + journaltitle = {The American Naturalist}, + shortjournal = {The American Naturalist}, + volume = {104}, + number = {939}, + pages = {413--423}, + issn = {0003-0147, 1537-5323}, + langid = {english}, + file = {/home/nathante/Zotero/storage/5G99UCM8/Levin - 1970 - Community Equilibria and Stability, and an Extensi.pdf} +} + +@inproceedings{liang_knowledge_2017, + ids = {liang_knowledge_2017-1}, + title = {Knowledge Sharing in Online Discussion Threads: What Predicts the Ratings?}, + shorttitle = {Knowledge Sharing in Online Discussion Threads}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Liang, Yuyang}, + date = {2017-02-25}, + series = {{{CSCW}} '17}, + pages = {146--154}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {As an important category of user-generated content (UGC) community, Question and Answer (Q\&A) community offers internet users opportunities to ask questions and share knowledge with others. 
In order to understand how the ratings of knowledge contribution quality correlate with the way knowledge is being shared in discussion threads, the study examines user behaviors and profiles in a large knowledge sharing community, /r/Techsupport, a discussion based Q\&A site in Reddit.com concerning internet and technology problems. Negative binomial regressions and negative binomial mixed models are built to investigate the relationships among thread structure, level of user activity, user profiles and the ratings of threads and comments in the community. Results indicate that in the better rated threads, the structures tend to be more centralized with heterogeneous participants discussing the problem at a deeper level. Meanwhile, contributions with good ratings are more likely to be produced by users who are more engaged in commenting behaviors.}, + isbn = {978-1-4503-4335-0}, + keywords = {knowledge sharing,network structure,online community,threaded discussion,user generated content,user profile}, + file = {/home/nathante/Zotero/storage/852P8MGY/Liang - 2017 - Knowledge Sharing in Online Discussion Threads Wh.pdf} +} + +@inproceedings{lin_better_2017, + title = {Better When It Was Smaller? Community Content and Behavior after Massive Growth.}, + shorttitle = {Better {{When It Was Smaller}}?}, + booktitle = {Eleventh {{International AAAI Conference}} on {{Web}} and {{Social Media}}}, + author = {Lin, Zhiyuan and Salehi, Niloufar and Yao, Bowen and Chen, Yiqi and Bernstein, Michael S.}, + date = {2017}, + pages = {132--141}, + publisher = {{AAAI}}, + location = {{Montreal, Canada}}, + abstract = {Online communities have a love-hate relationship with membership growth: new members bring fresh perspectives, but old-timers worry that growth interrupts the community’s social dynamic and lowers content quality. 
To arbitrate these two theories, we analyze over 45 million comments from 10 Reddit subcommunities following an exogenous shock when each subcommunity was added to the default set for all Reddit users. Capitalizing on these natural experiments, we test for changes to the content vote patterns, linguistic patterns, and community network patterns before and after being defaulted. Results support a narrative that the communities remain high-quality and similar to their previous selves even post-growth. There is a temporary dip in upvote scores right after the communities were defaulted, but the communities quickly recover to pre-default or even higher levels. Likewise, complaints about low-quality posts do not rise in frequency after getting defaulted. Strong moderation also helps keep upvotes common and complaint levels low. Communities’ language use does not become more like the rest of Reddit after getting defaulted. However, growth does have some impact on attention: community members cluster their activity around a smaller proportion of posts after the community is defaulted.}, + eventtitle = {{{ICWSM}}}, + file = {/home/nathante/Zotero/storage/3NB3IZUR/Lin et al. - 2017 - Better When It Was Smaller Community Content and .pdf} +} + +@article{lin_power_nodate, + title = {Power {{Iteration Clustering}}}, + author = {Lin, Frank and Cohen, William W}, + pages = {8}, + abstract = {We present a simple and scalable graph clustering method called power iteration clustering (PIC). PIC finds a very low-dimensional embedding of a dataset using truncated power iteration on a normalized pair-wise similarity matrix of the data. This embedding turns out to be an effective cluster indicator, consistently outperforming widely used spectral methods such as NCut on real datasets. 
PIC is very fast on large datasets, running over 1,000 times faster than an NCut implementation based on the state-of-the-art IRAM eigenvector computation technique.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/8XB5KA6M/Lin and Cohen - Power Iteration Clustering.pdf} +} + +@inproceedings{litt_just_2016, + title = {"{{Just Cast}} the {{Net}}, and {{Hopefully}} the {{Right Fish Swim}} into {{It}}": Audience {{Management}} on {{Social Network Sites}}}, + shorttitle = {"{{Just Cast}} the {{Net}}, and {{Hopefully}} the {{Right Fish Swim}} into {{It}}"}, + booktitle = {Proceedings of the 19th {{ACM Conference}} on {{Computer}}-{{Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Litt, Eden and Hargittai, Eszter}, + date = {2016-02-27}, + series = {{{CSCW}} '16}, + pages = {1488--1500}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {When users post on social network sites, they can engage in audience-reaching strategies, in an effort to reach desired audience members, as well as audience-limiting strategies, in an effort to avoid unwanted audience members. While much research has focused on users' audience-limiting strategies, little research has explicitly focused on users' audience-reaching strategies. Additionally, little work has explored either strategy at the post level. Using mixed methods involving a diary study and follow-up interviews focused on a diverse group of users' posts, this article reveals several audience-reaching strategies users engaged from altering their content to tagging. However, users in this study rarely used strategies to exclude people proactively and technologically outside of their targeted audiences, and instead broadcasted widely. 
Participants described several rationales for sharing broadly from skill-related issues to a reliance on the audience or site to filter the content.}, + isbn = {978-1-4503-3592-8}, + keywords = {Audience,audience management,audience-reaching strategies,imagined audience,privacy,social network sites}, + file = {/home/nathante/Zotero/storage/UKKUVHK2/Litt_Hargittai_2016_“\;Just Cast the Net, and Hopefully the Right Fish Swim into It”\;.pdf} +} + +@inproceedings{lu_investigate_2019, + title = {Investigate {{Transitions}} into {{Drug Addiction}} through {{Text Mining}} of {{Reddit Data}}}, + booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}}, + author = {Lu, John and Sridhar, Sumati and Pandey, Ritika and Hasan, Mohammad Al and Mohler, George}, + date = {2019-07-25}, + series = {{{KDD}} '19}, + pages = {2367--2375}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Increasing rates of opioid drug abuse and heightened prevalence of online support communities underscore the necessity of employing data mining techniques to better understand drug addiction using these rapidly developing online resources. In this work, we obtained data from Reddit, an online collection of forums, to gather insight into drug use/misuse using text snippets from users narratives. Specifically, using users' posts, we trained a binary classifier which predicts a user's transitions from casual drug discussion forums to drug recovery forums. We also proposed a Cox regression model that outputs likelihoods of such transitions. In doing so, we found that utterances of select drugs and certain linguistic features contained in one's posts can help predict these transitions. 
Using unfiltered drug-related posts, our research delineates drugs that are associated with higher rates of transitions from recreational drug discussion to support/recovery discussion, offers insight into modern drug culture, and provides tools with potential applications in combating the opioid crisis.}, + isbn = {978-1-4503-6201-6}, + keywords = {cox regression,drug addiction and recovery,reddit forum,text mining}, + file = {/home/nathante/Zotero/storage/GUQKME9M/Lu et al_2019_Investigate Transitions into Drug Addiction through Text Mining of Reddit Data.pdf} +} + +@inproceedings{ludford_think_2004, + title = {Think {{Different}}: Increasing {{Online Community Participation Using Uniqueness}} and {{Group Dissimilarity}}}, + shorttitle = {Think {{Different}}}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Ludford, Pamela J. and Cosley, Dan and Frankowski, Dan and Terveen, Loren}, + date = {2004}, + series = {{{CHI}} '04}, + pages = {631--638}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Online communities can help people form productive relationships. Unfortunately, this potential is not always fulfilled: many communities fail, and designers don't have a solid understanding of why. We know community activity begets activity. The trick, however, is to inspire participation in the first place. Social theories suggest methods to spark positive community participation. We carried out a field experiment that tested two such theories. We formed discussion communities around an existing movie recommendation web site, manipulating two factors: (1) similarity-we controlled how similar group members' movie ratings were; and (2) uniqueness-we told members how their movie ratings (with respect to a discussion topic) were unique within the group. Both factors positively influenced participation. 
The results offer a practical success story in applying social science theory to the design of online communities.}, + isbn = {978-1-58113-702-6}, + venue = {Vienna, Austria}, + file = {/home/nathante/Zotero/storage/94P38A6I/Ludford et al. - 2004 - Think Different Increasing Online Community Parti.pdf} +} + +@inproceedings{luo_causal_2014, + title = {Causal {{Inference}} in {{Social Media Using Convergent Cross Mapping}}}, + booktitle = {2014 {{IEEE Joint Intelligence}} and {{Security Informatics Conference}}}, + author = {Luo, C. and Zheng, X. and Zeng, D.}, + date = {2014-09}, + pages = {260--263}, + abstract = {Revealing underlying causal structure in social media is critical to understanding how users interact, on which a lot of security intelligence applications can be built. Existing causal inference methods for social media usually rely on limited explicit causal context, pre-assume certain user interaction model, or neglect the nonlinear nature of social interaction, which could lead to bias estimations of causality. Inspired from recent advance in causality detection in complex ecosystems, we propose to take advantage of a novel nonlinear state space reconstruction based approach, namely Convergent Cross Mapping, to perform causal inference in social media. Experimental results on real world social media datasets show the effectiveness of the proposed method in causal inference and user behavior prediction in social media.}, + eventtitle = {2014 {{IEEE Joint Intelligence}} and {{Security Informatics Conference}}}, + file = {/home/nathante/Zotero/storage/PQJPPNVK/Luo et al. - 2014 - Causal Inference in Social Media Using Convergent .pdf;/home/nathante/Zotero/storage/YEGDGLZH/6975587.html} +} + +@book{luxburg_tutorial_2007, + title = {A {{Tutorial}} on {{Spectral Clustering}}}, + author = {von Luxburg, Ulrike}, + date = {2007}, + abstract = {In recent years, spectral clustering has become one of the most popular modern clustering algorithms. 
It is simple to implement, can be solved efficiently by standard linear algebra software, and very often outperforms traditional clustering algorithms such as the k-means algorithm. On the first glance spectral clustering appears slightly mysterious, and it is not obvious to see why it works at all and what it really does. The goal of this tutorial is to give some intuition on to those questions. We describe different graph Laplacians and their basic properties, present the most common spectral clustering algorithms, and derive those algorithms from scratch by several different approaches. Advantages and disadvantages of the different spectral clustering algorithms are discussed.}, + file = {/home/nathante/Zotero/storage/4UMVLMTD/Luxburg_2007_A Tutorial on Spectral Clustering.pdf;/home/nathante/Zotero/storage/BUBB3PKN/summary.html} +} + +@article{lykourentzou_when_2018, + title = {When {{Crowds Give You Lemons}}: Filtering {{Innovative Ideas}} Using a {{Diverse}}-{{Bag}}-of-{{Lemons Strategy}}}, + shorttitle = {When {{Crowds Give You Lemons}}}, + author = {Lykourentzou, Ioanna and Ahmed, Faez and Papastathis, Costas and Sadien, Irwyn and Papangelis, Konstantinos}, + date = {2018-11-01}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {2}, + pages = {115:1--115:23}, + abstract = {Following successful crowd ideation contests, organizations in search of the "next big thing" are left with hundreds of ideas. Expert-based idea filtering is lengthy and costly; therefore, crowd-based strategies are often employed. Unfortunately, these strategies typically (1) do not separate the mediocre from the excellent, and (2) direct all the attention to certain idea concepts, while others starve. 
We introduce DBLemons - a crowd-based idea filtering strategy that addresses these issues by (1) asking voters to identify the worst rather than the best ideas using a "bag of lemons" voting approach, and (2) by exposing voters to a wider idea spectrum, thanks to a dynamic diversity-based ranking system balancing idea quality and coverage. We compare DBLemons against two state-of-the-art idea filtering strategies in a real-world setting. Results show that DBLemons is more accurate, less time-consuming, and reduces the idea space in half while still retaining 94\% of the top ideas.}, + issue = {CSCW}, + keywords = {diversity,filtering,open innovation}, + file = {/home/nathante/Zotero/storage/MRHXRNFG/Lykourentzou et al_2018_When Crowds Give You Lemons.pdf} +} + +@inproceedings{ma_when_2019, + title = {When {{Do People Trust Their Social Groups}}?}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Ma, Xiao and Cheng, Justin and Iyer, Shankar and Naaman, Mor}, + date = {2019-05-02}, + pages = {1--12}, + publisher = {{ACM}}, + location = {{Glasgow, Scotland, UK}}, + eventtitle = {{{CHI}} '19: {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + isbn = {978-1-4503-5970-2}, + langid = {english}, + file = {/home/nathante/Zotero/storage/ZEWUJPHL/Ma et al. - 2019 - When Do People Trust Their Social Groups.pdf} +} + +@article{majchrzak_contradictory_2013, + title = {The {{Contradictory Influence}} of {{Social Media Affordances}} on {{Online Communal Knowledge Sharing}}}, + author = {Majchrzak, Ann and Faraj, Samer and Kane, Gerald C. 
and Azad, Bijan}, + date = {2013-10-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {19}, + number = {1}, + pages = {38--55}, + publisher = {{Oxford Academic}}, + abstract = {The use of social media creates the opportunity to turn organization-wide knowledge sharing in the workplace from an intermittent, centralized knowledge management process to a continuous online knowledge conversation of strangers, unexpected interpretations and re-uses, and dynamic emergence. We theorize four affordances of social media representing different ways to engage in this publicly visible knowledge conversations: metavoicing, triggered attending, network-informed associating, and generative role-taking. We further theorize mechanisms that affect how people engage in the knowledge conversation, finding that some mechanisms, when activated, will have positive effects on moving the knowledge conversation forward, but others will have adverse consequences not intended by the organization. These emergent tensions become the basis for the implications we draw.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/9U9NTEVE/Majchrzak et al. - 2013 - The Contradictory Influence of Social Media Afford.pdf;/home/nathante/Zotero/storage/DBAC2BYD/4067499.html} +} + +@article{majchrzak_effect_2016, + title = {Effect of {{Knowledge}}-{{Sharing Trajectories}} on {{Innovative Outcomes}} in {{Temporary Online Crowds}}}, + author = {Majchrzak, Ann and Malhotra, Arvind}, + date = {2016-11-10}, + journaltitle = {Information Systems Research}, + shortjournal = {Information Systems Research}, + issn = {1047-7047}, + abstract = {There is substantial research on the effects of formal control structures (i.e., incentives, identities, organization, norms) on knowledge sharing leading to innovative outcomes in online communities. 
However, there is little research on how knowledge-sharing trajectories in temporary online crowds create innovative outcomes without these structures. Such research is particularly of interest in the context of temporary online crowds solicited with crowdsourcing in which there is only minimal structure for knowledge sharing. We identify eight types of crowdsourcing with different knowledge-sharing patterns. The focus of this study is on the one type of crowdsourcing—collaborative innovation challenges—in which there is the least restriction on knowledge sharing in the crowd. A content analysis was conducted of all time-stamped posts made in five different collaborative innovation challenges to identify different knowledge-sharing trajectories used. We found that a paradox-framed trajectory was more likely to be followed by innovative outcomes compared to three other knowledge-sharing trajectories. A paradox-framed trajectory is one in which a novel solution emerges when different participants post in the following sequence: (1) contributing a paradox associated with the problem objective, (2) sharing assumptions to validate the paradox, and (3) sharing initial ideas for resolving the paradox in a manner that meets the problem statement. Based on the findings, a theory of paradox-framed trajectories in temporary online crowds is presented along with implications for knowledge creation theories in general and online knowledge-creating communities in particular.}, + file = {/home/nathante/Zotero/storage/XI69RCFW/Majchrzak and Malhotra - 2016 - Effect of Knowledge-Sharing Trajectories on Innova.pdf} +} + +@inproceedings{maldeniya_herding_2020, + title = {Herding a {{Deluge}} of {{Good Samaritans}}: How {{GitHub Projects Respond}} to {{Increased Attention}}}, + shorttitle = {Herding a {{Deluge}} of {{Good Samaritans}}}, + author = {Maldeniya, Danaja and Budak, Ceren and Robert Jr., Lionel P. 
and Romero, Daniel M.}, + date = {2020-04-20}, + pages = {2055--2065}, + publisher = {{ACM}}, + abstract = {Collaborative crowdsourcing is a well-established model of work, especially in the case of open source software development. The structure and operation of these virtual and loosely-knit teams differ from traditional organizations. As such, little is known about how their behavior may change in response to an increase in external attention. To understand these dynamics, we analyze millions of actions of thousands of contributors in over 1100 open source software projects that topped the GitHub Trending Projects page and thus experienced a large increase in attention, in comparison to a control group of projects identified through propensity score matching. In carrying out our research, we use the lens of organizational change, which considers the challenges teams face during rapid growth and how they adapt their work routines, organizational structure, and management style. We show that trending results in an explosive growth in the effective team size. However, most newcomers make only shallow and transient contributions. In response, the original team transitions towards administrative roles, responding to requests and reviewing work done by newcomers. Projects evolve towards a more distributed coordination model with newcomers becoming more central, albeit in limited ways. Additionally, teams become more modular with subgroups specializing in different aspects of the project. We discuss broader implications for collaborative crowdsourcing teams that face attention shocks.}, + isbn = {978-1-4503-7023-3}, + langid = {english}, + file = {/home/nathante/Zotero/storage/P2G8EEA3/Maldeniya et al. - 2020 - Herding a Deluge of Good Samaritans How GitHub Pr.pdf} +} + +@online{mamie_are_2021, + title = {Are {{Anti}}-{{Feminist Communities Gateways}} to the {{Far Right}}? 
Evidence from {{Reddit}} and {{YouTube}}}, + shorttitle = {Are {{Anti}}-{{Feminist Communities Gateways}} to the {{Far Right}}?}, + author = {Mamié, Robin and Ribeiro, Manoel Horta and West, Robert}, + date = {2021-02-25}, + eprint = {2102.12837}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Researchers have suggested that "the Manosphere," a conglomerate of men-centered online communities, may serve as a gateway to far right movements. In that context, this paper quantitatively studies the migratory patterns between a variety of groups within the Manosphere and the Alt-right, a loosely connected far right movement that has been particularly active in mainstream social networks. Our analysis leverages over 300 million comments spread through Reddit (in 115 subreddits) and YouTube (in 526 channels) to investigate whether the audiences of channels and subreddits associated with these communities have converged between 2006 and 2018. In addition to subreddits related to the communities of interest, we also collect data on counterparts: other groups of users which we use for comparison (e.g., for YouTube we use a set of media channels). Besides measuring the similarity in the commenting user bases of these communities, we perform a migration study, calculating to which extent users in the Manosphere gradually engage with Alt-right content. Our results suggest that there is a large overlap between the user bases of the Alt-right and of the Manosphere and that members of the Manosphere have a bigger chance to engage with far right content than carefully chosen counterparts. However, our analysis also shows that migration and user base overlap varies substantially across different platforms and within the Manosphere. 
Members of some communities (e.g., Men's Rights Activists) gradually engage with the Alt-right significantly more than counterparts on both Reddit and YouTube, whereas for other communities, this engagement happens mostly on Reddit (e.g., Pick Up Artists). Overall, our work paints a nuanced picture of the pipeline between the Manosphere and the Alt-right, which may inform platforms' policies and moderation decisions regarding these communities.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Computers and Society}, + file = {/home/nathante/Zotero/storage/33R8MJF4/Mamié et al. - 2021 - Are Anti-Feminist Communities Gateways to the Far Right.pdf;/home/nathante/Zotero/storage/N8VBLTAY/2102.html} +} + +@book{manning_introduction_2018, + title = {Introduction to Information Retrieval}, + author = {Manning, Christopher D and Raghavan, Prabhakar and Schütze, Hinrich}, + date = {2018}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge}}, + isbn = {978-0-521-86571-5}, + langid = {english}, + annotation = {OCLC: 1077323048} +} + +@book{margetts_political_2015, + title = {Political Turbulence: How Social Media Shape Collective Action}, + shorttitle = {Political {{Turbulence}}}, + author = {Margetts, Helen and John, Peter and Hale, Scott and Yasseri, Taha}, + date = {2015-11-24}, + publisher = {{Princeton University Press}}, + location = {{Princeton, NJ}}, + abstract = {As people spend increasing proportions of their daily lives using social media, such as Twitter and Facebook, they are being invited to support myriad political causes by sharing, liking, endorsing, or downloading. Chain reactions caused by these tiny acts of participation form a growing part of collective action today, from neighborhood campaigns to global political movements. Political Turbulence reveals that, in fact, most attempts at collective action online do not succeed, but some give rise to huge mobilizations--even revolutions. 
Drawing on large-scale data generated from the Internet and real-world events, this book shows how mobilizations that succeed are unpredictable, unstable, and often unsustainable. To better understand this unruly new force in the political world, the authors use experiments that test how social media influence citizens deciding whether or not to participate. They show how different personality types react to social influences and identify which types of people are willing to participate at an early stage in a mobilization when there are few supporters or signals of viability. The authors argue that pluralism is the model of democracy that is emerging in the social media age--not the ordered, organized vision of early pluralists, but a chaotic, turbulent form of politics. This book demonstrates how data science and experimentation with social data can provide a methodological toolkit for understanding, shaping, and perhaps even predicting the outcomes of this democratic turbulence.}, + isbn = {978-0-691-15922-5}, + langid = {english}, + pagetotal = {304}, + file = {/home/nathante/Zotero/storage/EF6XBIQ7/Margetts et al. - 2015 - Political Turbulence How Social Media Shape Colle.pdf;/home/nathante/Zotero/storage/JEHM4KWG/Political Turbulence_ How Social Media Sha - Helen Margetts.azw3} +} + +@article{margolin_normative_2012, + title = {Normative {{Influences}} on {{Network Structure}} in the {{Evolution}} of the {{Children}}’s {{Rights NGO Network}}, 1977--2004}, + shorttitle = {Normative {{Influences}} on {{Network Structure}} in the {{Evolution}} of the {{Children}}’s {{Rights NGO Network}}, 1977--2004}, + author = {Margolin, Drew B. and Shen, Cuihua and Lee, Seungyoon and Weber, Matthew S. 
and Fulk, Janet and Monge, Peter}, + date = {2012-10-23}, + journaltitle = {Communication Research}, + abstract = {This study examines the impact of legitimacy on the dynamics of interorganizational networks within the nongovernmental organizations’ children’s rights communi...}, + langid = {english}, + keywords = {codification,community ecology,evolution,network evolution,NGOs,norms,SIENA}, + file = {/home/nathante/Zotero/storage/295X7HRD/Margolin et al_2012_Normative Influences on Network Structure in the Evolution of the Children’s.pdf;/home/nathante/Zotero/storage/T494X64A/0093650212463731.html} +} + +@book{marwell_critical_1993, + title = {The Critical Mass in Collective Action: A Micro-Social Theory}, + shorttitle = {The Critical Mass in Collective Action}, + author = {Marwell, Gerald and Oliver, Pamela}, + date = {1993}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge, UK}}, + isbn = {978-0-521-30839-7}, + langid = {english}, + file = {/home/nathante/Zotero/storage/Q8MVNRHA/Marwell and Oliver - 1993 - The critical mass in collective action A micro-so.pdf} +} + +@article{marwick_i_2011, + ids = {marwick_i_2011-1}, + title = {I Tweet Honestly, {{I}} Tweet Passionately: Twitter Users, Context Collapse, and the Imagined Audience}, + shorttitle = {I Tweet Honestly, {{I}} Tweet Passionately}, + author = {Marwick, A. E. and {boyd}, danah}, + date = {2011-02-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {13}, + number = {1}, + pages = {114--133}, + issn = {1461-4448}, + abstract = {Social media technologies collapse multiple audiences into single contexts, making it difficult for people to use the same techniques online that they do to handle multiplicity in face-to-face conversation. This article investigates how content producers navigate ‘imagined audiences’ on Twitter. 
We talked with participants who have different types of followings to understand their techniques, including targeting different audiences, concealing subjects, and maintaining authenticity. Some techniques of audience management resemble the practices of ‘micro-celebrity’ and personal branding, both strategic self-commodification. Our model of the networked audience assumes a many-to-many communication through which individuals conceptualize an imagined audience evoked through their tweets.}, + langid = {english}, + keywords = {imagined audiences,qualitative,SNS}, + file = {/home/nathante/Zotero/storage/GHXUFS86/Marwick and boyd - 2011 - I tweet honestly, I tweet passionately Twitter us.pdf} +} + +@article{massanari_gamergate_2017, + title = {\#{{Gamergate}} and {{The Fappening}}: How {{Reddit}}’s Algorithm, Governance, and Culture Support Toxic Technocultures}, + shorttitle = {\#{{Gamergate}} and {{The Fappening}}}, + author = {Massanari, Adrienne}, + date = {2017-03-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {19}, + number = {3}, + pages = {329--346}, + issn = {1461-4448}, + abstract = {This article considers how the social-news and community site Reddit.com has become a hub for anti-feminist activism. Examining two recent cases of what are defined as “toxic technocultures” (\#Gamergate and The Fappening), this work describes how Reddit’s design, algorithm, and platform politics implicitly support these kinds of cultures. In particular, this piece focuses on the ways in which Reddit’s karma point system, aggregation of material across subreddits, ease of subreddit and user account creation, governance structure, and policies around offensive content serve to provide fertile ground for anti-feminist and misogynistic activism. The ways in which these events and communities reflect certain problematic aspects of geek masculinity are also considered. 
This research is informed by the results of a long-term participant-observation and ethnographic study into Reddit’s culture and community and is grounded in actor-network theory.}, + langid = {english}, + keywords = {Algorithms,design,Gamergate,gender,online communities,online harassment,platform politics,Reddit,The Fappening,toxic technocultures}, + file = {/home/nathante/Zotero/storage/D5W5JKQU/Massanari - 2017 - #Gamergate and The Fappening How Reddit’s algorit.pdf;/home/nathante/Zotero/storage/NGCFX9JB/Massanari - 2017 - #Gamergate and The Fappening How Reddit’s algorit.pdf} +} + +@book{matei_structural_2017, + title = {Structural Differentiation in Social Media: Adhocracy, Entropy, and the ``1 \% Effect''}, + shorttitle = {Structural Differentiation in Social Media}, + author = {Matei, Sorin A and Britt, Brian C}, + date = {2017}, + series = {Lecture {{Notes}} in {{Social Networks}}}, + publisher = {{Springer}}, + abstract = {This book explores community dynamics within social media. Using Wikipedia as an example, the volume explores communities that rely upon commons-based peer production. Fundamental theoretical principles spanning such domains as organizational configurations, leadership roles, and social evolutionary theory are developed. In the context of Wikipedia, these theories explain how a functional elite of highly productive editors has emerged and why they are responsible for a majority of the content. It explains how the elite shapes the project and how this group tends to become stable and increasingly influential over time. Wikipedia has developed a new and resilient social hierarchy, an adhocracy, which combines features of traditional and new, online, social organizations. The book presents a set of practical approaches for using these theories in real-world practice. 
This work fundamentally changes the way we think about social media leadership and evolution, emphasizing the crucial contributions of leadership, of elite social roles, and of group global structure to the overall success and stability of large social media projects. Written in an accessible and direct style, the book will be of interest to academics as well as professionals with an interest in social media and commons-based peer production processes.}, + isbn = {978-3-319-64425-7}, + langid = {english} +} + +@article{matias_civic_2019, + title = {The Civic Labor of Volunteer Moderators Online}, + author = {Matias, J. Nathan}, + date = {2019-04}, + journaltitle = {Social Media + Society}, + volume = {5}, + number = {2}, + pages = {1--12}, + issn = {2056-3051, 2056-3051}, + abstract = {Volunteer moderators create, support, and control public discourse for millions of people online, even as moderators’ uncompensated labor upholds platform funding models. What is the meaning of this work and who is it for? In this article, I examine the meanings of volunteer moderation on the social news platform reddit. Scholarship on volunteer moderation has viewed this work separately as digital labor for platforms, civic participation in communities, or oligarchy among other moderators. In mixed-methods research sampled from over 52,000 subreddit communities and in over a dozen interviews, I show how moderators adopt all of these frames as they develop and re-develop everyday meanings of moderation—facing the platform, their communities, and other moderators alike. I also show how this civic notion of digital labor brings clarity to a strike by moderators in July 2015. Volunteer governance remains a common approach to managing social relations, conflict, and civil liberties online. 
Our ability to see how communities negotiate the meaning of moderation will shape our capacity to address digital governance as a society.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/Q8BACUUZ/Matias - 2019 - The Civic Labor of Volunteer Moderators Online.pdf} +} + +@inproceedings{matias_civilservant:_2018, + ids = {matias_civilservant_2018}, + title = {Civilservant: Community-Led Experiments in Platform Governance}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Matias, J. Nathan and Mou, Merry}, + date = {2018}, + series = {{{CHI}} '18}, + pages = {9:1--9:13}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {As online platforms monitor and intervene in the daily lives of billions of people, platforms are being used to govern enduring social problems. Field experiments could inform wise uses of this power if tensions between democratic values and experimentation could be resolved. In this paper, we introduce CivilServant, a novel experimentation infrastructure that online communities and their moderators use to evaluate policies and replicate each others' findings. We situate CivilServant in the political history of policy experiments and present design considerations for community participation, ethics, and replication. Based on two case studies of community-led experiments and public debriefings on the reddit platform, we share findings on community deliberation about experiment results. We also report on uses of evidence, finding that experiments informed moderator practices, community policies, and replications by communities and platforms. 
We discuss the implications of these findings for evaluating platform governance in an open, democratic, experimenting society.}, + eventtitle = {{{CHI}}}, + isbn = {978-1-4503-5620-6}, + keywords = {action research,ethics,field experiments,governance,moderation,platforms,policy evaluation,randomized trials}, + file = {/home/nathante/Zotero/storage/LT2I993T/Matias and Mou - 2018 - CivilServant Community-Led Experiments in Platfor.pdf;/home/nathante/Zotero/storage/RCF9QH3M/Matias and Mou - 2018 - CivilServant Community-Led Experiments in Platfor.pdf} +} + +@inproceedings{matias_going_2016, + title = {Going Dark: Social Factors in Collective Action against Platform Operators in the {{Reddit}} Blackout}, + shorttitle = {Going {{Dark}}}, + booktitle = {Proceedings of the 2016 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}} ({{CHI}} '16)}, + author = {Matias, J. Nathan}, + date = {2016}, + pages = {1138--1151}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {This paper describes how people who lead communities on online platforms join together in mass collective action to influence platform operators. I investigate this by analyzing a protest against the social news platform reddit by moderators of 2,278 subreddit communities in July 2015. These moderators collectively disabled their subreddits, preventing millions of readers from accessing major parts of reddit and convincing the company to negotiate over their demands. This paper offers a descriptive analysis of the protest, combining qualitative content analysis, interviews, and quantitative analysis with the population of 52,735 active subreddits. 
Through participatory hypotheses testing with moderators, this study reveals social factors including the grievances of moderators, relations with platform operators, relations among moderators, subreddit resources, subreddit isolation, and moderators' relations with their subreddits that can lead to participation in mass collective action against a platform.}, + isbn = {978-1-4503-3362-7}, + file = {/home/nathante/Zotero/storage/F5C8CDTK/Matias - 2016 - Going Dark Social Factors in Collective Action Ag.pdf} +} + +@article{may_notes_1975, + title = {Some {{Notes}} on {{Estimating}} the {{Competition Matrix}}, $\alpha$}, + author = {May, Robert M.}, + date = {1975}, + journaltitle = {Ecology}, + volume = {56}, + number = {3}, + pages = {737--741}, + issn = {1939-9170}, + abstract = {Recent theoretical and field work on communities of interacting species has employed various forms for estimating the competition matrix elements, $\alpha_{ij}$, from utilization coefficients $p_{ia}$ (which measure the relative utilization of the $a$th resource category by the $i$th species). Some little-known properties of these forms are discussed. When more than one resource dimension is involved, there is in general no substitute for measuring the species' full multidimensional utilization functions; a critical discussion is given of the estimation of such multidimensional competition coefficient $\alpha_{ij}$ by products of one-dimensional coefficients.}, + langid = {english}, + annotation = {\_eprint: https://esajournals.onlinelibrary.wiley.com/doi/pdf/10.2307/1935511}, + file = {/home/nathante/Zotero/storage/JSUXHH75/May_1975_Some Notes on Estimating the Competition Matrix, a.pdf;/home/nathante/Zotero/storage/SSTXH6T8/1935511.html} +} + +@incollection{mccarthy_enduring_2001, + title = {The Enduring Vitality of the Resource Mobilization Theory of Social Movements}, + booktitle = {Handbook of Sociological Theory}, + author = {McCarthy, John D. 
and Zald, Mayer N.}, + editor = {Turner, Jonathan H.}, + date = {2001}, + series = {Handbooks of {{Sociology}} and {{Social Research}}}, + pages = {533--565}, + publisher = {{Springer}}, + location = {{Boston, MA}}, + isbn = {978-0-387-36274-8}, + file = {/home/nathante/Zotero/storage/DVDHJ4RZ/McCarthy and Zald - 2001 - The Enduring Vitality of the Resource Mobilization.pdf} +} + +@article{mccarthy_resource_1977, + title = {Resource Mobilization and Social Movements: A Partial Theory}, + author = {McCarthy, John D. and Zald, Mayer N.}, + date = {1977}, + journaltitle = {The American Journal of Sociology}, + volume = {82}, + number = {6}, + eprint = {2777934}, + eprinttype = {jstor}, + pages = {1212--1241}, + issn = {00029602}, + abstract = {Past analysis of social movements and social movement organizations has normally assumed a close link between the frustrations or grievances of a collectivity of actors and the growth and decline of movement activity. Questioning the theoretical centrality of this assumption directs social movement analysis away from its heavy emphasis upon the social psychology of social movement participants; it can then be more easily integrated with structural theories of social process. This essay presents a set of concepts and related propositions drawn from a resource mobilization perspective. It emphasizes the variety and sources of resources; the relationship of social movements to the media, authorities, and other parties; and the interaction among movement organizations. 
Propositions are developed to explain social movement activity at several levels of inclusiveness---the social movement sector, the social movement industry, and social movement organization.}, + file = {/home/nathante/Zotero/storage/3DVTMQPJ/McCarthy and Zald - 1977 - Resource Mobilization and Social Movements A Part.pdf} +} + +@article{mcinnes_hdbscan_2017, + title = {Hdbscan: Hierarchical Density Based Clustering}, + shorttitle = {Hdbscan}, + author = {McInnes, Leland and Healy, John and Astels, Steve}, + date = {2017-03-21}, + journaltitle = {The Journal of Open Source Software}, + shortjournal = {JOSS}, + volume = {2}, + number = {11}, + pages = {205}, + issn = {2475-9066}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6B488I3N/McInnes et al. - 2017 - hdbscan Hierarchical density based clustering.pdf} +} + +@incollection{mcleod_concept_2005, + title = {Concept {{Explication}} and {{Theory Construction}}}, + booktitle = {{{The Evolution}} of {{Key Mass Communication Concepts}}}, + author = {McLeod, Jack M. and Pan, Zhongdang}, + date = {2005}, + pages = {13--76}, + publisher = {{Hampton Press}}, + file = {/home/nathante/Zotero/storage/WWECBN7V/112a56c287fc2ce37a26cf2c09d4a5ce71bacca2a1d0825652884b0242b07e92.pdf} +} + +@inproceedings{mcmahon_substantial_2017, + title = {The Substantial Interdependence of {{Wikipedia}} and {{Google}}: A Case Study on the Relationship between Peer Production Communities and Information Technologies}, + shorttitle = {The {{Substantial Interdependence}} of {{Wikipedia}} and {{Google}}}, + booktitle = {International {{AAAI Conference}} on {{Web}} and {{Social Media}} ({{ICWSM}} 2017)}, + author = {McMahon, Connor and Johnson, Isaac L. and Hecht, Brent J.}, + date = {2017}, + pages = {142--151}, + publisher = {{AAAI}}, + location = {{Palo Alto, California}}, + file = {/home/nathante/Zotero/storage/6TX35RFQ/McMahon et al. 
- 2017 - The substantial interdependence of Wikipedia and G.pdf} +} + +@article{mcmillan_sense_1986, + title = {Sense of Community: A Definition and Theory}, + shorttitle = {Sense of Community}, + author = {McMillan, David W. and Chavis, David M.}, + date = {1986}, + journaltitle = {Journal of Community Psychology}, + volume = {14}, + number = {1}, + pages = {6--23}, + publisher = {{John Wiley \& Sons}}, + location = {{US}}, + issn = {1520-6629(Electronic),0090-4392(Print)}, + abstract = {Proposes that a sense of community is a feeling that members have of belonging, a feeling that members matter to one another and to the group, and a shared faith that members' needs will be met through commitment to be together. The authors apply the term community equally to territorial communities (e.g., neighborhoods) and to relational communities (e.g., professional, spiritual). The proposed definition of a sense of community has 4 elements: membership, influence, integration and fulfillment of needs, and shared emotional connection. Subelements of these elements of a sense of community and how they work dynamically together to create and maintain it are described. Hypothetical examples from a university, neighborhood, youth gang, and kibbutz are presented to illustrate the interworkings of the elements of a sense of community. It is suggested that this understanding of sense of community has implications for community treatment programs for the mentally retarded and mentally ill. Where "community" means more than residency outside of an institution, strategies can be introduced to allow the therapeutic benefits of community to be developed within group homes and to provide for better integration with communities surrounding such facilities. 
(90 ref) (PsycINFO Database Record (c) 2016 APA, all rights reserved)}, + keywords = {Communities,Community Psychology,Group Dynamics,Sense of Community,Theories}, + file = {/home/nathante/Zotero/storage/D5ECP4GI/1987-03834-001.html} +} + +@article{mcphee_mathematical_1981, + title = {Mathematical {{Modeling}} in {{Communication Research}}: An {{Overview}}}, + shorttitle = {Mathematical {{Modeling}} in {{Communication Research}}}, + author = {McPhee, Robert D. and Poole, Marshall Scott}, + date = {1981-12-01}, + journaltitle = {Annals of the International Communication Association}, + volume = {5}, + number = {1}, + pages = {159--191}, + issn = {2380-8985}, + file = {/home/nathante/Zotero/storage/FH4ZJJ98/McPhee and Poole - 1981 - Mathematical Modeling in Communication Research A.pdf;/home/nathante/Zotero/storage/DFLKR5FZ/23808985.1981.html} +} + +@article{mcpherson_birds_2001, + title = {Birds of a {{Feather}}: Homophily in {{Social Networks}}}, + shorttitle = {Birds of a {{Feather}}}, + author = {McPherson, Miller and Smith-Lovin, Lynn and Cook, James M}, + date = {2001-08-01}, + journaltitle = {Annual Review of Sociology}, + shortjournal = {Annu. Rev. Sociol.}, + volume = {27}, + number = {1}, + pages = {415--444}, + publisher = {{Annual Reviews}}, + issn = {0360-0572}, + abstract = {Similarity breeds connection. This principle—the homophily principle—structures network ties of every type, including marriage, friendship, work, advice, support, information transfer, exchange, comembership, and other types of relationship. The result is that people's personal networks are homogeneous with regard to many sociodemographic, behavioral, and intrapersonal characteristics. Homophily limits people's social worlds in a way that has powerful implications for the information they receive, the attitudes they form, and the interactions they experience. 
Homophily in race and ethnicity creates the strongest divides in our personal environments, with age, religion, education, occupation, and gender following in roughly that order. Geographic propinquity, families, organizations, and isomorphic positions in social systems all create contexts in which homophilous relations form. Ties between nonsimilar individuals also dissolve at a higher rate, which sets the stage for the formation of niches (localized positions) within social space. We argue for more research on: (a) the basic ecological processes that link organizations, associations, cultural communities, social movements, and many other social forms; (b) the impact of multiplex ties on the patterns of homophily; and (c) the dynamics of network change over time through which networks and other social entities co-evolve.}, + file = {/home/nathante/Zotero/storage/DWSDWJ8E/McPherson et al. - 2001 - Birds of a Feather Homophily in Social Networks.pdf;/home/nathante/Zotero/storage/GFG4ZCE8/annurev.soc.27.1.html} +} + +@article{mcpherson_ecology_1983, + title = {An Ecology of Affiliation}, + author = {McPherson, J. Miller}, + date = {1983}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {48}, + number = {4}, + eprint = {2117719}, + eprinttype = {jstor}, + pages = {519--532}, + issn = {0003-1224}, + abstract = {This paper develops an ecological model of the competition of social organizations for members. The concept of the ecological niche is quantified explicitly in a way which ties together geography, time, and the social composition of organizations. A differential equation model analogous to the Lotka-Volterra competition equations in biology captures the dynamics of the system. This dynamic model is related to the niche concept in a novel way, which produces an easily understood and powerful picture of the static and dynamic structure of the community. 
This new perspective provides a theoretical link between the aggregate macrostructural theory of Blau (1977a,b) and the microstructural dynamics of organizational demography (Pfeffer, 1983). The model is tested with data on organizations from a midwestern city.}, + file = {/home/nathante/Zotero/storage/WIDCF8XB/McPherson - 1983 - An ecology of affiliation.pdf} +} + +@article{mcpherson_evolution_1991, + title = {Evolution on a {{Dancing Landscape}}: Organizations and {{Networks}} in {{Dynamic Blau Space}}}, + shorttitle = {Evolution on a {{Dancing Landscape}}}, + author = {McPherson, J. Miller and Ranger-Moore, James R.}, + date = {1991-09}, + journaltitle = {Social Forces}, + shortjournal = {Social Forces}, + volume = {70}, + number = {1}, + pages = {19--43}, + issn = {00377732}, + abstract = {This article develops and tests an evolutionary model of the growth, decline, and demographic dynamics of voluntary organizations. The model demonstrates a strong analogy between the adaptive landscape of Sewall Wright (1931) and the exploitation surfaces generated by a model of member selection and retention for voluntary associations. The article connects the processes of membership recruitment and loss to the social networks connecting individuals. The model generates dynamic hypotheses about the time path of organizations in sociodemographic dimensions. A key idea in this model is that membership selection processes at the individual level produce adaptation in communities of organizations. The article concludes with an empirical example and some discussion of the implications of the model for a variety of research literatures.}, + file = {/home/nathante/Zotero/storage/HVQWNZE6/McPherson and Ranger-Moore - 1991 - Evolution on a Dancing Landscape Organizations an.pdf} +} + +@article{mcpherson_social_1992-1, + title = {Social {{Networks}} and {{Organizational Dynamics}}}, + author = {McPherson, J. Miller and Popielarz, Pamela A. 
and Drobnic, Sonja}, + date = {1992}, + journaltitle = {American Sociological Review}, + volume = {57}, + number = {2}, + eprint = {2096202}, + eprinttype = {jstor}, + pages = {153--170}, + issn = {0003-1224}, + abstract = {[In this paper we develop and test a theory of the dynamic behavior of voluntary groups. The theory combines an image of social network structure with the concept of natural selection to model changes in group composition over time. We consider the group to be a population of members subject to natural selection in sociodemographic space. According to the theory, the probability that members will enter or leave the group depends upon the number and strength of social network ties that connect group members to each other and to nonmembers. We analyze an event history dataset constructed from interviews using the Life History Calendar method and information on ego-centered social networks developed from the General Social Survey Network Module. We test the hypothesis that network connections inside a group are associated with reduced membership turnover, while connections outside the group increase turnover. We find that weak ties and network connections that span greater distances in sociodemographic space are positively correlated with leaving current groups and joining new ones. We conclude that weak ties are a major source of change in group composition.]}, + file = {/home/nathante/Zotero/storage/3LJGYFDX/McPherson et al. - 1992 - Social Networks and Organizational Dynamics.pdf} +} + +@article{mcpherson_testing_1996, + title = {Testing a {{Dynamic Model}} of {{Social Composition}}: Diversity and {{Change}} in {{Voluntary Groups}}}, + shorttitle = {Testing a {{Dynamic Model}} of {{Social Composition}}}, + author = {McPherson, J. 
Miller and Rotolo, Thomas}, + date = {1996}, + journaltitle = {American Sociological Review}, + volume = {61}, + number = {2}, + eprint = {2096330}, + eprinttype = {jstor}, + pages = {179--202}, + issn = {0003-1224}, + abstract = {[We test a dynamic model of the social composition of voluntary groups. The model is based on the idea that sociodemographic variables define social niches in which voluntary groups grow and decline, share and compete, and change or remain static. The flow of individuals through such groups depends on the competition of other groups for their time and other resources. We build a dynamic model of this process and show how this model can account for changes in the social composition and the social heterogeneity of voluntary groups. We use life history data on the group affiliations of 1,050 individuals from 1974 to 1989 to test hypotheses about the diversity of education among group members and about the mean level of education of the members. Our data strongly support the hypotheses.]}, + file = {/home/nathante/Zotero/storage/KCQZTDG3/McPherson and Rotolo - 1996 - Testing a Dynamic Model of Social Composition Div.pdf} +} + +@article{menge_competition_1972, + title = {Competition for {{Food}} between {{Two Intertidal Starfish Species}} and Its {{Effect}} on {{Body Size}} and {{Feeding}}}, + author = {Menge, Bruce A.}, + date = {1972-07-01}, + journaltitle = {Ecology}, + volume = {53}, + number = {4}, + pages = {635--644}, + issn = {1939-9170}, + abstract = {Two predaceous intertidal starfish that overlap broadly with respect to food, space, and time were found to compete for a limited food supply in the San Juan Islands, Washington State. The experiment involved complete removal of a larger (up to 600 g average wet weight) starfish (Pisaster ochraceus) from a small island—reef and addition of them to a second island—reef while a third reef served as a control. 
In response to Pisaster removal, the mean individual wet weight of the smaller (maximum size = 45 g wet weight) asteroid (Leptasterias hexactis) increased significantly in 15 months. Addition of Pisaster resulted in a significant decrease in Leptasterias size; no change in average Leptasterias size was observed on the control reef. A highly significant inverse correlation between the estimated biomass densities (wet weight/m2) of the two species at 10 areas suggests that competition is widespread and that the species are generally in competitive equilibrium. A major consequence of the small size of Leptasterias is an apparent inability to capture larger prey. Coexistence seems based upon "specialization" by each predator on different-sized prey. Evidently, reduced competition stress results in an increase in Leptasterias's community role as a predator. Conversely, when competition is severe, the community role of Leptasterias appears unimportant.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/45XBILMY/Menge_1972_Competition for Food between Two Intertidal Starfish Species and its Effect on.pdf;/home/nathante/Zotero/storage/KGMIJVUZ/Menge - 1972 - Competition for Food between Two Intertidal Starfi.html} +} + +@inproceedings{menking_people_2019, + ids = {menking_people_2019-1}, + title = {People Who Can Take It: How Women {{Wikipedians}} Negotiate and Navigate Safety}, + booktitle = {Proceedings of the 2019 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Menking, Amanda and Erickson, Ingrid and Pratt, Wanda}, + date = {2019-05}, + series = {{{CHI}} '19}, + pages = {472:1--472:14}, + publisher = {{Association for Computing Machinery}}, + location = {{Glasgow, Scotland, UK}}, + abstract = {Wikipedia is one of the most successful online communities in history, yet it struggles to attract and retain women editors—a phenomenon known as the gender gap. 
We investigate this gap by focusing on the voices of experienced women Wikipedians. In this interview-based study (N=25), we identify a core theme among these voices: safety. We reveal how our participants perceive safety within their community, how they manage their safety both conceptually and physically, and how they act on this understanding to create safe spaces on and off Wikipedia. Our analysis shows Wikipedia functions as both a multidimensional and porous space encompassing a spectrum of safety. Navigating this space requires these women to employ sophisticated tactics related to identity management, boundary management, and emotion work. We conclude with a set of provocations to spur the design of future online environments that encourage equity, inclusivity, and safety for historically marginalized users.}, + isbn = {978-1-4503-5970-2}, + langid = {english}, + keywords = {gender gap,online communities,participation,safe spaces,safety,wikipedia}, + file = {/home/nathante/Zotero/storage/AIBWULEC/Menking et al_2019_People Who Can Take It.pdf;/home/nathante/Zotero/storage/QEPWUCE5/Menking et al. - 2019 - How women Wikipedians negotiate and navigate safety.pdf} +} + +@incollection{merton_sociological_1968, + title = {On {{Sociological Theories}} of the {{Middle Range}}}, + booktitle = {Social {{Theory}} and {{Social Structure}}}, + author = {Merton, Robert}, + date = {1968}, + publisher = {{The Free Press}}, + location = {{New York, NY}}, + file = {/home/nathante/Zotero/storage/LR9B4LLM/02.29_merton_middle_range.pdf} +} + +@article{meyer_social_1994, + title = {Social {{Movement Spillover}}}, + author = {Meyer, David S. 
and Whittier, Nancy}, + date = {1994-05-01}, + journaltitle = {Social Problems}, + volume = {41}, + number = {2}, + eprint = {3096934}, + eprinttype = {jstor}, + pages = {277--298}, + issn = {0037-7791}, + abstract = {Social movements are not distinct and self-contained; rather, they grow from and give birth to other movements, work in coalition with other movements, and influence each other indirectly through their effects on the larger cultural and political environment. Building on both political process and collective identity perspectives, this paper uses a case study of the women's movement's impact on U.S. peace movement activity in the 1980s to develop a theory of movement-movement influence. We argue that this influence is shown by: 1) the adoption of feminist ideological frames by the peace movement; 2) the spread of the women's movement's tactical innovations into peace protest; 3) increased presence of women in leadership positions in both the institutionally-oriented and direct action wings of the movement; and 4) the adoption of organizational structures that built on feminist processes designed to avoid hierarchy. Drawing data from both movements at local and national levels, we suggest four mechanisms of transmission between the movements: 1) organizational coalitions; 2) overlapping social movement communities; 3) shared personnel; and 4) broader changes in the external environment. 
Social movement spillover effects have implications for our understanding of both the continuity and impact of social protest movements.}, + file = {/home/nathante/Zotero/storage/DDRPFW8T/Meyer and Whittier - 1994 - Social Movement Spillover.pdf;/home/nathante/Zotero/storage/G3JTXS4I/41SocProbs277.pdf;/home/nathante/Zotero/storage/FMZ84WB9/Page.html} +} + +@article{minkoff_interorganizational_1995, + title = {Interorganizational Influences on the Founding of African American Organizations, 1955–1985}, + author = {Minkoff, Debra C.}, + date = {1995-03-01}, + journaltitle = {Sociological Forum}, + shortjournal = {Sociol Forum}, + volume = {10}, + number = {1}, + pages = {51--79}, + issn = {1573-7861}, + abstract = {This paper examines the relationship between traditions of social action and patterns of organizational development, using data on the formation of national African American protest, advocacy, and service organizations between 1955 and 1985. Following research in organizational ecology, Poisson regression is used to examine the association between organizational density and organizational formation across strategic forms. The results provide some support for the idea that interorganizational influences are important in shaping the contours of the African American social movement industry. 
Outside funding, internal organizational capacities and protest levels also play a significant role.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/BFLEJ2X4/Minkoff_1995_Interorganizational influences on the founding of african american.pdf} +} + +@article{minkoff_sequencing_1997, + title = {The Sequencing of Social Movements}, + author = {Minkoff, Debra C.}, + date = {1997-10-01}, + journaltitle = {American Sociological Review}, + volume = {62}, + number = {5}, + eprint = {2657360}, + eprinttype = {jstor}, + pages = {779--799}, + issn = {0003-1224}, + file = {/home/nathante/Zotero/storage/5NIVST68/Minkoff-ASR-1997.pdf;/home/nathante/Zotero/storage/XFTWFNT7/Minkoff - 1997 - The Sequencing of Social Movements.pdf} +} + +@article{mittell_sites_2009, + title = {Sites of Participation: Wiki Fandom and the Case of {{Lostpedia}}}, + shorttitle = {Sites of Participation}, + author = {Mittell, Jason}, + date = {2009-07-09}, + journaltitle = {Transformative Works and Cultures}, + shortjournal = {TWC}, + volume = {3}, + issn = {1941-2258}, + abstract = {This essay explores the award-winning fan site Lostpedia to examine how the wiki platform enables fan engagement, structures participation, and distinguishes between various forms of content, including canon, fanon, and parody. I write as a participant-observer, with extensive experience as a Lostpedia reader and editor. The article uses the "digital breadcrumbs" of wikis to trace the history of fan creativity, participation, game play, and debates within a shared site of community fan engagement. Using the Lostpedia site as a case study of fan praxis, the article highlights how issues like competing fandoms, copyright, and modes of discourse become manifest via the user-generated content of a fan wiki.} +} + +@article{monge_communication_2008, + title = {Communication Network Evolution in Organizational Communities}, + author = {Monge, Peter R. and Heiss, Bettina M. 
and Margolin, Drew B.}, + date = {2008-11-01}, + journaltitle = {Communication Theory}, + volume = {18}, + number = {4}, + pages = {449--477}, + issn = {1468-2885}, + abstract = {Organizational communities are typically defined as populations of organizations that are tied together by networks of communication and other relations in overlapping resource niches. Traditionally, evolutionary theorists and researchers have examined organizational populations that comprise organizational communities by focusing on their properties rather than on the networks that link them. However, a full understanding of the evolution of organizational communities requires insight into both organizations and their networks. Consequently, this article presents a variety of conceptual tools for applying evolutionary theory to organizations, organizational communities, and their networks, including the notions of relational carrying capacity and linkage fitness. It illustrates evolutionary principles, such as variation, selection, and retention, that lead to the formation, growth, maintenance, and eventual demise of communication and other network linkages. This perspective allows us to understand the ways in which community survival and success are as dependent on their communication linkages as they are on the organizations they connect. The article concludes with suggestions for potential applications of evolutionary theory to other areas of human communication.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/EX9I2ZQ7/Monge et al. 
- 2008 - Communication network evolution in organizational .pdf;/home/nathante/Zotero/storage/CGNEW4L6/abstract.html} +} + +@article{monge_evolution_2008, + ids = {monge_evolution_2008-1}, + title = {The Evolution of Organizational Communication}, + author = {Monge, Peter and Poole, Marshall Scott}, + date = {2008-12-01}, + journaltitle = {Journal of Communication}, + shortjournal = {J Commun}, + volume = {58}, + number = {4}, + pages = {679--692}, + issn = {0021-9916}, + abstract = {Organizational communication, by its very definition, constitutes an intersection, one that exists between the study of human communication and the study of hum}, + langid = {english}, + file = {/home/nathante/Zotero/storage/AXJW4Y2U/Monge Poole - 2008 - The Evolution of Organizational Communication.pdf;/home/nathante/Zotero/storage/CIRHK5AY/Monge and Poole - 2008 - The evolution of organizational communication.pdf;/home/nathante/Zotero/storage/6S8MJ277/4098380.html;/home/nathante/Zotero/storage/ZSAC9PZZ/abstract.html} +} + +@article{monge_evolutionary_2011, + title = {Evolutionary and Ecological Models for Organizational Communication}, + author = {Monge, Peter R. and Lee, Seungyoon and Fulk, Janet and Frank, Lauren B. 
and Margolin, Drew and Schultz, Courtney and Shen, Cuihua and Weber, Matthew}, + date = {2011}, + journaltitle = {Advancing research in organizational communication through qualitative methodology, Management Communication Quarterly}, + volume = {25}, + number = {1}, + pages = {26--34} +} + +@article{monge_research_2011, + title = {Research {{Methods}} for {{Studying Evolutionary}} and {{Ecological Processes}} in {{Organizational Communication}}}, + shorttitle = {Research {{Methods}} for {{Studying Evolutionary}} and {{Ecological Processes}} in {{Organizational Communication}}}, + author = {Monge, Peter and Lee, Seungyoon and Fulk, Janet and Weber, Matthew and Shen, Cuihua and Schultz, Courtney and Margolin, Drew and Gould, Jessica and Frank, Lauren B.}, + date = {2011-04-17}, + journaltitle = {Management Communication Quarterly}, + shortjournal = {Management Communication Quarterly}, + volume = {25}, + number = {2}, + pages = {211--251}, + issn = {0893-3189}, + abstract = {In a previous MCQ article, Monge et al. overviewed the fundamental concepts and processes of evolutionary theory and their applications to key issues in organiz...}, + langid = {english}, + keywords = {and predator-prey models,ecology,event history analysis,evolutionary theory,network analysis,NKC models,organizational communication,research methods,sequence analysis,simulation}, + file = {/home/nathante/Zotero/storage/UNUF6JS5/Monge et al_2011_Research Methods for Studying Evolutionary and Ecological Processes in.pdf;/home/nathante/Zotero/storage/VHEYY3I3/Monge et al. 
- 2011 - Research Methods for Studying Evolutionary and Eco.pdf;/home/nathante/Zotero/storage/WMGZJHPR/0893318911399447.html} +} + +@inproceedings{morris_comparison_2010, + title = {A {{Comparison}} of {{Information Seeking Using Search Engines}} and {{Social Networks}}}, + booktitle = {Fourth {{International AAAI Conference}} on {{Weblogs}} and {{Social Media}}}, + author = {Morris, Meredith Ringel and Teevan, Jaime and Panovich, Katrina}, + date = {2010-05-16}, + abstract = {The Web has become an important information repository; often it is the first source a person turns to with an information need. One common way to search the Web is with a search engine. However, it is not always easy for people to find what they are looking for with keyword search, and at times the desired information may not be readily available online. An alternative, facilitated by the rise of social media, is to pose a question to one's online social network. In this paper, we explore the pros and cons of using a social networking tool to fill an information need, as compared with a search engine. We describe a study in which 12 participants searched the Web while simultaneously posing a question on the same topic to their social network, and we compare the results they found by each method.}, + eventtitle = {Fourth {{International AAAI Conference}} on {{Weblogs}} and {{Social Media}}}, + langid = {english}, + file = {/home/nathante/Zotero/storage/MS2N5Z3X/Morris et al_2010_A Comparison of Information Seeking Using Search Engines and Social Networks.pdf;/home/nathante/Zotero/storage/D3C4PIU9/1518.html} +} + +@incollection{morris_what_2010, + title = {What Do People Ask Their Social Networks, and Why? 
A Survey Study of Status Message Q\&a Behavior}, + shorttitle = {What Do People Ask Their Social Networks, and Why?}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Morris, Meredith Ringel and Teevan, Jaime and Panovich, Katrina}, + date = {2010-04-10}, + pages = {1739--1748}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {People often turn to their friends, families, and colleagues when they have questions. The recent, rapid rise of online social networking tools has made doing this on a large scale easy and efficient. In this paper we explore the phenomenon of using social network status messages to ask questions. We conducted a survey of 624 people, asking them to share the questions they have asked and answered of their online social networks. We present detailed data on the frequency of this type of question asking, the types of questions asked, and respondents' motivations for asking their social networks rather than using more traditional search tools like Web search engines. We report on the perceived speed and quality of the answers received, as well as what motivates people to respond to questions seen in their friends' status messages. We then discuss the implications of our findings for the design of next-generation search tools.}, + isbn = {978-1-60558-929-9}, + keywords = {q&a,social networks,social search,web search}, + file = {/home/nathante/Zotero/storage/4N6C2AYW/Morris et al_2010_What do people ask their social networks, and why.pdf} +} + +@article{muhtaseb_arab_2008, + title = {Arab {{Americans}}’ {{Motives}} for {{Using}} the {{Internet}} as a {{Functional Media Alternative}} and {{Their Perceptions}} of {{U}}.{{S}}. 
{{Public Opinion}}}, + author = {Muhtaseb, Ahlam and Frey, Lawrence R.}, + date = {2008-04-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {13}, + number = {3}, + pages = {618--657}, + issn = {1083-6101}, + abstract = {This exploratory study employed uses and gratifications theory to understand Arab Americans’ salient motives for using the internet and whether the internet served as a functional alternative to other media to satisfy Arab Americans’ information-seeking and interpersonal needs. Spiral of silence theory also was used to investigate the relationship between Arab Americans’ perceptions of U.S. public opinion and their motives for using the internet. Results from an online questionnaire survey (N = 124) indicated that information seeking was the most salient motive for using the internet and that the internet did serve as a functional alternative, with a significant percentage of the internet sources used being foreign based. There was, however, no relationship between Arab Americans’ perceptions of U.S. public opinion and their motives for using the internet. The findings are discussed with respect to the use of the internet by members of this marginalized cultural group.}, + file = {/home/nathante/Zotero/storage/5PD4EGRG/Muhtaseb and Frey - 2008 - Arab Americans’ Motives for Using the Internet as .pdf;/home/nathante/Zotero/storage/WKH4PJ7L/4582964.html} +} + +@article{munch_frequently_2020, + title = {Frequently Asked Questions about Nonlinear Dynamics and Empirical Dynamic Modelling}, + author = {Munch, Stephan B and Brias, Antoine and Sugihara, George and Rogers, Tanya L}, + date = {2020-07-01}, + journaltitle = {ICES Journal of Marine Science}, + shortjournal = {ICES Journal of Marine Science}, + volume = {77}, + number = {4}, + pages = {1463--1479}, + issn = {1054-3139}, + abstract = {Complex nonlinear dynamics are ubiquitous in marine ecology. 
Empirical dynamic modelling can be used to infer ecosystem dynamics and species interactions while making minimal assumptions. Although there is growing enthusiasm for applying these methods, the background required to understand them is not typically part of contemporary marine ecology curricula, leading to numerous questions and potential misunderstanding. In this study, we provide a brief overview of empirical dynamic modelling, followed by answers to the ten most frequently asked questions about nonlinear dynamics and nonlinear forecasting.}, + file = {/home/nathante/Zotero/storage/6IMNXF2N/Munch et al_2020_Frequently asked questions about nonlinear dynamics and empirical dynamic.pdf;/home/nathante/Zotero/storage/IB8A25JI/5643857.html} +} + +@article{nagaraj_how_2021, + title = {How {{Competition Affects Contributions}} to {{Open Source Platforms}}: Evidence from {{OpenStreetMap}} and {{Google Maps}}}, + author = {Nagaraj, Abhishek and Piezunka, Henning}, + date = {2021}, + pages = {58}, + abstract = {Open source platforms often face competition from commercial alternatives and yet we lack an understanding of whether and how commercial competition affects contributions to open source platforms. We study how contributions to OpenStreetMap, a widely-used open source mapping platform, changed following the competitive entry of Google Maps. We exploit the phased entry of Google Maps in different countries over time to isolate the effect of competition. We find that the entry of Google Maps has a negative effect on contributions to OpenStreetMap, illustrating that commercial competition plays an important role in shaping open source contributions. We then examine if different contributors react differently to competitive entry, finding that new contributors (those who are contributing for the first time) decrease their contributions while pre-existing contributors (those who contributed before competitive entry) increase their contributions. 
We find that the reduction in new contributors seems to be driven by a reduction in consumption of the open source platform. The increase in contributions by pre-existing contributors seems to be associated with their attachment to the platform, which is anchored in their ideological inclination towards open source and to a lesser extent, the social interaction it offers with their fellow contributors.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/QQXAH9ZN/Nagaraj and Piezunka - How Competition Affects Contributions to Open Sour.pdf} +} + +@dataset{narayan_replication_2017, + title = {Replication Data for: The {{Wikipedia Adventure}}: Field Evaluation of an Interactive Tutorial for New Users}, + shorttitle = {Replication {{Data}} For}, + author = {Narayan, Sneha and Orlowitz, Jake and Morgan, Jonathan T. and Shaw, Aaron D. and Hill, Benjamin Mako}, + date = {2017-06-07}, + journaltitle = {Harvard Dataverse}, + abstract = {This dataset contains the data and code necessary to replicate work in the following paper: Narayan, Sneha, Jake Orlowitz, Jonathan Morgan, Benjamin Mako Hill, and Aaron Shaw. 2017. “The Wikipedia Adventure: Field Evaluation of an Interactive Tutorial for New Users.” in Proceedings of the 20th ACM Conference on Computer-Supported Cooperative Work \& Social Computing (CSCW '17). New York, New York: ACM Press. http://dx.doi.org/10.1145/2998181.2998307 The published paper contains two studies. Study 1 is a descriptive analysis of a survey of Wikipedia editors who played a gamified tutorial. Study 2 is a field experiment that evaluated the same tutorial. These data are the data used in the field experiment described in Study 2. Description of Files This dataset contains the following files beyond this README: twa.RData — An RData file that includes all variables used in Study 2. twa\_analysis.R — A GNU R script that includes all the code used to generate the tables and plots related to Study 2 in the paper. 
The RData file contains one variable (d) which is an R dataframe (i.e., table) that includes the following columns: userid (integer): The unique numerical ID representing each user in our sample. These are 8-digit integers and describe public accounts on Wikipedia. sample.date (date string): The day the user was recruited to the study. Dates are formatted in “YYYY-MM-DD” format. In the case of invitees, it is the date their invitation was sent. For users in the control group, this is the date that they would have been invited to the study. edits.all (integer): The total number of edits made by the user on Wikipedia in the 180 days after they joined the study. Edits to user's user pages, user talk pages and subpages are ignored. edits.ns0 (integer): The total number of edits made by user to article pages on Wikipedia in the 180 days after they joined the study. edits.talk (integer): The total number of edits made by user to talk pages on Wikipedia in the 180 days after they joined the study. Edits to a user's user page, user talk page and subpages are ignored. treat (logical): TRUE if the user was invited, FALSE if the user was in control group. play (logical): TRUE if the user played the game. FALSE if the user did not. All users in control are listed as FALSE because any user who had not been invited to the game but played was removed. twa.level (integer): Takes a value of 0 if the user has not played the game. Ranges from 1 to 7 for those who did, indicating the highest level they reached in the game. quality.score (float). This is the average word persistence (over a 6 revision window) over all edits made by this userid. Our measure of word persistence (persistent word revision per word) is a measure of edit quality developed by Halfaker et al. that tracks how long words in an edit persist after subsequent revisions are made to the wiki-page. 
For more information on how word persistence is calculated, see the following paper: Halfaker, Aaron, Aniket Kittur, Robert Kraut, and John Riedl. 2009. “A Jury of Your Peers: Quality, Experience and Ownership in Wikipedia.” In Proceedings of the 5th International Symposium on Wikis and Open Collaboration (OpenSym '09), 1–10. New York, New York: ACM Press. doi:10.1145/1641309.1641332. Or this page: https://meta.wikimedia.org/wiki/Research:Content\_persistence How we created twa.RData The files twa.RData combines datasets drawn from three places: A dataset created by Wikimedia Foundation staff that tracked the details of the experiment and how far people got in the game. The variables userid, sample.date, treat, play, and twa.level were all generated in a dataset created by WMF staff when The Wikipedia Adventure was deployed. All users in the sample created their accounts within 2 days before the date they were entered into the study. None of them had received a Teahouse invitation, a Level 4 user warning, or been blocked from editing at the time that they entered the study. Additionally, all users made at least one edit after the day they were invited. Users were sorted randomly into treatment and control groups, based on which they either received or did not receive an invite to play The Wikipedia Adventure. Edit and text persistence data drawn from public XML dumps created on May 21st, 2015. We used publicly available XML dumps to generate the outcome variables, namely edits.all, edits.ns0, edits.talk and quality.score. We first extracted all edits made by users in our sample during the six month period since they joined the study, excluding edits made to user pages or user talk pages using. 
We parsed the XML dumps using the Python based wikiq and MediaWikiUtilities software online at: http://projects.mako.cc/source/?p=mediawiki\_dump\_tools https://github.com/mediawiki-utilities/python-mediawiki-utilities We obtained the XML dumps from: https://dumps.wikimedia.org/enwiki/ A list of edits made by users in our study that were subsequently deleted, created on August 3rd, 2015. The WMF staff created a dataset that listed all the edits made by users in our study that were deleted before August 3rd, 2015. We made the decision to include these edits in our counts, so as to measure the total level of participation undertaken by each editor. If a user in our study made article or talk page edits that were subsequently deleted, we would use the deleted edit logs to identify them, and increment the variables edits.all, edits.ns0, and edits.talk as appropriate. We decided that all edits drawn from the deleted edit logs would be defined to have an edit persistence score of 0, since they were deleted from Wikipedia. We “manually” merged these datasets together. Contact Us For more details about the dataset, please see our paper. 
If you notice any bugs or issues with these data or code, please contact Sneha Narayan (snehanarayan@u.northwestern.edu) or the other authors of this paper.}, + langid = {english} +} + +@inproceedings{narayan_wikipedia_2017, + title = {The {{Wikipedia Adventure}}: Field Evaluation of an Interactive Tutorial for New Users}, + shorttitle = {The {{Wikipedia Adventure}}}, + booktitle = {Proceedings of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Narayan, Sneha and Orlowitz, Jake and Morgan, Jonathan and Hill, Benjamin Mako and Shaw, Aaron}, + date = {2017}, + series = {{{CSCW}} '17}, + pages = {1785--1799}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Integrating new users into a community with complex norms presents a challenge for peer production projects like Wikipedia. We present The Wikipedia Adventure (TWA): an interactive tutorial that offers a structured and gamified introduction to Wikipedia. In addition to describing the design of the system, we present two empirical evaluations. First, we report on a survey of users, who responded very positively to the tutorial. Second, we report results from a large-scale invitation-based field experiment that tests whether using TWA increased newcomers' subsequent contributions to Wikipedia. We find no effect of either using the tutorial or of being invited to do so over a period of 180 days. We conclude that TWA produces a positive socialization experience for those who choose to use it, but that it does not alter patterns of newcomer activity. We reflect on the implications of these mixed results for the evaluation of similar social computing systems.}, + isbn = {978-1-4503-4335-0}, + file = {/home/nathante/Zotero/storage/3ZFPBYSH/p1785-narayan.pdf} +} + +@book{nardi_information_1999, + title = {Information {{Ecologies}}: Using Technology with Heart}, + author = {Nardi, Bonnie A. 
and O'Day, Vicki L.}, + date = {1999}, + publisher = {{The MIT Press}}, + location = {{Cambridge, Massachusetts}}, + file = {/home/nathante/Zotero/storage/EFBVQ3YV/Nardi and O'Day - 2000 - Information ecologies using technology with heart.pdf} +} + +@article{negro_category_2011-1, + title = {Category {{Reinterpretation}} and {{Defection}}: Modernism and {{Tradition}} in {{Italian Winemaking}}}, + shorttitle = {Category {{Reinterpretation}} and {{Defection}}}, + author = {Negro, Giacomo and Hannan, Michael T. and Rao, Hayagreeva}, + date = {2011-12}, + journaltitle = {Organization Science}, + volume = {22}, + number = {6}, + pages = {1449--1463}, + issn = {1047-7039, 1526-5455}, + langid = {english}, + file = {/home/nathante/Zotero/storage/V8VIC48T/Negro et al. - 2011 - Category Reinterpretation and Defection Modernism.pdf} +} + +@article{newell_user_nodate, + title = {User {{Migration}} in {{Online Social Networks}}: A {{Case Study}} on {{Reddit During}} a {{Period}} of {{Community Unrest}}}, + author = {Newell, Edward and Jurgens, David and Saleem, Haji Mohammad and Vala, Hardik and Sassine, Jad and Armstrong, Caitrin and Ruths, Derek}, + pages = {10}, + abstract = {Platforms like Reddit have attracted large and vibrant communities, but the individuals in those communities are free to migrate to other platforms at any time. History has borne this out with the mass migration from Slashdot to Digg. The underlying motivations of individuals who migrate between platforms, and the conditions that favor migration online are not well-understood. We examine Reddit during a period of community unrest affecting millions of users in the summer of 2015, and analyze large-scale changes in user behavior and migration patterns to Reddit-like alternative platforms. Using self-reported statements from user comments, surveys, and a computational analysis of the activity of users with accounts on multiple platforms, we identify the primary motivations driving user migration. 
While a notable number of Reddit users left for other platforms, we found that an important pull factor that enabled Reddit to retain users was its long tail of niche content. Other platforms may reach critical mass to support popular or “mainstream” topics, but Reddit’s large userbase provides a key advantage in supporting niche topics.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/YM2YE9P9/Newell et al. - User Migration in Online Social Networks A Case S.pdf} +} + +@article{nissenbaum_internet_2017, + title = {Internet Memes as Contested Cultural Capital: The Case of 4chan’s /b/ Board}, + shorttitle = {Internet Memes as Contested Cultural Capital}, + author = {Nissenbaum, Asaf and Shifman, Limor}, + date = {2017-04-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {19}, + number = {4}, + pages = {483--501}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This article explores the workings of memes as cultural capital in web-based communities. A grounded analysis of 4chan’s /b/ board reveals three main formulations of memes as capital, delineating them as subcultural knowledge, unstable equilibriums, and discursive weapons. While the first formulation follows well-documented notions about subcultural knowledge as a basis for boundary work, the latter two focus on the dualities intrinsic to Internet memes. The contradiction between following conventions and supplying innovative content leads to memes’ configuration as unstable equilibriums, triggering constant conflict about their “correct” use. Paradoxically, this struggle highlights collective identity, as it keeps shared culture at the center of discussion. Similarly, when memes are used as jabs at the most intense points of arguments, they function simultaneously as signifiers of superior authoritative status and as reminders of common affinity. 
Thus, the dualities underpinning memes’ structure lead to their performance as contested cultural capital.}, + langid = {english}, + keywords = {4chan,cultural capital,digital culture,Internet memes,web-based communities}, + file = {/home/nathante/Zotero/storage/5D4MWNNV/Nissenbaum and Shifman - 2017 - Internet memes as contested cultural capital The .pdf} +} + +@online{noauthor_amy_nodate, + title = {Amy {{X Zhang}} (@amyxzh) / {{Twitter}}}, + abstract = {@UWCSE professor 👩🏻‍🏫 PI @SocFuturesLab making social tech better! PhD @MIT\_CSAIL Prev @stanfordhci @BKCHarvard @Gates\_Cambridge, tennis 🎾 @rutgersU she/her}, + langid = {english}, + organization = {{Twitter}}, + file = {/home/nathante/Zotero/storage/GJ5V7BSH/amyxzh.html} +} + +@online{noauthor_crowd_nodate, + title = {Crowd {{Size}}, {{Diversity}} and {{Performance}} | {{Proceedings}} of the 33rd {{Annual ACM Conference}} on {{Human Factors}} in {{Computing Systems}}} +} + +@inreference{noauthor_digg_2021, + title = {Digg}, + booktitle = {Wikipedia}, + date = {2021-08-26T10:31:32Z}, + abstract = {Digg is an American news aggregator with a curated front page, aiming to select stories specifically for the Internet audience such as science, trending political issues, and viral Internet issues. It was launched in its current form on July 31, 2012, with support for sharing content to other social platforms such as Twitter and Facebook. It formerly had been a popular social news website, allowing people to vote web content up or down, called digging and burying, respectively. In 2012, Quantcast estimated Digg's monthly U.S. unique visits at 3.8 million. Digg's popularity prompted the creation of similar sites such as Reddit.In July 2008, the former company took part in advanced acquisition talks with Google for a reported \$200 million price tag, but the deal ultimately fell through. 
After a controversial 2010 redesign and the departure of co-founders Jay Adelson and Kevin Rose, in July 2012 Digg was sold in three parts: the Digg brand, website, and technology were sold to Betaworks for an estimated \$500,000; 15 staff were transferred to The Washington Post's "SocialCode" for a reported \$12 million; and a suite of patents was sold to LinkedIn for about \$4 million. Digg was purchased by BuySellAds, an advertising company, for an undisclosed amount in April 2018.}, + langid = {english}, + annotation = {Page Version ID: 1040737272}, + file = {/home/nathante/Zotero/storage/CBTI7R5J/index.html} +} + +@book{noauthor_econometric_2008, + title = {Econometric {{Analysis}} of {{Cross Section}} and {{Panel Data}}}, + author = {Wooldridge, Jeffrey M.}, + date = {2008}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-23258-6}, + langid = {english}, + annotation = {OCLC: 762013440} +} + +@book{north_institutions_1990-1, + title = {Institutions, {{Institutional Change}} and {{Economic Performance}}}, + author = {North, Douglass C.}, + date = {1990}, + series = {Political {{Economy}} of {{Institutions}} and {{Decisions}}}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge}}, + abstract = {Continuing his groundbreaking analysis of economic structures, Douglass North develops an analytical framework for explaining the ways in which institutions and institutional change affect the performance of economies, both at a given time and over time. Institutions exist, he argues, due to the uncertainties involved in human interaction; they are the constraints devised to structure that interaction. Yet, institutions vary widely in their consequences for economic performance; some economies develop institutions that produce growth and development, while others develop institutions that produce stagnation. North first explores the nature of institutions and explains the role of transaction and production costs in their development. 
The second part of the book deals with institutional change. Institutions create the incentive structure in an economy, and organisations will be created to take advantage of the opportunities provided within a given institutional framework. North argues that the kinds of skills and knowledge fostered by the structure of an economy will shape the direction of change and gradually alter the institutional framework. He then explains how institutional development may lead to a path-dependent pattern of development. In the final part of the book, North explains the implications of this analysis for economic theory and economic history. He indicates how institutional analysis must be incorporated into neo-classical theory and explores the potential for the construction of a dynamic theory of long-term economic change. Douglass C. North is Director of the Center of Political Economy and Professor of Economics and History at Washington University in St. Louis. He is a past president of the Economic History Association and Western Economics Association and a Fellow, American Academy of Arts and Sciences. He has written over sixty articles for a variety of journals and is the author of The Rise of the Western World: A New Economic History (CUP, 1973, with R.P. Thomas) and Structure and Change in Economic History (Norton, 1981). Professor North is included in Great Economists Since Keynes edited by M. Blaug (CUP, 1988 paperback ed.)}, + isbn = {978-0-521-39416-1} +} + +@article{novak_characterizing_2016, + title = {Characterizing {{Species Interactions}} to {{Understand Press Perturbations}}: What {{Is}} the {{Community Matrix}}?}, + shorttitle = {Characterizing {{Species Interactions}} to {{Understand Press Perturbations}}}, + author = {Novak, Mark and Yeakel, Justin D. and Noble, Andrew E. and Doak, Daniel F. and Emmerson, Mark and Estes, James A. and Jacob, Ute and Tinker, M. Timothy and Wootton, J. 
Timothy}, + date = {2016}, + journaltitle = {Annual Review of Ecology, Evolution, and Systematics}, + volume = {47}, + number = {1}, + pages = {409--432}, + abstract = {The community matrix is among ecology's most important mathematical abstractions, formally encapsulating the interconnected network of effects that species have on one another's populations. Despite its importance, the term “community matrix” has been applied to multiple types of matrices that have differing interpretations. This has hindered the application of theory for understanding community structure and perturbation responses. Here, we clarify the correspondence and distinctions among the Interaction matrix, the Alpha matrix, and the Jacobian matrix, terms that are frequently used interchangeably as well as synonymously with the term “community matrix.” We illustrate how these matrices correspond to different ways of characterizing interaction strengths, how they permit insights regarding different types of press perturbations, and how these are related by a simple scaling relationship. Connections to additional interaction strength characterizations encapsulated by the Beta matrix, the Gamma matrix, and the Removal matrix are also discussed. Our synthesis highlights the empirical challenges that remain in using these tools to understand actual communities.}, + annotation = {\_eprint: https://doi.org/10.1146/annurev-ecolsys-032416-010215}, + file = {/home/nathante/Zotero/storage/5JMM6PUA/Novak et al_2016_Characterizing Species Interactions to Understand Press Perturbations.pdf} +} + +@inproceedings{oday_orienteering_1993, + title = {Orienteering in an Information Landscape: How Information Seekers Get from Here to There}, + shorttitle = {Orienteering in an Information Landscape}, + booktitle = {Proceedings of the {{SIGCHI}} Conference on {{Human}} Factors in Computing Systems - {{CHI}} '93}, + author = {O'Day, Vicki L. 
and Jeffries, Robin}, + date = {1993}, + pages = {438--445}, + publisher = {{ACM Press}}, + location = {{Amsterdam, The Netherlands}}, + eventtitle = {The {{SIGCHI}} Conference}, + isbn = {978-0-89791-575-5}, + langid = {english} +} + +@article{oliver_paradox_1988, + title = {The {{Paradox}} of {{Group Size}} in {{Collective Action}}: A {{Theory}} of the {{Critical Mass}}. {{II}}.}, + shorttitle = {The {{Paradox}} of {{Group Size}} in {{Collective Action}}}, + author = {Oliver, Pamela E. and Marwell, Gerald}, + date = {1988}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {53}, + number = {1}, + eprint = {2095728}, + eprinttype = {jstor}, + pages = {1--8}, + issn = {0003-1224}, + abstract = {Many sociologists incorrectly believe that larger groups are less likely to support collective action than smaller ones. The effect of group size, in fact, depends on costs. If the costs of collective goods rise with the number who share in them, larger groups act less frequently than smaller ones. If the costs vary little with group size, larger groups should exhibit more collective action than smaller ones because larger groups have more resources and are more likely to have a critical mass of highly interested and resourceful actors. The positive effects of group size increase with group heterogeneity and nonrandom social ties. 
Paradoxically, when groups are heterogeneous, fewer contributors may be needed to provide a good to larger groups, making collective action less complex and less expensive.}, + file = {/home/nathante/Zotero/storage/KDKQCV4I/Oliver and Marwell - 1988 - The Paradox of Group Size in Collective Action A .pdf} +} + +@book{olson_logic_1965, + title = {The Logic of Collective Action: Public Goods and the Theory of Groups}, + shorttitle = {The Logic of Collective Action}, + author = {Olson, Mancur}, + date = {1965}, + publisher = {{Harvard University Press}}, + location = {{Cambridge, MA}}, + langid = {english}, + keywords = {Business \& Economics / Economics / General}, + file = {/home/nathante/Zotero/storage/6D295U4U/Olson - 1965 - The logic of collective action Public goods and t.pdf} +} + +@article{olzak_ecology_2001, + title = {The Ecology of Tactical Overlap}, + author = {Olzak, Susan and Uhrig, S. C. Noah}, + date = {2001-10}, + journaltitle = {American Sociological Review}, + volume = {66}, + number = {5}, + eprint = {3088954}, + eprinttype = {jstor}, + pages = {694}, + issn = {00031224}, + keywords = {uses overlap for density}, + file = {/home/nathante/Zotero/storage/23WSU752/3088954.pdf} +} + +@article{opp_modeling_2011-1, + title = {Modeling {{Micro}}-{{Macro Relationships}}: Problems and {{Solutions}}}, + shorttitle = {Modeling {{Micro}}-{{Macro Relationships}}}, + author = {Opp, Karl-Dieter}, + date = {2011-01-25}, + journaltitle = {The Journal of Mathematical Sociology}, + volume = {35}, + number = {1-3}, + pages = {209--234}, + issn = {0022-250X, 1545-5874}, + abstract = {This article discusses several problems of the micro-macro model, as it is depicted in its simplest form as the Coleman scheme. There is a macroproposition, its independent variables have causal effects on independent variables of a microtheory, and the dependent variable of the micro-theory has a causal impact on the dependent variable of the macroproposition. 
This scheme is used to identify the basic possible problems of micro-macro modeling which are then discussed. Strengths and possible weaknesses of a wide version of the theory of rational action are analyzed. The article further provides a detailed analysis of the relationships between the micro- and macro-level.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/H4UFEE5E/Opp - 2011 - Modeling Micro-Macro Relationships Problems and S.pdf} +} + +@article{oreilly_work_1989, + title = {Work {{Group Demography}}, {{Social Integration}}, and {{Turnover}}}, + author = {O'Reilly, Charles A. and Caldwell, David F. and Barnett, William P.}, + date = {1989}, + journaltitle = {Administrative Science Quarterly}, + volume = {34}, + number = {1}, + eprint = {2392984}, + eprinttype = {jstor}, + pages = {21--37}, + publisher = {{[Sage Publications, Inc., Johnson Graduate School of Management, Cornell University]}}, + issn = {0001-8392}, + abstract = {Using 20 actual work units with 79 respondents, this study explores the relationships among group demography, social integration of the group, and individual turnover. Results suggest that heterogeneity in group tenure is associated with lower levels of group social integration which, in turn, is negatively associated with individual turnover. Models of these effects using individual-level integration measures are not significant. Further, the results suggest that it is the more distant group members who are more likely to leave. Both individual-level and group-level age demography directly affect turnover and are not moderated by social integration. 
The findings suggest a process by which group demography affects outcomes and support the usefulness of organizational demography for understanding group and individual functioning.} +} + +@inproceedings{orlikowski_learning_1992, + title = {Learning from Notes: Organizational Issues in Groupware Implementation}, + shorttitle = {Learning from {{Notes}}}, + booktitle = {Proceedings of the 1992 {{ACM Conference}} on {{Computer}}-Supported {{Cooperative Work}}}, + author = {Orlikowski, Wanda J.}, + date = {1992}, + series = {{{CSCW}} '92}, + pages = {362--369}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {This paper explores the introduction of groupware into an organization to understand the changes in work practices and social interaction facilitated by the technology. The results suggest that people’s mental models and organizations’ structure and culture significantly influence how groupware is implemented and used. Specifically, in the absence of mental models that stressed its collaborative nature, groupware was interpreted in terms of familiar personal, stand-alone technologies such as spreadsheets. Further, the culture and structure provided few incentives or norms for cooperating or sharing expertise, hence the groupware on its own was unlikely to engender collaboration. 
Recognizing the central influence of these cognitive and organizational elements is critical to developers, researchers, and practitioners of groupware.}, + isbn = {978-0-89791-542-7}, + venue = {Toronto, Ontario, Canada}, + keywords = {groupware,implementation,Lotus Notes,organizational factors,Technological Frames}, + file = {/home/nathante/Zotero/storage/VAHU9XE7/Orlikowski - 1992 - Learning from Notes Organizational Issues in Grou.pdf} +} + +@incollection{ostrom_public_1977, + title = {Public Goods and Public Choices}, + booktitle = {Alternatives {{For Delivering Public Services}}: Toward {{Improved Performance}}}, + author = {Ostrom, Vincent and Ostrom, Elinor}, + editor = {Savas, Emanuel S.}, + date = {1977}, + pages = {7--49}, + publisher = {{Westview Press}}, + location = {{Boulder, CO}} +} + +@article{park_human_1936, + title = {Human {{Ecology}}}, + author = {Park, Robert Ezra}, + date = {1936-07-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {42}, + number = {1}, + pages = {1--15}, + issn = {0002-9602}, + abstract = {Human ecology is an attempt to apply to the interrelations of human beings a type of analysis previously applied to the interrelations of plants and animals. The term "symbiosis" describes a type of social relationship that is biotic rather than cultural. This biotic social order comes into existence and is maintained by competition. In plant and animal societies competition is unrestricted by an institutional or moral order. Human society is a consequence and effect of this limitation of the symbiotic social order by the cultural. 
Different social sciences are concerned with the forms which this limitation of the natural or ecological social order assumes on (1) the economic, (2) the political, and (3) the moral level.}, + file = {/home/nathante/Zotero/storage/CBVGR8RU/Park - 1936 - Human Ecology.pdf;/home/nathante/Zotero/storage/UKMY6VUE/217327.html} +} + +@article{pedregosa_scikit-learn:_2011, + ids = {pedregosa_scikit-learn_2011}, + title = {Scikit-Learn: Machine Learning in Python}, + shorttitle = {Scikit-Learn}, + author = {Pedregosa, Fabian and Varoquaux, Gaël and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, Édouard}, + date = {2011-10}, + journaltitle = {Journal of Machine Learning Research}, + volume = {12}, + number = {85}, + pages = {2825--2830}, + abstract = {Scikit-learn is a Python module integrating a wide range of state-of-the-art machine learning algorithms for medium-scale supervised and unsupervised problems. This package focuses on bringing machine learning to non-specialists using a general-purpose high-level language. Emphasis is put on ease of use, performance, documentation, and API consistency. It has minimal dependencies and is distributed under the simplified BSD license, encouraging its use in both academic and commercial settings. Source code, binaries, and documentation can be downloaded from http://scikit-learn.sourceforge.net.}, + file = {/home/nathante/Zotero/storage/4TQWE3MC/Pedregosa et al_2011_Scikit-learn.pdf;/home/nathante/Zotero/storage/6XS2PM2P/Pedregosa et al. 
- 2011 - Scikit-learn Machine Learning in Python.pdf} +} + +@book{peters_speaking_1999, + title = {Speaking into the Air: A History of the Idea of Communication}, + shorttitle = {Speaking into the Air}, + author = {Peters, John Durham}, + date = {1999}, + publisher = {{The University of Chicago Press}}, + location = {{Chicago; London}}, + isbn = {978-0-226-66277-0}, + langid = {english} +} + +@article{pfaff_var_2008, + title = {{{VAR}}, {{SVAR}} and {{SVEC Models}}: Implementation {{Within R Package}} {{vars}}}, + shorttitle = {{{VAR}}, {{SVAR}} and {{SVEC Models}}}, + author = {Pfaff, Bernhard}, + date = {2008-07-29}, + journaltitle = {Journal of Statistical Software}, + volume = {27}, + number = {4}, + pages = {1--32}, + issn = {1548-7660}, + issue = {4}, + langid = {english}, + file = {/home/nathante/Zotero/storage/RH6KYQN4/Pfaff_2008_VAR, SVAR and SVEC Models.pdf;/home/nathante/Zotero/storage/TV4UBIR4/v027i04.html} +} + +@article{pfeil_cultural_2006, + ids = {pfeil_cultural_2006-1}, + title = {Cultural Differences in Collaborative Authoring of Wikipedia}, + author = {Pfeil, Ulrike and Zaphiris, Panayiotis and Ang, Chee Siang}, + date = {2006}, + journaltitle = {Journal of Computer-Mediated Communication}, + volume = {12}, + number = {1}, + pages = {88--113}, + publisher = {{Oxford Academic}}, + issn = {1083-6101}, + abstract = {This article explores the relationship between national culture and computer-mediated communication (CMC) in Wikipedia. The articles on the topic game from the French, German, Japanese, and Dutch Wikipedia websites were studied using content analysis methods. Correlations were investigated between patterns of contributions and the four dimensions of cultural influences proposed by Hofstede (Power Distance, Collectivism versus Individualism, Femininity versus Masculinity, and Uncertainty Avoidance). 
The analysis revealed cultural differences in the style of contributions across the cultures investigated, some of which are correlated with the dimensions identified by Hofstede. These findings suggest that cultural differences that are observed in the physical world also exist in the virtual world.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/25UVU6KP/Pfeil et al. - 2006 - Cultural Differences in Collaborative Authoring of.pdf;/home/nathante/Zotero/storage/HTBSK98G/Pfeil et al. - 2006 - Cultural differences in collaborative authoring of.pdf;/home/nathante/Zotero/storage/NG42CGVS/4582988.html;/home/nathante/Zotero/storage/NN9FT3QC/4582988.html} +} + +@article{piantadosi_ecological_1988, + title = {The Ecological Fallacy}, + author = {Piantadosi, Steven and Byar, David P and Green, Sylvan B}, + date = {1988}, + journaltitle = {American Journal of Epidemiology}, + volume = {127}, + pages = {893--904}, + langid = {english}, + file = {/home/nathante/Zotero/storage/2UZWZ4L5/Piantadosi et al. - THE ECOLOGICAL FALLACY.pdf} +} + +@online{piccardi_value_2021, + title = {On the {{Value}} of {{Wikipedia}} as a {{Gateway}} to the {{Web}}}, + author = {Piccardi, Tiziano and Redi, Miriam and Colavizza, Giovanni and West, Robert}, + date = {2021-02-15}, + eprint = {2102.07385}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {By linking to external websites, Wikipedia can act as a gateway to the Web. To date, however, little is known about the amount of traffic generated by Wikipedia’s external links. We fill this gap in a detailed analysis of usage logs gathered from Wikipedia users’ client devices. Our analysis proceeds in three steps: First, we quantify the level of engagement with external links, finding that, in one month, English Wikipedia generated 43M clicks to external websites, in roughly even parts via links in infoboxes, cited references, and article bodies. 
Official links listed in infoboxes have by far the highest click-through rate (CTR), 2.47\% on average. In particular, official links associated with articles about businesses, educational institutions, and websites have the highest CTR, whereas official links associated with articles about geographical content, television, and music have the lowest CTR. Second, we investigate patterns of engagement with external links, finding that Wikipedia frequently serves as a stepping stone between search engines and third-party websites, effectively fulfilling information needs that search engines do not meet. Third, we quantify the hypothetical economic value of the clicks received by external websites from English Wikipedia, by estimating that the respective website owners would need to pay a total of \$7–13 million per month to obtain the same volume of traffic via sponsored search. Overall, these findings shed light on Wikipedia’s role not only as an important source of information, but also as a high-traffic gateway to the broader Web ecosystem.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computers and Society,Computer Science - Digital Libraries}, + file = {/home/nathante/Zotero/storage/5WJ3IXV7/Piccardi et al. - 2021 - On the Value of Wikipedia as a Gateway to the Web.pdf} +} + +@article{pikovsky_reconstruction_2016, + title = {Reconstruction of a Neural Network from a Time Series of Firing Rates}, + author = {Pikovsky, A.}, + date = {2016-06-20}, + journaltitle = {Physical Review E}, + shortjournal = {Phys. Rev. E}, + volume = {93}, + number = {6}, + pages = {062313}, + abstract = {Randomly coupled neural fields demonstrate irregular variation of firing rates, if the coupling is strong enough, as has been shown by [Phys. Rev. Lett. 61, 259 (1988)]. We present a method for reconstruction of the coupling matrix from a time series of irregular firing rates. 
The approach is based on the particular property of the nonlinearity in the coupling, as the latter is determined by a sigmoidal gain function. We demonstrate that for a large enough data set and a small measurement noise, the method gives an accurate estimation of the coupling matrix and of other parameters of the system, including the gain function.}, + file = {/home/nathante/Zotero/storage/HJJ6V4F9/Pikovsky - 2016 - Reconstruction of a neural network from a time ser.pdf;/home/nathante/Zotero/storage/QFCBD7F5/PhysRevE.93.html} +} + +@article{pontikes_ecology_2014, + title = {An {{Ecology}} of {{Social Categories}}}, + author = {Pontikes, Elizabeth and Hannan, Michael}, + date = {2014}, + journaltitle = {Sociological Science}, + volume = {1}, + pages = {311--343}, + issn = {23306696}, + abstract = {This article proposes that meaningful social classification emerges from an ecological dynamic that operates in two planes: feature space and label space. It takes a dynamic view of classification, allowing objects’ movements in both spaces to change the meaning of social categories. The first part of the theory argues that agents assign labels to objects based on perceptions of their similarities to existing members of a category. The second part of the theory shows that an object’s perceived similarity to members of other categories reduces its typicality in a focal category. This means that for categories with a high degree of overlap with other categories in label space (lenient categories), the link between feature-based similarities and labeling weakens. The findings suggest that social classification will likely evolve to contain both constraining and lenient categories. 
The theory implies that this process is self-reinforcing, so that constraining categories become more constraining, whereas lenient categories become more lenient.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/XPRTHWKT/Pontikes and Hannan - 2014 - An Ecology of Social Categories.pdf} +} + +@article{poor_computer_2014, + title = {Computer Game Modders’ Motivations and Sense of Community: A Mixed-Methods Approach}, + shorttitle = {Computer Game Modders’ Motivations and Sense of Community}, + author = {Poor, Nathaniel}, + date = {2014-12-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {16}, + number = {8}, + pages = {1249--1267}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Computer game modding, from modifying, combines several important issues: digital skills, play, community, making, and remixing. Yet, little academic work has explored the motivations and sense of community that modders have. This study is the first quantitative survey of game modders, and combines quantitative survey data with qualitative interview material. Findings suggest that modders are both old and young, mod more than one game or game series, have a strong sense of community, and enjoy helping others. Many respondents had contributed to other mods or had co-authored mods, and modding communities may function as online collaboratories. 
Although some research stresses how modders hope to get jobs in the gaming industry, overall the industry was not a motivator for most respondents.}, + langid = {english}, + keywords = {Collaboration,games,modding,motivation,online community}, + file = {/home/nathante/Zotero/storage/SY3IWUL2/Poor - 2014 - Computer game modders’ motivations and sense of co.pdf} +} + +@article{poor_mechanisms_2005, + title = {Mechanisms of an {{Online Public Sphere}}: The {{Website Slashdot}}}, + shorttitle = {Mechanisms of an {{Online Public Sphere}}}, + author = {Poor, Nathaniel}, + date = {2005-01-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {Journal of Computer-Mediated Communication}, + volume = {10}, + issn = {1083-6101}, + abstract = {Both the theory of the public sphere and the utopian rhetoric surrounding the Internet have been a focus of scholars for some time. Given the ability of people to connect with others around the globe through the Internet, could the Internet give rise to online public spheres? If so, how would such spaces work? This article proposes that public spheres do exist on the Internet, and details how one functions. The case under study is the website Slashdot (http://slashdot.org), an online community of computer enthusiasts. The article studies the mechanisms, both normative and in code, that are vital to Slashdot's functioning, and shows how they help Slashdot function as a public sphere.}, + issue = {JCMC1028}, + file = {/home/nathante/Zotero/storage/5V4CJ2HJ/4614448.html} +} + +@article{popielarz_edge_1995, + title = {On the {{Edge}} or {{In Between}}: Niche {{Position}}, {{Niche Overlap}}, and the {{Duration}} of {{Voluntary Association Memberships}}}, + shorttitle = {On the {{Edge}} or {{In Between}}}, + author = {Popielarz, Pamela A. and McPherson, J. 
Miller}, + date = {1995-11-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {101}, + number = {3}, + pages = {698--720}, + issn = {0002-9602}, + abstract = {This paper aims to explain a major barrier to societal integration: the remarkable homogeneity of voluntary associations. The explanation derives from an ecological theory of voluntary affiliation that asserts that organizations compete for members in a property space defined by the sociodemographic characteristics of members. Voluntary organizations lose fastest those members who are either atypical of the group (the niche edge hypothesis) or subject to competition from other groups (the niche overlap hypothesis). The authors analyze an event-history data set, generated by the life-history calendar approach, of 2,813 voluntary association membership spells. The results, which strongly support both the niche edge and niche overlap hypotheses, substantiate the competitive ecological model of group structure.}, + file = {/home/nathante/Zotero/storage/6FLG9VFY/Popielarz and McPherson - 1995 - On the Edge or In Between Niche Position, Niche O.pdf;/home/nathante/Zotero/storage/B82LWTGA/230757.html} +} + +@article{poteete_heterogeneity_2004, + title = {Heterogeneity, {{Group Size}} and {{Collective Action}}: The {{Role}} of {{Institutions}} in {{Forest Management}}}, + shorttitle = {Heterogeneity, {{Group Size}} and {{Collective Action}}}, + author = {Poteete, Amy R. and Ostrom, Elinor}, + date = {2004}, + journaltitle = {Development and Change}, + volume = {35}, + number = {3}, + pages = {435--461}, + issn = {1467-7660}, + abstract = {Collective action for sustainable management among resource-dependent populations has important policy implications. Despite considerable progress in identifying factors that affect the prospects for collective action, no consensus exists about the role played by heterogeneity and size of group. 
The debate continues in part because of a lack of uniform conceptualization of these factors, the existence of non-linear relationships, and the mediating role played by institutions. This article draws on research by scholars in the International Forestry Resources and Institutions (IFRI) research network which demonstrates that some forms of heterogeneity do not negatively affect some forms of collective action. More importantly, IFRI research draws out the interrelations among group size, heterogeneity, and institutions. Institutions can affect the level of heterogeneity or compensate for it. Group size appears to have a non-linear relationship to at least some forms of collective action. Moreover, group size may be as much an indicator of institutional success as a precondition for such success.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/MVD6QER6/Poteete and Ostrom - 2004 - Heterogeneity, Group Size and Collective Action T.pdf} +} + +@article{powell_network_2005, + title = {Network {{Dynamics}} and {{Field Evolution}}: The {{Growth}} of {{Interorganizational Collaboration}} in the {{Life Sciences}}}, + shorttitle = {Network {{Dynamics}} and {{Field Evolution}}}, + author = {Powell, Walter W. and White, Douglas R. and Koput, Kenneth W. and Owen‐Smith, Jason}, + date = {2005-01-01}, + journaltitle = {American Journal of Sociology}, + shortjournal = {American Journal of Sociology}, + volume = {110}, + number = {4}, + pages = {1132--1205}, + issn = {0002-9602}, + abstract = {A recursive analysis of network and institutional evolution is offered to account for the decentralized structure of the commercial field of the life sciences. Four alternative logics of attachment—accumulative advantage, homophily, follow‐the‐trend, and multiconnectivity—are tested to explain the structure and dynamics of interorganizational collaboration in biotechnology. 
Using multiple novel methods, the authors demonstrate how different rules for affiliation shape network evolution. Commercialization strategies pursued by early corporate entrants are supplanted by universities, research institutes, venture capital, and small firms. As organizations increase their collaborative activities and diversify their ties to others, cohesive subnetworks form, characterized by multiple, independent pathways. These structural components, in turn, condition the choices and opportunities available to members of a field, thereby reinforcing an attachment logic based on differential connections to diverse partners.}, + file = {/home/nathante/Zotero/storage/EF4XB53L/Powell et al. - 2005 - Network Dynamics and Field Evolution The Growth o.pdf;/home/nathante/Zotero/storage/LHDCZSJ8/Powell et al. - 2005 - Network Dynamics and Field Evolution The Growth o.pdf;/home/nathante/Zotero/storage/DMFDV96J/421508.html;/home/nathante/Zotero/storage/IA9J8P9S/421508.html} +} + +@inproceedings{raban_empirical_2010, + title = {An Empirical Study of Critical Mass and Online Community Survival}, + booktitle = {Proceedings of the 2010 {{ACM}} Conference on {{Computer}} Supported Cooperative Work}, + author = {Raban, Daphne R. and Moldovan, Mihai and Jones, Quentin}, + date = {2010-02-06}, + series = {{{CSCW}} '10}, + pages = {71--80}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {There is general consensus that critical mass at inception ensures the sustained success of online communities. However, no clear understanding of what constitutes such a 'critical mass' exists and too few quantitative studies have been conducted into the relationship between initial online community interaction and its longer term success to draw any conclusions. In this paper we start to address this gap through a large-scale study of the relationship between IRC chat channel survival and initial chat channel community interactions. 
A sample of 282 chat channel births was used for survival analysis which explored the relationship between the overall user activity in each channel at its inception and the channel's life expectancy. Significant relationships were observed between online community lifespan and critical mass measures: 1) message volume, 2) user population heterogeneity and 3) production functions. The results lend support to the Critical Mass Theory of collective action.}, + isbn = {978-1-60558-795-0}, + keywords = {chat,computer-mediated communication,critical mass,irc,online community system design,synchronous communication}, + file = {/home/nathante/Zotero/storage/3R624RH8/Raban et al. - 2010 - An empirical study of critical mass and online com.pdf;/home/nathante/Zotero/storage/FWUBVMVX/Raban et al. - 2010 - An Empirical Study of Critical Mass and Online Com.pdf;/home/nathante/Zotero/storage/R3HYF2TL/Raban et al. - 2010 - An Empirical Study of Critical Mass and Online Com.pdf} +} + +@book{rankin_official_2009, + title = {The Official {{Ubuntu}} Server Book}, + author = {Rankin, Kyle and Hill, Benjamin Mako}, + date = {2009}, + publisher = {{Prentice Hall}}, + location = {{Upper Saddle River, NJ}}, + isbn = {978-0-13-702118-5}, + langid = {english}, + annotation = {OCLC: 1001929364} +} + +@article{ransbotham_membership_2011, + title = {Membership Turnover and Collaboration Success in Online Communities: Explaining Rises and Falls from Grace in {{Wikipedia}}}, + shorttitle = {Membership Turnover and Collaboration Success in Online Communities}, + author = {Ransbotham, Sam and Kane, Gerald C.}, + date = {2011}, + journaltitle = {MIS Quarterly}, + volume = {35}, + number = {3}, + pages = {613}, + file = {/home/nathante/Zotero/storage/76S4J3K6/8.html} +} + +@article{ratkiewicz_characterizing_2010, + ids = {ratkiewicz_characterizing_2010-1}, + title = {Characterizing and {{Modeling}} the {{Dynamics}} of {{Online Popularity}}}, + author = {Ratkiewicz, Jacob and Fortunato, Santo and 
Flammini, Alessandro and Menczer, Filippo and Vespignani, Alessandro}, + date = {2010}, + journaltitle = {Physical Review Letters}, + shortjournal = {Phys. Rev. Lett.}, + volume = {105}, + number = {15}, + publisher = {{American Physical Society}}, + issn = {0031-9007, 1079-7114}, + langid = {english}, + file = {/home/nathante/Zotero/storage/FVI92CW6/Ratkiewicz et al_2010_Characterizing and Modeling the Dynamics of Online Popularity.pdf;/home/nathante/Zotero/storage/K9SB2PSJ/Ratkiewicz et al. - 2010 - Characterizing and Modeling the Dynamics of Online.pdf;/home/nathante/Zotero/storage/E96VF6G5/PhysRevLett.105.html} +} + +@incollection{resnick_starting_2012, + title = {Starting New Online Communities}, + booktitle = {Building Successful Online Communities: Evidence-Based Social Design}, + author = {Resnick, Paul and Konstan, Joseph and Chen, Yan and Kraut, Robert E}, + date = {2012}, + pages = {231--280}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-29831-5}, + file = {/home/nathante/Zotero/storage/GFUVQWNN/06-Resnick10-Startup-current.pdf} +} + +@article{ridgeway_status_1982, + title = {Status in {{Groups}}: The {{Importance}} of {{Motivation}}}, + shorttitle = {Status in {{Groups}}}, + author = {Ridgeway, Cecilia L.}, + date = {1982}, + journaltitle = {American Sociological Review}, + volume = {47}, + number = {1}, + eprint = {2095043}, + eprinttype = {jstor}, + pages = {76--88}, + issn = {0003-1224}, + abstract = {This paper presents evidence that members' perceived motivation towards the group is an important determinant of the influence and status they attain in task-oriented groups. 
Following Meeker and Weitzel-O'Neill (1977) and Ridgeway (1978), it was suggested that people who enter a group with low external status characteristics (e.g., women in mixed sex groups, blacks in interracial groups) can use the communication of group-oriented motivation in combination with reasonably competent task contributions to overcome the fundamental inequality ("interaction disability") they would normally face, and achieve reasonably high levels of influence in the group. Results of an experiment using mixed and same sex groups showed that while group-oriented members are generally more influential than self-oriented ones, as predicted, the size of motivation's effect is dependent upon the member's external status characteristics. Females in male groups (low external status members) achieved fairly high influence and status when they appeared group-oriented, but very low status when self-oriented. As expected males in a female group (high external status members) achieved high influence regardless of their motivation.}, + file = {/home/nathante/Zotero/storage/F5GJIJMB/Ridgeway-1982-Status_in_groups.pdf} +} + +@book{ridgeway_status_2019, + title = {Status: Why Is It Everywhere? Why Does It Matter?}, + shorttitle = {Status}, + author = {Ridgeway, Cecilia L}, + date = {2019}, + abstract = {"Status is ubiquitous in modern life, yet our understanding of its role as a basic driver of inequality is surprisingly limited. In Status, sociologist and social psychologist Cecilia Ridgeway examines how this ancient and universal form of inequality influences today's ostensibly meritocratic institutions and why it matters. Ridgeway illuminates the complex ways in which status arises when people work together towards common goals, such as in classroom discussions, family decisions, or workplace deliberations. Ridgeway's research on status has important implications for our understanding of social inequality. 
Distinct from power or wealth, status is prized because it provides affirmation from others and affords access to valuable resources. Ridgeway demonstrates how the conferral of status inevitably leads to differing life outcomes for individuals, with impacts on pay, wealth creation, and health and wellbeing. Status beliefs are widely held views about who is better in society than others in terms of esteem, wealth, or competence. These beliefs ultimately confer advantages which can exacerbate social inequality. Ridgeway notes that status advantages based on race, gender, and class, such as the belief that white men are more competent than others because of their race and gender, have the greatest consequences for inequality by affording greater social and economic opportunities. Ridgeway argues that status beliefs make lower status groups less likely to challenge the status quo and greatly enhance higher status groups' ability to maintain their advantages in resources and access to positions of power. She illustrates how many lower status people, when given a baseline level of dignity and respect - being seen, for example, as poor but hardworking - will accept their lower status. She also shows that people remain willfully blind to status beliefs and their effects because recognizing them can lead to emotional discomfort. Acknowledging the insidious role of status in our lives would require many higher-status individuals to accept that they may not have succeeded based on their own merit; and many lower-status individuals would have to acknowledge that they may have been discriminated against. While Ridgeway notes the profound impact of status on society, she suggests that social inequality is not an inevitable consequence of our status beliefs. 
She shows how status beliefs can be undermined - as when we reject the idea that all racial and gender traits are fixed at birth, thus disrupting the idea that women and people of color are less competent than their male and white counterparts. Ridgeway both notes the profound impact of status on social inequality and charts a way forward that may allow it to have a less detrimental impact on our lives"--}, + isbn = {978-1-61044-889-5}, + langid = {english}, + annotation = {OCLC: 1104214327}, + file = {/home/nathante/Zotero/storage/ZNCJF4F3/Ridgeway_2019_Status.pdf} +} + +@article{ridings_antecedents_2002, + title = {Some Antecedents and Effects of Trust in Virtual Communities}, + author = {Ridings, Catherine M and Gefen, David and Arinze, Bay}, + date = {2002-12-01}, + journaltitle = {The Journal of Strategic Information Systems}, + shortjournal = {The Journal of Strategic Information Systems}, + volume = {11}, + number = {3}, + pages = {271--295}, + issn = {0963-8687}, + abstract = {This study explores several downstream effects of trust in virtual communities and the antecedents of trust in this unique type of environment. The data, applying an existing scale to measure two dimensions of trust (ability and benevolence/integrity), show that trust had a downstream effect on members' intentions to both give information and get information through the virtual community. 
Both these apparent dimensions of trust were increased through perceived responsive relationships in the virtual community, by a general disposition to trust, and by the belief that others confide personal information.}, + langid = {english}, + keywords = {Perceived responsiveness,Trust,Virtual communities}, + file = {/home/nathante/Zotero/storage/KLVEHLMR/S0963868702000215.html} +} + +@article{ridings_virtual_2004, + ids = {ridings_virtual_2004-1}, + title = {Virtual {{Community Attraction}}: Why {{People Hang}} out {{Online}}}, + shorttitle = {Virtual {{Community Attraction}}}, + author = {Ridings, Catherine M. and Gefen, David}, + date = {2004-11-01}, + journaltitle = {Journal of Computer-Mediated Communication}, + shortjournal = {J Comput Mediat Commun}, + volume = {10}, + number = {1}, + abstract = {Abstract. Understanding the attraction of virtual communities is crucial to organizations that want to tap into their enormous information potential. Existing}, + langid = {english}, + file = {/home/nathante/Zotero/storage/D64A3U6W/4614455.html;/home/nathante/Zotero/storage/NFKKWKZN/4614455.html} +} + +@incollection{robert_crowd_2015, + title = {Crowd {{Size}}, {{Diversity}} and {{Performance}}}, + booktitle = {Proceedings of the 33rd {{Annual ACM Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Robert, Lionel and Romero, Daniel M.}, + date = {2015-04-18}, + pages = {1379--1382}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Crowds are increasingly being adopted to solve complex problems. Size and diversity are two key characteristics of crowds; however their relationship to performance is often paradoxical. To better understand the effects of crowd size and diversity on crowd performance we conducted a study on the quality of 4,317 articles in the WikiProject Film community. The results of our study suggest that crowd size leads to better performance when crowds are more diverse. 
However, there is a break-even point -- smaller, less diverse crowds can outperform more diverse crowds of similar size. Our results offer new insights into the effects of size and diversity on the performance of crowds.}, + isbn = {978-1-4503-3145-6}, + keywords = {diversity,performance,team size,wikipedia}, + file = {/home/nathante/Zotero/storage/KVVXJ4WP/Robert and Romero - 2015 - Crowd Size, Diversity and Performance.pdf} +} + +@inproceedings{roberts_structural_2013, + title = {The {{Structural Topic Model}} and {{Applied Social Science}}}, + booktitle = {2013 {{Workshop}} on {{Topic Models}}: Computation, {{Application}}, and {{Evaluation}}.}, + author = {Roberts, Margaret E and Tingley, Dustin and Stewart, Brandon M and Airoldi, Edoardo M}, + date = {2013}, + pages = {4}, + abstract = {We develop the Structural Topic Model which provides a general way to incorporate corpus structure or document metadata into the standard topic model. Document-level covariates enter the model through a simple generalized linear model framework in the prior distributions controlling either topical prevalence or topical content. We demonstrate the model’s use in two applied problems: the analysis of open-ended responses in a survey experiment about immigration policy, and understanding differing media coverage of China’s rise.}, + eventtitle = {Advances in {{Neural Information Processing Systems}}}, + langid = {english}, + file = {/home/nathante/Zotero/storage/3RKHWAPT/Roberts et al. - The Structural Topic Model and Applied Social Scie.pdf} +} + +@article{robinson_ecological_1950, + title = {Ecological {{Correlations}} and the {{Behavior}} of {{Individuals}}}, + author = {Robinson, W. 
S.}, + date = {1950}, + journaltitle = {American Sociological Review}, + volume = {15}, + number = {3}, + eprint = {2087176}, + eprinttype = {jstor}, + pages = {351--357}, + publisher = {{[American Sociological Association, Sage Publications, Inc.]}}, + issn = {0003-1224}, + file = {/home/nathante/Zotero/storage/8SXZCILH/Robinson_1950_Ecological Correlations and the Behavior of Individuals.pdf} +} + +@book{rogers_diffusion_1962, + title = {Diffusion of {{Innovations}}}, + author = {Rogers, Everett M.}, + date = {1962}, + publisher = {{The Free Press of Glencoe}}, + location = {{New York, NY}} +} + +@article{romer_endogenous_1990, + ids = {romer_endogenous_nodate}, + title = {Endogenous {{Technological Change}}}, + author = {Romer, Paul M.}, + date = {1990-10-01}, + journaltitle = {Journal of Political Economy}, + shortjournal = {Journal of Political Economy}, + volume = {98}, + pages = {S71-S102}, + publisher = {{The University of Chicago Press}}, + issn = {0022-3808}, + abstract = {Growth in this model is driven by technological change that arises from intentional investment decisions made by profit-maximizing agents. The distinguishing feature of the technology as an input is that it is neither a conventional good nor a public good; it is a nonrival, partially excludable good. Because of the nonconvexity introduced by a nonrival good, price-taking competition cannot be supported. Instead, the equilibrium is one with monopolistic competition. 
The main conclusions are that the stock of human capital determines the rate of growth, that too little human capital is devoted to research in equilibrium, that integration into world markets will increase growth rates, and that having a large population is not sufficient to generate growth.}, + issue = {5, Part 2}, + file = {/home/nathante/Zotero/storage/7P2Z89NB/Romer - Endogenous Technological Change.pdf;/home/nathante/Zotero/storage/LWDU35L4/Romer_1990_Endogenous Technological Change.pdf;/home/nathante/Zotero/storage/ZGZ7ARQX/261725.html} +} + +@article{roughgarden_competition_1983, + title = {Competition and {{Theory}} in {{Community Ecology}}}, + author = {Roughgarden, Jonathan}, + date = {1983-11-01}, + journaltitle = {The American Naturalist}, + shortjournal = {The American Naturalist}, + volume = {122}, + number = {5}, + pages = {583--601}, + publisher = {{The University of Chicago Press}}, + issn = {0003-0147}, + file = {/home/nathante/Zotero/storage/GTX2ZMUV/Roughgarden_1983_Competition and Theory in Community Ecology.pdf;/home/nathante/Zotero/storage/KW74SQ2C/284160.html} +} + +@article{rousseeuw_silhouettes_1987, + title = {Silhouettes: A Graphical Aid to the Interpretation and Validation of Cluster Analysis}, + shorttitle = {Silhouettes}, + author = {Rousseeuw, Peter J.}, + date = {1987-11-01}, + journaltitle = {Journal of Computational and Applied Mathematics}, + shortjournal = {Journal of Computational and Applied Mathematics}, + volume = {20}, + pages = {53--65}, + issn = {0377-0427}, + abstract = {A new graphical display is proposed for partitioning techniques. Each cluster is represented by a so-called silhouette, which is based on the comparison of its tightness and separation. This silhouette shows which objects lie well within their cluster, and which ones are merely somewhere in between clusters. 
The entire clustering is displayed by combining the silhouettes into a single plot, allowing an appreciation of the relative quality of the clusters and an overview of the data configuration. The average silhouette width provides an evaluation of clustering validity, and might be used to select an ‘appropriate’ number of clusters.}, + langid = {english}, + keywords = {classification,cluster analysis,clustering validity,Graphical display}, + file = {/home/nathante/Zotero/storage/FP4RLR43/Rousseeuw_1987_Silhouettes.pdf;/home/nathante/Zotero/storage/SPBGRW8Q/0377042787901257.html} +} + +@article{ruef_credit_2009, + title = {Credit and {{Classification}}: The {{Impact}} of {{Industry Boundaries}} in {{Nineteenth}}-{{Century America}}}, + shorttitle = {Credit and {{Classification}}}, + author = {Ruef, Martin and Patterson, Kelly}, + date = {2009-09-01}, + journaltitle = {Administrative Science Quarterly}, + shortjournal = {Administrative Science Quarterly}, + volume = {54}, + number = {3}, + pages = {486--520}, + issn = {0001-8392}, + abstract = {In this article, we examine how issues of multi-category membership (hybridity) were handled during the evolution of one of the first general systems of industrial classification in the United States, the credit rating schema of R. G. Dun and Company. Drawing on a repeated cross-sectional study of credit evaluations during the postbellum period (1870–1900), our empirical analyses suggest that organizational membership in multiple categories need not be problematic when classification systems themselves are emergent or in flux and when organizations avoid rare combinations or identities involving ambiguous components. As Dun's schema became institutionalized, boundaries between industries were more clearly defined and boundary violations became subject to increased attention and penalty by credit reporters. 
Our perspective highlights the utility of an evolutionary perspective and tests its implications for the salience of distinct mechanisms of hybridity.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/6P8JPZX3/Ruef and Patterson - 2009 - Credit and Classification The Impact of Industry .pdf} +} + +@article{ruef_emergence_2000, + title = {The {{Emergence}} of {{Organizational Forms}}: A {{Community Ecology Approach}}}, + shorttitle = {The {{Emergence}} of {{Organizational Forms}}}, + author = {Ruef, Martin}, + date = {2000-11-01}, + journaltitle = {American Journal of Sociology}, + volume = {106}, + number = {3}, + pages = {658--714}, + publisher = {{The University of Chicago Press}}, + issn = {0002-9602}, + abstract = {This article introduces a new ecological approach to the study of form emergence based on the notion of an organizational community—a bounded set of forms with related identities. Applying the approach to 48 organizational forms in the health care sector, this study suggests that the development of novel forms is affected by the positioning of their identities with respect to existing form identities in the community, by the aggregate density and size of organizations matching those existing identities, and by the amount of attention directed at identity attributes by sector participants. Findings show that the process of form emergence is subject to population‐dependent effects akin to those noted previously for organizational entries within established populations. 
The aggregate density and size of organizations with similar identities increase the probability of form emergence to a point (cross‐form legitimation), but highly saturated regions of the identity space tend to be uninviting to new forms (cross‐form competition).}, + file = {/home/nathante/Zotero/storage/X6KXYEI5/Ruef - 2000 - The Emergence of Organizational Forms A Community.pdf;/home/nathante/Zotero/storage/NHGAJDIR/318963.html} +} + +@article{ruef_structure_2003, + title = {The {{Structure}} of {{Founding Teams}}: Homophily, {{Strong Ties}}, and {{Isolation}} among {{U}}.{{S}}. {{Entrepreneurs}}}, + shorttitle = {The {{Structure}} of {{Founding Teams}}}, + author = {Ruef, Martin and Aldrich, Howard E. and Carter, Nancy M.}, + date = {2003}, + journaltitle = {American Sociological Review}, + shortjournal = {American Sociological Review}, + volume = {68}, + number = {2}, + eprint = {1519766}, + eprinttype = {jstor}, + pages = {195--222}, + issn = {0003-1224}, + abstract = {The mechanisms governing the composition of formal social groups (e.g., task groups, organizational founding teams) remain poorly understood, owing to (1) a lack of representative sampling from groups found in the general population, (2) a "success" bias among researchers that leads them to consider only those groups that actually emerge and survive, and (3) a restrictive focus on some theorized mechanisms of group composition (e.g., homophily) to the exclusion of others. These shortcomings are addressed by analyzing a unique, representative data set of organizational founding teams sampled from the U.S. population. Rather than simply considering the properties of those founding teams that are empirically observed, a novel quantitative methodology generates the distribution of all possible teams, based on combinations of individual and relational characteristics. 
This methodology permits the exploration of five mechanisms of group composition--those based on homophily, functionality, status expectations, network constraint, and ecological constraint. Findings suggest that homophily and network constraints based on strong ties have the most pronounced effect on group composition. Social isolation (i.e., exclusion from a group) is more likely to occur as a result of ecological constraints on the availability of similar alters in a locality than as a result of status-varying membership choices.} +} + +@inproceedings{rusak_properties_2014, + title = {The Properties of {{Twitter}} Network Communications among Teenagers}, + booktitle = {Proceedings of the Companion Publication of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing}, + author = {Rusak, Gili}, + date = {2014-02-15}, + series = {{{CSCW Companion}} '14}, + pages = {233--236}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {We study, quantitatively, for the first time, the traits of Twitter teenager networks. The results are compared with general population users, and show that teenagers behave uniquely. Teens tend to follow more users and increase friendships over time. They tend to friend individuals online who they already know offline. Teenagers also use Twitter as a news media and form supportive and dense communities. These results shed new light on the attributes of teenage communities. We can then utilize these ideas to find solutions to emerging problems involving the massive use of social media. 
For example, Twitter can be used as a positive tool for the prevention of bad habits among teens.}, + isbn = {978-1-4503-2541-7}, + keywords = {social networks,teenagers,twitter}, + file = {/home/nathante/Zotero/storage/S9RPN7JX/Rusak - 2014 - The properties of Twitter network communications a.pdf} +} + +@book{sabatier_theories_2014, + title = {Theories of the {{Policy Process}}}, + author = {Sabatier, Paul A. and Weible, Christopher M.}, + date = {2014-07-08}, + eprint = {MzkGAwAAQBAJ}, + eprinttype = {googlebooks}, + publisher = {{Avalon Publishing}}, + abstract = {Theories of the Policy Process provides a forum for experts in the most established and widely used theoretical frameworks in policy process research to present the basic propositions, empirical evidence, latest updates, and promising future research opportunities of each framework. This well-regarded volume covers such enduring classics as Multiple Streams (Herweg et al.), Punctuated Equilibrium (Baumgartner et al.), Advocacy Coalition Framework (Jenkins-Smith et al.), Institutional Analysis and Development Framework (Schlager and Cox), and Policy Diffusion (Berry and Berry), as well as two newer theories---Policy Feedback (Mettler and SoRelle) and Narrative Policy Framework (Shanahan et al.). The fourth edition includes discussion of global and comparative perspectives in each theoretical chapter plus a brand-new chapter that explores how these theories have been adapted for, and employed in, non-American and non-Western contexts. An expanded introduction and revised conclusion fully examines and contextualizes the history, trajectories, and functions of public policy research. 
Since its first publication in 1999, Theories of the Policy Process has been, and remains, the quintessential gateway to the field of policy process research for students, scholars, and practitioners.}, + isbn = {978-0-8133-4927-5}, + langid = {english}, + pagetotal = {433} +} + +@book{sayama_introduction_2015, + title = {Introduction to the {{Modeling}} and {{Analysis}} of {{Complex Systems}}}, + author = {Sayama, Hiroki}, + date = {2015}, + publisher = {{Open SUNY Textbooks, Milne Library}}, + location = {{Geneseo, NY}}, + isbn = {978-1-942341-09-3}, + langid = {english}, + file = {/home/nathante/Zotero/storage/PYSMX3D8/Sayama - Introduction to the Modeling and Analysis of Compl.pdf} +} + +@article{schelling_dynamic_1971, + title = {Dynamic Models of Segregation}, + author = {Schelling, Thomas C.}, + date = {1971-07-01}, + journaltitle = {The Journal of Mathematical Sociology}, + volume = {1}, + number = {2}, + pages = {143--186}, + issn = {0022-250X}, + abstract = {Some segregation results from the practices of organizations, some from specialized communication systems, some from correlation with a variable that is non‐random; and some results from the interplay of individual choices. This is an abstract study of the interactive dynamics of discriminatory individual choices. One model is a simulation in which individual members of two recognizable groups distribute themselves in neighborhoods defined by reference to their own locations. A second model is analytic and deals with compartmented space. A final section applies the analytics to ‘neighborhood tipping.’ The systemic effects are found to be overwhelming: there is no simple correspondence of individual incentive to collective results. Exaggerated separation and patterning result from the dynamics of movement. Inferences about individual motives can usually not be drawn from aggregate patterns. Some unexpected phenomena, like density and vacancy, are generated. 
A general theory of ‘tipping’ begins to emerge.}, + file = {/home/nathante/Zotero/storage/INC5WT7W/Schelling - 1971 - Dynamic models of segregation.pdf} +} + +@book{schelling_micromotives_1978, + title = {Micromotives and Macrobehavior}, + author = {Schelling, Thomas C.}, + date = {1978}, + publisher = {{WW Norton \& Company}}, + file = {/home/nathante/Zotero/storage/EQX3VVB9/Schelling - Micromotives and Macrobehavior.pdf} +} + +@article{schmidt_taking_1992, + ids = {schmidt_taking_1992-1}, + title = {Taking {{CSCW}} Seriously}, + author = {Schmidt, Kjeld and Bannon, Liam}, + date = {1992-03-01}, + journaltitle = {Computer Supported Cooperative Work (CSCW)}, + shortjournal = {Comput Supported Coop Work}, + volume = {1}, + number = {1-2}, + pages = {7--40}, + issn = {0925-9724, 1573-7551}, + abstract = {The topic of Computer Supported Cooperative Work (CSCW) has attracted much attention in the last few years. While the field is obviously still in the process of development, there is a marked ambiguity about the exact focus of the field. This lack of focus may hinder its further development and lead to its dissipation. In this paper we set out an approach to CSCW as a field of research which we believe provides a coherent conceptual framework for this area, suggesting that it should be concerned with the support requirements of cooperative work arrangements. This provides a more principled, comprehensive, and, in our opinion, more useful conception of the field than that provided by the conception of CSCW as being focused on computer support for groups. We then investigate the consequences of taking this alternative conception seriously, in terms of research directions for the field. As an indication of the fruits of this approach, we discuss the concept of ‘articulation work’ and its relevance to CSCW. This raises a host of interesting problems that are marginalized in the work on small group support but critical to the success of CSCW systems ‘in the large’, i. 
e., that are designed to meet current work requirements in the everyday world.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/HUJ6YDST/Schmidt and Bannon - 1992 - Taking CSCW seriously.pdf;/home/nathante/Zotero/storage/SWILLV5L/Schmidt and Bannon - 1992 - Taking CSCW seriously.pdf;/home/nathante/Zotero/storage/3UWJYFQ4/Schmidt and Bannon - 1992 - Taking CSCW seriously.html} +} + +@article{schoener_resource_1974, + title = {Resource {{Partitioning}} in {{Ecological Communities}}}, + author = {Schoener, Thomas W.}, + date = {1974}, + journaltitle = {Science}, + volume = {185}, + number = {4145}, + eprint = {1738612}, + eprinttype = {jstor}, + pages = {27--39}, + issn = {0036-8075}, + file = {/home/nathante/Zotero/storage/R86IDGJN/1738612.pdf;/home/nathante/Zotero/storage/U4UCJ2BT/Schoener - 1974 - Resource Partitioning in Ecological Communities.pdf} +} + +@book{schweik_internet_2012, + title = {Internet Success: A Study of Open-Source Software Commons}, + shorttitle = {Internet Success}, + author = {Schweik, Charles M. and English, Robert C.}, + date = {2012}, + publisher = {{MIT Press}}, + location = {{Cambridge, MA}}, + isbn = {978-0-262-01725-1}, + pagetotal = {351} +} + +@article{seering_metaphors_2020, + ids = {seering_metaphors_2020-1}, + title = {Metaphors in Moderation}, + author = {Seering, Joseph and Kaufman, Geoff and Chancellor, Stevie}, + date = {2020-10-20}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {1461444820964968}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Volunteer content moderators are essential to the social media ecosystem through the roles they play in managing and supporting online social spaces. Recent work has described moderation primarily as a functional process of actions that moderators take, such as making rules, removing content, and banning users. 
However, the nuanced ways in which volunteer moderators envision their roles within their communities remain understudied. Informed by insights gained from 79 interviews with volunteer moderators from three platforms, we present a conceptual map of the territory of social roles in volunteer moderation, which identifies five categories with 22 metaphorical variants that reveal moderators’ implicit values and the heuristics that help them make decisions. These metaphors more clearly enunciate the roles volunteer moderators play in the broader social media content moderation apparatus and can drive purposeful engagement with volunteer moderators to better support the ways they guide and shape their communities.}, + langid = {english}, + keywords = {Facebook,governance,metaphors,moderation,online communities,platforms,Reddit,Twitch}, + file = {/home/nathante/Zotero/storage/6NR5XPIH/Seering et al. - 2020 - Metaphors in moderation.pdf;/home/nathante/Zotero/storage/FY8YDBFH/Seering et al. - 2020 - Metaphors in moderation.pdf} +} + +@article{seering_moderator_2019, + title = {Moderator Engagement and Community Development in the Age of Algorithms}, + author = {Seering, Joseph and Wang, Tony and Yoon, Jina and Kaufman, Geoff}, + date = {2019-01-11}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {1461444818821316}, + issn = {1461-4448}, + abstract = {Online communities provide a forum for rich social interaction and identity development for billions of Internet users worldwide. In order to manage these communities, platform owners have increasingly turned to commercial content moderation, which includes both the use of moderation algorithms and the employment of professional moderators, rather than user-driven moderation, to detect and respond to anti-normative behaviors such as harassment and spread of offensive content. 
We present findings from semi-structured interviews with 56 volunteer moderators of online communities across three platforms (Twitch, Reddit, and Facebook), from which we derived a generalized model categorizing the ways moderators engage with their communities and explaining how these communities develop as a result. This model contains three processes: being and becoming a moderator; moderation tasks, actions, and responses; and rules and community development. In this work, we describe how moderators contribute to the development of meaningful communities, both with and without algorithmic support.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/U8QLP3DK/Seering et al. - 2019 - Moderator engagement and community development in .pdf} +} + +@inproceedings{sengupta_what_2019, + title = {What Are {{Academic Subreddits Talking About}}? A {{Comparative Analysis}} of r/Academia and r/Gradschool}, + shorttitle = {What Are {{Academic Subreddits Talking About}}?}, + booktitle = {Conference {{Companion Publication}} of the 2019 on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {Sengupta, Subhasree}, + date = {2019-11-09}, + series = {{{CSCW}} '19}, + pages = {357--361}, + publisher = {{Association for Computing Machinery}}, + location = {{Austin, TX, USA}}, + abstract = {Graduate school and academia can often be challenging and hard to navigate. This work explores how people are using Reddit to reach out to others in academic subreddits to talk about issues one might face in their academic journey. We also explore how such discussion differs between subreddits by comparing two popularly used academic subreddits: r/gradschool and r/academia. For each subreddit, we investigated 300 posts and 500 comments. Using topic modelling, we identify and distinguish the main emergent types of posts and comments we find in these two subreddits. 
We find that posts in r/academia center more on the challenging aspects of academia such as plagiarism, working in academia, and mental health, whereas r/gradschool posts deal with more generic issues on graduate school life. However, we find that the way the community reacts and provides support via comments is similar in both subreddits, mostly by providing moral support and solidarity.}, + isbn = {978-1-4503-6692-2}, + file = {/home/nathante/Zotero/storage/K4K3HITN/Sengupta - 2019 - What are Academic Subreddits Talking About A Comp.pdf} +} + +@article{shah_motivation_2006, + title = {Motivation, Governance, and the Viability of Hybrid Forms in Open Source Software Development}, + author = {Shah, Sonali K.}, + date = {2006-07-01}, + journaltitle = {Management Science}, + volume = {52}, + number = {7}, + pages = {1000--1014}, + abstract = {Open source software projects rely on the voluntary efforts of thousands of software developers, yet we know little about why developers choose to participate in this collective development process. This paper inductively derives a framework for understanding participation from the perspective of the individual software developer based on data from two software communities with different governance structures. In both communities, a need for software-related improvements drives initial participation. The majority of participants leave the community once their needs are met, however, a small subset remains involved. For this set of developers, motives evolve over time and participation becomes a hobby. These hobbyists are critical to the long-term viability of the software code: They take on tasks that might otherwise go undone and work to maintain the simplicity and modularity of the code. Governance structures affect this evolution of motives. 
Implications for firms interested in implementing hybrid strategies designed to combine the advantages of open source software development with proprietary ownership and control are discussed.}, + keywords = {FOSS,Management,To Read}, + file = {/home/nathante/Zotero/storage/9FVVZ6B3/mnsc.1060.pdf;/home/nathante/Zotero/storage/T3DTX9AQ/Shah - 2006 - Motivation, Governance, and the Viability of Hybri.pdf;/home/nathante/Zotero/storage/WAISB3HF/1000.html} +} + +@inproceedings{sharma_studying_2015, + title = {Studying and {{Modeling}} the {{Connection}} between {{People}}'s {{Preferences}} and {{Content Sharing}}}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Sharma, Amit and Cosley, Dan}, + date = {2015-02-28}, + series = {{{CSCW}} '15}, + pages = {1246--1257}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {People regularly share items using online social media. However, people's decisions around sharing---who shares what to whom and why---are not well understood. We present a user study involving 87 pairs of Facebook users to understand how people make their sharing decisions. We find that even when sharing to a specific individual, people's own preference for an item (individuation) dominates over the recipient's preferences (altruism). People's open-ended responses about how they share, however, indicate that they do try to personalize shares based on the recipient. To explain these contrasting results, we propose a novel process model of sharing that takes into account people's preferences and the salience of an item. We also present encouraging results for a sharing prediction model that incorporates both the senders' and the recipients' preferences. 
These results suggest improvements to both algorithms that support sharing in social media and to information diffusion models.}, + isbn = {978-1-4503-2922-4}, + keywords = {directed sharing,information diffusion,sharing process,user preferences}, + file = {/home/nathante/Zotero/storage/V4LGES2Z/Sharma and Cosley - 2015 - Studying and Modeling the Connection between Peopl.pdf} +} + +@incollection{shaw_communication_1964, + title = {Communication {{Networks}}}, + booktitle = {Advances in {{Experimental Social Psychology}}}, + author = {Shaw, Marvin E.}, + editor = {Berkowitz, Leonard}, + date = {1964}, + volume = {1}, + pages = {111--147}, + publisher = {{Academic Press}}, + abstract = {The communication network imposed on the group influences its problem-solving efficiency, communication activity, organizational development, and member satisfaction. This chapter provides an overview of the communication networks, methodology employed in the research on communication networks and considers some of the structural properties of these networks, and outlines the major findings of experimental investigations of the effects of networks on group process. The major network difference is between centralized and decentralized networks. The direction and magnitude of the effects are modified by the following variables: kind of task, noise, information distribution, member personality, reinforcement, and the kind of prior experience the members have had in networks. The variable having the most pronounced effect is the kind of task the group must perform. Centralized networks are generally more efficient when the task requires merely the collection of information in one place, and decentralized networks are more efficient when further operations must be performed on the information before the task can be completed. 
The experiments discussed in the chapter, presents a great deal about the effects of communication networks, but the precise nature of many of the relationships among variables still remains unclear, and needs much clarification, such as network characteristics, kind of task, and group composition. The communication network studies have provided a great deal of information regarding structural effects upon group behavior. However, much more remains to be done.}, + file = {/home/nathante/Zotero/storage/ZTWM2MSC/Shaw - 1964 - Communication Networks.pdf} +} + +@article{shaw_laboratories_2014, + title = {Laboratories of Oligarchy? How the Iron Law Extends to Peer Production}, + shorttitle = {Laboratories of {{Oligarchy}}?}, + author = {Shaw, Aaron and Hill, Benjamin Mako}, + date = {2014}, + journaltitle = {Journal of Communication}, + shortjournal = {J Commun}, + volume = {64}, + number = {2}, + pages = {215--238}, + issn = {1460-2466}, + abstract = {Peer production projects like Wikipedia have inspired voluntary associations, collectives, social movements, and scholars to embrace open online collaboration as a model of democratic organization. However, many peer production projects exhibit entrenched leadership and deep inequalities, suggesting that they may not fulfill democratic ideals. Instead, peer production projects may conform to Robert Michels' “iron law of oligarchy,” which proposes that democratic membership organizations become increasingly oligarchic as they grow. Using exhaustive data of internal processes from a sample of 683 wikis, we construct empirical measures of participation and test for increases in oligarchy associated with growth in wikis' contributor bases. 
In contrast to previous studies, we find support for Michels' iron law and conclude that peer production entails oligarchic organizational forms.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/GIII687R/Shaw and Hill - 2014 - Laboratories of oligarchy How the iron law extend.pdf;/home/nathante/Zotero/storage/W3846GC6/full.html} +} + +@article{shaw_pipeline_2018, + title = {The {{Pipeline}} of {{Online Participation Inequalities}}: The {{Case}} of {{Wikipedia Editing}}}, + shorttitle = {The {{Pipeline}} of {{Online Participation Inequalities}}}, + author = {Shaw, Aaron and Hargittai, Eszter}, + date = {2018-02-01}, + journaltitle = {Journal of Communication}, + shortjournal = {Journal of Communication}, + volume = {68}, + number = {1}, + pages = {143--168}, + issn = {0021-9916}, + abstract = {Digital inequalities undermine the democratizing potential of the Internet. While many people engage in public discourse through participatory media, knowledge gaps limit engagement in the networked public sphere. Participatory web platforms have unique potential to facilitate a more equitable production of knowledge. This paper conceptualizes a pipeline of online participation and models the awareness and behaviors necessary to become a contributor to the networked public sphere. We test the theory with the case of Wikipedia editing, relying on survey data from a diverse, national sample of U.S. adults. 
Our findings underscore the multidimensionality of digital inequalities and suggest new pathways toward closing knowledge gaps by highlighting the importance of education and Internet skills for online stratification processes.}, + keywords = {Digital Inequality,Internet & society,Internet Skills,Knowledge Gap,Knowledge gap theory (Communication),online participation,Social participation,Social stratification,Survey Research,wikipedia}, + file = {/home/nathante/Zotero/storage/IIFZGIVP/Shaw and Hargittai - 2018 - The pipeline of online participation inequalities.pdf;/home/nathante/Zotero/storage/NCJPN2PQ/Shaw and Hargittai - 2018 - The Pipeline of Online Participation Inequalities.pdf;/home/nathante/Zotero/storage/8VA8V6VV/Shaw and Hargittai - 2018.html;/home/nathante/Zotero/storage/WAUM42SV/4915319.html} +} + +@article{shen_evolution_2014, + title = {The Evolution of Social Ties Online: A Longitudinal Study in a Massively Multiplayer Online Game}, + shorttitle = {The Evolution of Social Ties Online}, + author = {Shen, Cuihua and Monge, Peter and Williams, Dmitri}, + date = {2014-10-01}, + journaltitle = {Journal of the Association for Information Science and Technology}, + shortjournal = {J Assn Inf Sci Tec}, + volume = {65}, + number = {10}, + pages = {2127--2137}, + issn = {2330-1643}, + abstract = {How do social ties in online worlds evolve over time? This research examined the dynamic processes of relationship formation, maintenance, and demise in a massively multiplayer online game. Drawing from evolutionary and ecological theories of social networks, this study focuses on the impact of three sets of evolutionary factors in the context of social relationships in the online game EverQuest II (EQII): the aging and maturation processes, social architecture of the game, and homophily and proximity. 
A longitudinal analysis of tie persistence and decay demonstrated the transient nature of social relationships in EQII, but ties became considerably more durable over time. Also, character level similarity, shared guild membership, and geographic proximity were powerful mechanisms in preserving social relationships.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/C4NMGZEB/Shen et al. - 2014 - The evolution of social ties online A longitudina.pdf;/home/nathante/Zotero/storage/MPXNFPXA/Shen et al. - 2014 - The evolution of social ties online A longitudina.pdf;/home/nathante/Zotero/storage/M7ZJXDM5/abstract.html} +} + +@article{shen_virtual_2014-1, + title = {Virtual Brokerage and Closure: Network Structure and Social Capital in a Massively Multiplayer Online Game}, + shorttitle = {Virtual {{Brokerage}} and {{Closure}}}, + author = {Shen, Cuihua and Monge, Peter R. and Williams, Dmitri}, + date = {2014-06-01}, + journaltitle = {Communication Research}, + shortjournal = {Communication Research}, + volume = {41}, + number = {4}, + pages = {459--480}, + issn = {0093-6502}, + abstract = {This study proposes a structural approach to examining online bridging and bonding social capital in a large virtual world. It tests the effects of individual players’ network brokerage and closure on their task performance and trust of other players. Bridging social capital is operationalized as brokerage, the extent to which one is tied to disconnected others, and bonding social capital as closure, the extent to which one is embedded in a densely connected group. Social networks were constructed from behavioral server logs of EverQuest II, a Massively Multiplayer Online Game. 
Results provided strong support for the structural model, demonstrating that players’ network brokerage positively predicted their task performance in the game and players embedded in closed networks were more likely to trust each other.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/XQX36GBZ/Shen et al. - 2014 - Virtual brokerage and closure Network structure a.pdf} +} + +@article{shi_member_2017-1, + title = {A {{Member Saved Is}} a {{Member Earned}}? The {{Recruitment}}-{{Retention Trade}}-{{Off}} and {{Organizational Strategies}} for {{Membership Growth}}}, + shorttitle = {A {{Member Saved Is}} a {{Member Earned}}?}, + author = {Shi, Yongren and Dokshin, Fedor A. and Genkin, Michael and Brashears, Matthew E.}, + date = {2017-04-01}, + journaltitle = {American Sociological Review}, + shortjournal = {Am Sociol Rev}, + volume = {82}, + number = {2}, + pages = {407--434}, + issn = {0003-1224}, + abstract = {A long line of research documents the essential role of social networks in mediating the recruitment and retention of members in organizations. But organizations also comprise a primary context where people form social ties. We investigate how the network structure an organization creates among its members influences its ability to grow and reproduce. In particular, we propose that two dimensions of organizational strategy influence affiliation dynamics: (1) the extent to which an organization induces social interaction among its members (social encapsulation), and (2) the time and energy that an organization demands of its members (time and energy demand). We examine membership dynamics in an ecology where competitor organizations deploying varied strategies vie for the same pool of members. Results show a curvilinear relationship between membership growth and the rate of social encapsulation. 
Furthermore, we find that time and energy demand mediates the effect of social encapsulation by shaping its members’ opportunities for maintaining external affiliations. Different opportunity structures result in different levels of network turnover, thus either reinforcing or dissolving intra-organizational ties. For most types of organizations, attaining sustained growth requires a balance between open networks (for recruitment) and network closure (for retention).}, + langid = {english}, + file = {/home/nathante/Zotero/storage/46ZZ3CGY/Shi et al. - 2017 - A Member Saved Is a Member Earned The Recruitment.pdf} +} + +@article{shi_wisdom_2019, + title = {The Wisdom of Polarized Crowds}, + author = {Shi, Feng and Teplitskiy, Misha and Duede, Eamon and Evans, James A.}, + date = {2019-04}, + journaltitle = {Nature Human Behaviour}, + volume = {3}, + number = {4}, + pages = {329}, + issn = {2397-3374}, + abstract = {This article explores the effect of ideological polarization on team performance. By analysing millions of edits to Wikipedia, the authors reveal that politically diverse editor teams produce higher-quality articles than homogeneous or moderate teams, and they identify the mechanisms responsible for producing these superior articles.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/5AJIP7BF/Shi et al_2019_The wisdom of polarized crowds.pdf;/home/nathante/Zotero/storage/E7S9VG4I/Shi et al. 
- 2019 - The wisdom of polarized crowds.pdf;/home/nathante/Zotero/storage/YVYHDNGP/Shi et al_2019_The wisdom of polarized crowds.pdf;/home/nathante/Zotero/storage/BPKFC376/s41562-019-0541-6.html;/home/nathante/Zotero/storage/PTAPHWSK/s41562-019-0541-6.html;/home/nathante/Zotero/storage/RLZLXT6Y/s41562-019-0541-6.html} +} + +@book{shirky_here_2008, + title = {Here Comes Everybody: The Power of Organizing without Organizations}, + author = {Shirky, Clay}, + date = {2008}, + publisher = {{Penguin Press}}, + location = {{New York, NY}}, + abstract = {An examination of how the rapid spread of new forms of social interaction enabled by technology is changing the way humans form groups and exist within them, with profound long-term economic and social effects---for good and for ill. Our age's new technologies of social networking are evolving, and evolving us, into new groups doing new things in new ways, and old and new groups alike doing the old things better and more easily. Hierarchical structures that exist to manage the work of groups are seeing their raisons d'\^etre swiftly eroded by the rising tide. Business models are being destroyed, transformed, born at dizzying speeds, and the larger social impact is profound. Clay Shirky is one of our wisest observers of the transformational power of the new forms of tech-enabled social interaction, and this is his reckoning with the ramifications of all this on what we do and who we are.---From publisher description. 
Discusses and uses examples of how digital networks transform the ability of humans to gather and cooperate with one another.}, + isbn = {978-1-59420-153-0}, + langid = {english}, + keywords = {FOSS,Media Studies}, + file = {/home/nathante/Zotero/storage/DHBTQ79D/shirky-2008.pdf} +} + +@article{siggelkow_temporarily_2003, + title = {Temporarily {{Divide}} to {{Conquer}}: Centralized, {{Decentralized}}, and {{Reintegrated Organizational Approaches}} to {{Exploration}} and {{Adaptation}}}, + shorttitle = {Temporarily {{Divide}} to {{Conquer}}}, + author = {Siggelkow, Nicolaj and Levinthal, Daniel A.}, + date = {2003-12-01}, + journaltitle = {Organization Science}, + volume = {14}, + number = {6}, + pages = {650--669}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {To create a competitive advantage, firms need to find activity configurations that are not only internally consistent, but also appropriate given the firm's current environment. This challenge is particularly acute after firms have experienced an environmental change that has shifted the existing competitive landscape and created new, high-performing sets of activity choices. How should firms organize to explore and search such an altered performance landscape? While it has been noted that adaptive entities need to maintain a balance of exploration and exploitation, little is known about how different organizational structures moderate this balance. With the help of an agent-based simulation model, we study the value of three different organizational structures: a centralized organization, in which decisions are made only at the level of the firm as a whole; a decentralized organization, in which decisions are made independently in two divisions; and a temporarily decentralized firm, which starts out with a decentralized structure and later reintegrates. 
We find that if interactions among a firm's activities are pervasive, neither the centralized nor the permanently decentralized organizational structure leads to high performance. In this case, temporary decentralization—an organizational structure that has not found much attention in the literature—yields the highest long-term performance. This organizational structure allows the firm both to avoid low-performing activity configurations and to eventually coordinate across its divisions. Thus, even if the decision problem a firm faces is not fully decomposable, a temporary bifurcation can lead to a higher long-term performance outcome. Initial decentralized exploration is, however, costly in the short run, as compared to centralized exploration. As a result, a tradeoff exists between the short-term costs of decentralized exploration and the long-term benefits of reaching higher performance. As interactions across and within divisions increase, the optimal length of decentralized exploration tends to grow. Paralleling our first result, we further show that even if a decision problem is decomposable, that is, can be perfectly modularized, it can be beneficial to create a temporary decision allocation that creates "unnecessary" interdependencies across the subsystems. This benefit arises in particular when the modules are complex by themselves. In both cases, an initial phase of exploration, enabled by an appropriate organizational structure, followed by refinement and coordination, enabled by a different structure, leads to high performance. 
To illustrate our general model, we focus on incumbent firms' responses to the Internet and discuss implications for the product design process.}, + keywords = {Activity Systems,Agent-Based Simulations,E-Commerce,Organizational Adaptation,Organizational Design}, + file = {/home/nathante/Zotero/storage/BFSDUBNA/Siggelkow_Levinthal_2003_Temporarily Divide to Conquer.pdf} +} + +@article{simpson_status_2012, + title = {Status {{Hierarchies}} and the {{Organization}} of {{Collective Action}}}, + author = {Simpson, Brent and Willer, Robb and Ridgeway, Cecilia L.}, + date = {2012-09}, + journaltitle = {Sociological Theory}, + volume = {30}, + number = {3}, + pages = {149--166}, + issn = {0735-2751, 1467-9558}, + abstract = {Most work on collective action assumes that group members are undifferentiated by status, or standing, in the group. Yet such undifferentiated groups are rare, if they exist at all. Here we extend an existing sociological research program to address how extant status hierarchies help organize collective actions by coordinating how much and when group members should contribute to group efforts. We outline three theoretically derived predictions of how status hierarchies organize patterns of behavior to produce larger public goods. We review existing evidence relevant to two of the three hypotheses and present results from a preliminary experimental test of the third. Findings are consistent with the model. The tendency of these dynamics to lead status-differentiated groups to produce larger public goods may help explain the ubiquity of hierarchy in groups, despite the often negative effects of status inequalities for many group members.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/WVT6KAAY/Simpson et al. 
- 2012 - Status Hierarchies and the Organization of Collect.pdf} +} + +@article{sims_macroeconomics_1980, + title = {Macroeconomics and {{Reality}}}, + author = {Sims, Christopher A.}, + date = {1980}, + journaltitle = {Econometrica}, + volume = {48}, + number = {1}, + eprint = {1912017}, + eprinttype = {jstor}, + pages = {1--48}, + issn = {0012-9682}, + abstract = {[Existing strategies for econometric analysis related to macroeconomics are subject to a number of serious objections, some recently formulated, some old. These objections are summarized in this paper, and it is argued that taken together they make it unlikely that macroeconomic models are in fact over identified, as the existing statistical theory usually assumes. The implications of this conclusion are explored, and an example of econometric work in a non-standard style, taking account of the objections to the standard style, is presented.]}, + file = {/home/nathante/Zotero/storage/5L9AKP48/Sims - 1980 - Macroeconomics and Reality.pdf} +} + +@book{singer_applied_2003, + title = {Applied Longitudinal Data Analysis: Modeling Change and Event Occurrence}, + shorttitle = {Applied {{Longitudinal Data Analysis}}}, + author = {Singer, Judith D. 
and Willett, John B.}, + date = {2003}, + publisher = {{Oxford University Press}}, + location = {{New York, NY}}, + isbn = {0-19-515296-4} +} + +@article{sobre-denton_virtual_2016, + title = {Virtual Intercultural Bridgework: Social Media, Virtual Cosmopolitanism, and Activist Community-Building}, + shorttitle = {Virtual Intercultural Bridgework}, + author = {Sobré-Denton, Miriam}, + date = {2016-09-01}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {18}, + number = {8}, + pages = {1715--1731}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Social media facilitates a global–local orientation to the world that allows individuals to engage in virtual community-building and participate in communication to build global citizenship. This research situates virtual cosmopolitanism in the age of new media and globalization, describing it as a means for trans-local and transnational community-building for social justice movements and activism, including community liaison-building across corporeal borders and boundaries. New media as a site of imagined communities that become larger than their component parts is then analyzed through examining several virtual cosmopolitan communities. 
The essay concludes with assumptions about the qualities of virtual cosmopolitan communities, and recommendations for how they can facilitate intercultural liaisons for social justice activism and community-building across difference.}, + langid = {english}, + keywords = {Community-building,cosmopolitan solidarity,online activism,social justice,social media,virtual cosmopolitanism}, + file = {/home/nathante/Zotero/storage/Z5D3VAMN/Sobré-Denton - 2016 - Virtual intercultural bridgework Social media, vi.pdf} +} + +@inproceedings{soliman_characterization_2019, + title = {A {{Characterization}} of {{Political Communities}} on {{Reddit}}}, + booktitle = {Proceedings of the 30th {{ACM Conference}} on {{Hypertext}} and {{Social Media}}}, + author = {Soliman, Ahmed and Hafer, Jan and Lemmerich, Florian}, + date = {2019-09-12}, + series = {{{HT}} '19}, + pages = {259--263}, + publisher = {{Association for Computing Machinery}}, + location = {{Hof, Germany}}, + abstract = {The social news aggregator Reddit is among the most popular websites on the internet. Many online users use the platform to anonymously share and discuss (mostly US-centric) political content. In this ongoing work, we perform a comparative large-scale analysis of political subcommunities (subreddits) on Reddit using a dataset of more than 100 million posts from around 5 million users. In particular, we investigate these communities with respect to (1) the content posted, (2) their relationships to other subreddits, and (3) the distribution of attention received in these subcommunities. We find that left-leaning communities use derogatory language less often than right-leaning communities, but are more focused on news sources reflecting their own political leaning. We also observe that right-leaning communities are more interconnected with right-leaning subreddits on European politics. 
Finally, the attention of individual submissions (as measured by their number of up-votes or comments received) is spread more evenly in right-leaning communities.}, + isbn = {978-1-4503-6885-8}, + file = {/home/nathante/Zotero/storage/R2YM5F8X/Soliman et al. - 2019 - A Characterization of Political Communities on Red.pdf} +} + +@inproceedings{solomon_critical_2014, + title = {Critical Mass of What? Exploring Community Growth in {{WikiProjects}}}, + shorttitle = {Critical Mass of What?}, + booktitle = {Proceedings of the {{Eighth International AAAI Conference}} on {{Weblogs}} and {{Social Media}} ({{ICWSM}} '14)}, + author = {Solomon, Jacob and Wash, Rick}, + date = {2014-05-16}, + publisher = {{AAAI}}, + location = {{Palo Alto, CA}}, + abstract = {Fledgling online communities often hope to achieve critical mass so that the community becomes sustainable. This concept however is not well understood. 
At what point does a community achieve critical mass, and how does the community know this? Furthermore, online communities become sustainable when they achieve a mass of what? We explore this question by analyzing growth in a large number of online communities on Wikipedia. We find that individual communities often have different patterns of growth of membership from its pattern of growth of contribution or production. We also find that in the early stages of community development, building membership has a greater impact on community production and activity in later periods than accumulating many contributions early on, and this is especially true when there is more diversity in the early participants in a community. We also show that participation from a community's "power users" in its early stage is not as valuable to sustainability as the collective contributions of those who make only small contributions. We argue that critical mass is established by developing a diverse set of community members with heterogeneous interests and resources, and not purely by accumulating content.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/HZ22VP6J/Solomon and Wash - 2014 - Critical mass of what Exploring community growth .pdf;/home/nathante/Zotero/storage/GDPU5N7H/8104.html} +} + +@article{sorensen_recruitment-based_2004, + ids = {sorensen_recruitment-based_2004-1}, + title = {Recruitment-Based Competition between Industries: A Community Ecology}, + shorttitle = {Recruitment-Based Competition between Industries}, + author = {Sørensen, Jesper B.}, + date = {2004-02-01}, + journaltitle = {Industrial and Corporate Change}, + shortjournal = {Ind Corp Change}, + volume = {13}, + number = {1}, + pages = {149--170}, + publisher = {{Oxford Academic}}, + issn = {0960-6491}, + abstract = {Abstract. 
Because entrepreneurs often must recruit labor in order to launch their ventures, the labor market is a potential source of constraint in the entrepr}, + langid = {english}, + file = {/home/nathante/Zotero/storage/Z4KJZUBF/Sorensen - 2004 - Recruitment-based competition between industries .pdf;/home/nathante/Zotero/storage/PHCVIXUJ/707535.html} +} + +@article{soule_competition_2008, + title = {Competition and Resource Partitioning in Three Social Movement Industries}, + author = {Soule, Sarah A. and King, Brayden G.}, + date = {2008-05}, + journaltitle = {The American Journal of Sociology}, + volume = {113}, + number = {6}, + eprint = {25145846}, + eprinttype = {jstor}, + pages = {1568--1610}, + issn = {00029602}, + abstract = {Drawing hypotheses from resource mobilization and resource partitioning theories (RMT and RPT), this article examines how inter-organizational competition and social movement industry (SMI) concentration affect the level of tactical and goal specialization of protest organizations associated with the peace, women's, and environmental movements. Additionally, the article examines how specialization affects the survival of these organizations. By and large, the findings are commensurate with the expectations of RMT and RPT. Results indicate that interorganizational competition leads to more specialized tactical and goal repertoires. Concentration in the SMI also leads to specialization, but this is only true for less established organizations. 
Results also indicate that tactical and goal specialization decrease organizational survival, unless the industry is highly concentrated.}, + file = {/home/nathante/Zotero/storage/TG4RWD3T/Soule and King - 2008 - Competition and Resource Partitioning in Three Soc.pdf} +} + +@inproceedings{starbird_crowd_2012, + title = {Crowd Computation: Organizing Information during Mass Disruption Events}, + shorttitle = {Crowd Computation}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work Companion}}}, + author = {Starbird, Kate}, + date = {2012}, + series = {{{CSCW}} '12}, + pages = {339--342}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {This research examines large-scale human interaction occurring through social media during times of mass disruption, seeking to understand how the connected crowd acts to organize a flood of data moving through those platforms into useful information resources. The work combines empirical analysis of social media communication, interviews, and participant observation to explore how people work to organize information and how they use social media platforms to organize themselves to do this work. 
Synthesizing findings from four distinct, yet interrelated studies, this research progresses towards a new conceptualization of the distributed, connected work of organizing information during mass disruption events.}, + isbn = {978-1-4503-1051-2} +} + +@article{stinchcombe_social_1965, + title = {Social Structure and Organizations}, + author = {Stinchcombe, Arthur L}, + date = {1965}, + journaltitle = {Handbook of Organizations}, + shortjournal = {Handbook of Organizations}, + pages = {142--193}, + keywords = {liability of newness}, + file = {/home/nathante/Zotero/storage/SA82QMEE/Stinchcombe_1965_Social structure and organizations.pdf} +} + +@incollection{suchman_supporting_1996, + title = {Supporting {{Articulation Work}}}, + booktitle = {Computerization and {{Controversy}}: Value {{Conflicts}} and {{Social Choices}}}, + author = {Suchman, Lucy}, + editor = {Kling, Rob}, + date = {1996}, + eprint = {9wlN9eOomacC}, + eprinttype = {googlebooks}, + pages = {407--423}, + publisher = {{Morgan Kaufmann}}, + abstract = {The Second Edition of Computerization and Controversy: Value Conflicts and Social Choices is a collection of 78 articles that examine the social aspects of computerization from a variety of perspectives, many presenting important viewpoints not often discussed in the conventional literature. A number of paired articles comprise thought-provoking head-on debate. Fields represented include computer science, information systems, management, journalism, psychology, law, library science, and sociology. This volume introduces some of the major controversies surrounding the computerization of society and helps readers recognize the social processes that drive and shape computerization.Division into eight provocatively titled sections facilitates course planning for classroom or seminar use. A lead article for each section frames the major controversies, locates the selections within the debates, and points to other relevant literature. 
Features: A fully revised and updated version of the first anthological treatment of the subject. Organized to facilitate course planning for classroom or seminar use. Provides coverage of the influence of computers on a wide variety of fields including computer science, information systems, management, journalism, psychology, law, library science, and sociology. Includes discussion of the following issues related to computerization: Does computerization demonstrably improve the productivity of organizations? Should computer systems be designed to empower workers? Does electronic mail facilitate the formation of new communities, or does it undermine intimate interaction? Is computerization likely to reduce privacy and personal freedom?}, + isbn = {978-0-12-415040-9}, + langid = {english}, + keywords = {_tablet,Computers / Computer Science,Computers / Human-Computer Interaction (HCI)}, + file = {/home/nathante/Zotero/storage/GNBXUDMJ/Suchman_1996_Supporting Articulation Work.pdf} +} + +@article{sugihara_detecting_2012, + title = {Detecting {{Causality}} in {{Complex Ecosystems}}}, + author = {Sugihara, George and May, Robert and Ye, Hao and Hsieh, Chih-hao and Deyle, Ethan and Fogarty, Michael and Munch, Stephan}, + date = {2012-09-20}, + journaltitle = {Science}, + eprint = {22997134}, + eprinttype = {pmid}, + pages = {1227079}, + issn = {0036-8075, 1095-9203}, + abstract = {Identifying causal networks is important for effective policy and management recommendations on climate, epidemiology, financial regulation, and much else. Here, we introduce a method, based on nonlinear state space reconstruction, that can distinguish causality from correlation. It extends to nonseparable weakly connected dynamic systems (cases not covered by the current Granger causality paradigm). 
The approach is illustrated both by simple models (where, in contrast to the real world, we know the underlying equations/relations and so can check the validity of our method) and by application to real ecological systems, including the controversial sardine-anchovy-temperature problem.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/88Z9BXNQ/Sugihara et al. - 2012 - Detecting Causality in Complex Ecosystems.pdf;/home/nathante/Zotero/storage/IXJEHNSL/tab-pdf.html} +} + +@article{sugihara_nonlinear_1990, + title = {Nonlinear Forecasting as a Way of Distinguishing Chaos from Measurement Error in Time Series}, + author = {Sugihara, George and May, Robert M.}, + date = {1990-04}, + journaltitle = {Nature}, + volume = {344}, + number = {6268}, + pages = {734--741}, + publisher = {{Nature Publishing Group}}, + issn = {1476-4687}, + abstract = {An approach is presented for making short-term predictions about the trajectories of chaotic dynamical systems. The method is applied to data on measles, chickenpox, and marine phytoplankton populations, to show how apparent noise associated with deterministic chaos can be distinguished from sampling error and other sources of externally induced environmental noise.}, + issue = {6268}, + langid = {english}, + file = {/home/nathante/Zotero/storage/QTEKXCSR/Sugihara_May_1990_Nonlinear forecasting as a way of distinguishing chaos from measurement error.pdf;/home/nathante/Zotero/storage/IW7GR3D4/344734a0.html} +} + +@article{sugihara_nonlinear_1994, + title = {Nonlinear Forecasting for the Classification of Natural Time Series}, + author = {Sugihara, George and Grenfell, Bryan Thomas and May, Robert McCredie and Tong, H.}, + date = {1994-09-15}, + journaltitle = {Philosophical Transactions of the Royal Society of London. Series A: Physical and Engineering Sciences}, + shortjournal = {Philosophical Transactions of the Royal Society of London. 
Series A: Physical and Engineering Sciences}, + volume = {348}, + number = {1688}, + pages = {477--495}, + publisher = {{Royal Society}}, + abstract = {There is a growing trend in the natural sciences to view time series as products of dynamical systems. This viewpoint has proven to be particularly useful in stimulating debate and insight into the nature of the underlying generating mechanisms. Here I review some of the issues concerning the use of forecasting in the detection of nonlinearities and possible chaos, particularly with regard to stochastic chaos. Moreover, it is shown how recent attempts to measure meaningful Lyapunov exponents for ecological data are fundamentally flawed, and that when observational noise is convolved with process noise, computing Lyapunov exponents for the real system will be difficult. Such problems pave the way for more operational definitions of dynamic complexity (cf. Yao \& Tong, this volume). Aside from its use in the characterization of chaos, nonlinear forecasting can be used more broadly in pragmatic classification problems. Here I review a recent example of nonlinear forecasting as it is applied to classify human heart rhythms. In particular, it is shown how forecast nonlinearity can be a good discriminator of the physiological effects of age, and how prediction-decay may discriminate heart disease. 
In so doing, I introduce a method for characterizing nonlinearity using ‘S-maps’ and a method for analysing multiple short time series with composite attractors.}, + file = {/home/nathante/Zotero/storage/TGW3IUGS/Sugihara et al_1994_Nonlinear forecasting for the classification of natural time series.pdf;/home/nathante/Zotero/storage/CGSTKS5R/rsta.1994.html} +} + +@inproceedings{suh_singularity_2009, + title = {The Singularity Is Not near: Slowing Growth of {{Wikipedia}}}, + shorttitle = {The {{Singularity}} Is {{Not Near}}}, + booktitle = {Proceedings of the 5th {{International Symposium}} on {{Wikis}} and {{Open Collaboration}}}, + author = {Suh, Bongwon and Convertino, Gregorio and Chi, Ed H. and Pirolli, Peter}, + date = {2009}, + series = {{{WikiSym}} '09}, + pages = {1--10}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Prior research on Wikipedia has characterized the growth in content and editors as being fundamentally exponential in nature, extrapolating current trends into the future. We show that recent editing activity suggests that Wikipedia growth has slowed, and perhaps plateaued, indicating that it may have come against its limits to growth. We measure growth, population shifts, and patterns of editor and administrator activities, contrasting these against past results where possible. Both the rate of page growth and editor growth has declined. As growth has declined, there are indicators of increased coordination and overhead costs, exclusion of newcomers, and resistance to new edits. We discuss some possible explanations for these new developments in Wikipedia including decreased opportunities for sharing existing knowledge and increased bureaucratic stress on the socio-technical system itself.}, + isbn = {978-1-60558-730-1}, + file = {/home/nathante/Zotero/storage/WTEMKAUC/Suh et al. 
- 2009 - The singularity is not near slowing growth of Wik.pdf} +} + +@article{swaminathan_resource_2001, + title = {Resource Partitioning and the Evolution of Specialist Organizations: The Role of Location and Identity in the {{U}}.{{S}}. Wine Industry}, + shorttitle = {Resource {{Partitioning}} and the {{Evolution}} of {{Specialist Organizations}}}, + author = {Swaminathan, Anand}, + date = {2001-12-01}, + journaltitle = {Academy of Management Journal}, + shortjournal = {ACAD MANAGE J}, + volume = {44}, + number = {6}, + pages = {1169--1185}, + issn = {0001-4273, 1948-0989}, + abstract = {Analyses of founding and mortality rates of specialist organizations in the U.S. wine industry over the period 1941-90 support Carroll’s (1985) location-based resource-partitioning model—crowding of generalists in the market center creates opportunities for specialists. Further, specialists are adversely affected when they violate their organizational form’s identity characteristics and also when generalists can assume a robust identity allowing them to operate in both specialist and generalist industry segments. The results suggest a prominent role for an organizational form’s identity in resource partitioning.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/HSF2S5JM/1169.html} +} + +@inproceedings{tan_all_2015, + title = {All Who Wander: On the Prevalence and Characteristics of Multi-Community Engagement}, + shorttitle = {All Who Wander}, + booktitle = {Proceedings of the 24th {{International Conference}} on {{World Wide Web}}}, + author = {Tan, Chenhao and Lee, Lillian}, + date = {2015}, + series = {{{WWW}} '15}, + pages = {1056--1066}, + publisher = {{International World Wide Web Conferences Steering Committee}}, + location = {{Republic and Canton of Geneva, Switzerland}}, + abstract = {Although analyzing user behavior within individual communities is an active and rich research domain, people usually interact with multiple communities both on- and off-line. 
How do users act in such multi-community environments? Although there are a host of intriguing aspects to this question, it has received much less attention in the research community in comparison to the intra-community case. In this paper, we examine three aspects of multi-community engagement: the sequence of communities that users post to, the language that users employ in those communities, and the feedback that users receive, using longitudinal posting behavior on Reddit as our main data source, and DBLP for auxiliary experiments. We also demonstrate the effectiveness of features drawn from these aspects in predicting users' future level of activity. One might expect that a user's trajectory mimics the "settling-down" process in real life: an initial exploration of sub-communities before settling down into a few niches. However, we find that the users in our data continually post in new communities; moreover, as time goes on, they post increasingly evenly among a more diverse set of smaller communities. Interestingly, it seems that users that eventually leave the community are "destined" to do so from the very beginning, in the sense of showing significantly different "wandering" patterns very early on in their trajectories; this finding has potentially important design implications for community maintainers. Our multi-community perspective also allows us to investigate the "situation vs. 
personality" debate from language usage across different communities.}, + isbn = {978-1-4503-3469-3}, + keywords = {DBLP,language,lifecycle,multiple communities,reddit}, + file = {/home/nathante/Zotero/storage/8GL2XQG3/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf;/home/nathante/Zotero/storage/J3RVCH26/Tan and Lee - 2015 - All Who Wander On the Prevalence and Characterist.pdf} +} + +@inproceedings{tan_tracing_2018, + title = {Tracing Community Genealogy: How New Communities Emerge from the Old}, + shorttitle = {Tracing {{Community Genealogy}}}, + booktitle = {Proceedings of the {{Twelfth International Conference}} on {{Web}} and {{Social Media}} ({{ICWSM}} '18)}, + author = {Tan, Chenhao}, + date = {2018}, + pages = {395--404}, + publisher = {{AAAI}}, + location = {{Palo Alto, California}}, + abstract = {The process by which new communities emerge is a central research issue in the social sciences. While a growing body of research analyzes the formation of a single community by examining social networks between individuals, we introduce a novel community-centered perspective. We highlight the fact that the context in which a new community emerges contains numerous existing communities. We reveal the emerging process of communities by tracing their early members’ previous community memberships.}, + file = {/home/nathante/Zotero/storage/QEAEMFYR/Tan - 2018 - Tracing Community Genealogy How New Communities E.pdf} +} + +@article{tausczik_impact_2019, + title = {The Impact of Group Size on the Discovery of Hidden Profiles in Online Discussion Groups}, + author = {Tausczik, Yla and Huang, Xiaoyun}, + date = {2019-11-14}, + journaltitle = {ACM Transactions on Social Computing}, + shortjournal = {Trans. Soc. 
Comput.}, + volume = {2}, + number = {3}, + pages = {10:1--10:25}, + issn = {2469-7818}, + abstract = {Online discussions help individuals to gather knowledge and make important decisions in diverse areas from health and finance to computing and data science. Online discussion groups exhibit unique group dynamics not found in traditional small groups, such as staggered participation and asynchronous communication, and the effects of these features on knowledge sharing is not well understood. In this article, we focus on one such aspect: wide variation in group size. Using a controlled experiment with a hidden profile task, we evaluate online discussion groups’ capacity to share distributed knowledge when group size ranges from 4 to 32 participants. We found that individuals in medium-sized discussions performed the best, and we suggest that this represents a tradeoff in which larger groups tend to share more facts, but have more difficulty than smaller groups at resolving misunderstandings.}, + keywords = {collective information processing,collective intelligence,Hidden profile,knowledge sharing,online forums}, + file = {/home/nathante/Zotero/storage/FNSPR8FH/Tausczik_Huang_2019_The Impact of Group Size on the Discovery of Hidden Profiles in Online.pdf} +} + +@inproceedings{teblunthuis_density_2017, + title = {Density Dependence without Resource Partitioning: Population Ecology on {{Change}}.Org}, + shorttitle = {Density {{Dependence Without Resource Partitioning}}}, + booktitle = {Companion of the 2017 {{ACM Conference}} on {{Computer Supported Cooperative Work}} and {{Social Computing}}}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2017}, + series = {{{CSCW}} '17 {{Companion}}}, + pages = {323--326}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {E-petitioning is a prominent form of Internet-based collective action. 
We apply theories from organizational population ecology to investigate whether similar petitions compete for signatures. We use latent Dirichlet allocation (LDA) topic modeling to identify topical niches. Using these niches, we test two theories from population ecology on 442,109 Change.org petitions. First, we find evidence for density dependence, an inverse-U-shaped relationship between the density of a petition's niche and the number of signatures the petition obtains. This suggests e-petitioning is competitive and that e-petitions draw on overlapping resource pools. Second, although resource partitioning theory predicts that topically specialized petitions will obtain more signatures in concentrated populations, we find no evidence of this. This suggests that specialists struggle to avoid competition with generalists.}, + isbn = {978-1-4503-4688-7}, + file = {/home/nathante/Zotero/storage/54585RCP/TeBlunthuis et al. - 2017 - Density dependence without resource partitioning .pdf} +} + +@thesis{teblunthuis_density_2017-1, + type = {Master of Arts Thesis}, + ids = {teblunthuis_density_2017-2,teblunthuis_density_2018}, + title = {Density Dependence without Resource Partitioning on an Online Petitioning Platform}, + author = {TeBlunthuis, Nathan}, + date = {2017}, + institution = {{University of Washington}}, + location = {{Seattle, Washington}}, + abstract = {Online petitions are a collective action tactic that leverages digital affordances in pursuit of discursive opportunities. Prior efforts to explain why some petitions are more successful than others emphasize signer motivations, petition framing, social media, or resources from movement organizations. We advance a key insight of organizational ecology: population-level variables like density and concentration also constrain success. We use latent Dirichlet allocation (LDA) topic models to measure overlap density and frame specialization. We then model how ecological dynamics affect petition signature counts. 
We observe density dependence: a curvilinear relationship between overlap density and success. We anticipated resource partitioning: specialists enjoy competitive advantages under concentration, but we find no evidence for it. We discuss boundary conditions for ecological dynamics commonly found in organizational fields induced by the distinctive scope of e-tactic platforms. Platforms may produce concentration without advantages for specialists by lowering entry costs for generalists and specialists alike.}, + langid = {american}, + file = {/home/nathante/Zotero/storage/XFELN2Z6/TeBlunthuis - 2018 - Density dependence without resource partitioning o.pdf} +} + +@article{teblunthuis_effects_2021, + ids = {teblunthuis_effects_2021-1}, + title = {Effects of {{Algorithmic Flagging}} on {{Fairness}}: Quasi-Experimental {{Evidence}} from {{Wikipedia}}}, + shorttitle = {Effects of {{Algorithmic Flagging}} on {{Fairness}}}, + author = {TeBlunthuis, Nathan and Hill, Benjamin Mako and Halfaker, Aaron}, + date = {2021-04-22}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction (CSCW '21)}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {5}, + pages = {56:1--56:27}, + abstract = {Online community moderators often rely on social signals such as whether or not a user has an account or a profile page as clues that users may cause problems. Reliance on these clues can lead to ``overprofiling'' bias when moderators focus on these signals but overlook the misbehavior of others. We propose that algorithmic flagging systems deployed to improve the efficiency of moderation work can also make moderation actions more fair to these users by reducing reliance on social signals and making norm violations by everyone else more visible. We analyze moderator behavior in Wikipedia as mediated by RCFilters, a system which displays social signals and algorithmic flags, and estimate the causal effect of being flagged on moderator actions. 
We show that algorithmically flagged edits are reverted more often, especially those by established editors with positive social signals, and that flagging decreases the likelihood that moderation actions will be undone. Our results suggest that algorithmic flagging systems can lead to increased fairness in some contexts but that the relationship is complex and contingent.}, + issue = {CSCW1}, + keywords = {ai,causal inference,community norms,fairness,machine learning,moderation,online communities,peer production,sociotechnical systems,wikipedia}, + file = {/home/nathante/Zotero/storage/DAQJVL52/TeBlunthuis et al. - 2021 - Effects of Algorithmic Flagging on Fairness Quasi.pdf;/home/nathante/Zotero/storage/WCBHHDU8/TeBlunthuis et al. - 2021 - Effects of Algorithmic Flagging on Fairness Quasi.pdf} +} + +@online{teblunthuis_identifying_2021, + ids = {teblunthuis_community_2021,teblunthuis_community_2021-1,teblunthuis_identifying_2021-1}, + title = {Identifying {{Competition}} and {{Mutualism Between Online Groups}}}, + author = {TeBlunthuis, Nathan and Hill, Benjamin Mako}, + date = {2021-07-14}, + eprint = {2107.06970}, + eprinttype = {arxiv}, + primaryclass = {cs}, + abstract = {Platforms often host multiple online groups with highly overlapping topics and members. How can researchers and designers understand how interactions between related groups affect measures of group health? Inspired by population ecology, prior social computing research has studied competition and mutualism among related groups by correlating group size with degrees of overlap in content and membership. The resulting body of evidence is puzzling as overlaps seem sometimes to help and other times to hurt. We suggest that this confusion results from aggregating inter-group relationships into an overall environmental effect instead of focusing on networks of competition and mutualism among groups. 
We propose a theoretical framework based on community ecology and a method for inferring competitive and mutualistic interactions from time series participation data. We compare population and community ecology analyses of online community growth by analyzing clusters of subreddits with high user overlap but varying degrees of competition and mutualism.}, + archiveprefix = {arXiv}, + keywords = {Computer Science - Human-Computer Interaction,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/3NW96WBR/TeBlunthuis_Hill_2021_Identifying Competition and Mutualism Between Online Groups.pdf;/home/nathante/Zotero/storage/XRLZFVHD/TeBlunthuis_Hill_2021_Identifying Competition and Mutualism Between Online Groups.pdf;/home/nathante/Zotero/storage/ZTDDJ9KW/TeBlunthuis and Hill - 2018 - A Community Ecology Approach for Identifying Compe.pdf;/home/nathante/Zotero/storage/MJH368X5/2107.html;/home/nathante/Zotero/storage/VK77YHAC/2107.html} +} + +@article{teblunthuis_no_2021, + title = {No {{Community Can Do Everything}}: Why {{People Participate}} in {{Similar Online Communities}}}, + author = {TeBlunthuis, Nathan and Kiene, Charles and Brown, Isabella and Levi, Laura (Alia) and McGinnis, Nicole and Hill, Benjamin Mako}, + date = {2021} +} + +@unpublished{teblunthuis_population_2020, + title = {The Population Ecology of Online Collective Action}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2020-06-19}, + eventtitle = {6th {{International Conference}} on {{Computational Social Science}}} +} + +@inproceedings{teblunthuis_revisiting_2018, + title = {Revisiting "{{The}} Rise and Decline" in a Population of Peer Production Projects}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {TeBlunthuis, Nathan and Shaw, Aaron and Hill, Benjamin Mako}, + date = {2018}, + pages = {355:1--355:7}, + publisher = {{ACM}}, + location = {{New York, 
NY}}, + abstract = {Do patterns of growth and stabilization found in large peer production systems such as Wikipedia occur in other communities? This study assesses the generalizability of Halfaker et al.'s influential 2013 paper on "The Rise and Decline of an Open Collaboration System." We replicate its tests of several theories related to newcomer retention and norm entrenchment using a dataset of hundreds of active peer production wikis from Wikia. We reproduce the subset of the findings from Halfaker and colleagues that we are able to test, comparing both the estimated signs and magnitudes of our models. Our results support the external validity of Halfaker et al.'s claims that quality control systems may limit the growth of peer production communities by deterring new contributors and that norms tend to become entrenched over time.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/7YEVSVQM/TeBlunthuis et al. - 2018 - Revisiting The Rise and Decline in a Population .pdf} +} + +@inproceedings{thornton_tagging_2012, + title = {Tagging Wikipedia: Collaboratively Creating a Category System}, + shorttitle = {Tagging {{Wikipedia}}}, + booktitle = {Proceedings of the 17th {{ACM International Conference}} on {{Supporting Group Work}}}, + author = {Thornton, Katherine and McDonald, David W.}, + date = {2012}, + series = {{{GROUP}} '12}, + pages = {219--228}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {Category systems have traditionally been created by small committees of people who had authority over the system they were designing. With the rise of large-scale social media systems, category schemes are being created by groups with differing perspectives, values, and expectations for how categories will be used. 
Prior studies of social tagging and folksonomy focused on the application and evolution of the collective category scheme, but struggled to uncover some of the collective rationale undergirding the decision-making processes in those schemes. In this paper, we qualitatively analyze the early discussions among editors of Wikipedia about the design and creation of its category system. We highlight three themes that dominated the discussion: hierarchy, scope and navigation, and relate these themes to their more formal roots in the information science literature. We distill out four styles of collaboration with regard to category systems that apply broadly to social tagging and other folksonomies. We conclude the paper with implications for collaborative tools and category systems as applied to large-scale collaborative systems.}, + isbn = {978-1-4503-1486-2}, + keywords = {categorization,information organization,wikipedia}, + file = {/home/nathante/Zotero/storage/CCDWH5LG/Thornton and McDonald - 2012 - Tagging Wikipedia Collaboratively Creating a Cate.pdf;/home/nathante/Zotero/storage/JCMW5EKV/Thornton and McDonald - 2012 - Tagging Wikipedia Collaboratively Creating a Cate.pdf} +} + +@article{triggs_context_2019, + ids = {triggs_context_2021}, + title = {Context Collapse and Anonymity among Queer {{Reddit}} Users}, + author = {Triggs, Anthony Henry and Møller, Kristian and Neumayer, Christina}, + date = {2019-11-27}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {23}, + number = {1}, + pages = {5--21}, + publisher = {{SAGE Publications}}, + issn = {1461-4448, 1461-7315}, + abstract = {This article maps out how people in queer communities on Reddit navigate context collapse. Drawing upon data from interviews with queer Reddit users and insights from other studies of context collapse in digital media, we argue that context collapse also occurs in anonymity-based social media. 
The interviews reveal queer Reddit users’ practices of context differentiation, occurring at four levels: somatic, system, inter-platform and intra-platform. We use these levels to map out how lesbian, gay, bisexual, transgender and queer or questioning (LGBTQ) people express their identities and find community on Reddit while seeking to minimize the risks imposed by multiple impending context collapses. Because living an authentic queer life can make subjects vulnerable, we find that despite Reddit’s anonymity, sophisticated practices of context differentiation are developed and maintained. We argue that context collapse in an era of big data and social media platforms operates beyond the control of any one user, which causes problems, particularly for queer people.}, + langid = {english}, + keywords = {Anonymity,bisexual,context collapse,gay,lesbian,Reddit,risk,transgender and queer or questioning}, + file = {/home/nathante/Zotero/storage/LSEXQYFM/Triggs et al. - 2021 - Context collapse and anonymity among queer Reddit .pdf} +} + +@article{tripodi_ms_2021, + title = {Ms. {{Categorized}}: Gender, Notability, and Inequality on {{Wikipedia}}}, + shorttitle = {Ms. {{Categorized}}}, + author = {Tripodi, Francesca}, + date = {2021-06-27}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {14614448211023772}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {Gender is one of the most pervasive and insidious forms of inequality. For example, English-language Wikipedia contains more than 1.5 million biographies about notable writers, inventors, and academics, but less than 19\% of these biographies are about women. To try and improve these statistics, activists host “edit-a-thons” to increase the visibility of notable women. While this strategy helps create several biographies previously inexistent, it fails to address a more inconspicuous form of gender exclusion. 
Drawing on ethnographic observations, interviews, and quantitative analysis of web-scraped metadata, this article demonstrates that biographies about women who meet Wikipedia’s criteria for inclusion are more frequently considered non-notable and nominated for deletion compared to men’s biographies. This disproportionate rate is another dimension of gender inequality previously unexplored by social scientists and provides broader insights into how women’s achievements are (under)valued.}, + langid = {english}, + keywords = {Articles for Deletion,gender gap,gender inequality,metadata,Wikipedia}, + file = {/home/nathante/Zotero/storage/IBR95ZNY/Tripodi_2021_Ms.pdf} +} + +@inproceedings{tsugawa_impact_2019, + ids = {tsugawa_impact_2019-2}, + title = {The Impact of Social Network Structure on the Growth and Survival of Online Communities}, + booktitle = {Proceedings of the 2019 {{IEEE}}/{{ACM International Conference}} on {{Advances}} in {{Social Networks Analysis}} and {{Mining}}}, + author = {Tsugawa, Sho and Niida, Sumaru}, + date = {2019-08-27}, + series = {{{ASONAM}} '19}, + pages = {1112--1119}, + publisher = {{Association for Computing Machinery}}, + location = {{Vancouver, British Columbia, Canada}}, + abstract = {While online communities are important platforms for various social activities, many online communities fail to survive, which motivates researchers to investigate factors affecting the growth and survival of online communities. We comprehensively examine the effects of a wide variety of social network features on the growth and survival of communities in Reddit. We show that several social network features, including clique ratio, density, clustering coefficient, reciprocity and centralization, have significant effects on the survival of communities. In contrast, we also show that social network features examined in this paper only have weak effects on the growth of communities. 
Moreover, we conducted experiments predicting future growth and survival of online communities from social network features. The results show that social network features are useful for predicting the survival of communities but not for predicting their growth.}, + isbn = {978-1-4503-6868-1}, + file = {/home/nathante/Zotero/storage/8JF3SZ74/Tsugawa and Niida - 2019 - The impact of social network structure on the grow.pdf;/home/nathante/Zotero/storage/J9RMRP49/Tsugawa_Niida_2019_The impact of social network structure on the growth and survival of online.pdf;/home/nathante/Zotero/storage/L4EQ4VRI/Tsugawa_Niida_2019_The impact of social network structure on the growth and survival of online.pdf} +} + +@article{tufekci_not_2013-1, + title = {"{{Not}} This One": Social Movements, the Attention Economy, and Microcelebrity Networked Activism}, + shorttitle = {" {{Not}} This One": Social Movements, the Attention Economy, and Microcelebrity Networked Activism}, + author = {Tufekci, Zeynep}, + date = {2013}, + journaltitle = {American Behavioral Scientist}, + pages = {0002764213479369}, + issn = {0002-7642}, + file = {/home/nathante/Zotero/storage/URM9ESR8/Tufekci_2013_ Not This One.pdf;/home/nathante/Zotero/storage/ZBQFHXMF/Tufekci_2013_ Not This One.pdf} +} + +@article{turner_where_2005, + title = {Where the {{Counterculture Met}} the {{New Economy}}: The {{WELL}} and the {{Origins}} of {{Virtual Community}}}, + shorttitle = {Where the {{Counterculture Met}} the {{New Economy}}}, + author = {Turner, Fred}, + date = {2005}, + journaltitle = {Technology and Culture}, + volume = {46}, + number = {3}, + pages = {485--512}, + issn = {1097-3729}, + abstract = {In lieu of an abstract, here is a brief excerpt of the content: Technology and Culture 46.3 (2005) 485-512 The WELL and the Origins of Virtual Community Fred Turner In 1993, freelance journalist Howard Rheingold published The Virtual Community: Homesteading on the Electronic Frontier and with it defined a new form of 
technologically enabled social life: virtual community. For the last eight years, he explained, he had been dialing in to a San Francisco Bay–area bulletin-board system (BBS) known as the Whole Earth 'Lectronic Link, or the WELL. In the WELL's text-only environment, he conversed with friends and colleagues, met new people, and over time built up relationships of startling intimacy. For Rheingold, these relationships formed an emotional bulwark against the loneliness of a highly technologized material world. As he explained, computer networks like the WELL allowed us "to recapture the sense of cooperative spirit that so many people seemed to lose when we gained all this technology." In the disembodied precincts of cyberspace, we could connect with one another practically and emotionally and "rediscover the power of cooperation, turning cooperation into a game, a way of life—a merger of knowledge capital, social capital, and communion." In the years since Rheingold's book appeared, the Internet and the Worldwide Web have swung into public view, and both the WELL and Rheingold's notion of virtual community have become touchstones for studies of the social implications of computer networking. Yet, despite the WELL's prominence, few have rigorously explored its roots in the American counterculture of the 1960s. As its name suggests, the Whole Earth 'Lectronic Link took shape within a network of individuals and publications that first came together long before the advent of ubiquitous computer networking, with the publication of the Whole Earth Catalog. In the spring of 1968, Stewart Brand, a former Merry Prankster and coproducer of the Trips Festival that helped spark the Haight-Ashbury psychedelic scene, noticed that many of his friends had begun to leave the city for the wilds of New Mexico and Northern California. 
As sociologists and journalists would soon explain, these migrants marked the leading edge of what would become the largest wave of communalization in American history. Brand had just inherited a hundred thousand dollars in stock and, as he recalled several years later, imagining his friends "starting their own civilization hither and yon in the sticks" got him thinking about the L.L.Bean catalog. This in turn led him to fantasize something he called the "Access Mobile" that would offer "all manner of access materials and advice for sale cheap," including books, camping gear, blueprints for houses and machines, and subscriptions to magazines. The publication that grew out of that fantasy would quickly become one of the defining documents of the American counterculture. Sized somewhere between a tabloid newspaper and a glossy magazine, the sixty-one-page first Whole Earth Catalog presented reviews of hand tools, books, and magazines arrayed in seven thematic categories: understanding whole systems, shelter and land use, industry and craft, communications, community, nomadics, and learning. Over the next four years, in a series of biannual issues, the Catalog ballooned to more than four hundred pages, sold more than a million-and-a-half copies, won a National Book Award, and spawned dozens of imitators. It also established a relationship between information technology, economic activity, and alternative forms of community that would outlast the counterculture itself and become a key feature of the digital world. Like other members of the counterculture, those who headed back to the land suffered a deep ambivalence toward technology. On the one hand, like their counterparts on the New Left they saw the large-scale weapons technologies of the cold war and the organizations that produced them as emblems of a malevolent and ubiquitous technological bureaucracy. 
On the other, as they played their stereos and dropped LSD many came to believe that small-scale technologies could help bring about an alternative to that world. Dancing at the Trips Festival or simply sitting around getting high with friends, many experienced a sense of spiritual interconnection. By the late 1960s, social theorists such as Charles Reich and Theodore Roszak had begun to argue that this interconnection could become the...} +} + +@article{ushio_fluctuating_2018, + title = {Fluctuating Interaction Network and Time-Varying Stability of a Natural Fish Community}, + author = {Ushio, Masayuki and Hsieh, Chih-hao and Masuda, Reiji and Deyle, Ethan R. and Ye, Hao and Chang, Chun-Wei and Sugihara, George and Kondoh, Michio}, + date = {2018-02}, + journaltitle = {Nature}, + volume = {554}, + number = {7692}, + pages = {360--363}, + publisher = {{Nature Publishing Group}}, + issn = {1476-4687}, + abstract = {A method for modelling time-varying dynamic stability in a natural marine fish community finds that seasonal patterns in community stability are driven by species diversity and interspecific interactions.}, + issue = {7692}, + langid = {english}, + file = {/home/nathante/Zotero/storage/G8RNW5UU/Ushio et al_2018_Fluctuating interaction network and time-varying stability of a natural fish.pdf;/home/nathante/Zotero/storage/FWFXSVKR/nature25504.html} +} + +@article{vandermeer_community_1970, + title = {The {{Community Matrix}} and the {{Number}} of {{Species}} in a {{Community}}}, + author = {Vandermeer, John H.}, + date = {1970-01-01}, + journaltitle = {The American Naturalist}, + shortjournal = {The American Naturalist}, + volume = {104}, + number = {935}, + pages = {73--83}, + publisher = {{The University of Chicago Press}}, + issn = {0003-0147}, + abstract = {In this paper I am concerned with the number of species that will be held in stable equilibrium in a community of competing organisms, using the general form of the Lotka-Volterra competition 
equations for $m$ species. Defining $K_i$ as the saturation density for the $i$th species and $\alpha_{ij}$ as the competition coefficient between species $i$ and $j$, and $N_i$ as the equilibrium density of species $i$, the number of species will be determined by $\bar{N}$, $\bar{K}$, $\bar{\alpha}$, var($K$), the covariances among the $\alpha$'s, and the covariance between $\alpha$ and $N$. In particular, the number of species increases as $\bar{K}$ increases but as $\bar{N}$, $\bar{\alpha}$, cov($\alpha$), cov($\alpha$, $N$) and variance of $K$ decrease.}, + file = {/home/nathante/Zotero/storage/3CWCLI6Z/282641.html} +} + +@inproceedings{vasilescu_how_2014, + ids = {vasilescu_how_2014-1}, + title = {How Social {{Q}}\&{{A}} Sites Are Changing Knowledge Sharing in Open Source Software Communities}, + booktitle = {Proceedings of the 17th {{ACM}} Conference on {{Computer}} Supported Cooperative Work \& Social Computing - {{CSCW}} '14}, + author = {Vasilescu, Bogdan and Serebrenik, Alexander and Devanbu, Prem and Filkov, Vladimir}, + date = {2014}, + pages = {342--354}, + publisher = {{ACM Press}}, + location = {{Baltimore, Maryland, USA}}, + abstract = {Historically, mailing lists have been the preferred means for coordinating development and user support activities. With the emergence and popularity growth of social Q\&A sites such as the StackExchange network (e.g., StackOverflow), this is beginning to change. Such sites offer different sociotechnical incentives to their participants than mailing lists do, e.g., rich web environments to store and manage content collaboratively, or a place to showcase their knowledge and expertise more vividly to peers or potential recruiters. A key difference between StackExchange and mailing lists is gamification, i.e., StackExchange participants compete to obtain reputation points and badges. In this paper, we use a case study of R (a widely-used tool for data analysis) to investigate how mailing list participation has evolved since the launch of StackExchange. 
Our main contribution is the assembly of a joint data set from the two sources, in which participants in both the r-help mailing list and StackExchange are identifiable. This permits their activities to be linked across the two resources and also over time. With this data set we found that user support activities show a strong shift away from r-help. In particular, mailing list experts are migrating to StackExchange, where their behaviour is different. First, participants active both on r-help and on StackExchange are more active than those who focus exclusively on only one of the two. Second, they provide faster answers on StackExchange than on r-help, suggesting they are motivated by the gamified environment. To our knowledge, our study is the first to directly chart the changes in behaviour of specific contributors as they migrate into gamified environments, and has important implications for knowledge management in software engineering.}, + eventtitle = {The 17th {{ACM}} Conference}, + isbn = {978-1-4503-2540-0}, + langid = {english}, + keywords = {crowdsourced knowledge,gamification,mailing lists,open source,social q&a}, + file = {/home/nathante/Zotero/storage/6DLS9FTI/Vasilescu et al. - 2014 - How social Q&\;A sites are changing knowledge sh.pdf;/home/nathante/Zotero/storage/MNHPJRT3/Vasilescu et al. - 2014 - How social Q&A sites are changing knowledge sharin.pdf} +} + +@article{ven_explaining_1995, + title = {Explaining {{Development}} and {{Change}} in {{Organizations}}}, + author = {Van de Ven, Andrew H. and Poole, Marshall Scott}, + date = {1995-07-01}, + journaltitle = {Academy of Management Review}, + shortjournal = {ACAD MANAGE REV}, + volume = {20}, + number = {3}, + pages = {510--540}, + issn = {0363-7425, 1930-3807}, + abstract = {This article introduces four basic theories that may serve as building blocks for explaining processes of change in organizations: life cycle, teleology, dialectics, and evolution. 
These four theories represent different sequences of change events that are driven by different conceptual motors and operate at different organizational levels. This article identifies the circumstances when each theory applies and proposes how interplay among the theories produces a wide variety of more complex theories of change and development in organizational life.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/APD9T5KZ/258786.pdf;/home/nathante/Zotero/storage/FBX2F2XQ/510.html} +} + +@book{verhoef_community_2010, + title = {Community Ecology: Processes, Models, and Applications}, + shorttitle = {Community Ecology}, + author = {Verhoef, Herman A and Morin, Peter J}, + date = {2010}, + publisher = {{Oxford University Press}}, + location = {{Oxford}}, + isbn = {978-0-19-922897-3 978-0-19-922898-0}, + langid = {english}, + annotation = {OCLC: 876676566} +} + +@inproceedings{vincent_examining_2018, + title = {Examining {{Wikipedia}} with a Broader Lens: Quantifying the Value of {{Wikipedia}}'s Relationships with Other Large-Scale Online Communities}, + shorttitle = {Examining {{Wikipedia With}} a {{Broader Lens}}}, + booktitle = {Proceedings of the 2018 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Vincent, Nicholas and Johnson, Isaac and Hecht, Brent}, + date = {2018}, + series = {{{CHI}} '18}, + pages = {566:1--566:13}, + publisher = {{ACM}}, + location = {{New York, NY}}, + abstract = {The extensive Wikipedia literature has largely considered Wikipedia in isolation, outside of the context of its broader Internet ecosystem. Very recent research has demonstrated the significance of this limitation, identifying critical relationships between Google and Wikipedia that are highly relevant to many areas of Wikipedia-based research and practice. 
This paper extends this recent research beyond search engines to examine Wikipedia's relationships with large-scale online communities, Stack Overflow and Reddit in particular. We find evidence of consequential, albeit unidirectional relationships. Wikipedia provides substantial value to both communities, with Wikipedia content increasing visitation, engagement, and revenue, but we find little evidence that these websites contribute to Wikipedia in return. Overall, these findings highlight important connections between Wikipedia and its broader ecosystem that should be considered by researchers studying Wikipedia. Critically, our results also emphasize the key role that volunteer-created Wikipedia content plays in improving other websites, even contributing to revenue generation.}, + isbn = {978-1-4503-5620-6}, + file = {/home/nathante/Zotero/storage/8YF9QUFS/Vincent et al. - 2018 - Examining Wikipedia With a Broader Lens Quantifyi.pdf;/home/nathante/Zotero/storage/FHXYQSZK/Vincent et al. - 2018 - Examining Wikipedia With a Broader Lens Quantifyi.pdf} +} + +@book{von_hippel_democratizing_2006, + title = {Democratizing Innovation}, + author = {von Hippel, Eric}, + options = {useprefix=true}, + date = {2006}, + publisher = {{The MIT Press}}, + abstract = {Innovation is rapidly becoming democratized. Users, aided by improvements in computer and communications technology, increasingly can develop their own new products and services. These innovating users—both individuals and firms—often freely share their innovations with others, creating user-innovation communities and a rich intellectual commons. In Democratizing Innovation, Eric von Hippel looks closely at this emerging system of user-centered innovation. 
He explains why and when users find it profitable to develop new products and services for themselves, and why it often pays users to reveal their innovations freely for the use of all. The trend toward democratized innovation can be seen in software and information products—most notably in the free and open-source software movement—but also in physical products. Von Hippel's many examples of user innovation in action range from surgical equipment to surfboards to software security features. He shows that product and service development is concentrated among "lead users," who are ahead on marketplace trends and whose innovations are often commercially attractive. Von Hippel argues that manufacturers should redesign their innovation processes and that they should systematically seek out innovations developed by users. He points to businesses—the custom semiconductor industry is one example—that have learned to assist user-innovators by providing them with toolkits for developing new products. User innovation has a positive impact on social welfare, and von Hippel proposes that government policies, including R\&D subsidies and tax credits, should be realigned to eliminate biases against it. 
The goal of a democratized user-centered innovation system, says von Hippel, is well worth striving for.}, + isbn = {978-0-262-72047-2 978-0-262-00274-5}, + langid = {english}, + keywords = {innovation,org theory}, + file = {/home/nathante/Zotero/storage/ZK5N3JLA/search.html} +} + +@book{von_hippel_free_2016, + title = {Free Innovation}, + author = {von Hippel, Eric}, + options = {useprefix=true}, + date = {2016-11-18}, + edition = {1 edition}, + publisher = {{The MIT Press}}, + location = {{Cambridge, MA}}, + abstract = {A leading innovation scholar explains the growing phenomenon and impact of free innovation, in which innovations developed by consumers and given away “for free.” In this book, Eric von Hippel, author of the influential Democratizing Innovation, integrates new theory and research findings into the framework of a “free innovation paradigm.” Free innovation, as he defines it, involves innovations developed by consumers who are self-rewarded for their efforts, and who give their designs away “for free.” It is an inherently simple grassroots innovation process, unencumbered by compensated transactions and intellectual property rights. Free innovation is already widespread in national economies and is steadily increasing in both scale and scope. Today, tens of millions of consumers are collectively spending tens of billions of dollars annually on innovation development. However, because free innovations are developed during consumers' unpaid, discretionary time and are given away rather than sold, their collective impact and value have until very recently been hidden from view. This has caused researchers, governments, and firms to focus too much on the Schumpeterian idea of innovation as a producer-dominated activity. Free innovation has both advantages and drawbacks. Because free innovators are self-rewarded by such factors as personal utility, learning, and fun, they often pioneer new areas before producers see commercial potential. 
At the same time, because they give away their innovations, free innovators generally have very little incentive to invest in diffusing what they create, which reduces the social value of their efforts. The best solution, von Hippel and his colleagues argue, is a division of labor between free innovators and producers, enabling each to do what they do best. The result will be both increased producer profits and increased social welfare―a gain for all.}, + isbn = {978-0-262-03521-7}, + langid = {english}, + pagetotal = {240} +} + +@article{von_hippel_sticky_1994, + title = {"{{Sticky}} Information" and the Locus of Problem Solving: Implications for Innovation}, + shorttitle = {"{{Sticky Information}}" and the {{Locus}} of {{Problem Solving}}}, + author = {von Hippel, Eric}, + options = {useprefix=true}, + date = {1994}, + journaltitle = {Management Science}, + volume = {40}, + number = {4}, + pages = {429--439}, + issn = {0025-1909}, + abstract = {To solve a problem, needed information and problem-solving capabilities must be brought together. Often the information used in technical problem solving is costly to acquire, transfer, and use in a new location---is, in our terms, "sticky." In this paper we explore the impact of information stickiness on the locus of innovation-related problem solving. We find, first, that when sticky information needed by problem solvers is held at one site only, problem solving will be carried out at that locus, other things being equal. Second, when more than one locus of sticky information is called upon by problem solvers, the locus of problem solving may iterate among these sites as problem solving proceeds. When the costs of such iteration are high, then, third, problems that draw upon multiple sites of sticky information will sometimes be "task partitioned" into subproblems that each draw on only one such locus, and/or, fourth, investments will be made to reduce the stickiness of information at some locations. 
Information stickiness appears to affect a number of issues of importance to researchers and practitioners. Among these are patterns in the diffusion of information, the specialization of firms, the locus of innovation, and the nature of problems selected by problem solvers.}, + file = {/home/nathante/Zotero/storage/VJT3KFVS/von Hippel - 1994 - Sticky information and the locus of problem solv.pdf;/home/nathante/Zotero/storage/N5WSWBCN/v_3a40_3ay_3a1994_3ai_3a4_3ap_3a429-439.html} +} + +@inproceedings{waller_generalists_2019, + title = {Generalists and {{Specialists}}: Using {{Community Embeddings}} to {{Quantify Activity Diversity}} in {{Online Platforms}}}, + shorttitle = {Generalists and {{Specialists}}}, + booktitle = {The {{World Wide Web Conference}} on - {{WWW}} '19}, + author = {Waller, Isaac and Anderson, Ashton}, + date = {2019}, + pages = {1954--1964}, + publisher = {{ACM Press}}, + location = {{San Francisco, CA, USA}}, + abstract = {In many online platforms, people must choose how broadly to allocate their energy. Should one concentrate on a narrow area of focus, and become a specialist, or apply oneself more broadly, and become a generalist? In this work, we propose a principled measure of how generalist or specialist a user is, and study behavior in online platforms through this lens. To do this, we construct highly accurate community embeddings that represent communities in a high-dimensional space. We develop sets of community analogies and use them to optimize our embeddings so that they encode community relationships extremely well. Based on these embeddings, we introduce a natural measure of activity diversity, the GS-score. Applying our embedding-based measure to online platforms, we observe a broad spectrum of user activity styles, from extreme specialists to extreme generalists, in both community membership on Reddit and programming contributions on GitHub. 
We find that activity diversity is related to many important phenomena of user behavior. For example, specialists are much more likely to stay in communities they contribute to, but generalists are much more likely to remain on platforms as a whole. We also find that generalists engage with significantly more diverse sets of users than specialists do. Furthermore, our methodology leads to a simple algorithm for community recommendation, matching state-of-the-art methods like collaborative filtering. Our methods and results introduce an important new dimension of online user behavior and shed light on many aspects of online platform use.}, + eventtitle = {The {{World Wide Web Conference}}}, + isbn = {978-1-4503-6674-8}, + langid = {english}, + keywords = {activity diversity,community embeddings,community recommendation,generalist and specialists}, + file = {/home/nathante/Zotero/storage/5F77953J/Waller and Anderson - 2019 - Generalists and Specialists Using Community Embed.pdf;/home/nathante/Zotero/storage/PK32L55Y/Waller and Anderson - 2019 - Generalists and Specialists Using Community Embed.pdf} +} + +@inproceedings{wang_coming_2015, + title = {Coming of {{Age}} ({{Digitally}}): An {{Ecological View}} of {{Social Media Use}} among {{College Students}}}, + shorttitle = {Coming of {{Age}} ({{Digitally}})}, + booktitle = {Proceedings of the 18th {{ACM Conference}} on {{Computer Supported Cooperative Work}} \& {{Social Computing}}}, + author = {Wang, Yiran and Niiya, Melissa and Mark, Gloria and Reich, Stephanie M. and Warschauer, Mark}, + date = {2015-02-28}, + series = {{{CSCW}} '15}, + pages = {571--582}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {We take an ecological approach to studying social media use and its relation to mood among college students. 
We conducted a mixed-methods study of computer and phone logging with daily surveys and interviews to track college students' use of social media during all waking hours over seven days. Continual and infrequent checkers show different preferences of social media sites. Age differences also were found. Lower classmen tend to be heavier users and to primarily use Facebook, while upper classmen use social media less frequently and utilize sites other than Facebook more often. Factor analysis reveals that social media use clusters into patterns of content-sharing, text-based entertainment/discussion, relationships, and video consumption. The more constantly one checks social media daily, the less positive is one's mood. Our results suggest that students construct their own patterns of social media usage to meet their changing needs in their environment. The findings can inform further investigation into social media use as a benefit and/or distraction for students.}, + isbn = {978-1-4503-2922-4}, + keywords = {college students,computer logging,facebook,in situ study,social media}, + file = {/home/nathante/Zotero/storage/B6BFNKKK/Wang et al_2015_Coming of Age (Digitally).pdf} +} + +@article{wang_data_2016, + title = {Data Based Identification and Prediction of Nonlinear and Complex Dynamical Systems}, + author = {Wang, Wen-Xu and Lai, Ying-Cheng and Grebogi, Celso}, + date = {2016-07-12}, + journaltitle = {Physics Reports}, + shortjournal = {Physics Reports}, + series = {Data Based Identification and Prediction of Nonlinear and Complex Dynamical Systems}, + volume = {644}, + pages = {1--76}, + issn = {0370-1573}, + abstract = {The problem of reconstructing nonlinear and complex dynamical systems from measured data or time series is central to many scientific disciplines including physical, biological, computer, and social sciences, as well as engineering and economics. 
The classic approach to phase-space reconstruction through the methodology of delay-coordinate embedding has been practiced for more than three decades, but the paradigm is effective mostly for low-dimensional dynamical systems. Often, the methodology yields only a topological correspondence of the original system. There are situations in various fields of science and engineering where the systems of interest are complex and high dimensional with many interacting components. A complex system typically exhibits a rich variety of collective dynamics, and it is of great interest to be able to detect, classify, understand, predict, and control the dynamics using data that are becoming increasingly accessible due to the advances of modern information technology. To accomplish these goals, especially prediction and control, an accurate reconstruction of the original system is required. Nonlinear and complex systems identification aims at inferring, from data, the mathematical equations that govern the dynamical evolution and the complex interaction patterns, or topology, among the various components of the system. With successful reconstruction of the system equations and the connecting topology, it may be possible to address challenging and significant problems such as identification of causal relations among the interacting components and detection of hidden nodes. The “inverse” problem thus presents a grand challenge, requiring new paradigms beyond the traditional delay-coordinate embedding methodology. The past fifteen years have witnessed rapid development of contemporary complex graph theory with broad applications in interdisciplinary science and engineering. 
The combination of graph, information, and nonlinear dynamical systems theories with tools from statistical physics, optimization, engineering control, applied mathematics, and scientific computing enables the development of a number of paradigms to address the problem of nonlinear and complex systems reconstruction. In this Review, we describe the recent advances in this forefront and rapidly evolving field, with a focus on compressive sensing based methods. In particular, compressive sensing is a paradigm developed in recent years in applied mathematics, electrical engineering, and nonlinear physics to reconstruct sparse signals using only limited data. It has broad applications ranging from image compression/reconstruction to the analysis of large-scale sensor networks, and it has become a powerful technique to obtain high-fidelity signals for applications where sufficient observations are not available. We will describe in detail how compressive sensing can be exploited to address a diverse array of problems in data based reconstruction of nonlinear and complex networked systems. The problems include identification of chaotic systems and prediction of catastrophic bifurcations, forecasting future attractors of time-varying nonlinear systems, reconstruction of complex networks with oscillatory and evolutionary game dynamics, detection of hidden nodes, identification of chaotic elements in neuronal networks, reconstruction of complex geospatial networks and nodal positioning, and reconstruction of complex spreading networks with binary data. A number of alternative methods, such as those based on system response to external driving, synchronization, and noise-induced dynamical correlation, will also be discussed. Due to the high relevance of network reconstruction to biological sciences, a special section is devoted to a brief survey of the current methods to infer biological networks. 
Finally, a number of open problems including control and controllability of complex nonlinear dynamical networks are discussed. The methods outlined in this Review are principled on various concepts in complexity science and engineering such as phase transitions, bifurcations, stabilities, and robustness. The methodologies have the potential to significantly improve our ability to understand a variety of complex dynamical systems ranging from gene regulatory systems to social networks toward the ultimate goal of controlling such systems.}, + file = {/home/nathante/Zotero/storage/UUYAPUUB/Wang et al. - 2016 - Data based identification and prediction of nonlin.pdf;/home/nathante/Zotero/storage/PWJCA6NU/S037015731630134X.html} +} + +@article{wang_impact_2012, + ids = {wang_impact_2013}, + title = {The Impact of Membership Overlap on Growth: An Ecological Competition View of Online Groups}, + shorttitle = {The Impact of Membership Overlap on Growth}, + author = {Wang, Xiaoqing and Butler, Brian S. and Ren, Yuqing}, + date = {2012-06-15}, + journaltitle = {Organization Science}, + shortjournal = {Organization Science}, + volume = {24}, + number = {2}, + pages = {414--431}, + publisher = {{INFORMS}}, + issn = {1047-7039}, + abstract = {The dominant narrative of the Internet has been one of unconstrained growth, abundance, and plenitude. It is in this context that new forms of organizing, such as online groups, have emerged. However, the same factors that underlie the utopian narrative of Internet life also give rise to numerous online groups, many of which fail to attract participants or to provide significant value. This suggests that despite the potential transformative nature of modern information technology, issues of scarcity, competition, and context may remain critical to the performance and functioning of online groups. 
In this paper, we draw from organizational ecology theories to develop an ecological view of online groups to explain how overlapping membership among online groups causes intergroup competition for member attention and affects a group's ability to grow. Hypotheses regarding the effects of group size, age, and membership overlap on growth are proposed and tested with data from a 64-month, longitudinal sample of 240 online discussion groups. The analysis shows that sharing members with other groups reduced future growth rates, suggesting that membership overlap puts competitive pressure on online groups. Our results also suggest that, compared with smaller and younger groups, larger and older groups experience greater difficulty in growing their membership. In addition, larger groups were more vulnerable to competitive pressure than smaller groups: larger groups experienced greater difficulty in growing their membership than smaller groups as competition intensified. Overall, our findings show how an abundance of opportunities afforded by technologies can create scarcity in user time and effort, which increases competitive pressure on online groups. Our ecological view extends organizational ecology theory to new organizational forms online and highlights the importance of studying the competitive environment of online groups.}, + file = {/home/nathante/Zotero/storage/3WI37Y9S/Wang et al. - 2013 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/D7GAZURV/Wang et al. - 2012 - The Impact of Membership Overlap on Growth An Eco.pdf;/home/nathante/Zotero/storage/EQSW25XD/Wang et al. 
- 2012 - The impact of membership overlap on growth An eco.pdf;/home/nathante/Zotero/storage/8QDPVTSM/orsc.1120.html;/home/nathante/Zotero/storage/IK6SB3L8/orsc.1120.html} +} + +@inproceedings{wang_searching_2012, + ids = {wang_searching_2012-1}, + title = {Searching for the Goldilocks Zone: Trade-Offs in Managing Online Volunteer Groups}, + shorttitle = {Searching for the Goldilocks Zone}, + booktitle = {Proceedings of the {{ACM}} 2012 Conference on {{Computer Supported Cooperative Work}}}, + author = {Wang, Loxley Sijia and Chen, Jilin and Ren, Yuqing and Riedl, John}, + date = {2012}, + series = {{{CSCW}} '12}, + pages = {989--998}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Dedicated and productive members who actively contribute to community efforts are crucial to the success of online volunteer groups such as Wikipedia. What predicts member productivity? Do productive members stay longer? How does involvement in multiple projects affect member contribution to the community? In this paper, we analyze data from 648 WikiProjects to address these questions. Our results reveal two critical trade-offs in managing online volunteer groups. First, factors that increase member productivity, measured by the number of edits on Wikipedia articles, also increase likelihood of withdrawal from contributing, perhaps due to feelings of mission accomplished or burnout. Second, individual membership in multiple projects has mixed effects. It decreases the amount of work editors contribute to both the individual projects and Wikipedia as a whole. It increases withdrawal for each individual project yet reduces withdrawal from Wikipedia. 
We discuss how our findings expand existing theories to fit the online context and inform the design of new tools to improve online volunteer work.}, + isbn = {978-1-4503-1086-4}, + keywords = {online volunteer group,productivity,trade-off,wikipedia,withdrawal}, + file = {/home/nathante/Zotero/storage/7CKH7QT7/Wang et al. - 2012 - Searching for the goldilocks zone trade-offs in m.pdf;/home/nathante/Zotero/storage/R8ALMDFI/Wang et al. - 2012 - Searching for the goldilocks zone trade-offs in m.pdf;/home/nathante/Zotero/storage/Z28IT3FH/Wang et al. - 2012 - Searching for the goldilocks zone trade-offs in m.pdf} +} + +@inproceedings{warncke-wang_misalignment_2015, + title = {Misalignment between Supply and Demand of Quality Content in Peer Production Communities}, + booktitle = {Proceedings of the {{Ninth International AAAI Conference}} on {{Web}} and {{Social Media}} ({{ICWSM}} '15)}, + author = {Warncke-Wang, Morten and Ranjan, Vivek and Terveen, Loren and Hecht, Brent}, + date = {2015}, + pages = {493--502}, + abstract = {In peer production communities, individual community members typically decide for themselves where to make contributions, often driven by factors such as “fun” or a belief that “information should be free”. However, the extent to which this bottom-up, interest-driven content production paradigm meets the needs of consumers of this content is unclear. In this paper, we introduce an analytical framework for studying the relationship between content production and consumption in peer production communities. Applying our framework to four large Wikipedia language editions, we find extensive misalignment between production and consumption in all of them. We also show that this misalignment has an enormous effect on Wikipedia's readers. For example, over 1.5 billion monthly pageviews in the English Wikipedia go to articles that would be of much higher quality if editors optimally distributed their work to meet reader demand. 
Examining misalignment in more detail, we observe that there is an excess of high-quality content about certain specific topics, and that the majority of articles with insufficient quality are in a stable state (i.e. not breaking news). Finally, we discuss technologies and community practises that can help reduce the misalignment between the supply of and demand for high-quality content in peer production communities.}, + langid = {english} +} + +@article{wasko_why_2005, + title = {Why {{Should I Share}}? Examining {{Social Capital}} and {{Knowledge Contribution}} in {{Electronic Networks}} of {{Practice}}}, + shorttitle = {Why {{Should I Share}}?}, + author = {Wasko, Molly McLure and Faraj, Samer}, + date = {2005}, + journaltitle = {MIS Quarterly}, + volume = {29}, + number = {1}, + eprint = {25148667}, + eprinttype = {jstor}, + pages = {35--57}, + publisher = {{Management Information Systems Research Center, University of Minnesota}}, + issn = {0276-7783}, + abstract = {Electronic networks of practice are computer-mediated discussion forums focused on problems of practice that enable individuals to exchange advice and ideas with others based on common interests. However, why individuals help strangers in these electronic networks is not well understood: there is no immediate benefit to the contributor, and free-riders are able to acquire the same knowledge as everyone else. To understand this paradox, we apply theories of collective action to examine how individual motivations and social capital influence knowledge contribution in electronic networks. This study reports on the activities of one electronic network supporting a professional legal association. Using archival, network, survey, and content analysis data, we empirically test a model of knowledge contribution. 
We find that people contribute their knowledge when they perceive that it enhances their professional reputations, when they have the experience to share, and when they are structurally embedded in the network. Surprisingly, contributions occur without regard to expectations of reciprocity from others or high levels of commitment to the network.}, + file = {/home/nathante/Zotero/storage/JHMZDCUP/Wasko_Faraj_2005_Why Should I Share.pdf} +} + +@article{weber_emergence_2016, + ids = {weber_emergence_2016-1}, + title = {The {{Emergence}} and {{Evolution}} of {{Social Networking Sites}} as an {{Organizational Form}}}, + author = {Weber, Matthew S. and Fulk, Janet and Monge, Peter}, + date = {2016-02-11}, + journaltitle = {Management Communication Quarterly}, + shortjournal = {Management Communication Quarterly}, + pages = {0893318916629547}, + publisher = {{SAGE Publications Inc}}, + issn = {0893-3189, 1552-6798}, + abstract = {A number of new organizational structures have emerged in recent years, including peer production networks, digitally organized social movements, and social networking sites (SNSs). Researchers have devoted considerable attention to these phenomena as groups and communities. This article takes a complementary approach by conceptualizing them as organizational forms, with focus on the emergence of SNSs as a distinct organizational form. Community ecology theory is implemented to explicate the emergence and subsequent legitimation of organizational forms, providing a foundation for understanding how new forms emerge through interaction with the surrounding environment. Industry data and historical records are utilized to illustrate the development of one specific form: online SNSs. This analysis demonstrates that legitimation is an ongoing process of replication of features, but legitimacy also occurs through recognition from adjacent populations. 
Findings illustrate the validity of alternative processes of form legitimacy.}, + langid = {english}, + keywords = {emergence,evolution,legitimacy,organizational form,social media}, + file = {/home/nathante/Zotero/storage/74PWFFY5/Weber et al_2016_The Emergence and Evolution of Social Networking Sites as an Organizational Form.pdf;/home/nathante/Zotero/storage/W5N27PGW/Weber et al_2016_The Emergence and Evolution of Social Networking Sites as an Organizational Form.pdf} +} + +@unpublished{weber_political_2000, + title = {The {{Political Economy}} of {{Open Source Software}}}, + author = {Weber, Steven}, + date = {2000-06}, + langid = {english}, + file = {/home/nathante/Zotero/storage/MZQLT27W/Weber - The Political Economy of Open Source Software.pdf} +} + +@incollection{white_effects_2011, + title = {Effects of Community Size and Contact Rate in Synchronous Social Q\&a}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {White, Ryen W. and Richardson, Matthew and Liu, Yandong}, + date = {2011-05-07}, + pages = {2837--2846}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Social question-and-answer (Q\&A) involves the location of answers to questions through communication with people. Social Q\&A systems, such as mailing lists and Web forums are popular, but their asynchronous nature can lead to high answer latency. Synchronous Q\&A systems facilitate real-time dialog, usually via instant messaging, but face challenges with interruption costs and the availability of knowledgeable answerers at question time. We ran a longitudinal study of a synchronous social Q\&A system to investigate the effects of the rate with which potential answerers were contacted (trading off time-to-answer against interruption cost) and community size (varying total number of members). 
We found important differences in subjective and objective measures of system performance with these variations. Our findings help us understand the costs and benefits of varying contact rate and community size in synchronous social Q\&A, and inform system design for social Q\&A.}, + isbn = {978-1-4503-0228-9}, + keywords = {community size,contact rate,synchronous social q&a}, + file = {/home/nathante/Zotero/storage/YTF5HY6W/White et al. - 2011 - Effects of community size and contact rate in sync.pdf} +} + +@article{williamson_economics_1981, + title = {The Economics of Organization: The Transaction Cost Approach}, + author = {Williamson, Oliver E.}, + date = {1981-11}, + journaltitle = {The American Journal of Sociology}, + volume = {87}, + number = {3}, + eprint = {2778934}, + eprinttype = {jstor}, + pages = {548--577}, + issn = {00029602}, + abstract = {The transaction cost approach to the study of economic organization regards the transaction as the basic unit of analysis and holds that an understanding of transaction cost economizing is central to the study of organizations. Applications of this approach require that transactions be dimensionalized and that alternative governance structures be described. Economizing is accomplished by assigning transactions to governance structures in a discriminating way. The approach applies both to the determination of efficient boundaries, as between firms and markets, and to the organization of internal transactions, including the design of employment relations. 
The approach is compared and contrasted with selected parts of the organization theory literature.}, + keywords = {Economics,Sociology}, + file = {/home/nathante/Zotero/storage/JHWPCT8H/Williamson - 1981 - The economics of organization The transaction cos.pdf} +} + +@book{wooldridge_econometric_2011, + title = {Econometric Analysis of Cross Section and Panel Data.}, + author = {Wooldridge, Jeffrey M}, + date = {2011}, + publisher = {{MIT}}, + location = {{Cambridge, Mass.}}, + isbn = {978-0-262-23258-6}, + langid = {english}, + annotation = {OCLC: 476619515} +} + +@book{worster_natures_1994, + title = {Nature's Economy: A History of Ecological Ideas}, + shorttitle = {Nature's Economy}, + author = {Worster, Donald}, + date = {1994}, + publisher = {{Cambridge University Press}}, + location = {{Cambridge; New York, NY, USA}}, + abstract = {Nature's Economy is a wide-ranging investigation of ecology's past. It traces the origins of the concept, discusses the thinkers who have shaped it, and shows how it in turn has shaped the modern perception of our place in nature.}, + isbn = {978-1-107-26680-3}, + langid = {english}, + annotation = {OCLC: 855524849}, + file = {/home/nathante/Zotero/storage/E2XXC7KJ/(Studies in Environment and History) Worster D.-Nature's Economy_ A History of Ecological Ideas-Cambridge University Press (1994).djvu} +} + +@article{wu_estimating_2019, + title = {Estimating {{Attention Flow}} in {{Online Video Networks}}}, + author = {Wu, Siqi and Rizoiu, Marian-Andrei and Xie, Lexing}, + date = {2019-11-07}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {3}, + pages = {183:1--183:25}, + abstract = {Online videos have shown tremendous increase in Internet traffic. Most video hosting sites implement recommender systems, which connect the videos into a directed network and conceptually act as a source of pathways for users to navigate. 
At present, little is known about how human attention is allocated over such large-scale networks, and about the impacts of the recommender systems. In this paper, we first construct the Vevo network -- a YouTube video network with 60,740 music videos interconnected by the recommendation links, and we collect their associated viewing dynamics. This results in a total of 310 million views every day over a period of 9 weeks. Next, we present large-scale measurements that connect the structure of the recommendation network and the video attention dynamics. We use the bow-tie structure to characterize the Vevo network and we find that its core component (23.1\% of the videos), which occupies most of the attention (82.6\% of the views), is made out of videos that are mainly recommended among themselves. This is indicative of the links between video recommendation and the inequality of attention allocation. Finally, we address the task of estimating the attention flow in the video recommendation network. We propose a model that accounts for the network effects for predicting video popularity, and we show it consistently outperforms the baselines. This model also identifies a group of artists gaining attention because of the recommendation network. 
Altogether, our observations and our models provide a new set of tools to better understand the impacts of recommender systems on collective social attention.}, + issue = {CSCW}, + keywords = {empirical measurement,network effects,online attention,popularity prediction,recommender system,youtube}, + file = {/home/nathante/Zotero/storage/QEZJWR7U/Wu et al_2019_Estimating Attention Flow in Online Video Networks.pdf} +} + +@article{xigen_li_factors_2011, + title = {Factors Influencing the Willingness to Contribute Information to Online Communities}, + author = {{Xigen Li}}, + date = {2011-03}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + volume = {13}, + number = {2}, + pages = {279--296}, + issn = {1461-4448, 1461-7315}, + abstract = {This study examines the factors that influence the willingness to contribute information to online communities from the perspectives of the discretionary database and expectancy theory. The study identified four groups of variables and tested their predictive value on the willingness to contribute information to online communities. The findings confirmed the effect of the perceived value of contributing and the likelihood of getting a reward for the willingness to contribute. Cost of contribution was not a significant predictor of the willingness to contribute information. Benefit from, and interest in, the community were significant predictors, but community affinity was not. 
Among the four groups of variables, social approval was the strongest predictor of the willingness to contribute.}, + langid = {english} +} + +@article{xu_evolution_2021, + title = {Evolution of Audience Duplication Networks among Social Networking Sites: Exploring the Influences of Preferential Attachment, Audience Size, and Niche Width}, + shorttitle = {Evolution of Audience Duplication Networks among Social Networking Sites}, + author = {Xu, Yu}, + date = {2021-02-15}, + journaltitle = {New Media \& Society}, + shortjournal = {New Media \& Society}, + pages = {1461444821993048}, + publisher = {{SAGE Publications}}, + issn = {1461-4448}, + abstract = {This study examines the evolution of social networking sites (SNSs) from a networked audience duplication perspective. Guided by social network theory, the theory of double jeopardy, and niche theory, this study proposes an integrated framework to explain the evolution of SNS choices of the US audience between 2016 and 2019. Shared traffic data were retrieved from comScore’s Media Metrix Multi-Platform database. The empirical results of the separable temporal exponential random graph model (STERGM) confirm that preferential attachment, audience size, and niche width significantly drive the likelihood of tie formation and dissolution in the evolving audience duplication network. These effects hold true even when other endogenous structural features and exogenous nodal attributes are taken into account. 
Theoretical implications for the networked media landscape are discussed.}, + langid = {english}, + keywords = {Audience duplication,evolution,network analysis,organizational ecology,social media}, + file = {/home/nathante/Zotero/storage/94TAHIW3/Xu - 2021 - Evolution of audience duplication networks among s.pdf} +} + +@article{xu_modeling_2017, + title = {Modeling the Adoption of Social Media by Newspaper Organizations: An Organizational Ecology Approach}, + shorttitle = {Modeling the Adoption of Social Media by Newspaper Organizations}, + author = {Xu, Yu}, + date = {2017-02-01}, + journaltitle = {Telematics and Informatics}, + shortjournal = {Telematics and Informatics}, + volume = {34}, + number = {1}, + pages = {151--163}, + issn = {0736-5853}, + abstract = {Although the ecological approach has been utilized in the field of communication, no prior research has applied this perspective to examine the organizational selection of social media. This study employs the framework of density dependence to understand what drives the adoption of social media by organizations. Fixed-effects negative binominal regression models were run to test the hypotheses that predicted the founding rates of 2007 Chinese newspaper organizations in 31 provincial units on Sina Weibo from August 2009 to June 2015. The results show that the founding rate of party newspapers exhibits inverted U-shaped relationships to local or non-local party newspaper density and non-party newspaper density. At the same time, the density dependence hypothesis is supported only for the effect of non-party newspaper density outside the provincial unit on the founding rate of non-party newspapers. Unexpectedly, non-party newspaper density within the provincial boundary exerts no significant influence on this founding rate. Both local and non-local party newspaper densities significantly and negatively influence the decisions to start homepages on Sina Weibo among non-newspaper organizations. 
Discussion and implications are provided.}, + file = {/home/nathante/Zotero/storage/FZT5VZNZ/Xu - 2017 - Modeling the adoption of social media by newspaper.pdf;/home/nathante/Zotero/storage/JYGYT3XA/XU - 2018 - The Ecological Dynamics of Organizational Change .pdf;/home/nathante/Zotero/storage/YLREBQ4E/S0736585315300812.html} +} + +@article{yarchi_political_2021, + title = {Political {{Polarization}} on the {{Digital Sphere}}: A {{Cross}}-Platform, {{Over}}-Time {{Analysis}} of {{Interactional}}, {{Positional}}, and {{Affective Polarization}} on {{Social Media}}}, + shorttitle = {Political {{Polarization}} on the {{Digital Sphere}}}, + author = {Yarchi, Moran and Baden, Christian and Kligler-Vilenchik, Neta}, + date = {2021-03-15}, + journaltitle = {Political Communication}, + volume = {38}, + number = {1-2}, + pages = {98--139}, + publisher = {{Routledge}}, + issn = {1058-4609}, + abstract = {Political polarization on the digital sphere poses a real challenge to many democracies around the world. Although the issue has received some scholarly attention, there is a need to improve the conceptual precision in the increasingly blurry debate. The use of computational communication science approaches allows us to track political conversations in a fine-grained manner within their natural settings – the realm of interactive social media. The present study combines different algorithmic approaches to studying social media data in order to capture both the interactional structure and content of dynamic political talk online. We conducted an analysis of political polarization across social media platforms (analyzing Facebook, Twitter, and WhatsApp) over 16 months, with close to a quarter million online contributions regarding a political controversy in Israel. 
Our comprehensive measurement of interactive political talk enables us to address three key aspects of political polarization: (1) interactional polarization – homophilic versus heterophilic user interactions; (2) positional polarization – the positions expressed, and (3) affective polarization – the emotions and attitudes expressed. Our findings indicate that political polarization on social media cannot be conceptualized as a unified phenomenon, as there are significant cross-platform differences. While interactions on Twitter largely conform to established expectations (homophilic interaction patterns, aggravating positional polarization, pronounced inter-group hostility), on WhatsApp, de-polarization occurred over time. Surprisingly, Facebook was found to be the least homophilic platform in terms of interactions, positions, and emotions expressed. Our analysis points to key conceptual distinctions and raises important questions about the drivers and dynamics of political polarization online.}, + keywords = {computational communication science approach,cross-platform analysis,over-time analysis,Political polarization,social media}, + annotation = {\_eprint: https://doi.org/10.1080/10584609.2020.1785067}, + file = {/home/nathante/Zotero/storage/2FVADM6B/Yarchi et al_2021_Political Polarization on the Digital Sphere.pdf;/home/nathante/Zotero/storage/7YA6IE6V/10584609.2020.html} +} + +@article{ye_distinguishing_2015, + title = {Distinguishing Time-Delayed Causal Interactions Using Convergent Cross Mapping}, + author = {Ye, Hao and Deyle, Ethan R. and Gilarranz, Luis J. and Sugihara, George}, + date = {2015-10-05}, + journaltitle = {Scientific Reports}, + volume = {5}, + pages = {14750}, + issn = {2045-2322}, + abstract = {An important problem across many scientific fields is the identification of causal effects from observational data alone. 
Recent methods (convergent cross mapping, CCM) have made substantial progress on this problem by applying the idea of nonlinear attractor reconstruction to time series data. Here, we expand upon the technique of CCM by explicitly considering time lags. Applying this extended method to representative examples (model simulations, a laboratory predator-prey experiment, temperature and greenhouse gas reconstructions from the Vostok ice core, and long-term ecological time series collected in the Southern California Bight), we demonstrate the ability to identify different time-delayed interactions, distinguish between synchrony induced by strong unidirectional-forcing and true bidirectional causality, and resolve transitive causal chains.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/V7Z48B5L/Ye et al. - 2015 - Distinguishing time-delayed causal interactions us.pdf;/home/nathante/Zotero/storage/ZQPFWK7T/srep14750.html} +} + +@article{yu_out_2017, + title = {Out {{With The Old}}, {{In With The New}}?: Unpacking {{Member Turnover}} in {{Online Production Groups}}}, + shorttitle = {Out {{With The Old}}, {{In With The New}}?}, + author = {Yu, Bowen and Wang, Xinyi and Lin, Allen Yilun and Ren, Yuqing and Terveen, Loren and Zhu, Haiyi}, + date = {2017-12-06}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + volume = {1}, + pages = {1--19}, + issn = {25730142}, + issue = {CSCW}, + langid = {english}, + file = {/home/nathante/Zotero/storage/Z8R9ZKUE/Yu et al. - 2017 - Out With The Old, In With The New Unpacking Memb.pdf} +} + +@article{zhang_community_2017, + title = {Community Identity and User Engagement in a Multi-Community Landscape}, + author = {Zhang, Justine and Hamilton, William L. and Danescu-Niculescu-Mizil, Cristian and Jurafsky, Dan and Leskovec, Jure}, + date = {2017-05}, + journaltitle = {Proceedings of the International AAAI Conference on Weblogs and Social Media. 
International AAAI Conference on Weblogs and Social Media}, + shortjournal = {Proc Int AAAI Conf Weblogs Soc Media}, + volume = {2017}, + eprint = {29354325}, + eprinttype = {pmid}, + pages = {377--386}, + issn = {2162-3449}, + abstract = {A community’s identity defines and shapes its internal dynamics. Our current understanding of this interplay is mostly limited to glimpses gathered from isolated studies of individual communities. In this work we provide a systematic exploration of the nature of this relation across a wide variety of online communities. To this end we introduce a quantitative, language-based typology reflecting two key aspects of a community’s identity: how distinctive, and how temporally dynamic it is. By mapping almost 300 Reddit communities into the landscape induced by this typology, we reveal regularities in how patterns of user engagement vary with the characteristics of a community. Our results suggest that the way new and existing users engage with a community depends strongly and systematically on the nature of the collective identity it fosters, in ways that are highly consequential to community maintainers. For example, communities with distinctive and highly dynamic identities are more likely to retain their users. However, such niche communities also exhibit much larger acculturation gaps between existing users and newcomers, which potentially hinder the integration of the latter. More generally, our methodology reveals differences in how various social phenomena manifest across communities, and shows that structuring the multi-community landscape can lead to a better understanding of the systematic nature of this diversity.}, + pmcid = {PMC5774974}, + file = {/home/nathante/Zotero/storage/DZEYKKSS/Zhang et al. 
- 2017 - Community Identity and User Engagement in a Multi-.pdf;/home/nathante/Zotero/storage/MMY3NEQ4/Zhang et al_2017_Community Identity and User Engagement in a Multi-Community Landscape.pdf;/home/nathante/Zotero/storage/FFFSAVRR/14904.html} +} + +@article{zhang_configuring_2020, + title = {Configuring {{Audiences}}: A {{Case Study}} of {{Email Communication}}}, + shorttitle = {Configuring {{Audiences}}}, + author = {Zhang, Justine and Pennebaker, James and Dumais, Susan and Horvitz, Eric}, + date = {2020-05-28}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {4}, + pages = {062:1--062:26}, + abstract = {When people communicate with each other, their choice of what to say is tied to their perceptions of the audience. For many communication channels, people have some ability to explicitly specify their audience members and the different roles they can play. While existing accounts of communication behavior have largely focused on how people tailor the content of their messages, we focus on the configuring of the audience as a complementary family of decisions in communication. We formulate a general description of audience configuration choices, highlighting key aspects of the audience that people could configure to reflect a range of communicative goals. We then illustrate these ideas via a case study of email usage---a realistic domain where audience configuration choices are particularly fine-grained and explicit in how email senders fill the To and Cc address fields. In a large collection of enterprise emails, we explore how people configure their audiences, finding salient patterns relating a sender's choice of configuration to the types of participants in the email exchange, the content of the message, and the nature of the subsequent interactions. 
Our formulation and findings show how analyzing audience configurations can enrich and extend existing accounts of communication behavior, and frame research directions on audience configuration decisions in communication and collaboration.}, + issue = {CSCW1}, + keywords = {audience,email,social interaction} +} + +@article{zhang_group_2011, + title = {Group Size and Incentives to Contribute: A Natural Experiment at Chinese Wikipedia}, + shorttitle = {Group Size and Incentives to Contribute}, + author = {Zhang, Xiaoquan Michael and Zhu, Feng}, + date = {2011-06}, + journaltitle = {American Economic Review}, + volume = {101}, + number = {4}, + pages = {1601--1615}, + issn = {0002-8282}, + abstract = {The literature on the private provision of public goods suggests an inverse relationship between incentives to contribute and group size. We find, however, that after an exogenous reduction of group size at Chinese Wikipedia, the nonblocked contributors decrease their contributions by 42.8 percent on average. We attribute the cause to social effects: contributors receive social benefits that increase with both the amount of their contributions and group size, and the shrinking group size weakens these social benefits. Consistent with our explanation, we find that the more contributors value social benefits, the more they reduce their contributions after the block. 
(JEL H41, L17, L82)}, + langid = {english}, + keywords = {Media,Public Goods; Open Source Products and Markets; Entertainment}, + file = {/home/nathante/Zotero/storage/63JBCUER/Zhang and Zhu - 2011 - Group Size and Incentives to Contribute A Natural.pdf;/home/nathante/Zotero/storage/BWMQ96PV/articles.html} +} + +@article{zhang_intergroup_2019, + title = {Intergroup {{Contact}} in the {{Wild}}: Characterizing {{Language Differences}} between {{Intergroup}} and {{Single}}-Group {{Members}} in {{NBA}}-Related {{Discussion Forums}}}, + shorttitle = {Intergroup {{Contact}} in the {{Wild}}}, + author = {Zhang, Jason Shuo and Tan, Chenhao and Lv, Qin}, + date = {2019-11-07}, + journaltitle = {Proceedings of the ACM on Human-Computer Interaction}, + shortjournal = {Proc. ACM Hum.-Comput. Interact.}, + volume = {3}, + pages = {193:1--193:35}, + abstract = {Intergroup contact has long been considered as an effective strategy to reduce prejudice between groups. However, recent studies suggest that exposure to opposing groups in online platforms can exacerbate polarization. To further understand the behavior of individuals who actively engage in intergroup contact in practice, we provide a large-scale observational study of intragroup behavioral differences between members with and without intergroup contact. We leverage the existing structure of NBA-related discussion forums on Reddit to study the context of professional sports. We identify fans of each NBA team as members of a group and trace whether they have intergroup contact. Our results show that members with intergroup contact use more negative and abusive language in their affiliated group than those without such contact, after controlling for activity levels. We further quantify different levels of intergroup contact and show that there may exist nonlinear mechanisms regarding how intergroup contact relates to intragroup behavior. 
Our findings provide complementary evidence to experimental studies in a novel context and also shed light on possible reasons for the different outcomes in prior studies.}, + issue = {CSCW}, + keywords = {intergroup contact,intragroup behavior,language usage,nba-related discussion forums,polarization}, + file = {/home/nathante/Zotero/storage/B5RRUXKC/Zhang et al_2019_Intergroup Contact in the Wild.pdf} +} + +@article{zhang_participation_2019, + title = {Participation of {{New Editors}} after {{Times}} of {{Shock}} on {{Wikipedia}}}, + author = {Zhang, Ark Fangzhou and Wang, Ruihan and Blohm, Eric and Budak, Ceren and Robert, Jr., Lionel P. and Romero, Daniel M.}, + date = {2019-07-06}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {13}, + pages = {560--571}, + issn = {2334-0770}, + abstract = {User participation is vital to the success of collaborative crowdsourcing platforms such as Wikipedia. Previously user participation has been studied during “normal times”. However, less is known about participation following shocks that draw attention to an article. Such events can be recruiting opportunities due to increased attention; but can also pose a threat to the quality and control of the article and drive away newcomers. We study the collaborative dynamics of Wikipedia articles after times corresponding to shocks generated by drastic increases in attention as indicated by data from Google trends. We find that participation following such events is indeed different from participation during normal times---both newcomers and incumbents participate at higher rates during shocks. We also identify collaboration dynamics that mediate the effects of shocks on continued participation after the shock. 
The impact of shocks on participation is mediated by the amount of negative feedback given to newcomers in the form of reverted edits and the amount of coordination editors engage in through edits of the article’s talk page.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/LC9DB9EY/Zhang et al_2019_Participation of New Editors after Times of Shock on Wikipedia.pdf} +} + +@article{zhang_understanding_2021, + title = {Understanding the {{Diverging User Trajectories}} in {{Highly}}-{{Related Online Communities During}} the {{Covid}}-19 {{Pandemic}}}, + author = {Zhang, Jason Shuo and Keegan, Brian and Lv, Qin and Tan, Chenhao}, + date = {2021}, + journaltitle = {Proceedings of the International AAAI Conference on Web and Social Media}, + volume = {5}, + eprint = {2006.04816}, + eprinttype = {arxiv}, + pages = {12}, + abstract = {As the COVID-19 pandemic is disrupting life worldwide, related online communities are popping up. In particular, two “new” communities, /r/China\_flu and /r/Coronavirus, emerged on Reddit and have been dedicated to COVID-related discussions from the very beginning of this pandemic. With /r/Coronavirus promoted as the official community on Reddit, it remains an open question how users choose between these two highly-related communities. In this paper, we characterize user trajectories in these two communities from the beginning of COVID-19 to the end of September 2020. We show that new users of /r/China\_flu and /r/Coronavirus were similar from January to March. After that, their differences steadily increase, evidenced by both language distance and membership prediction, as the pandemic continues to unfold. Furthermore, users who started at /r/China\_flu from January to March were more likely to leave, while those who started in later months tend to remain highly “loyal”. 
To understand this difference, we develop a movement analysis framework to understand membership changes in these two communities and identify a significant proportion of /r/China\_flu members (around 50\%) that moved to /r/Coronavirus in February. This movement turns out to be highly predictable based on other subreddits that users were previously active in. Our work demonstrates how two highly related communities emerge and develop their own identity in a crisis, and highlights the important role of existing communities in understanding such an emergence.}, + archiveprefix = {arXiv}, + langid = {english}, + keywords = {Computer Science - Computers and Society,Computer Science - Social and Information Networks}, + file = {/home/nathante/Zotero/storage/3HZBRY3S/Zhang et al. - Understanding the Diverging User Trajectories in H.pdf;/home/nathante/Zotero/storage/V3QR9ASE/Zhang et al. - 2021 - Understanding the Diverging User Trajectories in H.pdf} +} + +@article{zhao_dynamic_2016, + title = {Dynamic Dependence Networks: Financial Time Series Forecasting and Portfolio Decisions}, + shorttitle = {Dynamic Dependence Networks}, + author = {Zhao, Zoey Yi and Xie, Meng and West, Mike}, + date = {2016}, + journaltitle = {Applied Stochastic Models in Business and Industry}, + volume = {32}, + number = {3}, + pages = {311--332}, + issn = {1526-4025}, + abstract = {We discuss Bayesian forecasting of increasingly high-dimensional time series, a key area of application of stochastic dynamic models in the financial industry and allied areas of business. Novel state-space models characterizing sparse patterns of dependence among multiple time series extend existing multivariate volatility models to enable scaling to higher numbers of individual time series. The theory of these dynamic dependence network models shows how the individual series can be decoupled for sequential analysis and then recoupled for applied forecasting and decision analysis. 
Decoupling allows fast, efficient analysis of each of the series in individual univariate models that are linked – for later recoupling – through a theoretical multivariate volatility structure defined by a sparse underlying graphical model. Computational advances are especially significant in connection with model uncertainty about the sparsity patterns among series that define this graphical model; Bayesian model averaging using discounting of historical information builds substantially on this computational advance. An extensive, detailed case study showcases the use of these models and the improvements in forecasting and financial portfolio investment decisions that are achievable. Using a long series of daily international currencies, stock indices and commodity prices, the case study includes evaluations of multi-day forecasts and Bayesian portfolio analysis with a variety of practical utility functions, as well as comparisons against commodity trading advisor benchmarks. Copyright © 2016 John Wiley \& Sons, Ltd.}, + langid = {english}, + file = {/home/nathante/Zotero/storage/YHCBUCAT/Zhao et al. 
- 2016 - Dynamic dependence networks Financial time series.pdf;/home/nathante/Zotero/storage/NHCZGM9Y/asmb.html} +} + +@incollection{zhao_social_2016, + title = {The {{Social Media Ecology}}: User {{Perceptions}}, {{Strategies}} and {{Challenges}}}, + shorttitle = {The {{Social Media Ecology}}}, + booktitle = {Proceedings of the 2016 {{CHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhao, Xuan and Lampe, Cliff and Ellison, Nicole B.}, + date = {2016-05-07}, + pages = {89--100}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {Many existing studies of social media focus on only one platform, but the reality of users' lived experiences is that most users incorporate multiple platforms into their communication practices in order to access the people and networks they desire to influence. In order to better understand how people make sharing decisions across multiple sites, we asked our participants (N=29) to categorize all modes of communication they used, with the goal of surfacing their mental models about managing sharing across platforms. Our interview data suggest that people simultaneously consider "audience" and "content" when sharing and these needs sometimes compete with one another; that they have the strong desire to both maintain boundaries between platforms as well as allowing content and audience to permeate across these boundaries; and that they strive to stabilize their own communication ecosystem yet need to respond to changes necessitated by the emergence of new tools, practices, and contacts. 
We unpack the implications of these tensions and suggest future design possibilities.}, + isbn = {978-1-4503-3362-7}, + keywords = {boundary management,content sharing,media ecology,social media}, + file = {/home/nathante/Zotero/storage/44Z9658S/Zhao et al_2016_The Social Media Ecology.pdf} +} + +@inproceedings{zhu_impact_2014, + title = {The Impact of Membership Overlap on the Survival of Online Communities}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Kraut, Robert E. and Kittur, Aniket}, + date = {2014-04-26}, + series = {{{CHI}} '14}, + pages = {281--290}, + publisher = {{Association for Computing Machinery}}, + location = {{New York, NY, USA}}, + abstract = {If the people belong to multiple online communities, their joint membership can influence the survival of each of the communities to which they belong. Communities with many joint memberships may struggle to get enough of their members' time and attention, but find it easy to import best practices from other communities. In this paper, we study the effects of membership overlap on the survival of online communities. By analyzing the historical data of 5673 Wikia communities, we find that higher levels of membership overlap are positively associated with higher survival rates of online communities. Furthermore, we find that it is beneficial for young communities to have shared members who play a central role in other mature communities. Our contributions are two-fold. Theoretically, by examining the impact of membership overlap on the survival of online communities we identified an important mechanism underlying the success of online communities. 
Practically, our findings may guide community creators on how to effectively manage their members, and tool designers on how to support this task.}, + isbn = {978-1-4503-2473-1}, + keywords = {membership overlap,online communities,survival analysis}, + file = {/home/nathante/Zotero/storage/GV2D7ZKS/Zhu et al. - 2014 - The Impact of Membership Overlap on the Survival o.pdf;/home/nathante/Zotero/storage/IY4RTSGD/Zhu et al. - 2014 - The impact of membership overlap on the survival o.pdf;/home/nathante/Zotero/storage/JZE5JGAZ/Zhu et al. - 2014 - The impact of membership overlap on the survival o.pdf} +} + +@inproceedings{zhu_selecting_2014, + title = {Selecting an Effective Niche: An Ecological View of the Success of Online Communities}, + shorttitle = {Selecting an Effective Niche}, + booktitle = {Proceedings of the {{SIGCHI Conference}} on {{Human Factors}} in {{Computing Systems}}}, + author = {Zhu, Haiyi and Chen, Jilin and Matthews, Tara and Pal, Aditya and Badenes, Hernan and Kraut, Robert E.}, + date = {2014}, + series = {{{CHI}} '14}, + pages = {301--310}, + publisher = {{ACM}}, + location = {{New York, NY, USA}}, + abstract = {Online communities serve various important functions, but many fail to thrive. Research on community success has traditionally focused on internal factors. In contrast, we take an ecological view to understand how the success of a community is influenced by other communities. We measured a community's relationship with other communities - its "niche" - through four dimensions: topic overlap, shared members, content linking, and shared offline organizational affiliation. We used a mixed-method approach, combining the quantitative analysis of 9495 online enterprise communities and interviews with community members. Our results show that too little or too much overlap in topic with other communities causes a community's activity to suffer. 
We also show that this main result is moderated in predictable ways by whether the community shares members with, links to content in, or shares an organizational affiliation with other communities. These findings provide new insight on community success, guiding online community designers on how to effectively position their community in relation to others.}, + isbn = {978-1-4503-2473-1}, + venue = {Toronto, Ontario, Canada}, + keywords = {online communities,success,topic overlap,workplace}, + file = {/home/nathante/Zotero/storage/FNS9RSWC/Zhu et al. - 2014 - Selecting an Effective Niche An Ecological View o.pdf;/home/nathante/Zotero/storage/KIHWVKUQ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf;/home/nathante/Zotero/storage/RFMX2CBJ/Zhu et al. - 2014 - Selecting an effective niche an ecological view o.pdf} +} + + diff --git a/dissertations/nathante_uw_2021/resources/network-figures.tex b/dissertations/nathante_uw_2021/resources/network-figures.tex new file mode 120000 index 0000000..6dace5a --- /dev/null +++ b/dissertations/nathante_uw_2021/resources/network-figures.tex @@ -0,0 +1 @@ +/home/nathante/partitioning_reddit/diss_paper/resources/network-figures.tex \ No newline at end of file diff --git a/dissertations/nathante_uw_2021/title_page.pdf b/dissertations/nathante_uw_2021/title_page.pdf new file mode 100644 index 0000000..f193120 Binary files /dev/null and b/dissertations/nathante_uw_2021/title_page.pdf differ diff --git a/irb_uw/euds_interviews-20210618/[EUDS Interview] Interview Protocol.pdf b/irb_uw/euds_interviews-20210618/[EUDS Interview] Interview Protocol.pdf new file mode 100644 index 0000000..7f22ad7 Binary files /dev/null and b/irb_uw/euds_interviews-20210618/[EUDS Interview] Interview Protocol.pdf differ diff --git a/irb_uw/euds_interviews-20210618/[EUDS Interview] Participant info sheet.pdf b/irb_uw/euds_interviews-20210618/[EUDS Interview] Participant info sheet.pdf new file mode 100644 index 0000000..5c06ddf Binary 
files /dev/null and b/irb_uw/euds_interviews-20210618/[EUDS Interview] Participant info sheet.pdf differ diff --git a/irb_uw/euds_interviews-20210618/[EUDS Interview] Recruitment Questionnaire - Google Forms.pdf b/irb_uw/euds_interviews-20210618/[EUDS Interview] Recruitment Questionnaire - Google Forms.pdf new file mode 100644 index 0000000..66d38a6 Binary files /dev/null and b/irb_uw/euds_interviews-20210618/[EUDS Interview] Recruitment Questionnaire - Google Forms.pdf differ diff --git a/irb_uw/euds_interviews-20210618/[EUDS Interview] Recruitment message .pdf b/irb_uw/euds_interviews-20210618/[EUDS Interview] Recruitment message .pdf new file mode 100644 index 0000000..70fa801 Binary files /dev/null and b/irb_uw/euds_interviews-20210618/[EUDS Interview] Recruitment message .pdf differ diff --git a/irb_uw/euds_interviews-20210618/[EUDS Interview] UW_IRB_protocol.docx b/irb_uw/euds_interviews-20210618/[EUDS Interview] UW_IRB_protocol.docx new file mode 100644 index 0000000..cc3699d Binary files /dev/null and b/irb_uw/euds_interviews-20210618/[EUDS Interview] UW_IRB_protocol.docx differ diff --git a/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Consent Form.pdf b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Consent Form.pdf new file mode 100644 index 0000000..8309453 Binary files /dev/null and b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Consent Form.pdf differ diff --git a/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Consent and Interview Protocol.pdf b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Consent and Interview Protocol.pdf new file mode 100644 index 0000000..163f88e Binary files /dev/null and b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Consent and Interview Protocol.pdf differ diff --git a/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Key Information Sheet.pdf b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Key Information 
Sheet.pdf new file mode 100644 index 0000000..95f29a1 Binary files /dev/null and b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Key Information Sheet.pdf differ diff --git a/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Recruitment Text.pdf b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Recruitment Text.pdf new file mode 100644 index 0000000..101e0f6 Binary files /dev/null and b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] Recruitment Text.pdf differ diff --git a/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] ZIPLINE APPLICATION IRB Protocol.docx b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] ZIPLINE APPLICATION IRB Protocol.docx new file mode 100644 index 0000000..0709efc Binary files /dev/null and b/irb_uw/india_wikipedia_interviews-20191107/[India Wikipedia] ZIPLINE APPLICATION IRB Protocol.docx differ