code to create final case discussion grades

This still needs to be check over but this is new code to build the final grades. Current threshold for minimum questions comes from 1000 simulated classes (simulation.R).
2022-03-05 20:51:15 -08:00
parent 35bf83e9f6
commit 78ac188f04
3 changed files with 134 additions and 38 deletions
--- a/assessment_and_tracking/compute_final_case_grades.R
+++ b/assessment_and_tracking/compute_final_case_grades.R
@@ -1,72 +1,147 @@
 ## load in the data
 #################################
+myuw <- read.csv("../data/2022_winter_COM_481_A_students.csv", stringsAsFactors=FALSE)

-myuw <- read.csv("myuw-COMMLD_570_A_spring_2021_students.csv", stringsAsFactors=FALSE)
-
-## class-level variables
-question.grades <- c("GOOD"=100, "FAIR"=100-(50/3.3), "WEAK"=100-(50/(3.3)*2))
-
+current.dir <- getwd()
 source("../assessment_and_tracking/track_participation.R")
-setwd("case_grades")
+setwd(current.dir)

 rownames(d) <- d$unique.name
+call.list$timestamp <- as.Date(call.list$timestamp)
+
+## class-level variables
+question.grades <- c("PLUS"=100, "CHECK"=100-(50/3.3), "MINUS"=100-(50/(3.3)*2))
+missed.question.penalty <- (50/3.3) * 0.2 ## 1/5 of a full point on the GPA scale
+
+## inspect set the absence threashold
+ggplot(d) + aes(x=absences) + geom_histogram(binwidth=1, fill="white",color="black")
+## absence.threshold <- median(d$absences)
+absence.threshold <- 4 ## TODO talk about this
+
+## inspect and set the questions cutoff
+## questions.cutoff <- median(d$num.calls)
+## median(d$num.calls)
+## questions.cutoff <- nrow(call.list) / nrow(d) ## TODO talk about this
+## first these are the people were were not called simply because they got unlucky
+
+ ## this is the 95% percentile based on simulation in simulation.R
+questions.cutoff <- 4

 ## show the distribution of assessments
 table(call.list$assessment)
 prop.table(table(call.list$assessment))
-table(call.list$answered)
-prop.table(table(call.list$answered))
+
+table(call.list.full$answered)
+prop.table(table(call.list.full$answered))

 total.questions.asked <- nrow(call.list)

+## find out how man questions folks have present/absent for
+##########################################################
+calls.per.day <- data.frame(day=as.Date(names(table(call.list$timestamp))),
+                            questions.asked=as.numeric(table(call.list$timestamp)))
+
+## function to return the numbers of calls present for or zero if they
+## were absent
+calls.for.student.day <- function (day, student.id) {
+    if (any(absence$unique.name == student.id & absence$date.absent == day)) {
+        return(0)
+    } else {
+        return(calls.per.day$questions.asked[calls.per.day$day == day])
+    }
+}
+
+compute.questions.present.for.student <- function (student.id) {
+    sum(unlist(lapply(unique(calls.per.day$day), calls.for.student.day, student.id)))
+}
+
+## create new column with number of questions present
+d$q.present <- unlist(lapply(d$unique.name, compute.questions.present.for.student))
+d$prop.asked <- d$num.calls / d$q.present
+
+## generate statistics using these new variables
+prop.asks.quantiles <- quantile(d$prop.asked, probs=seq(0,1, 0.01))
+prop.asks.quantiles <- prop.asks.quantiles[!duplicated(prop.asks.quantiles)]
+
+d$prop.asked.quant <- cut(d$prop.asked, right=FALSE, breaks=c(prop.asks.quantiles, 1),
+    labels=names(prop.asks.quantiles)[1:(length(prop.asks.quantiles))])
+
 ## generate grades
 ##########################################################

-d$part.grade <- NA
-
 ## print the median number of questions for (a) everybody and (b)
 ## people that have been present 75% of the time
 median(d$num.calls)

-questions.cutoff <- median(d$num.calls)
-
 ## helper function to generate average grade minus number of missing
 gen.part.grade <- function (x.unique.name) {
    q.scores <- question.grades[call.list$assessment[call.list$unique.name == x.unique.name]]
    base.score <- mean(q.scores, na.rm=TRUE)

    ## number of missing days
-    # missing.days <- nrow(missing.in.class[missing.in.class$unique.name == x.unique.name,])
+    missing.in.class.days <- nrow(missing.in.class[missing.in.class$unique.name == x.unique.name,])

    ## return the final score
    data.frame(unique.name=x.unique.name,
-               part.grade=(base.score))
+               base.grade=base.score,
+               missing.in.class.days=missing.in.class.days)
 }


+## create the base grades which do NOT include missing questions
 tmp <- do.call("rbind", lapply(d$unique.name, gen.part.grade))
+d <- merge(d, tmp)
+rownames(d) <- d$unique.name

-d[as.character(tmp$unique.name), "part.grade"] <- tmp$part.grade
+## apply the penality for number of days we called on them and they were gone
+d$part.grade <- d$base.grade - d$missing.in.class.days * missed.question.penalty
+d$part.grade.orig <- d$part.grade

-## generate the baseline participation grades as per the process above
+## first we handle the zeros
+## step 1: first double check the people who have zeros and ensure that they didn't "just" get unlucky"
+d[d$num.calls == 0,]

+## set those people to 0 :(
+d[d$num.calls == 0]
+d$part.grade[d$num.calls == 0] <- 0
+
+## step 2: identify the people who were were not asked "enough" questions but were unlucky/lucky
+## penalized.unique.names <- d$unique.name[d$num.calls < median(d$num.calls) & d$absences > median(d$absences)]
+
+## first these are the people were were not called simply because they got unlucky
+d[d$num.calls < questions.cutoff & d$absences < absence.threshold,]
+
+## first these are the people were were not called simply because they got unlucky
+penalized.unique.names <- d$unique.name[d$num.calls < questions.cutoff & d$absences > absence.threshold]
+d[d$unique.name %in% penalized.unique.names,]
+
+## now add "zeros" for every questions that is below the normal
+d[as.character(penalized.unique.names),"part.grade"] <- ((
+    (questions.cutoff - d[as.character(penalized.unique.names),"num.calls"] * 0) +
+    (d[as.character(penalized.unique.names),"num.calls"] * d[as.character(penalized.unique.names),"part.grade"]) )
+    / questions.cutoff)
+
+d[as.character(penalized.unique.names),]
+
+## TODO ensure this is right. i think it is
 ## map part grades back to 4.0 letter scale and points
-d$part.4point <-round((d$part.grade / (50/3.3)) - 2.6, 2)
+d$part.4point <- round((d$part.grade / (50/3.3)) - 2.6, 2)

-d[sort.list(d$part.4point),]
+d[sort.list(d$part.4point, decreasing=TRUE),
+  c("unique.name", "short.name", "num.calls", "absences", "part.4point")]


-## writing out data
+## writing out data to CSV
 d.print <- merge(d, myuw[,c("StudentNo", "FirstName", "LastName", "UWNetID")],
-           by.x="student.num", by.y="StudentNo")
-write.csv(d.print, file="final_participation_grades.csv")
+                 by.x="unique.name", by.y="StudentNo")
+write.csv(d.print, file="../data/final_participation_grades.csv")

-## library(rmarkdown)
+library(rmarkdown)

-## for (x.unique.name in d$unique.name) {
-##     render(input="../../assessment_and_tracking/student_report_template.Rmd",
-##            output_format="html_document",
-##            output_file=paste("../data/case_grades/student_reports/",
-##                              d.print$UWNetID[d.print$unique.name == x.unique.name],
-##                              sep=""))
-## }
+for (id in d$unique.name) {
+    render(input="student_report_template.Rmd",
+           output_format="html_document",
+           output_file=paste("../data/case_grades/",
+                             d.print$unique.name[d.print$unique.name == id],
+                             sep=""))
+}
--- a/assessment_and_tracking/simulation.R
+++ b/assessment_and_tracking/simulation.R
@@ -0,0 +1,24 @@
+weight.fac <- 2
+num.calls <- 373
+num.students <- 76
+
+gen.calls.per.students <- function (x) {
+    raw.weights <<- rep(1, num.students)
+    names(raw.weights) <- seq(1, num.students)
+
+    table(sapply(1:num.calls, function (i) {
+        probs <- raw.weights / sum(raw.weights)
+        selected <- sample(names(raw.weights), 1, prob=probs)
+        ## update the raw.weights
+        raw.weights[selected] <<- raw.weights[selected] / weight.fac
+                                        #print(raw.weights)
+        return(selected)
+    }))
+}
+
+
+simulated.call.list <- unlist(lapply(1:1000, gen.calls.per.students))
+hist(simulated.call.list)
+
+quantile(simulated.call.list, probs=seq(0,1,by=0.01))
+quantile(simulated.call.list, probs=0.05)
--- a/assessment_and_tracking/student_report_template.Rmd
+++ b/assessment_and_tracking/student_report_template.Rmd
@@ -1,22 +1,19 @@
-**Student Name:** `r paste(d.print[d.print$discord.name == x.discord.name, c("FirstName", "LastName")])`
+**Student Name:** `r paste(d.print[d.print$unique.name == id, c("LastName", "FirstName")])` (`r id`)

-**Discord Name:** `r d.print[d.print$discord.name == x.discord.name, c("discord.name")]`
+**Participation grade:** `r d.print$part.4point[d.print$unique.name == id]`

-**Participation grade:** `r d.print$part.4point[d.print$discord.name == x.discord.name]`
+**Questions asked:** `r d.print[d$unique.name == id, "num.calls"]`

-**Questions asked:** `r d.print[d$discord.name == x.discord.name, "prev.questions"]`
+**Days Absent:** `r d.print[d.print$unique.name == id, "absences"]` / `r length(unique(as.Date(unique(call.list$timestamp))))`

-**Days Absent:** `r d.print[d.print$discord.name == x.discord.name, "days.absent"]` / `r case.sessions`
+**Missing in class days:** `r d.print[d$unique.name == id, "missing.in.class.days"]` (base grade lowered by 0.2 per day)

 **List of questions:**

 ```{r echo=FALSE}
-call.list[call.list$discord.name == x.discord.name,]
+call.list[call.list$unique.name == id,]
 ```

-**Luckiness:** `r d.print[d.print$discord.name == x.discord.name, "prop.asked.quant"]`
-
-If you a student has a luckiness over 50% that means that they were helped by the weighting of the system and/or got lucky. We did not penalize *any* students with a luckiness under 50% for absences.