17
0

code to create final case discussion grades

This still needs to be check over but this is new code to build the
final grades. Current threshold for minimum questions comes from 1000
simulated classes (simulation.R).
This commit is contained in:
Benjamin Mako Hill 2022-03-05 20:51:15 -08:00
parent 35bf83e9f6
commit 78ac188f04
3 changed files with 134 additions and 38 deletions

View File

@ -1,72 +1,147 @@
## load in the data
#################################
myuw <- read.csv("../data/2022_winter_COM_481_A_students.csv", stringsAsFactors=FALSE)
myuw <- read.csv("myuw-COMMLD_570_A_spring_2021_students.csv", stringsAsFactors=FALSE)
## class-level variables
question.grades <- c("GOOD"=100, "FAIR"=100-(50/3.3), "WEAK"=100-(50/(3.3)*2))
current.dir <- getwd()
source("../assessment_and_tracking/track_participation.R")
setwd("case_grades")
setwd(current.dir)
rownames(d) <- d$unique.name
call.list$timestamp <- as.Date(call.list$timestamp)
## class-level variables
question.grades <- c("PLUS"=100, "CHECK"=100-(50/3.3), "MINUS"=100-(50/(3.3)*2))
missed.question.penalty <- (50/3.3) * 0.2 ## 1/5 of a full point on the GPA scale
## inspect set the absence threashold
ggplot(d) + aes(x=absences) + geom_histogram(binwidth=1, fill="white",color="black")
## absence.threshold <- median(d$absences)
absence.threshold <- 4 ## TODO talk about this
## inspect and set the questions cutoff
## questions.cutoff <- median(d$num.calls)
## median(d$num.calls)
## questions.cutoff <- nrow(call.list) / nrow(d) ## TODO talk about this
## first these are the people were were not called simply because they got unlucky
## this is the 95% percentile based on simulation in simulation.R
questions.cutoff <- 4
## show the distribution of assessments
table(call.list$assessment)
prop.table(table(call.list$assessment))
table(call.list$answered)
prop.table(table(call.list$answered))
table(call.list.full$answered)
prop.table(table(call.list.full$answered))
total.questions.asked <- nrow(call.list)
## find out how man questions folks have present/absent for
##########################################################
calls.per.day <- data.frame(day=as.Date(names(table(call.list$timestamp))),
questions.asked=as.numeric(table(call.list$timestamp)))
## function to return the numbers of calls present for or zero if they
## were absent
calls.for.student.day <- function (day, student.id) {
if (any(absence$unique.name == student.id & absence$date.absent == day)) {
return(0)
} else {
return(calls.per.day$questions.asked[calls.per.day$day == day])
}
}
compute.questions.present.for.student <- function (student.id) {
sum(unlist(lapply(unique(calls.per.day$day), calls.for.student.day, student.id)))
}
## create new column with number of questions present
d$q.present <- unlist(lapply(d$unique.name, compute.questions.present.for.student))
d$prop.asked <- d$num.calls / d$q.present
## generate statistics using these new variables
prop.asks.quantiles <- quantile(d$prop.asked, probs=seq(0,1, 0.01))
prop.asks.quantiles <- prop.asks.quantiles[!duplicated(prop.asks.quantiles)]
d$prop.asked.quant <- cut(d$prop.asked, right=FALSE, breaks=c(prop.asks.quantiles, 1),
labels=names(prop.asks.quantiles)[1:(length(prop.asks.quantiles))])
## generate grades
##########################################################
d$part.grade <- NA
## print the median number of questions for (a) everybody and (b)
## people that have been present 75% of the time
median(d$num.calls)
questions.cutoff <- median(d$num.calls)
## helper function to generate average grade minus number of missing
gen.part.grade <- function (x.unique.name) {
q.scores <- question.grades[call.list$assessment[call.list$unique.name == x.unique.name]]
base.score <- mean(q.scores, na.rm=TRUE)
## number of missing days
# missing.days <- nrow(missing.in.class[missing.in.class$unique.name == x.unique.name,])
missing.in.class.days <- nrow(missing.in.class[missing.in.class$unique.name == x.unique.name,])
## return the final score
data.frame(unique.name=x.unique.name,
part.grade=(base.score))
base.grade=base.score,
missing.in.class.days=missing.in.class.days)
}
## create the base grades which do NOT include missing questions
tmp <- do.call("rbind", lapply(d$unique.name, gen.part.grade))
d <- merge(d, tmp)
rownames(d) <- d$unique.name
d[as.character(tmp$unique.name), "part.grade"] <- tmp$part.grade
## apply the penality for number of days we called on them and they were gone
d$part.grade <- d$base.grade - d$missing.in.class.days * missed.question.penalty
d$part.grade.orig <- d$part.grade
## generate the baseline participation grades as per the process above
## first we handle the zeros
## step 1: first double check the people who have zeros and ensure that they didn't "just" get unlucky"
d[d$num.calls == 0,]
## set those people to 0 :(
d[d$num.calls == 0]
d$part.grade[d$num.calls == 0] <- 0
## step 2: identify the people who were were not asked "enough" questions but were unlucky/lucky
## penalized.unique.names <- d$unique.name[d$num.calls < median(d$num.calls) & d$absences > median(d$absences)]
## first these are the people were were not called simply because they got unlucky
d[d$num.calls < questions.cutoff & d$absences < absence.threshold,]
## first these are the people were were not called simply because they got unlucky
penalized.unique.names <- d$unique.name[d$num.calls < questions.cutoff & d$absences > absence.threshold]
d[d$unique.name %in% penalized.unique.names,]
## now add "zeros" for every questions that is below the normal
d[as.character(penalized.unique.names),"part.grade"] <- ((
(questions.cutoff - d[as.character(penalized.unique.names),"num.calls"] * 0) +
(d[as.character(penalized.unique.names),"num.calls"] * d[as.character(penalized.unique.names),"part.grade"]) )
/ questions.cutoff)
d[as.character(penalized.unique.names),]
## TODO ensure this is right. i think it is
## map part grades back to 4.0 letter scale and points
d$part.4point <- round((d$part.grade / (50/3.3)) - 2.6, 2)
d[sort.list(d$part.4point),]
d[sort.list(d$part.4point, decreasing=TRUE),
c("unique.name", "short.name", "num.calls", "absences", "part.4point")]
## writing out data
## writing out data to CSV
d.print <- merge(d, myuw[,c("StudentNo", "FirstName", "LastName", "UWNetID")],
by.x="student.num", by.y="StudentNo")
write.csv(d.print, file="final_participation_grades.csv")
by.x="unique.name", by.y="StudentNo")
write.csv(d.print, file="../data/final_participation_grades.csv")
## library(rmarkdown)
library(rmarkdown)
## for (x.unique.name in d$unique.name) {
## render(input="../../assessment_and_tracking/student_report_template.Rmd",
## output_format="html_document",
## output_file=paste("../data/case_grades/student_reports/",
## d.print$UWNetID[d.print$unique.name == x.unique.name],
## sep=""))
## }
for (id in d$unique.name) {
render(input="student_report_template.Rmd",
output_format="html_document",
output_file=paste("../data/case_grades/",
d.print$unique.name[d.print$unique.name == id],
sep=""))
}

View File

@ -0,0 +1,24 @@
weight.fac <- 2
num.calls <- 373
num.students <- 76
gen.calls.per.students <- function (x) {
raw.weights <<- rep(1, num.students)
names(raw.weights) <- seq(1, num.students)
table(sapply(1:num.calls, function (i) {
probs <- raw.weights / sum(raw.weights)
selected <- sample(names(raw.weights), 1, prob=probs)
## update the raw.weights
raw.weights[selected] <<- raw.weights[selected] / weight.fac
#print(raw.weights)
return(selected)
}))
}
simulated.call.list <- unlist(lapply(1:1000, gen.calls.per.students))
hist(simulated.call.list)
quantile(simulated.call.list, probs=seq(0,1,by=0.01))
quantile(simulated.call.list, probs=0.05)

View File

@ -1,22 +1,19 @@
**Student Name:** `r paste(d.print[d.print$discord.name == x.discord.name, c("FirstName", "LastName")])`
**Student Name:** `r paste(d.print[d.print$unique.name == id, c("LastName", "FirstName")])` (`r id`)
**Discord Name:** `r d.print[d.print$discord.name == x.discord.name, c("discord.name")]`
**Participation grade:** `r d.print$part.4point[d.print$unique.name == id]`
**Participation grade:** `r d.print$part.4point[d.print$discord.name == x.discord.name]`
**Questions asked:** `r d.print[d$unique.name == id, "num.calls"]`
**Questions asked:** `r d.print[d$discord.name == x.discord.name, "prev.questions"]`
**Days Absent:** `r d.print[d.print$unique.name == id, "absences"]` / `r length(unique(as.Date(unique(call.list$timestamp))))`
**Days Absent:** `r d.print[d.print$discord.name == x.discord.name, "days.absent"]` / `r case.sessions`
**Missing in class days:** `r d.print[d$unique.name == id, "missing.in.class.days"]` (base grade lowered by 0.2 per day)
**List of questions:**
```{r echo=FALSE}
call.list[call.list$discord.name == x.discord.name,]
call.list[call.list$unique.name == id,]
```
**Luckiness:** `r d.print[d.print$discord.name == x.discord.name, "prop.asked.quant"]`
If you a student has a luckiness over 50% that means that they were helped by the weighting of the system and/or got lucky. We did not penalize *any* students with a luckiness under 50% for absences.