MSCAnalysis/index.Rmd

391 lines
11 KiB
Plaintext
Raw Permalink Normal View History

2022-10-28 18:09:40 +00:00
---
title: "Analysis of matrix spec bias-rumors"
author: "MTRNord"
2022-10-28 18:18:35 +00:00
output:
github_document:
toc: true
toc_depth: 2
dev: jpeg
2022-10-29 19:50:44 +00:00
pdf_document: default
2022-10-28 18:09:40 +00:00
---
The following data is based purely on public information. This means the data is fetched from GitHub and GitLab as well as I was able to.
# Fetching the MSCs
2022-10-29 19:50:44 +00:00
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
# Setup things
2022-10-28 18:09:40 +00:00
library(gh)
library(tidyverse)
2022-10-29 21:36:07 +00:00
library(hrbrthemes)
library(survminer)
2022-10-28 18:09:40 +00:00
# import_roboto_condensed()
# extrafont::loadfonts(device = "win")
2022-10-28 18:09:40 +00:00
2022-10-29 21:36:07 +00:00
# Eight hex colours, starting with black. Presumably a colour-blind-friendly
# palette (the name suggests so) -- TODO confirm. It is not referenced by any
# other code visible in this document.
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
2022-10-29 19:50:44 +00:00
```
2022-10-28 18:09:40 +00:00
2022-10-29 19:50:44 +00:00
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
#' Flatten one page of a GraphQL issue / pull-request response.
#'
#' @param prs_gql Parsed response of a single `gh_gql()` call. The edge list is
#'   read positionally from `prs_gql[[1]][[1]][[1]][[2]]`, i.e. the second
#'   field of the connection (presumably data -> repository -> connection ->
#'   edges; this depends on the field order of the query -- confirm if the
#'   query changes).
#' @return A data frame with one row per edge node; the node fields become
#'   list columns. `labels` holds, per row, either a data frame of the label
#'   nodes or `NA` when the item has no labels. `isPR` is `TRUE` when the
#'   response carries a `mergedAt` field; otherwise `isPR` is `FALSE` and an
#'   all-`NA` `mergedAt` column is added. An empty page yields an empty data
#'   frame.
cleanup_data <- function(prs_gql) {
  edges <- prs_gql[[1]][[1]][[1]][[2]]

  # `1:length(edges)` used to index element 1 of an empty list and crash.
  # A zero-column data frame is ignored by rbind(), so callers can still
  # bind all pages together.
  if (length(edges) == 0) {
    return(data.frame())
  }

  nodes <- lapply(edges, function(edge) {
    node <- edge$node
    label_edges <- node$labels$edges
    if (length(label_edges) >= 1) {
      # One single-row data frame per label node, stacked into one table.
      label_rows <- lapply(label_edges, function(label_edge) {
        as.data.frame(label_edge$node)
      })
      node$labels <- do.call(rbind, label_rows)
    } else {
      node$labels <- NA
    }
    node
  })

  # rbind() on plain lists gives a list matrix, hence list columns below.
  result <- as.data.frame(do.call(rbind, nodes))

  # Only the pull-request query asks for `mergedAt`; use its presence to tell
  # the two kinds of page apart.
  if (!("mergedAt" %in% colnames(result))) {
    result$mergedAt <- NA
    result$isPR <- FALSE
  } else {
    result$isPR <- TRUE
  }

  # Overwrite each author cell with the data frame row-bound from it.
  # NOTE(review): `result[i, "author"]` looks like it is always a length-1
  # list, so the is.null() guard would never skip a row -- confirm how
  # items without an author should be handled.
  for (i in rownames(result)) {
    author <- result[i, "author"]
    if (!is.null(author)) {
      result[i, "author"] <- do.call(rbind.data.frame, author)
    }
  }

  result
}
2022-10-29 19:50:44 +00:00
```
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
# Download every issue and pull request of matrix-org/matrix-spec-proposals
# once per session; the result is cached in `issues_gql_all`.
if (!exists("issues_gql_all")) {
  # `pageInfo` must stay the FIRST field and `edges` the SECOND field of the
  # connection in both queries: the response is read positionally below and
  # in cleanup_data().
  issue_query <- 'query($after: String) {
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
issues(
states: [OPEN, CLOSED]
orderBy: {field: CREATED_AT, direction: ASC}
first: 100
after: $after
) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
title
url
author {
login
}
closedAt
createdAt
labels(first: 100) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
name
createdAt
}
}
}
}
}
}
}
}'
  pr_query <- 'query($after: String) {
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
pullRequests(
states: [OPEN, CLOSED, MERGED]
orderBy: {field: CREATED_AT, direction: ASC}
first: 100
after: $after
) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
title
url
author {
login
}
closedAt
mergedAt
createdAt
labels(first: 100) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
name
createdAt
}
}
}
}
}
}
}
}'

  # Run `query` page by page and return the cleaned pages as a list of data
  # frames. The first request is sent without variables; every later request
  # passes the previous page's `endCursor` as `$after`. This replaces two
  # copies of the same loop (one for issues, one for pull requests).
  fetch_all_pages <- function(query) {
    pages <- list()
    cursor <- NULL
    repeat {
      response <- if (is.null(cursor)) {
        gh_gql(query)
      } else {
        gh_gql(query, variables = list(after = cursor))
      }
      # First field of the connection, i.e. `pageInfo`.
      page_info <- response[[1]][[1]][[1]][[1]]
      pages[[length(pages) + 1]] <- cleanup_data(response)
      if (!page_info$hasNextPage) {
        break
      }
      cursor <- page_info$endCursor
    }
    pages
  }

  # Issues first, then pull requests, as one flat list of per-page frames.
  gql_data <- c(fetch_all_pages(issue_query), fetch_all_pages(pr_query))
  issues_gql_all <- do.call(rbind, gql_data)

  # Cleanup: only remove names this chunk is guaranteed to have created
  # (the old rm() also named `variables`, which does not exist when no query
  # needs a second page).
  rm(gql_data, fetch_all_pages)

  # Later chunks rely on row-wise evaluation of the list columns.
  issues_gql_all <- issues_gql_all |>
    rowwise()
}
2022-10-28 18:09:40 +00:00
```
# Get Employee association from Github and Gitlab
Please note that in the current PDF this is not yet hooked up to GitLab and does not yet check the GitHub workplace field. It may also miss some users whose affiliation cannot be detected.
2022-10-29 19:50:44 +00:00
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
2022-10-28 18:09:40 +00:00
# TODO also check against gitlab
# TODO also check workplace thingy
# Compile a list of who is who
2022-10-29 12:58:17 +00:00
# Build the per-company author lists once per session. `sct_employee` is part
# of the guard as well: it is defined here and needed by later chunks, so a
# session that has the other three lists but not this one must rebuild.
if (!exists("element_employee") || !exists("famedly_employee") ||
  !exists("beeper_employee") || !exists("sct_employee")) {
  element_employee <- list()
  # Hard-coded list; these logins are never looked up on GitHub below.
  sct_employee <- c("ara4n", "erikjohnston", "richvdh", "dbkr", "uhoreg", "anoadragon453", "turt2live", "KitsuneRal", "matrixbot")
  # Seed entries; further members are appended from the organisation lookup.
  famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
  beeper_employee <- list("Fizzadar")

  # Collapse every author cell to a single string (an empty cell becomes "").
  authors <- vapply(
    issues_gql_all$author,
    function(cell) paste(unlist(cell), collapse = ""),
    character(1),
    USE.NAMES = FALSE
  )
  # One lookup per distinct login; skip empty logins and the SCT list.
  authors <- setdiff(authors, c("", sct_employee))

  # Get orgs of users on GitHub and assign each user to at most one company
  # (first match wins: vector-im, then beeper, then Famedly).
  for (user in authors) {
    orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
    org_logins <- unlist(lapply(orgs_raw, function(org) org$login))
    if ("vector-im" %in% org_logins) {
      element_employee[[length(element_employee) + 1]] <- user
    } else if ("beeper" %in% org_logins) {
      beeper_employee[[length(beeper_employee) + 1]] <- user
    } else if ("Famedly" %in% org_logins) {
      famedly_employee[[length(famedly_employee) + 1]] <- user
    }
  }

  # Remove only the temporaries that actually exist (the loop variables are
  # absent when there was nothing to look up; the old rm() also named
  # `author`, which this chunk never creates).
  rm(list = intersect(c("authors", "user", "orgs_raw", "org_logins"), ls()))
}
```
2022-10-29 19:50:44 +00:00
# Get times for state transitions
```{r message=FALSE, warning=FALSE, tidy = "styler"}
# Opened to Proposal transition
#
# Keep the items that carry the "proposal" label, attach the `createdAt` value
# of that label as `proposalAt`, and tag every row with the author's company.
# The result is left grouped by `Company`.
#
# `issues_gql_all` is row-wise, so `labels` is the single cell of the current
# row: a data frame of labels, or NA when there are none. The test uses
# is.data.frame() because `!is.na(<data frame>)` is a matrix, and `&&` rejects
# operands longer than one on R >= 4.3.
#
# NOTE(review): in the GitHub GraphQL schema `Label.createdAt` appears to be
# the time the label itself was created in the repository, not the time it was
# applied to this item -- confirm before interpreting `proposalAt`.
opened_to_proposal <- issues_gql_all |>
  filter(is.data.frame(labels) && "proposal" %in% labels$name) |>
  select(title, author, createdAt, labels) |>
  mutate(proposalAt = labels$createdAt[labels$name == "proposal"]) |>
  select(title, author, createdAt, proposalAt) |>
  mutate(Company = case_when(
    !is.null(author) && is.element(author, element_employee) ~ "Element",
    !is.null(author) && is.element(author, sct_employee) ~ "SCT",
    !is.null(author) && is.element(author, famedly_employee) ~ "Famedly",
    !is.null(author) && is.element(author, beeper_employee) ~ "Beeper",
    TRUE ~ "Other"
  )) |>
  group_by(Company)

# Proposal to having impl
# FIXME this doesn't work at this time as we don't know when the labels got
# removed. We need timelineItems() in the GraphQL query for this.
# proposal_to_impl <- issues_gql_all |>
#   filter(is.data.frame(labels) && "proposal" %in% labels$name) |>
#   select(title, author, createdAt)
2022-10-29 19:50:44 +00:00
```
2022-10-28 18:09:40 +00:00
# MSCs by Company (all kinds)
Note that this does not adjust for private vs company MSCs.
2022-10-29 19:50:44 +00:00
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
2022-10-28 18:09:40 +00:00
# Filter MSCs by company
#
# An MSC is any item carrying the "proposal" label. `labels` is the current
# row's cell (data frame of labels, or NA when unlabelled); is.data.frame() is
# used because `!is.na(<data frame>)` is a matrix and `&&` rejects operands
# longer than one on R >= 4.3.
mscs <- issues_gql_all |>
  filter(is.data.frame(labels) && "proposal" %in% labels$name)

mscs_element <- mscs |>
  filter(!is.null(author) && is.element(author, element_employee)) |>
  nrow()

mscs_sct <- mscs |>
  filter(!is.null(author) && is.element(author, sct_employee)) |>
  nrow()

mscs_famedly <- mscs |>
  filter(!is.null(author) && is.element(author, famedly_employee)) |>
  nrow()

mscs_beeper <- mscs |>
  filter(!is.null(author) && is.element(author, beeper_employee)) |>
  nrow()

# Everything not attributed to one of the four lists.
# NOTE(review): an author present in more than one list is subtracted more
# than once here -- confirm the lists are disjoint.
mscs_other <- nrow(mscs) - mscs_element - mscs_beeper - mscs_famedly - mscs_sct

# Display Data
data <- data.frame(
  Company = c("Beeper", "Element", "Famedly", "Other", "SCT"),
  Count = c(mscs_beeper, mscs_element, mscs_famedly, mscs_other, mscs_sct)
)
# Reverse the rows and freeze that order as factor levels so that, after
# coord_flip(), the companies read alphabetically from top to bottom.
data <- map_df(data, rev)
data$Company <- factor(data$Company, levels = data$Company)

# Horizontal bar chart with the count printed inside each bar.
ggplot(data, aes(x = Company, y = Count)) +
  geom_bar(stat = "identity", fill = "#69b3a2") +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  labs(
    title = str_wrap("MSCs from companies", 40),
    caption = "source: Github API"
  ) +
  # The company axis stays untitled (the former `x = "Issues"` in labs() was
  # immediately overridden by this call and never shown).
  xlab("") +
  geom_text(
    aes(label = Count),
    hjust = 1.5,
    colour = "white"
  ) +
  coord_flip()
2022-10-28 18:09:40 +00:00
```
# Merged MSCs by Company
Note that this does not adjust for private vs company MSCs.
2022-10-29 19:50:44 +00:00
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
2022-10-28 18:09:40 +00:00
# Filter for only merged MSCs
#
# A merged MSC carries the "proposal" label plus "disposition-merge" or
# "merged". `labels` is the current row's cell (data frame of labels, or NA
# when unlabelled); is.data.frame() is used because `!is.na(<data frame>)` is
# a matrix and `&&` rejects operands longer than one on R >= 4.3.
merged_mscs <- issues_gql_all |>
  filter(
    is.data.frame(labels) &&
      "proposal" %in% labels$name &&
      any(c("disposition-merge", "merged") %in% labels$name)
  )

# Filter MSCs by company
merged_element <- merged_mscs |>
  filter(!is.null(author) && is.element(author, element_employee)) |>
  nrow()

merged_sct <- merged_mscs |>
  filter(!is.null(author) && is.element(author, sct_employee)) |>
  nrow()

merged_famedly <- merged_mscs |>
  filter(!is.null(author) && is.element(author, famedly_employee)) |>
  nrow()

merged_beeper <- merged_mscs |>
  filter(!is.null(author) && is.element(author, beeper_employee)) |>
  nrow()

# Everything not attributed to one of the four lists.
# NOTE(review): an author present in more than one list is subtracted more
# than once here -- confirm the lists are disjoint.
merged_other <- nrow(merged_mscs) - merged_element - merged_beeper - merged_famedly - merged_sct

# Display Data
data <- data.frame(
  Company = c("Beeper", "Element", "Famedly", "Other", "SCT"),
  Count = c(merged_beeper, merged_element, merged_famedly, merged_other, merged_sct)
)
# Reverse the rows and freeze that order as factor levels so that, after
# coord_flip(), the companies read alphabetically from top to bottom.
data <- map_df(data, rev)
data$Company <- factor(data$Company, levels = data$Company)

# Horizontal bar chart with the count printed inside each bar.
ggplot(data, aes(x = Company, y = Count)) +
  geom_bar(stat = "identity", fill = "#69b3a2") +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  labs(
    title = str_wrap("Merged MSCs from companies", 40),
    caption = "source: Github API"
  ) +
  # The company axis stays untitled (the former `x = "Issues"` in labs() was
  # immediately overridden by this call and never shown).
  xlab("") +
  geom_text(
    aes(label = Count),
    hjust = 1.5,
    colour = "white"
  ) +
  coord_flip()
2022-10-28 18:09:40 +00:00
```