391 lines
11 KiB
Plaintext
391 lines
11 KiB
Plaintext
---
title: "Analysis of matrix spec bias-rumors"
author: "MTRNord"
output:
  github_document:
    toc: true
    toc_depth: 2
    dev: jpeg
  pdf_document: default
---
|
|
|
|
The following data is based purely on publicly available information. This means the data was fetched from GitHub and GitLab as best as I was able to.
|
|
|
|
# Fetching the MSCs
|
|
|
|
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
|
# Setup things
|
|
library(gh)
|
|
library(tidyverse)
|
|
library(hrbrthemes)
|
|
library(survminer)
|
|
|
|
# import_roboto_condensed()
|
|
# extrafont::loadfonts(device = "win")
|
|
|
|
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
|
|
```
|
|
|
|
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
|
|
|
cleanup_data <- function(prs_gql) {
|
|
prs_gql <- prs_gql[[1]][[1]][[1]][[2]]
|
|
for (i in 1:length(prs_gql)) {
|
|
prs_gql[[i]] <- prs_gql[[i]]$node
|
|
prs_gql[[i]]$temp_labels <- NA
|
|
if (length(prs_gql[[i]]$labels$edges) >= 1) {
|
|
prs_gql[[i]]$temp_labels <- list()
|
|
for (y in 1:length(prs_gql[[i]]$labels$edges)) {
|
|
prs_gql[[i]]$temp_labels[[y]] <- prs_gql[[i]]$labels$edges[[y]]$node
|
|
}
|
|
|
|
prs_gql[[i]]$temp_labels <- map(prs_gql[[i]]$temp_labels, as.data.frame)
|
|
prs_gql[[i]]$temp_labels <- do.call(rbind, prs_gql[[i]]$temp_labels)
|
|
}
|
|
prs_gql[[i]]$labels <- NA
|
|
prs_gql[[i]]$labels <- prs_gql[[i]]$temp_labels
|
|
prs_gql[[i]]$temp_labels <- NULL
|
|
}
|
|
prs_gql <- as.data.frame(do.call(rbind, prs_gql))
|
|
if (!("mergedAt" %in% colnames(prs_gql))) {
|
|
prs_gql$mergedAt <- NA
|
|
prs_gql$isPR <- FALSE
|
|
} else {
|
|
prs_gql$isPR <- TRUE
|
|
}
|
|
|
|
for (i in rownames(prs_gql)) {
|
|
author <- prs_gql[i, "author"]
|
|
if (!is.null(author)) {
|
|
prs_gql[i, "author"] <- do.call(rbind.data.frame, author)
|
|
}
|
|
}
|
|
|
|
return(prs_gql)
|
|
}
|
|
```
|
|
|
|
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
|
|
|
if (!exists("issues_gql_all")) {
|
|
issue_query <- 'query($after: String) {
|
|
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
|
|
issues(
|
|
states: [OPEN, CLOSED]
|
|
orderBy: {field: CREATED_AT, direction: ASC}
|
|
first: 100
|
|
after: $after
|
|
) {
|
|
pageInfo {
|
|
startCursor
|
|
endCursor
|
|
hasNextPage
|
|
hasPreviousPage
|
|
}
|
|
edges {
|
|
node {
|
|
title
|
|
url
|
|
author {
|
|
login
|
|
}
|
|
closedAt
|
|
createdAt
|
|
labels(first: 100) {
|
|
pageInfo {
|
|
startCursor
|
|
endCursor
|
|
hasNextPage
|
|
hasPreviousPage
|
|
}
|
|
edges {
|
|
node {
|
|
name
|
|
createdAt
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}'
|
|
|
|
pr_query <- 'query($after: String) {
|
|
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
|
|
pullRequests(
|
|
states: [OPEN, CLOSED, MERGED]
|
|
orderBy: {field: CREATED_AT, direction: ASC}
|
|
first: 100
|
|
after: $after
|
|
) {
|
|
pageInfo {
|
|
startCursor
|
|
endCursor
|
|
hasNextPage
|
|
hasPreviousPage
|
|
}
|
|
edges {
|
|
node {
|
|
title
|
|
url
|
|
author {
|
|
login
|
|
}
|
|
closedAt
|
|
mergedAt
|
|
createdAt
|
|
labels(first: 100) {
|
|
pageInfo {
|
|
startCursor
|
|
endCursor
|
|
hasNextPage
|
|
hasPreviousPage
|
|
}
|
|
edges {
|
|
node {
|
|
name
|
|
createdAt
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}'
|
|
|
|
issues_gql <- gh_gql(issue_query)
|
|
issues_gql_pageinfo <- issues_gql[[1]][[1]][[1]][[1]]
|
|
issues_gql <- cleanup_data(issues_gql)
|
|
|
|
gql_data <- list(issues_gql)
|
|
|
|
# Paginate API
|
|
while (issues_gql_pageinfo$hasNextPage) {
|
|
variables <- list()
|
|
variables$after <- issues_gql_pageinfo$endCursor
|
|
issues_gql <- gh_gql(issue_query, variables = variables)
|
|
issues_gql_pageinfo <- issues_gql[[1]][[1]][[1]][[1]]
|
|
issues_gql <- cleanup_data(issues_gql)
|
|
gql_data[[length(gql_data) + 1]] <- issues_gql
|
|
}
|
|
|
|
prs_gql <- gh_gql(pr_query)
|
|
prs_gql_pageinfo <- prs_gql[[1]][[1]][[1]][[1]]
|
|
prs_gql <- cleanup_data(prs_gql)
|
|
gql_data[[length(gql_data) + 1]] <- prs_gql
|
|
|
|
# Paginate API
|
|
while (prs_gql_pageinfo$hasNextPage) {
|
|
variables <- list()
|
|
variables$after <- prs_gql_pageinfo$endCursor
|
|
prs_gql <- gh_gql(pr_query, variables = variables)
|
|
prs_gql_pageinfo <- prs_gql[[1]][[1]][[1]][[1]]
|
|
prs_gql <- cleanup_data(prs_gql)
|
|
gql_data[[length(gql_data) + 1]] <- prs_gql
|
|
}
|
|
|
|
issues_gql_all <- do.call(rbind, gql_data)
|
|
|
|
# Cleanup
|
|
rm(issues_gql, prs_gql, gql_data, variables, issues_gql_pageinfo, prs_gql_pageinfo)
|
|
|
|
|
|
issues_gql_all <- issues_gql_all |>
|
|
rowwise()
|
|
}
|
|
```
|
|
|
|
# Get Employee association from Github and Gitlab
|
|
|
|
Please note that in the current PDF this is not yet hooked up to GitLab, nor does it check the GitHub workplace field. It may also miss some users whose affiliation is not detectable.
|
|
|
|
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
|
|
|
# TODO also check against gitlab
|
|
# TODO also check workplace thingy
|
|
|
|
# Compile a list of who is who
|
|
if (!exists("element_employee") || !exists("famedly_employee") || !exists("beeper_employee")) {
|
|
element_employee <- list()
|
|
sct_employee <- c("ara4n", "erikjohnston", "richvdh", "dbkr", "uhoreg", "anoadragon453", "turt2live", "KitsuneRal", "matrixbot")
|
|
famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
|
|
beeper_employee <- list("Fizzadar")
|
|
users <- list()
|
|
# Get orgs of users on github
|
|
for (i in rownames(issues_gql_all)) {
|
|
user <- issues_gql_all[i, "author"]
|
|
user <- paste(unlist(user), collapse = "")
|
|
if (is.na(user) || is.null(user) || user == "") {
|
|
next
|
|
}
|
|
if ((user %in% users) || (user %in% sct_employee)) {
|
|
next
|
|
}
|
|
orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
|
|
orgs <- as.data.frame(do.call(rbind, orgs_raw))
|
|
|
|
|
|
if ("vector-im" %in% orgs$login) {
|
|
element_employee[[length(element_employee) + 1]] <- user
|
|
} else if ("beeper" %in% orgs$login) {
|
|
beeper_employee[[length(beeper_employee) + 1]] <- user
|
|
} else if ("Famedly" %in% orgs$login) {
|
|
famedly_employee[[length(famedly_employee) + 1]] <- user
|
|
}
|
|
users[[length(users) + 1]] <- user
|
|
}
|
|
rm(orgs, orgs_raw, user, author, i, users)
|
|
}
|
|
```
|
|
|
|
# Get times for state transitions
|
|
|
|
```{r message=FALSE, warning=FALSE, tidy = "styler"}
|
|
# Opened to Proposal transition
|
|
opened_to_proposal <- issues_gql_all |>
|
|
filter(!is.na(labels) && is.element("proposal", labels$name)) |>
|
|
select(title, author, createdAt, labels)
|
|
for (i in 1:length(opened_to_proposal$labels)) {
|
|
opened_to_proposal$labels[[i]] <- opened_to_proposal$labels[[i]] |>
|
|
rowwise() |>
|
|
filter(is.element("proposal", name))
|
|
}
|
|
opened_to_proposal <- opened_to_proposal |>
|
|
mutate(proposalAt = labels$createdAt) |>
|
|
select(title, author, createdAt, proposalAt)
|
|
|
|
opened_to_proposal <- opened_to_proposal |>
|
|
mutate(Company = case_when(
|
|
!is.null(author) && is.element(author, element_employee) ~ "Element",
|
|
!is.null(author) && is.element(author, sct_employee) ~ "SCT",
|
|
!is.null(author) && is.element(author, famedly_employee) ~ "Famedly",
|
|
!is.null(author) && is.element(author, beeper_employee) ~ "Beeper",
|
|
TRUE ~ "Other"
|
|
)) |>
|
|
group_by(Company)
|
|
|
|
# Proposal to having impl
|
|
# FIXME this doesnt work at this time as we dont know when the labels got removed. We need timelineItems() in the graphql query for this
|
|
# proposal_to_impl <- issues_gql_all |>
|
|
# filter(!is.na(labels) && is.element("proposal", labels$name)) |>
|
|
# select(title, author, createdAt)
|
|
```
|
|
|
|
# MSCs by Company (all kinds)
|
|
|
|
Note that this does not distinguish between MSCs written privately and MSCs written on behalf of a company.
|
|
|
|
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
|
|
|
# Filter MSCs by company
|
|
mscs <- issues_gql_all |>
|
|
filter(!is.na(labels) && is.element("proposal", labels$name))
|
|
|
|
|
|
mscs_element <- mscs |>
|
|
filter(!is.null(author) && is.element(author, element_employee)) |>
|
|
nrow()
|
|
|
|
mscs_sct <- mscs |>
|
|
filter(!is.null(author) && is.element(author, sct_employee)) |>
|
|
nrow()
|
|
|
|
mscs_famedly <- mscs |>
|
|
filter(!is.null(author) && is.element(author, famedly_employee)) |>
|
|
nrow()
|
|
|
|
mscs_beeper <- mscs |>
|
|
filter(!is.null(author) && is.element(author, beeper_employee)) |>
|
|
nrow()
|
|
|
|
mscs_other <- nrow(mscs) - mscs_element - mscs_beeper - mscs_famedly - mscs_sct
|
|
|
|
# Display Data
|
|
data <- data.frame(Company = c("Beeper", "Element", "Famedly", "Other", "SCT"), Count = c(mscs_beeper, mscs_element, mscs_famedly, mscs_other, mscs_sct))
|
|
data <- map_df(data, rev)
|
|
data$Company <- factor(data$Company, levels = data$Company)
|
|
|
|
|
|
# Basic piechart
|
|
ggplot(data, aes(x = Company, y = Count)) +
|
|
geom_bar(stat = "identity", fill = "#69b3a2") +
|
|
theme_ipsum() +
|
|
theme(
|
|
panel.grid.minor.y = element_blank(),
|
|
panel.grid.major.y = element_blank(),
|
|
legend.position = "none"
|
|
) +
|
|
labs(
|
|
title = str_wrap("MSCs from companies", 40),
|
|
caption = "source: Github API",
|
|
x = "Issues"
|
|
) +
|
|
xlab("") +
|
|
geom_text(
|
|
aes(label = Count),
|
|
hjust = 1.5,
|
|
colour = "white"
|
|
) +
|
|
coord_flip()
|
|
```
|
|
|
|
# Merged MSCs by Company
|
|
|
|
Note that this does not distinguish between MSCs written privately and MSCs written on behalf of a company.
|
|
|
|
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
|
|
|
# Filter for only merged MSCs
|
|
merged_mscs <- issues_gql_all |>
|
|
filter(!is.na(labels) && is.element("proposal", labels$name) && (is.element("disposition-merge", labels$name) || is.element("merged", labels$name)))
|
|
|
|
# Filter MSCs by company
|
|
merged_element <- merged_mscs |>
|
|
filter(!is.null(author) && is.element(author, element_employee)) |>
|
|
nrow()
|
|
|
|
merged_sct <- merged_mscs |>
|
|
filter(!is.null(author) && is.element(author, sct_employee)) |>
|
|
nrow()
|
|
|
|
merged_famedly <- merged_mscs |>
|
|
filter(!is.null(author) && is.element(author, famedly_employee)) |>
|
|
nrow()
|
|
|
|
merged_beeper <- merged_mscs |>
|
|
filter(!is.null(author) && is.element(author, beeper_employee)) |>
|
|
nrow()
|
|
|
|
merged_other <- nrow(merged_mscs) - merged_element - merged_beeper - merged_famedly - merged_sct
|
|
|
|
# Display Data
|
|
data <- data.frame(Company = c("Beeper", "Element", "Famedly", "Other", "SCT"), Count = c(merged_beeper, merged_element, merged_famedly, merged_other, merged_sct))
|
|
data <- map_df(data, rev)
|
|
data$Company <- factor(data$Company, levels = data$Company)
|
|
|
|
|
|
# Basic piechart
|
|
ggplot(data, aes(x = Company, y = Count)) +
|
|
geom_bar(stat = "identity", fill = "#69b3a2") +
|
|
theme_ipsum() +
|
|
theme(
|
|
panel.grid.minor.y = element_blank(),
|
|
panel.grid.major.y = element_blank(),
|
|
legend.position = "none"
|
|
) +
|
|
labs(
|
|
title = str_wrap("Merged MSCs from companies", 40),
|
|
caption = "source: Github API",
|
|
x = "Issues"
|
|
) +
|
|
xlab("") +
|
|
geom_text(
|
|
aes(label = Count),
|
|
hjust = 1.5,
|
|
colour = "white"
|
|
) +
|
|
coord_flip()
|
|
```
|