398 lines
11 KiB
Markdown
398 lines
11 KiB
Markdown
Analysis of matrix spec bias-rumors
|
|
================
|
|
MTRNord
|
|
|
|
- <a href="#fetching-the-mscs" id="toc-fetching-the-mscs">Fetching the
|
|
MSCs</a>
|
|
- <a href="#get-employee-association-from-github-and-gitlab"
|
|
id="toc-get-employee-association-from-github-and-gitlab">Get Employee
|
|
association from Github and Gitlab</a>
|
|
- <a href="#get-times-for-state-transitions"
|
|
id="toc-get-times-for-state-transitions">Get times for state
|
|
transitions</a>
|
|
- <a href="#mscs-by-company-all-kind"
|
|
id="toc-mscs-by-company-all-kind">MSCs by Company (all kind)</a>
|
|
- <a href="#merged-mscs-by-company" id="toc-merged-mscs-by-company">Merged
|
|
MSCs by Company</a>
|
|
|
|
The following data is based purely on publicly available information. This
means the data was fetched from GitHub and GitLab as best as I was able to.
|
|
|
|
# Fetching the MSCs
|
|
|
|
``` r
|
|
# Setup things
|
|
library(gh)
|
|
library(tidyverse)
|
|
library(hrbrthemes)
|
|
library(survminer)
|
|
|
|
# import_roboto_condensed()
|
|
# extrafont::loadfonts(device = "win")
|
|
|
|
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
|
|
```
|
|
|
|
``` r
|
|
# Flatten one page of a GraphQL issue / pull-request response into a data frame.
#
# Args:
#   prs_gql: The parsed response of one `gh_gql()` call. The connection is
#     reached positionally as `[[1]][[1]][[1]]`, whose first element is the
#     `pageInfo` and whose second element is the list of edges.
#
# Returns:
#   A data frame with one row per node and list-columns for every field.
#   * `labels` holds a data frame of the label nodes, or `NA` when the node has
#     no labels.
#   * `author` holds the author's login; a NULL author is left as NULL.
#   * `mergedAt` is added as `NA` when the query did not return it.
#   * `isPR` is TRUE when the page carried a `mergedAt` column, else FALSE.
#   An empty page yields an empty data frame.
cleanup_data <- function(prs_gql) {
  edges <- prs_gql[[1]][[1]][[1]][[2]]

  # An empty page has nothing to flatten; `1:length(edges)` would have indexed
  # element 1 of an empty list here.
  if (length(edges) == 0) {
    return(data.frame())
  }

  nodes <- lapply(edges, function(edge) {
    node <- edge$node
    label_edges <- node$labels$edges
    if (length(label_edges) >= 1) {
      # One single-row data frame per label node, stacked into one data frame.
      label_rows <- lapply(label_edges, function(label_edge) {
        as.data.frame(label_edge$node)
      })
      node$labels <- do.call(rbind, label_rows)
    } else {
      node$labels <- NA
    }
    node
  })

  # rbind() on plain lists gives a list-matrix, so every column of the
  # resulting data frame is a list-column.
  prs_gql <- as.data.frame(do.call(rbind, nodes))

  # Only the pull-request query asks for `mergedAt`; use its presence to tell
  # the two kinds of page apart and to give both the same set of columns.
  if (!("mergedAt" %in% colnames(prs_gql))) {
    prs_gql$mergedAt <- NA
    prs_gql$isPR <- FALSE
  } else {
    prs_gql$isPR <- TRUE
  }

  # Replace each author object by its login. NULL authors stay NULL so that
  # downstream `is.null(author)` checks can see them.
  # NOTE(review): presumably NULL authors are deleted accounts - confirm.
  if ("author" %in% colnames(prs_gql)) {
    prs_gql$author <- lapply(prs_gql$author, function(author) {
      if (is.null(author)) {
        NULL
      } else {
        author[["login"]]
      }
    })
  }

  prs_gql
}
|
|
```
|
|
|
|
``` r
|
|
# Fetch every issue and pull request of matrix-org/matrix-spec-proposals once
# per session; the result is cached in `issues_gql_all`.
if (!exists("issues_gql_all")) {
  issue_query <- 'query($after: String) {
    repository(owner: "matrix-org", name: "matrix-spec-proposals") {
      issues(
        states: [OPEN, CLOSED]
        orderBy: {field: CREATED_AT, direction: ASC}
        first: 100
        after: $after
      ) {
        pageInfo {
          startCursor
          endCursor
          hasNextPage
          hasPreviousPage
        }
        edges {
          node {
            title
            url
            author {
              login
            }
            closedAt
            createdAt
            labels(first: 100) {
              pageInfo {
                startCursor
                endCursor
                hasNextPage
                hasPreviousPage
              }
              edges {
                node {
                  name
                  createdAt
                }
              }
            }
          }
        }
      }
    }
  }'

  pr_query <- 'query($after: String) {
    repository(owner: "matrix-org", name: "matrix-spec-proposals") {
      pullRequests(
        states: [OPEN, CLOSED, MERGED]
        orderBy: {field: CREATED_AT, direction: ASC}
        first: 100
        after: $after
      ) {
        pageInfo {
          startCursor
          endCursor
          hasNextPage
          hasPreviousPage
        }
        edges {
          node {
            title
            url
            author {
              login
            }
            closedAt
            mergedAt
            createdAt
            labels(first: 100) {
              pageInfo {
                startCursor
                endCursor
                hasNextPage
                hasPreviousPage
              }
              edges {
                node {
                  name
                  createdAt
                }
              }
            }
          }
        }
      }
    }
  }'

  # Run `query` page by page, following `pageInfo$endCursor` until
  # `hasNextPage` is no longer TRUE. Returns a list holding one cleaned data
  # frame (see `cleanup_data()`) per page.
  fetch_all_pages <- function(query) {
    pages <- list()
    cursor <- NULL
    repeat {
      # The first request is sent without variables; later ones pass the
      # cursor of the previous page as `$after`.
      response <- if (is.null(cursor)) {
        gh_gql(query)
      } else {
        gh_gql(query, variables = list(after = cursor))
      }
      # pageInfo is the first field of the connection, edges the second.
      page_info <- response[[1]][[1]][[1]][[1]]
      pages[[length(pages) + 1]] <- cleanup_data(response)
      if (!isTRUE(page_info$hasNextPage)) {
        break
      }
      cursor <- page_info$endCursor
    }
    pages
  }

  # Issues first, then pull requests; rbind() matches the columns by name.
  gql_data <- c(fetch_all_pages(issue_query), fetch_all_pages(pr_query))
  issues_gql_all <- do.call(rbind, gql_data)

  # Cleanup
  rm(gql_data, fetch_all_pages)

  # Every column is a list-column, so later dplyr verbs work row by row.
  issues_gql_all <- issues_gql_all |>
    rowwise()
}
|
|
```
|
|
|
|
# Get Employee association from Github and Gitlab
|
|
|
|
Please note that in the current PDF this is not yet hooked up to GitLab,
nor does it check the GitHub workplace (company) field. It may also miss
users whose affiliation cannot be detected from public data.
|
|
|
|
``` r
|
|
# TODO also check against gitlab
# TODO also check workplace thingy

# Compile a list of who is who.
# The lists are built once per session; authors are classified by the
# organisations the GitHub API reports for them.
if (!exists("element_employee") || !exists("famedly_employee") ||
  !exists("beeper_employee") || !exists("sct_employee")) {
  element_employee <- list()
  sct_employee <- c(
    "ara4n", "erikjohnston", "richvdh", "dbkr", "uhoreg",
    "anoadragon453", "turt2live", "KitsuneRal", "matrixbot"
  )
  famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
  beeper_employee <- list("Fizzadar")

  # Every distinct author login. unlist() drops NULL authors; NA and empty
  # logins are removed before any API call is made (after paste() an NA would
  # have become the literal string "NA").
  authors <- unique(unlist(issues_gql_all$author))
  authors <- authors[!is.na(authors) & nzchar(authors)]

  # Accounts that are already assigned by hand keep that assignment and are
  # not looked up, so nobody ends up counted for two companies.
  known <- c(sct_employee, unlist(famedly_employee), unlist(beeper_employee))
  authors <- setdiff(authors, known)

  # Get orgs of users on github
  for (user in authors) {
    # `.limit = Inf` follows pagination so users with many orgs are complete.
    orgs_raw <- gh(sprintf("GET /users/%s/orgs", user), .limit = Inf)
    org_logins <- vapply(orgs_raw, function(org) org$login, character(1))

    # First match wins: Element, then Beeper, then Famedly.
    if ("vector-im" %in% org_logins) {
      element_employee[[length(element_employee) + 1]] <- user
    } else if ("beeper" %in% org_logins) {
      beeper_employee[[length(beeper_employee) + 1]] <- user
    } else if ("Famedly" %in% org_logins) {
      famedly_employee[[length(famedly_employee) + 1]] <- user
    }
  }

  # Remove only the temporaries that exist (the loop may not have run).
  rm(list = intersect(
    c("authors", "known", "user", "orgs_raw", "org_logins"),
    ls()
  ))
}
|
|
```
|
|
|
|
# Get times for state transitions
|
|
|
|
``` r
|
|
# Opened to Proposal transition
# `issues_gql_all` is rowwise, so `labels` is a single cell here: either a
# data frame of label nodes or NA. `is.data.frame()` is a scalar test, whereas
# `!is.na(labels)` is a matrix and `&&` rejects that in R >= 4.3.
# NOTE(review): `createdAt` on a label node looks like the creation time of the
# label itself rather than the time it was applied to the issue - confirm
# against the GitHub GraphQL `Label` reference before interpreting `proposalAt`.
opened_to_proposal <- issues_gql_all |>
  filter(is.data.frame(labels) && "proposal" %in% labels$name) |>
  # match() picks the "proposal" label row, so proposalAt is always one value.
  mutate(proposalAt = labels$createdAt[match("proposal", labels$name)]) |>
  select(title, author, createdAt, proposalAt)

# Attribute every proposal to a company; the first matching list wins.
opened_to_proposal <- opened_to_proposal |>
  mutate(Company = case_when(
    !is.null(author) && author %in% element_employee ~ "Element",
    !is.null(author) && author %in% sct_employee ~ "SCT",
    !is.null(author) && author %in% famedly_employee ~ "Famedly",
    !is.null(author) && author %in% beeper_employee ~ "Beeper",
    TRUE ~ "Other"
  )) |>
  group_by(Company)

# Proposal to having impl
# FIXME this doesnt work at this time as we dont know when the labels got removed. We need timelineItems() in the graphql query for this
# proposal_to_impl <- issues_gql_all |>
# filter(!is.na(labels) && is.element("proposal", labels$name)) |>
# select(title, author, createdAt)
|
|
```
|
|
|
|
# MSCs by Company (all kind)
|
|
|
|
Note that this does not distinguish MSCs written in a private capacity from MSCs written on behalf of a company.
|
|
|
|
``` r
|
|
# Filter MSCs by company
# Rows are evaluated one at a time (rowwise), so `labels` is either a data
# frame of label nodes or NA; `is.data.frame()` keeps the `&&` operands scalar.
mscs <- issues_gql_all |>
  filter(is.data.frame(labels) && "proposal" %in% labels$name)

mscs_element <- mscs |>
  filter(!is.null(author) && author %in% element_employee) |>
  nrow()

mscs_sct <- mscs |>
  filter(!is.null(author) && author %in% sct_employee) |>
  nrow()

mscs_famedly <- mscs |>
  filter(!is.null(author) && author %in% famedly_employee) |>
  nrow()

mscs_beeper <- mscs |>
  filter(!is.null(author) && author %in% beeper_employee) |>
  nrow()

# Everything not attributed to one of the four groups above.
mscs_other <- nrow(mscs) - mscs_element - mscs_beeper - mscs_famedly - mscs_sct

# Display Data
# Rows and factor levels are reversed so that, after coord_flip(), the
# companies read alphabetically from top to bottom.
companies <- c("Beeper", "Element", "Famedly", "Other", "SCT")
counts <- c(mscs_beeper, mscs_element, mscs_famedly, mscs_other, mscs_sct)
data <- tibble(
  Company = factor(rev(companies), levels = rev(companies)),
  Count = rev(counts)
)

# Horizontal bar chart
ggplot(data, aes(x = Company, y = Count)) +
  geom_bar(stat = "identity", fill = "#69b3a2") +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  labs(
    title = str_wrap("MSCs from companies", 40),
    caption = "source: Github API"
  ) +
  # The company names label the bars, so the axis title is left empty.
  xlab("") +
  geom_text(
    aes(label = Count),
    hjust = 1.5,
    colour = "white"
  ) +
  coord_flip()
|
|
```
|
|
|
|
![](index_files/figure-gfm/unnamed-chunk-6-1.jpeg)<!-- -->
|
|
|
|
# Merged MSCs by Company
|
|
|
|
Note that this does not distinguish MSCs written in a private capacity from MSCs written on behalf of a company.
|
|
|
|
``` r
|
|
# Filter for only merged MSCs
# A proposal counts as merged when it carries the "disposition-merge" or the
# "merged" label. Rows are evaluated one at a time (rowwise), so `labels` is
# either a data frame of label nodes or NA; `is.data.frame()` keeps the `&&`
# operands scalar.
merged_mscs <- issues_gql_all |>
  filter(
    is.data.frame(labels) &&
      "proposal" %in% labels$name &&
      any(c("disposition-merge", "merged") %in% labels$name)
  )

# Filter MSCs by company
merged_element <- merged_mscs |>
  filter(!is.null(author) && author %in% element_employee) |>
  nrow()

merged_sct <- merged_mscs |>
  filter(!is.null(author) && author %in% sct_employee) |>
  nrow()

merged_famedly <- merged_mscs |>
  filter(!is.null(author) && author %in% famedly_employee) |>
  nrow()

merged_beeper <- merged_mscs |>
  filter(!is.null(author) && author %in% beeper_employee) |>
  nrow()

# Everything not attributed to one of the four groups above.
merged_other <- nrow(merged_mscs) - merged_element - merged_beeper - merged_famedly - merged_sct

# Display Data
# Rows and factor levels are reversed so that, after coord_flip(), the
# companies read alphabetically from top to bottom.
companies <- c("Beeper", "Element", "Famedly", "Other", "SCT")
counts <- c(merged_beeper, merged_element, merged_famedly, merged_other, merged_sct)
data <- tibble(
  Company = factor(rev(companies), levels = rev(companies)),
  Count = rev(counts)
)

# Horizontal bar chart
ggplot(data, aes(x = Company, y = Count)) +
  geom_bar(stat = "identity", fill = "#69b3a2") +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  labs(
    title = str_wrap("Merged MSCs from companies", 40),
    caption = "source: Github API"
  ) +
  # The company names label the bars, so the axis title is left empty.
  xlab("") +
  geom_text(
    aes(label = Count),
    hjust = 1.5,
    colour = "white"
  ) +
  coord_flip()
|
|
```
|
|
|
|
![](index_files/figure-gfm/unnamed-chunk-7-1.jpeg)<!-- -->
|