Switch to graphql and bar charts
This commit is contained in:
parent
349c2248e6
commit
83ca2c5014
413
index.Rmd
413
index.Rmd
|
@ -6,69 +6,201 @@ output:
|
|||
toc: true
|
||||
toc_depth: 2
|
||||
dev: jpeg
|
||||
pdf_document: default
|
||||
---
|
||||
|
||||
```{r setup, echo=FALSE, message=FALSE, warning=FALSE}
|
||||
# Set so that long lines in R will be wrapped:
|
||||
knitr::opts_chunk$set(tidy.opts = list(width.cutoff = 60), tidy = TRUE)
|
||||
```
|
||||
|
||||
The following data is based purely on publicly available information. This means the data was fetched from GitHub and GitLab as best as I was able to.
|
||||
|
||||
# Fetching the MSCs
|
||||
|
||||
```{r message=FALSE, warning=FALSE, dpi=300}
|
||||
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
||||
# Setup things
|
||||
library(gh)
|
||||
library(tidyverse)
|
||||
library(nplyr)
|
||||
library(ggplot2)
|
||||
|
||||
# Set theme
|
||||
theme_set(theme_bw())
|
||||
|
||||
# gh_whoami()
|
||||
```
|
||||
|
||||
# Fetch all issues that are a proposal
|
||||
if (!exists("issues_all")) {
|
||||
issues_raw <- gh(
|
||||
"GET /repos/matrix-org/matrix-spec-proposals/issues", sort = "created",
|
||||
state = "all", direction = "asc", labels = "proposal"
|
||||
)
|
||||
issues <- as.data.frame(do.call(rbind, issues_raw))
|
||||
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
||||
|
||||
data <- list()
|
||||
data[[1]] <- issues
|
||||
cleanup_data <- function(prs_gql) {
|
||||
prs_gql <- prs_gql[[1]][[1]][[1]][[2]]
|
||||
for (i in 1:length(prs_gql)) {
|
||||
prs_gql[[i]] <- prs_gql[[i]]$node
|
||||
prs_gql[[i]]$temp_labels <- NA
|
||||
if (length(prs_gql[[i]]$labels$edges) >= 1) {
|
||||
prs_gql[[i]]$temp_labels <- list()
|
||||
for (y in 1:length(prs_gql[[i]]$labels$edges)) {
|
||||
prs_gql[[i]]$temp_labels[[y]] <- prs_gql[[i]]$labels$edges[[y]]$node
|
||||
}
|
||||
|
||||
# Paginate API
|
||||
while (TRUE) {
|
||||
issues_raw <- try(
|
||||
{
|
||||
gh_next(issues_raw)
|
||||
}, silent = TRUE
|
||||
)
|
||||
if (inherits(issues_raw, "try-error")) {
|
||||
break
|
||||
} else {
|
||||
temp <- as.data.frame(do.call(rbind, issues_raw))
|
||||
data[[length(data) + 1]] <- temp
|
||||
}
|
||||
prs_gql[[i]]$temp_labels <- map(prs_gql[[i]]$temp_labels, as.data.frame)
|
||||
prs_gql[[i]]$temp_labels <- do.call(rbind, prs_gql[[i]]$temp_labels)
|
||||
}
|
||||
issues_all <- do.call(rbind, data)
|
||||
}
|
||||
prs_gql[[i]]$labels <- NA
|
||||
prs_gql[[i]]$labels <- prs_gql[[i]]$temp_labels
|
||||
prs_gql[[i]]$temp_labels <- NULL
|
||||
}
|
||||
prs_gql <- as.data.frame(do.call(rbind, prs_gql))
|
||||
if (!("mergedAt" %in% colnames(prs_gql))) {
|
||||
prs_gql$mergedAt <- NA
|
||||
prs_gql$isPR <- FALSE
|
||||
} else {
|
||||
prs_gql$isPR <- TRUE
|
||||
}
|
||||
|
||||
# Filter out corrupt issues without labels
|
||||
filtered_issues <- issues_all |>
|
||||
filter(
|
||||
sapply(labels, length, simplify = TRUE) > 0
|
||||
) |>
|
||||
for (i in rownames(prs_gql)) {
|
||||
author <- prs_gql[i, "author"]
|
||||
if (!is.null(author)) {
|
||||
prs_gql[i, "author"] <- do.call(rbind.data.frame, author)
|
||||
}
|
||||
}
|
||||
|
||||
return(prs_gql)
|
||||
}
|
||||
```
|
||||
|
||||
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
||||
|
||||
if (!exists("issues_gql_all")) {
|
||||
issue_query <- 'query($after: String) {
|
||||
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
|
||||
issues(
|
||||
states: [OPEN, CLOSED]
|
||||
orderBy: {field: CREATED_AT, direction: ASC}
|
||||
first: 100
|
||||
after: $after
|
||||
) {
|
||||
pageInfo {
|
||||
startCursor
|
||||
endCursor
|
||||
hasNextPage
|
||||
hasPreviousPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
title
|
||||
url
|
||||
author {
|
||||
login
|
||||
}
|
||||
closedAt
|
||||
createdAt
|
||||
labels(first: 100) {
|
||||
pageInfo {
|
||||
startCursor
|
||||
endCursor
|
||||
hasNextPage
|
||||
hasPreviousPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
name
|
||||
createdAt
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
|
||||
pr_query <- 'query($after: String) {
|
||||
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
|
||||
pullRequests(
|
||||
states: [OPEN, CLOSED, MERGED]
|
||||
orderBy: {field: CREATED_AT, direction: ASC}
|
||||
first: 100
|
||||
after: $after
|
||||
) {
|
||||
pageInfo {
|
||||
startCursor
|
||||
endCursor
|
||||
hasNextPage
|
||||
hasPreviousPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
title
|
||||
url
|
||||
author {
|
||||
login
|
||||
}
|
||||
closedAt
|
||||
mergedAt
|
||||
createdAt
|
||||
labels(first: 100) {
|
||||
pageInfo {
|
||||
startCursor
|
||||
endCursor
|
||||
hasNextPage
|
||||
hasPreviousPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
name
|
||||
createdAt
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
|
||||
issues_gql <- gh_gql(issue_query)
|
||||
issues_gql_pageinfo <- issues_gql[[1]][[1]][[1]][[1]]
|
||||
issues_gql <- cleanup_data(issues_gql)
|
||||
|
||||
gql_data <- list(issues_gql)
|
||||
|
||||
# Paginate API
|
||||
while (issues_gql_pageinfo$hasNextPage) {
|
||||
variables <- list()
|
||||
variables$after <- issues_gql_pageinfo$endCursor
|
||||
issues_gql <- gh_gql(issue_query, variables = variables)
|
||||
issues_gql_pageinfo <- issues_gql[[1]][[1]][[1]][[1]]
|
||||
issues_gql <- cleanup_data(issues_gql)
|
||||
gql_data[[length(gql_data) + 1]] <- issues_gql
|
||||
}
|
||||
|
||||
prs_gql <- gh_gql(pr_query)
|
||||
prs_gql_pageinfo <- prs_gql[[1]][[1]][[1]][[1]]
|
||||
prs_gql <- cleanup_data(prs_gql)
|
||||
gql_data[[length(gql_data) + 1]] <- prs_gql
|
||||
|
||||
# Paginate API
|
||||
while (prs_gql_pageinfo$hasNextPage) {
|
||||
variables <- list()
|
||||
variables$after <- prs_gql_pageinfo$endCursor
|
||||
prs_gql <- gh_gql(pr_query, variables = variables)
|
||||
prs_gql_pageinfo <- prs_gql[[1]][[1]][[1]][[1]]
|
||||
prs_gql <- cleanup_data(prs_gql)
|
||||
gql_data[[length(gql_data) + 1]] <- prs_gql
|
||||
}
|
||||
|
||||
issues_gql_all <- do.call(rbind, gql_data)
|
||||
|
||||
# Cleanup
|
||||
rm(issues_gql, prs_gql, gql_data, variables, issues_gql_pageinfo, prs_gql_pageinfo)
|
||||
|
||||
|
||||
issues_gql_all <- issues_gql_all |>
|
||||
rowwise()
|
||||
filtered_issues$labels <- map(filtered_issues$labels, ~do.call(rbind, .))
|
||||
}
|
||||
```
|
||||
|
||||
# Get Employee association from Github and Gitlab
|
||||
|
||||
Please note that in the current PDF this is not yet hooked up to GitLab and does not yet check the GitHub workplace field. It may also exclude some users who are not detectable.
|
||||
|
||||
```{r message=FALSE, warning=FALSE, dpi=300}
|
||||
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
||||
|
||||
# TODO also check against gitlab
|
||||
# TODO also check workplace thingy
|
||||
|
@ -76,173 +208,144 @@ Please note that in the current PDF this is not yet hooked up to gitlab or check
|
|||
# Compile a list of who is who
|
||||
if (!exists("element_employee") || !exists("famedly_employee") || !exists("beeper_employee")) {
|
||||
element_employee <- list()
|
||||
sct_employee <- c("ara4n","erikjohnston","richvdh","dbkr","uhoreg","anoadragon453","turt2live","KitsuneRal")
|
||||
sct_employee <- c("ara4n", "erikjohnston", "richvdh", "dbkr", "uhoreg", "anoadragon453", "turt2live", "KitsuneRal", "matrixbot")
|
||||
famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
|
||||
beeper_employee <- list("Fizzadar")
|
||||
users <- list()
|
||||
# Get orgs of users on github
|
||||
for (i in 1:nrow(filtered_issues)) {
|
||||
user <- filtered_issues[i, ]$user[[1]]$login
|
||||
if ((user %in% users) || (user %in% sct_employee)) {
|
||||
next
|
||||
}
|
||||
orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
|
||||
orgs <- as.data.frame(do.call(rbind, orgs_raw))
|
||||
|
||||
|
||||
if ("vector-im" %in% orgs$login) {
|
||||
element_employee[[length(element_employee) + 1]] <- user
|
||||
} else if ("beeper" %in% orgs$login) {
|
||||
beeper_employee[[length(beeper_employee) + 1]] <- user
|
||||
} else if ("Famedly" %in% orgs$login) {
|
||||
famedly_employee[[length(famedly_employee) + 1]] <- user
|
||||
}
|
||||
users[[length(users) + 1]] <- user
|
||||
for (i in rownames(issues_gql_all)) {
|
||||
user <- issues_gql_all[i, "author"]
|
||||
user <- paste(unlist(user), collapse = "")
|
||||
if (is.na(user) || is.null(user) || user == "") {
|
||||
next
|
||||
}
|
||||
if ((user %in% users) || (user %in% sct_employee)) {
|
||||
next
|
||||
}
|
||||
orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
|
||||
orgs <- as.data.frame(do.call(rbind, orgs_raw))
|
||||
|
||||
|
||||
if ("vector-im" %in% orgs$login) {
|
||||
element_employee[[length(element_employee) + 1]] <- user
|
||||
} else if ("beeper" %in% orgs$login) {
|
||||
beeper_employee[[length(beeper_employee) + 1]] <- user
|
||||
} else if ("Famedly" %in% orgs$login) {
|
||||
famedly_employee[[length(famedly_employee) + 1]] <- user
|
||||
}
|
||||
users[[length(users) + 1]] <- user
|
||||
}
|
||||
rm(orgs, orgs_raw, user, author, i, users)
|
||||
}
|
||||
```
|
||||
|
||||
# Get times for state transitions
|
||||
|
||||
```{r message=FALSE, warning=FALSE, tidy = "styler"}
|
||||
opened_to_proposal <- issues_gql_all
|
||||
```
|
||||
|
||||
# MSCs by Company (all kind)
|
||||
|
||||
Note that this does not adjust for private vs company MSCs.
|
||||
|
||||
```{r message=FALSE, warning=FALSE, dpi=300}
|
||||
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
||||
|
||||
# Filter MSCs by company
|
||||
merged_element <- filtered_issues |>
|
||||
filter(user$login %in% element_employee) |>
|
||||
nrow()
|
||||
mscs_element <- issues_gql_all |>
|
||||
filter(!is.null(author) && is.element(author, element_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_sct <- filtered_issues |>
|
||||
filter(user$login %in% sct_employee) |>
|
||||
nrow()
|
||||
mscs_sct <- issues_gql_all |>
|
||||
filter(!is.null(author) && is.element(author, sct_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_famedly <- filtered_issues |>
|
||||
filter(user$login %in% famedly_employee) |>
|
||||
nrow()
|
||||
mscs_famedly <- issues_gql_all |>
|
||||
filter(!is.null(author) && is.element(author, famedly_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_beeper <- filtered_issues |>
|
||||
filter(user$login %in% beeper_employee) |>
|
||||
nrow()
|
||||
mscs_beeper <- issues_gql_all |>
|
||||
filter(!is.null(author) && is.element(author, beeper_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_other <- filtered_issues |>
|
||||
filter(!(user$login %in% element_employee)) |>
|
||||
filter(!(user$login %in% famedly_employee)) |>
|
||||
filter(!(user$login %in% beeper_employee)) |>
|
||||
filter(!(user$login %in% sct_employee)) |>
|
||||
nrow()
|
||||
mscs_other <- nrow(issues_gql_all) - mscs_element - mscs_beeper - mscs_famedly - mscs_sct
|
||||
|
||||
column_names <- c("Count")
|
||||
# Display Data
|
||||
data <- data.frame(
|
||||
group = c("Element", "Beeper", "Famedly", "SCT", "Other"),
|
||||
value = c(merged_element, merged_beeper, merged_famedly, merged_sct, merged_other)
|
||||
)
|
||||
|
||||
# Compute the position of labels
|
||||
data <- data |>
|
||||
filter(value != 0) |>
|
||||
arrange(desc(group)) |>
|
||||
mutate(
|
||||
prop = value/sum(data$value) *
|
||||
100
|
||||
) |>
|
||||
mutate(
|
||||
ypos = cumsum(prop) -
|
||||
0.5 * prop
|
||||
)
|
||||
data <- data.frame(column_names = column_names, Element = mscs_element, Beeper = mscs_beeper, Famedly = mscs_famedly, SCT = mscs_sct, Other = mscs_other)
|
||||
data2 <- data.frame(t(data[-1]))
|
||||
colnames(data2) <- data[, 1]
|
||||
data <- data2
|
||||
data <- cbind(Company = rownames(data), data)
|
||||
rownames(data) <- 1:nrow(data)
|
||||
rownames(data) <- NULL
|
||||
rm(data2)
|
||||
|
||||
|
||||
# Basic piechart
|
||||
ggplot(data, aes(x = "", y = prop, fill = group)) +
|
||||
geom_bar(stat = "identity", width = 1, color = "white") +
|
||||
coord_polar("y", start = 0) +
|
||||
theme_void() + labs(
|
||||
title = str_wrap("Percentage of MSCs by Contributors associated with companies", 40),
|
||||
ggplot(data, aes(x = Company, y = Count, fill = Company)) +
|
||||
geom_bar(stat = "identity") +
|
||||
labs(
|
||||
title = str_wrap("Number of MSCs by Contributors associated with companies", 40),
|
||||
subtitle = str_wrap(
|
||||
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
|
||||
60
|
||||
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
|
||||
60
|
||||
),
|
||||
caption = "source: Github API"
|
||||
) +
|
||||
theme(legend.position = "none") +
|
||||
geom_text(
|
||||
aes(y = ypos, label = group),
|
||||
color = "white", size = 5
|
||||
) +
|
||||
scale_fill_brewer(palette = "Set1")
|
||||
)
|
||||
```
|
||||
|
||||
# Merged MSCs by Company
|
||||
|
||||
Note that this does not adjust for private vs company MSCs.
|
||||
|
||||
```{r message=FALSE, warning=FALSE, dpi=300}
|
||||
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
|
||||
|
||||
# Filter for only merged MSCs
|
||||
merged_mscs <- filtered_issues |>
|
||||
filter("proposal" %in% labels) |>
|
||||
filter(("disposition-merge" %in% labels) | ("merged" %in% labels))
|
||||
merged_mscs <- issues_gql_all |>
|
||||
filter(!is.na(labels) && is.element("proposal", labels$name) && (is.element("disposition-merge", labels$name) || is.element("merged", labels$name)))
|
||||
|
||||
# Filter MSCs by company
|
||||
merged_element <- merged_mscs |>
|
||||
filter(user$login %in% element_employee) |>
|
||||
nrow()
|
||||
filter(!is.null(author) && is.element(author, element_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_sct <- merged_mscs |>
|
||||
filter(user$login %in% sct_employee) |>
|
||||
nrow()
|
||||
filter(!is.null(author) && is.element(author, sct_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_famedly <- merged_mscs |>
|
||||
filter(user$login %in% famedly_employee) |>
|
||||
nrow()
|
||||
filter(!is.null(author) && is.element(author, famedly_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_beeper <- merged_mscs |>
|
||||
filter(user$login %in% beeper_employee) |>
|
||||
nrow()
|
||||
filter(!is.null(author) && is.element(author, beeper_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_other <- merged_mscs |>
|
||||
filter(!(user$login %in% element_employee)) |>
|
||||
filter(!(user$login %in% famedly_employee)) |>
|
||||
filter(!(user$login %in% beeper_employee)) |>
|
||||
filter(!(user$login %in% sct_employee)) |>
|
||||
nrow()
|
||||
merged_other <- nrow(merged_mscs) - merged_element - merged_beeper - merged_famedly - merged_sct
|
||||
|
||||
# Display Data
|
||||
data <- data.frame(
|
||||
group = c("Element", "Beeper", "Famedly", "SCT", "Other"),
|
||||
value = c(merged_element, merged_beeper, merged_famedly, merged_sct, merged_other)
|
||||
)
|
||||
|
||||
# Compute the position of labels
|
||||
data <- data |>
|
||||
filter(value != 0) |>
|
||||
arrange(desc(group)) |>
|
||||
mutate(
|
||||
prop = value/sum(data$value) *
|
||||
100
|
||||
) |>
|
||||
mutate(
|
||||
ypos = cumsum(prop) -
|
||||
0.5 * prop
|
||||
)
|
||||
column_names <- c("Count")
|
||||
# Display Data
|
||||
data <- data.frame(column_names = column_names, Element = merged_element, Beeper = merged_beeper, Famedly = merged_famedly, SCT = merged_sct, Other = merged_other)
|
||||
data2 <- data.frame(t(data[-1]))
|
||||
colnames(data2) <- data[, 1]
|
||||
data <- data2
|
||||
data <- cbind(Company = rownames(data), data)
|
||||
rownames(data) <- 1:nrow(data)
|
||||
rownames(data) <- NULL
|
||||
rm(data2)
|
||||
|
||||
|
||||
# Basic piechart
|
||||
ggplot(data, aes(x = "", y = prop, fill = group)) +
|
||||
geom_bar(stat = "identity", width = 1, color = "white") +
|
||||
coord_polar("y", start = 0) +
|
||||
theme_void() + labs(
|
||||
title = str_wrap("Percentage of merged MSCs by Contributors associated with companies", 40),
|
||||
ggplot(data, aes(x = Company, y = Count, fill = Company)) +
|
||||
geom_bar(stat = "identity") +
|
||||
labs(
|
||||
title = str_wrap("Number of merged MSCs by Contributors associated with companies", 40),
|
||||
subtitle = str_wrap(
|
||||
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
|
||||
60
|
||||
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
|
||||
60
|
||||
),
|
||||
caption = "source: Github API"
|
||||
) +
|
||||
theme(legend.position = "none") +
|
||||
geom_text(
|
||||
aes(y = ypos, label = group),
|
||||
color = "white", size = 5
|
||||
) +
|
||||
scale_fill_brewer(palette = "Set1")
|
||||
)
|
||||
```
|
||||
|
|
396
index.md
396
index.md
|
@ -7,6 +7,9 @@ MTRNord
|
|||
- <a href="#get-employee-association-from-github-and-gitlab"
|
||||
id="toc-get-employee-association-from-github-and-gitlab">Get Employee
|
||||
association from Github and Gitlab</a>
|
||||
- <a href="#get-times-for-state-transitions"
|
||||
id="toc-get-times-for-state-transitions">Get times for state
|
||||
transitions</a>
|
||||
- <a href="#mscs-by-company-all-kind"
|
||||
id="toc-mscs-by-company-all-kind">MSCs by Company (all kind)</a>
|
||||
- <a href="#merged-mscs-by-company" id="toc-merged-mscs-by-company">Merged
|
||||
|
@ -18,45 +21,184 @@ is fetched from github and gitlab as best as I was able to.
|
|||
# Fetching the MSCs
|
||||
|
||||
``` r
|
||||
# Setup things
|
||||
library(gh)
|
||||
library(tidyverse)
|
||||
library(nplyr)
|
||||
library(ggplot2)
|
||||
|
||||
# Set theme
|
||||
theme_set(theme_bw())
|
||||
|
||||
# gh_whoami()
|
||||
```
|
||||
|
||||
# Fetch all issues that are a proposal
|
||||
if (!exists("issues_all")) {
|
||||
issues_raw <- gh("GET /repos/matrix-org/matrix-spec-proposals/issues",
|
||||
sort = "created", state = "all", direction = "asc", labels = "proposal")
|
||||
issues <- as.data.frame(do.call(rbind, issues_raw))
|
||||
``` r
|
||||
cleanup_data <- function(prs_gql) {
|
||||
prs_gql <- prs_gql[[1]][[1]][[1]][[2]]
|
||||
for (i in 1:length(prs_gql)) {
|
||||
prs_gql[[i]] <- prs_gql[[i]]$node
|
||||
prs_gql[[i]]$temp_labels <- NA
|
||||
if (length(prs_gql[[i]]$labels$edges) >= 1) {
|
||||
prs_gql[[i]]$temp_labels <- list()
|
||||
for (y in 1:length(prs_gql[[i]]$labels$edges)) {
|
||||
prs_gql[[i]]$temp_labels[[y]] <- prs_gql[[i]]$labels$edges[[y]]$node
|
||||
}
|
||||
|
||||
data <- list()
|
||||
data[[1]] <- issues
|
||||
|
||||
# Paginate API
|
||||
while (TRUE) {
|
||||
issues_raw <- try({
|
||||
gh_next(issues_raw)
|
||||
}, silent = TRUE)
|
||||
if (inherits(issues_raw, "try-error")) {
|
||||
break
|
||||
} else {
|
||||
temp <- as.data.frame(do.call(rbind, issues_raw))
|
||||
data[[length(data) + 1]] <- temp
|
||||
}
|
||||
prs_gql[[i]]$temp_labels <- map(prs_gql[[i]]$temp_labels, as.data.frame)
|
||||
prs_gql[[i]]$temp_labels <- do.call(rbind, prs_gql[[i]]$temp_labels)
|
||||
}
|
||||
issues_all <- do.call(rbind, data)
|
||||
}
|
||||
prs_gql[[i]]$labels <- NA
|
||||
prs_gql[[i]]$labels <- prs_gql[[i]]$temp_labels
|
||||
prs_gql[[i]]$temp_labels <- NULL
|
||||
}
|
||||
prs_gql <- as.data.frame(do.call(rbind, prs_gql))
|
||||
if (!("mergedAt" %in% colnames(prs_gql))) {
|
||||
prs_gql$mergedAt <- NA
|
||||
prs_gql$isPR <- FALSE
|
||||
} else {
|
||||
prs_gql$isPR <- TRUE
|
||||
}
|
||||
|
||||
# Filter out corrupt issues without labels
|
||||
filtered_issues <- issues_all |>
|
||||
filter(sapply(labels, length, simplify = TRUE) > 0) |>
|
||||
for (i in rownames(prs_gql)) {
|
||||
author <- prs_gql[i, "author"]
|
||||
if (!is.null(author)) {
|
||||
prs_gql[i, "author"] <- do.call(rbind.data.frame, author)
|
||||
}
|
||||
}
|
||||
|
||||
return(prs_gql)
|
||||
}
|
||||
```
|
||||
|
||||
``` r
|
||||
if (!exists("issues_gql_all")) {
|
||||
issue_query <- 'query($after: String) {
|
||||
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
|
||||
issues(
|
||||
states: [OPEN, CLOSED]
|
||||
orderBy: {field: CREATED_AT, direction: ASC}
|
||||
first: 100
|
||||
after: $after
|
||||
) {
|
||||
pageInfo {
|
||||
startCursor
|
||||
endCursor
|
||||
hasNextPage
|
||||
hasPreviousPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
title
|
||||
url
|
||||
author {
|
||||
login
|
||||
}
|
||||
closedAt
|
||||
createdAt
|
||||
labels(first: 100) {
|
||||
pageInfo {
|
||||
startCursor
|
||||
endCursor
|
||||
hasNextPage
|
||||
hasPreviousPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
name
|
||||
createdAt
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
|
||||
pr_query <- 'query($after: String) {
|
||||
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
|
||||
pullRequests(
|
||||
states: [OPEN, CLOSED, MERGED]
|
||||
orderBy: {field: CREATED_AT, direction: ASC}
|
||||
first: 100
|
||||
after: $after
|
||||
) {
|
||||
pageInfo {
|
||||
startCursor
|
||||
endCursor
|
||||
hasNextPage
|
||||
hasPreviousPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
title
|
||||
url
|
||||
author {
|
||||
login
|
||||
}
|
||||
closedAt
|
||||
mergedAt
|
||||
createdAt
|
||||
labels(first: 100) {
|
||||
pageInfo {
|
||||
startCursor
|
||||
endCursor
|
||||
hasNextPage
|
||||
hasPreviousPage
|
||||
}
|
||||
edges {
|
||||
node {
|
||||
name
|
||||
createdAt
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
|
||||
issues_gql <- gh_gql(issue_query)
|
||||
issues_gql_pageinfo <- issues_gql[[1]][[1]][[1]][[1]]
|
||||
issues_gql <- cleanup_data(issues_gql)
|
||||
|
||||
gql_data <- list(issues_gql)
|
||||
|
||||
# Paginate API
|
||||
while (issues_gql_pageinfo$hasNextPage) {
|
||||
variables <- list()
|
||||
variables$after <- issues_gql_pageinfo$endCursor
|
||||
issues_gql <- gh_gql(issue_query, variables = variables)
|
||||
issues_gql_pageinfo <- issues_gql[[1]][[1]][[1]][[1]]
|
||||
issues_gql <- cleanup_data(issues_gql)
|
||||
gql_data[[length(gql_data) + 1]] <- issues_gql
|
||||
}
|
||||
|
||||
prs_gql <- gh_gql(pr_query)
|
||||
prs_gql_pageinfo <- prs_gql[[1]][[1]][[1]][[1]]
|
||||
prs_gql <- cleanup_data(prs_gql)
|
||||
gql_data[[length(gql_data) + 1]] <- prs_gql
|
||||
|
||||
# Paginate API
|
||||
while (prs_gql_pageinfo$hasNextPage) {
|
||||
variables <- list()
|
||||
variables$after <- prs_gql_pageinfo$endCursor
|
||||
prs_gql <- gh_gql(pr_query, variables = variables)
|
||||
prs_gql_pageinfo <- prs_gql[[1]][[1]][[1]][[1]]
|
||||
prs_gql <- cleanup_data(prs_gql)
|
||||
gql_data[[length(gql_data) + 1]] <- prs_gql
|
||||
}
|
||||
|
||||
issues_gql_all <- do.call(rbind, gql_data)
|
||||
|
||||
# Cleanup
|
||||
rm(issues_gql, prs_gql, gql_data, variables, issues_gql_pageinfo, prs_gql_pageinfo)
|
||||
|
||||
|
||||
issues_gql_all <- issues_gql_all |>
|
||||
rowwise()
|
||||
filtered_issues$labels <- map(filtered_issues$labels, ~do.call(rbind,
|
||||
.))
|
||||
}
|
||||
```
|
||||
|
||||
# Get Employee association from Github and Gitlab
|
||||
|
@ -66,92 +208,98 @@ or checking the github workplace field. It may also exclude some users
|
|||
that are not detectable.
|
||||
|
||||
``` r
|
||||
# TODO also check against gitlab TODO also check workplace
|
||||
# thingy
|
||||
# TODO also check against gitlab
|
||||
# TODO also check workplace thingy
|
||||
|
||||
# Compile a list of who is who
|
||||
if (!exists("element_employee") || !exists("famedly_employee") ||
|
||||
!exists("beeper_employee")) {
|
||||
element_employee <- list()
|
||||
sct_employee <- c("ara4n", "erikjohnston", "richvdh", "dbkr",
|
||||
"uhoreg", "anoadragon453", "turt2live", "KitsuneRal")
|
||||
famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
|
||||
beeper_employee <- list("Fizzadar")
|
||||
users <- list()
|
||||
# Get orgs of users on github
|
||||
for (i in 1:nrow(filtered_issues)) {
|
||||
user <- filtered_issues[i, ]$user[[1]]$login
|
||||
if ((user %in% users) || (user %in% sct_employee)) {
|
||||
next
|
||||
}
|
||||
orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
|
||||
orgs <- as.data.frame(do.call(rbind, orgs_raw))
|
||||
|
||||
if ("vector-im" %in% orgs$login) {
|
||||
element_employee[[length(element_employee) + 1]] <- user
|
||||
} else if ("beeper" %in% orgs$login) {
|
||||
beeper_employee[[length(beeper_employee) + 1]] <- user
|
||||
} else if ("Famedly" %in% orgs$login) {
|
||||
famedly_employee[[length(famedly_employee) + 1]] <- user
|
||||
}
|
||||
users[[length(users) + 1]] <- user
|
||||
if (!exists("element_employee") || !exists("famedly_employee") || !exists("beeper_employee")) {
|
||||
element_employee <- list()
|
||||
sct_employee <- c("ara4n", "erikjohnston", "richvdh", "dbkr", "uhoreg", "anoadragon453", "turt2live", "KitsuneRal", "matrixbot")
|
||||
famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
|
||||
beeper_employee <- list("Fizzadar")
|
||||
users <- list()
|
||||
# Get orgs of users on github
|
||||
for (i in rownames(issues_gql_all)) {
|
||||
user <- issues_gql_all[i, "author"]
|
||||
user <- paste(unlist(user), collapse = "")
|
||||
if (is.na(user) || is.null(user) || user == "") {
|
||||
next
|
||||
}
|
||||
if ((user %in% users) || (user %in% sct_employee)) {
|
||||
next
|
||||
}
|
||||
orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
|
||||
orgs <- as.data.frame(do.call(rbind, orgs_raw))
|
||||
|
||||
if ("vector-im" %in% orgs$login) {
|
||||
element_employee[[length(element_employee) + 1]] <- user
|
||||
} else if ("beeper" %in% orgs$login) {
|
||||
beeper_employee[[length(beeper_employee) + 1]] <- user
|
||||
} else if ("Famedly" %in% orgs$login) {
|
||||
famedly_employee[[length(famedly_employee) + 1]] <- user
|
||||
}
|
||||
users[[length(users) + 1]] <- user
|
||||
}
|
||||
rm(orgs, orgs_raw, user, author, i, users)
|
||||
}
|
||||
```
|
||||
|
||||
# Get times for state transitions
|
||||
|
||||
``` r
|
||||
opened_to_proposal <- issues_gql_all
|
||||
```
|
||||
|
||||
# MSCs by Company (all kind)
|
||||
|
||||
Note that this does not adjust for private vs company MSCs.
|
||||
|
||||
``` r
|
||||
# Filter MSCs by company
|
||||
merged_element <- filtered_issues |>
|
||||
filter(user$login %in% element_employee) |>
|
||||
nrow()
|
||||
mscs_element <- issues_gql_all |>
|
||||
filter(!is.null(author) && is.element(author, element_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_sct <- filtered_issues |>
|
||||
filter(user$login %in% sct_employee) |>
|
||||
nrow()
|
||||
mscs_sct <- issues_gql_all |>
|
||||
filter(!is.null(author) && is.element(author, sct_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_famedly <- filtered_issues |>
|
||||
filter(user$login %in% famedly_employee) |>
|
||||
nrow()
|
||||
mscs_famedly <- issues_gql_all |>
|
||||
filter(!is.null(author) && is.element(author, famedly_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_beeper <- filtered_issues |>
|
||||
filter(user$login %in% beeper_employee) |>
|
||||
nrow()
|
||||
mscs_beeper <- issues_gql_all |>
|
||||
filter(!is.null(author) && is.element(author, beeper_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_other <- filtered_issues |>
|
||||
filter(!(user$login %in% element_employee)) |>
|
||||
filter(!(user$login %in% famedly_employee)) |>
|
||||
filter(!(user$login %in% beeper_employee)) |>
|
||||
filter(!(user$login %in% sct_employee)) |>
|
||||
nrow()
|
||||
mscs_other <- nrow(issues_gql_all) - mscs_element - mscs_beeper - mscs_famedly - mscs_sct
|
||||
|
||||
column_names <- c("Count")
|
||||
# Display Data
|
||||
data <- data.frame(group = c("Element", "Beeper", "Famedly",
|
||||
"SCT", "Other"), value = c(merged_element, merged_beeper,
|
||||
merged_famedly, merged_sct, merged_other))
|
||||
|
||||
# Compute the position of labels
|
||||
data <- data |>
|
||||
filter(value != 0) |>
|
||||
arrange(desc(group)) |>
|
||||
mutate(prop = value/sum(data$value) * 100) |>
|
||||
mutate(ypos = cumsum(prop) - 0.5 * prop)
|
||||
data <- data.frame(column_names = column_names, Element = mscs_element, Beeper = mscs_beeper, Famedly = mscs_famedly, SCT = mscs_sct, Other = mscs_other)
|
||||
data2 <- data.frame(t(data[-1]))
|
||||
colnames(data2) <- data[, 1]
|
||||
data <- data2
|
||||
data <- cbind(Company = rownames(data), data)
|
||||
rownames(data) <- 1:nrow(data)
|
||||
rownames(data) <- NULL
|
||||
rm(data2)
|
||||
|
||||
|
||||
# Basic piechart
|
||||
ggplot(data, aes(x = "", y = prop, fill = group)) + geom_bar(stat = "identity",
|
||||
width = 1, color = "white") + coord_polar("y", start = 0) +
|
||||
theme_void() + labs(title = str_wrap("Percentage of MSCs by Contributors associated with companies",
|
||||
40), subtitle = str_wrap("Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
|
||||
60), caption = "source: Github API") + theme(legend.position = "none") +
|
||||
geom_text(aes(y = ypos, label = group), color = "white",
|
||||
size = 5) + scale_fill_brewer(palette = "Set1")
|
||||
ggplot(data, aes(x = Company, y = Count, fill = Company)) +
|
||||
geom_bar(stat = "identity") +
|
||||
labs(
|
||||
title = str_wrap("Number of MSCs by Contributors associated with companies", 40),
|
||||
subtitle = str_wrap(
|
||||
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
|
||||
60
|
||||
),
|
||||
caption = "source: Github API"
|
||||
)
|
||||
```
|
||||
|
||||
![](index_files/figure-gfm/unnamed-chunk-3-1.jpeg)<!-- -->
|
||||
![](index_files/figure-gfm/unnamed-chunk-6-1.jpeg)<!-- -->
|
||||
|
||||
# Merged MSCs by Company
|
||||
|
||||
|
@ -159,56 +307,52 @@ Note that this does not adjust for private vs company MSCs.
|
|||
|
||||
``` r
|
||||
# Filter for only merged MSCs
|
||||
merged_mscs <- filtered_issues |>
|
||||
filter("proposal" %in% labels) |>
|
||||
filter(("disposition-merge" %in% labels) | ("merged" %in%
|
||||
labels))
|
||||
merged_mscs <- issues_gql_all |>
|
||||
filter(!is.na(labels) && is.element("proposal", labels$name) && (is.element("disposition-merge", labels$name) || is.element("merged", labels$name)))
|
||||
|
||||
# Filter MSCs by company
|
||||
merged_element <- merged_mscs |>
|
||||
filter(user$login %in% element_employee) |>
|
||||
nrow()
|
||||
filter(!is.null(author) && is.element(author, element_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_sct <- merged_mscs |>
|
||||
filter(user$login %in% sct_employee) |>
|
||||
nrow()
|
||||
filter(!is.null(author) && is.element(author, sct_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_famedly <- merged_mscs |>
|
||||
filter(user$login %in% famedly_employee) |>
|
||||
nrow()
|
||||
filter(!is.null(author) && is.element(author, famedly_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_beeper <- merged_mscs |>
|
||||
filter(user$login %in% beeper_employee) |>
|
||||
nrow()
|
||||
filter(!is.null(author) && is.element(author, beeper_employee)) |>
|
||||
nrow()
|
||||
|
||||
merged_other <- merged_mscs |>
|
||||
filter(!(user$login %in% element_employee)) |>
|
||||
filter(!(user$login %in% famedly_employee)) |>
|
||||
filter(!(user$login %in% beeper_employee)) |>
|
||||
filter(!(user$login %in% sct_employee)) |>
|
||||
nrow()
|
||||
merged_other <- nrow(merged_mscs) - merged_element - merged_beeper - merged_famedly - merged_sct
|
||||
|
||||
# Display Data
|
||||
data <- data.frame(group = c("Element", "Beeper", "Famedly",
|
||||
"SCT", "Other"), value = c(merged_element, merged_beeper,
|
||||
merged_famedly, merged_sct, merged_other))
|
||||
|
||||
# Compute the position of labels
|
||||
data <- data |>
|
||||
filter(value != 0) |>
|
||||
arrange(desc(group)) |>
|
||||
mutate(prop = value/sum(data$value) * 100) |>
|
||||
mutate(ypos = cumsum(prop) - 0.5 * prop)
|
||||
column_names <- c("Count")
|
||||
# Display Data
|
||||
data <- data.frame(column_names = column_names, Element = merged_element, Beeper = merged_beeper, Famedly = merged_famedly, SCT = merged_sct, Other = merged_other)
|
||||
data2 <- data.frame(t(data[-1]))
|
||||
colnames(data2) <- data[, 1]
|
||||
data <- data2
|
||||
data <- cbind(Company = rownames(data), data)
|
||||
rownames(data) <- 1:nrow(data)
|
||||
rownames(data) <- NULL
|
||||
rm(data2)
|
||||
|
||||
|
||||
# Basic piechart
|
||||
ggplot(data, aes(x = "", y = prop, fill = group)) + geom_bar(stat = "identity",
|
||||
width = 1, color = "white") + coord_polar("y", start = 0) +
|
||||
theme_void() + labs(title = str_wrap("Percentage of merged MSCs by Contributors associated with companies",
|
||||
40), subtitle = str_wrap("Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
|
||||
60), caption = "source: Github API") + theme(legend.position = "none") +
|
||||
geom_text(aes(y = ypos, label = group), color = "white",
|
||||
size = 5) + scale_fill_brewer(palette = "Set1")
|
||||
ggplot(data, aes(x = Company, y = Count, fill = Company)) +
|
||||
geom_bar(stat = "identity") +
|
||||
labs(
|
||||
title = str_wrap("Number of merged MSCs by Contributors associated with companies", 40),
|
||||
subtitle = str_wrap(
|
||||
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
|
||||
60
|
||||
),
|
||||
caption = "source: Github API"
|
||||
)
|
||||
```
|
||||
|
||||
![](index_files/figure-gfm/unnamed-chunk-4-1.jpeg)<!-- -->
|
||||
![](index_files/figure-gfm/unnamed-chunk-7-1.jpeg)<!-- -->
|
||||
|
|
Binary file not shown.
Before Width: | Height: | Size: 140 KiB |
Binary file not shown.
Before Width: | Height: | Size: 144 KiB |
Binary file not shown.
After Width: | Height: | Size: 167 KiB |
Binary file not shown.
After Width: | Height: | Size: 167 KiB |
Loading…
Reference in New Issue