Switch to graphql and bar charts

This commit is contained in:
Marcel Radzio 2022-10-29 21:50:44 +02:00
parent 349c2248e6
commit 83ca2c5014
7 changed files with 528 additions and 281 deletions

413
index.Rmd
View File

@ -6,69 +6,201 @@ output:
toc: true
toc_depth: 2
dev: jpeg
pdf_document: default
---
```{r setup, echo=FALSE, message=FALSE, warning=FALSE}
# Set so that long lines in R will be wrapped:
knitr::opts_chunk$set(tidy.opts = list(width.cutoff = 60), tidy = TRUE)
```
The following data is based purely on public knowledge. This means the data is fetched from GitHub and GitLab as best as I was able to.
# Fetching the MSCs
```{r message=FALSE, warning=FALSE, dpi=300}
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
# Setup things
library(gh)
library(tidyverse)
library(nplyr)
library(ggplot2)
# Set theme
theme_set(theme_bw())
# gh_whoami()
```
# Fetch all issues that are a proposal
if (!exists("issues_all")) {
issues_raw <- gh(
"GET /repos/matrix-org/matrix-spec-proposals/issues", sort = "created",
state = "all", direction = "asc", labels = "proposal"
)
issues <- as.data.frame(do.call(rbind, issues_raw))
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
data <- list()
data[[1]] <- issues
cleanup_data <- function(prs_gql) {
prs_gql <- prs_gql[[1]][[1]][[1]][[2]]
for (i in 1:length(prs_gql)) {
prs_gql[[i]] <- prs_gql[[i]]$node
prs_gql[[i]]$temp_labels <- NA
if (length(prs_gql[[i]]$labels$edges) >= 1) {
prs_gql[[i]]$temp_labels <- list()
for (y in 1:length(prs_gql[[i]]$labels$edges)) {
prs_gql[[i]]$temp_labels[[y]] <- prs_gql[[i]]$labels$edges[[y]]$node
}
# Paginate API
while (TRUE) {
issues_raw <- try(
{
gh_next(issues_raw)
}, silent = TRUE
)
if (inherits(issues_raw, "try-error")) {
break
} else {
temp <- as.data.frame(do.call(rbind, issues_raw))
data[[length(data) + 1]] <- temp
}
prs_gql[[i]]$temp_labels <- map(prs_gql[[i]]$temp_labels, as.data.frame)
prs_gql[[i]]$temp_labels <- do.call(rbind, prs_gql[[i]]$temp_labels)
}
issues_all <- do.call(rbind, data)
}
prs_gql[[i]]$labels <- NA
prs_gql[[i]]$labels <- prs_gql[[i]]$temp_labels
prs_gql[[i]]$temp_labels <- NULL
}
prs_gql <- as.data.frame(do.call(rbind, prs_gql))
if (!("mergedAt" %in% colnames(prs_gql))) {
prs_gql$mergedAt <- NA
prs_gql$isPR <- FALSE
} else {
prs_gql$isPR <- TRUE
}
# Filter out corrupt issues without labels
filtered_issues <- issues_all |>
filter(
sapply(labels, length, simplify = TRUE) > 0
) |>
for (i in rownames(prs_gql)) {
author <- prs_gql[i, "author"]
if (!is.null(author)) {
prs_gql[i, "author"] <- do.call(rbind.data.frame, author)
}
}
return(prs_gql)
}
```
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
if (!exists("issues_gql_all")) {
issue_query <- 'query($after: String) {
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
issues(
states: [OPEN, CLOSED]
orderBy: {field: CREATED_AT, direction: ASC}
first: 100
after: $after
) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
title
url
author {
login
}
closedAt
createdAt
labels(first: 100) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
name
createdAt
}
}
}
}
}
}
}
}'
pr_query <- 'query($after: String) {
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
pullRequests(
states: [OPEN, CLOSED, MERGED]
orderBy: {field: CREATED_AT, direction: ASC}
first: 100
after: $after
) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
title
url
author {
login
}
closedAt
mergedAt
createdAt
labels(first: 100) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
name
createdAt
}
}
}
}
}
}
}
}'
# Download every page of one paginated GraphQL connection query.
#
# `query` must accept an optional `$after` cursor variable and select
# `pageInfo { endCursor hasNextPage }` on its connection, as both
# `issue_query` and `pr_query` do. Returns a list holding one cleaned data
# frame (see `cleanup_data()`) per page, in the order the API served them.
fetch_all_pages <- function(query) {
  pages <- list()
  cursor <- NULL
  repeat {
    # The first request carries no cursor; later ones resume after the last
    # entry of the previous page.
    response <- if (is.null(cursor)) {
      gh_gql(query)
    } else {
      gh_gql(query, variables = list(after = cursor))
    }
    # Read the paging state before cleanup_data() reduces the response to
    # its edges.
    page_info <- response[[1]][[1]][[1]]$pageInfo
    pages[[length(pages) + 1]] <- cleanup_data(response)
    if (!page_info$hasNextPage) {
      break
    }
    cursor <- page_info$endCursor
  }
  pages
}

# Issues first, then pull requests; rbind() stacks the pages in that order.
gql_data <- c(fetch_all_pages(issue_query), fetch_all_pages(pr_query))
issues_gql_all <- do.call(rbind, gql_data)

# Cleanup
rm(gql_data, fetch_all_pages)

# Later filter() calls evaluate their conditions one row at a time.
issues_gql_all <- issues_gql_all |>
  rowwise()
filtered_issues$labels <- map(filtered_issues$labels, ~do.call(rbind, .))
}
```
# Get Employee association from Github and Gitlab
Please note that in the current PDF this is not yet hooked up to GitLab and does not yet check the GitHub workplace field. It may also exclude some users who are not detectable.
```{r message=FALSE, warning=FALSE, dpi=300}
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
# TODO also check against gitlab
# TODO also check workplace thingy
@ -76,173 +208,144 @@ Please note that in the current PDF this is not yet hooked up to gitlab or check
# Compile a list of who is who
if (!exists("element_employee") || !exists("famedly_employee") || !exists("beeper_employee")) {
element_employee <- list()
sct_employee <- c("ara4n","erikjohnston","richvdh","dbkr","uhoreg","anoadragon453","turt2live","KitsuneRal")
sct_employee <- c("ara4n", "erikjohnston", "richvdh", "dbkr", "uhoreg", "anoadragon453", "turt2live", "KitsuneRal", "matrixbot")
famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
beeper_employee <- list("Fizzadar")
users <- list()
# Get orgs of users on github
for (i in 1:nrow(filtered_issues)) {
user <- filtered_issues[i, ]$user[[1]]$login
if ((user %in% users) || (user %in% sct_employee)) {
next
}
orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
orgs <- as.data.frame(do.call(rbind, orgs_raw))
if ("vector-im" %in% orgs$login) {
element_employee[[length(element_employee) + 1]] <- user
} else if ("beeper" %in% orgs$login) {
beeper_employee[[length(beeper_employee) + 1]] <- user
} else if ("Famedly" %in% orgs$login) {
famedly_employee[[length(famedly_employee) + 1]] <- user
}
users[[length(users) + 1]] <- user
# Look up each distinct author's GitHub organisations once and file the
# login under the first matching company list.
for (i in rownames(issues_gql_all)) {
  # The author cell may be a nested list; flatten it to its raw value(s).
  login <- unlist(issues_gql_all[i, "author"])
  # Skip rows without an author *before* paste(): paste() turns NA into the
  # string "NA" and never returns NULL, so a check made afterwards cannot
  # catch a missing author and would query the API for a user named "NA".
  if (length(login) == 0 || anyNA(login)) {
    next
  }
  user <- paste(login, collapse = "")
  if (user == "") {
    next
  }
  # Already looked up, or listed in sct_employee: no API call needed.
  if ((user %in% users) || (user %in% sct_employee)) {
    next
  }
  orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
  orgs <- as.data.frame(do.call(rbind, orgs_raw))
  # First match wins, so a user in several orgs is counted only once.
  if ("vector-im" %in% orgs$login) {
    element_employee[[length(element_employee) + 1]] <- user
  } else if ("beeper" %in% orgs$login) {
    beeper_employee[[length(beeper_employee) + 1]] <- user
  } else if ("Famedly" %in% orgs$login) {
    famedly_employee[[length(famedly_employee) + 1]] <- user
  }
  users[[length(users) + 1]] <- user
}
# Drop the loop temporaries; only names created in this chunk are listed.
rm(orgs, orgs_raw, user, login, i, users)
}
```
# Get times for state transitions
```{r message=FALSE, warning=FALSE, tidy = "styler"}
opened_to_proposal <- issues_gql_all
```
# MSCs by Company (all kind)
Note that this does not adjust for private vs company MSCs.
```{r message=FALSE, warning=FALSE, dpi=300}
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
# Filter MSCs by company
merged_element <- filtered_issues |>
filter(user$login %in% element_employee) |>
nrow()
mscs_element <- issues_gql_all |>
filter(!is.null(author) && is.element(author, element_employee)) |>
nrow()
merged_sct <- filtered_issues |>
filter(user$login %in% sct_employee) |>
nrow()
mscs_sct <- issues_gql_all |>
filter(!is.null(author) && is.element(author, sct_employee)) |>
nrow()
merged_famedly <- filtered_issues |>
filter(user$login %in% famedly_employee) |>
nrow()
mscs_famedly <- issues_gql_all |>
filter(!is.null(author) && is.element(author, famedly_employee)) |>
nrow()
merged_beeper <- filtered_issues |>
filter(user$login %in% beeper_employee) |>
nrow()
mscs_beeper <- issues_gql_all |>
filter(!is.null(author) && is.element(author, beeper_employee)) |>
nrow()
merged_other <- filtered_issues |>
filter(!(user$login %in% element_employee)) |>
filter(!(user$login %in% famedly_employee)) |>
filter(!(user$login %in% beeper_employee)) |>
filter(!(user$login %in% sct_employee)) |>
nrow()
mscs_other <- nrow(issues_gql_all) - mscs_element - mscs_beeper - mscs_famedly - mscs_sct
column_names <- c("Count")
# Display Data
data <- data.frame(
group = c("Element", "Beeper", "Famedly", "SCT", "Other"),
value = c(merged_element, merged_beeper, merged_famedly, merged_sct, merged_other)
)
# Compute the position of labels
data <- data |>
filter(value != 0) |>
arrange(desc(group)) |>
mutate(
prop = value/sum(data$value) *
100
) |>
mutate(
ypos = cumsum(prop) -
0.5 * prop
)
# Plot input in long form: one row per company, with the company name in
# `Company` and its number of MSCs in `Count` (the two columns the ggplot
# call below maps to x/fill and y).
data <- data.frame(
  Company = c("Element", "Beeper", "Famedly", "SCT", "Other"),
  Count = c(mscs_element, mscs_beeper, mscs_famedly, mscs_sct, mscs_other)
)
# Basic bar chart
ggplot(data, aes(x = "", y = prop, fill = group)) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar("y", start = 0) +
theme_void() + labs(
title = str_wrap("Percentage of MSCs by Contributors associated with companies", 40),
ggplot(data, aes(x = Company, y = Count, fill = Company)) +
geom_bar(stat = "identity") +
labs(
title = str_wrap("Number of MSCs by Contributors associated with companies", 40),
subtitle = str_wrap(
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
60
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
60
),
caption = "source: Github API"
) +
theme(legend.position = "none") +
geom_text(
aes(y = ypos, label = group),
color = "white", size = 5
) +
scale_fill_brewer(palette = "Set1")
)
```
# Merged MSCs by Company
Note that this does not adjust for private vs company MSCs.
```{r message=FALSE, warning=FALSE, dpi=300}
```{r message=FALSE, warning=FALSE, dpi=300, tidy = "styler"}
# Filter for only merged MSCs
merged_mscs <- filtered_issues |>
filter("proposal" %in% labels) |>
filter(("disposition-merge" %in% labels) | ("merged" %in% labels))
# Keep only proposals that were merged or are in disposition-merge.
# `issues_gql_all` is rowwise, so `labels` is a single cell here: either NA
# (no labels) or a data frame with a `name` column. is.data.frame() gives
# one TRUE/FALSE per row; is.na() on a data frame returns a matrix, which
# `&&` rejects in R >= 4.3.
merged_mscs <- issues_gql_all |>
  filter(
    is.data.frame(labels) &&
      "proposal" %in% labels$name &&
      any(c("disposition-merge", "merged") %in% labels$name)
  )
# Filter MSCs by company
merged_element <- merged_mscs |>
filter(user$login %in% element_employee) |>
nrow()
filter(!is.null(author) && is.element(author, element_employee)) |>
nrow()
merged_sct <- merged_mscs |>
filter(user$login %in% sct_employee) |>
nrow()
filter(!is.null(author) && is.element(author, sct_employee)) |>
nrow()
merged_famedly <- merged_mscs |>
filter(user$login %in% famedly_employee) |>
nrow()
filter(!is.null(author) && is.element(author, famedly_employee)) |>
nrow()
merged_beeper <- merged_mscs |>
filter(user$login %in% beeper_employee) |>
nrow()
filter(!is.null(author) && is.element(author, beeper_employee)) |>
nrow()
merged_other <- merged_mscs |>
filter(!(user$login %in% element_employee)) |>
filter(!(user$login %in% famedly_employee)) |>
filter(!(user$login %in% beeper_employee)) |>
filter(!(user$login %in% sct_employee)) |>
nrow()
merged_other <- nrow(merged_mscs) - merged_element - merged_beeper - merged_famedly - merged_sct
# Display Data
data <- data.frame(
group = c("Element", "Beeper", "Famedly", "SCT", "Other"),
value = c(merged_element, merged_beeper, merged_famedly, merged_sct, merged_other)
)
# Compute the position of labels
data <- data |>
filter(value != 0) |>
arrange(desc(group)) |>
mutate(
prop = value/sum(data$value) *
100
) |>
mutate(
ypos = cumsum(prop) -
0.5 * prop
)
column_names <- c("Count")
# Display Data
# Plot input in long form: one row per company, with the company name in
# `Company` and its number of merged MSCs in `Count` (the two columns the
# ggplot call below maps to x/fill and y).
data <- data.frame(
  Company = c("Element", "Beeper", "Famedly", "SCT", "Other"),
  Count = c(
    merged_element, merged_beeper, merged_famedly, merged_sct, merged_other
  )
)
# Basic bar chart
ggplot(data, aes(x = "", y = prop, fill = group)) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar("y", start = 0) +
theme_void() + labs(
title = str_wrap("Percentage of merged MSCs by Contributors associated with companies", 40),
ggplot(data, aes(x = Company, y = Count, fill = Company)) +
geom_bar(stat = "identity") +
labs(
title = str_wrap("Number of merged MSCs by Contributors associated with companies", 40),
subtitle = str_wrap(
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
60
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
60
),
caption = "source: Github API"
) +
theme(legend.position = "none") +
geom_text(
aes(y = ypos, label = group),
color = "white", size = 5
) +
scale_fill_brewer(palette = "Set1")
)
```

396
index.md
View File

@ -7,6 +7,9 @@ MTRNord
- <a href="#get-employee-association-from-github-and-gitlab"
id="toc-get-employee-association-from-github-and-gitlab">Get Employee
association from Github and Gitlab</a>
- <a href="#get-times-for-state-transitions"
id="toc-get-times-for-state-transitions">Get times for state
transitions</a>
- <a href="#mscs-by-company-all-kind"
id="toc-mscs-by-company-all-kind">MSCs by Company (all kind)</a>
- <a href="#merged-mscs-by-company" id="toc-merged-mscs-by-company">Merged
@ -18,45 +21,184 @@ is fetched from github and gitlab as best as I was able to.
# Fetching the MSCs
``` r
# Setup things
library(gh)
library(tidyverse)
library(nplyr)
library(ggplot2)
# Set theme
theme_set(theme_bw())
# gh_whoami()
```
# Fetch all issues that are a proposal
if (!exists("issues_all")) {
issues_raw <- gh("GET /repos/matrix-org/matrix-spec-proposals/issues",
sort = "created", state = "all", direction = "asc", labels = "proposal")
issues <- as.data.frame(do.call(rbind, issues_raw))
``` r
cleanup_data <- function(prs_gql) {
prs_gql <- prs_gql[[1]][[1]][[1]][[2]]
for (i in 1:length(prs_gql)) {
prs_gql[[i]] <- prs_gql[[i]]$node
prs_gql[[i]]$temp_labels <- NA
if (length(prs_gql[[i]]$labels$edges) >= 1) {
prs_gql[[i]]$temp_labels <- list()
for (y in 1:length(prs_gql[[i]]$labels$edges)) {
prs_gql[[i]]$temp_labels[[y]] <- prs_gql[[i]]$labels$edges[[y]]$node
}
data <- list()
data[[1]] <- issues
# Paginate API
while (TRUE) {
issues_raw <- try({
gh_next(issues_raw)
}, silent = TRUE)
if (inherits(issues_raw, "try-error")) {
break
} else {
temp <- as.data.frame(do.call(rbind, issues_raw))
data[[length(data) + 1]] <- temp
}
prs_gql[[i]]$temp_labels <- map(prs_gql[[i]]$temp_labels, as.data.frame)
prs_gql[[i]]$temp_labels <- do.call(rbind, prs_gql[[i]]$temp_labels)
}
issues_all <- do.call(rbind, data)
}
prs_gql[[i]]$labels <- NA
prs_gql[[i]]$labels <- prs_gql[[i]]$temp_labels
prs_gql[[i]]$temp_labels <- NULL
}
prs_gql <- as.data.frame(do.call(rbind, prs_gql))
if (!("mergedAt" %in% colnames(prs_gql))) {
prs_gql$mergedAt <- NA
prs_gql$isPR <- FALSE
} else {
prs_gql$isPR <- TRUE
}
# Filter out corrupt issues without labels
filtered_issues <- issues_all |>
filter(sapply(labels, length, simplify = TRUE) > 0) |>
for (i in rownames(prs_gql)) {
author <- prs_gql[i, "author"]
if (!is.null(author)) {
prs_gql[i, "author"] <- do.call(rbind.data.frame, author)
}
}
return(prs_gql)
}
```
``` r
if (!exists("issues_gql_all")) {
issue_query <- 'query($after: String) {
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
issues(
states: [OPEN, CLOSED]
orderBy: {field: CREATED_AT, direction: ASC}
first: 100
after: $after
) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
title
url
author {
login
}
closedAt
createdAt
labels(first: 100) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
name
createdAt
}
}
}
}
}
}
}
}'
pr_query <- 'query($after: String) {
repository(owner: "matrix-org", name: "matrix-spec-proposals") {
pullRequests(
states: [OPEN, CLOSED, MERGED]
orderBy: {field: CREATED_AT, direction: ASC}
first: 100
after: $after
) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
title
url
author {
login
}
closedAt
mergedAt
createdAt
labels(first: 100) {
pageInfo {
startCursor
endCursor
hasNextPage
hasPreviousPage
}
edges {
node {
name
createdAt
}
}
}
}
}
}
}
}'
issues_gql <- gh_gql(issue_query)
issues_gql_pageinfo <- issues_gql[[1]][[1]][[1]][[1]]
issues_gql <- cleanup_data(issues_gql)
gql_data <- list(issues_gql)
# Paginate API
while (issues_gql_pageinfo$hasNextPage) {
variables <- list()
variables$after <- issues_gql_pageinfo$endCursor
issues_gql <- gh_gql(issue_query, variables = variables)
issues_gql_pageinfo <- issues_gql[[1]][[1]][[1]][[1]]
issues_gql <- cleanup_data(issues_gql)
gql_data[[length(gql_data) + 1]] <- issues_gql
}
prs_gql <- gh_gql(pr_query)
prs_gql_pageinfo <- prs_gql[[1]][[1]][[1]][[1]]
prs_gql <- cleanup_data(prs_gql)
gql_data[[length(gql_data) + 1]] <- prs_gql
# Paginate API
while (prs_gql_pageinfo$hasNextPage) {
variables <- list()
variables$after <- prs_gql_pageinfo$endCursor
prs_gql <- gh_gql(pr_query, variables = variables)
prs_gql_pageinfo <- prs_gql[[1]][[1]][[1]][[1]]
prs_gql <- cleanup_data(prs_gql)
gql_data[[length(gql_data) + 1]] <- prs_gql
}
issues_gql_all <- do.call(rbind, gql_data)
# Cleanup
rm(issues_gql, prs_gql, gql_data, variables, issues_gql_pageinfo, prs_gql_pageinfo)
issues_gql_all <- issues_gql_all |>
rowwise()
filtered_issues$labels <- map(filtered_issues$labels, ~do.call(rbind,
.))
}
```
# Get Employee association from Github and Gitlab
@ -66,92 +208,98 @@ or checking the github workplace field. It may also exclude some users
that are not detectable.
``` r
# TODO also check against gitlab TODO also check workplace
# thingy
# TODO also check against gitlab
# TODO also check workplace thingy
# Compile a list of who is who
if (!exists("element_employee") || !exists("famedly_employee") ||
!exists("beeper_employee")) {
element_employee <- list()
sct_employee <- c("ara4n", "erikjohnston", "richvdh", "dbkr",
"uhoreg", "anoadragon453", "turt2live", "KitsuneRal")
famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
beeper_employee <- list("Fizzadar")
users <- list()
# Get orgs of users on github
for (i in 1:nrow(filtered_issues)) {
user <- filtered_issues[i, ]$user[[1]]$login
if ((user %in% users) || (user %in% sct_employee)) {
next
}
orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
orgs <- as.data.frame(do.call(rbind, orgs_raw))
if ("vector-im" %in% orgs$login) {
element_employee[[length(element_employee) + 1]] <- user
} else if ("beeper" %in% orgs$login) {
beeper_employee[[length(beeper_employee) + 1]] <- user
} else if ("Famedly" %in% orgs$login) {
famedly_employee[[length(famedly_employee) + 1]] <- user
}
users[[length(users) + 1]] <- user
if (!exists("element_employee") || !exists("famedly_employee") || !exists("beeper_employee")) {
element_employee <- list()
sct_employee <- c("ara4n", "erikjohnston", "richvdh", "dbkr", "uhoreg", "anoadragon453", "turt2live", "KitsuneRal", "matrixbot")
famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
beeper_employee <- list("Fizzadar")
users <- list()
# Get orgs of users on github
for (i in rownames(issues_gql_all)) {
user <- issues_gql_all[i, "author"]
user <- paste(unlist(user), collapse = "")
if (is.na(user) || is.null(user) || user == "") {
next
}
if ((user %in% users) || (user %in% sct_employee)) {
next
}
orgs_raw <- gh(sprintf("GET /users/%s/orgs", user))
orgs <- as.data.frame(do.call(rbind, orgs_raw))
if ("vector-im" %in% orgs$login) {
element_employee[[length(element_employee) + 1]] <- user
} else if ("beeper" %in% orgs$login) {
beeper_employee[[length(beeper_employee) + 1]] <- user
} else if ("Famedly" %in% orgs$login) {
famedly_employee[[length(famedly_employee) + 1]] <- user
}
users[[length(users) + 1]] <- user
}
rm(orgs, orgs_raw, user, author, i, users)
}
```
# Get times for state transitions
``` r
opened_to_proposal <- issues_gql_all
```
# MSCs by Company (all kind)
Note that this does not adjust for private vs company MSCs.
``` r
# Filter MSCs by company
merged_element <- filtered_issues |>
filter(user$login %in% element_employee) |>
nrow()
mscs_element <- issues_gql_all |>
filter(!is.null(author) && is.element(author, element_employee)) |>
nrow()
merged_sct <- filtered_issues |>
filter(user$login %in% sct_employee) |>
nrow()
mscs_sct <- issues_gql_all |>
filter(!is.null(author) && is.element(author, sct_employee)) |>
nrow()
merged_famedly <- filtered_issues |>
filter(user$login %in% famedly_employee) |>
nrow()
mscs_famedly <- issues_gql_all |>
filter(!is.null(author) && is.element(author, famedly_employee)) |>
nrow()
merged_beeper <- filtered_issues |>
filter(user$login %in% beeper_employee) |>
nrow()
mscs_beeper <- issues_gql_all |>
filter(!is.null(author) && is.element(author, beeper_employee)) |>
nrow()
merged_other <- filtered_issues |>
filter(!(user$login %in% element_employee)) |>
filter(!(user$login %in% famedly_employee)) |>
filter(!(user$login %in% beeper_employee)) |>
filter(!(user$login %in% sct_employee)) |>
nrow()
mscs_other <- nrow(issues_gql_all) - mscs_element - mscs_beeper - mscs_famedly - mscs_sct
column_names <- c("Count")
# Display Data
data <- data.frame(group = c("Element", "Beeper", "Famedly",
"SCT", "Other"), value = c(merged_element, merged_beeper,
merged_famedly, merged_sct, merged_other))
# Compute the position of labels
data <- data |>
filter(value != 0) |>
arrange(desc(group)) |>
mutate(prop = value/sum(data$value) * 100) |>
mutate(ypos = cumsum(prop) - 0.5 * prop)
data <- data.frame(column_names = column_names, Element = mscs_element, Beeper = mscs_beeper, Famedly = mscs_famedly, SCT = mscs_sct, Other = mscs_other)
data2 <- data.frame(t(data[-1]))
colnames(data2) <- data[, 1]
data <- data2
data <- cbind(Company = rownames(data), data)
rownames(data) <- 1:nrow(data)
rownames(data) <- NULL
rm(data2)
# Basic piechart
ggplot(data, aes(x = "", y = prop, fill = group)) + geom_bar(stat = "identity",
width = 1, color = "white") + coord_polar("y", start = 0) +
theme_void() + labs(title = str_wrap("Percentage of MSCs by Contributors associated with companies",
40), subtitle = str_wrap("Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
60), caption = "source: Github API") + theme(legend.position = "none") +
geom_text(aes(y = ypos, label = group), color = "white",
size = 5) + scale_fill_brewer(palette = "Set1")
ggplot(data, aes(x = Company, y = Count, fill = Company)) +
geom_bar(stat = "identity") +
labs(
title = str_wrap("Number of MSCs by Contributors associated with companies", 40),
subtitle = str_wrap(
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
60
),
caption = "source: Github API"
)
```
![](index_files/figure-gfm/unnamed-chunk-3-1.jpeg)<!-- -->
![](index_files/figure-gfm/unnamed-chunk-6-1.jpeg)<!-- -->
# Merged MSCs by Company
@ -159,56 +307,52 @@ Note that this does not adjust for private vs company MSCs.
``` r
# Filter for only merged MSCs
merged_mscs <- filtered_issues |>
filter("proposal" %in% labels) |>
filter(("disposition-merge" %in% labels) | ("merged" %in%
labels))
merged_mscs <- issues_gql_all |>
filter(!is.na(labels) && is.element("proposal", labels$name) && (is.element("disposition-merge", labels$name) || is.element("merged", labels$name)))
# Filter MSCs by company
merged_element <- merged_mscs |>
filter(user$login %in% element_employee) |>
nrow()
filter(!is.null(author) && is.element(author, element_employee)) |>
nrow()
merged_sct <- merged_mscs |>
filter(user$login %in% sct_employee) |>
nrow()
filter(!is.null(author) && is.element(author, sct_employee)) |>
nrow()
merged_famedly <- merged_mscs |>
filter(user$login %in% famedly_employee) |>
nrow()
filter(!is.null(author) && is.element(author, famedly_employee)) |>
nrow()
merged_beeper <- merged_mscs |>
filter(user$login %in% beeper_employee) |>
nrow()
filter(!is.null(author) && is.element(author, beeper_employee)) |>
nrow()
merged_other <- merged_mscs |>
filter(!(user$login %in% element_employee)) |>
filter(!(user$login %in% famedly_employee)) |>
filter(!(user$login %in% beeper_employee)) |>
filter(!(user$login %in% sct_employee)) |>
nrow()
merged_other <- nrow(merged_mscs) - merged_element - merged_beeper - merged_famedly - merged_sct
# Display Data
data <- data.frame(group = c("Element", "Beeper", "Famedly",
"SCT", "Other"), value = c(merged_element, merged_beeper,
merged_famedly, merged_sct, merged_other))
# Compute the position of labels
data <- data |>
filter(value != 0) |>
arrange(desc(group)) |>
mutate(prop = value/sum(data$value) * 100) |>
mutate(ypos = cumsum(prop) - 0.5 * prop)
column_names <- c("Count")
# Display Data
data <- data.frame(column_names = column_names, Element = merged_element, Beeper = merged_beeper, Famedly = merged_famedly, SCT = merged_sct, Other = merged_other)
data2 <- data.frame(t(data[-1]))
colnames(data2) <- data[, 1]
data <- data2
data <- cbind(Company = rownames(data), data)
rownames(data) <- 1:nrow(data)
rownames(data) <- NULL
rm(data2)
# Basic piechart
ggplot(data, aes(x = "", y = prop, fill = group)) + geom_bar(stat = "identity",
width = 1, color = "white") + coord_polar("y", start = 0) +
theme_void() + labs(title = str_wrap("Percentage of merged MSCs by Contributors associated with companies",
40), subtitle = str_wrap("Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
60), caption = "source: Github API") + theme(legend.position = "none") +
geom_text(aes(y = ypos, label = group), color = "white",
size = 5) + scale_fill_brewer(palette = "Set1")
ggplot(data, aes(x = Company, y = Count, fill = Company)) +
geom_bar(stat = "identity") +
labs(
title = str_wrap("Number of merged MSCs by Contributors associated with companies", 40),
subtitle = str_wrap(
"Note that people may have gotten mixed or people with multiple hats may have MSCs landing in the wrong category",
60
),
caption = "source: Github API"
)
```
![](index_files/figure-gfm/unnamed-chunk-4-1.jpeg)<!-- -->
![](index_files/figure-gfm/unnamed-chunk-7-1.jpeg)<!-- -->

BIN
index.pdf Normal file

Binary file not shown.

Binary file not shown.

Before

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 144 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 167 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 167 KiB