MSCAnalysis/index.md

Analysis of matrix spec bias-rumors
================
MTRNord

- <a href="#fetching-the-mscs" id="toc-fetching-the-mscs">Fetching the
  MSCs</a>
- <a href="#get-employee-association-from-github-and-gitlab"
  id="toc-get-employee-association-from-github-and-gitlab">Get Employee
  association from Github and Gitlab</a>
- <a href="#get-times-for-state-transitions"
  id="toc-get-times-for-state-transitions">Get times for state
  transitions</a>
- <a href="#mscs-by-company-all-kind"
  id="toc-mscs-by-company-all-kind">MSCs by Company (all kind)</a>
- <a href="#merged-mscs-by-company" id="toc-merged-mscs-by-company">Merged
  MSCs by Company</a>

The following data is based purely on publicly available information. It
was fetched from GitHub and GitLab as well as I was able to.

# Fetching the MSCs

``` r
# Setup things
library(gh)
library(tidyverse)
library(hrbrthemes)
library(survminer)

# import_roboto_condensed()
# extrafont::loadfonts(device = "win")

# Eight hex colours (black first) kept available for plots; no chunk visible in
# this document references the vector yet.
# NOTE(review): the name and values suggest a colour-blind-friendly palette
# with black - confirm the intended source before relying on that.
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")
```

``` r
#' Flatten one page of a GitHub GraphQL issue / pull-request response.
#'
#' @param prs_gql Parsed response for a single page. The edge list is read
#'   positionally from `prs_gql[[1]][[1]][[1]][[2]]`, so the queried connection
#'   has to select `pageInfo` first and `edges` second (as both queries in this
#'   document do).
#' @return A data frame with one row per edge node and list columns for the
#'   node fields. `labels` holds a data frame with one row per label, or `NA`
#'   when the node carries no labels. `mergedAt` is added as `NA` when the
#'   nodes do not provide it, and `isPR` records whether they did. A page
#'   without edges yields an empty data frame instead of an error.
cleanup_data <- function(prs_gql) {
  edges <- prs_gql[[1]][[1]][[1]][[2]]

  # `1:length(edges)` would iterate over c(1, 0) here and fail on the first
  # lookup, so an empty page is answered explicitly.
  if (length(edges) == 0) {
    return(data.frame())
  }

  nodes <- vector("list", length(edges))
  for (i in seq_along(edges)) {
    node <- edges[[i]]$node
    label_edges <- node$labels$edges
    if (length(label_edges) >= 1) {
      # One single-row data frame per label, stacked into one table.
      label_rows <- lapply(label_edges, function(edge) as.data.frame(edge$node))
      node$labels <- do.call(rbind, label_rows)
    } else {
      node$labels <- NA
    }
    nodes[[i]] <- node
  }

  # rbind() on lists builds a list matrix, so every column of the resulting
  # data frame is a list column.
  result <- as.data.frame(do.call(rbind, nodes))

  # Only the pull-request query selects `mergedAt`; its presence is used to
  # tell the two kinds of page apart.
  if (!("mergedAt" %in% colnames(result))) {
    result$mergedAt <- NA
    result$isPR <- FALSE
  } else {
    result$isPR <- TRUE
  }

  # Replace each nested author entry with its row-bound content.
  # NOTE(review): this appears to unwrap `list(login = "x")` to the bare
  # login string - confirm how rows with a missing author behave.
  for (i in rownames(result)) {
    author <- result[i, "author"]
    if (!is.null(author)) {
      result[i, "author"] <- do.call(rbind.data.frame, author)
    }
  }

  result
}
```

``` r
# Download every issue and pull request of matrix-org/matrix-spec-proposals
# through the GitHub GraphQL API and combine them into `issues_gql_all`.
# The download is skipped when `issues_gql_all` already exists in the session,
# so re-running the chunk does not hit the API again.
if (!exists("issues_gql_all")) {
  # Both queries select `pageInfo` before `edges`; the positional indexing in
  # the pagination helper below and in cleanup_data() relies on that order.
  # Labels are requested with `first: 100` and their own pageInfo is not
  # followed, so at most 100 labels per item are read.
  issue_query <- 'query($after: String) {
  repository(owner: "matrix-org", name: "matrix-spec-proposals") {
    issues(
      states: [OPEN, CLOSED]
      orderBy: {field: CREATED_AT, direction: ASC}
      first: 100
      after: $after
    ) {
      pageInfo {
        startCursor
        endCursor
        hasNextPage
        hasPreviousPage
      }
      edges {
        node {
          title
          url
          author {
            login
          }
          closedAt
          createdAt
          labels(first: 100) {
            pageInfo {
              startCursor
              endCursor
              hasNextPage
              hasPreviousPage
            }
            edges {
              node {
                name
                createdAt
              }
            }
          }
        }
      }
    }
  }
}'

  pr_query <- 'query($after: String) {
  repository(owner: "matrix-org", name: "matrix-spec-proposals") {
    pullRequests(
      states: [OPEN, CLOSED, MERGED]
      orderBy: {field: CREATED_AT, direction: ASC}
      first: 100
      after: $after
    ) {
      pageInfo {
        startCursor
        endCursor
        hasNextPage
        hasPreviousPage
      }
      edges {
        node {
          title
          url
          author {
            login
          }
          closedAt
          mergedAt
          createdAt
          labels(first: 100) {
            pageInfo {
              startCursor
              endCursor
              hasNextPage
              hasPreviousPage
            }
            edges {
              node {
                name
                createdAt
              }
            }
          }
        }
      }
    }
  }
}'

  # Run `query` page by page until the API reports no further page and return
  # the cleaned pages as a list of data frames, in request order.
  fetch_all_pages <- function(query) {
    pages <- list()
    cursor <- NULL
    repeat {
      # The first request is sent without variables; later ones pass the
      # previous page's end cursor as `$after`.
      response <- if (is.null(cursor)) {
        gh_gql(query)
      } else {
        gh_gql(query, variables = list(after = cursor))
      }
      page_info <- response[[1]][[1]][[1]][[1]]
      pages[[length(pages) + 1]] <- cleanup_data(response)
      if (!isTRUE(page_info$hasNextPage)) {
        break
      }
      cursor <- page_info$endCursor
    }
    pages
  }

  # Issues first, then pull requests; rbind() matches the columns by name.
  issues_gql_all <- do.call(
    rbind,
    c(fetch_all_pages(issue_query), fetch_all_pages(pr_query))
  )

  # Cleanup: all page-level temporaries were local to the helper.
  rm(fetch_all_pages)

  # Later chunks address list columns such as `labels` one row at a time.
  issues_gql_all <- issues_gql_all |>
    rowwise()
}
```

# Get Employee association from Github and Gitlab

Please note that in the current PDF this is not yet hooked up to GitLab
and does not yet check the GitHub workplace field. It may also miss users
whose association cannot be detected.

``` r
# TODO also check against gitlab
# TODO also check workplace thingy

# Compile a list of who is who.
# Every distinct author login is looked up once via the GitHub REST API and
# assigned to at most one company by public organisation membership, checked
# in the order vector-im (Element), beeper, Famedly. The hard-coded lists seed
# the result; logins in `sct_employee` are never looked up.
# The lookup is skipped when all four lists already exist in the session.
if (!exists("element_employee") || !exists("sct_employee") ||
  !exists("famedly_employee") || !exists("beeper_employee")) {
  element_employee <- list()
  sct_employee <- c("ara4n", "erikjohnston", "richvdh", "dbkr", "uhoreg", "anoadragon453", "turt2live", "KitsuneRal", "matrixbot")
  famedly_employee <- list("deepbluev7", "Sorunome", "MTRNord")
  beeper_employee <- list("Fizzadar")

  # One login string per row; a missing author collapses to "".
  authors <- vapply(
    issues_gql_all$author,
    function(author) paste(unlist(author), collapse = ""),
    character(1)
  )
  # Deduplicate up front so each account costs exactly one API request.
  candidates <- setdiff(unique(authors), c("", sct_employee))

  # Get orgs of users on github
  for (user in candidates) {
    # `.limit = Inf` follows pagination so memberships beyond the first page
    # of results are not missed.
    orgs_raw <- gh(sprintf("GET /users/%s/orgs", user), .limit = Inf)
    org_logins <- unlist(lapply(orgs_raw, function(org) org$login))

    if ("vector-im" %in% org_logins) {
      element_employee[[length(element_employee) + 1]] <- user
    } else if ("beeper" %in% org_logins) {
      beeper_employee[[length(beeper_employee) + 1]] <- user
    } else if ("Famedly" %in% org_logins) {
      famedly_employee[[length(famedly_employee) + 1]] <- user
    }
  }

  # Remove only the temporaries that were actually created; the loop variables
  # do not exist when there was nobody to look up.
  rm(list = intersect(
    c("authors", "candidates", "user", "orgs_raw", "org_logins"),
    ls()
  ))
}
```

# Get times for state transitions

``` r
# Opened to Proposal transition
# Row-wise evaluation makes `labels` refer to the current row's own entry,
# which is either a data frame of labels or NA. `is.data.frame()` is used as
# the presence test because `!is.na()` on a data frame returns a logical
# matrix, and `&&` rejects operands longer than one in R >= 4.3.
# NOTE(review): `createdAt` of a label may be the creation time of the label
# itself rather than the time it was applied to the issue - confirm against
# the GraphQL schema before interpreting `proposalAt` as a transition time.
opened_to_proposal <- issues_gql_all |>
  rowwise() |>
  filter(is.data.frame(labels) && "proposal" %in% labels$name) |>
  mutate(proposalAt = labels$createdAt[match("proposal", labels$name)]) |>
  select(title, author, createdAt, proposalAt)

# Attribute each proposal to a company. The first matching list wins, so an
# author present in several lists is counted for the earliest one.
opened_to_proposal <- opened_to_proposal |>
  mutate(Company = case_when(
    !is.null(author) && is.element(author, element_employee) ~ "Element",
    !is.null(author) && is.element(author, sct_employee) ~ "SCT",
    !is.null(author) && is.element(author, famedly_employee) ~ "Famedly",
    !is.null(author) && is.element(author, beeper_employee) ~ "Beeper",
    TRUE ~ "Other"
  )) |>
  group_by(Company)

# Proposal to having impl
# FIXME this doesn't work at this time as we don't know when the labels got removed. We need timelineItems() in the graphql query for this
# proposal_to_impl <- issues_gql_all |>
#  filter(is.data.frame(labels) && "proposal" %in% labels$name) |>
#  select(title, author, createdAt)
```

# MSCs by Company (all kind)

Note that this does not distinguish between MSCs written privately and
MSCs written on behalf of a company.

``` r
# Filter MSCs by company
# An item counts as an MSC when it carries the "proposal" label. Row-wise
# evaluation makes `labels` the row's own label data frame (or NA when it has
# none); `is.data.frame()` replaces `!is.na()`, whose matrix result `&&`
# rejects in R >= 4.3.
mscs <- issues_gql_all |>
  rowwise() |>
  filter(is.data.frame(labels) && "proposal" %in% labels$name)

# Number of MSCs per company, based on the author's login.
mscs_element <- mscs |>
  filter(!is.null(author) && is.element(author, element_employee)) |>
  nrow()

mscs_sct <- mscs |>
  filter(!is.null(author) && is.element(author, sct_employee)) |>
  nrow()

mscs_famedly <- mscs |>
  filter(!is.null(author) && is.element(author, famedly_employee)) |>
  nrow()

mscs_beeper <- mscs |>
  filter(!is.null(author) && is.element(author, beeper_employee)) |>
  nrow()

# Everything not attributed above. This assumes no author appears in more
# than one company list; otherwise that author is subtracted twice.
mscs_other <- nrow(mscs) - mscs_element - mscs_beeper - mscs_famedly - mscs_sct

# Display Data
data <- data.frame(Company = c("Beeper", "Element", "Famedly", "Other", "SCT"), Count = c(mscs_beeper, mscs_element, mscs_famedly, mscs_other, mscs_sct))
# Reverse the row order and freeze it as the factor level order, so the plot
# keeps this order instead of re-sorting the companies.
data <- map_df(data, rev)
data$Company <- factor(data$Company, levels = data$Company)


# Horizontal bar chart: one bar per company with the count printed in white
# inside the bar end.
ggplot(data, aes(x = Company, y = Count)) +
  # geom_col() draws bar heights straight from `Count`.
  geom_col(fill = "#69b3a2") +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  labs(
    title = str_wrap("MSCs from companies", 40),
    caption = "source: Github API"
  ) +
  # The company axis carries no title; the bar labels speak for themselves.
  xlab("") +
  geom_text(
    aes(label = Count),
    hjust = 1.5,
    colour = "white"
  ) +
  coord_flip()
```

![](index_files/figure-gfm/unnamed-chunk-6-1.jpeg)<!-- -->

# Merged MSCs by Company

Note that this does not distinguish between MSCs written privately and
MSCs written on behalf of a company.

``` r
# Filter for only merged MSCs
# An MSC counts as merged when it carries the "proposal" label together with
# "disposition-merge" or "merged". Row-wise evaluation makes `labels` the
# row's own label data frame (or NA when it has none); `is.data.frame()`
# replaces `!is.na()`, whose matrix result `&&` rejects in R >= 4.3.
merged_mscs <- issues_gql_all |>
  rowwise() |>
  filter(
    is.data.frame(labels) &&
      "proposal" %in% labels$name &&
      any(c("disposition-merge", "merged") %in% labels$name)
  )

# Filter MSCs by company
merged_element <- merged_mscs |>
  filter(!is.null(author) && is.element(author, element_employee)) |>
  nrow()

merged_sct <- merged_mscs |>
  filter(!is.null(author) && is.element(author, sct_employee)) |>
  nrow()

merged_famedly <- merged_mscs |>
  filter(!is.null(author) && is.element(author, famedly_employee)) |>
  nrow()

merged_beeper <- merged_mscs |>
  filter(!is.null(author) && is.element(author, beeper_employee)) |>
  nrow()

# Everything not attributed above. This assumes no author appears in more
# than one company list; otherwise that author is subtracted twice.
merged_other <- nrow(merged_mscs) - merged_element - merged_beeper - merged_famedly - merged_sct

# Display Data
data <- data.frame(Company = c("Beeper", "Element", "Famedly", "Other", "SCT"), Count = c(merged_beeper, merged_element, merged_famedly, merged_other, merged_sct))
# Reverse the row order and freeze it as the factor level order, so the plot
# keeps this order instead of re-sorting the companies.
data <- map_df(data, rev)
data$Company <- factor(data$Company, levels = data$Company)


# Horizontal bar chart: one bar per company with the count printed in white
# inside the bar end.
ggplot(data, aes(x = Company, y = Count)) +
  # geom_col() draws bar heights straight from `Count`.
  geom_col(fill = "#69b3a2") +
  theme_ipsum() +
  theme(
    panel.grid.minor.y = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  ) +
  labs(
    title = str_wrap("Merged MSCs from companies", 40),
    caption = "source: Github API"
  ) +
  # The company axis carries no title; the bar labels speak for themselves.
  xlab("") +
  geom_text(
    aes(label = Count),
    hjust = 1.5,
    colour = "white"
  ) +
  coord_flip()
```

![](index_files/figure-gfm/unnamed-chunk-7-1.jpeg)<!-- -->