# Remove every object from the workspace so the script starts clean, then
# print the current working directory.
rm(list = ls())
getwd()
#> [1] "C:/Users/ninab/OneDrive/Documenten/GitHub/labjournal"

1 packages

library(data.table) 
library(tidyverse) 
require(stringi)
require(RSiena)

2 load data

# Print the working directory, then restore the objects stored in the two
# .RData files. The rest of the script works with `names_df` and
# `names_df_publications`, which are presumably restored here.
getwd()
#> [1] "C:/Users/ninab/OneDrive/Documenten/GitHub/labjournal"
load(file = "./data/names_df2_v20220106.RData")
load(file = "./data/names_df_publications_v20221006.RData")

3 selecting only the staff members from RU sociology

# Row ids removed from the staff list because their gs_id looked wrong on
# visual inspection.
deselect <- c(24, 25, 34, 35)

# Staff list: Radboud University sociologists, minus the deselected ids.
soc_df <- names_df %>%
  filter(
    affiliation == "Radboud University",
    field == "sociology",
    !(id %in% deselect)
  )

# Publications whose gs_id belongs to one of the selected staff members.
socpub_df <- names_df_publications %>%
  filter(gs_id %in% soc_df$gs_id)

# Attach the gs_id and lastname of the staff member to their publications.
# No `by` is given, so the join uses every column name the two tables share.
socpub_df <- soc_df %>%
  select(c("gs_id", "lastname")) %>%
  right_join(socpub_df)

4 cleaning names

# Keep only the part of each staff lastname before the first comma and store
# it in `lastname_pubs`.
soc_df$lastname_pubs <- soc_df$lastname %>%
  str_split(pattern = ",", n = 2, simplify = TRUE) %>%
  .[, 1] %>%
  as.character()

# Lower-case the author strings of the publications.
socpub_df$author <- socpub_df$author %>% tolower()

5 adding ethnicity

Ethnicity of the ego was coded by hand, by looking at the last name of the ego and their appearance. Everyone with a Dutch name got the score “1” and everyone with a non-Dutch name got the score “0”. The ethnic diversity of the egonet was calculated by looking at the collaborators on the papers in the period 2019-2022. Ethnicity of the alters was also decided by looking at the last name and appearance of the unique collaborators. Ethnic diversity of the egonet is operationalized as the percentage of people within the network with a non-Dutch name.

# Build, per staff member, the list of collaborator names.

# Publications with year > 2019 (i.e. 2020 onwards), reduced to the author
# string and the staff member's gs_id.
# NOTE(review): the accompanying text speaks of the period 2019-2022, but
# `year > 2019` excludes 2019 itself -- confirm which one is intended.
soc_pubs_etni <- socpub_df %>%
  filter(year > 2019) %>%
  select(c("author", "gs_id"))

# Collapse all author strings of one staff member into a single
# comma-separated string. Missing author strings are dropped explicitly:
# toString() ignores an `na.rm` argument, so passing `na.rm = TRUE` (as this
# code did before) left literal "NA" entries in the collapsed string.
gs_id <- soc_pubs_etni$gs_id
soc_pubs_etni <- stats::aggregate(
  x = soc_pubs_etni$author,
  by = list(gs_id),
  FUN = function(authors) toString(authors[!is.na(authors)])
)
names(soc_pubs_etni) <- c("gs_id", "author")

# Add the collapsed author string to the staff data (natural join on the
# shared column names).
soc_df <- left_join(soc_df, soc_pubs_etni)

# One character vector of unique collaborator names per staff member.
# lapply() replaces a `for (i in 1:nrow(soc_df))` loop, which iterates over
# c(1, 0) when soc_df has no rows.
# NOTE(review): `socpub_df$author` is lower-cased earlier in this script, so
# the two upper-case patterns below (which look intended to strip initials
# such as "J " or "J.") cannot match anything at this point. Decide whether
# the initials should be stripped before lower-casing.
author2 <- lapply(soc_df$author, function(author_string) {
  collaborators <- unique(unlist(str_split(author_string, ",")))
  collaborators <- sub(collaborators, pattern = "([A-Z]+ )", replacement = " ")
  collaborators <- sub(collaborators, pattern = "([A-Z]\\.)", replacement = " ")
  unique(trimws(collaborators))
})
# Read the hand-constructed file with the diversity information of the
# egonets. read.csv2() expects semicolon-separated fields and decimal commas.
div <- read.csv2(file = "./data/div_RU.csv")

In this Excel file: nauthors = number of alters in the ego network. Nnl = number of alters with a non-Dutch name. Div = ethnic diversity of the egonet as a percentage. Div.ego = ethnicity of the ego, where 1 = Dutch and 0 = non-Dutch.

# Append the hand-coded diversity columns to the staff data.
# cbind() pairs rows purely by position, and building the combined data frame
# can recycle a table whose row count divides the other's, so require
# identical row counts before binding.
# NOTE(review): this still assumes the rows of div_RU.csv are in the same
# order as soc_df -- confirm, or join on an identifier if the file has one.
stopifnot(nrow(div) == nrow(soc_df))
soc_df <- cbind(soc_df, div)

# Numeric copy of the `div` column, stored as `div.net`.
soc_df$div.net <- as.numeric(soc_df$div)

# Store version 1 of both data sets.
save(soc_df, file = "./data/descriptives/RU_dfv1.RData")
save(socpub_df, file = "./data/descriptives/RUpub_dfv1.RData")

6 adding age

6.1 Age of the ego is operationalized as the year of their first publication.

# Year of the first publication per staff member, one row per gs_id.
# Groups whose years are all missing get NA rather than the Inf (plus
# warning) that min(..., na.rm = TRUE) returns for an empty set.
firstpub_df <- socpub_df %>%
  group_by(gs_id) %>%
  summarise(
    pub_first = if (all(is.na(year))) NA else min(year, na.rm = TRUE)
  ) %>%
  ungroup()

# Attach pub_first by matching on gs_id instead of binding by row position.
# A positional cbind(soc_df, firstpub_df) only lines up when both tables hold
# the same staff members in the same row order, and it adds a second gs_id
# column. Staff members without any publication row get NA.
soc_df$pub_first <- firstpub_df$pub_first[
  match(soc_df$gs_id, firstpub_df$gs_id)
]

7 making gender numeric

# gender3: gender with "female" recoded to "0" and "male" recoded to "1".
soc_df$gender3 <- recode(soc_df$gender, "female" = "0", "male" = "1")
# gender4: numeric version of gender3. Convert via character first: if
# `gender` is a factor, recode() returns a factor, and as.numeric() on a
# factor yields the internal level codes (1, 2, ...) instead of 0/1.
soc_df$gender4 <- as.numeric(as.character(soc_df$gender3))

8 deleting scholars with missing value on dependent variable -> Renea Loh

# Keep only the staff members whose div.net value is not missing, then store
# version 2 of both data sets.
soc_df <- soc_df[complete.cases(soc_df$div.net), ]
save(soc_df, file = "./data/descriptives/RU_dfv2.RData")
save(socpub_df, file = "./data/descriptives/RUpub_dfv2.RData")
LS0tDQp0aXRsZTogIlJVIGNvbnN0cnVjdGluZyBldGhuaWNpdHkgYW5kIGFnZSINCmF1dGhvcjogImJ5OiBOaW5hIEJyYW50ZW4iDQpiaWJsaW9ncmFwaHk6IHJlZmVyZW5jZXMuYmliDQotLS0NCg0KDQoNCmBgYHtyLCBpbnN0YWxsIHJlbW90ZWx5LCBnbG9iYWxzZXR0aW5ncywgZWNobz1GQUxTRSwgd2FybmluZz1GQUxTRSwgcmVzdWx0cz0naGlkZScsIGV2YWw9RkFMU0V9DQppbnN0YWxsLnBhY2thZ2VzKCJyZW1vdGVzIikNCnJlbW90ZXM6Omluc3RhbGxfZ2l0aHViKCJybGVzdXIva2xpcHB5IikNCmBgYCANCg0KYGBge3IsIGdsb2JhbHNldHRpbmdzLCBlY2hvPUZBTFNFLCB3YXJuaW5nPUZBTFNFLCByZXN1bHRzPSdoaWRlJ30NCmxpYnJhcnkoa25pdHIpDQpsaWJyYXJ5KHJnbCkNCg0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFKQ0Kb3B0c19jaHVuayRzZXQodGlkeS5vcHRzPWxpc3Qod2lkdGguY3V0b2ZmPTEwMCksdGlkeT1UUlVFLCB3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRSxjb21tZW50ID0gIiM+IiwgY2FjaGU9VFJVRSwgY2xhc3Muc291cmNlPWMoInRlc3QiKSwgY2xhc3Mub3V0cHV0PWMoInRlc3QyIikpDQpvcHRpb25zKHdpZHRoID0gMTAwKQ0KcmdsOjpzZXR1cEtuaXRyKCkNCg0KDQoNCmNvbG9yaXplIDwtIGZ1bmN0aW9uKHgsIGNvbG9yKSB7c3ByaW50ZigiPHNwYW4gc3R5bGU9J2NvbG9yOiAlczsnPiVzPC9zcGFuPiIsIGNvbG9yLCB4KSB9DQoNCmBgYA0KDQpgYGB7ciBrbGlwcHksIGVjaG89RkFMU0UsIGluY2x1ZGU9VFJVRX0NCmtsaXBweTo6a2xpcHB5KHBvc2l0aW9uID0gYygndG9wJywgJ3JpZ2h0JykpDQoja2xpcHB5OjprbGlwcHkoY29sb3IgPSAnZGFya3JlZCcpDQoja2xpcHB5OjprbGlwcHkodG9vbHRpcF9tZXNzYWdlID0gJ0NsaWNrIHRvIGNvcHknLCB0b29sdGlwX3N1Y2Nlc3MgPSAnRG9uZScpDQpgYGANCg0KDQpgYGB7cn0NCiNzdGFydCB3aXRoIGNsZWFuIHdvcmtzcGFjZSANCnJtKGxpc3Q9bHMoKSkNCmdldHdkKCkNCmBgYA0KDQojIHBhY2thZ2VzDQoNCmBgYHtyfQ0KbGlicmFyeShkYXRhLnRhYmxlKSANCmxpYnJhcnkodGlkeXZlcnNlKSANCnJlcXVpcmUoc3RyaW5naSkNCnJlcXVpcmUoUlNpZW5hKQ0KDQpgYGANCg0KIyBsb2FkIGRhdGENCg0KYGBge3J9DQpnZXR3ZCgpDQpsb2FkKCIuL2RhdGEvbmFtZXNfZGYyX3YyMDIyMDEwNi5SRGF0YSIpDQpsb2FkKCIuL2RhdGEvbmFtZXNfZGZfcHVibGljYXRpb25zX3YyMDIyMTAwNi5SRGF0YSIpDQoNCmBgYA0KDQojIHNlbGVjdGluZyBvbmx5IHRoZSBzdGFmZiBtZW1iZXJzIGZyb20gUlUgc29jaW9sb2d5DQoNCmBgYHtyfQ0KDQojd3JvbmcgZ3NfaWQgKGp1c3QgYnkgZXllLWJhbGxpbmcpDQpkZXNlbGVjdCA8LSBjKDI0LDI1LDM0LDM1KQ0KDQojc2VsZWN0aW5nIG9ubHkgdGhlIG5hbWVzIG9mIFJVIHNvY2lvbG9neSBzdGFmZiBtZW1iZXJzDQoNCm5hbWVzX2RmICU+JSANCiAgZmlsdGVyKGFmZmlsaWF0aW9uPT0i
UmFkYm91ZCBVbml2ZXJzaXR5IikgJT4lDQogIGZpbHRlcihmaWVsZD09InNvY2lvbG9neSIpICU+JSANCiAgZmlsdGVyKCEoaWQgJWluJSBkZXNlbGVjdCkpIC0+IHNvY19kZg0KDQojIFNlbGVjdGluZyBvbmx5IHRoZSBwdWJsaWNhdGlvbnMgb2YgUlUgc29jaW9sb2d5IHN0YWZmIG1lbWJlcnMNCg0KbmFtZXNfZGZfcHVibGljYXRpb25zICU+JSANCiAgZmlsdGVyKGdzX2lkICVpbiUgc29jX2RmJGdzX2lkKSAtPiBzb2NwdWJfZGYNCiAgDQojYXR0YWNoaW5nIHRoZSBncyBpZCBhbmQgbGFzbmFtZSBvZiBzdGFmZiBtZW1iZXIgdG8gdGhlaXIgcHVibGljYXRpb25zIGluIHNvY3B1Yl9kZg0KDQpzb2NfZGYgJT4lIA0KICBzZWxlY3QoYygiZ3NfaWQiLCAibGFzdG5hbWUiKSkgJT4lDQogIHJpZ2h0X2pvaW4oc29jcHViX2RmKSAtPiBzb2NwdWJfZGYNCg0KYGBgDQoNCg0KIyBjbGVhbmluZyBuYW1lcw0KDQpgYGB7cn0NCnNvY19kZiRsYXN0bmFtZV9wdWJzIDwtIGFzLmNoYXJhY3RlcihzdHJfc3BsaXQoc29jX2RmJGxhc3RuYW1lLCBwYXR0ZXJuPSIsIiwgbiA9IDIsIHNpbXBsaWZ5ID0gVFJVRSlbLDFdKSANCg0Kc29jcHViX2RmJGF1dGhvciA8LSB0b2xvd2VyKHNvY3B1Yl9kZiRhdXRob3IpDQoNCmBgYA0KDQoNCiMgYWRkaW5nIGV0aG5pY2l0eQ0KRXRobmljaXR5IG9mIHRoZSBlZ28gd2FzIGNvbmR1Y3RlZCBieSBoYW5kLCBieSBsb29raW5nIGF0IHRoZSBsYXN0IG5hbWUgb2YgdGhlIGVnbyBhbmQgdGhlaXIgYXBwZWFyYW5jZS4gRXZlcnlvbmUgd2l0aCBhIER1dGNoIG5hbWUgZ290IHRoZSBzY29yZSAiMSIgYW5kIGV2ZXJ5b25lIHdpdGggYSBEdXRjaCBuYW1lIGdvdCB0aGUgc2NvcmUgIjAiLg0KVGhlIGV0aG5pYyBkaXZlcnNpdHkgb2YgdGhlIGVnb25ldCB3YXMgY2FjdWxhdGVkIGJ5IGxvb2tpbmcgYXQgdGhlIGNvbGxhYm9yYXRvcnMgb2YgdGhlIHBhcGVycyBpbiB0aGUgcGVyaW9kIDIwMTktMjAyMi4gRXRobmljaXR5IG9mIHRoZSBhbHRlcnMgd2FzIGFsc28gZGVjaWRlZCBieSBsb29raW5nIGF0IHRoZSBsYXN0IG5hbWUgYW5kIGFwcGVhcmFuY2Ugb2YgdGhlIHVuaXF1ZSBjb2xsYWJvcmF0b3JzLiBFdGhuaWNpdHkgb2YgdGhlIGVnb25ldCBpcyBvcGVyYXRpb25hbGl6ZWQgYXMgdGhlIHBlcmNlbnRhZ2Ugb2YgcGVvcGxlIHdpdGhpbiB0aGUgbmV0d29yayB3aXRoIGEgbm9uLUR1dGNoIG5hbWUuDQotLS0tDQoNCmBgYHtyfQ0KI3NlbGVjdCB0aGUgcHVibGljYXRpb24gb2YgdGhlc2Ugc3RhZmYgbWVtYmVycyB5b3Ugd2FudA0Kc29jcHViX2RmICU+JSBmaWx0ZXIoeWVhcj4yMDE5KSAlPiUNCiAgc2VsZWN0KGMoImF1dGhvciIsICJnc19pZCIpKSAtPiBzb2NfcHVic19ldG5pDQpgYGANCg0KYGBge3J9DQojYWRkaW5nIGNvbGxhYm9yYXRvcnMgZWdvIHRvIHRoZSBzb2NfZGYgZGF0YWZyYW1lDQpnc19pZCA8LSBzb2NfcHVic19ldG5pJGdzX2lkDQpzb2NfcHVic19ldG5pIDwtIHN0YXRzOjphZ2dyZWdhdGUoeD1zb2NfcHVi
c19ldG5pJGF1dGhvciwgYnk9bGlzdChnc19pZCksIEZVTj10b1N0cmluZywgbmEucm09VFJVRSkNCm5hbWVzKHNvY19wdWJzX2V0bmkpIDwtIGMoImdzX2lkIiwgImF1dGhvciIpDQoNCnNvY19kZiA8LSBsZWZ0X2pvaW4oc29jX2RmLCBzb2NfcHVic19ldG5pKQ0KDQphdXRob3IyIDwtIGxpc3QoKQ0KZm9yIChpIGluIDE6IG5yb3coc29jX2RmKSkgew0KICBhdXRob3IyW1tpXV0gPC0gdW5pcXVlKHVubGlzdChzdHJfc3BsaXQoc29jX2RmJGF1dGhvcltpXSwgIiwiKSApKQ0KICBhdXRob3IyW1tpXV0gPC0gc3ViKGF1dGhvcjJbW2ldXSwgcGF0dGVybj0iKFtBLVpdKyApIiwgcmVwbGFjZW1lbnQ9IiAiKQ0KICBhdXRob3IyW1tpXV0gPC0gc3ViKGF1dGhvcjJbW2ldXSwgcGF0dGVybj0iKFtBLVpdXFwuKSIsIHJlcGxhY2VtZW50PSIgIikNCiAgYXV0aG9yMltbaV1dIDwtIHRyaW13cyhhdXRob3IyW1tpXV0pDQogIGF1dGhvcjJbW2ldXSA8LSB1bmlxdWUoYXV0aG9yMltbaV1dKQ0KfQ0KYGBgDQoNCmBgYHtyfQ0KI2xvYWRpbmcgZXhjZWwgZmlsZSB3aXRoIGRhdGEgd2l0aCB0aGUgaW5mb3JtYXRpb24gb2YgZGl2ZXJzaXR5IG9mIHRoZSBlZ29uZXQsIGNvbnN0cnVjdGVkIGJ5IGhhbmQNCmRpdiA8LSByZWFkLmNzdjIoIi4vZGF0YS9kaXZfUlUuY3N2IikNCmBgYA0KDQpJbiB0aGlzIGV4Y2VsIGZpbGU6IG5hdXRob3JzID0gbnVtYmVyIG9mIGFsdGVycyBpbiB0aGUgZWdvbmV0d29yay4gTm5sID0gbnVtYmVyIG9mIGFsdGVycyB3aXRoIGEgbm9uLUR1dGNoIG5hbWUuIERpdiA9IGV0aG5pYyBkaXZlcnNpdHkgb2YgZWdvbmV0IGluIHBlcmNlbnRhZ2UuIERpdi5lZ28gPSBkaXZlcnNpdHkgb2YgdGhlIGVnbywgd2l0aCAxIGlzIER1dGNoIGFuZCAwIGlzIG5vbi1EdXRjaA0KDQpgYGB7cn0NCg0KI2FkZGluZyBldGhuaWNpdHkgdG8gc29jX2RmDQpzb2NfZGYgPC0gY2JpbmQoc29jX2RmLCBkaXYpIA0KYGBgDQoNCmBgYHtyfQ0Kc29jX2RmJGRpdi5uZXQgPC0gYXMubnVtZXJpYyhzb2NfZGYkZGl2KQ0KYGBgDQoNCg0KYGBge3J9DQpzYXZlKHNvY19kZiwgZmlsZT0iLi9kYXRhL2Rlc2NyaXB0aXZlcy9SVV9kZnYxLlJEYXRhIikNCnNhdmUoc29jcHViX2RmLCBmaWxlPSIuL2RhdGEvZGVzY3JpcHRpdmVzL1JVcHViX2RmdjEuUkRhdGEiKQ0KDQpgYGANCg0KIyBhZGRpbmcgYWdlDQpBZ2Ugb2YgdGhlIGVnbyBpcyBvcGVyYXRpb25hbGl6ZWQgYXMgdGhlIHllYXIgb2YgdGhlaXIgZmlyc3QgcHVibGljYXRpb24uDQotLS0tDQpgYGB7cn0NCiN5ZWFyIGZpcnN0IHB1Yg0Kc29jcHViX2RmICU+JSBncm91cF9ieShnc19pZCkgJT4lDQogIG11dGF0ZShwdWJfZmlyc3QgPSBtaW4oeWVhciwgbmEucm09VFJVRSkpICU+JSANCiAgc2VsZWN0KGMoImdzX2lkIiwgInB1Yl9maXJzdCIpKSAlPiUNCiAgZGlzdGluY3QoZ3NfaWQsIHB1Yl9maXJzdCwgLmtlZXBfYWxsID0gVFJVRSkgLT4gZmlyc3RwdWJfZGYNCmBgYA0KDQoNCg0KYGBge3J9DQpzb2NfZGYg
PC0gY2JpbmQoc29jX2RmLCBmaXJzdHB1Yl9kZikgDQoNCmBgYA0KDQojIG1ha2luZyBnZW5kZXIgbnVtZXJpYw0KDQpgYGB7cn0NCnNvY19kZiRnZW5kZXIzIDwtIHJlY29kZShzb2NfZGYkZ2VuZGVyLCAiZmVtYWxlIiA9ICIwIiwgIm1hbGUiID0gIjEiKQ0Kc29jX2RmJGdlbmRlcjQgPC0gYXMubnVtZXJpYyhzb2NfZGYkZ2VuZGVyMykNCmBgYA0KDQoNCiMgZGVsZXRpbmcgc2Nob2xhcnMgd2l0aCBtaXNzaW5nIHZhbHVlIG9uIGRlcGVuZGVudCB2YXJpYWJsZSAtPiBSZW5lYSBMb2gNCmBgYHtyfQ0Kc29jX2RmIDwtIHNvY19kZlshKGlzLm5hKHNvY19kZiRkaXYubmV0KSksIF0NCmBgYA0KDQpgYGB7cn0NCnNhdmUoc29jX2RmLCBmaWxlPSIuL2RhdGEvZGVzY3JpcHRpdmVzL1JVX2RmdjIuUkRhdGEiKQ0Kc2F2ZShzb2NwdWJfZGYsIGZpbGU9Ii4vZGF0YS9kZXNjcmlwdGl2ZXMvUlVwdWJfZGZ2Mi5SRGF0YSIpDQoNCmBgYA==