# 1 2.1 Zachary ----

#install.packages("igraph")

# Load igraph with library() so that a missing installation stops the script
# right here with a clear error; require() would only return FALSE and the
# failure would surface later, at make_graph().
library(igraph)

# Zachary's karate club network, one of igraph's built-in named graphs.
g <- make_graph("Zachary")
plot(g)

# Dense (non-sparse) adjacency matrix of the whole graph, printed in full.
gmat <- as_adjacency_matrix(g, type = "both", sparse = FALSE)
gmat
#>       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16] [,17]
#>  [1,]    0    1    1    1    1    1    1    1    1     0     1     1     1     1     0     0     0
#>  [2,]    1    0    1    1    0    0    0    1    0     0     0     0     0     1     0     0     0
#>  [3,]    1    1    0    1    0    0    0    1    1     1     0     0     0     1     0     0     0
#>  [4,]    1    1    1    0    0    0    0    1    0     0     0     0     1     1     0     0     0
#>  [5,]    1    0    0    0    0    0    1    0    0     0     1     0     0     0     0     0     0
#>  [6,]    1    0    0    0    0    0    1    0    0     0     1     0     0     0     0     0     1
#>  [7,]    1    0    0    0    1    1    0    0    0     0     0     0     0     0     0     0     1
#>  [8,]    1    1    1    1    0    0    0    0    0     0     0     0     0     0     0     0     0
#>  [9,]    1    0    1    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [10,]    0    0    1    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [11,]    1    0    0    0    1    1    0    0    0     0     0     0     0     0     0     0     0
#> [12,]    1    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [13,]    1    0    0    1    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [14,]    1    1    1    1    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [15,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [16,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [17,]    0    0    0    0    0    1    1    0    0     0     0     0     0     0     0     0     0
#> [18,]    1    1    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [19,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [20,]    1    1    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [21,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [22,]    1    1    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [23,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [24,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [25,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [26,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [27,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [28,]    0    0    1    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [29,]    0    0    1    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [30,]    0    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [31,]    0    1    0    0    0    0    0    0    1     0     0     0     0     0     0     0     0
#> [32,]    1    0    0    0    0    0    0    0    0     0     0     0     0     0     0     0     0
#> [33,]    0    0    1    0    0    0    0    0    1     0     0     0     0     0     1     1     0
#> [34,]    0    0    0    0    0    0    0    0    1     1     0     0     0     1     1     1     0
#>       [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32]
#>  [1,]     1     0     1     0     1     0     0     0     0     0     0     0     0     0     1
#>  [2,]     1     0     1     0     1     0     0     0     0     0     0     0     0     1     0
#>  [3,]     0     0     0     0     0     0     0     0     0     0     1     1     0     0     0
#>  [4,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#>  [5,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#>  [6,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#>  [7,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#>  [8,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#>  [9,]     0     0     0     0     0     0     0     0     0     0     0     0     0     1     0
#> [10,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [11,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [12,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [13,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [14,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [15,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [16,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [17,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [18,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [19,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [20,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [21,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [22,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [23,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [24,]     0     0     0     0     0     0     0     0     1     0     1     0     1     0     0
#> [25,]     0     0     0     0     0     0     0     0     1     0     1     0     0     0     1
#> [26,]     0     0     0     0     0     0     1     1     0     0     0     0     0     0     1
#> [27,]     0     0     0     0     0     0     0     0     0     0     0     0     1     0     0
#> [28,]     0     0     0     0     0     0     1     1     0     0     0     0     0     0     0
#> [29,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     1
#> [30,]     0     0     0     0     0     0     1     0     0     1     0     0     0     0     0
#> [31,]     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0
#> [32,]     0     0     0     0     0     0     0     1     1     0     0     1     0     0     0
#> [33,]     0     1     0     1     0     1     1     0     0     0     0     0     1     1     1
#> [34,]     0     1     1     1     0     1     1     0     0     1     1     1     1     1     1
#>       [,33] [,34]
#>  [1,]     0     0
#>  [2,]     0     0
#>  [3,]     1     0
#>  [4,]     0     0
#>  [5,]     0     0
#>  [6,]     0     0
#>  [7,]     0     0
#>  [8,]     0     0
#>  [9,]     1     1
#> [10,]     0     1
#> [11,]     0     0
#> [12,]     0     0
#> [13,]     0     0
#> [14,]     0     1
#> [15,]     1     1
#> [16,]     1     1
#> [17,]     0     0
#> [18,]     0     0
#> [19,]     1     1
#> [20,]     0     1
#> [21,]     1     1
#> [22,]     0     0
#> [23,]     1     1
#> [24,]     1     1
#> [25,]     0     0
#> [26,]     0     0
#> [27,]     0     1
#> [28,]     0     1
#> [29,]     0     1
#> [30,]     1     1
#> [31,]     1     1
#> [32,]     1     1
#> [33,]     0     1
#> [34,]     1     0
# Degree distribution of the network. hist() must receive the degree of every
# vertex; wrapping it in table() first would make hist() bin the frequency
# counts instead of the degrees themselves. The adjacency matrix printed above
# is symmetric, so in-degree and degree coincide for this graph.
hist(degree(g), xlab = "indegree", main = "Histogram of indegree")

# Number of dyads: unordered pairs among the 34 vertices.
choose(34, 2)
#> [1] 561
# NOTE(review): 483 + 78 already equals the 561 dyads above, so this sum seems
# to count the 483 empty dyads twice -- confirm what it is meant to show.
483+483+78
#> [1] 1044
# triad_census() is the current igraph name; the dotted triad.census() alias
# is deprecated.
igraph::triad_census(g)
#>  [1] 3971    0 1575    0    0    0    0    0    0    0  393    0    0    0    0   45
# I will use sna because it shows the names of the triads as well.
#sna::triad.census(gmat)
# Number of triads: unordered triples among the 34 vertices, i.e.
# 34 * 33 * 32 / 3! (divide by 6, not 3, otherwise every triad is counted
# twice). The result matches the sum of the triad census counts
# (3971 + 1575 + 393 + 45 = 5984).
choose(34, 3)
#> [1] 5984
# Vertex size from normalized degree; the multiplier and offset were found by
# trial and error. TRUE is spelled out because T can be reassigned.
# plot() is called without `mode`: it is not a plot.igraph parameter.
V(g)$size <- degree(g, normalized = TRUE) * 60 + 10
plot(g)

# Alternative scaling: vertex size is twice the raw degree (again chosen by
# trial and error).
V(g)$size <- degree(g) * 2
plot(g)

# Show the working directory: the relative ./data paths below resolve against it.
getwd()
#> [1] "/Users/anuschka/Documents/labjournal"
# Load the saved datasets into the workspace, one file after the other.
for (rdata_path in c("./data/soc_df.RData",
                     "./data/soc_collabs1.RData",
                     "./data/soc_collabs2.RData")) {
  load(rdata_path)
}
#setwd("/Users/anuschka/Documents/labjournal")

# 2 2.2 Network visualization tutorial ----

# Lookup table of everyone who belongs in the network.
# soc_collabs lists some collaborators more than once (the author reports
# roughly 400 entries, about 229 of them unique -- not verified here), so take
# the unique values of its fourth column, which holds the coauthor ids, and
# append the staff members' own Google Scholar ids (soc_df$gs_id).
sc_unique <- data.frame(sc_unique = c(unique(soc_collabs[, 4]), soc_df$gs_id))
# v1 = 1 is a marker that is convenient to select on after the merge; the
# final unique() removes rows that are still duplicated.
sc_unique <- unique(cbind(sc_unique, v1 = 1))
# Key step: join the unique sociology collaborators onto the table of
# collaborators-of-collaborators. The result, onefivedegree, is the raw
# material for the 1.5-degree network.
# library() instead of require(): a missing tidyverse must stop the script
# here rather than return FALSE and fail later at left_join().
library(tidyverse)
onefivedegree <- left_join(collabs_1deep, sc_unique, by = c(coauth_id = "sc_unique"))
# Drop the rows whose coauthor is not among the sociology collaborators (v1
# stayed NA in the join) and the rows that do not list a coauthor at all.
# is.na() flags missing ("not available") values.
onefivedegree <- onefivedegree[!is.na(onefivedegree$v1), ]
onefivedegree <- onefivedegree[!is.na(onefivedegree$coauth), ]
# Keep only the person and their coauthor and rename the two columns to
# from/to: an edge list of ties among the sociology collaborators, i.e. the
# 1.5-degree part of the network. Each row is a single tie between two people.
onefivedegree <- onefivedegree[, c("name", "coauth")]
names(onefivedegree) <- c("from", "to")
# Direct ties of the staff: the soc_collabs rows that list a coauthor, reduced
# to name/coauth and renamed to the same from/to columns as onefivedegree.
socc <- soc_collabs[!is.na(soc_collabs$coauth), c("name", "coauth")]
names(socc) <- c("from", "to")
# Stack the direct ties on top of the 1.5-degree ties to get the complete edge
# list, and store it on disk.
onefivedegree <- rbind(socc, onefivedegree)
save(onefivedegree, file = "./data/soc_onefivedegree.RData")
# graph_from_edgelist() expects a two-column matrix, hence the conversion.
onefivedegree <- as.matrix(onefivedegree)
# igraph is attached here, in anticipation of the visualization tutorial.
library(igraph)
net1 <- graph_from_edgelist(onefivedegree, directed = TRUE)
# First look with the default settings -- pretty ugly.
plot(net1)

# The same network with the styling tuned for readability.
plot(
  net1,
  # vertices: small gold dots with a gray frame
  vertex.size = 4,
  vertex.color = "gold",
  vertex.frame.color = "gray",
  # labels (names): small black Helvetica text, pulled a bit away from the vertex
  vertex.label.family = "Helvetica",
  vertex.label.color = "black",
  vertex.label.cex = 0.4,
  vertex.label.dist = 0.5,
  # edges: slightly curved, with small arrowheads showing direction
  edge.arrow.size = 0.1,
  edge.curved = 0.2
)

# Node table with one row per vertex of net1: as_ids() turns the vertex
# sequence V(net1) into a plain vector of vertex ids (here the names), which
# becomes the `name` column.
in_network <- data.frame(name = as_ids(V(net1)))

#soc_df$total_cites <- soc_df$total_cites.x
# Attach total_cites from soc_df by name. Everyone without a match in soc_df
# (presumably all researchers outside the RU staff) ends up with NA. The NA
# flag then drives the vertex colour: missing total_cites -> "#E69F00"
# (orange), known total_cites -> "#56B4E9" (blue).
ru_nodes <- soc_df[, c("name", "total_cites")]
in_network <- left_join(in_network, ru_nodes, by = "name")
in_network$vcol <- ifelse(is.na(in_network$total_cites), "#E69F00", "#56B4E9")

# Same plot as before, but the vertex colour now comes from the vcol column.
plot(
  net1,
  # vertices: colour from the node table, small, gray frame
  vertex.color = in_network$vcol,
  vertex.size = 4,
  vertex.frame.color = "gray",
  # labels (names): small black Helvetica text, pulled away from the vertex
  vertex.label.family = "Helvetica",
  vertex.label.color = "black",
  vertex.label.cex = 0.4,
  vertex.label.dist = 0.5,
  # edges: slightly curved, small arrowheads
  edge.arrow.size = 0.1,
  edge.curved = 0.2
)

# Only label the vertices with a known total_cites; everyone with a missing
# value gets no label (NA).
plot(
  net1,
  vertex.color = in_network$vcol,
  # conditional label: NA (no label) when total_cites is missing, else the name
  vertex.label = ifelse(is.na(in_network$total_cites), NA, in_network$name),
  # vertices: small, gray frame
  vertex.size = 4,
  vertex.frame.color = "gray",
  # labels: black Helvetica text, pulled a bit away from the vertex
  vertex.label.family = "Helvetica",
  vertex.label.color = "black",
  vertex.label.cex = 0.65,
  vertex.label.dist = 0.5,
  # edges: slightly curved, small arrowheads
  edge.arrow.size = 0.1,
  edge.curved = 0.2
)

# Vertex size by citations: a vertex with a known total_cites is sized by
# log(total_cites); a vertex with a missing total_cites gets the fixed size 2.
# The log scale smooths the citation counts so that a few very highly cited
# people do not dwarf everyone else, which a plain multiplier would not do.
# pmax(..., 2) keeps the size valid for 0 or 1 citations, where log() would
# give -Inf or 0; no sized vertex becomes smaller than the fallback size.
plot(net1,
     vertex.color = in_network$vcol,
     vertex.label = ifelse(!is.na(in_network$total_cites), in_network$name, NA),
     vertex.size = ifelse(!is.na(in_network$total_cites),
                          pmax(log(in_network$total_cites), 2),
                          2),
     vertex.frame.color = "gray",  # gray frame around the vertices
     vertex.label.color = "black",  # black instead of the default blue labels
     vertex.label.family = "Helvetica",  # sans-serif labels instead of Times
     vertex.label.cex = 0.65,  # slightly smaller label text
     vertex.label.dist = 0.5,  # pull the labels a bit away from the vertices
     edge.curved = 0.2,  # curved edges are always a nice touch
     edge.arrow.size = 0.1)  # smaller arrowheads

# 3 2.3 End of tutorial, improve the plot myself ----

# In line with my research topic for this course, the plot should show which
# researchers have a homepage (more active online?) and which do not. soc_df
# carries that information in its homepage column, so join it onto the node
# table by name.
ru_page <- soc_df[, c("name", "homepage")]
in_network <- left_join(in_network, ru_page, by = "name")
# With a homepage -> "#B88CE5" (purple); homepage missing -> "#94E58C" (green).
in_network$vcolp <- ifelse(!is.na(in_network$homepage), "#B88CE5", "#94E58C")

# Colour by homepage; labels are still shown only for vertices with a known
# total_cites, so the non-RU researchers stay unlabelled.
plot(
  net1,
  vertex.color = in_network$vcolp,
  # conditional label: NA (no label) when total_cites is missing, else the name
  vertex.label = ifelse(is.na(in_network$total_cites), NA, in_network$name),
  # vertices: small, gray frame
  vertex.size = 4,
  vertex.frame.color = "gray",
  # labels: black Helvetica text, pulled a bit away from the vertex
  vertex.label.family = "Helvetica",
  vertex.label.color = "black",
  vertex.label.cex = 0.65,
  vertex.label.dist = 0.5,
  # edges: slightly curved, small arrowheads
  edge.arrow.size = 0.1,
  edge.curved = 0.2
)

# The light purple is hard to tell apart from the gray edges (which should
# stay gray), so vertices with a homepage get the stronger purple "#9145DE".
in_network$vcolp <- ifelse(!is.na(in_network$homepage), "#9145DE", "#94E58C")
# People with a homepage should also appear larger. Code the homepage flag as
# the strings "1" (no homepage) / "2" (homepage) and convert that to a numeric
# column that can be used in the size calculation.
in_network$vcolps <- ifelse(!is.na(in_network$homepage), "2", "1")
in_network$vpagn <- as.numeric(in_network$vcolps)

# Researchers with a homepage get a larger vertex: 2 * 5 = 10 versus
# 1 * 1.5 = 1.5 for everyone without one.
plot(
  net1,
  vertex.color = in_network$vcolp,
  vertex.label = ifelse(is.na(in_network$total_cites), NA, in_network$name),
  vertex.size = ifelse(is.na(in_network$homepage),
                       in_network$vpagn * 1.5,
                       in_network$vpagn * 5),
  vertex.frame.color = "gray",
  # labels: black Helvetica text, pulled a bit away from the vertex
  vertex.label.family = "Helvetica",
  vertex.label.color = "black",
  vertex.label.cex = 0.65,
  vertex.label.dist = 0.5,
  # edges: slightly curved, with very small arrowheads
  edge.arrow.size = 0.01,
  edge.curved = 0.2
)

# One more addition to the plot: a different colour for men than for women.
# Join the gender column of soc_df onto the node table by name.
ru_page <- soc_df[, c("name", "gender")]
in_network <- left_join(in_network, ru_page, by = "name")

# 14 men, 13 women, and a missing value for everyone else: gender is not coded
# for the coauthors, so it is only available for these 27 people.
table(in_network$gender, useNA = "always")
#> 
#>   man woman  <NA> 
#>    14    13   209
# Colour per gender, with a separate colour for missing gender.
# as.factor() orders its levels alphabetically, so man = 1 and woman = 2;
# everyone without a coded gender stays NA.
in_network$gendern <- as.integer(as.factor(in_network$gender))
# The three cases must be decided in ONE nested ifelse(). Two separate
# assignments do not work: the second overwrites the first, and comparing a
# missing value (NA == 1) yields NA rather than FALSE, so the missings would
# end up without a colour. Test for NA first, then split men from women.
in_network$gendercol <- ifelse(
  is.na(in_network$gendern),
  "gray",
  ifelse(in_network$gendern == 1, "blue", "magenta")
)

# Final plot: vertex colour comes from the gender colour column (gendercol),
# and researchers with a homepage get the larger vertex size (vpagn * 5
# instead of vpagn * 1.5).
plot(
  net1,
  vertex.color = in_network$gendercol,
  vertex.label = ifelse(is.na(in_network$total_cites), NA, in_network$name),
  vertex.size = ifelse(is.na(in_network$homepage),
                       in_network$vpagn * 1.5,
                       in_network$vpagn * 5),
  vertex.frame.color = "gray",
  # labels: black Helvetica text, pulled a bit away from the vertex
  vertex.label.family = "Helvetica",
  vertex.label.color = "black",
  vertex.label.cex = 0.65,
  vertex.label.dist = 0.5,
  # edges: slightly curved, with very small arrowheads
  edge.arrow.size = 0.01,
  edge.curved = 0.2
)

