# install.packages('network', .libPaths(), repos='http://cran.us.r-project.org')
# install.packages('ergm', .libPaths(), repos='http://cran.us.r-project.org')
# install.packages('devtools', .libPaths(), repos='http://cran.us.r-project.org')
devtools::install_github('tedhchen/ergmWorkshopTools')
library(ergm)
library(ergmWorkshopTools)
As you can see from the output, loading the ergm package also loaded the network package.
There are also these references to the statnet project. They are the main developers of ERGM functionalities in R.
# Load the mid_mat dataset into the workspace and open its help page.
data("mid_mat")
help("mid_mat")
checkmat() is a utility function that plots the supplied adjacency matrix.
Note about plotting: You can ignore the plotting options if you are not using a jupyter notebook.
# repr.plot.* only controls the figure size inside a Jupyter notebook.
options(
  repr.plot.width = 10,
  repr.plot.height = 10
)
# Visual check of the adjacency matrix.
checkmat(mid_mat)
# Use the network function to create a network object.
?network

# Build an undirected network from the adjacency matrix and print its summary.
nw <- network(mid_mat, directed = FALSE)
nw

# Single-panel plot; the seed is set immediately before plotting so the
# figure is reproducible across runs.
options(repr.plot.width = 10, repr.plot.height = 10)
par(mfrow = c(1, 1))
set.seed(210615)
plot(nw)
The adjacency matrix format is generally the easiest to work with, but it can take a lot of space.
Sometimes, we will have edgelists instead. In fact, unless you have pre-prepared network data, edgelists are usually what you'll have.
# Build the same kind of undirected network, this time from an edgelist.
data(mid_edgelist)
nw.b <- network(mid_edgelist, directed = FALSE)
nw.b

# Plot the matrix-based and edgelist-based networks side by side.
options(repr.plot.width = 20, repr.plot.height = 10)
par(mfrow = c(1, 2))
# Reset the seed before EACH plot so both panels start from the same random
# state; with a single set.seed() the second plot would use a different state
# and the two panels could not be compared directly.
set.seed(210615)
plot(nw)
set.seed(210615)
plot(nw.b)
# Load the node-level attribute table, preview its first rows, and open its
# help page.
data("mid_node_attr")
head(mid_node_attr)
help("mid_node_attr")
# Build an adjacency matrix by hand from the edgelist.
# The matrix is sized from the node attribute table instead of a hard-coded
# 189, so the code keeps working if the data set changes.
node_ids <- row.names(mid_node_attr)
mat <- matrix(
  0,
  nrow = length(node_ids),
  ncol = length(node_ids),
  dimnames = list(node_ids, node_ids)
)
# Peek at the top-left corner of the (still empty) matrix.
mat[1:10, 1:10]

# seq_len() yields an empty sequence for a zero-row edgelist, whereas
# 1:nrow() would iterate over c(1, 0).
for (i in seq_len(nrow(mid_edgelist))) {
  # The network is undirected, so each tie is recorded in both directions.
  mat[mid_edgelist[i, 1], mid_edgelist[i, 2]] <- 1
  mat[mid_edgelist[i, 2], mid_edgelist[i, 1]] <- 1
}
# Two panels: the hand-built matrix next to the packaged one.
options(
  repr.plot.width = 14,
  repr.plot.height = 7
)
par(mfrow = c(1, 2))
checkmat(mat)
checkmat(mid_mat)

# Print the network object again.
nw
The %v% operator is how to access vertex/node variables from the network object.
# Read the vertex names stored on the network object.
nw %v% "vertex.names"

# Attach the dem column of the node table as a vertex attribute.
# NOTE(review): this relies on the rows of mid_node_attr being in the same
# order as the vertices of nw - confirm.
nw %v% "dem" <- mid_node_attr$dem
nw
nw %v% "dem"

# Colour vertices by the dem attribute: colour 2 where it is truthy, else 1.
options(repr.plot.width = 10, repr.plot.height = 10)
set.seed(210615)
plot(nw, vertex.col = ifelse(nw %v% "dem", 2, 1))
legend(
  "bottomright",
  legend = c("Democracy", "Nondemocracy"),
  fill = c(2, 1),
  bty = "n",
  cex = 1.1
)
These are the geographical contiguity and joint-democracy networks.
# Load the two dyadic covariate matrices.
data(contig)
data(joint_dem)

# Plot each one as an undirected network, side by side.
options(repr.plot.width = 16, repr.plot.height = 8)
par(mfrow = c(1, 2))
set.seed(210615)
plot(network(contig, directed = FALSE), sub = "Contiguity", cex.sub = 2)
plot(network(joint_dem, directed = FALSE), sub = "Joint-Democracy", cex.sub = 2)
# Use ?`ergm-terms` to look at all the existing terms in the ergm package.
# The search.ergmTerms function also works well.
?`ergm-terms`
search.ergmTerms(keyword = "transitive")

# Baseline model: edges, a node-level factor for dem, and the joint-democracy
# dyadic covariate.
m0 <- ergm(nw ~ edges + nodefactor("dem") + edgecov(joint_dem))
summary(m0)

# Same formula through ergmMPLE, which returns the dyad-level response,
# predictor matrix and weights used below.
dyad_df <- ergmMPLE(nw ~ edges + nodefactor("dem") + edgecov(joint_dem))
dyad_df$predictor

# Fit those dyad-level data with a weighted logistic regression.
# "- 1" drops glm's own intercept; presumably the edges column of the
# predictor matrix plays that role.
summary(glm(
  dyad_df$response ~ dyad_df$predictor - 1,
  weights = dyad_df$weights,
  family = "binomial"
))
# kstar(2) (which is the 2-star term) is the basic term for this
m1 <- ergm(
  nw ~ edges + nodefactor("dem") + edgecov(joint_dem) + kstar(2),
  control = control.ergm(seed = 210616)
)
In the ergm package, it is called gwdegree.
Hunter, DR et al. 2008. "ergm: A package to fit, simulate and diagnose exponential-family models for networks." Journal of Statistical Software.
Snijders, T et al. 2006. "New specifications for exponential random graph models." Sociological Methodology.
# Refit m1 with the geometrically weighted degree term (decay fixed at 0)
# in place of the 2-star term.
m1 <- ergm(
  nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0, fixed = TRUE),
  control = control.ergm(seed = 210616)
)
summary(m1)
# gwdeg.fixed.0 term has coefficient of -1.16
# First attempt at m2: add the raw triangle term.
m2 <- ergm(
  nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0, fixed = TRUE) + triangle,
  control = control.ergm(seed = 210616)
)

# In the ergm package, it is called gwesp
# m2 is overwritten here with the gwesp version (decay fixed at 0).
m2 <- ergm(
  nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0, fixed = TRUE) + gwesp(decay = 0, fixed = TRUE),
  control = control.ergm(seed = 210616)
)
summary(m2)
gwesp.fixed.0 is positive here
gwdegree and gwesp terms
gwesp)
Levy, M. 2016. "gwdegree: Improving interpretation of geometrically-weighted degree estimates in exponential random graph models." Journal of Open Source Software.
# [...] gwesp.
# Directed network on 10 nodes built from an all-ones 10 x 10 matrix.
tenclique <- network(matrix(1, ncol = 10, nrow = 10), directed = TRUE)
options(repr.plot.width = 7, repr.plot.height = 7)
set.seed(210615)
plot(tenclique)
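The summary function is very useful when provided with an ERGM formula: it returns the observed value of each term's statistic on the network. The general form (a template, not runnable as written) is:
summary(network ~ ergm-terms)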
# Statistics of the ten-node network: edge count, gwesp at a range of fixed
# decay values, and the transitive-triple count.
summary(
  tenclique ~ edges +
    gwesp(0, fixed = TRUE) +
    gwesp(0.5, fixed = TRUE) +
    gwesp(1, fixed = TRUE) +
    gwesp(2, fixed = TRUE) +
    gwesp(5, fixed = TRUE) +
    gwesp(10, fixed = TRUE) +
    ttriple
)
# gwesp with decay set to 0 means every edge has "one or more" shared partner - this makes it the same value as the edge count term
# ttriple term, which is a count of all edgewise shared partners of all edges (i.e. $10\times 9\times 8 = 720$)
# edges and gwesp are the same) and on the other end, you will run into degeneracy issues

# Variant of m2 with the gwdegree decay fixed at 0.2 instead of 0.
m2.b <- ergm(
  nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0.2, fixed = TRUE) + gwesp(decay = 0, fixed = TRUE),
  control = control.ergm(seed = 210616)
)
summary(m2.b)
# gof function is what we use
m2fit <- gof(m2, control = control.gof.ergm(seed = 210616))
m2fit

# Goodness-of-fit plots for m2, laid out in one row of four panels.
options(repr.plot.width = 32, repr.plot.height = 8)
par(mfrow = c(1, 4))
plot(m2fit)

# Same check for m1, for comparison.
m1fit <- gof(m1, control = control.gof.ergm(seed = 210616))
options(repr.plot.width = 32, repr.plot.height = 8)
par(mfrow = c(1, 4))
plot(m1fit)

# Show the m2 estimates again.
summary(m2)
gwesp means triadic closure is a feature of the network
nodefactor.dem will be 0 if two nondemocracies, 1 if mixed regime, and 2 if joint democracies
gwesp.fixed.0 will be 1 if the dyad is part of at least one triangle
$P(y_{ij} = 1 \mid Y, \boldsymbol{\theta}) = \operatorname{logit}^{-1}\left(\sum_{r=1}^{k} \theta_r \, \delta_r^{(ij)}(Y)\right)$
- conditional log odds of a tie given a one unit change in the statistic
- for the less complicated terms, it becomes the conditional log odds of a tie given that it is part of the local configuration one more time
Just a simple three node network with two ties to show tie formation on the empty dyad
# Three-node toy network: node 1 is tied to nodes 2 and 3; dyad (2, 3) is empty.
tri <- matrix(
  c(
    0, 1, 1,
    1, 0, 0,
    1, 0, 0
  ),
  byrow = TRUE, ncol = 3
)
nw.tri <- network(tri, directed = FALSE)

# dem attribute: node 1 = 0, nodes 2 and 3 = 1.
nw.tri %v% "dem" <- c(0, 1, 1)

# Dyadic covariate that is 1 only for the (2, 3) dyad.
jd.tri <- matrix(
  c(
    0, 0, 0,
    0, 0, 1,
    0, 1, 0
  ),
  byrow = TRUE, ncol = 3
)

options(repr.plot.width = 10, repr.plot.height = 10)
set.seed(210615)
plot(
  nw.tri,
  vertex.col = ifelse(nw.tri %v% "dem", 2, 1),
  displaylabels = TRUE
)
# The ergmMPLE function can help with this, especially with more complicated terms like the geometrically weighted ones
ergmMPLE(
  nw.tri ~ edges + nodefactor("dem") + edgecov(jd.tri) +
    gwdegree(decay = 0, fixed = TRUE) + gwesp(decay = 0, fixed = TRUE)
)$predictor
gwdegree.0 term
gwesp.0 term
gwdegree not change?
Let's look at the degree distribution:
| nodes with degree: | 0 | 1 | 2 |
|---|---|---|---|
| w/o (2,3) edge | 0 | 2 | 1 |
| w/ (2,3) edge | 0 | 0 | 3 |
# gwdegree.0 does not change
round(coefficients(m2), 2)
# NOTE(review): -1.76 is hard-coded; presumably it is a sum of the rounded
# coefficients printed above - confirm against the current m2 fit.
plogis(-1.76)
Desmarais, BA & Cranmer, SJ. 2012. "Micro-level interpretations of exponential random graph models with applications to estuary networks." Policy Studies Journal.
eval.loglik if you are exploring large and complex models
MCMC.burnin, MCMC.samplesize, and MCMC.interval for higher quality estimates
parallel to speed up estimation when working with larger models
See all the settings with
?control.ergm

# Refit the m2 specification with explicit MCMC settings and without
# evaluating the log-likelihood at fit time.
m2.c <- ergm(
  nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0, fixed = TRUE) + gwesp(decay = 0, fixed = TRUE),
  eval.loglik = FALSE,
  control = control.ergm(
    seed = 210615,
    MCMC.burnin = 50000,
    MCMC.samplesize = 2500,
    MCMC.interval = 2500,
    parallel = 0
  )
)
summary(m2.c)

# Compute the log-likelihood afterwards; with add = TRUE the result is
# assigned back to m2.c so the second summary can report it.
m2.c <- logLik(m2.c, add = TRUE)
summary(m2.c)

# MCMC diagnostics for the default-settings fit and for the tuned fit.
options(repr.plot.width = 20, repr.plot.height = 10)
mcmc.diagnostics(m2)
options(repr.plot.width = 20, repr.plot.height = 10)
mcmc.diagnostics(m2.c)
statnet suite of packages have lots of extensions of the ERGM and other network methods
xergm package: lots of extensions to the ERGM
gwdegree package: helps with understanding gwdegree terms (https://github.com/michaellevy/gwdegree)
Desmarais, BA & Cranmer, SJ. 2012. "Micro-level interpretations of exponential random graph models with applications to estuary networks." Policy Studies Journal.
Hunter, DR et al. 2008. "ergm: A package to fit, simulate and diagnose exponential-family models for networks." Journal of Statistical Software.
Levy, M. 2016. "gwdegree: Improving interpretation of geometrically-weighted degree estimates in exponential random graph models." Journal of Open Source Software.
Snijders, T et al. 2006. "New specifications for exponential random graph models." Sociological Methodology.