# install.packages('network', .libPaths(), repos='http://cran.us.r-project.org')
# install.packages('ergm', .libPaths(), repos='http://cran.us.r-project.org')
# install.packages('devtools', .libPaths(), repos='http://cran.us.r-project.org')
devtools::install_github('tedhchen/ergmWorkshopTools')
Downloading GitHub repo tedhchen/ergmWorkshopTools@HEAD
✔  checking for file ‘/tmp/RtmpQbcSQg/remotes1ec57f9fac2/tedhchen-ergmWorkshopTools-d00ab89/DESCRIPTION’ (424ms)
─  preparing ‘ergmWorkshopTools’:
✔  checking DESCRIPTION meta-information
─  checking for LF line-endings in source and make files and shell scripts
─  checking for empty or unneeded directories
─  building ‘ergmWorkshopTools_0.0.1.tar.gz’
library(ergm)
library(ergmWorkshopTools)
Loading required package: network

‘network’ 1.17.1 (2021-06-12), part of the Statnet Project
* ‘news(package="network")’ for changes since last version
* ‘citation("network")’ for citation information
* ‘https://statnet.org’ for help, support, and other information

‘ergm’ 4.0.0 (2021-06-14), part of the Statnet Project
* ‘news(package="ergm")’ for changes since last version
* ‘citation("ergm")’ for citation information
* ‘https://statnet.org’ for help, support, and other information

‘ergm’ 4 is a major release that introduces a fair number of backwards-incompatible changes. Please type ‘news(package="ergm")’ for a list of major changes.

======================
Package: ergmWorkshopTools
Version: 0.0.1
Date: 2021-06-14
Author: Ted Hsuan Yun Chen (Aalto University and University of Helsinki)
This package contains data and functions needed for the 2021 PolNet ERGM workshop.
As you can see from the output, loading the `ergm` package also loaded the `network` package. You will also notice references to the `statnet` project; they are the main developers of ERGM functionality in R.
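If you want to confirm that your installed versions match the ones in the output above (ergm 4.0.0 and network 1.17.1), a quick check (not part of the original workshop code) is:

# Optional: check the versions of the loaded packages.
packageVersion('ergm')
packageVersion('network')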
data(mid_mat)
?mid_mat
`checkmat()` is a utility function that plots the supplied adjacency matrix.
Note about plotting: you can ignore the plotting options (the `options(repr.plot...)` calls) if you are not using a Jupyter notebook.
options(repr.plot.width=10, repr.plot.height=10)
checkmat(mid_mat)
Next, we use the `network` function to create a network object.

?network
nw <- network(mid_mat, directed = F)
nw
 Network attributes:
  vertices = 189
  directed = FALSE
  hyper = FALSE
  loops = FALSE
  multiple = FALSE
  bipartite = FALSE
  total edges= 134
    missing edges= 0
    non-missing edges= 134

 Vertex attribute names:
    vertex.names

No edge attributes
options(repr.plot.width=10, repr.plot.height=10)
par(mfrow = c(1, 1))
set.seed(210615); plot(nw)
The adjacency matrix format is generally the easiest to work with, but it can take up a lot of space. Sometimes we will have edgelists instead; in fact, unless you have pre-prepared network data, an edgelist is usually what you'll have.
data(mid_edgelist)
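Before converting it, it can help to peek at the edgelist itself; this extra line is not in the original workshop code and just assumes `mid_edgelist` is the two-column list of dyads used below.

# Quick look at the first few dyads in the edgelist.
head(mid_edgelist)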
nw.b <- network(mid_edgelist, directed = F)
nw.b
 Network attributes:
  vertices = 120
  directed = FALSE
  hyper = FALSE
  loops = FALSE
  multiple = FALSE
  bipartite = FALSE
  total edges= 134
    missing edges= 0
    non-missing edges= 134

 Vertex attribute names:
    vertex.names

No edge attributes

Notice that this network has only 120 vertices rather than 189: an edgelist only records nodes that appear in at least one edge, so isolates are dropped.
options(repr.plot.width=20, repr.plot.height=10)
par(mfrow = c(1, 2))
set.seed(210615); plot(nw); plot(nw.b)
data(mid_node_attr)
head(mid_node_attr)
?mid_node_attr
|    | name | intrastate | dem | cinc | trans |
|---|---|---|---|---|---|
|    | <chr> | <dbl> | <dbl> | <dbl> | <dbl> |
| 2  | USA | 0 | 1 | 0.149933 | 0 |
| 20 | CAN | 0 | 1 | 0.012238 | 0 |
| 31 | BHM | 0 | 1 | 0.000044 | 0 |
| 40 | CUB | 1 | 0 | 0.002783 | 0 |
| 41 | HAI | 1 | 0 | 0.000443 | 0 |
| 42 | DOM | 0 | 0 | 0.000748 | 0 |
mat <- matrix(0, ncol = 189, nrow = 189, dimnames = list(row.names(mid_node_attr), row.names(mid_node_attr)))
mat[1:10, 1:10]
|    | 2 | 20 | 31 | 40 | 41 | 42 | 51 | 52 | 53 | 54 |
|---|---|---|---|---|---|---|---|---|---|---|
| 2  | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 20 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 31 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 40 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 41 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 42 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 51 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 52 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 53 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 54 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
# Fill in the adjacency matrix from the edgelist; set both (i, j) and (j, i)
# because the network is undirected.
for(i in 1:nrow(mid_edgelist)){
    mat[mid_edgelist[i, 1], mid_edgelist[i, 2]] <- 1
    mat[mid_edgelist[i, 2], mid_edgelist[i, 1]] <- 1
}
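As an aside (not in the original workshop code), the same fill can be done without a loop. Indexing a matrix with a two-column matrix of (row, column) identifiers behaves just like the loop above, whether `mid_edgelist` holds the node labels used as the dimnames of `mat` or numeric positions.

# Vectorized alternative: assign both directions at once for the undirected network.
el <- as.matrix(mid_edgelist)
mat[el] <- 1
mat[el[, 2:1]] <- 1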
options(repr.plot.width=14, repr.plot.height=7)
par(mfrow = c(1, 2))
checkmat(mat);checkmat(mid_mat)
nw
 Network attributes:
  vertices = 189
  directed = FALSE
  hyper = FALSE
  loops = FALSE
  multiple = FALSE
  bipartite = FALSE
  total edges= 134
    missing edges= 0
    non-missing edges= 134

 Vertex attribute names:
    vertex.names

No edge attributes
The `%v%` operator is how to access vertex (node) variables from the network object.
nw%v%'vertex.names'
nw%v%'dem' <- mid_node_attr$'dem'
nw
 Network attributes:
  vertices = 189
  directed = FALSE
  hyper = FALSE
  loops = FALSE
  multiple = FALSE
  bipartite = FALSE
  total edges= 134
    missing edges= 0
    non-missing edges= 134

 Vertex attribute names:
    dem vertex.names

No edge attributes
nw%v%'dem'
options(repr.plot.width=10, repr.plot.height=10)
set.seed(210615); plot(nw, vertex.col = ifelse(nw%v%'dem', 2, 1)); legend('bottomright', legend = c('Democracy', 'Nondemocracy'), fill = c(2, 1), bty = 'n', cex = 1.1)
These are the geographical contiguity and joint-democracy networks.
data(contig); data(joint_dem)
options(repr.plot.width=16, repr.plot.height=8)
par(mfrow = c(1, 2))
set.seed(210615); plot(network(contig, directed = F), sub = 'Contiguity', cex.sub = 2); plot(network(joint_dem, directed = F), sub = 'Joint-Democracy', cex.sub = 2)
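Because `edgecov` matrices are matched to the network purely by position, it is worth checking that the covariate matrices are in the same node order as the network. This check is not in the original workshop code and assumes `contig` and `joint_dem` carry the same row names as `mid_mat`.

# Sanity check: edge covariates should follow the same node ordering as nw.
identical(rownames(contig), nw %v% 'vertex.names')
identical(rownames(joint_dem), nw %v% 'vertex.names')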
Use `?'ergm-terms'` to look at all the existing terms in the `ergm` package; the `search.ergmTerms` function also works well.

?`ergm-terms`
search.ergmTerms(keyword = 'transitive')
Found 15 matching ergm terms:

ddsp(d, type="OTP")
desp(d, type="OTP")
dgwdsp(decay, fixed=FALSE, cutoff=30, type="OTP")
dgwesp(decay, fixed=FALSE, cutoff=30, type="OTP")
dgwnsp(decay, fixed=FALSE, cutoff=30, type="OTP")
dnsp(d, type="OTP")
intransitive() Intransitive triads
intransitive() Intransitive triads
transitive() Transitive triads
transitiveties(attr=NULL, levels=NULL) Transitive ties
transitiveties(threshold=0) Transitive ties
transitiveweights(twopath="min",combine="max",affect="min") Transitive weights
triangle(attr=NULL, diff=FALSE, levels=NULL) Triangles
ttriple(attr=NULL, diff=FALSE, levels=NULL) Transitive triples
ttriad() Transitive triples
m0 <- ergm(nw ~ edges + nodefactor('dem') + edgecov(joint_dem))
summary(m0)
Starting maximum pseudolikelihood estimation (MPLE):
Evaluating the predictor and response matrix.
Maximizing the pseudolikelihood.
Finished MPLE.
Stopping at the initial estimate.
Evaluating log-likelihood at the estimate.
Call:
ergm(formula = nw ~ edges + nodefactor("dem") + edgecov(joint_dem))

Maximum Likelihood Results:

                  Estimate Std. Error MCMC % z value Pr(>|z|)
edges              -4.6678     0.1286      0 -36.287   <1e-04 ***
nodefactor.dem.1   -0.2632     0.1818      0  -1.447    0.148
edgecov.joint_dem  -0.2179     0.4078      0  -0.534    0.593
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

     Null Deviance: 24629  on 17766  degrees of freedom
 Residual Deviance:  1570  on 17763  degrees of freedom

AIC: 1576  BIC: 1599  (Smaller is better. MC Std. Err. = 0)
dyad_df <- ergmMPLE(nw ~ edges + nodefactor('dem') + edgecov(joint_dem))
dyad_df$predictor
| edges | nodefactor.dem.1 | edgecov.joint_dem |
|---|---|---|
| 1 | 2 | 1 |
| 1 | 2 | 1 |
| 1 | 1 | 0 |
| 1 | 0 | 0 |
| 1 | 0 | 0 |
| 1 | 1 | 0 |
summary(glm(dyad_df$response ~ dyad_df$predictor - 1, weights = dyad_df$weights, family = 'binomial'))
Call:
glm(formula = dyad_df$response ~ dyad_df$predictor - 1, family = "binomial", weights = dyad_df$weights)

Deviance Residuals:
      1        2        3        4        5        6
 11.402   -4.894   24.545   23.887  -11.020  -11.025

Coefficients:
                                   Estimate Std. Error z value Pr(>|z|)
dyad_df$predictoredges              -4.6678     0.1286 -36.287   <2e-16 ***
dyad_df$predictornodefactor.dem.1   -0.2632     0.1818  -1.447    0.148
dyad_df$predictoredgecov.joint_dem  -0.2179     0.4078  -0.534    0.593
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 24629  on 6  degrees of freedom
Residual deviance:  1570  on 3  degrees of freedom
AIC: 1576

Number of Fisher Scoring iterations: 7
`kstar(2)` (which is the 2-star term) is the basic term for this.

m1 <- ergm(nw ~ edges + nodefactor('dem') + edgecov(joint_dem) + kstar(2), control = control.ergm(seed = 210616))
Starting maximum pseudolikelihood estimation (MPLE):
Evaluating the predictor and response matrix.
Maximizing the pseudolikelihood.
Finished MPLE.
Starting Monte Carlo maximum likelihood estimation (MCMLE):
Iteration 1 of at most 60:
Error in ergm.MCMLE(init, nw, model, initialfit = (initialfit <- NULL), :
  Number of edges in a simulated network exceeds that in the observed by a factor of more than 20. This is a strong indicator of model degeneracy or a very poor starting parameter configuration. If you are reasonably certain that neither of these is the case, increase the MCMLE.density.guard control.ergm() parameter.

Traceback:

1. ergm(nw ~ edges + nodefactor("dem") + edgecov(joint_dem) + kstar(2),
 .     control = control.ergm(seed = 210616))
2. ergm.MCMLE(init, nw, model, initialfit = (initialfit <- NULL),
 .     control = control, proposal = proposal, proposal.obs = proposal.obs,
 .     verbose = verbose, ...)
3. stop("Number of edges in a simulated network exceeds that in the observed by a factor of more than ",
 .     floor(control$MCMLE.density.guard), ". This is a strong indicator of model degeneracy or a very poor starting parameter configuration. If you are reasonably certain that neither of these is the case, increase the MCMLE.density.guard control.ergm() parameter.")
A common fix is the geometrically weighted degree term (Snijders et al. 2006; Hunter et al. 2008); in the `ergm` package, it is called `gwdegree`.

Hunter, DR et al. 2008. "ergm: A package to fit, simulate and diagnose exponential-family models for networks." Journal of Statistical Software.
Snijders, T et al. 2006. "New specifications for exponential random graph models." Sociological Methodology.
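As a quick aside (not part of the original workshop code), you can compare the observed magnitude of the raw 2-star count with the geometrically weighted degree statistic before refitting:

# kstar(2) grows roughly with the square of node degrees, while the geometrically
# weighted degree statistic (for fixed decay) grows at most linearly in the number
# of nodes; this difference is part of why kstar terms are so prone to degeneracy.
summary(nw ~ edges + kstar(2) + gwdegree(decay = 0, fixed = TRUE) + gwdegree(decay = 0.5, fixed = TRUE))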
m1 <- ergm(nw ~ edges + nodefactor('dem') + edgecov(joint_dem) + gwdegree(decay = 0, fixed = T), control = control.ergm(seed = 210616))
Starting maximum pseudolikelihood estimation (MPLE):
Evaluating the predictor and response matrix.
Maximizing the pseudolikelihood.
Finished MPLE.
Starting Monte Carlo maximum likelihood estimation (MCMLE):
Iteration 1 of at most 60:
Optimizing with step length 0.5407.
The log-likelihood improved by 1.6896.
Estimating equations are not within tolerance region.
Iteration 2 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.4623.
Estimating equations are not within tolerance region.
Iteration 3 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.1125.
Estimating equations are not within tolerance region.
Iteration 4 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0849.
Convergence test p-value: 0.7687. Not converged with 99% confidence; increasing sample size.
Iteration 5 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0420.
Convergence test p-value: 0.3443. Not converged with 99% confidence; increasing sample size.
Iteration 6 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0100.
Convergence test p-value: 0.0007. Converged with 99% confidence.
Finished MCMLE.
Evaluating log-likelihood at the estimate. Fitting the dyad-independent submodel...
Bridging between the dyad-independent submodel and the full model...
Setting up bridge sampling...
Using 16 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 .
Bridging finished.
This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
summary(m1)
Call:
ergm(formula = nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0, fixed = T), control = control.ergm(seed = 210616))

Monte Carlo Maximum Likelihood Results:

                  Estimate Std. Error MCMC % z value Pr(>|z|)
edges              -4.1573     0.1528      0 -27.206   <1e-04 ***
nodefactor.dem.1   -0.1599     0.1738      0  -0.920    0.357
edgecov.joint_dem  -0.2259     0.3969      0  -0.569    0.569
gwdeg.fixed.0      -1.1605     0.2355      0  -4.928   <1e-04 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

     Null Deviance: 24629  on 17766  degrees of freedom
 Residual Deviance:  1544  on 17762  degrees of freedom

AIC: 1552  BIC: 1583  (Smaller is better. MC Std. Err. = 0.4484)
The `gwdeg.fixed.0` term has a coefficient of -1.16.

m2 <- ergm(nw ~ edges + nodefactor('dem') + edgecov(joint_dem) + gwdegree(decay = 0, fixed = T) + triangle, control = control.ergm(seed = 210616))
Starting maximum pseudolikelihood estimation (MPLE):
Evaluating the predictor and response matrix.
Maximizing the pseudolikelihood.
Finished MPLE.
Starting Monte Carlo maximum likelihood estimation (MCMLE):
Iteration 1 of at most 60:
Optimizing with step length 0.3570.
The log-likelihood improved by 1.7058.
Estimating equations are not within tolerance region.
Iteration 2 of at most 60:
Error in ergm.MCMLE(init, nw, model, initialfit = (initialfit <- NULL), :
  Number of edges in a simulated network exceeds that in the observed by a factor of more than 20. This is a strong indicator of model degeneracy or a very poor starting parameter configuration. If you are reasonably certain that neither of these is the case, increase the MCMLE.density.guard control.ergm() parameter.

Traceback:

1. ergm(nw ~ edges + nodefactor("dem") + edgecov(joint_dem) + gwdegree(decay = 0,
 .     fixed = T) + triangle, control = control.ergm(seed = 210616))
2. ergm.MCMLE(init, nw, model, initialfit = (initialfit <- NULL),
 .     control = control, proposal = proposal, proposal.obs = proposal.obs,
 .     verbose = verbose, ...)
3. stop("Number of edges in a simulated network exceeds that in the observed by a factor of more than ",
 .     floor(control$MCMLE.density.guard), ". This is a strong indicator of model degeneracy or a very poor starting parameter configuration. If you are reasonably certain that neither of these is the case, increase the MCMLE.density.guard control.ergm() parameter.")
The same kind of fix exists for triadic closure: in the `ergm` package, it is called `gwesp` (the geometrically weighted edgewise shared partner term).
m2 <- ergm(nw ~ edges + nodefactor('dem') + edgecov(joint_dem) + gwdegree(decay = 0, fixed = T) + gwesp(decay = 0, fixed = T), control = control.ergm(seed = 210616))
Starting maximum pseudolikelihood estimation (MPLE):
Evaluating the predictor and response matrix.
Maximizing the pseudolikelihood.
Finished MPLE.
Starting Monte Carlo maximum likelihood estimation (MCMLE):
Iteration 1 of at most 60:
Optimizing with step length 0.4429.
The log-likelihood improved by 1.9503.
Estimating equations are not within tolerance region.
Iteration 2 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.8813.
Estimating equations are not within tolerance region.
Iteration 3 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.3267.
Estimating equations are not within tolerance region.
Iteration 4 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.2839.
Estimating equations are not within tolerance region.
Iteration 5 of at most 60:
Optimizing with step length 0.9045.
The log-likelihood improved by 1.5003.
Estimating equations are not within tolerance region.
Iteration 6 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 1.5620.
Estimating equations are not within tolerance region.
Iteration 7 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.1756.
Estimating equations are not within tolerance region.
Iteration 8 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.1829.
Estimating equations are not within tolerance region.
Iteration 9 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.4080.
Estimating equations are not within tolerance region.
Iteration 10 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0691.
Convergence test p-value: 0.9569. Not converged with 99% confidence; increasing sample size.
Iteration 11 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.1327.
Estimating equations are not within tolerance region.
Estimating equations did not move closer to tolerance region more than 1 time(s) in 4 steps; increasing sample size.
Iteration 12 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0895.
Convergence test p-value: 0.5342. Not converged with 99% confidence; increasing sample size.
Iteration 13 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0194.
Convergence test p-value: 0.0808. Not converged with 99% confidence; increasing sample size.
Iteration 14 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0232.
Convergence test p-value: 0.0165. Not converged with 99% confidence; increasing sample size.
Iteration 15 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0152.
Convergence test p-value: < 0.0001. Converged with 99% confidence.
Finished MCMLE.
Evaluating log-likelihood at the estimate. Fitting the dyad-independent submodel...
Bridging between the dyad-independent submodel and the full model...
Setting up bridge sampling...
Using 16 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 .
Bridging finished.
This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
summary(m2)
Call:
ergm(formula = nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0, fixed = T) + gwesp(decay = 0, fixed = T),
    control = control.ergm(seed = 210616))

Monte Carlo Maximum Likelihood Results:

                  Estimate Std. Error MCMC % z value Pr(>|z|)
edges              -4.6965     0.1702      0 -27.594  < 1e-04 ***
nodefactor.dem.1   -0.1392     0.1595      0  -0.873  0.38291
edgecov.joint_dem  -0.2355     0.4113      0  -0.573  0.56692
gwdeg.fixed.0      -0.6418     0.2284      0  -2.810  0.00495 **
gwesp.fixed.0       1.1526     0.1415      0   8.148  < 1e-04 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

     Null Deviance: 24629  on 17766  degrees of freedom
 Residual Deviance:  1494  on 17761  degrees of freedom

AIC: 1504  BIC: 1543  (Smaller is better. MC Std. Err. = 0.4262)
The `gwesp.fixed.0` term is positive here. The `gwdegree` and `gwesp` terms are not straightforward to interpret (especially `gwesp`); for `gwdegree`, see:

Levy, M. 2016. "gwdegree: Improving interpretation of geometrically-weighted degree estimates in exponential random graph models." Journal of Open Source Software.

To build some intuition for `gwesp`, start with a fully connected ten-node network.

tenclique <- network(matrix(1, ncol = 10, nrow = 10), directed = T)
options(repr.plot.width=7, repr.plot.height=7)
set.seed(210615); plot(tenclique)
The `summary` function is very useful when provided with an ERGM formula; it returns the observed value of each term on the supplied network:

summary(network ~ ergm-terms)
summary(tenclique ~ edges + gwesp(0, fixed = T) + gwesp(0.5, fixed = T) + gwesp(1, fixed = T) + gwesp(2, fixed = T) + gwesp(5, fixed = T) + gwesp(10, fixed = T) + ttriple)
`gwesp` with decay set to 0 simply records whether each edge has "one or more" shared partners; in this complete network every edge does, so it takes the same value as the `edges` count term. Compare this to the `ttriple` term, which counts all edgewise shared partners of all edges (i.e. $10 \times 9 \times 8 = 720$). The decay parameter moves the statistic between these two extremes (at one end, `edges` and `gwesp` are the same), and at the other end you will run into degeneracy issues.

m2.b <- ergm(nw ~ edges + nodefactor('dem') + edgecov(joint_dem) + gwdegree(decay = 0.2, fixed = T) + gwesp(decay = 0, fixed = T), control = control.ergm(seed = 210616))
Starting maximum pseudolikelihood estimation (MPLE):
Evaluating the predictor and response matrix.
Maximizing the pseudolikelihood.
Finished MPLE.
Starting Monte Carlo maximum likelihood estimation (MCMLE):
Iteration 1 of at most 60:
Optimizing with step length 0.3570.
The log-likelihood improved by 1.3288.
Estimating equations are not within tolerance region.
Iteration 2 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 1.5190.
Estimating equations are not within tolerance region.
Iteration 3 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.4394.
Estimating equations are not within tolerance region.
Iteration 4 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0368.
Convergence test p-value: 0.9732. Not converged with 99% confidence; increasing sample size.
Iteration 5 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.4939.
Estimating equations are not within tolerance region.
Iteration 6 of at most 60:
Optimizing with step length 0.9045.
The log-likelihood improved by 1.8995.
Estimating equations are not within tolerance region.
Estimating equations did not move closer to tolerance region more than 1 time(s) in 4 steps; increasing sample size.
Iteration 7 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.3506.
Estimating equations are not within tolerance region.
Iteration 8 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0215.
Convergence test p-value: 0.4942. Not converged with 99% confidence; increasing sample size.
Iteration 9 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0223.
Convergence test p-value: 0.0003. Converged with 99% confidence.
Finished MCMLE.
Evaluating log-likelihood at the estimate. Fitting the dyad-independent submodel...
Bridging between the dyad-independent submodel and the full model...
Setting up bridge sampling...
Using 16 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 .
Bridging finished.
This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
summary(m2.b)
Call:
ergm(formula = nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0.2, fixed = T) + gwesp(decay = 0, fixed = T),
    control = control.ergm(seed = 210616))

Monte Carlo Maximum Likelihood Results:

                  Estimate Std. Error MCMC % z value Pr(>|z|)
edges              -4.4797     0.1931      0 -23.195  < 1e-04 ***
nodefactor.dem.1   -0.1274     0.1739      0  -0.732 0.463893
edgecov.joint_dem  -0.2045     0.4263      0  -0.480 0.631492
gwdeg.fixed.0.2    -0.8503     0.2271      0  -3.744 0.000181 ***
gwesp.fixed.0       1.0601     0.1468      0   7.221  < 1e-04 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

     Null Deviance: 24629  on 17766  degrees of freedom
 Residual Deviance:  1487  on 17761  degrees of freedom

AIC: 1497  BIC: 1536  (Smaller is better. MC Std. Err. = 0.5284)
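If you want a quick numeric comparison of the two specifications (not in the original workshop code; both log-likelihoods were estimated by bridge sampling above, so information criteria are available):

# Compare the decay = 0 and decay = 0.2 gwdegree specifications by AIC.
AIC(m2, m2.b)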
The `gof` function is what we use to assess goodness of fit.

m2fit <- gof(m2, control = control.gof.ergm(seed = 210616))
m2fit
Goodness-of-fit for degree

         obs min  mean max MC p-value
degree0   69  49 68.12  94       0.88
degree1   66  25 44.72  63       0.00
degree2   23  22 34.25  51       0.02
degree3   13  11 21.67  36       0.10
degree4    7   5 11.32  23       0.30
degree5    3   0  5.44  15       0.52
degree6    2   0  2.23   6       1.00
degree7    1   0  0.77   5       0.92
degree8    2   0  0.34   3       0.12
degree9    0   0  0.06   2       1.00
degree10   1   0  0.06   2       0.10
degree11   1   0  0.02   1       0.04
degree18   1   0  0.00   0       0.00

Goodness-of-fit for edgewise shared partner

     obs min  mean max MC p-value
esp0  95  68 95.38 122       0.98
esp1  32   3 38.62  73       0.60
esp2   5   0  2.68  11       0.40
esp3   2   0  0.10   2       0.02

Goodness-of-fit for minimum geodesic distance

      obs   min  mean   max MC p-value
1     134   134   134   134          1
2     419   419   419   419          1
3     658   658   658   658          1
4     765   765   765   765          1
5     737   737   737   737          1
6     489   489   489   489          1
7     359   359   359   359          1
8     203   203   203   203          1
9      90    90    90    90          1
10     28    28    28    28          1
11      6     6     6     6          1
12      1     1     1     1          1
Inf 13877 13877 13877 13877          1

Goodness-of-fit for model statistics

                  obs min   mean max MC p-value
edges             134  88 136.78 199       1.00
nodefactor.dem.1   85  54  85.74 137       0.98
edgecov.joint_dem  12   4  11.72  22       1.00
gwdeg.fixed.0     120  95 120.88 140       0.88
gwesp.fixed.0      39   3  41.40  82       0.94
options(repr.plot.width=32, repr.plot.height=8)
par(mfrow = c(1, 4))
plot(m2fit)
m1fit <- gof(m1, control = control.gof.ergm(seed = 210616))
options(repr.plot.width=32, repr.plot.height=8)
par(mfrow = c(1, 4))
plot(m1fit)
summary(m2)
Call:
ergm(formula = nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0, fixed = T) + gwesp(decay = 0, fixed = T),
    control = control.ergm(seed = 210616))

Monte Carlo Maximum Likelihood Results:

                  Estimate Std. Error MCMC % z value Pr(>|z|)
edges              -4.6965     0.1702      0 -27.594  < 1e-04 ***
nodefactor.dem.1   -0.1392     0.1595      0  -0.873  0.38291
edgecov.joint_dem  -0.2355     0.4113      0  -0.573  0.56692
gwdeg.fixed.0      -0.6418     0.2284      0  -2.810  0.00495 **
gwesp.fixed.0       1.1526     0.1415      0   8.148  < 1e-04 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

     Null Deviance: 24629  on 17766  degrees of freedom
 Residual Deviance:  1494  on 17761  degrees of freedom

AIC: 1504  BIC: 1543  (Smaller is better. MC Std. Err. = 0.4262)
A positive `gwesp` coefficient means triadic closure is a feature of the network. For interpreting individual dyads: `nodefactor.dem` will be 0 for two nondemocracies, 1 for a mixed-regime dyad, and 2 for joint democracies, and `gwesp.fixed.0` will be 1 if the dyad is part of at least one triangle.

$P(y_{ij} = 1 \mid Y, \boldsymbol{\theta}) = \mathrm{logit}^{-1}\left(\sum^k_{r=1}\theta_r \delta_r^{(ij)}(Y)\right)$

Each coefficient $\theta_r$ is then:
- conditional log odds of a tie given a one unit change in the statistic
- for the less complicated terms, it becomes the conditional log odds of a tie given that it is part of the local configuration one more time
Here is a simple three-node network with two ties, to show tie formation on the empty dyad.
tri <- matrix(c(0, 1, 1,
1, 0, 0,
1, 0, 0), byrow = T, ncol = 3)
nw.tri <- network(tri, directed = F)
nw.tri%v%'dem' <- c(0, 1, 1)
jd.tri <- matrix(c(0, 0, 0,
0, 0, 1,
0, 1, 0), byrow = T, ncol = 3)
options(repr.plot.width=10, repr.plot.height=10)
set.seed(210615); plot(nw.tri, vertex.col = ifelse(nw.tri%v%'dem', 2, 1), displaylabels = T)
The `ergmMPLE` function can help with this, especially with more complicated terms like the geometrically weighted ones.

ergmMPLE(nw.tri ~ edges + nodefactor('dem') + edgecov(jd.tri) + gwdegree(decay = 0, fixed = T) + gwesp(decay = 0, fixed = T))$predictor
| edges | nodefactor.dem.1 | edgecov.jd.tri | gwdeg.fixed.0 | gwesp.fixed.0 |
|---|---|---|---|---|
| 1 | 1 | 0 | 1 | 0 |
| 1 | 2 | 1 | 0 | 3 |
Note the `gwdeg.fixed.0` and `gwesp.fixed.0` terms for the (2,3) dyad. Why does `gwdegree` not change? Let's look at the degree distribution:

| nodes with degree: | 0 | 1 | 2 |
|---|---|---|---|
| w/o (2,3) edge | 0 | 2 | 1 |
| w/ (2,3) edge | 0 | 0 | 3 |

With decay set to 0, the statistic only registers whether a node has any ties at all; because the number of degree-0 nodes is the same with and without the (2,3) edge, `gwdeg.fixed.0` does not change.

round(coefficients(m2), 2)
plogis(-1.76)
Desmarais, BA & Cranmer, SJ. 2012. "Micro-level interpretations of exponential random graph models with applications to estuary networks." Policy Studies Journal.
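As a sketch of where a value like -1.76 comes from (this calculation is not in the original notebook): the conditional log odds of the (2,3) tie is the sum over terms of coefficient times change statistic, using the change statistics from the `ergmMPLE()` output above; small differences from -1.76 reflect rounding.

# Change statistics for the (2,3) dyad, read off the ergmMPLE() output above.
delta <- c(edges = 1, nodefactor.dem.1 = 2, edgecov = 1, gwdeg.fixed.0 = 0, gwesp.fixed.0 = 3)
logodds <- sum(coefficients(m2) * delta)  # roughly -1.75, close to the -1.76 used above
plogis(logodds)                           # conditional probability of the (2,3) tie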
Some other useful `ergm()` and `control.ergm()` settings:

- `eval.loglik = F` skips log-likelihood estimation if you are exploring large and complex models
- `MCMC.burnin`, `MCMC.samplesize`, and `MCMC.interval` for higher quality estimates
- `parallel` to speed up estimation when working with larger models

See all the settings with `?control.ergm`.
m2.c <- ergm(nw ~ edges + nodefactor('dem') + edgecov(joint_dem) + gwdegree(decay = 0, fixed = T) + gwesp(decay = 0, fixed = T),
eval.loglik = F,
control = control.ergm(seed = 210615,
MCMC.burnin = 50000,
MCMC.samplesize = 2500,
MCMC.interval = 2500,
parallel = 0))
Starting maximum pseudolikelihood estimation (MPLE):
Evaluating the predictor and response matrix.
Maximizing the pseudolikelihood.
Finished MPLE.
Starting Monte Carlo maximum likelihood estimation (MCMLE):
Iteration 1 of at most 60:
Optimizing with step length 0.5407.
The log-likelihood improved by 4.6038.
Estimating equations are not within tolerance region.
Iteration 2 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 1.9610.
Estimating equations are not within tolerance region.
Iteration 3 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.1193.
Estimating equations are not within tolerance region.
Iteration 4 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0281.
Convergence test p-value: 0.0482. Not converged with 99% confidence; increasing sample size.
Iteration 5 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0140.
Convergence test p-value: 0.2140. Not converged with 99% confidence; increasing sample size.
Iteration 6 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0189.
Convergence test p-value: 0.2765. Not converged with 99% confidence; increasing sample size.
Iteration 7 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0132.
Convergence test p-value: 0.0821. Not converged with 99% confidence; increasing sample size.
Iteration 8 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0145.
Convergence test p-value: 0.1721. Not converged with 99% confidence; increasing sample size.
Iteration 9 of at most 60:
Optimizing with step length 1.0000.
The log-likelihood improved by 0.0029.
Convergence test p-value: < 0.0001. Converged with 99% confidence.
Finished MCMLE.
This model was fit using MCMC. To examine model diagnostics and check for degeneracy, use the mcmc.diagnostics() function.
summary(m2.c)
Log-likelihood was not estimated for this fit. To get deviances, AIC, and/or BIC, use ‘*fit* <-logLik(*fit*, add=TRUE)’ to add it to the object or rerun this function with eval.loglik=TRUE.
Call:
ergm(formula = nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0, fixed = T) + gwesp(decay = 0, fixed = T),
    eval.loglik = F, control = control.ergm(seed = 210615, MCMC.burnin = 50000,
    MCMC.samplesize = 2500, MCMC.interval = 2500, parallel = 0))

Monte Carlo Maximum Likelihood Results:

                  Estimate Std. Error MCMC % z value Pr(>|z|)
edges              -4.6948     0.1771      0 -26.510   <1e-04 ***
nodefactor.dem.1   -0.1390     0.1664      0  -0.835   0.4036
edgecov.joint_dem  -0.2336     0.4088      0  -0.572   0.5676
gwdeg.fixed.0      -0.6391     0.2317      0  -2.759   0.0058 **
gwesp.fixed.0       1.1460     0.1468      0   7.809   <1e-04 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
m2.c <- logLik(m2.c, add = T)
Fitting the dyad-independent submodel...
Bridging between the dyad-independent submodel and the full model...
Setting up bridge sampling...
Using 16 bridges: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 .
Bridging finished.
summary(m2.c)
Call:
ergm(formula = nw ~ edges + nodefactor("dem") + edgecov(joint_dem) +
    gwdegree(decay = 0, fixed = T) + gwesp(decay = 0, fixed = T),
    eval.loglik = F, control = control.ergm(seed = 210615, MCMC.burnin = 50000,
    MCMC.samplesize = 2500, MCMC.interval = 2500, parallel = 0))

Monte Carlo Maximum Likelihood Results:

                  Estimate Std. Error MCMC % z value Pr(>|z|)
edges              -4.6948     0.1771      0 -26.510   <1e-04 ***
nodefactor.dem.1   -0.1390     0.1664      0  -0.835   0.4036
edgecov.joint_dem  -0.2336     0.4088      0  -0.572   0.5676
gwdeg.fixed.0      -0.6391     0.2317      0  -2.759   0.0058 **
gwesp.fixed.0       1.1460     0.1468      0   7.809   <1e-04 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

     Null Deviance: 24629  on 17766  degrees of freedom
 Residual Deviance:  1495  on 17761  degrees of freedom

AIC: 1505  BIC: 1544  (Smaller is better. MC Std. Err. = 0.3209)
options(repr.plot.width=20, repr.plot.height=10)
mcmc.diagnostics(m2)
Sample statistics summary: Iterations = 2803712:6039552 Thinning interval = 2048 Number of chains = 1 Sample size per chain = 1581 1. Empirical mean and standard deviation for each variable, plus standard error of the mean: Mean SD Naive SE Time-series SE edges 2.8741 19.864 0.4996 0.9986 nodefactor.dem.1 1.4187 15.633 0.3932 0.6645 edgecov.joint_dem 0.1954 4.089 0.1028 0.1352 gwdeg.fixed.0 0.8299 9.662 0.2430 0.3857 gwesp.fixed.0 2.2815 13.649 0.3433 0.7790 2. Quantiles for each variable: 2.5% 25% 50% 75% 97.5% edges -33.5 -10 2 16 45 nodefactor.dem.1 -27.0 -10 1 12 35 edgecov.joint_dem -7.0 -3 0 3 9 gwdeg.fixed.0 -20.0 -5 1 8 18 gwesp.fixed.0 -21.5 -7 2 11 31 Are sample statistics significantly different from observed? edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 diff. 2.874130297 1.41872233 0.1954459 0.82985452 test stat. 2.878250816 2.13509251 1.4456122 2.15181337 P-val. 0.003998871 0.03275345 0.1482860 0.03141206 gwesp.fixed.0 Overall (Chi^2) diff. 2.281467426 NA test stat. 2.928565083 10.74797367 P-val. 0.003405305 0.05852964 Sample statistics cross-correlations: edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 edges 1.0000000 0.7989286 0.4602394 0.8505776 nodefactor.dem.1 0.7989286 1.0000000 0.7638296 0.7340096 edgecov.joint_dem 0.4602394 0.7638296 1.0000000 0.4590853 gwdeg.fixed.0 0.8505776 0.7340096 0.4590853 1.0000000 gwesp.fixed.0 0.8038153 0.6072574 0.3261639 0.5303066 gwesp.fixed.0 edges 0.8038153 nodefactor.dem.1 0.6072574 edgecov.joint_dem 0.3261639 gwdeg.fixed.0 0.5303066 gwesp.fixed.0 1.0000000 Sample statistics auto-correlation: Chain 1 edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 Lag 0 1.00000000 1.00000000 1.000000000 1.00000000 Lag 2048 0.48734776 0.32828093 0.184489694 0.24289768 Lag 4096 0.35325989 0.19724345 0.061385594 0.17426045 Lag 6144 0.25131376 0.16371725 0.071877375 0.15011261 Lag 8192 0.14326239 0.09728171 0.018212371 0.07119263 Lag 10240 0.08689039 0.04923721 -0.004524255 0.06458949 gwesp.fixed.0 Lag 0 1.0000000 Lag 2048 0.6746308 Lag 4096 0.4618212 Lag 6144 0.3014276 Lag 8192 0.2107175 Lag 10240 0.1333792 Sample statistics burn-in diagnostic (Geweke): Chain 1 Fraction in 1st window = 0.1 Fraction in 2nd window = 0.5 edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 -1.143 -2.195 -2.068 -1.622 gwesp.fixed.0 -1.211 Individual P-values (lower = worse): edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 0.25285063 0.02818016 0.03863393 0.10486900 gwesp.fixed.0 0.22606896 Joint P-value (lower = worse): 0.212401 .
MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
options(repr.plot.width=20, repr.plot.height=10)
mcmc.diagnostics(m2.c)
Sample statistics summary: Iterations = 290784:3852576 Thinning interval = 312 Number of chains = 1 Sample size per chain = 11417 1. Empirical mean and standard deviation for each variable, plus standard error of the mean: Mean SD Naive SE Time-series SE edges -0.7379 19.435 0.18189 0.8781 nodefactor.dem.1 -0.2601 15.128 0.14158 0.5677 edgecov.joint_dem -0.1223 4.007 0.03750 0.1165 gwdeg.fixed.0 -0.4866 9.534 0.08922 0.2897 gwesp.fixed.0 -0.2658 13.134 0.12292 0.6814 2. Quantiles for each variable: 2.5% 25% 50% 75% 97.5% edges -37 -14 -1 12 40 nodefactor.dem.1 -28 -11 -1 9 31 edgecov.joint_dem -7 -3 0 2 8 gwdeg.fixed.0 -20 -7 0 6 17 gwesp.fixed.0 -22 -10 -1 8 29 Are sample statistics significantly different from observed? edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 diff. -0.7379347 -0.2600508 -0.1222738 -0.48664273 test stat. -0.8404137 -0.4580759 -1.0492226 -1.68004510 P-val. 0.4006765 0.6468979 0.2940757 0.09294854 gwesp.fixed.0 Overall (Chi^2) diff. -0.2658317 NA test stat. -0.3901499 18.586465455 P-val. 0.6964257 0.002400931 Sample statistics cross-correlations: edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 edges 1.0000000 0.8064376 0.4286277 0.8502422 nodefactor.dem.1 0.8064376 1.0000000 0.7393320 0.7305253 edgecov.joint_dem 0.4286277 0.7393320 1.0000000 0.4130171 gwdeg.fixed.0 0.8502422 0.7305253 0.4130171 1.0000000 gwesp.fixed.0 0.8001978 0.6119440 0.3105163 0.5223755 gwesp.fixed.0 edges 0.8001978 nodefactor.dem.1 0.6119440 edgecov.joint_dem 0.3105163 gwdeg.fixed.0 0.5223755 gwesp.fixed.0 1.0000000 Sample statistics auto-correlation: Chain 1 edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 Lag 0 1.0000000 1.0000000 1.0000000 1.0000000 Lag 312 0.7957683 0.7150144 0.5801672 0.5846007 Lag 624 0.6806121 0.5726600 0.3964272 0.4140633 Lag 936 0.6074834 0.4928086 0.3206351 0.3200706 Lag 1248 0.5605824 0.4460110 0.2760459 0.2849391 Lag 1560 0.5214302 0.4071122 0.2389041 0.2600111 gwesp.fixed.0 Lag 0 1.0000000 Lag 312 0.9331801 Lag 624 0.8747046 Lag 936 0.8199298 Lag 1248 0.7689267 Lag 1560 0.7218895 Sample statistics burn-in diagnostic (Geweke): Chain 1 Fraction in 1st window = 0.1 Fraction in 2nd window = 0.5 edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 -0.5416 -0.2782 0.7456 -0.4388 gwesp.fixed.0 -0.6521 Individual P-values (lower = worse): edges nodefactor.dem.1 edgecov.joint_dem gwdeg.fixed.0 0.5880912 0.7808294 0.4559219 0.6608426 gwesp.fixed.0 0.5143141 Joint P-value (lower = worse): 0.4488022 .
MCMC diagnostics shown here are from the last round of simulation, prior to computation of final parameter estimates. Because the final estimates are refinements of those used for this simulation run, these diagnostics may understate model performance. To directly assess the performance of the final model on in-model statistics, please use the GOF command: gof(ergmFitObject, GOF=~model).
Other resources:

- The `statnet` suite of packages has lots of extensions of the ERGM and other network methods
- The `xergm` package: lots of extensions to the ERGM
- The `gwdegree` package: helps with understanding gwdegree terms (https://github.com/michaellevy/gwdegree)
Desmarais, BA & Cranmer, SJ. 2012. "Micro-level interpretations of exponential random graph models with applications to estuary networks." Policy Studies Journal.
Hunter, DR et al. 2008. "ergm: A package to fit, simulate and diagnose exponential-family models for networks." Journal of Statistical Software.
Levy, M. 2016. "gwdegree: Improving interpretation of geometrically-weighted degree estimates in exponential random graph models." Journal of Open Source Software.
Snijders, T et al. 2006. "New specifications for exponential random graph models." Sociological Methodology.