Commit 7162d717 authored by Rodrigo Tapia-McClung's avatar Rodrigo Tapia-McClung

Added code and data

parent 7c114269
# Percolate network created using edges and different distance thresholds
#install.packages(pacman)
pacman::p_load(foreign, igraph, parallel, dplyr)
print("Creating general graph...")
edges.dbf <- read.dbf("data/edges.dbf")
near <- edges.dbf["NEAR_FID"]
target <- near %>% mutate(target = lag(NEAR_FID))
target$NEAR_FID <- NULL
edges.dbf <- cbind(edges.dbf,target)
edges.dbf$idsec <- rep(c(2, 1), times = length(near$NEAR_FID)/2)
edges_comp <- as.data.frame(subset(edges.dbf, idsec==1, select=c(IN_FID, NEAR_FID, target, Shape_Leng)))
colnames(edges_comp) <- c("edge_id", "source", "target", "weight")
edges <- edges_comp[c(2,3,4,1)]
general.graph.mx <- graph.data.frame(edges, directed = F)
if (!dir.exists("results")) {dir.create("results")}
getSizeDiameter <- function(i, decomposed) {
# return cluster size and diameter
return (c(length(V(decomposed[[i]])),
diameter(decomposed[[i]], directed = FALSE)))
}
percolate <- function(threshold){
start_time <- Sys.time()
print(paste0("Processing ", threshold))
print("Reducing graph")
sub.g = subgraph.edges(general.graph.mx,
E(general.graph.mx)[E(general.graph.mx)$weight <= threshold], del=T)
sub.g <- delete_edges(sub.g, E(sub.g)[E(sub.g)$weight > threshold])
sub.g <- delete_vertices(sub.g, degree(sub.g, mode = "total")==0)
components <- components(sub.g)
sub.g.csize <- induced.subgraph(sub.g, V(sub.g)[components$csize[components$membership]>=50])
print("Decomposing graph")
decomposed <- decompose.graph(sub.g.csize)
cl.list <- seq(1, components(sub.g.csize)$no)
cl = parallel::makeCluster(parallel::detectCores()-1)
parallel::setDefaultCluster(cl)
print(cl)
clusterEvalQ(cl, library(igraph))
clusterExport(cl, varlist = c("getSizeDiameter"), envir = environment())
print(paste0("Working on ", length(cl.list), " clusters..."))
out <- parLapplyLB(cl, cl.list, getSizeDiameter, decomposed)
out <- data.frame(matrix(unlist(out), nrow=length(cl.list), byrow=T))
# return cluster size and diameter
names(out) <- c("csize","diameter")
stopCluster(cl)
end_time <- Sys.time()
print(end_time - start_time)
saveRDS(out, file = paste0("results/perc.diam", threshold,".Rds"))
}
print("Ready. Use perc.diam<d> <- percolate(d) to generate one run or lapply on a sequence...")
# Calculate cluster sizes on network created using edges and different distance thresholds
# Do not consider graph diameter
pacman::p_load(foreign, igraph, parallel, dplyr)
if (!dir.exists("results")) {dir.create("results")}
if (!exists("edges.dbf")){
print("Creating general graph...")
edges.dbf <- read.dbf("data/edges.dbf")
near <- edges.dbf["NEAR_FID"]
target <- near %>% mutate(target = lag(NEAR_FID))
target$NEAR_FID <- NULL
edges.dbf <- cbind(edges.dbf,target)
edges.dbf$idsec <- rep(c(2, 1), times = length(near$NEAR_FID)/2)
edges_comp <- as.data.frame(subset(edges.dbf, idsec==1, select=c(IN_FID, NEAR_FID, target, Shape_Leng)))
colnames(edges_comp) <- c("edge_id", "source", "target", "weight")
edges <- edges_comp[c(2,3,4,1)]
general.graph.mx <- graph.data.frame(edges, directed = F)
}
getSize <- function(i, decomposed) {
# return cluster size
return (length(V(decomposed[[i]])))
}
percolateSize <- function(threshold) {
partial_start_time <- Sys.time()
print(paste0("Processing ", threshold))
print("Percolating graph for current distance...")
sub.g = subgraph.edges(general.graph.mx,
E(general.graph.mx)[E(general.graph.mx)$weight <= threshold], del=T)
sub.g <- delete_edges(sub.g, E(sub.g)[E(sub.g)$weight > threshold])
sub.g <- delete_vertices(sub.g, degree(sub.g, mode = "total")==0)
components <- components(sub.g)
sub.g.csize <- induced.subgraph(sub.g, V(sub.g)[components$csize[components$membership]>=50])
print("Decomposing graph for cluster sizes >= 50")
decomposed <- decompose.graph(sub.g.csize)
cl.list <- seq(1, components(sub.g.csize)$no)
print(paste0("Working on ", length(cl.list), " clusters..."))
out <- invisible(lapply(cl.list, getSize, decomposed))
out <- data.frame(matrix(unlist(out), nrow=length(cl.list), byrow=T))
# return cluster size
names(out) <- c("csize")
saveRDS(out, file = paste0("results/cl-size", threshold,".Rds"))
partial_end_time <- Sys.time()
print(partial_end_time - partial_start_time)
}
print("Ready. Use clsize<d> <- percolateSize(d) to generate one run or lapply on a sequence...")
# Calculate and plot Shannon's entropy for different distance thresholds
pacman::p_load(foreign, igraph, parallel, dplyr)
print("Reading input edge and vertex data...")
if (!exists("edges.dbf")){
print("Creating general graph...")
edges.dbf <- read.dbf("data/edges.dbf")
near <- edges.dbf["NEAR_FID"]
target <- near %>% mutate(target = lag(NEAR_FID))
target$NEAR_FID <- NULL
edges.dbf <- cbind(edges.dbf,target)
edges.dbf$idsec <- rep(c(2, 1), times = length(near$NEAR_FID)/2)
edges_comp <- as.data.frame(subset(edges.dbf, idsec==1, select=c(IN_FID, NEAR_FID, target, Shape_Leng)))
colnames(edges_comp) <- c("edge_id", "source", "target", "weight")
edges <- edges_comp[c(2,3,4,1)]
}
if (!exists("nodes")){
nodes <- read.csv("data/nodes.csv")
}
nodes <- nodes[c(1,1,2,3)]
colnames(nodes) <- c("id", "node_id", "lng", "lat")
print("Creating general graph...")
general.graph.mx <- graph.data.frame(edges, directed = F, vertices = nodes)
# percolate cluster sizes keeping all clusters - for entropy plot
percolate_sizes <- function(threshold){
current_distance <<- threshold
print(paste0("Working on ", threshold))
print("Reducing graph")
sub.g = subgraph.edges(general.graph.mx,
E(general.graph.mx)[E(general.graph.mx)$weight <= threshold], del=T)
sub.g <- delete_edges(sub.g, E(sub.g)[E(sub.g)$weight > threshold])
sub.g <- delete_vertices(sub.g, degree(sub.g, mode = "total")==0)
components <- components(sub.g)
sub.g.csize <- induced.subgraph(sub.g, V(sub.g)[components$csize[components$membership]>=50])
print("Getting components and membership")
components <- components(sub.g.csize)
# size df
sizec <- data.frame(components[["csize"]])
sizec$id <- seq(1:length(sizec$components...csize...))
colnames(sizec) <- c("csize", "membership")
sizec <- sizec[order(-sizec$csize),]
sizec$rank <- seq(1:length(sizec$csize))
print("Writing output files...")
saveRDS(sizec, file = paste0("results/entropy", threshold,".Rds"))
}
print("Ready. Use entropy<d> <- percolate_sizes(d) to generate one run or lapply on a sequence...")
# percolation
City percolation
\ No newline at end of file
City percolation code in R.
Use `1 percolate.R` to percolate network created using edges and different distance thresholds.
Use `2 cluster_sizes.R` to calculate cluster sizes on network created using edges and different distance thresholds without considering graph diameter.
Use `3 entropy.R` to calculate and plot Shannon's entropy for different distance thresholds.
Results are written to a `results` folder created during runtime if it does not already exist.
\ No newline at end of file
File added
This diff is collapsed.
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment