How to create new polygons by simplifying two SpatialPolygonsDataFrame objects in R?

Let's say I have two sets of form files that cover the same region and often but not always share borders, for example. USA and PUMAs. I would like to define a new scale for a polygon that uses both PUMA and circles as atomic building blocks, i.e. None of them can be divided, but I still like as many units as possible. Here is an example of a toy:

library(sp) # make fake data # 1) counties: Cty <- SpatialPolygons(list( Polygons(list(Polygon(cbind(x=c(0,2,2,1,0,0),y=c(0,0,2,2,1,0)), hole=FALSE)),"county1"), Polygons(list(Polygon(cbind(x=c(2,4,4,3,3,2,2),y=c(0,0,2,2,1,1,0)),hole=FALSE)),"county2"), Polygons(list(Polygon(cbind(x=c(4,5,5,4,4),y=c(0,0,3,2,0)),hole=FALSE)),"county3"), Polygons(list(Polygon(cbind(x=c(0,1,2,2,0,0),y=c(1,2,2,3,3,1)),hole=FALSE)),"county4"), Polygons(list(Polygon(cbind(x=c(2,3,3,4,4,3,3,2,2),y=c(1,1,2,2,3,3,4,4,1)),hole=FALSE)),"county5"), Polygons(list(Polygon(cbind(x=c(0,2,2,1,0,0),y=c(3,3,4,5,5,3)),hole=FALSE)),"county6"), Polygons(list(Polygon(cbind(x=c(1,2,3,4,1),y=c(5,4,4,5,5)),hole=FALSE)),"county7"), Polygons(list(Polygon(cbind(x=c(3,4,4,5,5,4,3,3),y=c(3,3,2,3,5,5,4,3)),hole=FALSE)),"county8") )) counties <- SpatialPolygonsDataFrame(Cty, data = data.frame(ID=paste0("county",1:8), row.names=paste0("county",1:8), stringsAsFactors=FALSE) ) # 2) PUMAs: Pum <- SpatialPolygons(list( Polygons(list(Polygon(cbind(x=c(0,4,4,3,3,2,2,1,0,0),y=c(0,0,2,2,1,1,2,2,1,0)), hole=FALSE)),"puma1"), Polygons(list(Polygon(cbind(x=c(4,5,5,4,3,3,4,4),y=c(0,0,5,5,4,3,3,0)),hole=FALSE)),"puma2"), Polygons(list(Polygon(cbind(x=c(0,1,2,2,3,3,2,0,0),y=c(1,2,2,1,1,2,3,3,1)),hole=FALSE)),"puma3"), Polygons(list(Polygon(cbind(x=c(2,3,4,4,3,3,2,2),y=c(3,2,2,3,3,4,4,3)),hole=FALSE)),"puma4"), Polygons(list(Polygon(cbind(x=c(0,1,1,3,4,0,0),y=c(3,3,4,4,5,5,3)),hole=FALSE)),"puma5"), Polygons(list(Polygon(cbind(x=c(1,2,2,1,1),y=c(3,3,4,4,3)),hole=FALSE)),"puma6") )) Pumas <- SpatialPolygonsDataFrame(Pum, data = data.frame(ID=paste0("puma",1:6), row.names=paste0("puma",1:6), stringsAsFactors=FALSE) ) # desired result: Cclust <- SpatialPolygons(list( Polygons(list(Polygon(cbind(x=c(0,4,4,3,3,2,2,1,0,0),y=c(0,0,2,2,1,1,2,2,1,0)), hole=FALSE)),"ctyclust1"), Polygons(list(Polygon(cbind(x=c(4,5,5,4,3,3,4,4),y=c(0,0,5,5,4,3,3,0)),hole=FALSE)),"ctyclust2"), Polygons(list(Polygon(cbind(x=c(0,1,2,2,3,3,4,4,3,3,2,2,0,0),y=c(1,2,2,1,1,2,2,3,3,4,4,3,3,1)),hole=FALSE)),"ctyclust3"), Polygons(list(Polygon(cbind(x=c(0,2,2,3,4,0,0),y=c(3,3,4,4,5,5,3)),hole=FALSE)),"ctyclust4") )) CtyClusters <- SpatialPolygonsDataFrame(Cclust, data = data.frame(ID = paste0("ctyclust", 1:4), row.names = paste0("ctyclust", 1:4), stringsAsFactors=FALSE) ) # take a look par(mfrow = c(1, 2)) plot(counties, border = gray(.3), lwd = 4) plot(Pumas, add = TRUE, border = "#EEBB00", lty = 2, lwd = 2) legend(-.5, -.3, lty = c(1, 2), lwd = c(4, 2), col = c(gray(.3), "#EEBB00"), legend = c("county line", "puma line"), xpd = TRUE, bty = "n") text(coordinates(counties), counties@data $ID,col = gray(.3)) text(coordinates(Pumas), Pumas@data $ID, col = "#EEBB00",cex=1.5) title("building blocks") #desired result: plot(CtyClusters) title("desired result") text(-.5, -.5, "maximum units possible,\nwithout breaking either PUMAs or counties", xpd = TRUE, pos = 4) 

enter image description here I naively tried to use many of the g * functions in the rgeos package to achieve this and did not succeed. Does anyone know of a good feature or a terrific trick for this task? Thanks!

[I am also open to suggestions for a better name]

+7
source share
3 answers

Here's a relatively concise solution that:

  • Uses rgeos::gRelate() to identify Pumas that overlap but do not fully cover / cover each county. To understand what he is doing, run example(gRelate) and look at this Wikipedia page . (ch. Tim Riff)

  • Uses RBGL::connectedComp() to identify the Pumas groups that should be combined. (For pointers to installing the RBGL package , see My SO answer to this question .)

  • Uses rgeos::gUnionCascaded() to combine the specified Pumas.

     library(rgeos) library(RBGL) ## Identify groups of Pumas that should be merged x <- gRelate(counties, Pumas, byid=TRUE) x <- matrix(grepl("2.2......", x), ncol=ncol(x), dimnames=dimnames(x)) k <- x %*% t(x) l <- connectedComp(as(k, "graphNEL")) ## Extend gUnionCascaded so that each SpatialPolygon gets its own ID. gMerge <- function(ii) { x <- gUnionCascaded(Pumas[ii,]) spChFIDs(x, paste(ii, collapse="_")) } ## Merge Pumas as needed res <- do.call(rbind, sapply(l, gMerge)) plot(res) 

enter image description here

+3
source

I think you could do this with a smart test suite for containment. This gets two parts, a simple doubles case where puma1 contains county1 and county2 , and puma2 contains county8 and county3 .

 library(rgeos) ## pumas by counties pbyc <- gContains(Pumas, counties, byid = TRUE) ## row / col pairs of where contains test is TRUE for Pumas pbyc.pairs <- cbind(row(pbyc)[pbyc], col(pbyc)[pbyc]) par(mfrow = c(nrow(pbyc.pairs), 1)) for (i in 1:nrow(pbyc.pairs)) { plot(counties, col = "white") plot(gUnion(counties[pbyc.pairs[i,1], ], Pumas[pbyc.pairs[i,2], ]), col = "red", add = TRUE) } 

The plot there is stupidly redundant, but I think it shows the beginning. You need to find something that contains tests that accumulate the smallest parts, and then combine them. Sorry, I didn’t make an effort to finish, but I think it will work.

+3
source

After much trial and error, I came up with the following inelegant solution, rather, in accordance with the advice from @mdsummer, but adding additional checks, removing redundant merged polygons and checking. Clap. If someone can take what I have done and make it cleaner, then I would rather accept this answer, which I would like to avoid, if at all possible:

 library(rgeos) pbyc <- gCovers(Pumas, counties, byid = TRUE) | gContains(Pumas, counties, byid = TRUE) | gOverlaps(Pumas, counties, byid = TRUE) | t(gCovers(counties, Pumas, byid = TRUE) | gContains(counties, Pumas, byid = TRUE) | gOverlaps(counties, Pumas, byid = TRUE)) ## row / col pairs of where test is TRUE for Pumas or counties pbyc.pairs <- cbind(row(pbyc)[pbyc], col(pbyc)[pbyc]) Potentials <- apply(pbyc.pairs, 1, function(x,counties,Pumas){ gUnion(counties[x[1], ], Pumas[x[2], ]) }, counties = counties, Pumas= Pumas) for (i in 1:length(Potentials)){ Potentials[[i]]@polygons[[1]]@ID <- paste0("p",i) } Potentials <- do.call("rbind",Potentials) # remove redundant polygons: Redundants <- gEquals(Potentials, byid = TRUE) Redundants <- row(Redundants)[Redundants & lower.tri(Redundants)] Potentials <- Potentials[-c(Redundants),] # for each Potential summary polygon, see which counties and Pumas are contained: keep.i <- vector(length=length(Potentials)) for (i in 1:length(Potentials)){ ctyblocki <- gUnionCascaded(counties[c(gCovers(Potentials[i, ], counties, byid = TRUE)), ]) Pumablocki <- gUnionCascaded(Pumas[c(gCovers(Potentials[i, ], Pumas, byid = TRUE)), ]) keep.i[i] <- gEquals(ctyblocki, Potentials[i, ]) & gEquals(Pumablocki, Potentials[i, ]) } # what do we have left? NewUnits <- Potentials[keep.i, ] plot(NewUnits) 

enter image description here

+1
source

All Articles