Advertisement
Guest User

Untitled

a guest
Dec 18th, 2014
164
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.69 KB | None | 0 0
  1. library(sp)
  2. library(rgeos)
  3. library(plyr)
  4. # Create SpatialPointsDataFrame
  5. # My actual dataset has 24 million observations
  6. my.pts <- data.frame(LONGITUDE=c(-85.4,-84.7,-82.7,-82.7,-86.5,-88.9,-94.8,-83.9,-87.8,-82.8),
  7. LATITUDE=c(30.0,29.9,27.5,28.5,30.4,26.1,29.3,28.0,29.4,27.8),
  8. MYID=c(1,1,2,2,2,2,3,4,4,4),
  9. INDEX=1:10)
  10. coordinates(my.pts) <- c("LONGITUDE","LATITUDE")
  11.  
  12. # Create two polygons in a SpatialPolygonsDataFrame
  13. # My actual dataset has 71 polygons (U.S. counties)
  14. x1 <- data.frame(x=c(-92.3, -92.3, -90.7, -90.7, -92.3, -92.3),y=c(27.6, 29.4, 29.4, 27.6, 27.6, 27.6))
  15. x1 <- as.data.frame(x1)
  16. x1 <- Polygon(rbind(x1,x1[1,]))
  17.  
  18. x2 <- data.frame(x=c(-85.2, -85.2, -83.3, -83.2, -85.2, -85.2),y=c(26.4, 26.9, 26.9, 26.0, 26.4, 26.4))
  19. x2 <- as.data.frame(x2)
  20. x2 <- Polygon(rbind(x2,x2[1,]))
  21.  
  22. poly1 <- Polygons(list(x1),"poly1")
  23. poly2 <- Polygons(list(x2),"poly2")
  24. myShp <- SpatialPolygons(list(poly1,poly2),1:2)
  25. sdf <- data.frame(ID=c(1,2))
  26. row.names(sdf) <- c("poly1","poly2")
  27. myShp <- SpatialPolygonsDataFrame(myShp,data=sdf)
  28.  
  29. # I have been outputting my results to a list. With this small sample, it's easy to just put everything into the object county.vec. But I worry that the 24 million x 71 object would not be feasible. The non-loop version shows the output I've been getting more easily.
  30.  
  31. COUNTY.LIST <- list()
  32. county.vec <- gDistance(my.pts, myShp, byid=TRUE)
  33. COUNTY.LIST[[1]] = apply(county.vec, 2, min)
  34. COUNTY.LIST[[2]] = apply(county.vec, 2, which.min)
  35. COUNTY.LIST[[3]] = my.pts$INDEX
  36.  
  37. # I have been putting it into a loop so that county.vec gets dumped for each version of the loop.
  38. # Seems like this could be done using dlply perhaps? And then I would have the power of parallel processing?
  39. idx <- unique(my.pts$MYID)
  40. COUNTY.LIST <- list()
  41. for(i in 1:length(idx)){
  42. COUNTY.LIST[[i]] <- list()
  43. county.vec <- gDistance(my.pts[my.pts$MYID==idx[i],], myShp, byid=TRUE)
  44. COUNTY.LIST[[i]][[1]] = apply(county.vec, 2, min)
  45. COUNTY.LIST[[i]][[2]] = apply(county.vec, 2, which.min)
  46. COUNTY.LIST[[i]][[3]] = my.pts$MY[my.pts$MYID==idx[i]]
  47. rm(county.vec)
  48. }
  49.  
  50. dlply(my.pts,.(MYID),gDistance(my.pts, myShp, byid=TRUE),.parallel=TRUE)
  51. > dlply(my.pts,.(MYID),gDistance(my.pts, myShp, byid=TRUE))
  52. Error in eval.quoted(.variables, data) :
  53. envir must be either NULL, a list, or an environment.
  54.  
  55. # I suspect this error is because my.pts is a SpatialPointsPolygon. I also recognize that my function call probably isn't right, but first things first.
  56.  
  57. # I tried another way to reference the MYID field, more inline with treatment of S4 objects...
  58. dlply(my.pts,my.pts@data$MYID,gDistance(my.pts, myShp, byid=TRUE),.parallel=TRUE)
  59.  
  60. # It yields the same error.
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement