Advertisement
lvalnegri

build_UK_boundaries.R

May 16th, 2018
326
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
R 8.77 KB | None | 0 0
  1. #######################################
  2. # SHINY Explorer - build UK boundaries
  3. #######################################
  4.  
  5. # The following is the R code to create arbitrary aggregation boundaries, starting from some base shapefile layer.
  6. # For the UK the reference is the union of:
  7. # - Output Areas in England and Wales (EW). Generalised Clipped: http://geoportal.statistics.gov.uk/datasets/09b8a48426e3482ebbc0b0c49985c0fb_2
  8. # - Output Areas in Scotland (SC) https://www.nrscotland.gov.uk/statistics-and-data/geography/our-products/census-datasets/2011-census/2011-boundaries
  9. # - Small Areas in Northern Ireland (NI) http://www.nisra.gov.uk/geography/SmallAreas.htm
  10. # Once downloaded, rename each group of files with the corresponding two characters acronym.
  11. # It's necessary to keep only the files with the four following extensions: shp, shx, prj, dbf.
  12.  
  13. # One small problem to be aware of is the different original projection of the boundaries:
  14. #   EW: '+init=epsg:27700'  # http://spatialreference.org/ref/epsg/osgb-1936-british-national-grid/
  15. #   SC: '+init=epsg:27700'   # http://spatialreference.org/ref/epsg/osgb-1936-british-national-grid/
  16. #   NI: '+init=epsg:29902'   # http://spatialreference.org/ref/epsg/29902/
  17. # but all of the above will be transformed in the standard WGS84 for web mapping.
  18.  
  19. # ONS Reference Country codes
  20. #  - E92000001 England;
  21. #  - W92000004 Wales;
  22. #  - S92000003 Scotland;
  23. #  - N92000002 Northern Ireland;
  24.  
  25. # The following are the number of features in each set of boundaries:
  26. #   - EW: 181,408 (E 171,372 + W 10,036)
  27. #   - SC:  46,351
  28. #   - NI:   4,537
  29. # for a total UK of 232,296. This is important to know because when simplifying the polygons overall, some of the features could dissolve.
  30.  
  31. # To successfully aggregate, the script needs also a lookup table that gives back a unique aggregated area code for every output/small area (from now on just OA).
  32. # This table could be built for many types of aggregations using, for example, the Postcodes Directories:
  33. #  - NHS Postcode Directory: http://ons.maps.arcgis.com/home/item.html?id=4e63d9b589604c0ba10fbbd3bef08e6f
  34. #  - ONS Postcode Directory: http://ons.maps.arcgis.com/home/item.html?id=dfa0ff74981b4b228d2030d852f0b14a
  35. # These are maintained and published every quarter by the ONS, that relates both current and terminated postcodes in the UK
  36. # to a range of current and past statutory administrative, electoral, health and other geographies. The downside to this approach is that some OA could belong to different aggregations, and it becomes somewhat arbitrary, without more knowledge, decide which is the right destination area.
  37.  
  38. # When saving to the database, there is an added column 'type' that is part of the foreign key ('id', 'type') connecting to the primary key ('boundary_id', 'type') in the table *boundaries_ids*, used to retrieve the correct id of the feature to link to external data.
  39. # I saved all files in s:\ICS\Projects\Data Management\R_Shiny\Resources\boundaries_aggregations\
  40.  
  41. # It is finally important to understand that when reproducing ONS content, the output must include a source accreditation to ONS.
  42.  
  43.  
  44.  
  45. # LOAD PACKAGES ---------------------------------------------------------------------------------------------------
  46. pkg <- c('maptools', 'rgeos', 'rgdal', 'rmapshaper', 'RMySQL')
  47. invisible( lapply(pkg, require, character.only = TRUE) )
  48.  
  49. # SET VARIABLES ---------------------------------------------------------------------------------------------------
  50.  
  51. boundaries.path <- 'C:/cloud/OneDrive - University College London/data/boundaries/shp'
  52. proj.wgs <- '+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0'
  53.  
  54. # LOAD AND MERGE EW, SC AND NI OAs MAP INTO A UNIQUE UK MAP  -------------------------------------------------------
  55.  
  56. # England and Wales
  57. # read polygons from shapefile (using rgdal, projection is automatically applied if the prj file is found)
  58. shp.ew <- readOGR(boundaries.path, layer = 'EW')
  59. # check the projection, and read the field to keep as future id
  60. summary(shp.ew)
  61. # transform the shapefile to WGS84 projection
  62. shp.ew <- spTransform(shp.ew, CRS(proj.wgs))
  63. # keep in the data slot only the ONS Output Area id, renaming it as 'id'
  64. shp.ew <- shp.ew[, 'oa11cd']
  65. colnames(shp.ew@data) <- c('id')
  66. # reassign the polygon IDs
  67. shp.ew <- spChFIDs(shp.ew, as.character(shp.ew$id))
  68.  
  69. # Scotland
  70. shp.sc <- readOGR(boundaries.path, layer = 'SC')
  71. summary(shp.sc)
  72. shp.sc <- spTransform(shp.sc, CRS(proj.wgs))
  73. shp.sc <- shp.sc[, 'code']
  74. colnames(shp.sc@data) <- c('id')
  75. shp.sc <- spChFIDs(shp.sc, as.character(shp.sc$id))
  76.  
  77. # N.Ireland
  78. shp.ni <- readOGR(boundaries.path, layer = 'NI')
  79. summary(shp.ni)
  80. shp.ni <- spTransform(shp.ni, CRS(proj.wgs))
  81. shp.ni <- shp.ni[, 'SA2011']
  82. colnames(shp.ni@data) <- c('id')
  83. shp.ni <- spChFIDs(shp.ni, as.character(shp.ni$id))
  84.  
  85. # UK as a merge of all previous boundaries
  86. shp.uk <- spRbind(spRbind(shp.ew, shp.sc), shp.ni)
  87. # reduce the complexity of the boundaries
  88. # run the following line only on Linux with at least 32GB RAM, it should take a few hours
  89. # shp.uk <- ms_simplify(shp.uk, keep = 0.05)
  90. # save merged polygons as a unique shapefile
  91. try( do.call(file.remove, as.list(paste0(boundaries.path, '/', list.files(path = boundaries.path, pattern = 'UK') ) ) ), silent = FALSE )
  92. writeOGR(shp.uk, dsn = boundaries.path, layer = 'UK', driver = 'ESRI Shapefile')
  93.  
  94.  
  95. # # # STEP 2 - CREATE AGGREGATION MAPS USING PREVIOUS UK MAP AND A LOOKUP TABLE ------------------------------------------------------------
  96.  
  97. # read polygons from shapefile
  98. shp.uk <- readOGR(boundaries.path, layer = 'UK')
  99. # connect to database
  100. db_conn <- dbConnect(MySQL(), user = 'root', password = 'root', dbname = 'geography')
  101. # retrieve lookup table
  102. lookups <- dbReadTable(db_conn, 'lookups')
  103. # list of aggregations to process
  104. areas <- c('CCG', 'LAT', 'NHSR', 'CCR', 'CTRY')
  105. for(area in areas){
  106.     # join shapefile data slot and lookup table on the area code
  107.     shp.uk <- merge(shp.uk, lookups[, c('OA', area)], by.x = 'id', by.y = 'OA')
  108.     # Build the list of subareas
  109.     subareas <- sort(unique(shp.uk[[area]]))
  110.     # Define first area
  111.     subarea <- subareas[1]
  112.     # Print a processing message
  113.     print(paste('Processing', area, 'subarea', subarea, '- number 1 out of', length(subareas)))
  114.     # select all OA contained in first subarea
  115.     shp.area <- subset(shp.uk, shp.uk[[area]] == subarea)
  116.     # dissolve submap
  117.     shp.area <- ms_dissolve(shp.area)
  118.     # define object id
  119.     shp.area$id <- subarea
  120.     shp.area <- spChFIDs(shp.area, as.character(shp.area$id))
  121.     # proceed in the same way for all other subareas, attaching every results to previous object
  122.     for(idx in 2:length(subareas)){
  123.         subarea <- subareas[idx]
  124.         print(paste('Processing', area, 'subarea', subarea, '- number', idx, 'out of', length(subareas)))
  125.         shp.tmp <- subset(shp.uk, shp.uk[[area]] == subarea)
  126.         shp.tmp <- ms_dissolve(shp.tmp)
  127.         shp.tmp$id <- subarea
  128.         shp.tmp <- spChFIDs(shp.tmp, as.character(shp.tmp$id))
  129.         shp.area <- spRbind(shp.area, shp.tmp)
  130.     }
  131.     print(paste0('Simplifying ', area, 's'))
  132.     # reduce the details of the boundaries for less space (it's a statistical map, not an OS Explorer Map!)
  133.     shp.area <- ms_simplify(shp.area, keep = 0.05)
  134.     # Print a processing message
  135.     print(paste('Saving', area, 'in dataframe format'))
  136.     # save lookup (rmapshaperid, id)
  137.     lkp <- shp.area@data
  138.     #  adding the area type
  139.     lkp$type <- area
  140.     #  rename columns of lookup
  141.     colnames(lkp) <- c('boundary_id', 'id', 'type')
  142.     # save lookup to database
  143.     dbSendQuery(db_conn, paste0("DELETE FROM boundaries_ids WHERE type = '", area, "'") )
  144.     dbWriteTable(db_conn, 'boundaries_ids', lkp, row.names = FALSE, append = TRUE)
  145.     # create dataframe suitable for ggplot
  146.     df.area <- tidy(shp.area)
  147.     # change names in dataframe to coordinates to avoid possible mismatching with programming languages keywords
  148.     setnames(df.area, c('long', 'lat'), c('X_lon', 'Y_lat'))
  149.     # add "type" column
  150.     df.area$type <- area
  151.     # save dataframe to database
  152.     dbSendQuery(db_conn, paste0("DELETE FROM boundaries WHERE type = '", area, "'") )
  153.     dbWriteTable(db_conn, 'boundaries', df.area, row.names = FALSE, append = TRUE)
  154.     # Print a processing message
  155.     print(paste('Saving', area, 'in shapefile format'))
  156.     # delete the rmapshaperid from Polygons
  157.     shp.area <- shp.area[, 'id']
  158.     # save Polygons as shapefile
  159.     try( do.call(file.remove, as.list(paste0(boundaries.path, '/', list.files(path = boundaries.path, pattern = area) ) ) ), silent = TRUE )
  160.     writeOGR(shp.area, dsn = boundaries.path, layer = area, driver = 'ESRI Shapefile')
  161.     # some polishing...
  162.     shp.uk <- shp.uk[, 'id']
  163.     rm(df.area)
  164.     rm(shp.area)
  165.     gc()
  166. }
  167.  
  168.  
  169. # clean and exit
  170. dbDisconnect(db_conn)
  171. rm(list = ls())
  172. gc()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement