Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #######################################
- # SHINY Explorer - build UK boundaries
- #######################################
- # The following is the R code to create arbitrary aggregation boundaries, starting from some base shapefile layer.
- # For the UK the reference is the union of:
- # - Output Areas in England and Wales (EW). Generalised Clipped: http://geoportal.statistics.gov.uk/datasets/09b8a48426e3482ebbc0b0c49985c0fb_2
- # - Output Areas in Scotland (SC) https://www.nrscotland.gov.uk/statistics-and-data/geography/our-products/census-datasets/2011-census/2011-boundaries
- # - Small Areas in Northern Ireland (NI) http://www.nisra.gov.uk/geography/SmallAreas.htm
- # Once downloaded, rename each group of files with the corresponding two characters acronym.
- # It's necessary to keep only the files with the four following extensions: shp, shx, prj, dbf.
- # One small problem to be aware of is the different original projection of the boundaries:
- # EW: '+init=epsg:27700' # http://spatialreference.org/ref/epsg/osgb-1936-british-national-grid/
- # SC: '+init=epsg:27700' # http://spatialreference.org/ref/epsg/osgb-1936-british-national-grid/
- # NI: '+init=epsg:29902' # http://spatialreference.org/ref/epsg/29902/
- # but all of the above will be transformed in the standard WGS84 for web mapping.
- # ONS Reference Country codes
- # - E92000001 England;
- # - W92000004 Wales;
- # - S92000003 Scotland;
- # - N92000002 Northern Ireland;
- # The following are the number of features in each set of boundaries:
- # - EW: 181,408 (E 171,372 + W 10,036)
- # - SC: 46,351
- # - NI: 4,537
- # for a total UK of 232,296. This is important to know because when simplifying the polygons overall, some of the features could dissolve.
- # To successfully aggregate, the script needs also a lookup table that gives back a unique aggregated area code for every output/small area (from now on just OA).
- # This table could be built for many types of aggregations using, for example, the Postcodes Directories:
- # - NHS Postcode Directory: http://ons.maps.arcgis.com/home/item.html?id=4e63d9b589604c0ba10fbbd3bef08e6f
- # - ONS Postcode Directory: http://ons.maps.arcgis.com/home/item.html?id=dfa0ff74981b4b228d2030d852f0b14a
- # These are maintained and published every quarter by the ONS, that relates both current and terminated postcodes in the UK
- # to a range of current and past statutory administrative, electoral, health and other geographies. The downside to this approach is that some OA could belong to different aggregations, and it becomes somewhat arbitrary, without more knowledge, decide which is the right destination area.
- # When saving to the database, there is an added column 'type' that is part of the foreign key ('id', 'type') connecting to the primary key ('boundary_id', 'type') in the table *boundaries_ids*, used to retrieve the correct id of the feature to link to external data.
- # I saved all files in s:\ICS\Projects\Data Management\R_Shiny\Resources\boundaries_aggregations\
- # It is finally important to understand that when reproducing ONS content, the output must include a source accreditation to ONS.
- # LOAD PACKAGES ---------------------------------------------------------------------------------------------------
- pkg <- c('maptools', 'rgeos', 'rgdal', 'rmapshaper', 'RMySQL')
- invisible( lapply(pkg, require, character.only = TRUE) )
- # SET VARIABLES ---------------------------------------------------------------------------------------------------
- boundaries.path <- 'C:/cloud/OneDrive - University College London/data/boundaries/shp'
- proj.wgs <- '+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0'
- # LOAD AND MERGE EW, SC AND NI OAs MAP INTO A UNIQUE UK MAP -------------------------------------------------------
- # England and Wales
- # read polygons from shapefile (using rgdal, projection is automatically applied if the prj file is found)
- shp.ew <- readOGR(boundaries.path, layer = 'EW')
- # check the projection, and read the field to keep as future id
- summary(shp.ew)
- # transform the shapefile to WGS84 projection
- shp.ew <- spTransform(shp.ew, CRS(proj.wgs))
- # keep in the data slot only the ONS Output Area id, renaming it as 'id'
- shp.ew <- shp.ew[, 'oa11cd']
- colnames(shp.ew@data) <- c('id')
- # reassign the polygon IDs
- shp.ew <- spChFIDs(shp.ew, as.character(shp.ew$id))
- # Scotland
- shp.sc <- readOGR(boundaries.path, layer = 'SC')
- summary(shp.sc)
- shp.sc <- spTransform(shp.sc, CRS(proj.wgs))
- shp.sc <- shp.sc[, 'code']
- colnames(shp.sc@data) <- c('id')
- shp.sc <- spChFIDs(shp.sc, as.character(shp.sc$id))
- # N.Ireland
- shp.ni <- readOGR(boundaries.path, layer = 'NI')
- summary(shp.ni)
- shp.ni <- spTransform(shp.ni, CRS(proj.wgs))
- shp.ni <- shp.ni[, 'SA2011']
- colnames(shp.ni@data) <- c('id')
- shp.ni <- spChFIDs(shp.ni, as.character(shp.ni$id))
- # UK as a merge of all previous boundaries
- shp.uk <- spRbind(spRbind(shp.ew, shp.sc), shp.ni)
- # reduce the complexity of the boundaries
- # run the following line only on Linux with at least 32GB RAM, it should take a few hours
- # shp.uk <- ms_simplify(shp.uk, keep = 0.05)
- # save merged polygons as a unique shapefile
- try( do.call(file.remove, as.list(paste0(boundaries.path, '/', list.files(path = boundaries.path, pattern = 'UK') ) ) ), silent = FALSE )
- writeOGR(shp.uk, dsn = boundaries.path, layer = 'UK', driver = 'ESRI Shapefile')
- # # # STEP 2 - CREATE AGGREGATION MAPS USING PREVIOUS UK MAP AND A LOOKUP TABLE ------------------------------------------------------------
- # read polygons from shapefile
- shp.uk <- readOGR(boundaries.path, layer = 'UK')
- # connect to database
- db_conn <- dbConnect(MySQL(), user = 'root', password = 'root', dbname = 'geography')
- # retrieve lookup table
- lookups <- dbReadTable(db_conn, 'lookups')
- # list of aggregations to process
- areas <- c('CCG', 'LAT', 'NHSR', 'CCR', 'CTRY')
- for(area in areas){
- # join shapefile data slot and lookup table on the area code
- shp.uk <- merge(shp.uk, lookups[, c('OA', area)], by.x = 'id', by.y = 'OA')
- # Build the list of subareas
- subareas <- sort(unique(shp.uk[[area]]))
- # Define first area
- subarea <- subareas[1]
- # Print a processing message
- print(paste('Processing', area, 'subarea', subarea, '- number 1 out of', length(subareas)))
- # select all OA contained in first subarea
- shp.area <- subset(shp.uk, shp.uk[[area]] == subarea)
- # dissolve submap
- shp.area <- ms_dissolve(shp.area)
- # define object id
- shp.area$id <- subarea
- shp.area <- spChFIDs(shp.area, as.character(shp.area$id))
- # proceed in the same way for all other subareas, attaching every results to previous object
- for(idx in 2:length(subareas)){
- subarea <- subareas[idx]
- print(paste('Processing', area, 'subarea', subarea, '- number', idx, 'out of', length(subareas)))
- shp.tmp <- subset(shp.uk, shp.uk[[area]] == subarea)
- shp.tmp <- ms_dissolve(shp.tmp)
- shp.tmp$id <- subarea
- shp.tmp <- spChFIDs(shp.tmp, as.character(shp.tmp$id))
- shp.area <- spRbind(shp.area, shp.tmp)
- }
- print(paste0('Simplifying ', area, 's'))
- # reduce the details of the boundaries for less space (it's a statistical map, not an OS Explorer Map!)
- shp.area <- ms_simplify(shp.area, keep = 0.05)
- # Print a processing message
- print(paste('Saving', area, 'in dataframe format'))
- # save lookup (rmapshaperid, id)
- lkp <- shp.area@data
- # adding the area type
- lkp$type <- area
- # rename columns of lookup
- colnames(lkp) <- c('boundary_id', 'id', 'type')
- # save lookup to database
- dbSendQuery(db_conn, paste0("DELETE FROM boundaries_ids WHERE type = '", area, "'") )
- dbWriteTable(db_conn, 'boundaries_ids', lkp, row.names = FALSE, append = TRUE)
- # create dataframe suitable for ggplot
- df.area <- tidy(shp.area)
- # change names in dataframe to coordinates to avoid possible mismatching with programming languages keywords
- setnames(df.area, c('long', 'lat'), c('X_lon', 'Y_lat'))
- # add "type" column
- df.area$type <- area
- # save dataframe to database
- dbSendQuery(db_conn, paste0("DELETE FROM boundaries WHERE type = '", area, "'") )
- dbWriteTable(db_conn, 'boundaries', df.area, row.names = FALSE, append = TRUE)
- # Print a processing message
- print(paste('Saving', area, 'in shapefile format'))
- # delete the rmapshaperid from Polygons
- shp.area <- shp.area[, 'id']
- # save Polygons as shapefile
- try( do.call(file.remove, as.list(paste0(boundaries.path, '/', list.files(path = boundaries.path, pattern = area) ) ) ), silent = TRUE )
- writeOGR(shp.area, dsn = boundaries.path, layer = area, driver = 'ESRI Shapefile')
- # some polishing...
- shp.uk <- shp.uk[, 'id']
- rm(df.area)
- rm(shp.area)
- gc()
- }
- # clean and exit
- dbDisconnect(db_conn)
- rm(list = ls())
- gc()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement