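# [Pastebin page chrome, not part of the script] Advertisement
# [Pastebin page chrome, not part of the script] Not a member of Pastebin yet?
# [Pastebin page chrome, not part of the script] Sign Up,
# [Pastebin page chrome, not part of the script] it unlocks many cool features!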
## Set working directory
## NOTE(review): hard-coded, machine-specific path. The relative paths used
## further down in this section ("crypt.R", "myrkey.RData", "credentials.txt")
## are resolved against it.
setwd(file.path("G:", "projects", "serve-interactive-maps-beta"))

## Import packages
library(data.table)
library(digest)
library(dplyr)
library(httr)
library(jsonlite)
library(magrittr)
library(parallel)
library(rmongodb)
library(stringr)

## Set R options for convenience:
##   digits = 10  -> print numbers with up to 10 significant digits
##   width  = 512 -> very wide console output, so long rows are not wrapped
##   warn   = 0   -> warnings are deferred until the top-level call returns
options(digits = 10, width = 512, warn = 0)

## Point R at the user library. These two calls run after the library() calls
## above, so they do not influence where those packages were loaded from.
## NOTE(review): presumably the environment variable is set for child R
## processes (e.g. cluster workers) -- confirm.
Sys.setenv(R_LIBS_USER = "C:/R/library")
.libPaths("C:/R/library")

## Load the decryption helpers and key, then decrypt the credentials file.
## NOTE(review): `read.aes` is presumably defined by crypt.R and `key` is
## presumably restored by myrkey.RData; neither definition is visible here.
source("crypt.R")
load("myrkey.RData")
credentials <- read.aes(filename = "credentials.txt", key = key)
## Declare external functions to be used

## No-op helper that lets a block of text or code be passed as an argument
## without any effect.
##
## @param `@Comments` Any expression. It is never evaluated (R arguments are
##   lazy and the body does not touch it), so even code that would fail is
##   safe to pass.
## @return `NULL`, invisibly.
Comment <- function(`@Comments`) {
  invisible(NULL)
}
## Optional HTTP proxy configuration for httr.
## Disabled by default: change FALSE to TRUE to route httr requests through
## the proxy described by the decrypted `credentials` object.
if (FALSE) {
  set_config(
    use_proxy(
      url = credentials$url,
      port = credentials$port,
      username = credentials$username,
      password = credentials$password
    )
  )
}
## One-off installation of `rmongodb` from GitHub.
## Disabled by default: change FALSE to TRUE on a machine where the package
## is not installed yet (requires `devtools`).
if (FALSE) {
  ## Install `rmongodb` if necessary
  library(devtools)
  install_github(repo = "mongosoup/rmongodb")
}
## ---------------------------------------------------------------------------
## Import pipeline: read each JSON file, replace the identifying columns with
## MD5 digests, and batch-insert the rows into the "science.houjinkai"
## MongoDB collection.
## ---------------------------------------------------------------------------

## Digest a single value. Runs on the cluster workers; it is defined at top
## level so that only the function itself (not a large enclosing frame) is
## shipped to them, and `digest::` loads the package there on demand.
## NOTE(review): per the digest documentation `seed` is only used by the
## xxhash/murmur algorithms, so it presumably has no effect on md5 -- confirm.
md5_one <- function(x, seed) {
  digest::digest(object = x, algo = "md5", seed = seed, errormode = "warn")
}

## Digest every element of `values` in parallel on cluster `cl`.
## USE.NAMES = FALSE matters: by default parSapply() names its result after a
## character input, which would attach each plaintext value to its own hash.
hash_column <- function(cl, values, seed) {
  parSapply(cl, values, md5_one, seed = seed, USE.NAMES = FALSE)
}

## Turn a one-row data frame into a named list, dropping NA fields so they
## are simply absent from the stored document.
row_to_list <- function(row) {
  fields <- as.list(row)
  fields[!is.na(fields)]
}

## Add the md5_* columns to `doc` and, when a group column ("houjinkai" or
## "nouzeikyoukai") is present, keep only the hashed and location columns.
## NOTE(review): when neither group column is present nothing is dropped, so
## the original identifying columns are inserted as-is (unchanged from the
## original script) -- confirm this is intended.
anonymise_doc <- function(cl, doc, pid_seed) {
  doc$md5_pid <- hash_column(cl, doc$producerid, pid_seed)
  doc$md5_cid <- hash_column(cl, doc$companyid, 123)
  doc$md5_cmp <- hash_column(cl, doc$company, 123)

  group_cols <- c(houjinkai = "md5_hou", nouzeikyoukai = "md5_nou")
  for (src in names(group_cols)) {
    ## After the first match `doc` is reduced to the kept columns, so a second
    ## group column is no longer present and is skipped, as in the original.
    if (!(src %in% names(doc))) {
      next
    }
    dst <- group_cols[[src]]
    doc[[dst]] <- hash_column(cl, doc[[src]], 123)
    ## A missing group stays missing instead of becoming the hash of NA
    doc[[dst]][is.na(doc[[src]])] <- NA
    doc <- doc[, c("md5_pid", "md5_cid", "md5_cmp", dst,
                   "address", "lat", "long", "depth")]
  }
  doc
}

## Connect to localhost
mymongo <- mongo.create(db = "science")

## Check for working connection
if (isTRUE(mongo.is.connected(mymongo))) {
  tryCatch({
    ## Get the list of input filenames
    ## NOTE(review): `pattern` is a regular expression, not a glob, so this
    ## matches any file name containing "json_houjinkai".
    files <- list.files(path = "json",
                        pattern = "json_houjinkai_*",
                        full.names = TRUE)

    ## Insert data into mongodb
    t1 <- Sys.time()

    ## PARALLEL-BEGIN
    ## One cluster for the whole run (instead of one per file); `finally`
    ## guarantees the workers are stopped even if a file fails.
    cl <- makeCluster(4)
    tryCatch({
      ## seq_along() iterates zero times when no file matched
      ## (1:length(files) would iterate over 1 and 0).
      for (n in seq_along(files)) {
        ## Read from text file
        doc <- jsonlite::fromJSON(txt = files[n])
        if (NROW(doc) == 0L) {
          print(paste0(files[n], " (skipped: no records)"))
          flush.console()
          next
        }

        ## Hash the identifying columns; the seed is passed as an argument so
        ## the full `credentials` object never has to be exported to workers.
        doc <- anonymise_doc(cl, doc, pid_seed = credentials$myseed)

        ## Convert original dataframe to a list of lists,
        ## where each row is a list
        row_lists <- parLapply(cl, split(doc, seq_len(nrow(doc))), row_to_list)

        ## Convert the list-of-lists to BSON objects. This is done in the main
        ## process: the workers never load rmongodb, and BSON objects wrap
        ## external pointers that cannot be sent back from another process.
        bson_docs <- lapply(row_lists, mongo.bson.from.list)

        ## Insert the BSON objects into mongodb
        inserted <- mongo.insert.batch(mongo = mymongo,
                                       ns = "science.houjinkai",
                                       lst = bson_docs)
        if (!isTRUE(inserted)) {
          warning("Batch insert failed for ", files[n], call. = FALSE)
        }

        ## Print the processed filename
        print(files[n])
        flush.console()
      }
    }, finally = {
      stopCluster(cl)
    })
    ## PARALLEL-END

    td <- as.numeric(difftime(Sys.time(), t1, units = "secs"))
    msg <- paste0("[insert data into mongodb] took ", format(td), " secs")
    print(msg)
    flush.console()

    ## Optional read-back of the whole collection, for manual checks.
    ## Disabled by default: change FALSE to TRUE to run it.
    if (FALSE) {
      if (isTRUE(mongo.is.connected(mymongo))) {
        # mongo.drop(mymongo, "science.houjinkai")
        # nr <- mongo.count(mymongo, ns="science.houjinkai")
        # print(nr)
        t1 <- Sys.time()
        res <- mongo.find.all(mymongo, "science.houjinkai") %>%
          lapply(., function(x) { as.data.table(x) }) %>%
          dplyr::bind_rows(.)
        ## Mark text columns as UTF-8, but only those actually present in the
        ## result (Encoding<- fails on a missing or non-character column).
        text_cols <- intersect(
          c("companyid", "company", "address", "houjinkai", "nouzeikyoukai"),
          names(res)
        )
        for (col in text_cols) {
          if (is.character(res[[col]])) {
            Encoding(res[[col]]) <- "UTF-8"
          }
        }
        td <- as.numeric(difftime(Sys.time(), t1, units = "secs"))
        msg <- paste0("[get data from mongodb] took ", format(td), " secs")
        print(msg)
        flush.console()
      }
    }
  }, finally = {
    ## Disconnect from mongoDB, even when the import failed part-way
    mongo.destroy(mymongo)
  })

  ## Remove the temporaries that were actually created (no warning for names
  ## that never came into existence, e.g. when no file matched).
  rm(list = intersect(
    c("n", "doc", "row_lists", "bson_docs", "inserted",
      "files", "cl", "t1", "td", "msg"),
    ls()
  ))
} else {
  warning("Could not connect to MongoDB; nothing was imported.", call. = FALSE)
}
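# [Pastebin page chrome, not part of the script] Advertisement
# [Pastebin page chrome, not part of the script] Add Comment
# [Pastebin page chrome, not part of the script] Please, Sign In to add comment
# [Pastebin page chrome, not part of the script] Advertisement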