Advertisement
Guest User

Untitled

a guest
Feb 3rd, 2017
172
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.50 KB | None | 0 0
  1. #THIS CODE WAS WRITTEN IN RSTUDIO#
  2.  
  3. install.packages("RMySQL")
  4. library(RMySQL)
  5.  
  6. mydb = dbConnect(MySQL(), user='abacus117', password='aJ4K@!lg3N2', dbname='smalldb', host='mysql4.gear.host')
  7. #dbDisconnect(mydb)
  8. dbListTables(mydb)
  9.  
  10. A <- fetch(dbSendQuery(mydb, 'SELECT x.*
  11. FROM
  12. (SELECT b.store_name,
  13. CAST(b.store_id AS CHAR) store_id,
  14. count(DISTINCT CAST(a.order_id AS CHAR)) order_count
  15. FROM all_orders a
  16. JOIN user_stores b
  17. ON a.store_id = b.store_id
  18. JOIN user_info c
  19. ON b.user_id=c.user_id
  20. WHERE YEAR(a.processed_at) < 2014
  21. AND a.canceled_at IS NULL
  22. AND c.account_paused IS NULL
  23. GROUP BY b.store_name,
  24. b.store_id) x
  25. ORDER BY x.order_count DESC
  26. LIMIT 10;'))
  27.  
  28. print(A)
  29. write.table(A, "C:/Users/Daniel/Documents/MailChimp - 1A - Output.csv", sep=",", row.names=F)
  30.  
  31.  
  32. B <- fetch(dbSendQuery(mydb,
  33. "SELECT b.store_name,
  34. CAST(b.store_id AS CHAR) store_id,
  35. c.country,
  36. SUM(SUBSTRING(a.order_total, 2) + 0) revenue
  37. FROM smalldb.all_orders a
  38. JOIN smalldb.user_stores b
  39. ON a.store_id = b.store_id
  40. JOIN smalldb.user_info c
  41. ON b.user_id=c.user_id
  42. WHERE c.country in ('US','CA')
  43. AND a.canceled_at IS NULL
  44. GROUP BY b.store_name,
  45. b.store_id,
  46. c.country
  47. ORDER BY b.store_name,
  48. b.store_id,
  49. c.country;"))
  50.  
  51. print(B)
  52.  
  53. US <- B[ which(B$country=='US'), ]
  54. nrow(US)
  55. print(US)
  56.  
  57. CA <- B[ which(B$country=='CA'), ]
  58. nrow(CA)
  59. print(CA)
  60.  
  61. outlier_funk <- function(dt, var) {
  62. var_name <- eval(substitute(var),eval(dt))
  63. na1 <- sum(is.na(var_name))
  64. m1 <- mean(var_name, na.rm = T)
  65. par(mfrow=c(2, 2), oma=c(0,0,3,0))
  66. boxplot(var_name, main="With outliers")
  67. hist(var_name, main="With outliers", xlab=NA, ylab=NA)
  68. outlier <- boxplot.stats(var_name)$out
  69. mo <- mean(outlier)
  70. var_name <- ifelse(var_name %in% outlier, NA, var_name)
  71. boxplot(var_name, main="Without outliers")
  72. hist(var_name, main="Without outliers", xlab=NA, ylab=NA)
  73. title("Outlier Check", outer=TRUE)
  74. na2 <- sum(is.na(var_name))
  75. cat("Outliers identified:", na2 - na1, "n")
  76. cat("Propotion (%) of outliers:", round((na2 - na1) / sum(!is.na(var_name))*100, 1), "n")
  77. cat("Mean of the outliers:", round(mo, 2), "n")
  78. m2 <- mean(var_name, na.rm = T)
  79. cat("Mean without removing outliers:", round(m1, 2), "n")
  80. cat("Mean if we remove outliers:", round(m2, 2), "n")
  81. response <- readline(prompt="Do you want to remove outliers and to replace with NA? [yes/no]: ")
  82. if(response == "y" | response == "yes"){
  83. dt[as.character(substitute(var))] <- invisible(var_name)
  84. assign(as.character(as.list(match.call())$dt), dt, envir = .GlobalEnv)
  85. cat("Outliers successfully removed", "n")
  86. return(invisible(dt))
  87. } else{
  88. cat("Nothing changed", "n")
  89. return(invisible(var_name))
  90. }
  91. }
  92.  
  93. outlier_funk(US, revenue)
  94. outlier_funk(CA,revenue)
  95.  
  96. capture.output(print(summary(US$revenue)), file="C:/Users/Daniel/Documents/MailChimp - 2US - Output.csv")
  97. hist(US$revenue, main="US Revenues")
  98.  
  99. capture.output(print(summary(CA$revenue)), file="C:/Users/Daniel/Documents/MailChimp - 2CA - Output.csv")
  100. hist(CA$revenue, main="CA Revenues")
  101.  
  102.  
  103. Users <- fetch(dbSendQuery(mydb,
  104. "SELECT
  105. c.country,
  106. COUNT(DISTINCT CAST(a.order_id AS CHAR)) orders,
  107. COUNT(DISTINCT CAST(c.user_id AS CHAR)) users,
  108. SUM(SUBSTRING(a.order_total, 2) + 0) revenue
  109. FROM smalldb.all_orders a
  110. JOIN smalldb.user_stores b
  111. ON a.store_id = b.store_id
  112. JOIN smalldb.user_info c
  113. ON b.user_id=c.user_id
  114. WHERE c.country in ('US','CA')
  115. AND a.canceled_at IS NULL
  116. AND c.account_paused IS NULL
  117. GROUP BY
  118. c.country;"))
  119. print(Users)
  120.  
  121. C <- fetch(dbSendQuery(mydb,
  122. "SELECT x.*,
  123. (x.orders/x.stores) order_ratio,
  124. (x.revenue/x.stores) rev_store_ratio,
  125. (x.revenue/x.orders) rev_order_ratio
  126. FROM (
  127. SELECT
  128. c.country,
  129. COUNT(DISTINCT CAST(b.store_id AS CHAR)) stores,
  130. COUNT(DISTINCT CAST(a.order_id AS CHAR)) orders,
  131. SUM(SUBSTRING(a.order_total, 2) + 0) revenue
  132. FROM smalldb.all_orders a
  133. JOIN smalldb.user_stores b
  134. ON a.store_id = b.store_id
  135. JOIN smalldb.user_info c
  136. ON b.user_id=c.user_id
  137. WHERE c.country in ('US','CA')
  138. AND a.canceled_at IS NULL
  139. AND c.account_paused IS NULL
  140. GROUP BY
  141. c.country) x;"))
  142.  
  143. print(C)
  144.  
  145. US2 <- C[ which(C$country=='US'), ]
  146. nrow(US2)
  147. print(US2)
  148.  
  149. CA2 <- C[ which(C$country=='CA'), ]
  150. nrow(CA2)
  151. print(CA2)
  152.  
  153. D <- fetch(dbSendQuery(mydb,
  154. "SELECT b.store_name,
  155. CAST(b.store_id AS CHAR) store_id,
  156. c.country,
  157. COUNT(DISTINCT CAST(c.user_id AS CHAR)) users,
  158. COUNT(DISTINCT CAST(a.order_id AS CHAR)) orders,
  159. SUM(SUBSTRING(a.order_total, 2) + 0) revenue
  160. FROM smalldb.all_orders a
  161. JOIN smalldb.user_stores b
  162. ON a.store_id = b.store_id
  163. JOIN smalldb.user_info c
  164. ON b.user_id=c.user_id
  165. WHERE c.country in ('US','CA')
  166. AND a.canceled_at IS NULL
  167. AND c.account_paused IS NULL
  168. GROUP BY b.store_name,
  169. b.store_id,
  170. c.country
  171. ORDER BY b.store_name,
  172. b.store_id,
  173. c.country;"))
  174.  
  175. print(D)
  176.  
  177. US3 <- D[ which(D$country=='US'), ]
  178. nrow(US3)
  179. print(US3)
  180.  
  181. CA3 <- D[ which(D$country=='CA'), ]
  182. nrow(CA3)
  183. print(CA3)
  184.  
  185. summary(US3$revenue/US3$orders)
  186. hist(US3$revenue/US3$orders, main="US Revenue per Order")
  187.  
  188. summary(CA3$revenue/CA3$orders)
  189. hist(CA3$revenue/CA3$orders, main="CA Revenue per Order")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement