#!/bin/bash

# START OF CONFIGURATION

# dbname is the name of the Canvas Data database
dbname=canvas_data

# basedir is the directory containing the data files.
# These may be from the CLI tool or files you've downloaded manually.
# The script keys off the file names themselves, so the directory layout
# created by the CLI tool does not matter.
# It may be a relative path to the current directory or an absolute path.
basedir='/home/muhe/dataFiles'

# checksequence queries the database before importing data to see whether
# this sequence has already been imported. It mostly applies to
# incremental files such as the requests table. It allows you to leave your
# files on disk without having to extract them or truncate your tables.
# It can also be used to pick back up where you left off, although if a
# sequence fails partway through, you may need to truncate the offending table.
# Note that this relies on a versions table that was created using the
# accompanying SQL script.
checksequence=1

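# As a rough, hypothetical sketch, the versions table queried below needs at
# least the columns table_name, incremental, and version (the accompanying
# SQL script is authoritative and may use different types or extra columns):
#
#   CREATE TABLE versions (
#       table_name  VARCHAR(127) PRIMARY KEY,
#       incremental TINYINT      NOT NULL DEFAULT 0,
#       version     BIGINT       NULL
#   );
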
# incrementaltables is a comma-separated list of the tables that are partial
# and should not be truncated before importing.
# If you are using checksequence=1, the script will try to determine this
# from the database.
incrementaltables=requests

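# For multiple incremental tables, separate the names with commas, e.g.
# (the second name is only a hypothetical placeholder):
#   incrementaltables=requests,some_other_incremental_table
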
# leaveasgzip=1 temporarily extracts the compressed gzip file for importing,
# then removes the uncompressed copy once the import finishes.
# This is an attempt to save disk space.
leaveasgzip=1

# sortdata runs the extracted data file through sort -u (in place), removing
# duplicate rows before loading. This can have a significant impact on the
# overall import time.
sortdata=1

# MYSQL is the command used to invoke the mysql client.
# You can put options such as username and password here, but it is
# recommended that you configure ~/.my.cnf instead.
MYSQL='mysql --local-infile --user=yourusername --password=yourpassword'

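# A minimal ~/.my.cnf that lets you drop the credentials from MYSQL above
# (illustrative values; keep the file private, e.g. chmod 600 ~/.my.cnf):
#
#   [client]
#   user=yourusername
#   password=yourpassword
#
# With that in place, MYSQL='mysql --local-infile' is enough.
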
# verbosity controls the logging of messages
# 0 = no logging of messages
# 1 = minimal logging of one message per file
# 2 = also log importing of data into the database
# 3 = more verbose logging, including decompressing files and truncating tables
# 4 = log little things that probably don't need to be logged
verbosity=2

# END OF CONFIGURATION


# Create a temporary file to hold the list of files to be considered.
# Store only the directory and basenames to allow for both the compressed
# and uncompressed versions to exist.
# This file will be created in your system's $TMPDIR directory,
# which is often /tmp or /var/tmp.
# The file will be removed if the process completes successfully, but will
# be orphaned if the script aborts.
tmpfile="$(mktemp)"
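# Optional: to clean up the temporary file even if the script aborts, a line
# such as the following could be uncommented here:
#   trap 'rm -f "${tmpfile}"' EXIT
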
# Exclude requests table ( ! -name "*requests*" )
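# The regex matches basenames such as 1234_account_dim-00005-1a2b3c4d.gz
# (an illustrative name: optional numeric sequence prefix, table name,
# 5-digit part number, 8-hex-digit id, optional .gz)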
find "${basedir}" -type f -regextype posix-egrep -regex '.*/([0-9]+_)?[a-z_]+-[0-9]{5}-[0-9a-f]{8}(\.gz)?$' ! -name "*requests*" -printf '%h/' -exec basename {} .gz \; | sort -u > "${tmpfile}"

if [ ${checksequence} -eq 1 ]
then
    # Try to fetch the list of incremental tables from the database
    tables=$( ${MYSQL} ${dbname} -sse "SELECT CONCAT_WS(',', table_name) FROM versions WHERE incremental=1" )
    if [ "${tables}" != "" ]
    then
        incrementaltables="${tables}"
    fi
fi

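# Note: with -ss the query above returns one table name per line rather than a
# comma-separated list; the substring check used later in the loop works with
# either form.
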
# Initialize some variables
oldtable=""
oldseq=""
hasprocessed=0

# Iterate through the files
for pathname in $(cat "${tmpfile}")
do

    if [ ${verbosity} -gt 0 ]
    then
        echo "Processing ${pathname}"
    fi

    # Split the filename into parts
    dirname=$(dirname "${pathname}")
    filename=$(basename "${pathname}")
    firstpart=$(echo "${filename}" | cut -f1 -d-)
    tablepart=$(echo "${firstpart}" | sed -r "s/^[0-9]+_//")
    seqpart=$(echo "${firstpart}" | grep -Eo "^[0-9]+")
    numidpart=$(echo "${filename}" | cut -f2- -d-)
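    # For an illustrative basename like 1234_account_dim-00005-1a2b3c4d, the
    # splits above yield firstpart=1234_account_dim, tablepart=account_dim,
    # seqpart=1234, and numidpart=00005-1a2b3c4d. Files without a numeric
    # sequence prefix leave seqpart empty.
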
    # Check to see if the previous table has been processed; if so, update the database
    if [ ${hasprocessed} -eq 1 ] && [ ${checksequence} -eq 1 ] && [ "${oldtable}" != "${tablepart}" ] && [ "$oldseq" != "" ]
    then

        if [ ${verbosity} -ge 4 ]
        then
            echo "Updating sequence number for ${oldtable} to ${oldseq}"
        fi

        ${MYSQL} ${dbname} -sqe "UPDATE versions SET version = ${oldseq} WHERE table_name = '${oldtable}'"
        hasprocessed=0
    fi

    process=1

    # Get the last version imported
    if [ ${checksequence} -eq 1 ] && [ "$seqpart" != "" ]
    then

        if [ ${verbosity} -ge 4 ]
        then
            echo "Checking for previously saved version of ${tablepart}"
        fi

        extseq=$( ${MYSQL} ${dbname} -sse "SELECT IFNULL(version,0) FROM versions WHERE table_name='${tablepart}'" )
        if [ "${extseq}" = "" ] || [ "${extseq}" -ge "${seqpart}" ]
        then
            process=0
        fi
    fi

    # Process this file
    if [ ${process} -eq 1 ]
    then
        removefile=0
        datafile="${dirname}/${filename}"
        if [ ! -f "${datafile}" ]
        then
            # There is no already-extracted file
            if [ -f "${datafile}.gz" ]
            then
                # There is a gzipped version

                if [ ${verbosity} -ge 3 ]
                then
                    echo "Uncompressing ${filename}"
                fi

                if [ ${leaveasgzip} -eq 1 ]
                then
                    # Extract it, but plan on removing it later
                    gzip -dc "${datafile}.gz" > "${datafile}"
                    if [ ${sortdata} -eq 1 ]
                    then
                        sort -u -o "${datafile}" "${datafile}"
                    fi
                    removefile=1
                else
                    # Extract it and leave it extracted
                    gzip -d "${datafile}.gz"
                    if [ ${sortdata} -eq 1 ]
                    then
                        sort -u -o "${datafile}" "${datafile}"
                    fi
                fi
            fi
        fi

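        # (The check below relies on the bash expansion ${incrementaltables/$tablepart},
        # which deletes the first occurrence of the table name; the result differs
        # from the original list only when the name is actually present in it.)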
        # Check to see if this is an incremental table
        partial=0
        if [ "x${incrementaltables/$tablepart}" != "x${incrementaltables}" ]
        then
            partial=1
        fi

        # If it is incremental or the previous file used the same table,
        # then don't truncate it first, but do an append instead
        if [ ${partial} -eq 0 ] && [ "${oldtable}" != "${tablepart}" ]
        then

            if [ ${verbosity} -ge 3 ]
            then
                echo "Truncating ${tablepart}"
            fi

            ${MYSQL} ${dbname} -sqe "TRUNCATE ${tablepart}"

        fi
        # Load the data into the table

        if [ ${verbosity} -ge 2 ]
        then
            echo "Loading ${filename} into ${tablepart}"
        fi

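        # LOAD DATA below uses the mysql client defaults for the file format
        # (tab-separated fields, newline-terminated rows); add explicit
        # FIELDS/LINES clauses if your extracted files differ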
        ${MYSQL} ${dbname} -sqe "LOAD DATA LOCAL INFILE '${datafile}' INTO TABLE ${tablepart}"

        hasprocessed=1
        # Remove the uncompressed version if needed
        if [ ${removefile} -eq 1 ]
        then

            if [ ${verbosity} -ge 4 ]
            then
                echo "Removing uncompressed version of ${datafile}"
            fi

            rm "${datafile}"
        fi

    fi

    # Update the previous table and sequence data
    oldtable=${tablepart}
    oldseq=${seqpart}

done

# Update the sequence for the last table processed, if necessary
if [ ${hasprocessed} -eq 1 ] && [ ${checksequence} -eq 1 ] && [ "${oldtable}" != "" ] && [ "$oldseq" != "" ]
then

    if [ ${verbosity} -ge 4 ]
    then
        echo "Updating sequence number for ${oldtable} to ${oldseq}"
    fi

    ${MYSQL} ${dbname} -sqe "UPDATE versions SET version = ${oldseq} WHERE table_name = '${oldtable}'"
fi

# Remove the list of files
rm "${tmpfile}"