Advertisement
Guest User

Untitled

a guest
Dec 3rd, 2016
67
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.62 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. SOURCE_DB="myrulib.db"
  4.  
  5. TARGET_DB="metadata_my.db"
  6.  
  7. #an empty Calibre database, with books and series
  8. # create/update triggers removed
  9. EMPTY_DB="metadata_empty.db"
  10.  
  11. LIB_ROOT="./"
  12.  
  13. function esc_q {
  14. # double quotes escape themselves in SQL
  15. sed 's/"/""/g'
  16. }
  17.  
  18. function esc_sed {
  19. # sed special characters escape
  20. sed -e 's/\\/\\\\"/g' -e 's~[\.\*\^\$/]\|\[\|\]~\\&~g'
  21. }
  22.  
  23. function my_cut {
  24. local i="$1"
  25. i=$((i - 1))
  26. sed -n "s/\([^¶]*¶\)\{$i\}\([^¶]*\).*/\2/p"
  27. }
  28.  
  29. function get_date {
  30. local arg=""
  31. if [ -n "$1" ]; then
  32. arg="--date=$1"
  33. fi
  34. date -u $arg "+%Y-%m-%d %H:%M:%S.%N%z" | sed 's/[0-9]\{2\}+0000$/+0000/'
  35. }
  36.  
  37. function get_stamp {
  38. get_date $(stat "${LIB_ROOT}$1" -c%y)
  39. }
  40.  
  41. # turn comma-separated list of authors into
  42. # newline separated one. Also remove some trash
  43. # from authors names
  44. function sanitize_authors {
  45. sed \
  46. -e 's/, \?\([[:upper:]]\|d[ie][[:upper:]]\|v[ao]n \|d[ea] \|de[ls] \|te[nr]\?'`
  47. `' \|tom \|де \|ван \|ди \|фон \)/\n\1/g' | sed \
  48. -e 's/, \?[ae]t al$//' -e 's/, ред$//' \
  49. -e 's/\([[:upper:]]\)\.\?[, ]*$/\1./'
  50. }
  51.  
  52. #filter get ids from authors names (case-insensetive)
  53. function auth_ids {
  54. local auths=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
  55. (sed -n -e "/^[^¶]*¶\(${auths}\)\$/Ip" | my_cut 1) <<< "${AUTHORS_FULL_NAMES}"
  56. }
  57.  
  58. #same as auth_ids, but for series
  59. function series_ids {
  60. local series=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
  61. (sed -n -e "/^[^¶]*¶\(${series}\)\$/Ip" | my_cut 1) <<< "${SERIES}"
  62. }
  63.  
  64. #same as auth_ids, but for tags
  65. function tag_ids {
  66. local tags=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
  67. (sed -n -e "/^[^¶]*¶\(${tags}\)\$/Ip" | my_cut 1) <<< "${TAGS}"
  68. }
  69.  
  70. #get hierarchical tag from file path, e.g.,
  71. # "M_Mathematics/MC_Calculus/MCat_Advanced calculus/<file>.pdf" ->
  72. # "Mathematics.Calculus.Advanced calculus"
  73. function tag_from_filepath {
  74. sed -e 's|^[^/]*_||' -e 's|/[^/]*_|/|g' -e 's|/[^/]*$||' -e 's|/|.|g'
  75. }
  76.  
  77. #same as auth_ids, but for langs
  78. function lang_ids {
  79. local langs=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
  80. (sed -n -e "/^[^¶]*¶\(${langs}\)\$/Ip" | my_cut 1) <<< "${LANGS}"
  81. }
  82.  
  83. #feed stdin to sqlite, as well as log to "$1"
  84. function feed_sql_and_log {
  85. exec 3>>"$1"
  86. rm -f "$1"
  87. tee /dev/fd/3 | sqlite3 "${TARGET_DB}"
  88. }
  89.  
  90. #the most painful part: process book entries and collect/feed the respective
  91. # books, books_authors_link, books_series_link, books_tags_link,
  92. # books_languages_link, comments SQL INSERT commands
  93. function books_insert {
  94. # BOOKS:
  95. #<id>¶<title>¶<auths>¶<series>¶<file_path>¶<file_type>¶<md5sum>¶<lang>¶<year>¶<desc>
  96.  
  97. local book
  98. local book_count=$(echo "${BOOKS}" | wc -l)
  99. local n=$(echo -n "${book_count}" | wc -c)
  100. (while IFS='' read -r book || [ -n "$book" ]; do
  101. local book_id=$(echo "$book" | my_cut 1)
  102. (awk "{printf \"[%07.4f]%% Inserting book %${n}d out of ${book_count}"`
  103. `"\n\", (\$0*100)/${book_count}, \$0}" >&2) <<< "${book_id}"
  104. local title=$(echo "$book" | my_cut 2 | esc_q)
  105. local authors=$(echo "$book" | my_cut 3 | sanitize_authors)
  106. local author_sort=$(echo "$authors" | \
  107. sed ':a;N;$!ba;s/\([^\n]*\)\n/\1 \& /g;s/.*/\L&/' | esc_q)
  108. local series=$(echo "$book" | my_cut 4)
  109. local file_path=$(echo "$book" | my_cut 5)
  110. local file_type=$(echo "$book" | my_cut 6)
  111. local md5sum=$(echo "$book" | my_cut 7)
  112. local langs=$(echo "$book" | my_cut 8 | sed -e 's/,/\n/g')
  113. local year=$(echo "$book" | my_cut 9)
  114. local description=$(echo "$book" | my_cut 10 | esc_q | \
  115. sed 's/\\newline/\n/g')
  116. local timestamp=$(get_stamp "${file_path}")
  117. local ISBN=$(echo "${file_path}" | \
  118. sed -n 's|.*(ISBN \([0-9]*\))[^/]*$|\1|p')
  119. #book hierarchical classification, extracted from file path
  120. local tag=$(echo "${file_path}" | tag_from_filepath)
  121.  
  122. #books INSERT
  123. echo -e "INSERT INTO books (id,title,sort,timestamp,pubdate,"`
  124. `"author_sort,isbn,path,uuid,has_cover,"`
  125. `"last_modified)\n"`
  126. `"VALUES (${book_id},\"$title\",\"$title\",\"$timestamp\","`
  127. `"\"$year\",\"${author_sort}\",\"$ISBN\",\"${file_path}\""`
  128. `",\"$md5sum\",0,\"$timestamp\");"
  129.  
  130. #books_authors_link INSERT
  131. auth_ids "$authors" | sed -n -e 's/.\+/'`
  132. `"INSERT INTO books_authors_link (book,author)\n"`
  133. `"VALUES (${book_id},&);/p"
  134.  
  135. #books_series_link INSERT
  136. series_ids "$series" | sed -n -e 's/.\+/'`
  137. `"INSERT INTO books_series_link (book,series)\n"`
  138. `"VALUES (${book_id},&);/p"
  139.  
  140. #books_tags_link INSERT
  141. tag_ids "$tag" | sed -n -e 's/.\+/'`
  142. `"INSERT INTO books_tags_link (book,tag)\n"`
  143. `"VALUES (${book_id},&);/p"
  144.  
  145. #books_languages_link INSERT
  146. lang_ids "$langs" | sed -n -e 's/.\+/'`
  147. `"INSERT INTO books_languages_link (book,lang_code,item_order)\n"`
  148. `"VALUES (${book_id},&,0);/p"
  149.  
  150. #comments INSERT
  151. if [ -n "${description}" ]; then
  152. echo -e "INSERT INTO comments (book,text)\n"`
  153. `"VALUES (${book_id},\"${description}\");"
  154. fi
  155. done <<< "${BOOKS}") | feed_sql_and_log "$1"
  156. }
  157.  
  158. cp -f "${EMPTY_DB}" "${TARGET_DB}"
  159.  
  160. # AUTHORS_FULL_NAMES: '<id>¶<name>' lines
  161. AUTHORS_FULL_NAMES=$(sqlite3 "${SOURCE_DB}" 'SELECT full_name FROM authors;' | \
  162. sanitize_authors | sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')
  163.  
  164. # TAGS: '<id>¶<tag>' lines
  165. TAGS=$(sqlite3 "${SOURCE_DB}" 'SELECT file_name FROM books;' | tag_from_filepath | \
  166. sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')
  167.  
  168. # LANGS: '<id>¶<lang>' lines
  169. LANGS=$(sqlite3 "${SOURCE_DB}" 'SELECT DISTINCT lang FROM books;' | \
  170. sed -e 's/,/\n/g' | sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')
  171.  
  172. # BOOKS:
  173. #'<id>¶<title>¶<authors>¶<sequence>¶<file_path>¶<file_type>¶<md5sum>¶<lang>¶<year>¶<description>'
  174. BOOKS=$(sqlite3 -separator '<S_e^Pa_r^At_oR>' "${SOURCE_DB}" \
  175. 'SELECT printf("<S_e^Pa_r^At_oR>%s",books.title),authors.full_name,'`
  176. `'sequences.value,books.file_name,books.file_type,'`
  177. `'books.md5sum,books.lang,books.year,books.description '`
  178. `'FROM books,authors,sequences '`
  179. `'WHERE books.id_author=authors.id AND books.id_sequence=sequences.id;' | \
  180. sed -e 's/<S_e^Pa_r^At_oR>/¶/g' | sed ':a;N;$!ba;s/\n\([^¶]\)/\\newline\1/g' | \
  181. awk '{printf("%d%s\n", NR,$0)}')
  182.  
  183. # SERIES: '<id>¶<name>'
  184. SERIES=$(sqlite3 "${SOURCE_DB}" 'SELECT value FROM sequences;' | sed '/^\s*$/d' | \
  185. sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')
  186.  
  187. #INSERT authors
  188. echo "${AUTHORS_FULL_NAMES}" | esc_q | sed 's/\([^¶]*\)¶\(.*\)/'`
  189. `'INSERT INTO authors (id,name,sort)\n'`
  190. `'VALUES (\1,\"\2\",\"\L\2\");/' | \
  191. feed_sql_and_log authors_insert_command.txt
  192.  
  193. #INSERT tags
  194. echo "${TAGS}" | esc_q | sed 's/\([^¶]*\)¶\(.*\)/'`
  195. `'INSERT INTO tags (id,name)\nVALUES (\1,\"\2\");/' | \
  196. feed_sql_and_log tags_insert_command.txt
  197.  
  198. #INSERT languages
  199. echo "${LANGS}" | sed 's/\([^¶]*\)¶\(.*\)/'`
  200. `'INSERT INTO languages (id,lang_code)\n'`
  201. `'VALUES (\1,\"\2\");/' | \
  202. feed_sql_and_log languages_insert_command.txt
  203.  
  204. #INSERT series
  205. echo "${SERIES}" | esc_q | sed 's/\([^¶]*\)¶\(.*\)/'`
  206. `'INSERT INTO series (id,name,sort)\n'`
  207. `'VALUES (\1,\"\2\",\"\2\");/' | \
  208. feed_sql_and_log series_insert_command.txt
  209.  
  210. #INSERT books, books_authors_link, books_series_link,
  211. # books_tags_link, books_languages_link, comments
  212. books_insert books_insert_command.txt
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement