Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/bash
- SOURCE_DB="myrulib.db"
- TARGET_DB="metadata_my.db"
- #an empty Calibre database, with books and series
- # create/update triggers removed
- EMPTY_DB="metadata_empty.db"
- LIB_ROOT="./"
- function esc_q {
- # double quotes escape themselves in SQL
- sed 's/"/""/g'
- }
- function esc_sed {
- # sed special characters escape
- sed -e 's/\\/\\\\"/g' -e 's~[\.\*\^\$/]\|\[\|\]~\\&~g'
- }
- function my_cut {
- local i="$1"
- i=$((i - 1))
- sed -n "s/\([^¶]*¶\)\{$i\}\([^¶]*\).*/\2/p"
- }
- function get_date {
- local arg=""
- if [ -n "$1" ]; then
- arg="--date=$1"
- fi
- date -u $arg "+%Y-%m-%d %H:%M:%S.%N%z" | sed 's/[0-9]\{2\}+0000$/+0000/'
- }
- function get_stamp {
- get_date $(stat "${LIB_ROOT}$1" -c%y)
- }
- # turn comma-separated list of authors into
- # newline separated one. Also remove some trash
- # from authors names
- function sanitize_authors {
- sed \
- -e 's/, \?\([[:upper:]]\|d[ie][[:upper:]]\|v[ao]n \|d[ea] \|de[ls] \|te[nr]\?'`
- `' \|tom \|де \|ван \|ди \|фон \)/\n\1/g' | sed \
- -e 's/, \?[ae]t al$//' -e 's/, ред$//' \
- -e 's/\([[:upper:]]\)\.\?[, ]*$/\1./'
- }
- #filter get ids from authors names (case-insensetive)
- function auth_ids {
- local auths=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
- (sed -n -e "/^[^¶]*¶\(${auths}\)\$/Ip" | my_cut 1) <<< "${AUTHORS_FULL_NAMES}"
- }
- #same as auth_ids, but for series
- function series_ids {
- local series=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
- (sed -n -e "/^[^¶]*¶\(${series}\)\$/Ip" | my_cut 1) <<< "${SERIES}"
- }
- #same as auth_ids, but for tags
- function tag_ids {
- local tags=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
- (sed -n -e "/^[^¶]*¶\(${tags}\)\$/Ip" | my_cut 1) <<< "${TAGS}"
- }
- #get hierarchical tag from file path, e.g.,
- # "M_Mathematics/MC_Calculus/MCat_Advanced calculus/<file>.pdf" ->
- # "Mathematics.Calculus.Advanced calculus"
- function tag_from_filepath {
- sed -e 's|^[^/]*_||' -e 's|/[^/]*_|/|g' -e 's|/[^/]*$||' -e 's|/|.|g'
- }
- #same as auth_ids, but for langs
- function lang_ids {
- local langs=$(echo "$1" | esc_sed | sed ':a;N;$!ba;s/\n/\\|/g')
- (sed -n -e "/^[^¶]*¶\(${langs}\)\$/Ip" | my_cut 1) <<< "${LANGS}"
- }
- #feed stdin to sqlite, as well as log to "$1"
- function feed_sql_and_log {
- exec 3>>"$1"
- rm -f "$1"
- tee /dev/fd/3 | sqlite3 "${TARGET_DB}"
- }
- #the most painful part: process book entries and collect/feed the respective
- # books, books_authors_link, books_series_link, books_tags_link,
- # books_languages_link, comments SQL INSERT commands
- function books_insert {
- # BOOKS:
- #<id>¶<title>¶<auths>¶<series>¶<file_path>¶<file_type>¶<md5sum>¶<lang>¶<year>¶<desc>
- local book
- local book_count=$(echo "${BOOKS}" | wc -l)
- local n=$(echo -n "${book_count}" | wc -c)
- (while IFS='' read -r book || [ -n "$book" ]; do
- local book_id=$(echo "$book" | my_cut 1)
- (awk "{printf \"[%07.4f]%% Inserting book %${n}d out of ${book_count}"`
- `"\n\", (\$0*100)/${book_count}, \$0}" >&2) <<< "${book_id}"
- local title=$(echo "$book" | my_cut 2 | esc_q)
- local authors=$(echo "$book" | my_cut 3 | sanitize_authors)
- local author_sort=$(echo "$authors" | \
- sed ':a;N;$!ba;s/\([^\n]*\)\n/\1 \& /g;s/.*/\L&/' | esc_q)
- local series=$(echo "$book" | my_cut 4)
- local file_path=$(echo "$book" | my_cut 5)
- local file_type=$(echo "$book" | my_cut 6)
- local md5sum=$(echo "$book" | my_cut 7)
- local langs=$(echo "$book" | my_cut 8 | sed -e 's/,/\n/g')
- local year=$(echo "$book" | my_cut 9)
- local description=$(echo "$book" | my_cut 10 | esc_q | \
- sed 's/\\newline/\n/g')
- local timestamp=$(get_stamp "${file_path}")
- local ISBN=$(echo "${file_path}" | \
- sed -n 's|.*(ISBN \([0-9]*\))[^/]*$|\1|p')
- #book hierarchical classification, extracted from file path
- local tag=$(echo "${file_path}" | tag_from_filepath)
- #books INSERT
- echo -e "INSERT INTO books (id,title,sort,timestamp,pubdate,"`
- `"author_sort,isbn,path,uuid,has_cover,"`
- `"last_modified)\n"`
- `"VALUES (${book_id},\"$title\",\"$title\",\"$timestamp\","`
- `"\"$year\",\"${author_sort}\",\"$ISBN\",\"${file_path}\""`
- `",\"$md5sum\",0,\"$timestamp\");"
- #books_authors_link INSERT
- auth_ids "$authors" | sed -n -e 's/.\+/'`
- `"INSERT INTO books_authors_link (book,author)\n"`
- `"VALUES (${book_id},&);/p"
- #books_series_link INSERT
- series_ids "$series" | sed -n -e 's/.\+/'`
- `"INSERT INTO books_series_link (book,series)\n"`
- `"VALUES (${book_id},&);/p"
- #books_tags_link INSERT
- tag_ids "$tag" | sed -n -e 's/.\+/'`
- `"INSERT INTO books_tags_link (book,tag)\n"`
- `"VALUES (${book_id},&);/p"
- #books_languages_link INSERT
- lang_ids "$langs" | sed -n -e 's/.\+/'`
- `"INSERT INTO books_languages_link (book,lang_code,item_order)\n"`
- `"VALUES (${book_id},&,0);/p"
- #comments INSERT
- if [ -n "${description}" ]; then
- echo -e "INSERT INTO comments (book,text)\n"`
- `"VALUES (${book_id},\"${description}\");"
- fi
- done <<< "${BOOKS}") | feed_sql_and_log "$1"
- }
- cp -f "${EMPTY_DB}" "${TARGET_DB}"
- # AUTHORS_FULL_NAMES: '<id>¶<name>' lines
- AUTHORS_FULL_NAMES=$(sqlite3 "${SOURCE_DB}" 'SELECT full_name FROM authors;' | \
- sanitize_authors | sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')
- # TAGS: '<id>¶<tag>' lines
- TAGS=$(sqlite3 "${SOURCE_DB}" 'SELECT file_name FROM books;' | tag_from_filepath | \
- sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')
- # LANGS: '<id>¶<lang>' lines
- LANGS=$(sqlite3 "${SOURCE_DB}" 'SELECT DISTINCT lang FROM books;' | \
- sed -e 's/,/\n/g' | sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')
- # BOOKS:
- #'<id>¶<title>¶<authors>¶<sequence>¶<file_path>¶<file_type>¶<md5sum>¶<lang>¶<year>¶<description>'
- BOOKS=$(sqlite3 -separator '<S_e^Pa_r^At_oR>' "${SOURCE_DB}" \
- 'SELECT printf("<S_e^Pa_r^At_oR>%s",books.title),authors.full_name,'`
- `'sequences.value,books.file_name,books.file_type,'`
- `'books.md5sum,books.lang,books.year,books.description '`
- `'FROM books,authors,sequences '`
- `'WHERE books.id_author=authors.id AND books.id_sequence=sequences.id;' | \
- sed -e 's/<S_e^Pa_r^At_oR>/¶/g' | sed ':a;N;$!ba;s/\n\([^¶]\)/\\newline\1/g' | \
- awk '{printf("%d%s\n", NR,$0)}')
- # SERIES: '<id>¶<name>'
- SERIES=$(sqlite3 "${SOURCE_DB}" 'SELECT value FROM sequences;' | sed '/^\s*$/d' | \
- sort -uf | awk '{printf("%d¶%s\n", NR,$0)}')
- #INSERT authors
- echo "${AUTHORS_FULL_NAMES}" | esc_q | sed 's/\([^¶]*\)¶\(.*\)/'`
- `'INSERT INTO authors (id,name,sort)\n'`
- `'VALUES (\1,\"\2\",\"\L\2\");/' | \
- feed_sql_and_log authors_insert_command.txt
- #INSERT tags
- echo "${TAGS}" | esc_q | sed 's/\([^¶]*\)¶\(.*\)/'`
- `'INSERT INTO tags (id,name)\nVALUES (\1,\"\2\");/' | \
- feed_sql_and_log tags_insert_command.txt
- #INSERT languages
- echo "${LANGS}" | sed 's/\([^¶]*\)¶\(.*\)/'`
- `'INSERT INTO languages (id,lang_code)\n'`
- `'VALUES (\1,\"\2\");/' | \
- feed_sql_and_log languages_insert_command.txt
- #INSERT series
- echo "${SERIES}" | esc_q | sed 's/\([^¶]*\)¶\(.*\)/'`
- `'INSERT INTO series (id,name,sort)\n'`
- `'VALUES (\1,\"\2\",\"\2\");/' | \
- feed_sql_and_log series_insert_command.txt
- #INSERT books, books_authors_link, books_series_link,
- # books_tags_link, books_languages_link, comments
- books_insert books_insert_command.txt
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement