#!/bin/bash

# v 1.0 (after some previous unnumbered versions) [12-09-2009]
#
# v 2.0 [19-09-2009]
#       (fixed bug with IFS/ORIGIFS)
#       (use XMLStarlet)
#
# v 2.1 [29-11-2014]
#       (option for reading epub in lynx. SBT) [09-12-2011]
#       (correctly detect authors without opf:role)
#       (escape HTML contents)
#       (delete temporary directory when aborted)
#       (fix quotes around variables)
#       (fix prev/next links)
#       (open with xdg-open)

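# Needs:
#   unzip      (http://www.info-zip.org/)
#   XMLStarlet (http://xmlstar.sourceforge.net/)
#   bc         (used to compute the spine link indices)
# Optional:
#   lynx       (only for the -l option)
#   xdg-open   (only for the -o option)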
#===============================================================================

#######################################
# Locate the OPF package file and the NCX file of the unpacked EPUB and read
# the title and author(s). Must be called from the directory the EPUB was
# unpacked into (it reads META-INF/container.xml relative to the cwd).
# Globals:
#   XML   (read)  name of the XMLStarlet executable
#   LYNX  (read)  non-zero: prepare the unpacked files for lynx
#   NS, DC, OPF, BASEDIR, TITLE, AUTHOR, NCX, lynx_base (written)
# Outputs:
#   "Title: ..." and "Author(s): ..." on stdout. In lynx mode it also edits
#   the *html files below BASEDIR in place and writes lynx.config.
#######################################
get_data() {
  # Get name and path of the OPF file. Only the first <rootfile> is used, so
  # a container listing several renditions does not yield concatenated paths.
  NS=$("$XML" sel -B -T -t \
    -m "//*[local-name()='rootfile'][1]" -v "namespace-uri()" META-INF/container.xml)
  OPF=$("$XML" sel -B -T -N x="$NS" -t \
    -m "//x:rootfile[1]" -v "@full-path" META-INF/container.xml)
  # Quoted: the package path may contain spaces.
  BASEDIR=$(dirname -- "$OPF")

  # Get the title of the EPUB
  NS=$("$XML" sel -B -T -t \
    -m "//*[local-name()='metadata'][1]" -v "namespace-uri()" "$OPF")
  DC=$("$XML" sel -B -T -t \
    -m "//*[local-name()='title'][1]" -v "namespace-uri()" "$OPF")
  TITLE=$("$XML" sel -B -T -N x="$NS" -N dc="$DC" -t \
    -m "//x:metadata/dc:title[1]" -v "text()" "$OPF")
  # Creators with role "aut" or with no role at all, joined with "; ".
  AUTHOR=$("$XML" sel -B -T -N x="$NS" -N dc="$DC" -t \
    -m "//x:metadata/dc:creator[@x:role='aut' or not(@x:role)]" -v "text()" -i "position()!=last()" -o "; " "$OPF")
  echo "Title: $TITLE"
  echo "Author(s): $AUTHOR"

  # Get name of the NCX file: the spine's toc attribute holds the id of the
  # manifest item whose href is the NCX, relative to the OPF directory.
  NCX=$("$XML" sel -B -T -N x="$NS" -t \
    -m "//x:spine" -v "@toc" "$OPF")
  NCX=$("$XML" sel -B -T -N x="$NS" -t \
    -m "//x:manifest/x:item[@id='$NCX']" -v "@href" "$OPF")
  NCX=$BASEDIR/$NCX

  # Handle lynx' inability to deal with xml:base and meta charset:
  lynx_base=
  if (( LYNX )); then
    lynx_base="${BASEDIR}/"
    # Drop every tag that carries a charset declaration.
    find "$BASEDIR" -name "*html" -exec sed -i 's/<[^>]*charset=[^>]*>//I' {} +
    lynx -show_cfg > lynx.config
    cat >> lynx.config << EOF
     SUFFIX_ORDER:PRECEDENCE_HERE
     SUFFIX:.xhtml:text/html
EOF
  fi
}

#######################################
# Read the spine of the OPF file in reading order.
# Globals:
#   XML, NS, OPF (read)
#   ids     (appended) idref of every spine entry
#   files   (appended) href of the manifest item with that id
#   linears (appended) 0 when the itemref has linear="no", otherwise 1
#######################################
read_spine() {
  local id
  # Read one idref per line. Unlike `for id in $(...)` this neither
  # glob-expands the ids nor needs a temporary change of IFS. The list is
  # fed through fd 3 so commands inside the loop keep the caller's stdin.
  while IFS= read -r id <&3 || [[ -n "$id" ]]; do
    [[ -n "$id" ]] || continue
    ids+=("$id")
    files+=("$("$XML" sel -B -T -N x="$NS" -t \
      -m "//x:manifest/x:item[@id='$id']" -v "@href" "$OPF")")
    linears+=("$("$XML" sel -B -T -N x="$NS" -t \
      -m "//x:spine/x:itemref[@idref='$id']" -v "number(not(@linear='no'))" "$OPF")")
  done 3< <("$XML" sel -B -T -N x="$NS" -t \
    -m "//x:spine/*" -v "@idref" -n "$OPF")
}

#######################################
# Read the navPoint entries of the NCX table of contents.
# Globals:
#   XML, NCX (read)
#   NC      (written)  namespace URI of the navMap element
#   navmap  (written)  number of ancestors of the navMap element
#   items   (appended) XML-escaped label of every navPoint
#   srcs    (appended) content src of every navPoint
#   levels  (appended) nesting level: ancestors of the navPoint minus navmap
#######################################
read_toc() {
  local line rest title src depth
  NC=$("$XML" sel -B -T -t \
    -m "//*[local-name()='navMap'][1]" -v "namespace-uri()" "$NCX")
  # Get the depth of the navMap element
  navmap=$("$XML" sel -B -T -N x="$NC" -t \
    -m "//x:navMap[1]" -v "count(ancestor::*)" "$NCX")
  # Every navPoint is printed as "label|src|depth". The line is taken apart
  # from the right, so a "|" inside the label stays in the label, and nothing
  # is word-split or glob-expanded (a label such as "*" stays "*").
  # fd 3 carries the list so "$XML esc" never competes for the loop's input.
  while IFS= read -r line <&3 || [[ -n "$line" ]]; do
    # Skip blank lines and lines that do not carry all three fields.
    [[ "$line" == *'|'*'|'* ]] || continue
    depth=${line##*|}
    rest=${line%|*}
    src=${rest##*|}
    title=${rest%|*}
    # Skip the line if the last field is not the numeric ancestor count.
    [[ "$depth" =~ ^[0-9]+$ ]] || continue
    # Quoted, so an empty label is passed as an (empty) argument.
    items+=("$("$XML" esc "$title")")
    srcs+=("$src")
    levels+=("$(( depth - navmap ))")
  done 3< <("$XML" sel -B -T -N x="$NC" -t \
    -m "//x:navMap[1]//x:navPoint" \
        -m "x:navLabel/x:text" -v "text()" -o "|" -b \
        -m "x:content" -v "@src" -o "|" -b \
        -v "count(ancestor::*)" -n "$NCX")
}

#######################################
# Copy the selected stylesheet into the current directory.
# Globals:
#   DEF_STYLE (read)    path of the stylesheet to copy
#   CSSFILE   (written) name of the copy, "epub-read.css"
# Returns:
#   the exit status of cp
#######################################
write_css() {
  CSSFILE="epub-read.css"
  # Quoted: the stylesheet path may contain spaces.
  cp -- "$DEF_STYLE" "$CSSFILE"
}

#######################################
# Write the navigation page (first/prev/next/last links, spine list and
# table of contents) to epub-read.xhtml in the current directory.
# Globals:
#   BASEDIR, lynx_base             (read)
#   ids, files, linears            (read) spine arrays
#   items, srcs, levels            (read) table-of-contents arrays
#   XHTMLFILE                      (written) name of the generated file
# Notes:
#   The JavaScript array "files" holds the first spine file plus every later
#   linear one. Each spine entry gets an index into that array; a non-linear
#   entry gets a fractional index between its neighbours (computed with bc).
#######################################
write_xhtml() {
  local i j k check target
  local -a index=()
  XHTMLFILE="epub-read.xhtml"

  {
    cat << EOF
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:base="$BASEDIR/">
<head>
  <style>
body {
  margin-top: 4em;
  background-color: white;
}
div.top {
  position: fixed;
  top: 0;
  margin: 0 auto 0 auto;
  background-color: white;
  color: black;
  width: 100%;
}
div.top a {
  color: inherit;
  text-decoration: none;
  font-weight: bold;
}
div.top a.deact {
  opacity: 0.3;
}
div.top table {
  width: 100%;
}
div.top td {
  width: 25%;
  text-align: center;
}
p {
  text-indent: -1em;
  margin: 0;
}
.level1 {
  margin: 1em 0 0 1em;
}
.level2 {
  margin: 0.5em 0 0 1.5em;
}
.level3 {
  margin: 0 0 0 2em;
}
.big {
  font-size: 150%;
  font-weight: bold;
}
.small {
  font-size: 80%;
  color: red;
}
.show {
  display: block;
}
.hide {
  display: none;
}
  </style>
  <script lang="javascript" type="text/javascript">
<![CDATA[
var num = 0;
var html = document.getElementsByTagName("html");
var basedir = html[0].getAttribute("xml:base");
EOF

    # JavaScript list of pages: the first file, then every linear file.
    printf "var files = ['%s'" "${files[0]}"
    for (( i = 1; i < ${#files[@]}; i++ )); do
      if [[ ${linears[$i]} -eq 1 ]]; then printf ", '%s'" "${files[$i]}"; fi
    done
    printf '];\n'

    cat << EOF
function nextFile() {
  if (num < files.length-1) {
    num = Math.floor(num);
    num++;
    parent.book.location.href=basedir + files[num];
    setLinks(num);
  }
}
function prevFile() {
  if (num > 0) {
    num = Math.ceil(num);
    num--;
    parent.book.location.href=basedir + files[num];
    setLinks(num);
  }
}
function setLinks(number) {
  num = number;
  if (Math.floor(num) == Math.ceil(num)) {
    if (num <= 0) {
      document.getElementById('prev').setAttribute('class','deact');
      document.getElementById('prev').href=files[0];
    } else {
      document.getElementById('prev').setAttribute('class','act');
      document.getElementById('prev').href=files[num-1];
    }
    if (num >= files.length-1) {
      document.getElementById('next').setAttribute('class','deact');
      document.getElementById('next').href=files[files.length-1];
    } else {
      document.getElementById('next').setAttribute('class','act');
      document.getElementById('next').href=files[num+1];
    }
  } else {
    document.getElementById('prev').setAttribute('class','act');
    document.getElementById('prev').href=files[Math.floor(num)];
    if (Math.ceil(num) >= files.length-1) {
      document.getElementById('next').setAttribute('class','deact');
      document.getElementById('next').href=window.event.srcElement.href;
    } else {
      document.getElementById('next').setAttribute('class','act');
      document.getElementById('next').href=files[Math.ceil(num)+1];
    }
  }
}
function setSpine() {
  document.getElementById('current').innerHTML='SPINE';
  document.getElementById('change').innerHTML='TOC';
  document.getElementById('change').setAttribute('onclick','setToc()');
  document.getElementById('spine').setAttribute('class','show');
  document.getElementById('toc').setAttribute('class','hide');
}
function setToc() {
  document.getElementById('current').innerHTML='TOC';
  document.getElementById('change').innerHTML='SPINE';
  document.getElementById('change').setAttribute('onclick','setSpine()');
  document.getElementById('toc').setAttribute('class','show');
  document.getElementById('spine').setAttribute('class','hide');
}
]]>
  </script>
</head>
<body>

<div class="top">
<table><tr>
<td><a href="${lynx_base}${files[0]}" target="book" onclick="setLinks(0);">&lt;&lt;&lt;</a></td>
<td><a class="deact" id="prev" href="${lynx_base}${files[0]}" target="book" onclick="prevFile(); return false;">&lt;</a></td>
<td><a id="next" href="${lynx_base}${files[1]}" target="book" onclick="nextFile(); return false;">&gt;</a></td>
<td><a href="${lynx_base}${files[${#files[*]}-1]}" target="book" onclick="setLinks(files.length-1);">&gt;&gt;&gt;</a></td>
</tr><tr>
<td id="current" class="big" colspan="2">TOC</td>
<td id="change" class="small" onclick="setSpine();">SPINE</td>
</tr></table>
</div>

<div id="spine" class="hide">
EOF

    # Spine list. j is the index handed to setLinks() for the current entry.
    j=-1
    for (( i = 0; i < ${#ids[@]}; i++ )); do
      if [[ ${linears[$i]} -eq 0 ]]; then
        # Non-linear entry: half-way between j and the next integer
        # (0 when it is the very first entry). Marked with "* ".
        j=$(bc <<< "scale=0; k=($j+1)/1-($j); scale=10; if ($j >= 0) $j+k/2 else 0")
        printf '* '
      else
        # Linear entry: next integer above j. ${j%%.*} is the integer part
        # of j (empty for values such as ".5", which counts as 0).
        j=$(( ${j%%.*} + 1 ))
      fi
      printf '%s\n' "<a href=\"${lynx_base}${files[$i]}\" target=\"book\" onclick=\"setLinks($j);\">${ids[$i]}</a><br/>"
      index+=("$j")
    done

    cat << EOF
</div>

<div id="toc" class="show">
EOF

    # Table of contents. Each entry is mapped to the spine file it points to
    # (src without its "#fragment") to find the index for setLinks().
    for (( i = 0; i < ${#items[@]}; i++ )); do
      k=-1
      check=${srcs[$i]%%#*}
      for (( j = 0; j < ${#files[@]}; j++ )); do
        if [[ "$check" == "${files[$j]}" ]]; then k=$j; fi
      done
      if (( k >= 0 )); then
        target=${index[$k]}
      else
        # Target is not part of the spine: keep the current position
        # instead of reading index[-1].
        target=num
      fi
      printf '%s\n' "<p class=\"level${levels[$i]}\">&bull; <a href=\"${lynx_base}${srcs[$i]}\" target=\"book\" onclick=\"setLinks(${target});\">${items[$i]}</a></p>"
    done

    cat << EOF
</div>

</body>
</html>
EOF
  } > "$XHTMLFILE"
}

#######################################
# Write the frameset page epub-read.html: navigation page in the left frame,
# the book in the right frame (named "book"), starting at the first spine
# file. The embedded script adds the stylesheet to every loaded page and
# copies that page's title to the window.
# Globals:
#   XML, TITLE, CSSFILE, XHTMLFILE, BASEDIR, files (read)
#   HTMLFILE (written) name of the generated file
#######################################
write_html() {
  HTMLFILE="epub-read.html"
  local page_title css_path
  page_title=$($XML esc "$TITLE")
  css_path=$(readlink -f "$CSSFILE")

  # One printf argument per output line.
  printf '%s\n' \
    '<html>' \
    '<head>' \
    "  <title>${page_title}</title>" \
    '  <script lang="javascript" type="text/javascript">' \
    '  function AddCSS() {' \
    '    var NewStyle=book.document.createElement("link");' \
    '    NewStyle.setAttribute("rel","stylesheet");' \
    '    NewStyle.setAttribute("type","text/css");' \
    "    NewStyle.setAttribute(\"href\",\"${css_path}\");" \
    '    book.document.getElementsByTagName("head")[0].appendChild(NewStyle);' \
    '  }' \
    '  function ChangeTitle() {' \
    '    document.title=book.document.title;' \
    '  }' \
    '  </script>' \
    '</head>' \
    '<frameset cols="25%,75%">' \
    "  <frame src=\"${XHTMLFILE}\"/>" \
    "  <frame src=\"${BASEDIR}/${files[0]}\" name=\"book\" onload=\"AddCSS(); ChangeTitle()\"/>" \
    '</frameset>' \
    '</html>' > "$HTMLFILE"
}

#===============================================================================

# Defaults. OPEN and LYNX are initialised too, so later arithmetic tests
# never see an unset variable.
VERBOSE=0
HELP=0
EXIT=0
OPEN=0
LYNX=0
DEF_STYLE="epub-read.css"
XML="xml"

#######################################
# Parse the command-line options.
# Globals:
#   VERBOSE, OPEN, LYNX, DEF_STYLE, HELP (written)
#   OPTIND (written) index of the first non-option argument
# Arguments:
#   the script's command line
#######################################
parse_options() {
  local opt
  OPTIND=1
  # -s takes the stylesheet as its argument; -h takes none.
  while getopts "vols:h" opt; do
    case "$opt" in
      v) VERBOSE=1 ;;
      o) OPEN=1 ;;
      l) LYNX=1 ;;
      s) DEF_STYLE="$OPTARG" ;;
      h | *) HELP=1 ;;
    esac
  done
}

parse_options "$@"
shift $((OPTIND - 1))

# The EPUB file is mandatory.
if (( $# < 1 )); then
  HELP=1
fi
if (( HELP )); then
  echo
  cat << EOF
USAGE:

$(basename -- "$0") [options] input.epub

Where the options are:
  -s "style.css"  Use "style.css" as stylesheet (default is "epub-read.css")
         The stylesheet will be searched in the current directory first, and then in ~/.epub2pdf
  -o              Open the generated page with the default program (with xdg-open)
  -l              Open the generated page with lynx
  -v              Verbose output
  -h              Show this help
EOF
  echo
  # The script exits after the tool check below, so missing tools are
  # reported together with the usage text.
  EXIT=1
fi

# Check the needed programs exist. `command -v` only looks the name up, so
# no tool is executed just to find out whether it is installed.
if ! command -v unzip > /dev/null 2>&1; then
  echo "Error: unzip does not exist (http://www.info-zip.org/)" >&2
  EXIT=1
fi
# Try the configured XMLStarlet name first, then fall back to "xmlstarlet".
if ! command -v "$XML" > /dev/null 2>&1; then
  XML="xmlstarlet"
  if ! command -v "$XML" > /dev/null 2>&1; then
    echo "Error: xml and xmlstarlet do not exist (http://xmlstar.sourceforge.net/)" >&2
    EXIT=1
  fi
fi

# EXIT is also set when the usage text was shown.
if (( EXIT )); then exit 1; fi

# Resolve the stylesheet: use it as given if readable, otherwise look for it
# in ~/.epub2pdf.
if [[ ! -r "$DEF_STYLE" && -r "$HOME/.epub2pdf/$DEF_STYLE" ]]; then
  DEF_STYLE="$HOME/.epub2pdf/$DEF_STYLE"
fi
if [[ ! -r "$DEF_STYLE" ]]; then
  echo "Cannot read file $DEF_STYLE" >&2
  exit 1
fi
if (( VERBOSE )); then
  echo "Using stylesheet $DEF_STYLE"
fi
# Absolute path, because the script changes directory before copying it.
DEF_STYLE=$(readlink -f -- "$DEF_STYLE")

# Remember where we started and resolve the EPUB to an absolute path.
DIR=$PWD
EPUB=$(readlink -f -- "$1")

if [[ ! -r "$EPUB" ]]; then
  echo "Cannot read file $1" >&2
  exit 1
fi

# mktemp -d creates the directory itself; give up if it cannot.
TEMPDIR=$(mktemp -td epub-read.XXX) || exit 1

# Remove the unpacked files on every exit path. INT and TERM are turned into
# a real exit, so the script stops instead of carrying on in a directory
# that has just been deleted.
cleanup() {
  cd "${DIR:-/}" 2> /dev/null
  if [[ -n "$TEMPDIR" ]]; then
    rm -rf -- "$TEMPDIR"
  fi
}
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

cd "$TEMPDIR" || exit 1
unzip -qo "$EPUB"
# Everything below starts from the container file; without it the archive
# is not a usable EPUB.
if [[ ! -r META-INF/container.xml ]]; then
  echo "Error: $EPUB is not a valid EPUB (META-INF/container.xml is missing)" >&2
  exit 1
fi

get_data
read_spine
read_toc
write_css
write_xhtml
write_html

HTMLPATH=$(readlink -f -- "$HTMLFILE")
printf '\n%s\n\n' "$HTMLPATH"

if (( OPEN )); then
  xdg-open "$HTMLPATH"
elif (( LYNX )); then
  lynx -cfg=lynx.config "$HTMLPATH"
fi

# Keep the generated files until the user is done with them.
read -r -p "Press <ENTER> to delete the temporary files" END

# The EXIT trap removes the temporary directory.
exit 0