Advertisement
Try95th

rainofsnow_to_epub

Mar 8th, 2023 (edited)
114
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 3.25 KB | None | 0 0
  1. ## scrape and combine wordpress chapters into an ebook [ epub file ]
  2.  
  3. ## EXAMPLE OF USAGE:
  4. ### rainofsnow_to_epub('https://rainofsnow.com/the-tragedy-of-the-villainess')
  5. #### output at https://drive.google.com/file/d/1Ob3ZSZTvjvpOmBYKSfgfzuxn613p2Dwf
  6.  
  7. ## REQUIREMENTS:
  8. ### linkToSoup [ scraperFn ]  at https://pastebin.com/rBTr06vy
  9. ### confParse at https://pastebin.com/c0TjDmNE
  10. # ! pip install ebooklib
  11. # from ebooklib import epub
  12. # import time
  13.  
  14. bSel = {
  15.         'title': 'div.text>h2', 'synopsis': 'div#synop',
  16.         'c1_link': ('span.chapter>a[href]', 'href'),
  17.         'details': {
  18.             'tSel': ('span.vt1', 'span.vt1+small.vt2'),
  19.             'listSel': 'ul>li:has(span.vt1+small.vt2)',
  20.             '__apply2resultSet__': dict
  21.         }
  22.     }
  23. chSel = {
  24.             'title': 'div#item{{chapCt}}.bb-item>div.content>div.scroller>h2',
  25.             'html': ('div.scroller>div.zoomdesc-cont',*([None]*3),'prettify'),
  26.             'next_link': ('nav>ul>li:has(>a)+li>a:not(.disable)', 'href')
  27.         }
  28.  
  29. def rainofsnow_to_epub(tl_url, scraperFn=linkToSoup, selRef_b=bSel, selRef_c=chSel):
  30.     book_dets = confParse(scraperFn(tl_url, isv=True), selRef_b)
  31.  
  32.     chapterLink = book_dets['c1_link']
  33.     nid = f'rainofsnow_{int(time.time())}_nid'
  34.     if 'novelid=' in chapterLink.split('?',1)[-1]:
  35.         nid += chapterLink.split('?',1)[-1].split('novelid=',1)[0]
  36.  
  37.     book = epub.EpubBook()
  38.     book.set_identifier(nid)
  39.     book.set_title(book_dets['title'])
  40.     book.set_language('en')
  41.     book.add_author(book_dets['details'].get('Author', 'UNKNOWN'))
  42.     book.add_metadata('DC', 'description', book_dets['synopsis'])
  43.     for mk, mv in book_dets['details'].items():
  44.         if mk == 'Author': continue
  45.         book.add_metadata(None, 'meta', '', {'name': mk, 'content': mv})
  46.    
  47.     chapCt, chList, titleSel = 0, [], selRef_c['title'][:]
  48.     while chapterLink:
  49.         chapCt += 1
  50.         selRef_c['title'] = titleSel.replace('{{chapCt}}', f'{chapCt}')
  51.         cSoup = scraperFn(chapterLink, isv=False, returnErr=True)
  52.         if isinstance(cSoup, str):
  53.             print('', end=f'\rWaiting 5s before retrying ch{chapCt}: !{cSoup}')
  54.             chapCt = chapCt - 1
  55.             time.sleep(5)
  56.             continue
  57.         ch_data = confParse(cSoup, selRef_c)
  58.         ch_fn, ch_name = f'ch{chapCt}.xhtml', ch_data['title']
  59.         ch_name = f'[Ch{chapCt}] {ch_name}'
  60.         ch_body = f'<h2>{ch_name}</h2>\n{ch_data["html"]}'
  61.  
  62.         ch_cur = epub.EpubHtml(title=ch_name, file_name=ch_fn, lang='en')
  63.         ch_cur.set_content(f'<html><body>{ch_body}</body></html>')
  64.         book.add_item(ch_cur)
  65.         chList.append(ch_cur)
  66.  
  67.         print('', end=f'\rAdded as {repr(ch_fn)}: {repr(ch_name)}')
  68.         chapterLink = ch_data['next_link']
  69.  
  70.     style = 'body { font-family: Times, Times New Roman, serif; }'
  71.     nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css",
  72.                             media_type="text/css", content=style)
  73.     book.add_item(nav_css)
  74.     book.toc, book.spine = chList, ['nav', *chList]
  75.  
  76.     book.add_item(epub.EpubNcx())
  77.     book.add_item(epub.EpubNav())    
  78.     epub.write_epub(f"{book_dets['title']}.epub", book)
  79.     print(f'\rSaved {len(chList)} chapters to',
  80.           repr(f"{book_dets['title']}.epub"))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement