Advertisement
Try95th

scrape quotes from https://quotes.toscrape.com

Nov 23rd, 2022 (edited)
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 21.94 KB | None | 0 0
  1. ## scrape quotes from https://quotes.toscrape.com [there are only 100 quotes] ##
  2. ## scroll to bottom for examples of usage [with output] ##
  3.  
  4. import requests
  5. from bs4 import BeautifulSoup
  6. import time
  7. import pandas
  8. # import cloudscraper # if requests isn't enough [implement in scrapeLink function]
  9. # from requests_html import HTMLSession # if requests and cloudscraper aren't enough [implement in scrapeLink function]
  10.  
  11.  
  12. class scrapedQuotes:
  13.     def __init__(self) -> None:
  14.         self.authTagQuotes = {}
  15.  
  16.     def check_if_new_quote(self, qauth, qtags, qtxt):
  17.         # return (desc, [authIsNew, tagsIsNew, txtIsNew])
  18.         if qauth not in self.authTagQuotes:
  19.             return ('all', [True]*3) ## all new
  20.         if qtags not in self.authTagQuotes[qauth]:
  21.             return ('tag', [False, True, True])
  22.         if qtxt not in self.authTagQuotes[qauth][qtags]:
  23.             return ('txt', [False, False, True])
  24.         return (None, [False]*3) ## no new
  25.  
  26.    
  27.     def add_new_quote(self, qauth, qtags, qtxt, isNew='?'):
  28.         global authTagQuotes
  29.         if isNew not in ['txt', 'tag', 'all', None]:
  30.             isNew = self.check_if_new_quote(qauth, qtags, qtxt)[0]
  31.            
  32.         if isNew == 'txt':
  33.             self.authTagQuotes[qauth][qtags].append(qtxt)
  34.         elif isNew == 'tag':
  35.             self.authTagQuotes[qauth][qtags] = [qtxt]
  36.         elif isNew == 'all':
  37.             self.authTagQuotes[qauth] = {qtags: [qtxt]}
  38.         else: return False
  39.         return True
  40.  
  41.     def to_flattened(self):
  42.         qFlat = []
  43.         for a, ad in self.authTagQuotes.items():
  44.             for t, tl in ad.items():
  45.                 qFlat += [{
  46.                     'author': a, 'tags': t, 'text': x
  47.                 } for i, x in enumerate(tl)]
  48.         return qFlat
  49.    
  50.     def to_df(self):
  51.         return pandas.DataFrame(self.to_flattened())
  52.     def save_to_csv(self, csv_fn):
  53.         self.to_df().to_csv(csv_fn, index=False)
  54.    
  55.     def add_from_csv(self, csv_fn):
  56.         qdf = pandas.concat(
  57.             self.to_df(), pandas.read_csv(csv_fn)
  58.         ).set_index(['author', 'tags'])
  59.         self.authTagQuotes = {au: {
  60.             t: [([s] if type(s) == str else list(set(s)))
  61.                       for s in [qdf.loc[au, t]['text']]][0]
  62.             for t in set(qdf.loc[au].index)
  63.         } for au in set([a for a,t in qdf.index])}
  64.  
  65.     def getCount(self, targetItem='quotes'):
  66.         if targetItem == 'author':
  67.             return len(list(self.authTagQuotes.keys()))
  68.         if targetItem == 'authTags':
  69.             return sum([
  70.                 len(list(tt.keys())) for tt in self.authTagQuotes.values()
  71.             ])
  72.         if targetItem == 'tags':
  73.             return len(list(set([tag for tags in [
  74.                 list(tt.keys()) for tt in self.authTagQuotes.values()
  75.             ] for tag in tags])))
  76.         # if targetItem == 'quotes' # default
  77.         return sum([sum(
  78.             [len(tl) for tl in tt.values()]
  79.         ) for tt in self.authTagQuotes.values()])
  80.  
  81.     def scrapeLink(self, targetUrl):
  82.         try:
  83.             r = requests.get(targetUrl)
  84.             # r = cloudscraper.create_scraper().get(targetUrl)
  85.             # r = HTMLSession().get(targetUrl)
  86.            
  87.             if r.status_code == 200:
  88.                 ######## CAN ADD OTHER CHECKS ########
  89.                 return BeautifulSoup(r.content, 'html5lib')
  90.             errMsg = f'<{r.status_code} {r.reason}> - '
  91.             errMsg = f'{errMsg}Failed to scrape {targetUrl}'
  92.         except Exception as e:
  93.             errMsg = f'Failed to scrape {targetUrl} \n - errorMsg: "{str(e)}"'
  94.         print(errMsg)
  95.         return None
  96.  
  97.     def scrapeNewQuotes(self, maxAdd=100, maxFails=10, pauseTime=2):
  98.         qAdded = failCt = 0
  99.         rootUrl = 'https://quotes.toscrape.com'
  100.         nextUrl = f'{rootUrl}/'
  101.  
  102.         quotSp_sel = 'span.text'
  103.         authHl_sel = 'span small.author + a[href^="/author\/"]'
  104.         authSp_sel = 'span:has(small.author + a[href^="/author\/"])'
  105.         tagkSp_sel = 'div.tags > meta.keywords[content]'
  106.         contDv_sel = f'div.quote:has({quotSp_sel}~{authSp_sel}~{tagkSp_sel})'
  107.         nxtpHl_sel = 'li.next > a[href^="\/page\/"][href$="\/"]'
  108.  
  109.         while qAdded < maxAdd and nextUrl:
  110.             qSoup = self.scrapeLink(nextUrl)
  111.             if qSoup is None:
  112.                 if failCt < maxFails:
  113.                     failCt += 1
  114.                     pauseTime = pauseTime*failCt
  115.                     print(f'waiting {pauseTime}s before trying again')
  116.                     time.sleep(pauseTime)
  117.                     continue
  118.                 break
  119.            
  120.             qList = [(
  121.                 q.select_one(quotSp_sel).get_text(' ', strip=True), # text
  122.                 q.select_one(authHl_sel).get('href'), # author
  123.                 q.select_one(tagkSp_sel).get('content') # tags
  124.             ) for q in qSoup.select(contDv_sel)]
  125.             qList = [(
  126.                 a.split('/')[2], # extract author from link
  127.                 ','.join(sorted(t.split(','))), # rearrange tags alphabetically
  128.                 q[(1 if q[0]=='“' else 0):(-1 if q[-1] == '”' else len(q))]
  129.                 #remove “” from around quote
  130.             ) for q, a, t in qList]
  131.             qFil = [
  132.                 (a, t, q) for a, t, q in qList if
  133.                 self.check_if_new_quote(a, t, q)
  134.             ]
  135.  
  136.             for a, t, q in qFil:
  137.                 if qAdded < maxAdd:
  138.                     qAdded += self.add_new_quote(a, t, q)
  139.                 else: break
  140.  
  141.             nextUrl = qSoup.select_one(nxtpHl_sel)
  142.             if nextUrl:
  143.                 nextUrl = f"{rootUrl}{nextUrl.get('href')}"
  144.        
  145.         return qAdded
  146.  
  147.  
  148. ############################## EXAMPLE OF USAGE ##############################
  149. scq = scrapedQuotes()
  150. while(scq.scrapeNewQuotes(13)): print(scq.getCount(), end=' ')
  151. print('\n')
  152. print(scq.to_df().to_csv())
  153. ############################### PRINTED OUTPUT ###############################
  154. '''
  155. 13 26 39 52 65 78 91 100
  156.  
  157. ,author,tags,text
  158. 0,Albert-Einstein,"change,deep-thoughts,thinking,world",The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.
  159. 1,Albert-Einstein,"inspirational,life,live,miracle,miracles",There are only two ways to live your life. One is as though nothing is a miracle. The other is as though everything is a miracle.
  160. 2,Albert-Einstein,"adulthood,success,value",Try not to become a man of success. Rather become a man of value.
  161. 3,Albert-Einstein,"simplicity,understand","If you can't explain it to a six year old, you don't understand it yourself."
  162. 4,Albert-Einstein,"children,fairy-tales","If you want your children to be intelligent, read them fairy tales. If you want them to be more intelligent, read them more fairy tales."
  163. 5,Albert-Einstein,imagination,Logic will get you from A to Z; imagination will get you everywhere.
  164. 6,Albert-Einstein,"knowledge,learning,understanding,wisdom",Any fool can know. The point is to understand.
  165. 7,Albert-Einstein,"life,simile","Life is like riding a bicycle. To keep your balance, you must keep moving."
  166. 8,Albert-Einstein,music,"If I were not a physicist, I would probably be a musician. I often think in music. I live my daydreams in music. I see my life in terms of music."
  167. 9,Albert-Einstein,mistakes,Anyone who has never made a mistake has never tried anything new.
  168. 10,J-K-Rowling,"abilities,choices","It is our choices, Harry, that show what we truly are, far more than our abilities."
  169. 11,J-K-Rowling,"courage,friends","It takes a great deal of bravery to stand up to our enemies, but just as much to stand up to our friends."
  170. 12,J-K-Rowling,,"It is impossible to live without failing at something, unless you live so cautiously that you might as well not have lived at all - in which case, you fail by default."
  171. 13,J-K-Rowling,dumbledore,"Of course it is happening inside your head, Harry, but why on earth should that mean that it is not real?"
  172. 14,J-K-Rowling,dumbledore,"It matters not what someone is born, but what they grow to be."
  173. 15,J-K-Rowling,"death,inspirational","To the well-organized mind, death is but the next great adventure."
  174. 16,J-K-Rowling,live-death-love,"Do not pity the dead, Harry. Pity the living, and, above all those who live without love."
  175. 17,J-K-Rowling,integrity,"Remember, if the time should come when you have to make a choice between what is right and what is easy, remember what happened to a boy who was good, and kind, and brave, because he strayed across the path of Lord Voldemort. Remember Cedric Diggory."
  176. 18,J-K-Rowling,truth,"The truth."" Dumbledore sighed. ""It is a beautiful and terrible thing, and should therefore be treated with great caution."
  177. 19,Jane-Austen,"aliteracy,books,classic,humor","The person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid."
  178. 20,Jane-Austen,"friendship,love","There is nothing I would not do for those who are really my friends. I have no notion of loving people by halves, it is not my nature."
  179. 21,Jane-Austen,"humor,love,romantic,women","A lady's imagination is very rapid; it jumps from admiration to love, from love to matrimony in a moment."
  180. 22,Jane-Austen,"books,library,reading","I declare after all there is no enjoyment like reading! How much sooner one tires of any thing than of a book! -- When I have a house of my own, I shall be miserable if I have not an excellent library."
  181. 23,Jane-Austen,"elizabeth-bennet,jane-austen","There are few people whom I really love, and still fewer of whom I think well. The more I see of the world, the more am I dissatisfied with it; and every day confirms my belief of the inconsistency of all human characters, and of the little dependence that can be placed on the appearance of merit or sense."
  182. 24,Marilyn-Monroe,"be-yourself,inspirational","Imperfection is beauty, madness is genius and it's better to be absolutely ridiculous than absolutely boring."
  183. 25,Marilyn-Monroe,"friends,heartbreak,inspirational,life,love,sisters","This life is what you make it. No matter what, you're going to mess up sometimes, it's a universal truth. But the good part is you get to decide how you're going to mess it up. Girls will be your friends - they'll act like it anyway. But just remember, some come, some go. The ones that stay with you through everything - they're your true best friends. Don't let go of them. Also remember, sisters make the best friends in the world. As for lovers, well, they'll come and go too. And baby, I hate to say it, most of them - actually pretty much all of them are going to break your heart, but you can't give up because if you give up, you'll never find your soulmate. You'll never find that half who makes you whole and that goes for everything. Just because you fail once, doesn't mean you're gonna fail at everything. Keep trying, hold on, and always, always, always believe in yourself, because if you don't, then who will, sweetie? So keep your head high, keep your chin up, and most importantly, keep smiling, because life's a beautiful thing and there's so much to smile about."
  184. 26,Marilyn-Monroe,,You believe lies so you eventually learn to trust no one but yourself.
  185. 27,Marilyn-Monroe,"girls,love","If you can make a woman laugh, you can make her do anything."
  186. 28,Marilyn-Monroe,love,The real lover is the man who can thrill you by kissing your forehead or smiling into your eyes or just staring into space.
  187. 29,Marilyn-Monroe,attributed-no-source,"A wise girl kisses but doesn't love, listens but doesn't believe, and leaves before she is left."
  188. 30,Marilyn-Monroe,attributed-no-source,"I am good, but not an angel. I do sin, but I am not the devil. I am just a small girl in a big world trying to find someone to love."
  189. 31,Andre-Gide,"life,love",It is better to be hated for what you are than to be loved for what you are not.
  190. 32,Thomas-A-Edison,"edison,failure,inspirational,paraphrased","I have not failed. I've just found 10,000 ways that won't work."
  191. 33,Eleanor-Roosevelt,misattributed-eleanor-roosevelt,A woman is like a tea bag; you never know how strong it is until it's in hot water.
  192. 34,Eleanor-Roosevelt,"attributed,fear,inspiration",Do one thing every day that scares you.
  193. 35,Steve-Martin,"humor,obvious,simile","A day without sunshine is like, you know, night."
  194. 36,Bob-Marley,love,"You may not be her first, her last, or her only. She loved before she may love again. But if she loves you now, what else matters? She's not perfect—you aren't either, and the two of you may never be perfect together but if she can make you laugh, cause you to think twice, and admit to being human and making mistakes, hold onto her and give her the most you can. She may not be thinking about you every second of the day, but she will give you a part of her that she knows you can break—her heart. So don't hurt her, don't change her, don't analyze and don't expect more than she can give. Smile when she makes you happy, let her know when she makes you mad, and miss her when she's not there."
  195. 37,Bob-Marley,music,"One good thing about music, when it hits you, you feel no pain."
  196. 38,Bob-Marley,friendship,"The truth is, everyone is going to hurt you. You just got to find the ones worth suffering for."
  197. 39,Dr-Seuss,fantasy,"I like nonsense, it wakes up the brain cells. Fantasy is a necessary ingredient in living."
  198. 40,Dr-Seuss,"comedy,life,yourself","Today you are You, that is truer than true. There is no one alive who is Youer than You."
  199. 41,Dr-Seuss,"learning,reading,seuss","The more that you read, the more things you will know. The more that you learn, the more places you'll go."
  200. 42,Dr-Seuss,troubles,I have heard there are troubles of more than one kind. Some come from ahead and some come from behind. But I've bought a big bat. I'm all ready you see. Now my troubles are going to have troubles with me!
  201. 43,Dr-Seuss,"humor,philosophy","Think left and think right and think low and think high. Oh, the thinks you can think up if only you try!"
  202. 44,Dr-Seuss,inspirational,"A person's a person, no matter how small."
  203. 45,Douglas-Adams,"life,navigation","I may not have gone where I intended to go, but I think I have ended up where I needed to be."
  204. 46,Elie-Wiesel,"activism,apathy,hate,indifference,inspirational,love,opposite,philosophy","The opposite of love is not hate, it's indifference. The opposite of art is not ugliness, it's indifference. The opposite of faith is not heresy, it's indifference. And the opposite of life is not death, it's indifference."
  205. 47,Friedrich-Nietzsche,"friendship,lack-of-friendship,lack-of-love,love,marriage,unhappy-marriage","It is not a lack of love, but a lack of friendship that makes unhappy marriages."
  206. 48,Mark-Twain,"books,contentment,friends,friendship,life","Good friends, good books, and a sleepy conscience: this is the ideal life."
  207. 49,Mark-Twain,education,I have never let my schooling interfere with my education.
  208. 50,Mark-Twain,"books,classic,reading",′Classic′ - a book which people praise and don't read.
  209. 51,Mark-Twain,"death,life",The fear of death follows from the fear of life. A man who lives fully is prepared to die at any time.
  210. 52,Mark-Twain,"misattributed-mark-twain,truth",A lie can travel half way around the world while the truth is putting on its shoes.
  211. 53,Mark-Twain,truth,Never tell the truth to people who are not worthy of it.
  212. 54,Allen-Saunders,"fate,life,misattributed-john-lennon,planning,plans",Life is what happens to us while we are making other plans.
  213. 55,Pablo-Neruda,"love,poetry","I love you without knowing how, or when, or from where. I love you simply, without problems or pride: I love you in this way because I do not know any other way of loving but this, in which there is no I or you, so intimate that your hand upon my chest is my hand, so intimate that when I fall asleep your eyes close."
  214. 56,Ralph-Waldo-Emerson,happiness,For every minute you are angry you lose sixty seconds of happiness.
  215. 57,Ralph-Waldo-Emerson,"life,regrets",Finish each day and be done with it. You have done what you could. Some blunders and absurdities no doubt crept in; forget them as soon as you can. Tomorrow is a new day. You shall begin it serenely and with too high a spirit to be encumbered with your old nonsense.
  216. 58,Mother-Teresa,attributed-no-source,"If you judge people, you have no time to love them."
  217. 59,Mother-Teresa,"misattributed-to-mother-teresa,paraphrased",Not all of us can do great things. But we can do small things with great love.
  218. 60,Garrison-Keillor,"humor,religion",Anyone who thinks sitting in church can make you a Christian must also think that sitting in a garage can make you a car.
  219. 61,Jim-Henson,humor,Beauty is in the eye of the beholder and it may be necessary from time to time to give a stupid or misinformed beholder a black eye.
  220. 62,Charles-M-Schulz,"chocolate,food,humor",All you need is love. But a little chocolate now and then doesn't hurt.
  221. 63,William-Nicholson,"misattributed-to-c-s-lewis,reading",We read to know we're not alone.
  222. 64,Jorge-Luis-Borges,"books,library",I have always imagined that Paradise will be a kind of library.
  223. 65,George-Eliot,inspirational,It is never too late to be what you might have been.
  224. 66,George-R-R-Martin,"read,readers,reading,reading-books","A reader lives a thousand lives before he dies, said Jojen. The man who never reads lives only one."
  225. 67,George-R-R-Martin,"books,mind","... a mind needs books as a sword needs a whetstone, if it is to keep its edge."
  226. 68,C-S-Lewis,"books,inspirational,reading,tea",You can never get a cup of tea large enough or a book long enough to suit me.
  227. 69,C-S-Lewis,love,"To love at all is to be vulnerable. Love anything and your heart will be wrung and possibly broken. If you want to make sure of keeping it intact you must give it to no one, not even an animal. Wrap it carefully round with hobbies and little luxuries; avoid all entanglements. Lock it up safe in the casket or coffin of your selfishness. But in that casket, safe, dark, motionless, airless, it will change. It will not be broken; it will become unbreakable, impenetrable, irredeemable. To love is to be vulnerable."
  228. 70,C-S-Lewis,"age,fairytales,growing-up",Some day you will be old enough to start reading fairy tales again.
  229. 71,C-S-Lewis,god,We are not necessarily doubting that God will do the best for us; we are wondering how painful the best will turn out to be.
  230. 72,C-S-Lewis,"christianity,faith,religion,sun","I believe in Christianity as I believe that the sun has risen: not only because I see it, but because by it I see everything else."
  231. 73,Martin-Luther-King-Jr,"hope,inspirational",Only in the darkness can you see the stars.
  232. 74,James-Baldwin,love,"Love does not begin and end the way we seem to think it does. Love is a battle, love is a war; love is a growing up."
  233. 75,Haruki-Murakami,"books,thought","If you only read the books that everyone else is reading, you can only think what everyone else is thinking."
  234. 76,Alexandre-Dumas-fils,misattributed-to-einstein,The difference between genius and stupidity is: genius has its limits.
  235. 77,Stephenie-Meyer,"drug,romance,simile","He's like a drug for you, Bella."
  236. 78,Ernest-Hemingway,"books,friends,novelist-quotes",There is no friend as loyal as a book.
  237. 79,Ernest-Hemingway,"good,writing",There is nothing to writing. All you do is sit down at a typewriter and bleed.
  238. 80,Helen-Keller,inspirational,"When one door of happiness closes, another opens; but often we look so long at the closed door that we do not see the one which has been opened for us."
  239. 81,George-Bernard-Shaw,"inspirational,life,yourself",Life isn't about finding yourself. Life is about creating yourself.
  240. 82,Charles-Bukowski,alcohol,"That's the problem with drinking, I thought, as I poured myself a drink. If something bad happens you drink in an attempt to forget; if something good happens you drink in order to celebrate; and if nothing happens you drink to make something happen."
  241. 83,Charles-Bukowski,humor,Some people never go crazy. What truly horrible lives they must lead.
  242. 84,Suzanne-Collins,the-hunger-games,You don’t forget the face of the person who was your last hope.
  243. 85,Suzanne-Collins,humor,"Remember, we're madly in love, so it's all right to kiss me anytime you feel like it."
  244. 86,J-R-R-Tolkien,"bilbo,journey,lost,quest,travel,wander",Not all those who wander are lost.
  245. 87,Alfred-Tennyson,"friendship,love",If I had a flower for every time I thought of you...I could walk through my garden forever.
  246. 88,Terry-Pratchett,"humor,open-mind,thinking","The trouble with having an open mind, of course, is that people will insist on coming along and trying to put things in it."
  247. 89,J-D-Salinger,"authors,books,literature,reading,writing","What really knocks me out is a book that, when you're all done reading it, you wish the author that wrote it was a terrific friend of yours and you could call him up on the phone whenever you felt like it. That doesn't happen much, though."
  248. 90,George-Carlin,"humor,insanity,lies,lying,self-indulgence,truth",The reason I talk to myself is because I’m the only one whose answers I accept.
  249. 91,John-Lennon,"beatles,connection,dreamers,dreaming,dreams,hope,inspirational,peace","You may say I'm a dreamer, but I'm not the only one. I hope someday you'll join us. And the world will live as one."
  250. 92,W-C-Fields,"humor,sinister",I am free of all prejudice. I hate everyone equally.
  251. 93,Ayn-Rand,,The question isn't who is going to let me; it's who is going to stop me.
  252. 94,Jimi-Hendrix,"death,life","I'm the one that's got to die when it's time for me to die, so let me live my life the way I want to."
  253. 95,J-M-Barrie,"adventure,love",To die will be an awfully big adventure.
  254. 96,E-E-Cummings,courage,It takes courage to grow up and become who you really are.
  255. 97,Khaled-Hosseini,life,But better to get hurt by the truth than comforted with a lie.
  256. 98,Harper-Lee,better-life-empathy,You never really understand a person until you consider things from his point of view... Until you climb inside of his skin and walk around in it.
  257. 99,Madeleine-LEngle,"books,children,difficult,grown-ups,write,writers,writing","You have to write the book that wants to be written. And if the book will be too difficult for grown-ups, then you write it for children."
  258. '''
  259. ##############################################################################
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement