am_dot_com

IA 2022-01-05

Jan 5th, 2022 (edited)
#class_animals.py #classification of images of animals

import os #operating system
from amutil_ai import AmUtilAi

dirWithImages = "."
listOfNamesOfAllFilesInDir =\
    os.listdir(
        dirWithImages
    )
#print(listOfNamesOfAllFilesInDir)

for filename in listOfNamesOfAllFilesInDir:
    bJpg = filename.find(".jpg")!=-1
    bPng = filename.find(".png")!=-1
    bImage = bJpg or bPng
    if (bImage):
        print ("Will classify the image ", filename)
        classificationResult =\
            AmUtilAi.kerasClassify(
                pStrImagePath=filename,
                pbStrClassificationTop1=True,
                pbShowImageWithOverlayedClassificationRightAfterPredicion=False
            )
        #input("Press ENTER to continue") #pause

        #rename the image file, prefixing the top classification
        AmUtilAi.renameImageWithPrefixingTopClassification(
            pStrImageFileName=filename,
            pStrClassification=classificationResult
        )
    #if
#for
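
#Added sketch (not part of the original script): the .find(".jpg") test above also
#matches names such as "photo.jpg.txt". A case-insensitive suffix check is one
#possible alternative, shown here only as an illustration:
def isProbablyImageFileName(pStrFileName):
    return pStrFileName.lower().endswith((".jpg", ".jpeg", ".png"))
#e.g. isProbablyImageFileName("Cat.JPG") -> True ; isProbablyImageFileName("photo.jpg.txt") -> False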

****

#amutil_tools.py

import urllib.request
from urllib.request import urlopen
from urllib.error import URLError
from urllib.error import HTTPError
from bs4 import BeautifulSoup

from datetime import date, datetime #the date and datetime classes (a bare "import datetime" would be shadowed by these names)

import json #for json.dumps

#pip install certifi
import certifi
import ssl

class AmUtil:
    USER_AGENT_STRING_MOZ47 = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0'
    USER_AGENT_STRING_GOOGLE_NEWS = 'Googlebot-News'
    REFERER_GOOGLE = "https://www.google.com"

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    @staticmethod
    # https://docs.python.org/3/library/urllib.request.html
    def buildDictOfHttpHeadersToFakeUserAgent(
        pStrUserAgentSignature = USER_AGENT_STRING_MOZ47
    ):
        dictHeaders = dict()
        dictHeaders["User-Agent"]=pStrUserAgentSignature
        return dictHeaders
    #def buildDictOfHttpHeadersToFakeUserAgent

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    @staticmethod
    # https://docs.python.org/3/library/urllib.request.html
    def buildDictOfHttpHeadersToFakeReferer(
        pStrDefaultReferer=REFERER_GOOGLE
    ):
        dictHeaders = dict()
        dictHeaders["Referer"] = pStrDefaultReferer
        return dictHeaders
    # def buildDictOfHttpHeadersToFakeReferer

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    @staticmethod
    def combineAllDictionaries(
        *pDictionaries #variable number of positional (non-keyword) arguments
    ):
        #https://betterprogramming.pub/new-union-operators-to-merge-dictionaries-in-python-3-9-8c7dbbd1080c
        dictRet = dict()
        for d in pDictionaries:
            dictRet.update(d)

        return dictRet
    # def combineAllDictionaries
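
    #Added note (illustrative): on Python 3.9+ the same merge can be written with the
    #dict union operator described in the article linked above, e.g.:
    #    dictRet = dict()
    #    for d in pDictionaries:
    #        dictRet = dictRet | d #later dictionaries override earlier keys, like update()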

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    @staticmethod
    def genericUrlReader (
        pStrUrl:str,
        pUserAgent:str="",
        pReferer:str="",
        pStrEncoding="UTF-8",
        pbSilent:bool = False,
        pBytesInsteadOfStr = False
    ):
        strRead = ""
        strErrorMsg = "No error reading %s"%(pStrUrl)

        try:
            sslContext = ssl.create_default_context(
                cafile=certifi.where()
            )

            #oRequest = urlopen(pStrUrl,  cafile=certifi.where()) #DeprecationWarning: cafile, capath and cadefault are deprecated, use a custom context instead.

            dictHeadersToFakeUserAgent = dict()
            dictHeadersToFakeReferer = dict()
            if (pUserAgent!=""):
                dictHeadersToFakeUserAgent = AmUtil.buildDictOfHttpHeadersToFakeUserAgent(pUserAgent)

            if (pReferer!=""):
                dictHeadersToFakeReferer = AmUtil.buildDictOfHttpHeadersToFakeReferer(pReferer)

            dictAllHeaders = AmUtil.combineAllDictionaries(
                dictHeadersToFakeUserAgent,
                dictHeadersToFakeReferer
            )

            #https://docs.python.org/3/library/urllib.request.html
            oRequest = urllib.request.Request(
                url=pStrUrl,
                headers=dictAllHeaders,
            )
            reader = urllib.request.urlopen(
                oRequest,
                context=sslContext
            )
            bytesRead = reader.read()

            """
            #this approach does not allow custom headers, necessary to bypass some sites' barriers to consumption
            oRequest = urlopen(
                pStrUrl, #str or urllib.request.Request object
                context=sslContext
            )
            bytesRead = oRequest.read()
            """

            if (pBytesInsteadOfStr):
                return bytesRead
            else:
                strRead = str(bytesRead, pStrEncoding)

        except HTTPError as httpError:
            #HTTPError is a subclass of URLError, so it must be handled before the more generic clause below
            strHttpError = str(httpError)
            strErrorMsg = "HTTP error %s @URL: %s"%(strHttpError, pStrUrl)
        except URLError as urlError:
            #reason is an OSError for socket-level failures; getattr guards the case where it is a plain string
            iErrorCode = getattr(urlError.reason, "errno", None) #e.g. 11004
            strError = getattr(urlError.reason, "strerror", "") #e.g. 'getaddrinfo failed'

            strUrlError = str(urlError)
            strErrorMsg = "URL error %s opening URL: %s"%(strUrlError, pStrUrl)
        finally:
            if (not pbSilent):
                print (strErrorMsg)
        #try/except/finally

        return strRead
    #def genericUrlReader
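
    #Example usage (added, illustrative; performs a real network request when run):
    #    strHtml = AmUtil.genericUrlReader(
    #        pStrUrl="https://www.example.com",
    #        pUserAgent=AmUtil.USER_AGENT_STRING_MOZ47,
    #        pReferer=AmUtil.REFERER_GOOGLE
    #    )
    #    print(strHtml[:200])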

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    @staticmethod
    def findAnchorsAtUrl(pStrUrl):
        listAnchors = []
        strContents = AmUtil.genericUrlReader(pStrUrl)

        if (strContents != ""):
            bs = BeautifulSoup(
                strContents,
                "html5lib"
            )
            anchors = bs.findAll("a") #<class 'bs4.element.ResultSet'>
            iHowManyAnchors = len(anchors)
            for a in anchors:
                dictAttributesOfElement = a.attrs
                bContainsHref = "href" in dictAttributesOfElement.keys()
                if (bContainsHref):
                    href = dictAttributesOfElement["href"]
                    anchor = a.text
                    dictA = {}
                    dictA['href'] = href
                    dictA['anchor'] = anchor

                    bNewDict = not (dictA in listAnchors)

                    if (bNewDict):
                        listAnchors.append(dictA)
                    #if new dict
                #if anchor contains href
                else:
                    print ("Anchor without href: "+str(a))
            # for every anchor
        # if
        return listAnchors
    # def findAnchorsAtUrl

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    @staticmethod
    def findAnchorsAtUrlFilterByHrefWith(
        pStrUrl:str,
        pStrFilterInHref:str=""
    ):
        listAnchors = []
        strContents = AmUtil.genericUrlReader(pStrUrl)

        if (strContents != ""):
            bs = BeautifulSoup(
                strContents,
                "html5lib"
            )
            anchors = bs.findAll("a")  # <class 'bs4.element.ResultSet'>
            iHowManyAnchors = len(anchors)
            for a in anchors:
                dictAttributesOfElement = a.attrs
                bContainsHref = "href" in dictAttributesOfElement.keys()

                if (bContainsHref):
                    href = dictAttributesOfElement["href"]
                    anchor = a.text
                    dictA = {}
                    dictA['href'] = href
                    dictA['anchor'] = anchor

                    bNoFilter = pStrFilterInHref==""
                    iWhereFilterExistsInHref = href.find(pStrFilterInHref)
                    bFilterExistsInHref = iWhereFilterExistsInHref!=-1
                    bHrefSatisfiesFilter = bNoFilter or bFilterExistsInHref

                    bNewDict = not (dictA in listAnchors)

                    if (bNewDict and bHrefSatisfiesFilter):
                        listAnchors.append(dictA)
                    #if new dict
                #if anchor contains href
            # for every anchor
        # if
        return listAnchors
    # def findAnchorsAtUrlFilterByHrefWith
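
    #Example usage (added, illustrative): keep only anchors whose href mentions "wiki":
    #    listWikiAnchors = AmUtil.findAnchorsAtUrlFilterByHrefWith(
    #        pStrUrl="https://www.example.com",
    #        pStrFilterInHref="wiki"
    #    )
    #    #each element is a dict like {'href': '...', 'anchor': '...'}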

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    @staticmethod
    def listOfDictsHrefAnchorToString(pListOfDictsHrefAnchor:list):
        strAll = ""
        for el in pListOfDictsHrefAnchor:
            bCheckHref = "href" in el.keys()
            bCheckAnchor = "anchor" in el.keys()
            bCheck = bCheckHref and bCheckAnchor
            if (bCheck):
                strEl = "href: %s\nanchor: %s"%(el['href'], el['anchor'])
                strAll+=strEl+"\n"
            #if
        #for
        return strAll
    # def listOfDictsHrefAnchorToString

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    @staticmethod
    def listOfDictsHrefAnchorToFile(
        pListOfDictsHrefAnchor: list,
        pStrFileName:str=""
    )->str:
        bAutomaticFileName:bool = pStrFileName==""
        if (bAutomaticFileName):
            strNow = AmUtil.utilNowYMDHMS()
            strFileName:str = "%s_hrefs_anchors.TXT"%(strNow)
        else:
            strFileName = pStrFileName
        #if-else

        fw = open(
            file = strFileName,
            mode = "wt",
            encoding = "UTF-8"
        )
        strAll = AmUtil.listOfDictsHrefAnchorToString(pListOfDictsHrefAnchor)
        fw.write(strAll)
        fw.close()

        return strFileName
    #def listOfDictsHrefAnchorToFile
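
    #Example pipeline (added, illustrative): scrape anchors and persist them to a text file:
    #    listAnchors = AmUtil.findAnchorsAtUrl("https://www.example.com")
    #    strSavedAs = AmUtil.listOfDictsHrefAnchorToFile(listAnchors) #auto-named "<timestamp>_hrefs_anchors.TXT"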

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    @staticmethod
    def genericDataStructureToJsonFile(
            pData,
            pStrFileName: str = ""
    ) -> str:
        bAutomaticFileName: bool = pStrFileName == ""
        if (bAutomaticFileName):
            strNow = AmUtil.utilNowYMDHMS()
            strFileName: str = "%s_data.JSON" % (strNow)
        else:
            strFileName = pStrFileName
        # if-else

        fw = open(
            file=strFileName,
            mode="wt",
            encoding="UTF-8"
        )
        strAll = json.dumps(pData)
        fw.write(
            strAll
        )
        fw.close()

        return strFileName
    # def genericDataStructureToJsonFile
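
    #Example usage (added, illustrative): any json.dumps-serialisable structure works:
    #    strJsonFile = AmUtil.genericDataStructureToJsonFile({"board": "wg", "images": 42})
    #    #writes e.g. "2022-1-5_14-3-7_data.JSON" and returns that file name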

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    #from datetime import date
    @staticmethod
    def getDateCorrespondingToToday():
        today = date.today() #<class 'datetime.date'>
        y = today.year
        m = today.month
        d = today.day
        #return (y,m,d)
        return today
    #def getDateCorrespondingToToday

    # _.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-._.-^-.
    #from datetime import datetime
    @staticmethod
    def getDateCorrespondingToYMD (pY, pM, pD, pHH=0, pMM=0, pSS=0):
        oYMDHMS = datetime(pY, pM, pD, pHH, pMM, pSS) #<class 'datetime.datetime'>; datetime here is the class imported at the top
        oDate = oYMDHMS.date() #<class 'datetime.date'>
        return oDate
    #def getDateCorrespondingToYMD

    # _.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.
    @staticmethod
    def utilNowYMDHMS():
        dateToday = date.today()
        y, m, d = dateToday.year, dateToday.month, dateToday.day
        strYMD = "%d-%d-%d" % (y, m, d)

        timeNow = datetime.now()
        hh, mm, ss = timeNow.hour, timeNow.minute, timeNow.second
        strHMS = "%d-%d-%d" % (hh, mm, ss)

        strRet = "%s_%s" % (strYMD, strHMS)

        return strRet
    # def utilNowYMDHMS
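
    #Added note (illustrative): datetime.now().strftime("%Y-%m-%d_%H-%M-%S") produces a
    #similar stamp but zero-padded ("2022-01-05_09-03-07"), whereas this helper does not
    #pad ("2022-1-5_9-3-7"); the helper is kept as-is so existing file names stay stable.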

    # _.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.
    #untested, unused
    #2022-01-01
    @staticmethod
    def genericDictToTsvLine(
        pDict:dict
    )->str:
        strRet = ""
        theKeys = pDict.keys()
        iHowManyKeys = len(theKeys)
        iCurrentKey = 1
        for k in theKeys:
            bThisIsTheLastKey = iCurrentKey==iHowManyKeys
            v = pDict[k]
            bValueIsDict = type(v)==dict
            if (bValueIsDict):
                #nested dicts are flattened recursively; their trailing newline is stripped
                strRet+=AmUtil.genericDictToTsvLine(v).rstrip("\n")
            else:
                strRet+=str(v)
            #if-else

            if(bThisIsTheLastKey):
                strRet+="\n"
            else:
                strRet+="\t"
            #if-else
            iCurrentKey+=1
        #for
        return strRet
    #def genericDictToTsvLine

    # _.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.-~^~-._.
    # 2021-12-31
    # created to assist the AmSpotify project
    @staticmethod
    def genericListOfDictsToTsvFile(
        pListOfDicts: list,
        pStrFileName: str
    ):
        strAll = ""
        bCheck = len(pListOfDicts) > 0 and (type(pListOfDicts[0]) == dict)
        if (bCheck):
            keys = pListOfDicts[0].keys()
            strKeys = ""

            # build the headers line
            idx = 0
            for k in keys:
                bLastOne = idx == len(keys) - 1
                strKeys += k + "\t" if (not bLastOne) else k + "\n"
                idx += 1
            # for

            strAll += strKeys

            for el in pListOfDicts:
                idxCol = 0
                strLineForEl = ""
                for key in keys:
                    v = el[key]
                    # TODO: if v is a dict itself
                    bLastOne = idxCol == len(keys) - 1
                    strLineForEl += (str(v) + "\t") if (not bLastOne) else (str(v) + "\n")
                    idxCol += 1
                # for every key for the current element

                strAll += strLineForEl
            # for every element in the list
        # if check
        # return strAll

        try:
            fw = open(
                file=pStrFileName,
                mode="wt",
                encoding="UTF-8"
            )
            fw.write(strAll)
            fw.close()
        except Exception as e:
            print(str(e))
            return False
        # try-except

        return len(strAll)  # returns the number of characters written
    # def genericListOfDictsToTsvFile
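
    #Example usage (added, illustrative):
    #    listRows = [
    #        {"artist": "A", "plays": 10},
    #        {"artist": "B", "plays": 7}
    #    ]
    #    AmUtil.genericListOfDictsToTsvFile(listRows, "plays.TSV")
    #    #produces a header line "artist\tplays" followed by one tab-separated line per dict
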
#def classAmUtil
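
#Added, hedged demo (not in the original paste): exercises the two helpers that do not
#touch the network, so importing this module elsewhere stays side-effect free.
if __name__ == "__main__":
    print(AmUtil.utilNowYMDHMS())
    strDemoJsonFile = AmUtil.genericDataStructureToJsonFile(
        {"demo": True, "when": AmUtil.utilNowYMDHMS()}
    )
    print("wrote", strDemoJsonFile)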

*****

#Am4Chan.py
from amutil_tools import AmUtil

import re #regular expressions
from bs4 import BeautifulSoup

class Am4Chan:
    PATTERN_FOR_BOARD_DISCOVERY = r"^//boards\.4channel\.org/[a-zA-Z0-9_]+/$"
    PATTERN_FOR_JPGS = r".*\.jpg$|.*\.jpeg$"
    PATTERN_FOR_PNGS = r".*\.png$"

    def __init__(
        self,
        pBoardName:str
    ):
        self.mBoardName = pBoardName

        self.mAllResources = self.discoverAllResources()
        #print(self.mAllResources)

        self.mBoards = self.discoverBoards()
        #print(self.mBoards)

        self.mJpgs = self.discoverJpgs()
        #print(self.mJpgs)

        self.mPngs = self.discoverPngs()
        #print(self.mPngs)
    #def __init__

    #dunder / double underscore
    def __str__(self):
        strAll = "board name: %s"%(self.mBoardName)
        return strAll
    #def __str__

    def buildUrlForPageNumber(self, piPageNumber:int)->str:
        strUrl = "https://boards.4chan.org/%s/"%(self.mBoardName)
        if(piPageNumber==1):
            return strUrl
        else:
            #strUrl = strUrl+str(piPageNumber)
            strUrl = "%s%d"%(strUrl, piPageNumber)
            return strUrl
        #if-else
    #def buildUrlForPageNumber
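
    #Example (added, illustrative), for a board named "wg":
    #    self.buildUrlForPageNumber(1) -> "https://boards.4chan.org/wg/"
    #    self.buildUrlForPageNumber(3) -> "https://boards.4chan.org/wg/3"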

    def discoverAllResources(self):
        listResources = list()
        strUrl = self.buildUrlForPageNumber(1)
        strSourceCode =\
            AmUtil.genericUrlReader(
                pStrUrl=strUrl,
                pUserAgent=AmUtil.USER_AGENT_STRING_MOZ47,
                pReferer="https://boards.4chan.org/",
                pBytesInsteadOfStr=False
            )
        #print(strSourceCode)
        bs = BeautifulSoup(
            markup=strSourceCode,
            features="html5lib"
        )
        anchors = bs.findAll("a") #each object is a "Tag" (BeautifulSoup), non-hashable
        for a in anchors:
            bHrefExists = "href" in a.attrs.keys()
            if (bHrefExists):
                href = a.attrs['href']
                anchor = a.text
                myDict = dict()
                myDict['href'] = href
                myDict['anchor'] = anchor
                listResources.append(myDict)
            #if
        #for

        #self.mAllResources = listResources #this is done in __init__
        return listResources
    #def discoverAllResources

    def filterAllResourcesByRegexp(
        self,
        pStrRegExp:str #this is the regular expression that will be used to filter results
    ):
        listFiltered = list()
        #compile once, outside the loop
        reTester = re.compile(
            pattern=pStrRegExp,
            flags=re.IGNORECASE
        )
        for dictResource in self.mAllResources:
            href = dictResource['href']

            matchResult = reTester.match(href)
            bNoMatch = matchResult is None
            bMatch = not bNoMatch
            if (bMatch):
                listFiltered.append(dictResource)
            #if the resource's href matches the pattern
        #for every resource
        return listFiltered
    #def filterAllResourcesByRegexp
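
    #Example (added, illustrative): reuse the generic filter with an ad-hoc pattern,
    #e.g. to keep only .gif resources:
    #    listGifs = self.filterAllResourcesByRegexp(r".*\.gif$")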

    def discoverBoards(self):
        listBoards = list()
        #incomplete URLs, may have repetitions
        foundBoards = self.filterAllResourcesByRegexp(Am4Chan.PATTERN_FOR_BOARD_DISCOVERY)
        for dictBoard in foundBoards:
            href = dictBoard['href']
            listBoards.append("https:"+href)
        #for
        #list of absolute URLs for other communities

        #now, eliminate repetitions, if there are any
        setNoRepetitions = set(listBoards) #a set keeps only one copy of each element
        listNoRepetitions = list(setNoRepetitions)
        return listNoRepetitions
    #def discoverBoards

    #2022-01-05
    def discoverJpgs(self):
        listJpgs = list()
        listOfDictsOfFoundJpgs = self.filterAllResourcesByRegexp(Am4Chan.PATTERN_FOR_JPGS)
        for dictJpg in listOfDictsOfFoundJpgs:
            href=dictJpg['href']
            listJpgs.append("https:"+href)
        #for

        setOfJpgsWithNoRepetitions = set(listJpgs)
        listOfJpgsWithNoRepetitions = list(setOfJpgsWithNoRepetitions)

        #return listJpgs
        return listOfJpgsWithNoRepetitions
    #def discoverJpgs

    def discoverPngs(self):
        listPngs = list()
        listOfDictsOfFoundPngs = self.filterAllResourcesByRegexp(Am4Chan.PATTERN_FOR_PNGS)
        for dictPng in listOfDictsOfFoundPngs:
            href=dictPng['href']
            listPngs.append("https:"+href)
        #for

        setOfPngsWithNoRepetitions = set(listPngs)
        listOfPngsWithNoRepetitions = list(setOfPngsWithNoRepetitions)

        #return listPngs
        return listOfPngsWithNoRepetitions
    #def discoverPngs

    def downloadAllImages(self):
        listOfAllImages = self.mPngs + self.mJpgs #["https://i.4cdn.org/wg/i1.png", "...2.jpg" ...]
        for strHref in listOfAllImages:
            theBytes =\
                AmUtil.genericUrlReader(
                    pStrUrl=strHref,
                    pUserAgent=AmUtil.USER_AGENT_STRING_MOZ47,
                    pReferer="",
                    pBytesInsteadOfStr=True
                )

            #find = leftmost occurrence of symbol in string
            #rfind = rightmost occurrence of symbol in string
            #"Artur".find("r") #1
            #"Artur".rfind("r") #4

            #string slicing: keep everything after the last "/"
            strNameForImageFile = strHref[
                strHref.rfind("/")+1 #start pos
                : #slice operator
                #len(strHref) #final pos
            ]
            fw = open(
                strNameForImageFile,
                mode="wb" #write bytes
            )
            fw.write(theBytes)
            fw.close()
        #for
#class Am4Chan

wg = Am4Chan("wg") #instantiate the consumer
print (wg)
wg.downloadAllImages()
#wg.discoverAllResources()
#wg.discoverBoards() #discover other communities
#wg.discoverMedia() #media is plural of medium (jpgs, pngs, ...)