# FP 2021-12-21

#artetv_v1.py
from amutil import AmUtil #mainly for genericUrlReader

from enum import Enum
import json

from artetvvideo import ArteTvVideo

class Languages(Enum):
    """Language codes that are substituted into the ARTE "last chance" URL path."""
    English = "en"
    Portuguese = "pt"
    German = "de"
    French = "fr"
    Spanish = "es"
#class Languages

class ArteTvConsumer:
    """
    Reads the ARTE "last chance" listing pages, extracts the JSON embedded in
    the page's __NEXT_DATA__ script tag and offers helpers to persist the
    found videos to a TSV file, read them back, and sort/search them.
    """

    # Immutable template: the "%s" placeholder receives a Languages value.
    # It is never overwritten, so any number of instances can be created.
    BASE_LAST_CHANCE_URL_TEMPLATE = "https://www.arte.tv/%s/videos/last-chance/"

    # Ready-to-use URL. Starts as the English URL (the constructor's default
    # language) and is refreshed by every constructor call, so code that reads
    # ArteTvConsumer.BASE_LAST_CHANCE_URL after instantiating keeps working.
    BASE_LAST_CHANCE_URL = BASE_LAST_CHANCE_URL_TEMPLATE % (Languages.English.value)

    LAST_CHANCE_JSON_SCRIPT_START = "<script id=\"__NEXT_DATA__\" type=\"application/json\">"
    LAST_CHANCE_JSON_SCRIPT_END = "</script>"

    # Characters that would break the one-record-per-line, TAB-separated layout.
    _TSV_FIELD_CLEANER = str.maketrans({"\t": " ", "\n": " ", "\r": " "})

    def __init__(
        self,
        pLanguage = Languages.English
    ):
        """
        :param pLanguage: member of Languages selecting the site language.
        """
        self.mLanguage = pLanguage
        # Always format from the template; formatting the already-formatted
        # class attribute again would raise TypeError on the second instance.
        self.mBaseLastChanceUrl = ArteTvConsumer.BASE_LAST_CHANCE_URL_TEMPLATE % (pLanguage.value)
        # Kept for backward compatibility: the class attribute mirrors the
        # most recently constructed consumer.
        ArteTvConsumer.BASE_LAST_CHANCE_URL = self.mBaseLastChanceUrl
    #def __init__

    def __str__(self):
        """Return a one-line description showing this consumer's base URL."""
        return "BASE_URL= {}".format(self.mBaseLastChanceUrl)
    #def __str__

    @staticmethod
    def _writeTextFile(pFileName:str, pText:str):
        """Write pText to pFileName as UTF-8, replacing any previous content."""
        with open(file=pFileName, mode="wt", encoding="UTF-8") as fw:
            fw.write(pText)
    #def _writeTextFile

    @staticmethod
    def _extractJsonPayload(pHtml:str):
        """
        Return the text found between the __NEXT_DATA__ script opening tag and
        the first closing script tag after it, or None when either is missing.
        """
        iStart = pHtml.find(ArteTvConsumer.LAST_CHANCE_JSON_SCRIPT_START)
        if (iStart == -1):
            return None
        iStart += len(ArteTvConsumer.LAST_CHANCE_JSON_SCRIPT_START)
        iEnd = pHtml.find(ArteTvConsumer.LAST_CHANCE_JSON_SCRIPT_END, iStart)
        if (iEnd == -1):
            return None
        return pHtml[iStart:iEnd]
    #def _extractJsonPayload

    @staticmethod
    def getDictForLastChanceVideos(
        pUrl:str = None,
        pOnlyVideos = True,
        pDebug = False
    ):
        """
        Download one "last chance" page and decode its embedded JSON.

        :param pUrl: page to read; None (the default) means the current
            ArteTvConsumer.BASE_LAST_CHANCE_URL, resolved at call time.
        :param pOnlyVideos: True returns only the
            ["props"]["pageProps"]["initialPage"]["zones"][0]["data"] portion;
            False returns the whole decoded document.
        :param pDebug: when True, also dumps the returned data to
            "example_videos_only.JSON" or "example_full.JSON".
        :return: the decoded data, or None when the page could not be read or
            the script markers were not found.
        :raises ValueError: (json.JSONDecodeError) if the payload is not JSON.
        :raises KeyError, IndexError: if pOnlyVideos is True and the document
            does not have the expected structure.
        """
        if (pUrl is None):
            pUrl = ArteTvConsumer.BASE_LAST_CHANCE_URL

        # NOTE(review): genericUrlReader is assumed to return "" on failure,
        # as the original emptiness check implied - confirm in amutil.
        strHTML = AmUtil.genericUrlReader(pUrl)
        if (not strHTML):
            return None

        strRelevant = ArteTvConsumer._extractJsonPayload(strHTML)
        if (strRelevant is None):
            return None

        #strRelevant is a string, in the JSON notation, describing the videos about to disappear from the site
        dictFullVideosLastChance = json.loads(strRelevant)

        if (pOnlyVideos):
            dictVideosOnly = dictFullVideosLastChance["props"]["pageProps"]["initialPage"]["zones"][0]["data"]
            if (pDebug):
                ArteTvConsumer._writeTextFile("example_videos_only.JSON", json.dumps(dictVideosOnly))
            return dictVideosOnly

        if (pDebug):
            ArteTvConsumer._writeTextFile("example_full.JSON", strRelevant)
        return dictFullVideosLastChance
    #def getDictForLastChanceVideos

    def getLastChanceVideosNPagesDeep(
        self,
        pN = 2,
        pDebug = False
    )->list:
        """
        Collect the videos listed on pages 1..pN of the "last chance" section.

        Page 1 is always requested, even when pN is smaller than 1. Pages that
        cannot be read or that carry no embedded JSON are skipped.

        :param pN: number of the last page to visit.
        :param pDebug: when True, the collected list is also written, as JSON,
            to "list_of_all_videos_found.JSON".
        :return: list with the entries found on every visited page.
        """
        listVideos = []

        iPage = 1
        while (True):
            #"https://www.arte.tv/en/videos/last-chance/?page=1" .. "https://www.arte.tv/en/videos/last-chance/?page=10"
            url = self.mBaseLastChanceUrl + "?page=%d" % (iPage)
            videosInCurrentPage = ArteTvConsumer.getDictForLastChanceVideos(url)

            # None signals a failed page; iterating it would raise TypeError.
            if (videosInCurrentPage):
                listVideos.extend(videosInCurrentPage)

            iPage += 1
            if (iPage > pN):
                break
        #while

        if (pDebug):
            ArteTvConsumer._writeTextFile("list_of_all_videos_found.JSON", json.dumps(listVideos))
        #if

        return listVideos
    #def getLastChanceVideosNPagesDeep

    @staticmethod
    def _sanitizeTsvField(pValue)->str:
        """Return str(pValue) with TAB, LF and CR each replaced by one space."""
        return str(pValue).translate(ArteTvConsumer._TSV_FIELD_CLEANER)
    #def _sanitizeTsvField

    @staticmethod
    def addVideosToTSVDatabase(
        pListVideos:list,
        pDatabaseName = "ARTE_TV_DB.TSV"
    ):
        """
        Append one record per video to a single-table TSV "database".

        Record layout, one per line, fields separated by TAB:
        programId, url, title, duration, ageRating

        TAB and line-break characters inside programId, url and title are
        replaced by spaces, otherwise the readers of this class would discard
        the record as malformed.

        :param pListVideos: iterable of dicts with the keys "programId", "url",
            "title", "duration" and "ageRating" (the last two numeric).
        :param pDatabaseName: path of the TSV file; created if missing.
        :return: pDatabaseName.
        :raises KeyError: if a video lacks one of the keys.
        :raises TypeError: if duration or ageRating is not numeric.
        """
        #mode "at": non-destructive append text
        with open(file=pDatabaseName, mode="at", encoding="UTF-8") as fw:
            for video in pListVideos:
                #TODO skip records whose programId is already in the database
                strRecordTSV = "%s\t%s\t%s\t%d\t%d\n"%(
                    ArteTvConsumer._sanitizeTsvField(video["programId"]),
                    ArteTvConsumer._sanitizeTsvField(video["url"]),
                    ArteTvConsumer._sanitizeTsvField(video["title"]),
                    video["duration"],
                    video["ageRating"]
                )
                fw.write(strRecordTSV)
            #for all videos
        return pDatabaseName
    #def addVideosToTSVDatabase

    @staticmethod
    def findLongestVideo(
        pListOfVideos:list
    ):
        """
        Return the video dict with the greatest "duration".

        :param pListOfVideos: list of dicts having a "duration" key.
        :return: the first video holding the maximum duration, or None when the
            list is empty.
        """
        if (not pListOfVideos):
            return None
        # max() keeps the first of several equal maxima, like the original
        # strict ">" comparison did.
        return max(pListOfVideos, key=lambda video: video["duration"])
    #def findLongestVideo

    @staticmethod
    def _readTsvRecords(pTsvFile:str)->list:
        """
        Read pTsvFile and return its well-formed records, each one as a list of
        exactly 5 string fields; lines with any other field count are ignored.

        :raises OSError: if the file cannot be opened or read.
        :raises ValueError: (UnicodeDecodeError) if it is not valid UTF-8.
        """
        with open(file=pTsvFile, mode="rt", encoding="UTF-8") as fr:
            strAll = fr.read()

        listRecords = []
        for record in strAll.split("\n"):
            aFields = record.split("\t")
            if (len(aFields) == 5):
                listRecords.append(aFields)
        #for all records
        return listRecords
    #def _readTsvRecords

    @staticmethod
    def readVideosFromTsvFile(pTsvFile:str)->list:
        """
        Load the videos stored in a TSV file as a list of dicts.

        :param pTsvFile: path of a file written by addVideosToTSVDatabase.
        :return: list of dicts with the keys "programId", "url", "title"
            (strings), "duration" and "ageRating" (ints). An empty list is
            returned, after printing a message, when the file cannot be read or
            a numeric field cannot be converted.
        """
        try:
            listRet = [
                {
                    "programId": aFields[0],
                    "url": aFields[1],
                    "title": aFields[2],
                    "duration": int(aFields[3]),
                    "ageRating": int(aFields[4])
                }
                for aFields in ArteTvConsumer._readTsvRecords(pTsvFile)
            ]
        except (OSError, ValueError):
            print("Could not read file ", pTsvFile)
            return []
        #try-except

        return listRet
    #def readVideosFromTsvFile

    @staticmethod
    def readArteTvVideosFromTsvFile(pTsvFile: str) -> list:
        """
        Load the videos stored in a TSV file as a list of ArteTvVideo objects.

        :param pTsvFile: path of a file written by addVideosToTSVDatabase.
        :return: list of ArteTvVideo built from (programId, url, title,
            int duration, int age rating). An empty list is returned, after
            printing a message, when the file cannot be read or a numeric field
            cannot be converted.
        """
        try:
            listRet = [
                ArteTvVideo(
                    aFields[0], #programId
                    aFields[1], #url
                    aFields[2], #title
                    int(aFields[3]), #duration
                    int(aFields[4]) #age rating
                )
                for aFields in ArteTvConsumer._readTsvRecords(pTsvFile)
            ]
        except (OSError, ValueError):
            print("Could not read file ", pTsvFile)
            return []
        #try-except

        return listRet
    #def readArteTvVideosFromTsvFile

    @staticmethod
    def sortListOfVideosByDuration(pListOfVideos:list, pReverse=False):
        """
        Sort, in place, a list of video dicts by their "duration" taken as int.

        :param pListOfVideos: list of dicts having a "duration" entry.
        :param pReverse: True for longest first.
        """
        #the key function extracts the value used as the sorting criterion
        pListOfVideos.sort(key=lambda video: int(video.get("duration")), reverse=pReverse)
    #def sortListOfVideosByDuration

    @staticmethod
    def sortListOfVideosByTitle(pListOfVideos:list, pReverse=False):
        """
        Sort, in place, a list of video dicts by their "title".

        :param pListOfVideos: list of dicts having a "title" entry.
        :param pReverse: True for descending order.
        """
        pListOfVideos.sort(key=lambda video: video.get("title"), reverse=pReverse)
    #def sortListOfVideosByTitle
#class ArteTvConsumer

# Demo run: build a consumer for the default language (Languages.English)
# and print it; __str__ renders the base "last chance" URL.
tv = ArteTvConsumer()
#print (ArteTvConsumer.BASE_URL)
print(tv)

# Earlier experiments, kept disabled for reference: fetch the listing,
# save it to the TSV database and look up the longest video.
#tv.getDictForLastChanceVideos() #would be possible for classmethod
#dictVideos = ArteTvConsumer.getDictForLastChanceVideos(pDebug=True) #will also write an "example.json" file
#dictVideos = ArteTvConsumer.getDictForLastChanceVideos() #will also write an "example.json" file
#print(dictVideos)
#listAllVideosUpToPageNumber2 = tv.getLastChanceVideosNPagesDeep(pN=2,pDebug=True)
#print(listAllVideosUpToPageNumber2)
#strDB = ArteTvConsumer.addVideosToTSVDatabase(listAllVideosUpToPageNumber2)
#print (strDB)
#oLongestVideo = ArteTvConsumer.findLongestVideo(listAllVideosUpToPageNumber2)
#print (oLongestVideo)

def byDuration(pAnyVideo):
    """
    Sort-key helper: return the "duration" entry of a video dict as an int.

    The value is looked up with .get(), then converted with int(), so
    durations stored as numeric strings compare numerically.
    """
    rawDuration = pAnyVideo.get("duration")
    return int(rawDuration)
#def byDuration

def byTitle(pAnyVideo):
    """
    Sort-key helper: return the "title" entry of a video dict.

    The lookup uses .get(), so a dict without that key yields None.
    """
    strTitle = pAnyVideo.get("title")
    return strTitle
#def byTitle

# The string literal below is disabled demo code (it is never executed):
# it reads the saved videos as dicts and sorts them by duration and by title.
"""
listOfVideosPreviouslySaved = ArteTvConsumer.readVideosFromTsvFile("ARTE_TV_DB.TSV")
print ("Not sorted: ")
print (listOfVideosPreviouslySaved)

ArteTvConsumer.sortListOfVideosByDuration(listOfVideosPreviouslySaved, pReverse=True)
print ("Sorted by duration: ")
print (listOfVideosPreviouslySaved)

print ("__"*40)
ArteTvConsumer.sortListOfVideosByTitle(listOfVideosPreviouslySaved)
print ("Sorted by title: ")
print (listOfVideosPreviouslySaved)
"""

# Load the records stored in "ARTE_TV_DB.TSV" as ArteTvVideo objects and print
# the list; if the file cannot be read, an error message is printed first and
# the list is empty.
listOfArteTvVideos = ArteTvConsumer.readArteTvVideosFromTsvFile("ARTE_TV_DB.TSV")
print(listOfArteTvVideos)


********************

class ArteTvVideo:
    """
    One ARTE video record: program id, URL, title, duration and age rating.

    Equality (and hashing) is decided by the title only; ordering ("<", ">")
    is decided by the duration only.
    """

    def __init__(self, pProgramId, pUrl, pTitle, pDuration, pAgeRating):
        """
        :param pProgramId: identifier of the program.
        :param pUrl: address of the video.
        :param pTitle: title of the video.
        :param pDuration: duration, as a number (formatted with %d in __str__).
        :param pAgeRating: age rating, as a number (formatted with %d in __str__).
        """
        self.mProgramId = pProgramId
        self.mUrl = pUrl
        self.mTitle = pTitle
        self.mDuration = pDuration
        self.mAgeRating = pAgeRating
    #def __init__

    #for print of a SINGLE video
    def __str__(self):
        """Return a multi-line description, ended by a line of 80 dashes."""
        strFormat = "program id: %s\nURL: %s\nTitle: %s\nDuration: %d\nAge rating: %d\n%s\n"%(
            self.mProgramId,
            self.mUrl,
            self.mTitle,
            self.mDuration,
            self.mAgeRating,
            "-"*80
        )
        return strFormat
    #def __str__

    # for print of collections of videos
    def __repr__(self):
        """Same text as __str__, so lists of videos print readably."""
        return self.__str__()
    #def __repr__

    def __eq__(self, other):
        """Two videos are equal when their titles are equal."""
        try:
            return self.mTitle == other.mTitle
        except AttributeError:
            # other is not video-like (e.g. None): let Python fall back to its
            # default comparison instead of raising.
            return NotImplemented
    #def __eq__

    def __hash__(self):
        """
        Hash by title, consistent with __eq__.

        Defining __eq__ alone would make instances unhashable. Do not change
        mTitle while the object is stored in a set or used as a dict key.
        """
        return hash(self.mTitle)
    #def __hash__

    def __gt__(self, other):
        """A video is greater than another when it lasts longer."""
        try:
            return self.mDuration>other.mDuration
        except AttributeError:
            return NotImplemented
    #def __gt__

    def __lt__(self, v2):
        """A video is less than another when it is shorter."""
        try:
            return self.mDuration<v2.mDuration
        except AttributeError:
            return NotImplemented
    #def __lt__
#class ArteTvVideo