#!/usr/bin/python3.5
# -*- coding: utf-8 -*-

# Create an RSS .xml feed listing the latest films posted on cpasbien
# Can be refreshed periodically via cron
# Python 3 dependencies:
# python3-pyrss2gen, python3-wget, python3-bs4, python3-requests, python3-lxml
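
# A minimal setup sketch (package names and paths below are assumptions, adjust to your system):
#   pip3 install PyRSS2Gen wget beautifulsoup4 requests lxml
# Example hourly crontab entry (hypothetical script path):
#   0 * * * * /usr/bin/python3.5 /home/user/cpasbien_rss.py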

import datetime
import glob
import os

import PyRSS2Gen
import requests
import wget
from bs4 import BeautifulSoup

""" Classes """
# Allow raw HTML inside RSS item descriptions (see
# http://stackoverflow.com/questions/5371704/python-generated-rss-outputting-raw-html/7912205#7912205)
class NoOutput:
    """Placeholder that suppresses PyRSS2Gen's default output for an element."""
    def __init__(self):
        pass

    def publish(self, handler):
        pass


class MediaRSS2(PyRSS2Gen.RSSItem):
    def __init__(self, **kwargs):
        PyRSS2Gen.RSSItem.__init__(self, **kwargs)

    def publish(self, handler):
        self.do_not_autooutput_description = self.description
        # Disable PyRSS2Gen's automatic output of the description, which would be HTML-escaped
        self.description = NoOutput()
        PyRSS2Gen.RSSItem.publish(self, handler)

    def publish_extensions(self, handler):
        # Write the description ourselves, wrapped in CDATA so the HTML survives unescaped
        handler._write('<%s><![CDATA[%s]]></%s>' % ("description", self.do_not_autooutput_description, "description"))
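
# With the override above, each item's <description> is emitted inside a CDATA block,
# roughly like this (illustrative only; the actual markup depends on the scraped page):
#   <description><![CDATA[ ...HTML of #textefiche and #bigcover... ]]></description>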
  33. """ rapatriement des articles """
  34.  
  35. def download():
  36.  
  37.     # récupération code HTML pour cpasbien
  38.     resp = requests.get("http://www.cpasbien.cm/view_cat.php?categorie=films")
  39.     encoding = resp.encoding if 'charset' in resp.headers.get('content-type', '').lower() else None
  40.     soup = BeautifulSoup(resp.content, 'lxml', from_encoding=encoding)
  41.  
  42.     # extraction lignes utile (1er filtre)
  43.     liens_brut = soup.findAll(True, {"class":["ligne0", "ligne1"]})
  44.  
  45.     # extraction liens (2ieme filtre) et download des pages
  46.     count = 0
  47.     for x in liens_brut:
  48.         soup = BeautifulSoup(str(x), 'lxml')
  49.         wget.download(soup.a.get('href'),out="page"+str(count))
  50.         count += 1;
  51.  
  52. """ creation rss """
  53.  
  54. def makerss():
  55.  
  56.     # generation items_list
  57.     items_list = []
  58.     for y in range(0, 29):
  59.         soup = BeautifulSoup(open("page"+str(y)), 'lxml')
  60.  
  61.         items_list.append(
  62.             MediaRSS2(
  63.                 title = str(soup.h2.a.contents),
  64.                 description = str(soup.find(id="textefiche"))+str(soup.find(id="bigcover")),
  65.                 link = str(soup.h2.a.get('href')),
  66.             ),
  67.         )
  68.  
  69.     # generation du rss
  70.     rss = PyRSS2Gen.RSS2(
  71.         title = "Cpasbien",
  72.         link = "http://www.cpasbien.cm",
  73.         description = "Sorties films cpasbien",
  74.         lastBuildDate = datetime.datetime.now(),
  75.         items =
  76.             items_list
  77.     )
  78.  
  79.     # ecriture du xml
  80.     rss.rss_attrs["xmlns:media"] = "http://search.yahoo.com/mrss/"
  81.     rss.write_xml(open("Cpasbien.xml", "w"), "utf-8");

download()
makerss()
  88. """ netoyage fichier temporaire """
  89. for filename in glob.glob('page*') :
  90.     os.remove( filename )