View difference between Paste ID: WNHYzEKM and mgwGFxv3
SHOW: | | - or go back to the newest paste.
1-
#Que os follen a todos corruptos hijos de la gran puta
1+
#Que os follen a todos hijos de la gran puta
2
3
import os
from urllib.error import URLError
from urllib.request import urlopen
from urllib.request import HTTPError
5
6
# Host that the paths listed in robots.txt are relative to.
BASE_URL = 'http://boe.es'


def parse_pdf_entry(line, base_url=BASE_URL):
    """Extract the download URL and local base name from a robots.txt line.

    The line is expected to look like ``<directive> <path-to-file>.pdf``;
    the second whitespace-separated token is taken as the path.

    Args:
        line: One raw line read from the robots file.
        base_url: Prefix prepended to the path to build the full URL.

    Returns:
        A ``(url, name)`` tuple, where ``name`` is the file name without the
        ``.pdf`` extension, or ``None`` when the line does not reference a
        PDF or has no path token.
    """
    if ".pdf" not in line:
        return None

    parts = line.split()
    if len(parts) < 2:
        # A ".pdf" line without a path token cannot be turned into a URL.
        return None

    path = parts[1]
    name_tree = path.split('.pdf')[0].split('/')
    name = name_tree[-1]

    # Every SUM.pdf shares the same file name, so path components 3-5 are
    # appended to keep the downloaded files distinct.  Paths too short to
    # contain those components keep the plain name instead of crashing.
    if name == "SUM" and len(name_tree) > 5:
        name = "_".join([name, name_tree[3], name_tree[4], name_tree[5]])

    return base_url + path, name


def build_report(files_total, files_missing):
    """Return the lines of the final summary.

    Args:
        files_total: Number of files downloaded successfully.
        files_missing: Names of the files that could not be downloaded.

    Returns:
        A list of strings, one per output line.
    """
    report = [
        "Total Download = " + str(files_total),
        "Files not found = " + str(len(files_missing)),
        "Files missing are...",
    ]
    report.extend(files_missing)
    return report


def main(robots_path="robots.txt", output_dir="pdfs"):
    """Download every PDF listed in the robots file and print a summary.

    Args:
        robots_path: Path of the robots file to read.
        output_dir: Directory the PDFs are written to; created if missing.

    Returns:
        A ``(files_total, files_missing)`` tuple.
    """
    print("START!")

    files_missing = []
    files_total = 0

    # The downloads are written below this directory, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    with open(robots_path, 'r') as robots_file:
        for line in robots_file:
            entry = parse_pdf_entry(line)
            if entry is None:
                continue

            url, name = entry
            filename = name + ".pdf"

            print("downloading " + filename + " ... ")

            try:
                with urlopen(url) as response:
                    data = response.read()
            except HTTPError:
                # HTTPError is a subclass of URLError, so it is handled first.
                print("Can't download " + filename + " \n")
                files_missing.append(filename)
            except URLError:
                print("Can't download " + filename + " \n")
                print("Bad url " + url + " ¿? \n")
                files_missing.append(filename)
            else:
                with open(os.path.join(output_dir, filename), "wb") as pdf_file:
                    pdf_file.write(data)
                print("done! \n")
                files_total = files_total + 1

    for report_line in build_report(files_total, files_missing):
        print(report_line)

    return files_total, files_missing


if __name__ == "__main__":
    main()