Advertisement
Guest User

with login

a guest
Sep 22nd, 2023
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.68 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. from PIL import Image, ImageDraw, ImageFont
  4. from io import BytesIO
  5. from urllib.parse import urljoin
  6.  
# User-editable settings.
# BEGIN_PAG and END_PAG select the first and last page (both inclusive) of the
# release list to scan (https://www.mangaupdates.com/releases.html).
# username and password are needed because MU does not display eromanga-related
# covers unless you are logged in; if you don't want to log in, use the other
# .py file.
BEGIN_PAG = 1
END_PAG = 15
username = 'usr'
password = 'pwd'

# Log in once on a shared session; the same session object is used for the
# page requests made further down in this script.
# NOTE(review): the login response is never inspected, so a failed login
# goes unnoticed — consider checking it.
session = requests.Session()
login_url = 'https://www.mangaupdates.com/login.html'
payload = {'username': username, 'password': password, 'act': 'login'}
response = session.post(login_url, data=payload)
  19.  
  20. def resize_image(img, max_height=224):
  21. width, height = img.size
  22. if height > max_height:
  23. new_height = max_height
  24. aspect_ratio = width / height
  25. new_width = int(new_height * aspect_ratio)
  26. img = img.resize((new_width, new_height), Image.ANTIALIAS)
  27. return img
  28.  
  29.  
  30. def parse_series_page(url):
  31. response = session.get(url)
  32. series_soup = BeautifulSoup(response.text, 'html.parser')
  33.  
  34. for type_tag in series_soup.find_all('div', {'class': 'sContent'}):
  35. type_text = type_tag.get_text().strip()
  36. if type_text == "Manhwa" or type_text == "Manhua" or type_text == "Doujinshi":
  37. return None # Return None to indicate this should be skipped
  38.  
  39. # Check for "Completely Scanlated? No"
  40. for cat_div, content_div in zip(series_soup.find_all('div', {'class': 'sCat'}),
  41. series_soup.find_all('div', {'class': 'sContent'})):
  42. cat_text = cat_div.get_text().strip()
  43. content_text = content_div.get_text().strip()
  44. if "Completely Scanlated?" in cat_text and content_text == "No":
  45. return None # Return None to indicate this should be skipped
  46.  
  47.  
  48. img_tags = series_soup.find_all('img', {'class': 'img-fluid'})
  49. if len(img_tags) >= 4:
  50. img_tag = img_tags[3]
  51. img_url = img_tag['src']
  52.  
  53. if not img_url.startswith(('http:', 'https:')):
  54. img_url = urljoin(url, img_url)
  55.  
  56. try:
  57. img_response = requests.get(img_url)
  58. img = Image.open(BytesIO(img_response.content))
  59. return img
  60. except requests.exceptions.MissingSchema:
  61. print("Invalid URL for image:", img_url)
  62. return None
  63. return None
  64.  
  65.  
  66. def break_text(text, max_length=19):
  67. if len(text) <= max_length:
  68. return [text]
  69.  
  70. lines = []
  71. current_line = ""
  72.  
  73. for word in text.split(" "):
  74. if len(current_line) + len(word) + 1 > max_length:
  75. lines.append(current_line)
  76. current_line = ""
  77.  
  78. current_line += (word + " ")
  79.  
  80. lines.append(current_line)
  81. return lines
  82.  
  83.  
  84. def truncate_text(text, max_length=50):
  85. if len(text) > max_length:
  86. return text[:45] + "..."
  87. return text
  88.  
  89.  
  90. def create_montage(images, titles, first_date, last_date, images_per_row=10):
  91. images = [img for img in images if img is not None]
  92.  
  93. if len(images) == 0:
  94. print("No images to create a montage.")
  95. return
  96.  
  97. img_width, img_height = images[0].size
  98. text_height = 60
  99. title_height = 40 # Height for the title text
  100. new_img_height = img_height + text_height
  101.  
  102. num_rows = (len(images) - 1) // images_per_row + 1
  103. montage_width = img_width * min(images_per_row, len(images))
  104. montage_height = new_img_height * num_rows + title_height # Adding height for title
  105.  
  106. montage = Image.new(mode="RGB", size=(montage_width, montage_height), color=(255, 255, 255))
  107. draw = ImageDraw.Draw(montage)
  108.  
  109. try:
  110. font = ImageFont.truetype("arial.ttf", 16)
  111. title_font = ImageFont.truetype("arial.ttf", 24) # Font for title
  112. except IOError:
  113. print("Arial font not found, using default.")
  114. font = ImageFont.load_default()
  115. title_font = ImageFont.load_default()
  116.  
  117. # Draw title
  118. title_text = f"Series completed from {first_date} to {last_date}"
  119. title_width, title_height_actual = draw.textsize(title_text, font=title_font)
  120. title_position = ((montage_width - title_width) // 2, 10) # X-center the text
  121. draw.text(title_position, title_text, font=title_font, fill=(0, 0, 0))
  122.  
  123. for i, (img, title) in enumerate(zip(images, titles)):
  124. row = i // images_per_row
  125. col = i % images_per_row
  126. x_offset = col * img_width
  127. y_offset = row * new_img_height + title_height # Y-offset adjusted for title height
  128.  
  129. montage.paste(img, (x_offset, y_offset))
  130.  
  131. truncated_title = truncate_text(title)
  132. lines = break_text(truncated_title)
  133. for j, line in enumerate(lines):
  134. draw.text((x_offset, y_offset + img_height + j * 20), line.strip(), font=font, fill=(0, 0, 0))
  135.  
  136. # montage.show()
  137. montage.save("montage.png")
  138.  
  139. first_date = None
  140. last_date = None
  141.  
  142. ended_series = []
  143. date_elements = []
  144.  
  145. for page_num in range(BEGIN_PAG, END_PAG+1):
  146. url = f"https://www.mangaupdates.com/releases.html?page={page_num}"
  147. print("url= ", url)
  148. response = session.get(url)
  149. soup = BeautifulSoup(response.text, 'html.parser')
  150.  
  151. for p_tag in soup.find_all('p', {'class': 'd-inline titlesmall'}):
  152. date_text = p_tag.get_text().strip()
  153. date_elements.append(date_text)
  154.  
  155. for div in soup.find_all('div', {'class': 'col-2 pl-1 pbreak'}):
  156. text = div.get_text().strip()
  157. if "(end)" in text:
  158. prev_div = div.find_previous_sibling('div', {'class': 'col-6 pbreak'})
  159.  
  160. if prev_div is not None:
  161. link_tag = prev_div.find('a')
  162. if link_tag is not None:
  163. series_name = link_tag.get_text()
  164. series_link = link_tag['href']
  165.  
  166. series_image = parse_series_page(series_link)
  167.  
  168. # print("series_name ", series_name)
  169. # print("series_link ", series_link)
  170. # print("series_image ", series_image)
  171.  
  172. if series_image is not None:
  173. ended_series.append({
  174. 'name': series_name,
  175. 'link': series_link,
  176. 'image': series_image
  177. })
  178.  
  179. if len(date_elements) > 0:
  180. first_date = date_elements[0]
  181. last_date = date_elements[-1]
  182.  
  183. images = [series.get('image', None) for series in ended_series]
  184. titles = [series.get('name', '') for series in ended_series]
  185. resized_images = [resize_image(img) for img in images]
  186.  
  187. create_montage(resized_images, titles, first_date, last_date)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement