Advertisement
Guest User

without login

a guest
Sep 22nd, 2023
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.14 KB | None | 0 0
  1. from bs4 import BeautifulSoup
  2. import requests
  3. from PIL import Image, ImageDraw, ImageFont
  4. from io import BytesIO
  5. from urllib.parse import urljoin
  6.  
  7. BEGIN_PAG = 1
  8. END_PAG = 10
  9.  
  10. def resize_image(img, max_height=224):
  11. width, height = img.size
  12. if height > max_height:
  13. new_height = max_height
  14. aspect_ratio = width / height
  15. new_width = int(new_height * aspect_ratio)
  16. img = img.resize((new_width, new_height), Image.ANTIALIAS)
  17. return img
  18.  
  19.  
  20. def parse_series_page(url):
  21. response = requests.get(url)
  22. series_soup = BeautifulSoup(response.text, 'html.parser')
  23.  
  24. for type_tag in series_soup.find_all('div', {'class': 'sContent'}):
  25. type_text = type_tag.get_text().strip()
  26. if type_text == "Manhwa" or type_text == "Manhua" or type_text == "Doujinshi":
  27. return None # Return None to indicate this should be skipped
  28.  
  29. # Check for "Completely Scanlated? No"
  30. for cat_div, content_div in zip(series_soup.find_all('div', {'class': 'sCat'}),
  31. series_soup.find_all('div', {'class': 'sContent'})):
  32. cat_text = cat_div.get_text().strip()
  33. content_text = content_div.get_text().strip()
  34. if "Completely Scanlated?" in cat_text and content_text == "No":
  35. return None # Return None to indicate this should be skipped
  36.  
  37. img_tags = series_soup.find_all('img', {'class': 'img-fluid'})
  38. if len(img_tags) >= 3:
  39. img_tag = img_tags[2]
  40. img_url = img_tag['src']
  41.  
  42. if not img_url.startswith(('http:', 'https:')):
  43. img_url = urljoin(url, img_url)
  44.  
  45. try:
  46. img_response = requests.get(img_url)
  47. img = Image.open(BytesIO(img_response.content))
  48. return img
  49. except requests.exceptions.MissingSchema:
  50. print("Invalid URL for image:", img_url)
  51. return None
  52. return None
  53.  
  54.  
  55. def break_text(text, max_length=19):
  56. if len(text) <= max_length:
  57. return [text]
  58.  
  59. lines = []
  60. current_line = ""
  61.  
  62. for word in text.split(" "):
  63. if len(current_line) + len(word) + 1 > max_length:
  64. lines.append(current_line)
  65. current_line = ""
  66.  
  67. current_line += (word + " ")
  68.  
  69. lines.append(current_line)
  70. return lines
  71.  
  72.  
  73. def truncate_text(text, max_length=50):
  74. if len(text) > max_length:
  75. return text[:45] + "..."
  76. return text
  77.  
  78.  
  79. def create_montage(images, titles, first_date, last_date, images_per_row=10):
  80. images = [img for img in images if img is not None]
  81.  
  82. if len(images) == 0:
  83. print("No images to create a montage.")
  84. return
  85.  
  86. img_width, img_height = images[0].size
  87. text_height = 60
  88. title_height = 40 # Height for the title text
  89. new_img_height = img_height + text_height
  90.  
  91. num_rows = (len(images) - 1) // images_per_row + 1
  92. montage_width = img_width * min(images_per_row, len(images))
  93. montage_height = new_img_height * num_rows + title_height # Adding height for title
  94.  
  95. montage = Image.new(mode="RGB", size=(montage_width, montage_height), color=(255, 255, 255))
  96. draw = ImageDraw.Draw(montage)
  97.  
  98. try:
  99. font = ImageFont.truetype("arial.ttf", 16)
  100. title_font = ImageFont.truetype("arial.ttf", 24) # Font for title
  101. except IOError:
  102. print("Arial font not found, using default.")
  103. font = ImageFont.load_default()
  104. title_font = ImageFont.load_default()
  105.  
  106. # Draw title
  107. title_text = f"Series completed from {first_date} to {last_date}"
  108. title_width, title_height_actual = draw.textsize(title_text, font=title_font)
  109. title_position = ((montage_width - title_width) // 2, 10) # X-center the text
  110. draw.text(title_position, title_text, font=title_font, fill=(0, 0, 0))
  111.  
  112. for i, (img, title) in enumerate(zip(images, titles)):
  113. row = i // images_per_row
  114. col = i % images_per_row
  115. x_offset = col * img_width
  116. y_offset = row * new_img_height + title_height # Y-offset adjusted for title height
  117.  
  118. montage.paste(img, (x_offset, y_offset))
  119.  
  120. truncated_title = truncate_text(title)
  121. lines = break_text(truncated_title)
  122. for j, line in enumerate(lines):
  123. draw.text((x_offset, y_offset + img_height + j * 20), line.strip(), font=font, fill=(0, 0, 0))
  124.  
  125. # montage.show()
  126. montage.save("montage.png")
  127.  
  128. first_date = None
  129. last_date = None
  130.  
  131. ended_series = []
  132. date_elements = []
  133.  
  134. for page_num in range(BEGIN_PAG, END_PAG+1):
  135. url = f"https://www.mangaupdates.com/releases.html?page={page_num}"
  136. print("url= ", url)
  137. response = requests.get(url)
  138. soup = BeautifulSoup(response.text, 'html.parser')
  139.  
  140. for p_tag in soup.find_all('p', {'class': 'd-inline titlesmall'}):
  141. date_text = p_tag.get_text().strip()
  142. date_elements.append(date_text)
  143.  
  144. for div in soup.find_all('div', {'class': 'col-2 pl-1 pbreak'}):
  145. text = div.get_text().strip()
  146. if "(end)" in text:
  147. prev_div = div.find_previous_sibling('div', {'class': 'col-6 pbreak'})
  148.  
  149. if prev_div is not None:
  150. link_tag = prev_div.find('a')
  151. if link_tag is not None:
  152. series_name = link_tag.get_text()
  153. series_link = link_tag['href']
  154.  
  155. series_image = parse_series_page(series_link)
  156.  
  157. # print("series_name ", series_name)
  158. # print("series_link ", series_link)
  159. # print("series_image ", series_image)
  160.  
  161. if series_image is not None:
  162. ended_series.append({
  163. 'name': series_name,
  164. 'link': series_link,
  165. 'image': series_image
  166. })
  167.  
  168. if len(date_elements) > 0:
  169. first_date = date_elements[0]
  170. last_date = date_elements[-1]
  171.  
  172. images = [series.get('image', None) for series in ended_series]
  173. titles = [series.get('name', '') for series in ended_series]
  174. resized_images = [resize_image(img) for img in images]
  175.  
  176. create_montage(resized_images, titles, first_date, last_date)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement