LordPankake

Comic Downloader

Jun 19th, 2017
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
VB.NET 13.10 KB | None | 0 0
  1. Imports System.Net
  2. Imports System.Threading
  3. Imports System.IO
  4.  
  5. ''' <summary>
  6. ''' A comic downloader.
  7. ''' </summary>
  8. Public Class FrmNHDownloader
  9. #Region "Variables and Structures"
  10.     ''' <summary>
  11.     ''' Number of threads. Used for limiting concurrent downloads.
  12.     ''' </summary>
  13.     <ThreadStatic> Shared Threads As Integer = 0
  14.     ''' <summary>
  15.     ''' Number of threads. Used for limiting concurrent downloads.
  16.     ''' </summary>
  17.     <ThreadStatic> Shared MaxThreads As Integer = 10
  18.     ''' <summary>
  19.     ''' The scale of the image previews in the flow layout.
  20.     ''' </summary>
  21.     <ThreadStatic> Shared ImageScale As Integer = 3
  22.     ''' <summary>
  23.     ''' The number of pages downloaded.
  24.     ''' </summary>
  25.     <ThreadStatic> Shared PagedDownloaded As Integer = 0
  26.     ''' <summary>
  27.     ''' Name of the sub-folder to place downloded items in.
  28.     ''' </summary>
  29.     <ThreadStatic> Shared FolderName As String
  30.     ''' <summary>
  31.     ''' Check if the user wants to open the folder when the comic finishes.
  32.     ''' </summary>
  33.     <ThreadStatic> Shared OpenOnExit As Boolean = True
  34.     ''' <summary>
  35.     ''' Data structure for handling comic parsing.
  36.     ''' </summary>
  37.     Private Class ComicData
  38.         Public ComicURL As String
  39.         Public Name As String
  40.         Public NumPages As Integer
  41.     End Class
  42. #End Region
  43. #Region "Controls and UI Functionality"
  44.     ''' <summary>
  45.     ''' Redraws the images when the form is resized. Updates the flow panel size.
  46.     ''' </summary>
  47.     ''' <param name="sender"></param>
  48.     ''' <param name="e"></param>
  49.     Private Sub FrmNHDownloader_ResizeEnd(sender As Object, e As EventArgs) Handles MyBase.ResizeEnd
  50.         ResizeImages()
  51.     End Sub
  52.     ''' <summary>
  53.     ''' Resizes the flow panel that contains the image previews of downloaded pages.
  54.     ''' </summary>
  55.     ''' <param name="sender"></param>
  56.     ''' <param name="e"></param>
  57.     Private Sub FrmNHDownloader_Resize(sender As Object, e As EventArgs) Handles MyBase.Resize
  58.         FlowImages.Height = Height - 185
  59.         FlowImages.MaximumSize = New Size(Width, 9999)
  60.     End Sub
  61.     ''' <summary>
  62.     ''' Updates the size of the image previews.
  63.     ''' </summary>
  64.     ''' <param name="sender"></param>
  65.     ''' <param name="e"></param>
  66.     Private Sub TBImageSize_Scroll(sender As Object, e As EventArgs) Handles TBImageSize.ValueChanged
  67.         ImageScale = TBImageSize.Value
  68.         ResizeImages()
  69.     End Sub
  70.     ''' <summary>
  71.     ''' Remove focus after usage. Lets users scroll throgh the images instead.
  72.     ''' </summary>
  73.     Private Sub TBImageSize_OnFocus(sender As Object, e As EventArgs) Handles TBImageSize.GotFocus
  74.         FlowImages.Focus()
  75.     End Sub
  76.     ''' <summary>
  77.     ''' Updates the folder name to download files to. An empty name will default to what is found online.
  78.     ''' </summary>
  79.     ''' <param name="sender"></param>
  80.     ''' <param name="e"></param>
  81.     Private Sub TxtFolder_TextChanged(sender As Object, e As EventArgs) Handles TxtFolder.TextChanged
  82.         FolderName = TxtFolder.Text
  83.     End Sub
  84.     ''' <summary>
  85.     ''' Updates the value that determines if the folder is opened when the comic finishes downloading.
  86.     ''' </summary>
  87.     ''' <param name="sender"></param>
  88.     ''' <param name="e"></param>
  89.     Private Sub ChkOpenOnCreate_CheckedChanged(sender As Object, e As EventArgs) Handles ChkOpenOnCreate.CheckedChanged
  90.         OpenOnExit = ChkOpenOnCreate.Checked
  91.     End Sub
  92.     ''' <summary>
  93.     ''' Updates the number of allowed threads.
  94.     ''' </summary>
  95.     ''' <param name="sender"></param>
  96.     ''' <param name="e"></param>
  97.     Private Sub NUDThreads_ValueChanged(sender As Object, e As EventArgs) Handles NUDThreads.ValueChanged
  98.         MaxThreads = NUDThreads.Value
  99.     End Sub
  100.     ''' <summary>
  101.     ''' Adds an image preview of the downloaded file to the flow display.
  102.     ''' </summary>
  103.     ''' <param name="FilePath">Path to file</param>
  104.     Private Sub UpdateDownloadedFiles(ByVal FilePath As String)
  105.         ' Check if Flow needs to be invoked
  106.         If FlowImages.InvokeRequired Then
  107.             FlowImages.Invoke(New UpdateDownloadedFilesDel(AddressOf UpdateDownloadedFiles), {FilePath})
  108.         ElseIf File.Exists(FilePath) Then
  109.             ' Read image from file path
  110.             ' Create PictureBox
  111.             ' Append PictureBox to Flow layout
  112.             Dim Image As Image = Image.FromFile(FilePath)
  113.             Dim PictureBox As PictureBox = New PictureBox()
  114.             PictureBox.Image = Image
  115.             PictureBox.SizeMode = PictureBoxSizeMode.StretchImage
  116.             PictureBox.Width = CInt(Math.Round((FlowImages.Width) / (ImageScale + 0.1) - 10.0))
  117.             PictureBox.Height = CInt(Math.Round(PictureBox.Width * 1.4))
  118.             FlowImages.Controls.Add(PictureBox)
  119.         End If
  120.     End Sub
  121.     Private Delegate Sub UpdateDownloadedFilesDel(ByVal FilePath As String)
  122.     ''' <summary>
  123.     ''' Checks how many pages there are to download and launches downloader threads.
  124.     ''' </summary>
  125.     ''' <param name="sender"></param>
  126.     ''' <param name="e"></param>
  127.     Private Sub BtnDownload_Click(sender As Object, e As EventArgs) Handles BtnDownload.Click
  128.         PagedDownloaded = 0
  129.         ' Check if URL is valid
  130.         Dim URL As String = TxtComicURL.Text
  131.         If Not URL.StartsWith("http") Then
  132.             MessageBox.Show("Enter a valid URL")
  133.             Return
  134.         End If
  135.         ' Read HTML from comic page
  136.         Dim HTML As String = GrabHTML(URL)
  137.         If String.IsNullOrEmpty(HTML) Then
  138.             MessageBox.Show("Failed to grab comic HTML")
  139.             Return
  140.         End If
  141.         ' Parse the comic
  142.         Dim Data As ComicData = ParseComic(URL, HTML)
  143.         If Data Is Nothing Then
  144.             MessageBox.Show("Failed to parse comic data")
  145.             Return
  146.         End If
  147.         ' Ensure download directory exists
  148.         Dim DLDirectory As String = If(String.IsNullOrEmpty(FolderName), Data.Name, FolderName)
  149.         Directory.CreateDirectory(DLDirectory)
  150.         Process.Start(DLDirectory)
  151.         ' Download the comic
  152.         Dim Page As Integer = 1
  153.         While Page <= Data.NumPages
  154.             ' Not all comics use JPG's but we'll handle that in the download method
  155.             Dim URLPage As String = ""
  156.             If URL.Contains("nhentai") Then
  157.                 URLPage = Data.ComicURL & Page & ".jpg"
  158.             ElseIf URL.Contains("myhentaicomics") Then
  159.                 URLPage = Data.ComicURL & Page.ToString("000") & ".jpg"
  160.             End If
  161.             LaunchDownloadThread(URLPage, Data, Page)
  162.             Page += 1
  163.         End While
  164.     End Sub
  165. #End Region
  166. #Region "Utils"
  167.     ''' <summary>
  168.     ''' Grabs the HTML from the website at the given URL.
  169.     ''' </summary>
  170.     ''' <param name="URL">URL to visit.</param>
  171.     ''' <returns>HTML of URL</returns>
  172.     Private Function GrabHTML(URL As String) As String
  173.         Using wc As New WebClient
  174.             Return wc.DownloadString(URL)
  175.         End Using
  176.         Return Nothing
  177.     End Function
  178.     ''' <summary>
  179.     ''' Parses comic data from the given HTML.
  180.     ''' </summary>
  181.     ''' <param name="HTML">Raw HTML to parse</param>
  182.     ''' <returns>Comic Data</returns>
  183.     Private Function ParseComic(URL As String, HTML As String) As ComicData
  184.         Try
  185.             Dim Data As New ComicData()
  186.             ' Comic name is denoted by the title tag
  187.             Dim ComicName As String = HTML.Substring(HTML.IndexOf("<title>") + 7)
  188.             ComicName = ComicName.Substring(0, ComicName.IndexOf("</title>"))
  189.             ComicName = ComicName.Trim()
  190.             'Substringing garbage away
  191.             If URL.Contains("nhentai") Then
  192.                 If ComicName.Contains("nhentai") Then
  193.                     ComicName = ComicName.Substring(0, ComicName.IndexOf("nhentai"))
  194.                 End If
  195.                 If ComicName.Contains("&raquo;") Then
  196.                     ComicName = ComicName.Substring(0, ComicName.IndexOf("&raquo;"))
  197.                 End If
  198.             End If
  199.             ' Purge bad characters
  200.             ComicName = ComicName.Replace("*", " ")
  201.             ComicName = ComicName.Replace(":", "-")
  202.             ComicName = ComicName.Replace("|", "-")
  203.             ComicName = ComicName.Replace("?", "")
  204.             ComicName = ComicName.Replace("/", "")
  205.             ComicName = ComicName.Replace("\", "")
  206.             ComicName = ComicName.Replace("<", "(")
  207.             ComicName = ComicName.Replace(">", ")")
  208.             ' Update comic name
  209.             Data.Name = ComicName
  210.             ' "pages" is only shows up inside a div
  211.             ' Just parse around that to get the number of pages
  212.             If URL.Contains("nhentai") Then
  213.                 Dim PagesIndex As Integer = HTML.IndexOf("pages</div>")
  214.                 Dim PagesData As String = HTML.Substring(PagesIndex - 10)
  215.                 PagesData = PagesData.Substring(0, PagesData.IndexOf(" "))
  216.                 PagesData = PagesData.Substring(PagesData.IndexOf("<div>") + 5)
  217.                 Data.NumPages = Integer.Parse(PagesData)
  218.             ElseIf URL.Contains("myhentaicomics") Then
  219.                 '  Photos 1 - 15 of 37
  220.                 Dim PagesIndex As Integer = HTML.IndexOf("Photos 1")
  221.                 Dim PagesData As String = HTML.Substring(PagesIndex)
  222.                 PagesData = PagesData.Substring(PagesData.IndexOf("of") + 3)
  223.                 PagesData = PagesData.Substring(0, PagesData.IndexOf("</li>") - 1)
  224.                 Data.NumPages = Integer.Parse(PagesData)
  225.             End If
  226.             ' Read the cover image URL since its consistent, then change it a little bit
  227.             ' so that it matches the comic page syntax.
  228.             If URL.Contains("nhentai") Then
  229.                 Dim ComURL As String = HTML.Substring(HTML.IndexOf("content=""https://t.nhentai.net/galleries/"))
  230.                 ComURL = ComURL.Substring(0, ComURL.IndexOf("/>")).Substring(ComURL.IndexOf("http")).Replace("t.n", "i.n")
  231.                 ComURL = ComURL.Substring(0, ComURL.IndexOf("cover."))
  232.                 Data.ComicURL = ComURL
  233.             ElseIf URL.Contains("myhentaicomics") Then
  234.                 Dim ComURL As String = HTML.Substring(HTML.IndexOf("/var/thumbs/"))
  235.                 ComURL = ComURL.Substring(0, ComURL.IndexOf("?m="))
  236.                 ComURL = ComURL.Replace("thumbs", "resizes")
  237.                 ComURL = "http://myhentaicomics.com" & ComURL.Substring(0, ComURL.LastIndexOf("/") + 1)
  238.                 Data.ComicURL = ComURL
  239.             End If
  240.             Return Data
  241.         Catch : End Try
  242.         Return Nothing
  243.     End Function
  244.     Private Sub ResizeImages()
  245.         For Each control As Control In Me.FlowImages.Controls
  246.             Dim pictureBox As PictureBox = CType(control, PictureBox)
  247.             pictureBox.Width = CInt(Math.Round((FlowImages.Width) / (ImageScale + 0.1) - 10.0))
  248.             pictureBox.Height = CInt(Math.Round((pictureBox.Width) * 1.4))
  249.         Next
  250.     End Sub
  251. #End Region
  252. #Region "Downloading"
  253.     ''' <summary>
  254.     ''' Starts a thread that downloads a file at the given URL.
  255.     ''' </summary>
  256.     ''' <param name="URL">URL to download from</param>
  257.     ''' <param name="DATA">Additional comic data</param>
  258.     Private Sub LaunchDownloadThread(ByVal URL As String, ByVal Data As ComicData, ByVal PageIndex As Integer)
  259.         Dim Evaluator As Thread = New Thread(Sub() Me.Download(URL, Data, PageIndex))
  260.         Evaluator.Start()
  261.     End Sub
  262.     ''' <summary>
  263.     ''' Downloads a comic page from the given URL.
  264.     ''' </summary>
  265.     ''' <param name="URL">URL to download from</param>
  266.     ''' <param name="Data">Extra comic data</param>
  267.     ''' <param name="PageIndex">The page being downloaded</param>
  268.     Private Sub Download(ByVal URL As String, ByVal Data As ComicData, ByVal PageIndex As Integer)
  269.         ' Prevent too many concurrent threads.
  270.         ' Too many download threads slows every one down.
  271.         While Threads > MaxThreads
  272.             Thread.Sleep(100)
  273.         End While
  274.         Threads += 1
  275.         ' Try to download the file
  276.         Try
  277.             Dim DLDirectory As String = If(String.IsNullOrEmpty(FolderName), Data.Name, FolderName)
  278.             Dim LocalName As String = DLDirectory & "/" & URL.Substring(URL.LastIndexOf("/") + 1)
  279.             Using wc As New WebClient
  280.                 tryDownload(wc, URL, LocalName)
  281.  
  282.             End Using
  283.             PagedDownloaded += 1
  284.         Catch
  285.         End Try
  286.         Threads -= 1
  287.     End Sub
  288.     ''' <summary>
  289.     ''' Download the file at the given url to the given local-name.
  290.     ''' </summary>
  291.     ''' <param name="wc"></param>
  292.     ''' <param name="URL"></param>
  293.     ''' <param name="LocalName"></param>
  294.     Private Sub TryDownload(ByRef wc As WebClient, ByVal URL As String, ByVal LocalName As String)
  295.         Try
  296.             wc.DownloadFile(URL, LocalName)
  297.             UpdateDownloadedFiles(LocalName)
  298.         Catch e As WebException
  299.             Try
  300.                 wc.DownloadFile(URL.Replace(".jpg", ".png"), LocalName.Replace(".jpg", ".png"))
  301.                 UpdateDownloadedFiles(LocalName.Replace(".jpg", ".png"))
  302.             Catch ex As WebException : End Try
  303.         End Try
  304.     End Sub
  305. #End Region
  306. End Class
Advertisement
Add Comment
Please, Sign In to add comment