Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.IO;
- using System.Linq;
- using System.Net.Http;
- using HtmlAgilityPack;
- using System.Threading;
namespace SosachContentParser
{
    /// <summary>
    /// Console tool that downloads every file attached to the posts of an imageboard thread.
    /// </summary>
    /// <remarks>
    /// Usage: <c>SosachContentParser &lt;threadUrl&gt; &lt;outputDirectory&gt;</c>.
    /// </remarks>
    public class Program
    {
        // The literal must be quoted inside the XPath expression. A bare post-body is
        // parsed as a child-element test, which converts to an empty string and makes
        // contains() true for every element in the document.
        const string XPATH_TO_LINK = "//*[contains(@id, 'post-body')]/div[2]/figure/figcaption/a";

        /// <summary>
        /// Entry point: fetches the thread page, extracts the attachment links and
        /// saves each file into the output directory, printing progress as it goes.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the absolute URL of the thread page;
        /// <c>args[1]</c> is the directory the files are written to.
        /// </param>
        public static void Main(string[] args)
        {
            if (args == null || args.Length < 2)
            {
                Console.Error.WriteLine("Usage: SosachContentParser <threadUrl> <outputDirectory>");
                Environment.ExitCode = 1;
                return;
            }

            Uri threadUri;
            if (!Uri.TryCreate(args[0], UriKind.Absolute, out threadUri))
            {
                Console.Error.WriteLine($"Invalid thread URL: {args[0]}");
                Environment.ExitCode = 1;
                return;
            }

            var contentFilesPath = args[1];

            // No-op when the directory already exists.
            Directory.CreateDirectory(contentFilesPath);

            // One client for the whole run: creating an HttpClient per request
            // leaves sockets lingering and can exhaust them on large threads.
            using (var client = new HttpClient())
            {
                string rawHtml;

                // Main stays synchronous to keep its signature, so the async calls
                // are blocked on; GetAwaiter().GetResult() surfaces the real
                // exception instead of wrapping it in an AggregateException.
                using (var response = client.GetAsync(threadUri).GetAwaiter().GetResult())
                {
                    if (!response.IsSuccessStatusCode)
                    {
                        Console.Error.WriteLine(
                            $"Failed to load thread: {(int)response.StatusCode} {response.ReasonPhrase}");
                        Environment.ExitCode = 1;
                        return;
                    }

                    rawHtml = response.Content.ReadAsStringAsync().GetAwaiter().GetResult();
                }

                var links = ExtractFileLinks(rawHtml, threadUri);
                Console.WriteLine($"Files count: {links.Length}");

                int downloaded = 0;
                int failed = 0;

                // Downloads run one after another. (Iterating a ParallelQuery with
                // foreach never parallelised the loop body in the first place.)
                foreach (var link in links)
                {
                    if (TryDownload(client, link, contentFilesPath))
                    {
                        downloaded++;
                        ReportProgress(links.Length, downloaded);
                    }
                    else
                    {
                        failed++;
                    }
                }

                if (failed > 0)
                {
                    Console.Error.WriteLine($"Files failed: {failed}");
                }
            }
        }

        /// <summary>
        /// Parses the thread HTML and returns the absolute URLs of all attached files.
        /// </summary>
        /// <param name="rawHtml">HTML source of the thread page.</param>
        /// <param name="threadUri">URL of the thread page; relative links are resolved against it.</param>
        /// <returns>The resolved links; an empty array when the page contains none.</returns>
        private static Uri[] ExtractFileLinks(string rawHtml, Uri threadUri)
        {
            var htmlDoc = new HtmlDocument();
            htmlDoc.LoadHtml(rawHtml);

            // HtmlAgilityPack returns null, not an empty collection, when nothing matches.
            var anchors = htmlDoc.DocumentNode.SelectNodes(XPATH_TO_LINK);
            if (anchors == null)
            {
                return new Uri[0];
            }

            return anchors
                .Select(anchor => anchor.Attributes["href"]?.Value)
                .Where(href => !string.IsNullOrWhiteSpace(href))
                .Select(href => ResolveLink(threadUri, href))
                .Where(uri => uri != null)
                .ToArray();
        }

        /// <summary>
        /// Resolves a link found on the page against the thread URL, so that
        /// "../src/..." and "/board/src/..." forms both yield an absolute URL.
        /// </summary>
        /// <returns>The absolute URL, or <c>null</c> when the link cannot be resolved.</returns>
        private static Uri ResolveLink(Uri threadUri, string href)
        {
            Uri resolved;
            return Uri.TryCreate(threadUri, href, out resolved) ? resolved : null;
        }

        /// <summary>
        /// Downloads a single file into <paramref name="contentFilesPath"/>, naming it
        /// after the last segment of the URL path.
        /// </summary>
        /// <returns>
        /// <c>true</c> when the file was saved; <c>false</c> when the server answered
        /// with a non-success status or the URL has no file name.
        /// </returns>
        private static bool TryDownload(HttpClient client, Uri link, string contentFilesPath)
        {
            // AbsolutePath excludes the query string, which is not valid in file names.
            var fileName = Path.GetFileName(link.AbsolutePath);
            if (string.IsNullOrEmpty(fileName))
            {
                return false;
            }

            using (var response = client.GetAsync(link).GetAwaiter().GetResult())
            {
                // Do not save an error page under the attachment's name.
                if (!response.IsSuccessStatusCode)
                {
                    return false;
                }

                var fileBytes = response.Content.ReadAsByteArrayAsync().GetAwaiter().GetResult();

                // Path.Combine inserts the separator when the directory argument lacks one.
                File.WriteAllBytes(Path.Combine(contentFilesPath, fileName), fileBytes);
            }

            return true;
        }

        /// <summary>
        /// Redraws the two-line progress display.
        /// </summary>
        private static void ReportProgress(int total, int downloaded)
        {
            // Console.Clear throws an IOException when output is redirected to a file or pipe.
            if (!Console.IsOutputRedirected)
            {
                Console.Clear();
            }

            Console.WriteLine($"Files count: {total}");
            Console.WriteLine($"Files downloaded: {downloaded}");
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement