Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System.Web;
- using System.Xml.Serialization;
namespace DataContract
{
    /// <summary>
    /// One <c>post</c> element of the XML response.
    /// </summary>
    [XmlRoot("post")]
    public class Post
    {
        /// <summary>Text of the <c>image</c> child element.</summary>
        [XmlElement("image")]
        public string Image { get; set; }

        /// <summary>Text of the <c>tags</c> child element.</summary>
        [XmlElement("tags")]
        public string Tags { get; set; }

        /// <summary>Text of the <c>file_url</c> child element.</summary>
        [XmlElement("file_url")]
        public string File_url { get; set; }
    }

    /// <summary>
    /// The <c>posts</c> root element: a list of <see cref="DataContract.Post"/> children
    /// plus the paging values carried as attributes on the root.
    /// </summary>
    [XmlRoot("posts")]
    public class Posts
    {
        /// <summary>Every <c>post</c> child element; left null when none were deserialized.</summary>
        [XmlElement("post")]
        public List<Post>? Post { get; set; }

        /// <summary>Value of the <c>limit</c> attribute.</summary>
        [XmlAttribute("limit")]
        public int Limit { get; set; }

        /// <summary>Value of the <c>offset</c> attribute.</summary>
        [XmlAttribute("offset")]
        public int Offset { get; set; }

        /// <summary>Value of the <c>count</c> attribute.</summary>
        [XmlAttribute("count")]
        public int Count { get; set; }
    }
}
/// <summary>
/// Runs an asynchronous job over every item of a sequence with bounded parallelism,
/// printing progress to the console.
/// </summary>
public static class Scheduler
{
    /// <summary>
    /// Invokes <paramref name="job"/> once per item of <paramref name="ie"/> on at most four
    /// concurrent workers and blocks until every item has been handled.
    /// </summary>
    /// <typeparam name="T">Type of the items being processed.</typeparam>
    /// <param name="ie">Items to process. The sequence is enumerated exactly once.</param>
    /// <param name="delay">
    /// Milliseconds a worker sleeps after an item whose job returned <c>true</c> or threw.
    /// </param>
    /// <param name="job">
    /// Work to run per item; its result says whether the item caused real work.
    /// An exception from the job is logged and does not stop the remaining items.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="ie"/> is null.</exception>
    public static void Process<T>(IEnumerable<T> ie, int delay, Func<T, Task<bool>> job)
    {
        if (ie is null)
        {
            throw new ArgumentNullException(nameof(ie));
        }

        const int concurrency = 4;

        // Snapshot the input once so a lazy source is not executed twice (once for the
        // total, once for the loop) and the reported total matches what is processed.
        var items = ie.ToList();
        var total = items.Count;
        var count = 0;
        var mutex = new object();

        void ReportProgress(int current)
        {
            lock (mutex)
            {
                // Print only while this worker's value is still the latest counter value,
                // so a stale, out-of-order progress line is skipped.
                if (current == count)
                {
                    var progress = Math.Round((float)current / total * 100, 2);
                    Console.WriteLine($"{current}/{total}: {progress}%");
                }
            }
        }

        Parallel.ForEach(items, new ParallelOptions { MaxDegreeOfParallelism = concurrency }, x =>
        {
            // Starts as true so a failed job is followed by the delay as well.
            var didWork = true;
            try
            {
                // Parallel.ForEach bodies are synchronous, so the job is awaited by blocking.
                didWork = job(x).GetAwaiter().GetResult();
            }
            catch (Exception ex)
            {
                // Keep going with the other items, but say why this one failed.
                Console.WriteLine($"Task failed :( {ex.GetType().Name}: {ex.Message}");
            }
            finally
            {
                ReportProgress(Interlocked.Increment(ref count));
                if (didWork) Thread.Sleep(delay);
            }
        });
    }
}
/// <summary>
/// Console entry point: queries the Gelbooru XML API for a fixed tag set and saves every
/// returned post into a local folder as an image file plus a text file listing its tags.
/// </summary>
public class Entrypoint
{
    // Folder (relative to the working directory) that receives images and tag files.
    private const string OutputDir = "OUT";

    // Page size requested from the API.
    private const int PostsPerPage = 20;

    // Pause, in milliseconds, used between downloads and between page fetches.
    private const int DelayMs = 5000;

    private static readonly HttpClient _hc = new HttpClient();
    private static readonly XmlSerializer _xml = new XmlSerializer(typeof(DataContract.Posts));

    /// <summary>
    /// Builds the API query URL for one page of results.
    /// </summary>
    /// <param name="page">Value sent as the <c>pid</c> query parameter.</param>
    /// <param name="limit">Value sent as the <c>limit</c> query parameter.</param>
    /// <param name="tags">Tags, joined with spaces and URL-encoded into the <c>tags</c> parameter.</param>
    private static string ConstructGelbooruURL(int page, int limit, IEnumerable<string> tags)
    {
        var encodedTags = HttpUtility.UrlEncode(string.Join(" ", tags));
        return $"https://gelbooru.com/index.php?page=dapi&s=post&q=index&limit={limit}&pid={page}&tags={encodedTags}";
    }

    /// <summary>
    /// Fetches one page of results and deserializes the XML body.
    /// </summary>
    /// <returns>The deserialized response, or null if the serializer produced nothing.</returns>
    /// <exception cref="HttpRequestException">The request failed or returned a non-success status.</exception>
    private static async Task<DataContract.Posts?> GetPosts(int page, int limit, IEnumerable<string> tags)
    {
        using var response = await _hc.GetAsync(ConstructGelbooruURL(page, limit, tags));

        // Fail here on an error status instead of trying to parse an error page as XML.
        response.EnsureSuccessStatusCode();

        var content = await response.Content.ReadAsStringAsync();
        using var sr = new StringReader(content);
        return (DataContract.Posts?)_xml.Deserialize(sr);
    }

    /// <summary>
    /// Writes the tag file and the image for one post, skipping whichever already exists.
    /// </summary>
    /// <returns><c>true</c> when an image download was attempted and completed; otherwise <c>false</c>.</returns>
    private static async Task<bool> SavePost(DataContract.Post? post)
    {
        if (post == null) return false;

        // The name comes from the remote response: keep only its file-name part so a value
        // containing directory separators cannot point outside the output folder.
        var fileName = Path.GetFileName(post.Image);
        if (string.IsNullOrEmpty(fileName)) return false;

        var tagPath = Path.Combine(OutputDir, Path.GetFileNameWithoutExtension(fileName) + ".txt");
        if (!File.Exists(tagPath))
        {
            var tags = post.Tags ?? string.Empty;
            File.WriteAllText(tagPath, string.Join(", ", tags.Split(" ")));
        }

        var imagePath = Path.Combine(OutputDir, fileName);
        if (File.Exists(imagePath)) return false;

        var data = await _hc.GetByteArrayAsync(post.File_url);
        File.WriteAllBytes(imagePath, data);
        return true;
    }

    /// <summary>
    /// Walks every result page for <paramref name="tags"/> and saves each post on it.
    /// </summary>
    private static async Task Run(IEnumerable<string> tags)
    {
        // A one-post query is used only to read the count attribute of the response
        // (presumably the total number of matching posts -- confirm against the API docs).
        var probe = await GetPosts(0, 1, tags);
        var totalPosts = probe?.Count ?? 0;
        if (totalPosts <= 0)
        {
            Console.WriteLine("No posts found.");
            return;
        }

        // Integer ceiling division: number of pages needed to cover totalPosts.
        var pages = (totalPosts + PostsPerPage - 1) / PostsPerPage;
        for (var i = 0; i < pages; i++)
        {
            Console.WriteLine($"Fetching page {i + 1}");
            var posts = await GetPosts(i, PostsPerPage, tags);

            var batch = posts?.Post;
            if (batch != null && batch.Count > 0)
            {
                Scheduler.Process(batch, DelayMs, post => SavePost(post));
            }

            await Task.Delay(DelayMs);
        }
    }

    /// <summary>
    /// Creates the output folder, runs the download to completion and reports any fatal error.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(string[] args)
    {
        // No-op when the folder already exists.
        Directory.CreateDirectory(OutputDir);

        var tags = new[] { "belko", "paizuri" };
        try
        {
            // Block until the whole run has finished so the process neither exits early
            // nor loses an exception thrown by the download loop.
            Run(tags).GetAwaiter().GetResult();
        }
        catch (Exception ex)
        {
            Console.Error.WriteLine($"Download aborted: {ex.GetType().Name}: {ex.Message}");
            Environment.ExitCode = 1;
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement