Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.ComponentModel;
- using System.Data;
- using System.Drawing;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
- using System.Windows.Forms;
- using System.Net.Http;
- using HtmlAgilityPack;
namespace WebScrapping
{
    /// <summary>
    /// Form that downloads article listings page by page and shows each article's
    /// title and excerpt in <c>dataGridView1</c>.
    /// </summary>
    public partial class frmHttp : Form
    {
        /// <summary>
        /// One scraped article: its headline and the excerpt paired with it.
        /// (The type name is historical; it holds titles and paragraphs, not names and scores.)
        /// </summary>
        public class NamesAndScores
        {
            /// <summary>Text of the article's headline link.</summary>
            public string Title { get; set; }

            /// <summary>Text of the article's excerpt paragraph.</summary>
            public string Paragraph { get; set; }
        }

        // Address of the first listing page; later pages live under it as "index.N.html".
        private const string FirstPageUrl = "https://www.hespress.com/politique/";

        // XPath expressions locating the headline links and the excerpt paragraphs on a listing page.
        private const string TitleXPath = "//*[@id='box_center_holder']/div[6]/div/h2/a";
        private const string ParagraphXPath = "//*[@id='box_center_holder']/div[4]/div/p";

        // Backing store for dataGridView1; created in InitialTable().
        private DataTable table;

        // Shared HtmlAgilityPack loader used for every page request.
        private readonly HtmlWeb web = new HtmlWeb();

        /// <summary>Builds the designer-generated UI and binds the empty result table to the grid.</summary>
        public frmHttp()
        {
            InitializeComponent();
            InitialTable();
        }

        /// <summary>
        /// Downloads one listing page and extracts its articles.
        /// </summary>
        /// <param name="pageNum">
        /// Page index. <c>0</c> requests the section's landing page; any other value
        /// requests <c>index.{pageNum}.html</c> under the same section.
        /// </param>
        /// <returns>
        /// The title/excerpt pairs found on the page, or an empty list when either the
        /// title nodes or the paragraph nodes cannot be found.
        /// </returns>
        private async Task<List<NamesAndScores>> GetInfo(int pageNum)
        {
            string url = pageNum == 0
                ? FirstPageUrl
                : FirstPageUrl + "index." + pageNum.ToString() + ".html";

            // HtmlWeb.Load is synchronous, so it is pushed to the thread pool to keep the UI responsive.
            var doc = await Task.Run(() => web.Load(url));

            var titleNodes = doc.DocumentNode.SelectNodes(TitleXPath);
            var paragraphNodes = doc.DocumentNode.SelectNodes(ParagraphXPath);
            if (titleNodes == null || paragraphNodes == null)
            {
                return new List<NamesAndScores>();
            }

            // The published listing of this file showed the search strings already entity-decoded
            // (`"""` and `" "`), which is not valid C#. They are restored here as the HTML entities.
            // NOTE(review): "&nbsp;" is inferred from the decoded blank - confirm against the original file.
            var titles = titleNodes.Select(node => node.InnerText.Replace("&quot;", "\""));
            var paragraphs = paragraphNodes.Select(node => node.InnerText
                .Replace("&quot;", "\"")
                .Replace("&nbsp;", Environment.NewLine));

            // Zip pairs items by position and stops at the shorter sequence, so any surplus
            // titles or paragraphs are dropped without notice.
            return titles
                .Zip(paragraphs, (title, paragraph) => new NamesAndScores { Title = title, Paragraph = paragraph })
                .ToList();
        }

        /// <summary>
        /// Fetches listing pages starting at page 0 and appends every article to the table,
        /// stopping at the first page that yields no articles. Any failure is reported in a message box.
        /// </summary>
        private async void btnGet_Click(object sender, EventArgs e)
        {
            this.Cursor = Cursors.WaitCursor;
            btnGet.Enabled = false;
            try
            {
                int pageNum = 0;
                var getinfos = await GetInfo(pageNum);
                while (getinfos.Count > 0)
                {
                    foreach (var getinfo in getinfos)
                    {
                        table.Rows.Add(getinfo.Title, getinfo.Paragraph);
                    }

                    pageNum++;
                    getinfos = await GetInfo(pageNum);
                }
            }
            catch (Exception ex)
            {
                // async void handler: nothing upstream can observe the exception, so report it here.
                MessageBox.Show(ex.Message);
            }
            finally
            {
                // Restore the UI on failure as well; otherwise the button stays disabled
                // and the wait cursor stays up after an error.
                // NOTE(review): the original sets Cursors.Hand rather than Cursors.Default - kept as is.
                this.Cursor = Cursors.Hand;
                btnGet.Enabled = true;
            }
        }

        /// <summary>Creates the two-column result table and binds it to <c>dataGridView1</c>.</summary>
        private void InitialTable()
        {
            table = new DataTable("MyTable");
            table.Columns.Add("Title of article", typeof(string));
            table.Columns.Add("Excerpt from the article", typeof(string));
            dataGridView1.DataSource = table;
        }

        /// <summary>Opens the <c>ExampleWebScrapping</c> window as a modal dialog.</summary>
        private void btnExample_Click(object sender, EventArgs e)
        {
            // A form shown with ShowDialog is only hidden when closed, so it is disposed explicitly.
            // Assumes ExampleWebScrapping derives from Form (declared elsewhere) - TODO confirm.
            using (ExampleWebScrapping webScraper = new ExampleWebScrapping())
            {
                webScraper.ShowDialog();
            }
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement