Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Imports System.IO
- Imports System.Threading
- Imports Strings
- Imports HttpSync
''' <summary>
''' Console scraper for allareacodes.com. Reads area codes from areacodes.txt and
''' trigger strings from triggers.txt, fetches one page per area code on a pool of
''' worker threads, and appends every number whose provider text contains a trigger
''' to a file named after that trigger. Area codes whose request fails are appended
''' to Redo.txt.
''' </summary>
Module Module1
    ''' <summary>How long Main waits on a worker before printing a progress line.</summary>
    Private Const ReportIntervalMs As Integer = 10000

    ''' <summary>Area codes to scrape; filled once in Main before any worker starts.</summary>
    Private codes As New List(Of String)
    ''' <summary>User-entered thread count. Zero based: N means N + 1 workers.</summary>
    Private threadCount As Integer
    ''' <summary>Number of workers that have finished; guarded by threadLocker.</summary>
    Private closedThreads As Integer
    ''' <summary>Single lock guarding codeIndex, closedThreads and all output-file appends.</summary>
    Private ReadOnly threadLocker As New Object
    ''' <summary>Loop flag checked by the workers. Set to True in Main; nothing in this module clears it.</summary>
    Private keepRunning As Boolean
    ''' <summary>Count of numbers written to trigger files; updated with Interlocked.Increment.</summary>
    Private totalScraped As Integer
    ''' <summary>Index of the next unclaimed entry in codes; guarded by threadLocker.</summary>
    Private codeIndex As Integer
    ''' <summary>Trigger strings matched against the provider text; filled once in Main.</summary>
    Private triggers As New List(Of String)

    ''' <summary>
    ''' Entry point: walks the user through loading both input files and choosing a
    ''' thread count, starts the workers, prints progress until they all finish, and
    ''' then prints the final total.
    ''' </summary>
    Sub Main()
        'intro
        Console.WriteLine("Welcome to allareacodes.com scraper.")
        Console.WriteLine("Make sure areacodes.txt exists with desired area codes, one per line.")
        Console.WriteLine("Example:")
        Console.WriteLine("123")
        Console.WriteLine("456")
        Console.WriteLine("789")
        Console.WriteLine()

        'area codes
        Console.WriteLine("Press any key to load the file.")
        Console.WriteLine()
        Console.ReadKey()
        codes.AddRange(LoadEntries("areacodes.txt"))
        Console.WriteLine($"Loaded {codes.Count} area code/s.")
        Console.WriteLine()

        'triggers
        Console.WriteLine("Make sure triggers.txt exists with desired trigger text, one per line.")
        Console.WriteLine("Example:")
        Console.WriteLine("Verizon")
        Console.WriteLine("Sprint")
        Console.WriteLine("etc..")
        Console.WriteLine()
        Console.WriteLine("Press any key to load the file.")
        Console.WriteLine()
        Console.ReadKey()
        triggers.AddRange(LoadEntries("triggers.txt"))
        Console.WriteLine($"Loaded {triggers.Count} trigger/s.")
        Console.WriteLine()

        'Threads
        Console.WriteLine("Enter the amount of threads to use.")
        Console.WriteLine("Thread count is 0 based. (Entering 0 will use one thread.)")
        Console.WriteLine()
        ' A negative count would start no workers at all, so it is rejected
        ' together with non-numeric input.
        While Not Integer.TryParse(Console.ReadLine(), threadCount) OrElse threadCount < 0
            Console.WriteLine("That is not a valid number.")
            Console.WriteLine("Enter the amount of threads to use.")
            Console.WriteLine()
        End While
        Console.WriteLine($"Threads set to {threadCount}.")
        Console.WriteLine()

        'start
        Console.WriteLine("Press any key to start.")
        Console.ReadKey()
        Console.WriteLine("Key press accepted. Started..")
        Console.WriteLine()
        keepRunning = True

        ' Inclusive upper bound on purpose: the prompt tells the user the count is 0 based.
        Dim workers As New List(Of Thread)
        For i As Integer = 0 To threadCount
            Dim t As New Thread(AddressOf StartThread) With {.IsBackground = True}
            workers.Add(t)
            t.Start()
        Next

        ' Join with a timeout instead of sleeping and polling closedThreads: progress is
        ' still printed every ReportIntervalMs while work is pending, but Main continues
        ' as soon as the last worker ends and no shared counter is read without a lock.
        For Each worker As Thread In workers
            While Not worker.Join(ReportIntervalMs)
                Console.WriteLine($"{Now.ToLongTimeString} - Scraped {totalScraped.ToThousands}")
                Console.WriteLine()
            End While
        Next

        Console.WriteLine("All threads have been closed.")
        Console.WriteLine($"Scraped: {totalScraped.ToThousands}")
        Console.WriteLine()
        Console.WriteLine("Press any key to exit.")
        Console.ReadKey()
    End Sub

    ''' <summary>
    ''' Reads <paramref name="fileName"/> and returns its lines, trimmed, with blank lines
    ''' dropped. Keeps prompting (and waiting for a key press) until the file exists and
    ''' contains at least one usable line.
    ''' </summary>
    ''' <param name="fileName">Path of the text file to load, one entry per line.</param>
    ''' <returns>A non-empty list of trimmed, non-blank lines.</returns>
    Private Function LoadEntries(fileName As String) As List(Of String)
        Do
            If File.Exists(fileName) Then
                Dim entries As New List(Of String)
                For Each rawLine As String In File.ReadLines(fileName)
                    ' Blank entries are dropped: an empty trigger is contained in every
                    ' provider string, and an empty area code requests the site root.
                    Dim entry As String = rawLine.Trim()
                    If entry.Length > 0 Then
                        entries.Add(entry)
                    End If
                Next
                If entries.Count > 0 Then
                    Return entries
                End If
                Console.WriteLine($"File {fileName} exists but it is empty, add something to it.")
                Console.WriteLine("Press any key to load it.")
            Else
                Console.WriteLine($"Could not find file {fileName}")
                Console.WriteLine("Create it and press any key to load it.")
            End If
            Console.WriteLine()
            Console.ReadKey()
        Loop
    End Function

    ''' <summary>
    ''' Builds the output file name for a trigger, replacing every character that is not
    ''' allowed in a file name with an underscore so the append cannot throw on it.
    ''' </summary>
    ''' <param name="trigger">Trigger text as loaded from triggers.txt.</param>
    ''' <returns>The sanitized trigger followed by ".txt".</returns>
    Private Function TriggerFileName(trigger As String) As String
        Dim safeName As String = trigger
        For Each invalidChar As Char In Path.GetInvalidFileNameChars()
            safeName = safeName.Replace(invalidChar, "_"c)
        Next
        Return $"{safeName}.txt"
    End Function

    ''' <summary>
    ''' Worker body. Repeatedly claims the next area code under the lock, downloads its
    ''' page, and for every parsed row appends the number to the file of the first trigger
    ''' contained in the provider text. Failed or empty responses put the area code in
    ''' Redo.txt. On exit the worker bumps closedThreads and reports it.
    ''' </summary>
    Private Sub StartThread()
        Dim strCode As String

        ' One request configuration per worker; only the Url changes between requests.
        Dim h As New HttpProperties
        With h
            .Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
            .AutoDecompress = True
            .AcceptLanguage = "en-US,en;q=0.9"
            .UpgradeInsecureRequestsToDefault()
            .UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
            .CookiesEnabled = False
        End With

        While keepRunning
            ' Claim the next area code; leaving the loop here also releases the lock.
            SyncLock threadLocker
                If codeIndex >= codes.Count Then
                    Exit While
                End If
                strCode = codes(codeIndex)
                codeIndex += 1
            End SyncLock

            h.Url = New Uri($"https://www.allareacodes.com/{strCode}")

            Using r As HttpResponse = HttpRequest.Get(h)
                If Not r.Success OrElse r.IsEmptyBody Then
                    ' Remember the area code so it can be retried in a later run.
                    SyncLock threadLocker
                        Using sw As New StreamWriter("Redo.txt", True)
                            sw.WriteLine(strCode)
                        End Using
                    End SyncLock
                    Continue While
                End If

                Dim b As String = r.ReadBody
                For Each s As String In b.ParseToArray("<div class=""list-group-item"">", "</span>")
                    Dim strNumber As String = s.Parse("<div class=""col-xs-12 prefix-col1"">", "</div>").Trim
                    Dim strProvider As String = s.Parse("<div class=""col-xs-12 prefix-col4"">", "</div>").Trim
                    If Not strNumber.IsEmpty AndAlso Not strProvider.IsEmpty Then
                        For Each strTrigger In triggers
                            If strProvider.Contains(strTrigger) Then
                                ' Appends are serialized because several workers may hit the same file.
                                SyncLock threadLocker
                                    Using sw As New StreamWriter(TriggerFileName(strTrigger), True)
                                        sw.WriteLine(strNumber)
                                    End Using
                                End SyncLock
                                Interlocked.Increment(totalScraped)
                                ' Only the first matching trigger receives the number.
                                Exit For
                            End If
                        Next
                    End If
                    'Debug.Print(strProvider & " - " & strNumber)
                Next
            End Using
        End While

        SyncLock threadLocker
            closedThreads += 1
            Console.WriteLine($"Closed thread #{closedThreads}")
            ' threadCount is 0 based, so the last worker is number threadCount + 1.
            If closedThreads > threadCount Then
                Console.WriteLine("Program stopped.")
                Console.WriteLine()
            End If
        End SyncLock
    End Sub
End Module
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement