Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 'Coded by ShadowTzu
- 'Free to use
- 'my 3D engine, Tzu3D: http://shadowtzu.free.fr
- 'Youtube: https://www.youtube.com/user/shadowtzu
- 'Facebook: https://www.facebook.com/Tzu3d
- 'Twitter: https://twitter.com/shadowtzu
- 'Twitch: http://www.twitch.tv/shadowtzu
- 'Website: http://tzu3d.weebly.com
- Imports System.Net
''' <summary>
''' Very small HTML scraper: downloads (or is handed) a page as text and lets the
''' caller pull out the n-th occurrence of a tag together with its inner text and
''' its double-quoted attributes. This is plain string searching, not a real HTML
''' parser: an element ends at the first matching closing tag, so nested elements
''' of the same name are not balanced.
''' </summary>
''' <remarks>
''' Indexes are zero-based (index 1 is the second occurrence). Example:
'''   Dim ExtractData As New Extractor
'''   ExtractData.Load("http://www.youtube.com/results?search_query=telemann")
'''   Dim elem As Extractor.struct_Element = ExtractData.Element("ol", 1)
'''   Dim elemUrl As Extractor.struct_Element = elem.Element("a", 1)
'''   MsgBox("http://www.youtube.com" &amp; elemUrl.Attribut("href") &amp; " = " &amp; elemUrl.InnerText)
''' </remarks>
Public Class Extractor
    Implements IDisposable

#Region "Structure"
    ''' <summary>
    ''' One extracted element: its cleaned inner text plus its attributes.
    ''' An element that could not be found has an empty InnerText and no attributes.
    ''' </summary>
    Public Structure struct_Element
        Friend mtext As String
        Friend mAttributes_Name() As String
        Friend mAttributes_Value() As String

        ''' <summary>Text found between the opening and the closing tag (line breaks removed, trimmed).</summary>
        Public ReadOnly Property InnerText As String
            Get
                Return mtext
            End Get
        End Property

        ''' <summary>
        ''' Value of the attribute called <paramref name="Name"/>, or Nothing when the
        ''' element has no such attribute. The comparison ignores case.
        ''' </summary>
        Public ReadOnly Property Attribut(Name As String) As String
            Get
                If mAttributes_Name Is Nothing OrElse Name Is Nothing Then Return Nothing
                For i As Integer = 0 To mAttributes_Name.Length - 1
                    ' Compare case-insensitively on BOTH sides; the stored name keeps the page's casing.
                    If String.Equals(mAttributes_Name(i), Name, StringComparison.OrdinalIgnoreCase) Then
                        Return mAttributes_Value(i)
                    End If
                Next
                Return Nothing
            End Get
        End Property

        ''' <summary>
        ''' Searches this element's inner text for the occurrence number
        ''' <paramref name="Index"/> (zero-based) of the tag <paramref name="balise"/>.
        ''' </summary>
        Public Function Element(balise As String, Index As Integer) As struct_Element
            Return Create_element(balise, Index, mtext)
        End Function
    End Structure

    ''' <summary>Internal description of one tag occurrence inside the searched text.</summary>
    Private Structure struct_Balise
        ''' <summary>Tag name as requested by the caller; Nothing when the tag was not found.</summary>
        Public Name As String
        ''' <summary>Raw text between the tag name and the closing ">" of the opening tag.</summary>
        Public Attributes As String
        ''' <summary>Inner text of the element.</summary>
        Public Value As String
        ''' <summary>Index of the "&lt;" of the opening tag.</summary>
        Public Start As Integer
        ''' <summary>Number of characters from Start to the end of the element (closing tag included when there is one).</summary>
        Public Length As Integer
    End Structure
#End Region

    Private mDataPage As String
    Private Web As System.Net.WebClient

#Region "Constructeur"
    ''' <summary>Creates an extractor with a proxy-less, UTF-8 web client.</summary>
    Public Sub New()
        Web = New System.Net.WebClient()
        Web.Proxy = Nothing
        Web.Encoding = System.Text.Encoding.UTF8
    End Sub
#End Region

#Region "Load"
    ''' <summary>
    ''' Downloads <paramref name="url"/> and keeps its content as the text to search.
    ''' Loading is best-effort: on any failure no exception escapes, the previously
    ''' loaded page is discarded and later lookups return empty elements.
    ''' </summary>
    Public Sub Load(url As String)
        ' Drop the old page first so a failed download can never be mistaken for the new one.
        mDataPage = Nothing
        If Web Is Nothing Then Return ' instance already disposed
        Try
            mDataPage = Web.DownloadString(url)
        Catch ex As Exception
            System.Diagnostics.Debug.WriteLine("Extractor.Load failed for '" & url & "': " & ex.Message)
        End Try
    End Sub

    ''' <summary>Uses <paramref name="Data"/> directly as the text to search (no download).</summary>
    Public Sub Load_FromString(Data As String)
        mDataPage = Data
    End Sub
#End Region

#Region "Public"
    ''' <summary>
    ''' Returns the occurrence number <paramref name="Index"/> (zero-based) of the tag
    ''' <paramref name="balise"/> in the loaded page, or an empty element when there
    ''' is no page or not enough occurrences.
    ''' </summary>
    Public Function Element(balise As String, Index As Integer) As struct_Element
        Return Create_element(balise, Index, mDataPage)
    End Function
#End Region

#Region "Private"
    ''' <summary>
    ''' Builds an element from the extracted data.
    ''' </summary>
    ''' <param name="balise">Tag name without angle brackets, e.g. "a".</param>
    ''' <param name="index">Zero-based occurrence to return.</param>
    ''' <param name="data">Text to search; may be Nothing or empty.</param>
    ''' <returns>The element, or an empty element (InnerText = "") when it cannot be found.</returns>
    Private Shared Function Create_element(balise As String, index As Integer, data As String) As struct_Element
        Dim myElement As struct_Element = Nothing
        myElement.mtext = ""
        If String.IsNullOrEmpty(balise) OrElse String.IsNullOrEmpty(data) OrElse index < 0 Then Return myElement

        ' Pages sometimes carry attribute quotes as HTML entities; normalise them to real quotes.
        data = data.Replace("&quot;", """")

        Dim start_index As Integer = 0
        Dim myBalise As struct_Balise = Nothing
        For i As Integer = 0 To index
            myBalise = Search(balise, start_index, data)
            ' Fewer occurrences than requested: stop instead of restarting from the top of the page.
            If myBalise.Name Is Nothing Then Return myElement
            start_index = myBalise.Start + myBalise.Length
        Next i

        If Not String.IsNullOrEmpty(myBalise.Attributes) Then Parse_Attributes(myBalise.Attributes, myElement)
        myElement.mtext = myBalise.Value
        Return myElement
    End Function

    ''' <summary>
    ''' Splits the raw attribute text of an opening tag into name/value pairs.
    ''' Only double-quoted values are recognised.
    ''' </summary>
    Private Shared Sub Parse_Attributes(raw As String, ByRef element As struct_Element)
        ' Splitting on the quote yields: name-part, value, name-part, value, ..., trailing text.
        Dim parts() As String = raw.Split(""""c)
        Dim count As Integer = parts.Length \ 2
        If count = 0 Then Return

        Dim names(count - 1) As String
        Dim values(count - 1) As String
        For i As Integer = 0 To count - 1
            names(i) = Attribute_Name(parts(2 * i))
            values(i) = ClearString(parts(2 * i + 1))
        Next i
        element.mAttributes_Name = names
        element.mAttributes_Value = values
    End Sub

    ''' <summary>
    ''' Extracts the attribute name from the text preceding a quoted value, e.g.
    ''' " href=" gives "href" and " disabled href = " gives "href".
    ''' </summary>
    Private Shared Function Attribute_Name(part As String) As String
        If String.IsNullOrEmpty(part) Then Return ""
        ' Treat "=" as a separator, split on any whitespace, and keep the last token:
        ' earlier tokens are value-less attributes that precede the one being read.
        Dim tokens() As String = part.Replace("="c, " "c).Split(DirectCast(Nothing, Char()), StringSplitOptions.RemoveEmptyEntries)
        If tokens.Length = 0 Then Return ""
        Return tokens(tokens.Length - 1)
    End Function

    ''' <summary>
    ''' Searches for a tag and retrieves everything it contains.
    ''' </summary>
    ''' <param name="Balise">Tag name without angle brackets.</param>
    ''' <param name="Start_Index">Position in <paramref name="data"/> where the search starts.</param>
    ''' <param name="data">Text to search.</param>
    ''' <returns>The tag description; its Name is Nothing when no tag was found.</returns>
    Private Shared Function Search(Balise As String, Start_Index As Integer, data As String) As struct_Balise
        Dim myBalise As struct_Balise = Extract_Attrib(Balise, data, Start_Index)
        Extract_Value(myBalise, data)
        myBalise.Value = ClearString(myBalise.Value)
        Return myBalise
    End Function

    ''' <summary>
    ''' Locates the next opening tag and extracts its raw attribute text.
    ''' </summary>
    ''' <param name="balise">Tag name without angle brackets.</param>
    ''' <param name="data">Text to search.</param>
    ''' <param name="Start_Index">Position where the search starts.</param>
    ''' <returns>Name, Start, Attributes and the opening-tag Length, or Nothing when not found.</returns>
    Private Shared Function Extract_Attrib(balise As String, data As String, Start_Index As Integer) As struct_Balise
        If Start_Index < 0 OrElse Start_Index > data.Length Then Return Nothing

        Dim opener As String = "<" & balise
        Dim Data_start As Integer = data.IndexOf(opener, Start_Index, StringComparison.OrdinalIgnoreCase)
        Do While Data_start <> -1
            Dim after As Integer = Data_start + opener.Length
            ' The page ends right after the tag name: there is no complete tag to read.
            If after >= data.Length Then Return Nothing
            ' Accept the match only when the name really ends here, so that
            ' searching for "a" does not stop on an "abbr" tag.
            If Not IsTagNameChar(data(after)) Then Exit Do
            Data_start = data.IndexOf(opener, after, StringComparison.OrdinalIgnoreCase)
        Loop
        If Data_start = -1 Then Return Nothing

        Dim attrib_start As Integer = Data_start + opener.Length
        Dim data_end As Integer = data.IndexOf(">"c, attrib_start)
        If data_end = -1 Then Return Nothing

        Dim myBalise As struct_Balise = Nothing
        myBalise.Attributes = data.Substring(attrib_start, data_end - attrib_start)
        myBalise.Start = Data_start
        myBalise.Length = data_end - Data_start + 1
        myBalise.Name = balise
        Return myBalise
    End Function

    ''' <summary>True when <paramref name="c"/> can be part of a tag name (letter, digit or hyphen).</summary>
    Private Shared Function IsTagNameChar(c As Char) As Boolean
        Return Char.IsLetterOrDigit(c) OrElse c = "-"c
    End Function

    ''' <summary>
    ''' Retrieves what a tag contains and records the total length of the element.
    ''' </summary>
    ''' <param name="balise">Tag located by Extract_Attrib; updated in place (Value, Length).</param>
    ''' <param name="data">Text the tag was found in.</param>
    Private Shared Sub Extract_Value(ByRef balise As struct_Balise, data As String)
        If String.IsNullOrEmpty(balise.Name) Then Exit Sub

        Dim attributes_length As Integer = If(balise.Attributes Is Nothing, 0, balise.Attributes.Length)
        ' "<" + name + attributes + ">" : first character after the opening tag.
        Dim sub_start As Integer = balise.Start + 1 + balise.Name.Length + attributes_length + 1

        ' Look for the full closing tag; a bare "name>" would also match a nested opening tag.
        Dim closing As String = "</" & balise.Name & ">"
        Dim ends As Integer = data.IndexOf(closing, sub_start, StringComparison.OrdinalIgnoreCase)
        If ends = -1 Then
            ' No closing tag (img, br, truncated page...): the element is just its opening tag.
            balise.Value = ""
            balise.Length = sub_start - balise.Start
            Exit Sub
        End If

        balise.Value = data.Substring(sub_start, ends - sub_start)
        balise.Length = ends + closing.Length - balise.Start
    End Sub

    ''' <summary>
    ''' Removes line breaks and trims surrounding whitespace.
    ''' </summary>
    ''' <param name="text">Text to clean; may be Nothing.</param>
    ''' <returns>The cleaned text, or "" for Nothing/empty input.</returns>
    Private Shared Function ClearString(text As String) As String
        If String.IsNullOrEmpty(text) Then Return ""
        Return text.Replace(vbCrLf, "").Replace(vbLf, "").Replace(vbCr, "").Trim()
    End Function
#End Region

#Region "Destructeur"
    Private disposedValue As Boolean

    ''' <summary>Releases the web client (when <paramref name="disposing"/>) and forgets the loaded page.</summary>
    Protected Overridable Sub Dispose(disposing As Boolean)
        If Not Me.disposedValue Then
            If disposing AndAlso Me.Web IsNot Nothing Then
                Me.Web.Dispose()
                Me.Web = Nothing
            End If
            mDataPage = Nothing
        End If
        Me.disposedValue = True
    End Sub

    ''' <summary>Releases the resources held by this extractor.</summary>
    Public Sub Dispose() Implements IDisposable.Dispose
        Dispose(True)
        GC.SuppressFinalize(Me)
    End Sub
#End Region
End Class
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement