VBcoders Guest



Don't have an account yet? Register
 


Forgot Password?



This code is a library of functions to parse and process HTML. This library is good for data mining

by Friso Kluitenberg (2 Submissions)
Category: Internet/HTML
Compatibility: Visual Basic 4.0 (32-bit)
Difficulty: Unknown Difficulty
Originally Published: Thu 9th August 2007
Date Added: Mon 8th February 2021
Rating: (1 Votes)

This code is a library of functions to parse and process HTML. This library is good for data mining, parsing and extracting data in your...

Rate "This code is a library of functions to parse and process HTML. This library is good for data mining"



Public Sub GetAllTags(sHtml As String, ByRef sArray() As String)
    ' Collects every "<...>" span found in sHtml, in document order.
    '
    ' sHtml  - the markup to scan.
    ' sArray - receives the tags (zero-based). It is always re-dimensioned:
    '          when no tag is found it holds a single empty string at
    '          index 0, otherwise it holds exactly one element per tag.
    '
    ' A "<" without a following ">" ends the scan; the unterminated
    ' fragment is not returned.
    Dim lOpen As Long       ' position of the current "<"
    Dim lClose As Long      ' position of the matching ">" (0 before first tag)
    Dim lCount As Long      ' number of tags stored so far

    ReDim sArray(0)

    Do
        lOpen = InStr(lClose + 1, sHtml, "<")
        If lOpen < 1 Then Exit Do

        lClose = InStr(lOpen + 1, sHtml, ">")
        If lClose < 1 Then Exit Do

        ' Grow geometrically so large documents do not pay for one
        ' ReDim Preserve (a full array copy) per tag.
        If lCount > UBound(sArray) Then ReDim Preserve sArray(lCount * 2 - 1)

        sArray(lCount) = Mid$(sHtml, lOpen, lClose - lOpen + 1)
        lCount = lCount + 1
    Loop

    ' Trim unused capacity. Keyed on the tag count (not the last index)
    ' so a single-tag document does not keep a spurious empty element.
    If lCount > 0 Then ReDim Preserve sArray(lCount - 1)

End Sub

Public Function GetTagType(Tag As String) As String
    ' Returns the lower-cased element name of an HTML tag,
    ' e.g. "<A HREF='x'>" -> "a", "<br>" -> "br", "</P>" -> "/p".
    '
    ' Tag - a single tag including its angle brackets.
    '
    ' Returns an empty string when Tag is empty or contains no name.
    ' The name ends at the first whitespace character or ">", and the
    ' trailing "/" of a self-closing tag ("<br/>") is not part of the name.
    Dim lEnd As Long        ' position of the first character after the name
    Dim lPos As Long
    Dim vDelim As Variant
    Dim sName As String

    If Len(Tag) = 0 Then Exit Function

    ' Earliest of: space, tab, CR, LF, ">". Defaults to one past the end
    ' so a tag missing its ">" does not lose its last character.
    lEnd = Len(Tag) + 1
    For Each vDelim In Array(" ", Chr$(9), Chr$(13), Chr$(10), ">")
        lPos = InStr(1, Tag, CStr(vDelim))
        If (lPos > 0) And (lPos < lEnd) Then lEnd = lPos
    Next

    ' The name starts at character 2; nothing to return if the delimiter
    ' is already there (also avoids a negative length for Mid$).
    If lEnd <= 2 Then Exit Function

    sName = Mid$(Tag, 2, lEnd - 2)

    ' "<br/>" -> "br"; a closing tag's leading "/" is left untouched.
    If (Len(sName) > 1) And (Right$(sName, 1) = "/") Then
        sName = Left$(sName, Len(sName) - 1)
    End If

    GetTagType = LCase$(sName)

End Function

Public Function GetTagAttrValue(Tag, Attr As String) As String '//<a href=> href is the attr
    ' Returns the value of attribute Attr inside a single HTML tag.
    '
    ' Tag  - the tag text including angle brackets, e.g. "<a href='x'>".
    ' Attr - attribute name without a leading space; matched
    '        case-insensitively. The caller's variable is not modified.
    '
    ' Returns the value without surrounding quotes. Returns an empty
    ' string when the attribute is absent, has no value, has an empty
    ' value, or has an opening quote with no closing quote.
    '
    ' Limitation: the first occurrence of " " & Attr is used, so a name
    ' that is a prefix of another attribute (href / hreflang) can match
    ' the longer attribute.
    Dim sTag As String      ' Tag coerced to String once
    Dim sKey As String      ' lower-cased search key, " " & Attr
    Dim sSep As String      ' first character after the name, spaces and "="
    Dim lPos As Long        ' after the skip loop: one past sSep
    Dim lEnd As Long        ' position of the character that ends the value
    Dim lGt As Long

    sTag = Tag

    ' An attribute is always preceded by a space. Build the key in a
    ' local: Attr is passed ByRef, so assigning to it would change the
    ' caller's string on every call.
    sKey = " " & LCase$(Attr)
    lPos = InStr(1, LCase$(sTag), sKey)
    If lPos < 1 Then Exit Function      ' attribute not found

    lPos = lPos + Len(sKey)             ' move to the end of the name

    ' Skip the " " / "=" run between name and value. Past the end of
    ' the string Mid$ yields "" which also terminates the loop.
    Do
        sSep = Mid$(sTag, lPos, 1)
        lPos = lPos + 1
    Loop While (sSep = " ") Or (sSep = "=")

    Select Case sSep

        Case "'", Chr$(34)
            ' Quoted value: lPos is its first character. Search from
            ' lPos itself so an empty value ('') finds its own closing
            ' quote instead of one belonging to a later attribute.
            lEnd = InStr(lPos, sTag, sSep)
            If lEnd = 0 Then Exit Function
            GetTagAttrValue = Mid$(sTag, lPos, lEnd - lPos)

        Case "", ">"
            ' Attribute without a value, e.g. <input disabled>.

        Case Else
            ' Unquoted value: sSep (at lPos - 1) is its first character.
            ' It runs up to the next space or the closing ">",
            ' whichever comes first, else to the end of the string.
            lEnd = InStr(lPos, sTag, " ")
            lGt = InStr(lPos, sTag, ">")
            If (lEnd < 1) Or ((lGt > 0) And (lGt < lEnd)) Then lEnd = lGt
            If lEnd < 1 Then lEnd = Len(sTag) + 1
            GetTagAttrValue = Mid$(sTag, lPos - 1, lEnd - lPos + 1)

    End Select

End Function


Download this snippet    Add to My Saved Code

Comments on "This code is a library of functions to parse and process HTML. This library is good for data mining"

No comments have been posted about "This code is a library of functions to parse and process HTML. This library is good for data mining". Why not be the first to post a comment about it?

Post your comment

Subject:
Message:
0/1000 characters