LinkURL



Public Type Link
URL As String
Tag As String
End Type

Global LinkBuffer() As Link

Public Sub ExtractLinks(str As String, baseurl As String)
Erase LinkBuffer()
ReDim LinkBuffer(0)
thea = InStr(1, LCase(str), "<a ")
bizace = InStr(1, LCase(str), "<base ") + 6

If bizace <> 0 Then
endofbase = InStr(bizace, str, ">") + 1
If endofbase = 0 Then Goto nobase
thebasetag = Mid(str, bizace, endofbase - bizace)
basehrefloc = InStr(1, LCase(thebasetag), "href=")
If basehrefloc = 0 Then Goto nobase
basehrefend = InStr(href, thewholelink, " ")
If basehrefend = 0 Then basehrefend = InStr(basehrefloc, _
thebasetag, ">")
If basehrefend = 0 Then Goto nobase
baseurl = Mid(thebasetag, basehrefloc + 5, basehrefend - basehrefloc)
If Right$(baseurl, 1) = ">" Then baseurl = Mid(baseurl, _
1, Len(baseurl) - 1)
If Left$(baseurl, 1) = """" Then baseurl = Mid(baseurl, 2)
If Right$(baseurl, 1) = """" Then baseurl = Mid(baseurl, _
1, Len(baseurl) - 1)
End If

nobase:

Do While thea <> 0
endoflink = InStr(thea, LCase(str), "</a>") + 3
If endoflink = 0 Then Goto Invalid
thewholelink = Mid(str, thea, endoflink)
endofh = InStr(1, thewholelink, ">")
firsthalf = Mid(thewholelink, 1, endofh)

href = InStr(1, firsthalf, "href=") + 5
If href = 0 Then Goto Invalid
endofurl = InStr(href, firsthalf, " ")
If endofurl = 0 Then endofurl = InStr(href, firsthalf, ">")
If endofurl = 0 Then Goto nobase
theurl = Mid(firsthalf, href, endofurl - href)
tagend = InStr(1, LCase(thewholelink), "</a>") - 1
thetag = Mid(thewholelink, Len(firsthalf) + 1, tagend - Len(firsthalf))
If Left$(theurl, 1) = """" Then theurl = Mid(theurl, 2)
If Right$(theurl, 1) = """" Then theurl = _
Mid(theurl, 1, Len(theurl) - 1)

If Mid(theurl, 4, 3) <> "://" Then
If Left$(theurl, 1) <> "/" Then theurl = "/" & theurl
If Right$(baseurl, 1) = "/" Then baseurl = Left$(baseurl, _
Len(baseurl) - 1)
theurl = baseurl & theurl
End If

ReDim Preserve LinkBuffer(UBound(LinkBuffer) + 1)
LinkBuffer(UBound(LinkBuffer)).Owner = CurrentURL
LinkBuffer(UBound(LinkBuffer)).Tag = thetag
Invalid:

DoEvents
If endoflink = 0 Then endoflink = thea + 1
thea = InStr(endoflink, LCase(str), "<a ")
Loop
End Sub

(linkurl.html)- by Paolo Puglisi - Modifica del 25/3/2019