|
Public Type Link
URL As String Tag As String End Type Global LinkBuffer() As Link Public Sub ExtractLinks(str As String, baseurl As String) Erase LinkBuffer() ReDim LinkBuffer(0) thea = InStr(1, LCase(str), "<a ") bizace = InStr(1, LCase(str), "<base ") + 6 If bizace <> 0 Then endofbase = InStr(bizace, str, ">") + 1 If endofbase = 0 Then Goto nobase thebasetag = Mid(str, bizace, endofbase - bizace) basehrefloc = InStr(1, LCase(thebasetag), "href=") If basehrefloc = 0 Then Goto nobase basehrefend = InStr(href, thewholelink, " ") If basehrefend = 0 Then basehrefend = InStr(basehrefloc, _ thebasetag, ">") If basehrefend = 0 Then Goto nobase baseurl = Mid(thebasetag, basehrefloc + 5, basehrefend - basehrefloc) If Right$(baseurl, 1) = ">" Then baseurl = Mid(baseurl, _ 1, Len(baseurl) - 1) If Left$(baseurl, 1) = """" Then baseurl = Mid(baseurl, 2) If Right$(baseurl, 1) = """" Then baseurl = Mid(baseurl, _ 1, Len(baseurl) - 1) End If nobase: Do While thea <> 0 endoflink = InStr(thea, LCase(str), "</a>") + 3 If endoflink = 0 Then Goto Invalid thewholelink = Mid(str, thea, endoflink) endofh = InStr(1, thewholelink, ">") firsthalf = Mid(thewholelink, 1, endofh) href = InStr(1, firsthalf, "href=") + 5 If href = 0 Then Goto Invalid endofurl = InStr(href, firsthalf, " ") If endofurl = 0 Then endofurl = InStr(href, firsthalf, ">") If endofurl = 0 Then Goto nobase theurl = Mid(firsthalf, href, endofurl - href) tagend = InStr(1, LCase(thewholelink), "</a>") - 1 thetag = Mid(thewholelink, Len(firsthalf) + 1, tagend - Len(firsthalf)) If Left$(theurl, 1) = """" Then theurl = Mid(theurl, 2) If Right$(theurl, 1) = """" Then theurl = _ Mid(theurl, 1, Len(theurl) - 1) If Mid(theurl, 4, 3) <> "://" Then If Left$(theurl, 1) <> "/" Then theurl = "/" & theurl If Right$(baseurl, 1) = "/" Then baseurl = Left$(baseurl, _ Len(baseurl) - 1) theurl = baseurl & theurl End If ReDim Preserve LinkBuffer(UBound(LinkBuffer) + 1) LinkBuffer(UBound(LinkBuffer)).Owner = CurrentURL LinkBuffer(UBound(LinkBuffer)).Tag = thetag Invalid: DoEvents If endoflink = 0 Then endoflink = thea + 1 thea = InStr(endoflink, LCase(str), "<a ") Loop End Sub |