본문 바로가기
카테고리 없음

[vb.net] HtmlAgilityPack 라이브러리를 이용한 크롤링 시 라이브러리 사용 예시

by IT HUB 2020. 9. 8.
728x90
반응형

 

Imports HtmlAgilityPack
 
        Dim doc As New HtmlDocument, a As HtmlNode, img As HtmlNode, hNode As HtmlNode
        doc.LoadHtml(T)
        If Not doc.DocumentNode.SelectNodes("//*[@id=""productList""]/li") Is Nothing Then
            For Each li As HtmlNode In doc.DocumentNode.SelectNodes("//*[@id=""productList""]/li")
                a = li.SelectSingleNode("a")
                T = a.SelectSingleNode("dl/dd/div[2]").InnerHtml.Replace(vbLf, Space(1)).Trim
                T = a.Attributes("data-item-id").Value
                T = a.Attributes("data-is-rocket").Value = "true"
                T = "https://www.coupang.com" & a.Attributes("href").Value.Replace("&""&")
                img = a.SelectSingleNode("dl/dt/img")
                T = "https:" & img.Attributes("src").Value
                If Not img.Attributes("data-img-src") Is Nothing Then
                    T = "https:" & img.Attributes("data-img-src").Value
                Else
                Application.DoEvents()
                End If
                If Not a.SelectSingleNode("//*[@id=""searchOptionForm""]/div/div/div[1]/div/div[1]/h3") Is Nothing Then
                    T = doc.DocumentNode.SelectSingleNode("//*[@id=""searchOptionForm""]/div/div/div[1]/div/div[1]/h3").InnerText.Replace(vbTab, Space(1)).Replace(vbLf, Space(1)).Split("("c).First.Trim
                End If
                T = a.SelectSingleNode("dl/dd/div[3]/div[1]/div[1]/em/strong").InnerText
                hNode = GetClassNode(a.SelectSingleNode("dl/dd/div[3]/div[1]/div[1]/span[1]"), "span""discount-percentage")
            next
        End If
 
    Private Function GetClassNode(ByVal Node As HtmlNode, ByVal tagName As StringByVal className As StringAs HtmlNode
        Try
            Dim ND As List(Of HtmlNode) = Node.Descendants(tagName).Where(Function(k) k.Attributes.Contains("class") AndAlso k.Attributes("class").Value.Contains(className)).ToList
            If ND.Count > 0 Then Return ND.First
        Catch ex As Exception
        End Try
        Return Nothing
    End Function
 

 

HtmlAgilityPack.dll
0.16MB

 

 

반응형


댓글