| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162 |
- package crawler
- import (
- "container/list"
- "log"
- "net/http"
- "../goquery"
- )
- // PUBLIC METHOD
// Crawler bundles the state needed to pull links out of one fetched page:
// the address that was requested, the HTTP response that came back, and the
// list the discovered links are appended to.
type Crawler struct {
	// url is the address passed to New.
	url string

	// listUrls holds the href values gathered by ProcessElementHref and
	// SaveURL; ListLink resets it before each parse.
	listUrls *list.List

	// response is the HTTP response whose body ListLink parses.
	response *http.Response
}
- // New ..
- func New(url string) Crawler {
- d := Crawler{}
- d.url = url
- d.listUrls = list.New()
- d.response = HTTPGet(url)
- return d
- }
- // ListLink ...
- func (d Crawler) ListLink() list.List {
- document, err := goquery.NewDocumentFromReader(d.response.Body)
- if err != nil {
- log.Fatal("Error loading HTTP response body. ", err)
- }
- d.listUrls.Init()
- document.Find("a").Each(d.ProcessElementHref)
- return *d.listUrls
- }
- // PRIVATE METHOD
// HTTPGet performs a GET request for url with net/http's package-level Get
// and hands back the response. Any error from the request is turned into a
// panic. The response body is not closed here; closing it is left to whoever
// receives the response.
func HTTPGet(url string) *http.Response {
	response, err := http.Get(url)
	if err != nil {
		panic(err)
	}

	return response
}
- // SaveURL ...
- func (d *Crawler) SaveURL(url string) {
- d.listUrls.PushBack(url)
- }
- // ProcessElementHref ...
- func (d *Crawler) ProcessElementHref(index int, element *goquery.Selection) {
- href, exists := element.Attr("href")
- if exists {
- d.listUrls.PushBack(href)
- }
- }
|