// crawler.go — small link crawler that fetches one page and collects anchor hrefs via goquery.
  1. package crawler
  2. import (
  3. "container/list"
  4. "log"
  5. "net/http"
  6. "../goquery"
  7. )
// Crawler fetches a URL once at construction time and collects the
// href values of <a> elements found in the response body.
type Crawler struct {
	url      string         // URL fetched when the Crawler was created
	listUrls *list.List     // hrefs collected from the fetched document
	response *http.Response // response from the initial GET; its body is consumed by ListLink
}
  15. // New ..
  16. func New(url string) Crawler {
  17. d := Crawler{}
  18. d.url = url
  19. d.listUrls = list.New()
  20. d.response = HTTPGet(url)
  21. return d
  22. }
  23. // ListLink ...
  24. func (d Crawler) ListLink() list.List {
  25. document, err := goquery.NewDocumentFromReader(d.response.Body)
  26. if err != nil {
  27. log.Fatal("Error loading HTTP response body. ", err)
  28. }
  29. d.listUrls.Init()
  30. document.Find("a").Each(d.ProcessElementHref)
  31. return *d.listUrls
  32. }
  33. // PRIVATE METHOD
  34. // HTTPGet ...
  35. func HTTPGet(url string) *http.Response {
  36. resp, err := http.Get(url)
  37. if err != nil {
  38. panic(err)
  39. }
  40. return resp
  41. }
// SaveURL appends url to the crawler's collected link list.
func (d *Crawler) SaveURL(url string) {
	d.listUrls.PushBack(url)
}
  46. // ProcessElementHref ...
  47. func (d *Crawler) ProcessElementHref(index int, element *goquery.Selection) {
  48. href, exists := element.Attr("href")
  49. if exists {
  50. d.listUrls.PushBack(href)
  51. }
  52. }