代码拉取完成,页面将自动刷新
package emailscraper
import (
"github.com/gocolly/colly"
)
// Scrape crawls the given website and returns the e-mail addresses collected
// along the way.
//
// The url is first rewritten to its "https://" form by getWebsite and visited.
// If that crawl produced no e-mails, the "http://" form is visited as a
// fallback. Unless s.cfg.FollowExternalLinks is set, the collector's
// AllowedDomains is set to the value returned by prepareAllowedDomain.
//
// Errors returned by Visit are only reported through s.log; the only error
// Scrape itself returns is the one from prepareAllowedDomain.
func (s *Scraper) Scrape(url string) ([]string, error) {
	url = getWebsite(url, true)

	var e emails

	// NOTE(review): c is s.collector itself, not a copy. If s.collector is a
	// shared *colly.Collector, every call to Scrape registers another pair of
	// callbacks on it (and overwrites AllowedDomains) — confirm whether the
	// collector should be cloned per call.
	c := s.collector

	if !s.cfg.FollowExternalLinks {
		allowedDomains, err := prepareAllowedDomain(url)
		if err != nil {
			return nil, err
		}

		c.AllowedDomains = allowedDomains
	}

	// Parse emails on each downloaded page
	c.OnScraped(func(response *colly.Response) {
		e.parseEmails(response.Body)
	})

	// cloudflare encoded email support
	c.OnHTML("span[data-cfemail]", func(el *colly.HTMLElement) {
		e.parseCloudflareEmail(el.Attr("data-cfemail"))
	})

	// Start the scrape
	if err := c.Visit(url); err != nil {
		s.log("error while visiting secure domain: ", url, err.Error())
	}

	c.Wait() // Wait for concurrent scrapes to finish

	// len of a nil slice is 0, so a separate nil check is unnecessary.
	if len(e.emails) == 0 {
		// Nothing found over https: retry once on the insecure url.
		insecureURL := getWebsite(url, false)

		if err := c.Visit(insecureURL); err != nil {
			s.log("error while visiting insecure domain: ", insecureURL, err.Error())
		}

		c.Wait() // Wait for concurrent scrapes to finish
	}

	return e.emails, nil
}
// getWebsite passes url through trimProtocol and prefixes the result with
// "https://" when secure is true, or "http://" otherwise.
func getWebsite(url string, secure bool) string {
	scheme := "http://"
	if secure {
		scheme = "https://"
	}

	return scheme + trimProtocol(url)
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。