mirror of https://github.com/fmartingr/bazaar.git
feat: added support for more shops
- Added support for akiracomics.com, heroesdepapel.es, amazon.es, amazon.com, store.steampowered.com - Added a utils package with helpers to parse prices and times - Added a shortcut `manager.Retrieve` method to use directly with a configured manager (allowing the user to directly call for data extraction on a URL without having to parse/retrieve the shop manually).
This commit is contained in:
parent
fe33dd5360
commit
c29b0fbed4
|
@ -5,15 +5,20 @@ import (
|
|||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/url"
|
||||
|
||||
"github.com/fmartingr/bazaar/pkg/manager"
|
||||
"github.com/fmartingr/bazaar/pkg/shop/akira"
|
||||
"github.com/fmartingr/bazaar/pkg/shop/akiracomics"
|
||||
"github.com/fmartingr/bazaar/pkg/shop/amazon"
|
||||
"github.com/fmartingr/bazaar/pkg/shop/heroesdepapel"
|
||||
"github.com/fmartingr/bazaar/pkg/shop/steam"
|
||||
)
|
||||
|
||||
func main() {
|
||||
m := manager.NewManager()
|
||||
m.Register(akira.Domains, akira.NewAkiraShopFactory())
|
||||
m.Register(akiracomics.Domains, akiracomics.NewAkiraShopFactory())
|
||||
m.Register(steam.Domains, steam.NewSteamShopFactory())
|
||||
m.Register(heroesdepapel.Domains, heroesdepapel.NewHeroesDePapelShopFactory())
|
||||
m.Register(amazon.Domains, amazon.NewAmazonShopFactory())
|
||||
|
||||
http.HandleFunc("/item", func(rw http.ResponseWriter, r *http.Request) {
|
||||
if err := r.ParseForm(); err != nil {
|
||||
|
@ -21,18 +26,7 @@ func main() {
|
|||
return
|
||||
}
|
||||
|
||||
itemUrl, err := url.Parse(r.PostForm.Get("url"))
|
||||
if err != nil {
|
||||
rw.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
|
||||
shop := m.Get(itemUrl.Host)
|
||||
if shop == nil {
|
||||
rw.WriteHeader(400)
|
||||
return
|
||||
}
|
||||
product, err := shop.Get(itemUrl.String())
|
||||
product, err := m.Retrieve(r.PostForm.Get("url"))
|
||||
if err != nil {
|
||||
rw.WriteHeader(500)
|
||||
return
|
||||
|
@ -44,6 +38,8 @@ func main() {
|
|||
rw.Write(payload)
|
||||
})
|
||||
|
||||
log.Println("starting server")
|
||||
|
||||
if err := http.ListenAndServe(":5001", http.DefaultServeMux); err != nil {
|
||||
log.Printf("Error: %s", err)
|
||||
}
|
||||
|
|
11
go.mod
11
go.mod
|
@ -3,14 +3,7 @@ module github.com/fmartingr/bazaar
|
|||
go 1.16
|
||||
|
||||
require (
|
||||
github.com/PuerkitoBio/goquery v1.8.0 // indirect
|
||||
github.com/antchfx/htmlquery v1.2.4 // indirect
|
||||
github.com/antchfx/xmlquery v1.3.8 // indirect
|
||||
github.com/gobwas/glob v0.2.3 // indirect
|
||||
github.com/gocolly/colly v1.2.0
|
||||
github.com/kennygrant/sanitize v1.2.4 // indirect
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
|
||||
github.com/temoto/robotstxt v1.1.2 // indirect
|
||||
github.com/PuerkitoBio/goquery v1.8.0
|
||||
github.com/goodsign/monday v1.0.0
|
||||
golang.org/x/net v0.0.0-20211208012354-db4efeb81f4b // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
)
|
||||
|
|
43
go.sum
43
go.sum
|
@ -2,52 +2,13 @@ github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0g
|
|||
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
|
||||
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
|
||||
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
|
||||
github.com/antchfx/htmlquery v1.2.4 h1:qLteofCMe/KGovBI6SQgmou2QNyedFUW+pE+BpeZ494=
|
||||
github.com/antchfx/htmlquery v1.2.4/go.mod h1:2xO6iu3EVWs7R2JYqBbp8YzG50gj/ofqs5/0VZoDZLc=
|
||||
github.com/antchfx/xmlquery v1.3.8 h1:dRnBQM3Vk5BVJFvFwsHOLAox+mEiNw5ZusaUNCrEdoU=
|
||||
github.com/antchfx/xmlquery v1.3.8/go.mod h1:wojC/BxjEkjJt6dPiAqUzoXO5nIMWtxHS8PD8TmN4ks=
|
||||
github.com/antchfx/xpath v1.2.0 h1:mbwv7co+x0RwgeGAOHdrKy89GvHaGvxxBtPK0uF9Zr8=
|
||||
github.com/antchfx/xpath v1.2.0/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
|
||||
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
|
||||
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
|
||||
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
|
||||
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
|
||||
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
|
||||
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
||||
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
|
||||
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
|
||||
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg=
|
||||
github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||
github.com/goodsign/monday v1.0.0 h1:Yyk/s/WgudMbAJN6UWSU5xAs8jtNewfqtVblAlw0yoc=
|
||||
github.com/goodsign/monday v1.0.0/go.mod h1:r4T4breXpoFwspQNM+u2sLxJb2zyTaxVGqUfTBjWOu8=
|
||||
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/net v0.0.0-20211208012354-db4efeb81f4b h1:MWaHNqZy3KTpuTMAGvv+Kw+ylsEpmyJZizz1dqxnu28=
|
||||
golang.org/x/net v0.0.0-20211208012354-db4efeb81f4b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
|
||||
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
||||
|
|
|
@ -2,20 +2,17 @@ package manager
|
|||
|
||||
import (
|
||||
"fmt"
|
||||
"net/url"
|
||||
|
||||
"github.com/fmartingr/bazaar/pkg/models"
|
||||
"github.com/gocolly/colly"
|
||||
)
|
||||
|
||||
type Manager struct {
|
||||
domains map[string]models.Shop
|
||||
collectorOptions []func(*colly.Collector)
|
||||
domains map[string]models.Shop
|
||||
}
|
||||
|
||||
func (m *Manager) Register(domains []string, shopFactory models.ShopFactory) error {
|
||||
options := m.collectorOptions
|
||||
options = append(options, colly.AllowedDomains(domains...))
|
||||
shop := shopFactory(options)
|
||||
shop := shopFactory()
|
||||
|
||||
for _, domain := range domains {
|
||||
if _, exists := m.domains[domain]; exists {
|
||||
|
@ -28,20 +25,30 @@ func (m *Manager) Register(domains []string, shopFactory models.ShopFactory) err
|
|||
return nil
|
||||
}
|
||||
|
||||
func (m *Manager) Get(host string) models.Shop {
|
||||
func (m *Manager) GetShop(host string) models.Shop {
|
||||
shop, exists := m.domains[host]
|
||||
if !exists {
|
||||
return nil
|
||||
}
|
||||
|
||||
return shop
|
||||
}
|
||||
|
||||
func (m *Manager) Retrieve(productURL string) (*models.Product, error) {
|
||||
itemUrl, err := url.Parse(productURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing url: %s", err)
|
||||
}
|
||||
|
||||
shop := m.GetShop(itemUrl.Host)
|
||||
if shop == nil {
|
||||
return nil, fmt.Errorf("shop not found for domain")
|
||||
}
|
||||
|
||||
return shop.Get(productURL)
|
||||
}
|
||||
|
||||
func NewManager() Manager {
|
||||
return Manager{
|
||||
collectorOptions: []func(*colly.Collector){
|
||||
colly.UserAgent("bazaar/0.0.1"),
|
||||
},
|
||||
domains: make(map[string]models.Shop),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
package models
|
||||
|
||||
import "time"
|
||||
|
||||
type Product struct {
|
||||
Name string `json:"name"`
|
||||
URL string `json:"url"`
|
||||
InStock bool `json:"in_stock"`
|
||||
PriceText string `json:"price_text"`
|
||||
Price float64 `json:"price"`
|
||||
Name string `json:"name"`
|
||||
URL string `json:"url"`
|
||||
ImageURL string `json:"image_url"`
|
||||
InStock bool `json:"in_stock"`
|
||||
PriceText string `json:"price_text"`
|
||||
Price float64 `json:"price"`
|
||||
ReleaseDate *time.Time `json:"release_date"`
|
||||
}
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
package models
|
||||
|
||||
import "github.com/gocolly/colly"
|
||||
|
||||
type ShopFactory func(collectorOptions []func(*colly.Collector)) Shop
|
||||
type ShopFactory func() Shop
|
||||
|
||||
type Shop interface {
|
||||
Get(url string) (*Product, error)
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
package akira
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/fmartingr/bazaar/pkg/models"
|
||||
"github.com/gocolly/colly"
|
||||
)
|
||||
|
||||
var Domains = []string{"www.akiracomics.com", "akiracomics.com"}
|
||||
|
||||
type AkiraShop struct {
|
||||
collector *colly.Collector
|
||||
domains []string
|
||||
items map[string]*models.Product
|
||||
itemsMu sync.RWMutex
|
||||
}
|
||||
|
||||
func (s *AkiraShop) init() {
|
||||
s.collector.OnRequest(func(r *colly.Request) {
|
||||
fmt.Println("Visiting", r.URL.String())
|
||||
})
|
||||
|
||||
s.collector.OnHTML(`div.panel-ficha-producto div.panel-grupo`, func(h *colly.HTMLElement) {
|
||||
priceText := h.ChildText(`[itemprop="price"]`)
|
||||
priceNum, err := strconv.ParseFloat(strings.ReplaceAll(strings.Split(priceText, " ")[0], ",", "."), 64)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
}
|
||||
|
||||
s.itemsMu.Lock()
|
||||
s.items[h.Request.URL.String()] = &models.Product{
|
||||
Name: h.ChildText("h1.titulo"),
|
||||
InStock: h.ChildText("span.disponibilidad") == "Disponible",
|
||||
URL: h.Request.URL.String(),
|
||||
PriceText: priceText,
|
||||
Price: priceNum,
|
||||
}
|
||||
s.itemsMu.Unlock()
|
||||
})
|
||||
}
|
||||
|
||||
func (s *AkiraShop) Get(url string) (*models.Product, error) {
|
||||
if err := s.collector.Visit(url); err != nil {
|
||||
return nil, fmt.Errorf("error getting product information: %s", err)
|
||||
}
|
||||
s.itemsMu.RLock()
|
||||
defer s.itemsMu.RUnlock()
|
||||
return s.items[url], nil
|
||||
}
|
||||
|
||||
func NewAkiraShopFactory() models.ShopFactory {
|
||||
return func(collectorOptions []func(*colly.Collector)) models.Shop {
|
||||
shop := AkiraShop{
|
||||
collector: colly.NewCollector(collectorOptions...),
|
||||
domains: Domains,
|
||||
items: make(map[string]*models.Product),
|
||||
itemsMu: sync.RWMutex{},
|
||||
}
|
||||
shop.init()
|
||||
return &shop
|
||||
}
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
package akiracomics
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/fmartingr/bazaar/pkg/models"
|
||||
)
|
||||
|
||||
// Domains lists the hosts served by the akiracomics.com shop.
var Domains = []string{
	"www.akiracomics.com",
	"akiracomics.com",
}

// AkiraShop retrieves product information from akiracomics.com pages.
type AkiraShop struct {
	// domains holds the hosts this shop was created for.
	domains []string
}
|
||||
|
||||
func (s *AkiraShop) Get(url string) (*models.Product, error) {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error retrieving url: %s", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("error retrieving url: %d %s", res.StatusCode, res.Status)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing body: %s", err)
|
||||
}
|
||||
|
||||
product := models.Product{
|
||||
URL: url,
|
||||
}
|
||||
|
||||
doc.Find(`div.panel-ficha-producto div.panel-grupo`).Each(func(i int, s *goquery.Selection) {
|
||||
priceText := s.Find(`[itemprop="price"]`).Text()
|
||||
priceNum, _ := strconv.ParseFloat(strings.ReplaceAll(strings.Split(priceText, " ")[0], ",", "."), 64)
|
||||
// TODO: error logging
|
||||
|
||||
// Javascript injects a img.zoomImg without the height/widht paramenters, we could remove the parameters
|
||||
// from the URL we get. It's most likely that the "thumbnail" is enough for most use cases.
|
||||
imgURL, _ := s.Find("a.portada img").Attr("src")
|
||||
// TODO: error logging
|
||||
|
||||
product.Name = s.Find("h1.titulo").Text()
|
||||
product.InStock = s.Find("span.disponibilidad").Text() == "Disponible"
|
||||
product.ImageURL = "https://" + Domains[0] + imgURL
|
||||
product.PriceText = priceText
|
||||
product.Price = priceNum
|
||||
})
|
||||
|
||||
doc.Find(`.panel-descripcion-propiedades`).Each(func(i int, s *goquery.Selection) {
|
||||
releaseDateText := s.Find(".fechaedicion .valor-propiedad").Text()
|
||||
releaseDate, _ := time.Parse("02/01/2006", releaseDateText)
|
||||
// TODO: error logging
|
||||
product.ReleaseDate = &releaseDate
|
||||
})
|
||||
return &product, nil
|
||||
}
|
||||
|
||||
func NewAkiraShopFactory() models.ShopFactory {
|
||||
return func() models.Shop {
|
||||
shop := AkiraShop{
|
||||
domains: Domains,
|
||||
}
|
||||
return &shop
|
||||
}
|
||||
}
|
|
@ -0,0 +1,113 @@
|
|||
package amazon
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/fmartingr/bazaar/pkg/models"
|
||||
"github.com/fmartingr/bazaar/pkg/utils"
|
||||
"github.com/goodsign/monday"
|
||||
)
|
||||
|
||||
var (
	// Domains lists the Amazon hosts served by this shop.
	Domains = []string{"www.amazon.es", "www.amazon.com"}

	// priceSelectors are the CSS selectors tried, in order, to find the
	// price text on a product page.
	priceSelectors = []string{
		"#buybox span.a-color-price",
		"#tp_price_block_total_price_ww",
		"#price",
	}

	// releaseDateLayoutByDomain maps each host in Domains to the layout used
	// to parse the publication date shown on that site.
	releaseDateLayoutByDomain = map[string]string{
		Domains[0]: "2 January 2006",
		Domains[1]: "January 2, 2006",
	}
)

// AmazonShop retrieves product information from Amazon product pages.
type AmazonShop struct {
	// domains holds the hosts this shop was created for.
	domains []string
}
|
||||
|
||||
func (s *AmazonShop) Get(url string) (*models.Product, error) {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error retrieving url: %s", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("error retrieving url: %d %s", res.StatusCode, res.Status)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing body: %s", err)
|
||||
}
|
||||
|
||||
log.Println(doc.Text())
|
||||
|
||||
log.Printf("len(nodes) = %d", len(doc.Nodes))
|
||||
log.Printf("len(nodes.children) = %d", len(doc.Children().Nodes))
|
||||
|
||||
product := models.Product{
|
||||
URL: url,
|
||||
}
|
||||
|
||||
var tentativePrice string
|
||||
for _, selector := range priceSelectors {
|
||||
tentativePrice = doc.Find(selector).First().Text()
|
||||
if tentativePrice != "" {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if tentativePrice != "" {
|
||||
priceNum, err := strconv.ParseFloat(utils.ExtractPrice(tentativePrice), 64)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
} else {
|
||||
product.PriceText = tentativePrice
|
||||
product.Price = priceNum
|
||||
}
|
||||
}
|
||||
|
||||
product.Name = strings.TrimSpace(doc.Find("#productTitle").Text())
|
||||
|
||||
imagesJSON, _ := doc.Find("#main-image-container img").Attr("data-a-dynamic-image")
|
||||
// TODO: error handling
|
||||
var images map[string]interface{}
|
||||
json.Unmarshal([]byte(imagesJSON), &images)
|
||||
// TODO: error handling
|
||||
var lastImage string
|
||||
for key := range images {
|
||||
lastImage = key
|
||||
}
|
||||
product.ImageURL = lastImage
|
||||
|
||||
releaseDateElement := doc.Find(".book_details-publication_date")
|
||||
if len(releaseDateElement.Nodes) > 0 {
|
||||
releaseDateRaw := releaseDateElement.Parent().Parent().Find(".rpi-attribute-value").Text()
|
||||
|
||||
releaseDate, err := utils.ParseReleaseDate(releaseDateLayoutByDomain[res.Request.URL.Host], strings.TrimSpace(releaseDateRaw), monday.LocaleEsES)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
} else {
|
||||
product.ReleaseDate = releaseDate
|
||||
}
|
||||
}
|
||||
|
||||
product.InStock = !strings.Contains(doc.Find("#availability").Text(), "No disponible")
|
||||
|
||||
return &product, nil
|
||||
}
|
||||
|
||||
func NewAmazonShopFactory() models.ShopFactory {
|
||||
return func() models.Shop {
|
||||
shop := AmazonShop{
|
||||
domains: Domains,
|
||||
}
|
||||
return &shop
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package heroesdepapel
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/fmartingr/bazaar/pkg/models"
|
||||
)
|
||||
|
||||
// Domains lists the hosts served by the heroesdepapel.es shop.
var Domains = []string{
	"www.heroesdepapel.es",
}

// HeroesDePapelShop retrieves product information from heroesdepapel.es pages.
type HeroesDePapelShop struct {
	// domains holds the hosts this shop was created for.
	domains []string
}
|
||||
|
||||
func (s *HeroesDePapelShop) Get(url string) (*models.Product, error) {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error retrieving url: %s", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("error retrieving url: %d %s", res.StatusCode, res.Status)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing body: %s", err)
|
||||
}
|
||||
|
||||
product := models.Product{
|
||||
URL: url,
|
||||
}
|
||||
|
||||
doc.Find(".section-product-details").Each(func(i int, s *goquery.Selection) {
|
||||
priceText := strings.TrimSpace(s.Find(`.productos-price`).Text())
|
||||
priceNum, _ := strconv.ParseFloat(strings.ReplaceAll(strings.Split(priceText, " ")[0], ",", "."), 64)
|
||||
// TODO: error logging
|
||||
|
||||
imgURL, _ := s.Find(".carousel-inner .active img").Attr("src")
|
||||
// TODO: error logging
|
||||
|
||||
product.Name = strings.TrimSpace(s.Find(".product-title").Nodes[0].FirstChild.Data)
|
||||
product.InStock = s.Find(".btn-productos-add-to-cart").Text() != "Agotado"
|
||||
product.ImageURL = "https://" + Domains[0] + "/" + imgURL
|
||||
product.PriceText = priceText
|
||||
product.Price = priceNum
|
||||
// releaseDateText := strings.Split(s.Find(".tab-inner-content section-product-description h4").Text(), "A LA VENTA EL")
|
||||
// if len(releaseDateText) > 0 {
|
||||
// releaseDate, _ := time.Parse("2 DE January", releaseDateText[1])
|
||||
// // TODO: error logging
|
||||
// product.ReleaseDate = releaseDate
|
||||
// }
|
||||
})
|
||||
|
||||
return &product, nil
|
||||
}
|
||||
|
||||
func NewHeroesDePapelShopFactory() models.ShopFactory {
|
||||
return func() models.Shop {
|
||||
shop := HeroesDePapelShop{
|
||||
domains: Domains,
|
||||
}
|
||||
return &shop
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package steam
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/fmartingr/bazaar/pkg/models"
|
||||
)
|
||||
|
||||
// Domains lists the hosts served by the Steam store shop.
var Domains = []string{
	"store.steampowered.com",
}

// SteamShop retrieves product information from Steam store pages.
type SteamShop struct {
	// domains holds the hosts this shop was created for.
	domains []string
}
|
||||
|
||||
func (s *SteamShop) Get(url string) (*models.Product, error) {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error retrieving url: %s", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
if res.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("error retrieving url: %d %s", res.StatusCode, res.Status)
|
||||
}
|
||||
|
||||
doc, err := goquery.NewDocumentFromReader(res.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing body: %s", err)
|
||||
}
|
||||
|
||||
product := models.Product{
|
||||
URL: url,
|
||||
}
|
||||
|
||||
doc.Find(`.page_content_ctn`).Each(func(i int, s *goquery.Selection) {
|
||||
priceText := strings.TrimSpace(s.Find(`.game_purchase_action .price`).Text())
|
||||
priceValue, _ := s.Find(`.game_purchase_price.price`).Attr("data-price-final")
|
||||
priceNum, _ := strconv.ParseFloat(strings.ReplaceAll(strings.Split(priceValue, " ")[0], ",", "."), 64)
|
||||
// TODO: error logging
|
||||
|
||||
imgURL, _ := s.Find("img.game_header_image_full").Attr("src")
|
||||
// TODO: error logging
|
||||
|
||||
product.Name = s.Find("#appHubAppName").Text()
|
||||
product.InStock = len(s.Find(".game_area_comingsoon").Nodes) == 0
|
||||
product.ImageURL = imgURL
|
||||
product.PriceText = priceText
|
||||
product.Price = priceNum / 100
|
||||
releaseDateText := s.Find(".release_date .date").Text()
|
||||
releaseDate, _ := time.Parse("2 Jan, 2006", releaseDateText)
|
||||
// TODO: error logging
|
||||
product.ReleaseDate = &releaseDate
|
||||
})
|
||||
|
||||
return &product, nil
|
||||
}
|
||||
|
||||
func NewSteamShopFactory() models.ShopFactory {
|
||||
return func() models.Shop {
|
||||
shop := SteamShop{
|
||||
domains: Domains,
|
||||
}
|
||||
return &shop
|
||||
}
|
||||
}
|
|
@ -0,0 +1,11 @@
|
|||
package utils
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// nonPriceChars matches every run of characters that cannot be part of a
// numeric price (anything other than digits, commas and dots). It is compiled
// once instead of on every ExtractPrice call.
var nonPriceChars = regexp.MustCompile("[^0-9,.]+")

// ExtractPrice strips currency symbols and other text from raw and returns
// the amount with a dot as decimal mark, ready for strconv.ParseFloat.
//
// When the amount contains both commas and dots, the rightmost separator is
// taken as the decimal mark and the other kind is dropped as a thousands
// separator ("1.234,56" and "1,234.56" both become "1234.56"). When it
// contains only commas, the first one becomes the decimal mark ("19,99"
// becomes "19.99"). Trailing separators, such as a sentence-ending period,
// are discarded. An input without digits or separators yields "".
func ExtractPrice(raw string) string {
	cleaned := strings.TrimRight(nonPriceChars.ReplaceAllString(raw, ""), ",.")

	lastComma := strings.LastIndex(cleaned, ",")
	lastDot := strings.LastIndex(cleaned, ".")

	switch {
	case lastComma >= 0 && lastDot > lastComma:
		// "1,234.56": commas group thousands, the dot is the decimal mark.
		return strings.ReplaceAll(cleaned, ",", "")
	case lastDot >= 0 && lastComma > lastDot:
		// "1.234,56": dots group thousands, the comma is the decimal mark.
		cleaned = strings.ReplaceAll(cleaned, ".", "")
	}

	return strings.Replace(cleaned, ",", ".", 1)
}
|
|
@ -0,0 +1,12 @@
|
|||
package utils
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/goodsign/monday"
|
||||
)
|
||||
|
||||
func ParseReleaseDate(layout, raw string, locale monday.Locale) (*time.Time, error) {
|
||||
result, err := monday.Parse(layout, raw, locale)
|
||||
return &result, err
|
||||
}
|
Loading…
Reference in New Issue