diff --git a/cmd/server/main.go b/cmd/server/main.go index 90b190d..839ee09 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -5,15 +5,20 @@ import ( "fmt" "log" "net/http" - "net/url" "github.com/fmartingr/bazaar/pkg/manager" - "github.com/fmartingr/bazaar/pkg/shop/akira" + "github.com/fmartingr/bazaar/pkg/shop/akiracomics" + "github.com/fmartingr/bazaar/pkg/shop/amazon" + "github.com/fmartingr/bazaar/pkg/shop/heroesdepapel" + "github.com/fmartingr/bazaar/pkg/shop/steam" ) func main() { m := manager.NewManager() - m.Register(akira.Domains, akira.NewAkiraShopFactory()) + m.Register(akiracomics.Domains, akiracomics.NewAkiraShopFactory()) + m.Register(steam.Domains, steam.NewSteamShopFactory()) + m.Register(heroesdepapel.Domains, heroesdepapel.NewHeroesDePapelShopFactory()) + m.Register(amazon.Domains, amazon.NewAmazonShopFactory()) http.HandleFunc("/item", func(rw http.ResponseWriter, r *http.Request) { if err := r.ParseForm(); err != nil { @@ -21,18 +26,7 @@ func main() { return } - itemUrl, err := url.Parse(r.PostForm.Get("url")) - if err != nil { - rw.WriteHeader(400) - return - } - - shop := m.Get(itemUrl.Host) - if shop == nil { - rw.WriteHeader(400) - return - } - product, err := shop.Get(itemUrl.String()) + product, err := m.Retrieve(r.PostForm.Get("url")) if err != nil { rw.WriteHeader(500) return @@ -44,6 +38,8 @@ func main() { rw.Write(payload) }) + log.Println("starting server") + if err := http.ListenAndServe(":5001", http.DefaultServeMux); err != nil { log.Printf("Error: %s", err) } diff --git a/go.mod b/go.mod index 10b8150..dbdf6b5 100644 --- a/go.mod +++ b/go.mod @@ -3,14 +3,7 @@ module github.com/fmartingr/bazaar go 1.16 require ( - github.com/PuerkitoBio/goquery v1.8.0 // indirect - github.com/antchfx/htmlquery v1.2.4 // indirect - github.com/antchfx/xmlquery v1.3.8 // indirect - github.com/gobwas/glob v0.2.3 // indirect - github.com/gocolly/colly v1.2.0 - github.com/kennygrant/sanitize v1.2.4 // indirect - github.com/saintfish/chardet 
v0.0.0-20120816061221-3af4cd4741ca // indirect - github.com/temoto/robotstxt v1.1.2 // indirect + github.com/PuerkitoBio/goquery v1.8.0 + github.com/goodsign/monday v1.0.0 golang.org/x/net v0.0.0-20211208012354-db4efeb81f4b // indirect - google.golang.org/appengine v1.6.7 // indirect ) diff --git a/go.sum b/go.sum index 9e7af5d..a2f05c5 100644 --- a/go.sum +++ b/go.sum @@ -2,52 +2,13 @@ github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0g github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= -github.com/antchfx/htmlquery v1.2.4 h1:qLteofCMe/KGovBI6SQgmou2QNyedFUW+pE+BpeZ494= -github.com/antchfx/htmlquery v1.2.4/go.mod h1:2xO6iu3EVWs7R2JYqBbp8YzG50gj/ofqs5/0VZoDZLc= -github.com/antchfx/xmlquery v1.3.8 h1:dRnBQM3Vk5BVJFvFwsHOLAox+mEiNw5ZusaUNCrEdoU= -github.com/antchfx/xmlquery v1.3.8/go.mod h1:wojC/BxjEkjJt6dPiAqUzoXO5nIMWtxHS8PD8TmN4ks= -github.com/antchfx/xpath v1.2.0 h1:mbwv7co+x0RwgeGAOHdrKy89GvHaGvxxBtPK0uF9Zr8= -github.com/antchfx/xpath v1.2.0/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= -github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= -github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= -github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= -github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod 
h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= -github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= -github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg= -github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +github.com/goodsign/monday v1.0.0 h1:Yyk/s/WgudMbAJN6UWSU5xAs8jtNewfqtVblAlw0yoc= 
+github.com/goodsign/monday v1.0.0/go.mod h1:r4T4breXpoFwspQNM+u2sLxJb2zyTaxVGqUfTBjWOu8= golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211208012354-db4efeb81f4b h1:MWaHNqZy3KTpuTMAGvv+Kw+ylsEpmyJZizz1dqxnu28= golang.org/x/net v0.0.0-20211208012354-db4efeb81f4b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= diff --git a/pkg/manager/main.go b/pkg/manager/main.go index f6d5975..7ada88f 100644 --- a/pkg/manager/main.go +++ b/pkg/manager/main.go @@ -2,20 +2,17 @@ package manager import ( "fmt" + "net/url" "github.com/fmartingr/bazaar/pkg/models" - "github.com/gocolly/colly" ) type Manager struct { - domains map[string]models.Shop - collectorOptions 
[]func(*colly.Collector) + domains map[string]models.Shop } func (m *Manager) Register(domains []string, shopFactory models.ShopFactory) error { - options := m.collectorOptions - options = append(options, colly.AllowedDomains(domains...)) - shop := shopFactory(options) + shop := shopFactory() for _, domain := range domains { if _, exists := m.domains[domain]; exists { @@ -28,20 +25,30 @@ func (m *Manager) Register(domains []string, shopFactory models.ShopFactory) err return nil } -func (m *Manager) Get(host string) models.Shop { +func (m *Manager) GetShop(host string) models.Shop { shop, exists := m.domains[host] if !exists { return nil } - return shop } +func (m *Manager) Retrieve(productURL string) (*models.Product, error) { + itemUrl, err := url.Parse(productURL) + if err != nil { + return nil, fmt.Errorf("error parsing url: %s", err) + } + + shop := m.GetShop(itemUrl.Host) + if shop == nil { + return nil, fmt.Errorf("shop not found for domain") + } + + return shop.Get(productURL) +} + func NewManager() Manager { return Manager{ - collectorOptions: []func(*colly.Collector){ - colly.UserAgent("bazaar/0.0.1"), - }, domains: make(map[string]models.Shop), } } diff --git a/pkg/models/product.go b/pkg/models/product.go index 8862dbb..a186958 100644 --- a/pkg/models/product.go +++ b/pkg/models/product.go @@ -1,9 +1,13 @@ package models +import "time" + type Product struct { - Name string `json:"name"` - URL string `json:"url"` - InStock bool `json:"in_stock"` - PriceText string `json:"price_text"` - Price float64 `json:"price"` + Name string `json:"name"` + URL string `json:"url"` + ImageURL string `json:"image_url"` + InStock bool `json:"in_stock"` + PriceText string `json:"price_text"` + Price float64 `json:"price"` + ReleaseDate *time.Time `json:"release_date"` } diff --git a/pkg/models/shop.go b/pkg/models/shop.go index 9fe8cb6..aee340b 100644 --- a/pkg/models/shop.go +++ b/pkg/models/shop.go @@ -1,8 +1,6 @@ package models -import "github.com/gocolly/colly" - -type 
ShopFactory func(collectorOptions []func(*colly.Collector)) Shop +type ShopFactory func() Shop type Shop interface { Get(url string) (*Product, error) diff --git a/pkg/shop/akira/akira.go b/pkg/shop/akira/akira.go deleted file mode 100644 index 6b2e0c1..0000000 --- a/pkg/shop/akira/akira.go +++ /dev/null @@ -1,66 +0,0 @@ -package akira - -import ( - "fmt" - "strconv" - "strings" - "sync" - - "github.com/fmartingr/bazaar/pkg/models" - "github.com/gocolly/colly" -) - -var Domains = []string{"www.akiracomics.com", "akiracomics.com"} - -type AkiraShop struct { - collector *colly.Collector - domains []string - items map[string]*models.Product - itemsMu sync.RWMutex -} - -func (s *AkiraShop) init() { - s.collector.OnRequest(func(r *colly.Request) { - fmt.Println("Visiting", r.URL.String()) - }) - - s.collector.OnHTML(`div.panel-ficha-producto div.panel-grupo`, func(h *colly.HTMLElement) { - priceText := h.ChildText(`[itemprop="price"]`) - priceNum, err := strconv.ParseFloat(strings.ReplaceAll(strings.Split(priceText, " ")[0], ",", "."), 64) - if err != nil { - fmt.Println(err) - } - - s.itemsMu.Lock() - s.items[h.Request.URL.String()] = &models.Product{ - Name: h.ChildText("h1.titulo"), - InStock: h.ChildText("span.disponibilidad") == "Disponible", - URL: h.Request.URL.String(), - PriceText: priceText, - Price: priceNum, - } - s.itemsMu.Unlock() - }) -} - -func (s *AkiraShop) Get(url string) (*models.Product, error) { - if err := s.collector.Visit(url); err != nil { - return nil, fmt.Errorf("error getting product information: %s", err) - } - s.itemsMu.RLock() - defer s.itemsMu.RUnlock() - return s.items[url], nil -} - -func NewAkiraShopFactory() models.ShopFactory { - return func(collectorOptions []func(*colly.Collector)) models.Shop { - shop := AkiraShop{ - collector: colly.NewCollector(collectorOptions...), - domains: Domains, - items: make(map[string]*models.Product), - itemsMu: sync.RWMutex{}, - } - shop.init() - return &shop - } -} diff --git 
a/pkg/shop/akiracomics/akira.go b/pkg/shop/akiracomics/akira.go new file mode 100644 index 0000000..751fda6 --- /dev/null +++ b/pkg/shop/akiracomics/akira.go @@ -0,0 +1,72 @@ +package akiracomics + +import ( + "fmt" + "net/http" + "strconv" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + "github.com/fmartingr/bazaar/pkg/models" +) + +var Domains = []string{"www.akiracomics.com", "akiracomics.com"} + +type AkiraShop struct { + domains []string +} + +func (s *AkiraShop) Get(url string) (*models.Product, error) { + res, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("error retrieving url: %s", err) + } + defer res.Body.Close() + if res.StatusCode != 200 { + return nil, fmt.Errorf("error retrieving url: %d %s", res.StatusCode, res.Status) + } + + doc, err := goquery.NewDocumentFromReader(res.Body) + if err != nil { + return nil, fmt.Errorf("error parsing body: %s", err) + } + + product := models.Product{ + URL: url, + } + + doc.Find(`div.panel-ficha-producto div.panel-grupo`).Each(func(i int, s *goquery.Selection) { + priceText := s.Find(`[itemprop="price"]`).Text() + priceNum, _ := strconv.ParseFloat(strings.ReplaceAll(strings.Split(priceText, " ")[0], ",", "."), 64) + // TODO: error logging + + // JavaScript injects an img.zoomImg without the height/width parameters; we could remove the parameters 
+ imgURL, _ := s.Find("a.portada img").Attr("src") + // TODO: error logging + + product.Name = s.Find("h1.titulo").Text() + product.InStock = s.Find("span.disponibilidad").Text() == "Disponible" + product.ImageURL = "https://" + Domains[0] + imgURL + product.PriceText = priceText + product.Price = priceNum + }) + + doc.Find(`.panel-descripcion-propiedades`).Each(func(i int, s *goquery.Selection) { + releaseDateText := s.Find(".fechaedicion .valor-propiedad").Text() + releaseDate, _ := time.Parse("02/01/2006", releaseDateText) + // TODO: error logging + product.ReleaseDate = &releaseDate + }) + return &product, nil +} + +func NewAkiraShopFactory() models.ShopFactory { + return func() models.Shop { + shop := AkiraShop{ + domains: Domains, + } + return &shop + } +} diff --git a/pkg/shop/amazon/amazon.es.go b/pkg/shop/amazon/amazon.es.go new file mode 100644 index 0000000..be74309 --- /dev/null +++ b/pkg/shop/amazon/amazon.es.go @@ -0,0 +1,113 @@ +package amazon + +import ( + "encoding/json" + "fmt" + "log" + "net/http" + "strconv" + "strings" + + "github.com/PuerkitoBio/goquery" + "github.com/fmartingr/bazaar/pkg/models" + "github.com/fmartingr/bazaar/pkg/utils" + "github.com/goodsign/monday" +) + +var Domains = []string{"www.amazon.es", "www.amazon.com"} + +type AmazonShop struct { + domains []string +} + +var priceSelectors = []string{ + "#buybox span.a-color-price", + "#tp_price_block_total_price_ww", + "#price", +} + +var releaseDateLayoutByDomain = map[string]string{ + Domains[0]: "2 January 2006", + Domains[1]: "January 2, 2006", +} + +func (s *AmazonShop) Get(url string) (*models.Product, error) { + res, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("error retrieving url: %s", err) + } + defer res.Body.Close() + if res.StatusCode != 200 { + return nil, fmt.Errorf("error retrieving url: %d %s", res.StatusCode, res.Status) + } + + doc, err := goquery.NewDocumentFromReader(res.Body) + if err != nil { + return nil, fmt.Errorf("error parsing body: 
%s", err) + } + + log.Println(doc.Text()) + + log.Printf("len(nodes) = %d", len(doc.Nodes)) + log.Printf("len(nodes.children) = %d", len(doc.Children().Nodes)) + + product := models.Product{ + URL: url, + } + + var tentativePrice string + for _, selector := range priceSelectors { + tentativePrice = doc.Find(selector).First().Text() + if tentativePrice != "" { + break + } + } + + if tentativePrice != "" { + priceNum, err := strconv.ParseFloat(utils.ExtractPrice(tentativePrice), 64) + if err != nil { + log.Println(err) + } else { + product.PriceText = tentativePrice + product.Price = priceNum + } + } + + product.Name = strings.TrimSpace(doc.Find("#productTitle").Text()) + + imagesJSON, _ := doc.Find("#main-image-container img").Attr("data-a-dynamic-image") + // TODO: error handling + var images map[string]interface{} + json.Unmarshal([]byte(imagesJSON), &images) + // TODO: error handling + var lastImage string + for key := range images { + lastImage = key + } + product.ImageURL = lastImage + + releaseDateElement := doc.Find(".book_details-publication_date") + if len(releaseDateElement.Nodes) > 0 { + releaseDateRaw := releaseDateElement.Parent().Parent().Find(".rpi-attribute-value").Text() + + releaseDate, err := utils.ParseReleaseDate(releaseDateLayoutByDomain[res.Request.URL.Host], strings.TrimSpace(releaseDateRaw), monday.LocaleEsES) + if err != nil { + log.Println(err) + } else { + product.ReleaseDate = releaseDate + } + } + + product.InStock = !strings.Contains(doc.Find("#availability").Text(), "No disponible") + + return &product, nil +} + +func NewAmazonShopFactory() models.ShopFactory { + return func() models.Shop { + shop := AmazonShop{ + domains: Domains, + } + return &shop + } +} diff --git a/pkg/shop/heroesdepapel/heroesdepapel.go b/pkg/shop/heroesdepapel/heroesdepapel.go new file mode 100644 index 0000000..a41525f --- /dev/null +++ b/pkg/shop/heroesdepapel/heroesdepapel.go @@ -0,0 +1,69 @@ +package heroesdepapel + +import ( + "fmt" + "net/http" + 
"strconv" + "strings" + + "github.com/PuerkitoBio/goquery" + "github.com/fmartingr/bazaar/pkg/models" +) + +var Domains = []string{"www.heroesdepapel.es"} + +type HeroesDePapelShop struct { + domains []string +} + +func (s *HeroesDePapelShop) Get(url string) (*models.Product, error) { + res, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("error retrieving url: %s", err) + } + defer res.Body.Close() + if res.StatusCode != 200 { + return nil, fmt.Errorf("error retrieving url: %d %s", res.StatusCode, res.Status) + } + + doc, err := goquery.NewDocumentFromReader(res.Body) + if err != nil { + return nil, fmt.Errorf("error parsing body: %s", err) + } + + product := models.Product{ + URL: url, + } + + doc.Find(".section-product-details").Each(func(i int, s *goquery.Selection) { + priceText := strings.TrimSpace(s.Find(`.productos-price`).Text()) + priceNum, _ := strconv.ParseFloat(strings.ReplaceAll(strings.Split(priceText, " ")[0], ",", "."), 64) + // TODO: error logging + + imgURL, _ := s.Find(".carousel-inner .active img").Attr("src") + // TODO: error logging + + product.Name = strings.TrimSpace(s.Find(".product-title").Nodes[0].FirstChild.Data) + product.InStock = s.Find(".btn-productos-add-to-cart").Text() != "Agotado" + product.ImageURL = "https://" + Domains[0] + "/" + imgURL + product.PriceText = priceText + product.Price = priceNum + // releaseDateText := strings.Split(s.Find(".tab-inner-content section-product-description h4").Text(), "A LA VENTA EL") + // if len(releaseDateText) > 0 { + // releaseDate, _ := time.Parse("2 DE January", releaseDateText[1]) + // // TODO: error logging + // product.ReleaseDate = releaseDate + // } + }) + + return &product, nil +} + +func NewHeroesDePapelShopFactory() models.ShopFactory { + return func() models.Shop { + shop := HeroesDePapelShop{ + domains: Domains, + } + return &shop + } +} diff --git a/pkg/shop/steam/steam.go b/pkg/shop/steam/steam.go new file mode 100644 index 0000000..0b70790 --- /dev/null +++ 
b/pkg/shop/steam/steam.go @@ -0,0 +1,69 @@ +package steam + +import ( + "fmt" + "net/http" + "strconv" + "strings" + "time" + + "github.com/PuerkitoBio/goquery" + "github.com/fmartingr/bazaar/pkg/models" +) + +var Domains = []string{"store.steampowered.com"} + +type SteamShop struct { + domains []string +} + +func (s *SteamShop) Get(url string) (*models.Product, error) { + res, err := http.Get(url) + if err != nil { + return nil, fmt.Errorf("error retrieving url: %s", err) + } + defer res.Body.Close() + if res.StatusCode != 200 { + return nil, fmt.Errorf("error retrieving url: %d %s", res.StatusCode, res.Status) + } + + doc, err := goquery.NewDocumentFromReader(res.Body) + if err != nil { + return nil, fmt.Errorf("error parsing body: %s", err) + } + + product := models.Product{ + URL: url, + } + + doc.Find(`.page_content_ctn`).Each(func(i int, s *goquery.Selection) { + priceText := strings.TrimSpace(s.Find(`.game_purchase_action .price`).Text()) + priceValue, _ := s.Find(`.game_purchase_price.price`).Attr("data-price-final") + priceNum, _ := strconv.ParseFloat(strings.ReplaceAll(strings.Split(priceValue, " ")[0], ",", "."), 64) + // TODO: error logging + + imgURL, _ := s.Find("img.game_header_image_full").Attr("src") + // TODO: error logging + + product.Name = s.Find("#appHubAppName").Text() + product.InStock = len(s.Find(".game_area_comingsoon").Nodes) == 0 + product.ImageURL = imgURL + product.PriceText = priceText + product.Price = priceNum / 100 + releaseDateText := s.Find(".release_date .date").Text() + releaseDate, _ := time.Parse("2 Jan, 2006", releaseDateText) + // TODO: error logging + product.ReleaseDate = &releaseDate + }) + + return &product, nil +} + +func NewSteamShopFactory() models.ShopFactory { + return func() models.Shop { + shop := SteamShop{ + domains: Domains, + } + return &shop + } +} diff --git a/pkg/utils/price.go b/pkg/utils/price.go new file mode 100644 index 0000000..f561341 --- /dev/null +++ b/pkg/utils/price.go @@ -0,0 +1,11 @@ +package 
utils + +import ( + "regexp" + "strings" +) + +func ExtractPrice(raw string) string { + re := regexp.MustCompile("[^0-9,.]+") + return strings.Replace(re.ReplaceAllString(raw, ""), ",", ".", 1) +} diff --git a/pkg/utils/release_date.go b/pkg/utils/release_date.go new file mode 100644 index 0000000..8209cfb --- /dev/null +++ b/pkg/utils/release_date.go @@ -0,0 +1,12 @@ +package utils + +import ( + "time" + + "github.com/goodsign/monday" +) + +func ParseReleaseDate(layout, raw string, locale monday.Locale) (*time.Time, error) { + result, err := monday.Parse(layout, raw, locale) + return &result, err +}