From fe33dd53600685e66bd350894f4ee7917c0d9579 Mon Sep 17 00:00:00 2001
From: Felipe M
Date: Sun, 2 Jan 2022 23:11:00 +0100
Subject: [PATCH] feat: proof of concept

---
 cmd/server/main.go | 50 +++++++++++++++++++++++++++++++
 go.mod | 16 ++++++++++
 go.sum | 53 +++++++++++++++++++++++++++++++++
 pkg/manager/main.go | 47 +++++++++++++++++++++++++++++
 pkg/models/product.go | 9 ++++++
 pkg/models/shop.go | 9 ++++++
 pkg/shop/akira/akira.go | 66 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 250 insertions(+)
 create mode 100644 cmd/server/main.go
 create mode 100644 go.mod
 create mode 100644 go.sum
 create mode 100644 pkg/manager/main.go
 create mode 100644 pkg/models/product.go
 create mode 100644 pkg/models/shop.go
 create mode 100644 pkg/shop/akira/akira.go

diff --git a/cmd/server/main.go b/cmd/server/main.go
new file mode 100644
index 0000000..90b190d
--- /dev/null
+++ b/cmd/server/main.go
@@ -0,0 +1,50 @@
+// Command server exposes an HTTP endpoint, /item, that looks up product
+// information for a URL belonging to one of the registered shops.
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+	"net/url"
+
+	"github.com/fmartingr/bazaar/pkg/manager"
+	"github.com/fmartingr/bazaar/pkg/shop/akira"
+)
+
+// main registers the known shops and serves the /item endpoint on port 5001.
+//
+// The endpoint reads the "url" field of the POSTed form, resolves the shop
+// from the URL host and answers with the product encoded as JSON.
+func main() {
+	m := manager.NewManager()
+	// Refuse to start when a shop cannot be registered instead of silently
+	// serving without it.
+	if err := m.Register(akira.Domains, akira.NewAkiraShopFactory()); err != nil {
+		log.Fatalf("Error: %s", err)
+	}
+
+	http.HandleFunc("/item", func(rw http.ResponseWriter, r *http.Request) {
+		if err := r.ParseForm(); err != nil {
+			// Report the malformed form with a 400 rather than an implicit 200.
+			http.Error(rw, fmt.Sprintf("ParseForm() err: %v", err), http.StatusBadRequest)
+			return
+		}
+
+		itemURL, err := url.Parse(r.PostForm.Get("url"))
+		if err != nil {
+			rw.WriteHeader(http.StatusBadRequest)
+			return
+		}
+
+		// A nil shop means no shop is registered for this host.
+		shop := m.Get(itemURL.Host)
+		if shop == nil {
+			rw.WriteHeader(http.StatusBadRequest)
+			return
+		}
+
+		product, err := shop.Get(itemURL.String())
+		if err != nil {
+			log.Printf("Error: %s", err)
+			rw.WriteHeader(http.StatusInternalServerError)
+			return
+		}
+		if product == nil {
+			// The shop answered but produced no product for this URL; reply
+			// 404 instead of a 200 response whose body is the JSON "null".
+			rw.WriteHeader(http.StatusNotFound)
+			return
+		}
+
+		payload, err := json.Marshal(product)
+		if err != nil {
+			log.Printf("Error: %s", err)
+			rw.WriteHeader(http.StatusInternalServerError)
+			return
+		}
+
+		rw.Header().Set("Content-Type", "application/json")
+		if _, err := rw.Write(payload); err != nil {
+			// The status line is already sent; all that is left is to log.
+			log.Printf("Error: %s", err)
+		}
+	})
+
+	// ListenAndServe only returns on failure, so exit with a non-zero status.
+	if err := http.ListenAndServe(":5001", http.DefaultServeMux); err != nil {
+		log.Fatalf("Error: %s", err)
+	}
+}
diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..10b8150 --- /dev/null +++ 
b/go.mod @@ -0,0 +1,16 @@ +module github.com/fmartingr/bazaar + +go 1.16 + +require ( + github.com/PuerkitoBio/goquery v1.8.0 // indirect + github.com/antchfx/htmlquery v1.2.4 // indirect + github.com/antchfx/xmlquery v1.3.8 // indirect + github.com/gobwas/glob v0.2.3 // indirect + github.com/gocolly/colly v1.2.0 + github.com/kennygrant/sanitize v1.2.4 // indirect + github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect + github.com/temoto/robotstxt v1.1.2 // indirect + golang.org/x/net v0.0.0-20211208012354-db4efeb81f4b // indirect + google.golang.org/appengine v1.6.7 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..9e7af5d --- /dev/null +++ b/go.sum @@ -0,0 +1,53 @@ +github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0gta/U= +github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI= +github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c= +github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA= +github.com/antchfx/htmlquery v1.2.4 h1:qLteofCMe/KGovBI6SQgmou2QNyedFUW+pE+BpeZ494= +github.com/antchfx/htmlquery v1.2.4/go.mod h1:2xO6iu3EVWs7R2JYqBbp8YzG50gj/ofqs5/0VZoDZLc= +github.com/antchfx/xmlquery v1.3.8 h1:dRnBQM3Vk5BVJFvFwsHOLAox+mEiNw5ZusaUNCrEdoU= +github.com/antchfx/xmlquery v1.3.8/go.mod h1:wojC/BxjEkjJt6dPiAqUzoXO5nIMWtxHS8PD8TmN4ks= +github.com/antchfx/xpath v1.2.0 h1:mbwv7co+x0RwgeGAOHdrKy89GvHaGvxxBtPK0uF9Zr8= +github.com/antchfx/xpath v1.2.0/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= +github.com/gocolly/colly v1.2.0 
h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= +github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= +github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= +github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg= +github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod 
h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.0.0-20211208012354-db4efeb81f4b h1:MWaHNqZy3KTpuTMAGvv+Kw+ylsEpmyJZizz1dqxnu28= +golang.org/x/net v0.0.0-20211208012354-db4efeb81f4b/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= diff --git a/pkg/manager/main.go b/pkg/manager/main.go new file mode 100644 index 0000000..f6d5975 --- /dev/null +++ b/pkg/manager/main.go @@ -0,0 +1,47 @@ +package 
manager
+
+import (
+	"fmt"
+
+	"github.com/fmartingr/bazaar/pkg/models"
+	"github.com/gocolly/colly"
+)
+
+// Manager maps host names to the shop that serves them.
+type Manager struct {
+	// domains holds the registered shop for each exact host name.
+	domains map[string]models.Shop
+	// collectorOptions are handed to every shop factory on registration.
+	collectorOptions []func(*colly.Collector)
+}
+
+// Register builds a shop with shopFactory and associates it with every entry
+// of domains.
+//
+// The factory receives the manager's collector options followed by a
+// colly.AllowedDomains option restricted to domains. An error is returned,
+// and nothing is registered, when any of the domains is already taken or is
+// listed more than once.
+func (m *Manager) Register(domains []string, shopFactory models.ShopFactory) error {
+	// Validate first so that a failed call leaves the manager untouched and
+	// does not build a shop that would be thrown away.
+	seen := make(map[string]struct{}, len(domains))
+	for _, domain := range domains {
+		_, registered := m.domains[domain]
+		_, repeated := seen[domain]
+		if registered || repeated {
+			return fmt.Errorf("domain %s is already registered", domain)
+		}
+		seen[domain] = struct{}{}
+	}
+
+	// Copy the shared options: appending to m.collectorOptions directly could
+	// write into its backing array and leak one shop's domains into another.
+	options := make([]func(*colly.Collector), 0, len(m.collectorOptions)+1)
+	options = append(options, m.collectorOptions...)
+	options = append(options, colly.AllowedDomains(domains...))
+	shop := shopFactory(options)
+
+	// Writing to a nil map panics; allow a zero-value Manager to be used.
+	if m.domains == nil {
+		m.domains = make(map[string]models.Shop, len(domains))
+	}
+	for _, domain := range domains {
+		m.domains[domain] = shop
+	}
+
+	return nil
+}
+
+// Get returns the shop registered for host, or nil when there is none.
+func (m *Manager) Get(host string) models.Shop {
+	// A missing key yields the zero value of the interface, which is nil.
+	return m.domains[host]
+}
+
+// NewManager returns a Manager with no registered shops whose collectors
+// identify themselves with the "bazaar/0.0.1" user agent.
+func NewManager() Manager {
+	return Manager{
+		collectorOptions: []func(*colly.Collector){
+			colly.UserAgent("bazaar/0.0.1"),
+		},
+		domains: make(map[string]models.Shop),
+	}
+}
diff --git a/pkg/models/product.go b/pkg/models/product.go
new file mode 100644
index 0000000..8862dbb
--- /dev/null
+++ b/pkg/models/product.go
@@ -0,0 +1,9 @@
+package models
+
+// Product is the information a shop returns about a single item. It is
+// serialised to JSON using the field tags below. PriceText carries the price
+// as text and Price its numeric value.
+type Product struct {
+	Name      string  `json:"name"`
+	URL       string  `json:"url"`
+	InStock   bool    `json:"in_stock"`
+	PriceText string  `json:"price_text"`
+	Price     float64 `json:"price"`
+}
diff --git a/pkg/models/shop.go b/pkg/models/shop.go
new file mode 100644
index 0000000..9fe8cb6
--- /dev/null
+++ b/pkg/models/shop.go
@@ -0,0 +1,9 @@
+package models
+
+import "github.com/gocolly/colly"
+
+// ShopFactory builds a Shop whose collector applies collectorOptions.
+type ShopFactory func(collectorOptions []func(*colly.Collector)) Shop
+
+// Shop retrieves product information from a single store.
+type Shop interface {
+	// Get returns the product found at url.
+	Get(url string) (*Product, error)
+}
diff --git a/pkg/shop/akira/akira.go b/pkg/shop/akira/akira.go
new file mode 100644
index 0000000..6b2e0c1
--- /dev/null
+++ b/pkg/shop/akira/akira.go
@@ -0,0 +1,66 @@
+package akira
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/fmartingr/bazaar/pkg/models"
+	"github.com/gocolly/colly"
+)
+
+// Domains lists the host names served by the Akira shop.
+var Domains = 
[]string{"www.akiracomics.com", "akiracomics.com"}
+
+// AkiraShop scrapes product pages with a colly collector and keeps the
+// products it has extracted in memory, keyed by page URL.
+type AkiraShop struct {
+	collector *colly.Collector
+	domains   []string
+	// items maps the URL of a scraped page to the product found on it.
+	items map[string]*models.Product
+	// itemsMu guards items.
+	itemsMu sync.RWMutex
+}
+
+// init installs the collector callbacks: one that logs every request and one
+// that turns a matching product panel into a models.Product stored in s.items.
+func (s *AkiraShop) init() {
+	s.collector.OnRequest(func(r *colly.Request) {
+		fmt.Println("Visiting", r.URL.String())
+	})
+
+	s.collector.OnHTML(`div.panel-ficha-producto div.panel-grupo`, func(h *colly.HTMLElement) {
+		pageURL := h.Request.URL.String()
+		priceText := h.ChildText(`[itemprop="price"]`)
+
+		// The amount is taken from the first space-separated token, with a
+		// decimal comma turned into a dot (presumably text such as
+		// "12,95 €" — TODO confirm against the live markup).
+		amount := strings.ReplaceAll(strings.Split(priceText, " ")[0], ",", ".")
+		priceNum, err := strconv.ParseFloat(amount, 64)
+		if err != nil {
+			// Best effort: the product is still stored, with a zero Price and
+			// the unparsed text in PriceText.
+			fmt.Println(err)
+		}
+
+		product := &models.Product{
+			Name:      h.ChildText("h1.titulo"),
+			InStock:   h.ChildText("span.disponibilidad") == "Disponible",
+			URL:       pageURL,
+			PriceText: priceText,
+			Price:     priceNum,
+		}
+
+		s.itemsMu.Lock()
+		s.items[pageURL] = product
+		s.itemsMu.Unlock()
+	})
+}
+
+// Get visits url and returns the product scraped from it.
+//
+// When the collector reports that url was already visited, the product stored
+// by the earlier visit is returned instead of failing. An error is returned
+// when the visit fails for any other reason or when no product is stored
+// under url.
+func (s *AkiraShop) Get(url string) (*models.Product, error) {
+	// The collector is created without AllowURLRevisit, so a repeated lookup
+	// of the same URL yields colly.ErrAlreadyVisited; that is not a failure
+	// here because the first visit already filled s.items.
+	if err := s.collector.Visit(url); err != nil && err != colly.ErrAlreadyVisited {
+		return nil, fmt.Errorf("error getting product information: %w", err)
+	}
+
+	s.itemsMu.RLock()
+	defer s.itemsMu.RUnlock()
+
+	// NOTE(review): products are stored under the request URL reported by
+	// colly; if that differs from url (e.g. after a redirect) this misses.
+	product, found := s.items[url]
+	if !found {
+		return nil, fmt.Errorf("error getting product information: no product found at %s", url)
+	}
+	return product, nil
+}
+
+// NewAkiraShopFactory returns a models.ShopFactory that builds an AkiraShop
+// whose collector is created with the options passed to the factory.
+func NewAkiraShopFactory() models.ShopFactory {
+	return func(collectorOptions []func(*colly.Collector)) models.Shop {
+		// Build through a pointer so the embedded mutex is never copied; its
+		// zero value is ready to use.
+		shop := &AkiraShop{
+			collector: colly.NewCollector(collectorOptions...),
+			domains:   Domains,
+			items:     make(map[string]*models.Product),
+		}
+		shop.init()
+		return shop
+	}
+}