price scrapper
This commit is contained in:
parent
d19f5f7621
commit
8922b8a4f0
7 changed files with 426 additions and 0 deletions
167
internal/scrapper/service.go
Normal file
167
internal/scrapper/service.go
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
package scrapper
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/chromedp/chromedp"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"regexp"
|
||||
"scrapper-mandarake/internal/common"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func (s *Scrapper) getPrice(ctx context.Context, task common.Task) (int32, error) {
|
||||
var (
|
||||
singlePrice string
|
||||
rangedPrice string
|
||||
prices []int32
|
||||
)
|
||||
|
||||
if err := chromedp.Run(ctx,
|
||||
chromedp.Navigate(task.Link),
|
||||
chromedp.WaitReady("body"),
|
||||
chromedp.Text(`div.price`, &singlePrice, chromedp.ByQuery, chromedp.AtLeast(0)),
|
||||
chromedp.Text(`div.price_range`, &rangedPrice, chromedp.ByQuery, chromedp.AtLeast(0)),
|
||||
); err != nil {
|
||||
log.WithError(err).Error(pkgLogHeader + logGetPrice + "failed to get single price tag")
|
||||
return zeroPrice, err
|
||||
}
|
||||
singlePrice = strings.TrimSpace(singlePrice)
|
||||
prices = append(prices, s.getSinglePriceWithTax(singlePrice))
|
||||
|
||||
rangedPrice = strings.TrimSpace(rangedPrice)
|
||||
if rangedPrice != "" {
|
||||
prices = append(prices, s.getMinimalPriceFromRangeWithTax(rangedPrice))
|
||||
}
|
||||
|
||||
minimal := slices.Min(prices)
|
||||
log.Infof(pkgLogHeader+"uuid: %s, price: %d", task.MerchUuid, minimal)
|
||||
|
||||
return minimal, nil
|
||||
}
|
||||
|
||||
func (s *Scrapper) getMinimalPrice(sessionCtx context.Context, task common.Task) int32 {
|
||||
var (
|
||||
singlePrice string
|
||||
rangedPrice string
|
||||
)
|
||||
|
||||
if err := chromedp.Run(sessionCtx,
|
||||
chromedp.Navigate(task.Link),
|
||||
chromedp.WaitVisible("body", chromedp.ByQuery),
|
||||
chromedp.Evaluate(`(document.querySelector('div.price')?.innerText || '').trim()`, &singlePrice),
|
||||
chromedp.Evaluate(`(document.querySelector('div.price_range')?.innerText || '').trim()`, &rangedPrice),
|
||||
); err != nil {
|
||||
return zeroPrice
|
||||
}
|
||||
|
||||
minimal := s.processPrices(singlePrice, rangedPrice)
|
||||
log.Infof(pkgLogHeader+"uuid: %s, price: %d", task.MerchUuid, minimal)
|
||||
return minimal
|
||||
}
|
||||
|
||||
func (s *Scrapper) processPrices(singlePrice, rangedPrice string) int32 {
|
||||
var prices []int32
|
||||
|
||||
//in case of any errors or no price return zeroPrice const
|
||||
//if success add to prices slice
|
||||
if singlePrice != "" {
|
||||
singlePrice = strings.TrimSpace(singlePrice)
|
||||
counted, err := s.parseSinglePrice(singlePrice)
|
||||
if err != nil {
|
||||
log.WithFields(log.Fields{
|
||||
"err": err.Error(),
|
||||
"singlePrice": singlePrice,
|
||||
}).Error(pkgLogHeader + logGetPrice + "failed to parse single price, returning zero price")
|
||||
return zeroPrice
|
||||
}
|
||||
prices = append(prices, counted)
|
||||
} else {
|
||||
log.Warn(pkgLogHeader + logGetPrice + "single price not found")
|
||||
return zeroPrice
|
||||
}
|
||||
|
||||
//optional, adds price only if no errors and has non zero value
|
||||
if rangedPrice != "" {
|
||||
rangedPrice = strings.TrimSpace(rangedPrice)
|
||||
counted, err := s.parseRangedPrice(rangedPrice)
|
||||
if err != nil {
|
||||
log.WithFields(log.Fields{
|
||||
"err": err.Error(),
|
||||
"rangedPrice": rangedPrice,
|
||||
}).Error(pkgLogHeader + logGetPrice + "failed to parse ranged price")
|
||||
} else {
|
||||
if counted > 0 {
|
||||
prices = append(prices, counted)
|
||||
}
|
||||
}
|
||||
}
|
||||
return slices.Min(prices)
|
||||
}
|
||||
|
||||
func (s *Scrapper) getSinglePriceWithTax(rawPrice string) int32 {
|
||||
re := regexp.MustCompile(`(\d+)\s*円`)
|
||||
matches := re.FindStringSubmatch(rawPrice)
|
||||
if len(matches) < 2 {
|
||||
log.Error("Mandarake | Single price not found, returning zero price")
|
||||
return zeroPrice
|
||||
}
|
||||
|
||||
priceStr := matches[1]
|
||||
price, err := strconv.Atoi(priceStr)
|
||||
if err != nil {
|
||||
log.Error("Mandarake | Failed to convert single price, returning zero price")
|
||||
return zeroPrice
|
||||
}
|
||||
return int32(price)
|
||||
}
|
||||
|
||||
func (s *Scrapper) getMinimalPriceFromRangeWithTax(priceRange string) int32 {
|
||||
re := regexp.MustCompile(`他([\d,]+)円`)
|
||||
matches := re.FindStringSubmatch(priceRange)
|
||||
if len(matches) < 2 {
|
||||
log.Error("Price not found in range, returning zero price")
|
||||
return zeroPrice
|
||||
}
|
||||
|
||||
priceStr := strings.ReplaceAll(matches[1], ",", "")
|
||||
price, err := strconv.Atoi(priceStr)
|
||||
if err != nil {
|
||||
log.Error("Failed to convert minimal price in range, returning zero price")
|
||||
return zeroPrice
|
||||
}
|
||||
|
||||
return int32(float64(price) * taxMultiplier)
|
||||
}
|
||||
|
||||
func (s *Scrapper) parseSinglePrice(rawPrice string) (int32, error) {
|
||||
deCommaStr := strings.ReplaceAll(rawPrice, ",", "")
|
||||
split := strings.Split(deCommaStr, "円")
|
||||
finalPrice, err := s.countTax(split[0])
|
||||
if err != nil {
|
||||
return zeroPrice, err
|
||||
}
|
||||
return finalPrice, nil
|
||||
}
|
||||
|
||||
func (s *Scrapper) parseRangedPrice(rawPrice string) (int32, error) {
|
||||
deCommaStr := strings.ReplaceAll(rawPrice, ",", "")
|
||||
split := strings.Split(deCommaStr, "円")
|
||||
rm1 := strings.ReplaceAll(split[0], "(", "")
|
||||
rm2 := strings.ReplaceAll(rm1, "他", "")
|
||||
|
||||
finalPrice, err := s.countTax(rm2)
|
||||
if err != nil {
|
||||
return zeroPrice, err
|
||||
}
|
||||
return finalPrice, nil
|
||||
}
|
||||
|
||||
func (s *Scrapper) countTax(priceStr string) (int32, error) {
|
||||
intPrice, err := strconv.Atoi(priceStr)
|
||||
if err != nil {
|
||||
return zeroPrice, err
|
||||
}
|
||||
return int32(float64(intPrice) * taxMultiplier), nil
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue