This commit is contained in:
parent
d63d345a9b
commit
e48160dfa3
8 changed files with 243 additions and 390 deletions
60
internal/parsers/mandarake/handleTasks.go
Normal file
60
internal/parsers/mandarake/handleTasks.go
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
package mandarake
|
||||
|
||||
import (
|
||||
"context"
|
||||
"github.com/chromedp/chromedp"
|
||||
log "github.com/sirupsen/logrus"
|
||||
"sync"
|
||||
"task-processor/internal/appState"
|
||||
"task-processor/internal/shared"
|
||||
)
|
||||
|
||||
func (s *Parser) HandleTasks(tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) {
|
||||
log.Debug(logHeader + logWorker + "handling tasks")
|
||||
|
||||
allocCtx, allocCancel := chromedp.NewRemoteAllocator(s.baseCtx, s.externalBrowser)
|
||||
|
||||
receiver := make(chan shared.Task, len(tasks))
|
||||
for _, task := range tasks {
|
||||
receiver <- task
|
||||
}
|
||||
close(receiver)
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
for i := 0; i < s.goroutinesNumber; i++ {
|
||||
wg.Add(1)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
s.worker(allocCtx, receiver, sender, state)
|
||||
}()
|
||||
}
|
||||
wg.Wait()
|
||||
allocCancel()
|
||||
log.Debug(logHeader + logWorker + "finished handling tasks")
|
||||
}
|
||||
|
||||
func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender chan shared.TaskResult, state *appState.State) {
|
||||
pageCtx, pageCancel := chromedp.NewContext(ctx, chromedp.WithLogf(log.Printf))
|
||||
defer pageCancel()
|
||||
|
||||
for task := range receiver {
|
||||
log.WithField("task_uuid", task.MerchUuid).Debug(logHeader + logWorker + "processing task")
|
||||
|
||||
price, err := s.getPrice(pageCtx, task)
|
||||
if err != nil {
|
||||
log.WithField("task_uuid", task.MerchUuid).Warn(logHeader + logWorker + logTaskWarning + "failed to process, zero price")
|
||||
sender <- shared.TaskResult{
|
||||
MerchUuid: task.MerchUuid,
|
||||
Origin: task.Origin,
|
||||
Price: zeroPrice,
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
sender <- shared.TaskResult{
|
||||
MerchUuid: task.MerchUuid,
|
||||
Origin: task.Origin,
|
||||
Price: price,
|
||||
}
|
||||
}
|
||||
}
|
||||
40
internal/parsers/mandarake/handler.go
Normal file
40
internal/parsers/mandarake/handler.go
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
package mandarake
|
||||
|
||||
import (
|
||||
"context"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Price-related values used by the Mandarake parser.
const (
	// zeroPrice is the price reported when no price could be fetched or parsed.
	zeroPrice int32 = 0
	// taxMultiplier is the factor a price is multiplied by to add tax.
	taxMultiplier float64 = 1.1
)

// Log message prefixes; they are concatenated in front of the message text.
const (
	logHeader      = "Mandarake parser | "
	logWorker      = "worker: "
	logTaskWarning = "task warning: "
	logGetPrice    = "get price: "
)
|
||||
|
||||
// Parser obtains prices for Mandarake tasks through an external browser
// driven by chromedp.
type Parser struct {
	// baseCtx is the parent context for the chromedp remote allocator.
	baseCtx context.Context
	// externalBrowser is the address handed to chromedp.NewRemoteAllocator.
	externalBrowser string
	// goroutinesNumber is the configured number of worker goroutines.
	goroutinesNumber int
}
|
||||
|
||||
// ParserDeps carries the settings NewParser needs to build a Parser.
type ParserDeps struct {
	// Enabled switches the parser on; NewParser returns nil when it is false.
	Enabled bool
	// ExternalBrowser is copied into the Parser as the remote browser address.
	ExternalBrowser string
	// GoroutinesNumber is copied into the Parser as the worker goroutine count.
	GoroutinesNumber int
}
|
||||
|
||||
func NewParser(deps ParserDeps) *Parser {
|
||||
if !deps.Enabled {
|
||||
log.Info(logHeader + "disabled")
|
||||
return nil
|
||||
}
|
||||
|
||||
return &Parser{
|
||||
baseCtx: context.Background(),
|
||||
externalBrowser: deps.ExternalBrowser,
|
||||
goroutinesNumber: deps.GoroutinesNumber,
|
||||
}
|
||||
}
|
||||
90
internal/parsers/mandarake/service.go
Normal file
90
internal/parsers/mandarake/service.go
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
package mandarake
|
||||
|
||||
import (
	"context"
	"errors"
	"regexp"
	"slices"
	"strconv"
	"strings"
	"time"

	"github.com/chromedp/chromedp"
	log "github.com/sirupsen/logrus"

	"task-processor/internal/shared"
)
|
||||
|
||||
func (s *Parser) getPrice(ctx context.Context, task shared.Task) (int32, error) {
|
||||
var (
|
||||
singlePrice string
|
||||
rangedPrice string
|
||||
prices []int32
|
||||
)
|
||||
|
||||
//get single price
|
||||
if err := chromedp.Run(ctx,
|
||||
chromedp.Navigate(task.Link),
|
||||
chromedp.WaitReady("body"),
|
||||
chromedp.WaitVisible(`div.price`, chromedp.ByQuery),
|
||||
chromedp.Text(`div.price`, &singlePrice, chromedp.ByQuery),
|
||||
); err != nil {
|
||||
log.WithError(err).Error(logHeader + logGetPrice + "failed to get single price tag")
|
||||
return zeroPrice, err
|
||||
}
|
||||
singlePrice = strings.TrimSpace(singlePrice)
|
||||
prices = append(prices, s.getSinglePriceWithTax(singlePrice))
|
||||
|
||||
//get price range
|
||||
if err := chromedp.Run(ctx,
|
||||
chromedp.Navigate(task.Link),
|
||||
chromedp.WaitReady("body"),
|
||||
chromedp.WaitVisible(`price_range`, chromedp.ByQuery),
|
||||
chromedp.Text(`price_range`, &rangedPrice, chromedp.ByQuery),
|
||||
); err != nil {
|
||||
log.WithError(err).Warn(logHeader + logGetPrice + "failed to get ranged price tag")
|
||||
}
|
||||
|
||||
rangedPrice = strings.TrimSpace(rangedPrice)
|
||||
|
||||
if rangedPrice != "" {
|
||||
prices = append(prices, s.getMinimalPriceFromRangeWithTax(rangedPrice))
|
||||
}
|
||||
|
||||
//get minimal price
|
||||
minimal := slices.Min(prices)
|
||||
log.Infof(logHeader+"uuid: %s, price: %d", task.MerchUuid, minimal)
|
||||
|
||||
return minimal, nil
|
||||
}
|
||||
|
||||
func (s *Parser) getSinglePriceWithTax(rawPrice string) int32 {
|
||||
re := regexp.MustCompile(`(\d+)\s*円`)
|
||||
matches := re.FindStringSubmatch(rawPrice)
|
||||
if len(matches) < 2 {
|
||||
log.Error("Mandarake | Single price not found, returning zero price")
|
||||
return zeroPrice
|
||||
}
|
||||
|
||||
priceStr := matches[1]
|
||||
price, err := strconv.Atoi(priceStr)
|
||||
if err != nil {
|
||||
log.Error("Mandarake | Failed to convert single price, returning zero price")
|
||||
return zeroPrice
|
||||
}
|
||||
return int32(price)
|
||||
}
|
||||
|
||||
func (s *Parser) getMinimalPriceFromRangeWithTax(priceRange string) int32 {
|
||||
re := regexp.MustCompile(`他([\d,]+)円`)
|
||||
matches := re.FindStringSubmatch(priceRange)
|
||||
if len(matches) < 2 {
|
||||
log.Error("Price not found in range, returning zero price")
|
||||
return zeroPrice
|
||||
}
|
||||
|
||||
priceStr := strings.ReplaceAll(matches[1], ",", "")
|
||||
price, err := strconv.Atoi(priceStr)
|
||||
if err != nil {
|
||||
log.Error("Failed to convert minimal price in range, returning zero price")
|
||||
return zeroPrice
|
||||
}
|
||||
|
||||
return int32(float64(price) * taxMultiplier)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue