mandarake parser rework
All checks were successful
/ Make image (push) Successful in 39s

This commit is contained in:
nquidox 2025-12-26 16:19:09 +03:00
parent d63d345a9b
commit e48160dfa3
8 changed files with 243 additions and 390 deletions

View file

@ -0,0 +1,60 @@
package mandarake
import (
"context"
"github.com/chromedp/chromedp"
log "github.com/sirupsen/logrus"
"sync"
"task-processor/internal/appState"
"task-processor/internal/shared"
)
// HandleTasks processes tasks concurrently and blocks until all of them have
// been handled.
//
// The tasks are queued on a buffered channel that is closed immediately, and
// a pool of worker goroutines drains it. Every worker shares one chromedp
// remote allocator connected to s.externalBrowser. Each task produces exactly
// one shared.TaskResult on sender; the caller must keep reading from sender
// while HandleTasks runs, because the sends are made directly by the workers.
//
// state is passed through to the workers unchanged.
func (s *Parser) HandleTasks(tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) {
	log.Debug(logHeader + logWorker + "handling tasks")
	defer log.Debug(logHeader + logWorker + "finished handling tasks")

	// Nothing to do: skip connecting to the browser altogether.
	if len(tasks) == 0 {
		return
	}

	allocCtx, allocCancel := chromedp.NewRemoteAllocator(s.baseCtx, s.externalBrowser)
	// Deferred so the allocator is released on every exit path.
	defer allocCancel()

	receiver := make(chan shared.Task, len(tasks))
	for _, task := range tasks {
		receiver <- task
	}
	close(receiver)

	// Never start more workers than there are tasks, and never fewer than one:
	// with a non-positive goroutinesNumber no worker would run and every task
	// would be dropped without a result being sent.
	workers := s.goroutinesNumber
	if workers > len(tasks) {
		workers = len(tasks)
	}
	if workers < 1 {
		log.Warn(logHeader + logWorker + "goroutines number is not positive, using a single worker")
		workers = 1
	}

	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			s.worker(allocCtx, receiver, sender, state)
		}()
	}
	wg.Wait()
}
// worker drains receiver until it is closed, resolving the price of every
// task it takes and publishing one shared.TaskResult per task on sender.
//
// A single chromedp context is derived from ctx and reused for all tasks
// handled by this worker; it is cancelled when the worker returns. When
// getPrice fails, a warning is logged and the result carries zeroPrice.
//
// state is accepted but not used by this function.
func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender chan shared.TaskResult, state *appState.State) {
	browserCtx, release := chromedp.NewContext(ctx, chromedp.WithLogf(log.Printf))
	defer release()

	for {
		task, open := <-receiver
		if !open {
			return
		}

		log.WithField("task_uuid", task.MerchUuid).Debug(logHeader + logWorker + "processing task")

		// Start from the failure result and only fill in the price on success.
		result := shared.TaskResult{
			MerchUuid: task.MerchUuid,
			Origin:    task.Origin,
			Price:     zeroPrice,
		}

		if price, err := s.getPrice(browserCtx, task); err == nil {
			result.Price = price
		} else {
			log.WithField("task_uuid", task.MerchUuid).Warn(logHeader + logWorker + logTaskWarning + "failed to process, zero price")
		}

		sender <- result
	}
}

View file

@ -0,0 +1,40 @@
package mandarake
import (
"context"
log "github.com/sirupsen/logrus"
)
// Package-level constants shared by the Mandarake parser.
const (
// zeroPrice is the price reported when a price could not be obtained or parsed.
zeroPrice int32 = 0
// taxMultiplier is the factor applied to the price extracted from a price range.
// NOTE(review): presumably a 10% sales tax — confirm.
taxMultiplier float64 = 1.1
// logHeader prefixes the parser's log messages.
logHeader = "Mandarake parser | "
// logWorker marks messages emitted by the task handling / worker code.
logWorker = "worker: "
// logTaskWarning marks a warning about a single task.
logTaskWarning = "task warning: "
// logGetPrice marks messages emitted while fetching a price from a page.
logGetPrice = "get price: "
)
// Parser fetches prices for tasks through a remote browser driven by chromedp.
type Parser struct {
// baseCtx is the parent context of the chromedp remote allocator.
baseCtx context.Context
// externalBrowser is the address handed to chromedp.NewRemoteAllocator.
externalBrowser string
// goroutinesNumber is the number of worker goroutines started per HandleTasks call.
goroutinesNumber int
}
// ParserDeps carries the settings NewParser needs to build a Parser.
type ParserDeps struct {
// Enabled switches the parser on; when false NewParser returns nil.
Enabled bool
// ExternalBrowser is copied into Parser.externalBrowser.
ExternalBrowser string
// GoroutinesNumber is copied into Parser.goroutinesNumber.
GoroutinesNumber int
}
// NewParser builds a Parser from deps.
//
// It returns nil (after logging that the parser is disabled) when
// deps.Enabled is false, so callers must check the result before use.
// The parser's base context is context.Background().
func NewParser(deps ParserDeps) *Parser {
	if deps.Enabled {
		parser := new(Parser)
		parser.baseCtx = context.Background()
		parser.externalBrowser = deps.ExternalBrowser
		parser.goroutinesNumber = deps.GoroutinesNumber
		return parser
	}

	log.Info(logHeader + "disabled")
	return nil
}

View file

@ -0,0 +1,90 @@
package mandarake
import (
	"context"
	"regexp"
	"slices"
	"strconv"
	"strings"
	"time"

	"github.com/chromedp/chromedp"
	log "github.com/sirupsen/logrus"

	"task-processor/internal/shared"
)
// getPrice opens task.Link in the browser context ctx and returns the lowest
// price found on the page.
//
// Two sources are consulted:
//   - the `div.price` element, which is mandatory: failing to read it is
//     returned as an error together with zeroPrice;
//   - the `price_range` element, which is optional: failing to read it is
//     only logged as a warning.
//
// Both lookups are bounded by a timeout. chromedp's WaitVisible blocks until
// the element shows up or the context is done, so without a deadline a page
// lacking one of the elements would stall the calling worker indefinitely.
//
// If the mandatory price text cannot be parsed, its value is zeroPrice and
// that is what gets returned, as zero is the smallest candidate.
func (s *Parser) getPrice(ctx context.Context, task shared.Task) (int32, error) {
	const (
		// pageTimeout bounds navigation plus the mandatory price lookup.
		pageTimeout = 30 * time.Second
		// rangeTimeout bounds the optional price-range lookup; the page is
		// already loaded at that point, so a short wait is enough.
		rangeTimeout = 5 * time.Second
	)

	// Make sure the browser target is allocated on the long-lived ctx. The
	// first chromedp.Run on a context binds the target to the context it is
	// given, so it must not be one of the short-lived timeout contexts below.
	if err := chromedp.Run(ctx); err != nil {
		log.WithError(err).Error(logHeader + logGetPrice + "failed to prepare browser tab")
		return zeroPrice, err
	}

	// Mandatory single price.
	var singlePrice string
	pageCtx, pageCancel := context.WithTimeout(ctx, pageTimeout)
	err := chromedp.Run(pageCtx,
		chromedp.Navigate(task.Link),
		chromedp.WaitReady("body"),
		chromedp.WaitVisible(`div.price`, chromedp.ByQuery),
		chromedp.Text(`div.price`, &singlePrice, chromedp.ByQuery),
	)
	pageCancel()
	if err != nil {
		log.WithError(err).Error(logHeader + logGetPrice + "failed to get single price tag")
		return zeroPrice, err
	}
	prices := []int32{s.getSinglePriceWithTax(strings.TrimSpace(singlePrice))}

	// Optional price range. The page is already loaded, so it is queried in
	// place instead of navigating to the same link a second time.
	// NOTE(review): `price_range` is a tag selector (it matches a
	// <price_range> element); a class or id selector may have been intended —
	// confirm against the page markup.
	var rangedPrice string
	rangeCtx, rangeCancel := context.WithTimeout(ctx, rangeTimeout)
	err = chromedp.Run(rangeCtx,
		chromedp.WaitVisible(`price_range`, chromedp.ByQuery),
		chromedp.Text(`price_range`, &rangedPrice, chromedp.ByQuery),
	)
	rangeCancel()
	if err != nil {
		log.WithError(err).Warn(logHeader + logGetPrice + "failed to get ranged price tag")
	}

	if rangedPrice = strings.TrimSpace(rangedPrice); rangedPrice != "" {
		// A zero here means the range text could not be parsed; it must not
		// win the minimum and wipe out a valid single price.
		if fromRange := s.getMinimalPriceFromRangeWithTax(rangedPrice); fromRange != zeroPrice {
			prices = append(prices, fromRange)
		}
	}

	minimal := slices.Min(prices)
	log.Infof(logHeader+"uuid: %s, price: %d", task.MerchUuid, minimal)
	return minimal, nil
}
// singlePriceRe captures the number (digits with optional thousands
// separators) that directly precedes the yen sign. It is compiled once at
// package level instead of on every call.
var singlePriceRe = regexp.MustCompile(`([\d,]+)\s*円`)

// getSinglePriceWithTax extracts the first yen amount from rawPrice.
//
// Thousands separators are accepted, so "1,500円" yields 1500. zeroPrice is
// returned (and an error is logged) when no amount is found or when the
// amount does not fit into an int32.
//
// NOTE(review): despite the name, no tax multiplier is applied here, unlike
// in getMinimalPriceFromRangeWithTax — confirm whether the page text already
// includes tax.
func (s *Parser) getSinglePriceWithTax(rawPrice string) int32 {
	matches := singlePriceRe.FindStringSubmatch(rawPrice)
	if len(matches) < 2 {
		log.Error("Mandarake | Single price not found, returning zero price")
		return zeroPrice
	}

	digits := strings.ReplaceAll(matches[1], ",", "")
	// bitSize 32 makes out-of-range amounts an error instead of letting the
	// int32 conversion wrap around silently.
	price, err := strconv.ParseInt(digits, 10, 32)
	if err != nil {
		log.Error("Mandarake | Failed to convert single price, returning zero price")
		return zeroPrice
	}

	return int32(price)
}
// rangePriceRe captures the number (digits and thousands separators) found
// between the "他" marker and the yen sign. It is compiled once at package
// level instead of on every call.
var rangePriceRe = regexp.MustCompile(`他([\d,]+)円`)

// getMinimalPriceFromRangeWithTax extracts the amount following "他" in
// priceRange, multiplies it by taxMultiplier and returns the result truncated
// to an int32.
//
// zeroPrice is returned (and an error is logged) when the pattern does not
// match, or when the amount — before or after the multiplier is applied —
// does not fit into an int32.
func (s *Parser) getMinimalPriceFromRangeWithTax(priceRange string) int32 {
	// Largest value representable as int32.
	const maxPrice = 1<<31 - 1

	matches := rangePriceRe.FindStringSubmatch(priceRange)
	if len(matches) < 2 {
		log.Error("Price not found in range, returning zero price")
		return zeroPrice
	}

	digits := strings.ReplaceAll(matches[1], ",", "")
	// bitSize 32 turns out-of-range amounts into an error rather than a
	// silently wrapped value.
	price, err := strconv.ParseInt(digits, 10, 32)
	if err != nil {
		log.Error("Failed to convert minimal price in range, returning zero price")
		return zeroPrice
	}

	taxed := float64(price) * taxMultiplier
	// The multiplier can push an in-range amount past the int32 limit.
	if taxed > maxPrice {
		log.Error("Failed to convert minimal price in range, returning zero price")
		return zeroPrice
	}

	return int32(taxed)
}