Compare commits

..

No commits in common. "main" and "v0.1.12" have entirely different histories.

5 changed files with 22 additions and 200 deletions

View file

@ -71,18 +71,14 @@ func (app *App) Run() {
surugayaScrapper := newSurugayaScrapperClient(app.config.GrpcCfg.SurugayaScrapperHost + ":" + app.config.GrpcCfg.SurugayaScrapperPort) surugayaScrapper := newSurugayaScrapperClient(app.config.GrpcCfg.SurugayaScrapperHost + ":" + app.config.GrpcCfg.SurugayaScrapperPort)
//task processor //task processor
handlers := make(map[string]parsers.TaskHandler) handlers := map[string]parsers.TaskHandler{
shared.OriginSurugaya: parsers.NewSurugayaParser(ctx, surugayaScrapper),
if app.config.OriginEnabled.Surugaya { shared.OriginMandarake: mandarake.NewParser(mandarake.ParserDeps{
handlers[shared.OriginSurugaya] = parsers.NewSurugayaParser(ctx, surugayaScrapper)
}
if app.config.OriginEnabled.Mandarake {
handlers[shared.OriginMandarake] = mandarake.NewParser(mandarake.ParserDeps{
Enabled: app.config.OriginEnabled.Mandarake, Enabled: app.config.OriginEnabled.Mandarake,
ExternalBrowser: app.config.ExternalBrowser, ExternalBrowser: app.config.ExternalBrowser,
GoroutinesNumber: app.numCPUs, GoroutinesNumber: app.numCPUs,
}) }),
} }
taskProcessor := processor.New(processor.Deps{ taskProcessor := processor.New(processor.Deps{

View file

@ -37,24 +37,20 @@ func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender c
for task := range receiver { for task := range receiver {
log.WithField("task_uuid", task.MerchUuid).Debug(logHeader + logWorker + "processing task") log.WithField("task_uuid", task.MerchUuid).Debug(logHeader + logWorker + "processing task")
//pageCtx, pageCancel := chromedp.NewContext(ctx, chromedp.WithLogf(func(string, ...any) {})) pageCtx, pageCancel := chromedp.NewContext(ctx, chromedp.WithLogf(func(string, ...any) {}))
//
//price, err := s.getPrice(pageCtx, task)
//pageCancel()
//price, err := s.getMinimalPrice(task) price, err := s.getPrice(pageCtx, task)
//if err != nil { pageCancel()
// log.WithField("task_uuid", task.MerchUuid).Warn(logHeader + logWorker + logTaskWarning + "failed to process, zero price") if err != nil {
// sender <- shared.TaskResult{ log.WithField("task_uuid", task.MerchUuid).Warn(logHeader + logWorker + logTaskWarning + "failed to process, zero price")
// MerchUuid: task.MerchUuid, sender <- shared.TaskResult{
// Origin: task.Origin, MerchUuid: task.MerchUuid,
// Price: zeroPrice, Origin: task.Origin,
// } Price: zeroPrice,
// continue }
//} continue
}
//price will be zeroPrice value in case of any error or if price not found
price := s.getMinimalPrice(task)
sender <- shared.TaskResult{ sender <- shared.TaskResult{
MerchUuid: task.MerchUuid, MerchUuid: task.MerchUuid,
Origin: task.Origin, Origin: task.Origin,

View file

@ -19,6 +19,8 @@ func (s *Parser) getPrice(ctx context.Context, task shared.Task) (int32, error)
) )
if err := chromedp.Run(ctx, if err := chromedp.Run(ctx,
chromedp.Navigate("https://www.mandarake.co.jp/index2.html"),
chromedp.WaitReady("body", chromedp.ByQuery),
chromedp.Navigate(task.Link), chromedp.Navigate(task.Link),
chromedp.WaitReady("body"), chromedp.WaitReady("body"),
chromedp.Text(`div.price`, &singlePrice, chromedp.ByQuery, chromedp.AtLeast(0)), chromedp.Text(`div.price`, &singlePrice, chromedp.ByQuery, chromedp.AtLeast(0)),
@ -41,72 +43,6 @@ func (s *Parser) getPrice(ctx context.Context, task shared.Task) (int32, error)
return minimal, nil return minimal, nil
} }
// getMinimalPrice opens task.Link in a new chromedp session created on top of
// a remote allocator for s.externalBrowser, reads the trimmed inner text of
// the first `div.price` and `div.price_range` elements (an empty string when
// an element is absent) and returns what s.processPrices computes from them.
//
// The function never reports an error to the caller: when the chromedp run
// fails, the failure is logged together with the task uuid and zeroPrice is
// returned.
func (s *Parser) getMinimalPrice(task shared.Task) int32 {
	// NOTE(review): the session is rooted in context.Background(), so it carries
	// no deadline and is not cancelled with any caller context — confirm that
	// this is intended.
	allocCtx, allocCancel := chromedp.NewRemoteAllocator(context.Background(), s.externalBrowser)
	defer allocCancel()

	sessionCtx, sessionCancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
	defer sessionCancel()

	var (
		singlePrice string
		rangedPrice string
	)

	err := chromedp.Run(sessionCtx,
		chromedp.Navigate(task.Link),
		chromedp.WaitVisible("body", chromedp.ByQuery),
		chromedp.Evaluate(`(document.querySelector('div.price')?.innerText || '').trim()`, &singlePrice),
		chromedp.Evaluate(`(document.querySelector('div.price_range')?.innerText || '').trim()`, &rangedPrice),
	)
	if err != nil {
		// Previously this error was dropped, which made a browser/navigation
		// failure indistinguishable from "no price on the page" in the logs.
		log.WithFields(log.Fields{
			"task_uuid": task.MerchUuid,
			"err":       err.Error(),
		}).Warn(logHeader + "failed to read prices from page, returning zero price")
		return zeroPrice
	}

	minimal := s.processPrices(singlePrice, rangedPrice)
	log.Infof(logHeader+"uuid: %s, price: %d", task.MerchUuid, minimal)

	return minimal
}
// processPrices returns the lowest price found in the two raw price strings.
//
// singlePrice is mandatory: when it is empty or cannot be parsed by
// s.parseSinglePrice, the problem is logged and zeroPrice is returned.
// rangedPrice is optional: it takes part in the comparison only when it is
// non-empty, s.parseRangedPrice succeeds and the parsed value is positive;
// a parse failure is logged and otherwise ignored.
func (s *Parser) processPrices(singlePrice, rangedPrice string) int32 {
	if singlePrice == "" {
		log.Warn(logHeader + logGetPrice + "single price not found")
		return zeroPrice
	}

	singlePrice = strings.TrimSpace(singlePrice)
	single, err := s.parseSinglePrice(singlePrice)
	if err != nil {
		log.WithFields(log.Fields{
			"err":         err.Error(),
			"singlePrice": singlePrice,
		}).Error(logHeader + logGetPrice + "failed to parse single price, returning zero price")
		return zeroPrice
	}

	// The single price is always a candidate once it parsed successfully.
	candidates := []int32{single}

	if rangedPrice != "" {
		rangedPrice = strings.TrimSpace(rangedPrice)
		ranged, rangedErr := s.parseRangedPrice(rangedPrice)
		switch {
		case rangedErr != nil:
			log.WithFields(log.Fields{
				"err":         rangedErr.Error(),
				"rangedPrice": rangedPrice,
			}).Error(logHeader + logGetPrice + "failed to parse ranged price")
		case ranged > 0:
			candidates = append(candidates, ranged)
		}
	}

	return slices.Min(candidates)
}
func (s *Parser) getSinglePriceWithTax(rawPrice string) int32 { func (s *Parser) getSinglePriceWithTax(rawPrice string) int32 {
re := regexp.MustCompile(`(\d+)\s*円`) re := regexp.MustCompile(`(\d+)\s*円`)
matches := re.FindStringSubmatch(rawPrice) matches := re.FindStringSubmatch(rawPrice)
@ -141,34 +77,3 @@ func (s *Parser) getMinimalPriceFromRangeWithTax(priceRange string) int32 {
return int32(float64(price) * taxMultiplier) return int32(float64(price) * taxMultiplier)
} }
// parseSinglePrice takes the part of rawPrice that precedes the first "円"
// (the whole string when there is no "円"), strips the "," separators from it
// and hands the remainder to s.countTax, whose result is returned as is.
func (s *Parser) parseSinglePrice(rawPrice string) (int32, error) {
	beforeYen, _, _ := strings.Cut(rawPrice, "円")
	digits := strings.ReplaceAll(beforeYen, ",", "")

	// countTax yields zeroPrice together with any error, so its result can be
	// passed through unchanged.
	return s.countTax(digits)
}
// parseRangedPrice takes the part of rawPrice that precedes the first "円"
// (the whole string when there is no "円"), removes every ",", "(" and "他"
// from it and hands the remainder to s.countTax, whose result is returned as
// is.
func (s *Parser) parseRangedPrice(rawPrice string) (int32, error) {
	beforeYen, _, _ := strings.Cut(rawPrice, "円")
	digits := strings.NewReplacer(",", "", "(", "", "他", "").Replace(beforeYen)

	// countTax yields zeroPrice together with any error, so its result can be
	// passed through unchanged.
	return s.countTax(digits)
}
// countTax converts priceStr to an integer with strconv.Atoi, multiplies it by
// taxMultiplier and returns the product truncated to int32. When priceStr is
// not a valid integer, zeroPrice and the conversion error are returned.
func (s *Parser) countTax(priceStr string) (int32, error) {
	base, convErr := strconv.Atoi(priceStr)
	if convErr != nil {
		return zeroPrice, convErr
	}

	taxed := float64(base) * taxMultiplier
	return int32(taxed), nil
}

View file

@ -1,77 +0,0 @@
package mandarake
import (
"context"
"testing"
)
// TestParser_processPrices is a table-driven test for Parser.processPrices.
// Each case feeds a raw single price and a raw ranged price and compares the
// returned value with the expected minimal price; 0 is expected whenever the
// single price is missing or unparsable.
func TestParser_processPrices(t *testing.T) {
	const (
		sampleSingle = "18,000円 (税込 19,800円)"
		sampleRanged = "(他15,000円16,000円もあります)"
		noNumbers    = "no numbers in this string"
	)

	cases := []struct {
		name        string
		singlePrice string
		rangedPrice string
		want        int32
	}{
		{name: "Full success", singlePrice: sampleSingle, rangedPrice: sampleRanged, want: 16500},
		{name: "Single price only success 1", singlePrice: sampleSingle, rangedPrice: "", want: 19800},
		{name: "Single price only success 2", singlePrice: sampleSingle, rangedPrice: noNumbers, want: 19800},
		{name: "zero single price success 1", singlePrice: "", rangedPrice: "", want: 0},
		{name: "zero single price success 2", singlePrice: noNumbers, rangedPrice: "", want: 0},
		{name: "zero single price success 3", singlePrice: noNumbers, rangedPrice: noNumbers, want: 0},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// Placeholder dependencies: the same values are used for every case.
			parser := &Parser{
				baseCtx:          context.Background(),
				externalBrowser:  "",
				goroutinesNumber: 10,
			}

			got := parser.processPrices(tc.singlePrice, tc.rangedPrice)
			if got != tc.want {
				t.Errorf("processPrices() = %v, want %v", got, tc.want)
			}
		})
	}
}

View file

@ -26,8 +26,10 @@ func (p *Processor) StartWork(receivedTasks []shared.TaskResponse) {
wg.Add(1) wg.Add(1)
go func(origin string, tasks []shared.Task) { go func(origin string, tasks []shared.Task) {
defer wg.Done() defer wg.Done()
log.Info("Running task handler for origin: ", origin) if p.handlers[origin] != nil {
p.handlers[origin].HandleTasks(tasks, p.out, p.state) log.Info("Running task handler for origin: ", origin)
p.handlers[origin].HandleTasks(tasks, p.out, p.state)
}
}(origin, tasks) }(origin, tasks)
} }
wg.Wait() wg.Wait()