Compare commits
No commits in common. "main" and "v0.1.9" have entirely different histories.
5 changed files with 36 additions and 203 deletions
|
|
@ -71,18 +71,14 @@ func (app *App) Run() {
|
||||||
surugayaScrapper := newSurugayaScrapperClient(app.config.GrpcCfg.SurugayaScrapperHost + ":" + app.config.GrpcCfg.SurugayaScrapperPort)
|
surugayaScrapper := newSurugayaScrapperClient(app.config.GrpcCfg.SurugayaScrapperHost + ":" + app.config.GrpcCfg.SurugayaScrapperPort)
|
||||||
|
|
||||||
//task processor
|
//task processor
|
||||||
handlers := make(map[string]parsers.TaskHandler)
|
handlers := map[string]parsers.TaskHandler{
|
||||||
|
shared.OriginSurugaya: parsers.NewSurugayaParser(ctx, surugayaScrapper),
|
||||||
|
|
||||||
if app.config.OriginEnabled.Surugaya {
|
shared.OriginMandarake: mandarake.NewParser(mandarake.ParserDeps{
|
||||||
handlers[shared.OriginSurugaya] = parsers.NewSurugayaParser(ctx, surugayaScrapper)
|
|
||||||
}
|
|
||||||
|
|
||||||
if app.config.OriginEnabled.Mandarake {
|
|
||||||
handlers[shared.OriginMandarake] = mandarake.NewParser(mandarake.ParserDeps{
|
|
||||||
Enabled: app.config.OriginEnabled.Mandarake,
|
Enabled: app.config.OriginEnabled.Mandarake,
|
||||||
ExternalBrowser: app.config.ExternalBrowser,
|
ExternalBrowser: app.config.ExternalBrowser,
|
||||||
GoroutinesNumber: app.numCPUs,
|
GoroutinesNumber: app.numCPUs,
|
||||||
})
|
}),
|
||||||
}
|
}
|
||||||
|
|
||||||
taskProcessor := processor.New(processor.Deps{
|
taskProcessor := processor.New(processor.Deps{
|
||||||
|
|
|
||||||
|
|
@ -34,27 +34,23 @@ func (s *Parser) HandleTasks(tasks []shared.Task, sender chan shared.TaskResult,
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender chan shared.TaskResult, state *appState.State) {
|
func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender chan shared.TaskResult, state *appState.State) {
|
||||||
|
pageCtx, pageCancel := chromedp.NewContext(ctx, chromedp.WithLogf(log.Printf))
|
||||||
|
defer pageCancel()
|
||||||
|
|
||||||
for task := range receiver {
|
for task := range receiver {
|
||||||
log.WithField("task_uuid", task.MerchUuid).Debug(logHeader + logWorker + "processing task")
|
log.WithField("task_uuid", task.MerchUuid).Debug(logHeader + logWorker + "processing task")
|
||||||
|
|
||||||
//pageCtx, pageCancel := chromedp.NewContext(ctx, chromedp.WithLogf(func(string, ...any) {}))
|
price, err := s.getPrice(pageCtx, task)
|
||||||
//
|
if err != nil {
|
||||||
//price, err := s.getPrice(pageCtx, task)
|
log.WithField("task_uuid", task.MerchUuid).Warn(logHeader + logWorker + logTaskWarning + "failed to process, zero price")
|
||||||
//pageCancel()
|
sender <- shared.TaskResult{
|
||||||
|
MerchUuid: task.MerchUuid,
|
||||||
|
Origin: task.Origin,
|
||||||
|
Price: zeroPrice,
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
//price, err := s.getMinimalPrice(task)
|
|
||||||
//if err != nil {
|
|
||||||
// log.WithField("task_uuid", task.MerchUuid).Warn(logHeader + logWorker + logTaskWarning + "failed to process, zero price")
|
|
||||||
// sender <- shared.TaskResult{
|
|
||||||
// MerchUuid: task.MerchUuid,
|
|
||||||
// Origin: task.Origin,
|
|
||||||
// Price: zeroPrice,
|
|
||||||
// }
|
|
||||||
// continue
|
|
||||||
//}
|
|
||||||
|
|
||||||
//price will be zeroPrice value in case of any error or if price not found
|
|
||||||
price := s.getMinimalPrice(task)
|
|
||||||
sender <- shared.TaskResult{
|
sender <- shared.TaskResult{
|
||||||
MerchUuid: task.MerchUuid,
|
MerchUuid: task.MerchUuid,
|
||||||
Origin: task.Origin,
|
Origin: task.Origin,
|
||||||
|
|
|
||||||
|
|
@ -18,11 +18,12 @@ func (s *Parser) getPrice(ctx context.Context, task shared.Task) (int32, error)
|
||||||
prices []int32
|
prices []int32
|
||||||
)
|
)
|
||||||
|
|
||||||
|
//get single price
|
||||||
if err := chromedp.Run(ctx,
|
if err := chromedp.Run(ctx,
|
||||||
chromedp.Navigate(task.Link),
|
chromedp.Navigate(task.Link),
|
||||||
chromedp.WaitReady("body"),
|
chromedp.WaitReady("body"),
|
||||||
chromedp.Text(`div.price`, &singlePrice, chromedp.ByQuery, chromedp.AtLeast(0)),
|
chromedp.WaitVisible(`div.price`, chromedp.ByQuery),
|
||||||
chromedp.Text(`div.price_range`, &rangedPrice, chromedp.ByQuery, chromedp.AtLeast(0)),
|
chromedp.Text(`div.price`, &singlePrice, chromedp.ByQuery),
|
||||||
); err != nil {
|
); err != nil {
|
||||||
log.WithError(err).Error(logHeader + logGetPrice + "failed to get single price tag")
|
log.WithError(err).Error(logHeader + logGetPrice + "failed to get single price tag")
|
||||||
return zeroPrice, err
|
return zeroPrice, err
|
||||||
|
|
@ -30,83 +31,29 @@ func (s *Parser) getPrice(ctx context.Context, task shared.Task) (int32, error)
|
||||||
singlePrice = strings.TrimSpace(singlePrice)
|
singlePrice = strings.TrimSpace(singlePrice)
|
||||||
prices = append(prices, s.getSinglePriceWithTax(singlePrice))
|
prices = append(prices, s.getSinglePriceWithTax(singlePrice))
|
||||||
|
|
||||||
|
//get price range
|
||||||
|
if err := chromedp.Run(ctx,
|
||||||
|
chromedp.Navigate(task.Link),
|
||||||
|
chromedp.WaitReady("body"),
|
||||||
|
chromedp.WaitVisible(`price_range`, chromedp.ByQuery),
|
||||||
|
chromedp.Text(`price_range`, &rangedPrice, chromedp.ByQuery),
|
||||||
|
); err != nil {
|
||||||
|
log.WithError(err).Warn(logHeader + logGetPrice + "failed to get ranged price tag")
|
||||||
|
}
|
||||||
|
|
||||||
rangedPrice = strings.TrimSpace(rangedPrice)
|
rangedPrice = strings.TrimSpace(rangedPrice)
|
||||||
|
|
||||||
if rangedPrice != "" {
|
if rangedPrice != "" {
|
||||||
prices = append(prices, s.getMinimalPriceFromRangeWithTax(rangedPrice))
|
prices = append(prices, s.getMinimalPriceFromRangeWithTax(rangedPrice))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//get minimal price
|
||||||
minimal := slices.Min(prices)
|
minimal := slices.Min(prices)
|
||||||
log.Infof(logHeader+"uuid: %s, price: %d", task.MerchUuid, minimal)
|
log.Infof(logHeader+"uuid: %s, price: %d", task.MerchUuid, minimal)
|
||||||
|
|
||||||
return minimal, nil
|
return minimal, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Parser) getMinimalPrice(task shared.Task) int32 {
|
|
||||||
ctx := context.Background()
|
|
||||||
allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, s.externalBrowser)
|
|
||||||
defer allocCancel()
|
|
||||||
|
|
||||||
sessionCtx, sessionCancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
|
|
||||||
defer sessionCancel()
|
|
||||||
|
|
||||||
var (
|
|
||||||
singlePrice string
|
|
||||||
rangedPrice string
|
|
||||||
)
|
|
||||||
|
|
||||||
if err := chromedp.Run(sessionCtx,
|
|
||||||
chromedp.Navigate(task.Link),
|
|
||||||
chromedp.WaitVisible("body", chromedp.ByQuery),
|
|
||||||
chromedp.Evaluate(`(document.querySelector('div.price')?.innerText || '').trim()`, &singlePrice),
|
|
||||||
chromedp.Evaluate(`(document.querySelector('div.price_range')?.innerText || '').trim()`, &rangedPrice),
|
|
||||||
); err != nil {
|
|
||||||
return zeroPrice
|
|
||||||
}
|
|
||||||
|
|
||||||
minimal := s.processPrices(singlePrice, rangedPrice)
|
|
||||||
log.Infof(logHeader+"uuid: %s, price: %d", task.MerchUuid, minimal)
|
|
||||||
return minimal
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Parser) processPrices(singlePrice, rangedPrice string) int32 {
|
|
||||||
var prices []int32
|
|
||||||
|
|
||||||
//in case of any errors or no price return zeroPrice const
|
|
||||||
//if success add to prices slice
|
|
||||||
if singlePrice != "" {
|
|
||||||
singlePrice = strings.TrimSpace(singlePrice)
|
|
||||||
counted, err := s.parseSinglePrice(singlePrice)
|
|
||||||
if err != nil {
|
|
||||||
log.WithFields(log.Fields{
|
|
||||||
"err": err.Error(),
|
|
||||||
"singlePrice": singlePrice,
|
|
||||||
}).Error(logHeader + logGetPrice + "failed to parse single price, returning zero price")
|
|
||||||
return zeroPrice
|
|
||||||
}
|
|
||||||
prices = append(prices, counted)
|
|
||||||
} else {
|
|
||||||
log.Warn(logHeader + logGetPrice + "single price not found")
|
|
||||||
return zeroPrice
|
|
||||||
}
|
|
||||||
|
|
||||||
//optional, adds price only if no errors and has non zero value
|
|
||||||
if rangedPrice != "" {
|
|
||||||
rangedPrice = strings.TrimSpace(rangedPrice)
|
|
||||||
counted, err := s.parseRangedPrice(rangedPrice)
|
|
||||||
if err != nil {
|
|
||||||
log.WithFields(log.Fields{
|
|
||||||
"err": err.Error(),
|
|
||||||
"rangedPrice": rangedPrice,
|
|
||||||
}).Error(logHeader + logGetPrice + "failed to parse ranged price")
|
|
||||||
} else {
|
|
||||||
if counted > 0 {
|
|
||||||
prices = append(prices, counted)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return slices.Min(prices)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Parser) getSinglePriceWithTax(rawPrice string) int32 {
|
func (s *Parser) getSinglePriceWithTax(rawPrice string) int32 {
|
||||||
re := regexp.MustCompile(`(\d+)\s*円`)
|
re := regexp.MustCompile(`(\d+)\s*円`)
|
||||||
matches := re.FindStringSubmatch(rawPrice)
|
matches := re.FindStringSubmatch(rawPrice)
|
||||||
|
|
@ -141,34 +88,3 @@ func (s *Parser) getMinimalPriceFromRangeWithTax(priceRange string) int32 {
|
||||||
|
|
||||||
return int32(float64(price) * taxMultiplier)
|
return int32(float64(price) * taxMultiplier)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Parser) parseSinglePrice(rawPrice string) (int32, error) {
|
|
||||||
deCommaStr := strings.ReplaceAll(rawPrice, ",", "")
|
|
||||||
split := strings.Split(deCommaStr, "円")
|
|
||||||
finalPrice, err := s.countTax(split[0])
|
|
||||||
if err != nil {
|
|
||||||
return zeroPrice, err
|
|
||||||
}
|
|
||||||
return finalPrice, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Parser) parseRangedPrice(rawPrice string) (int32, error) {
|
|
||||||
deCommaStr := strings.ReplaceAll(rawPrice, ",", "")
|
|
||||||
split := strings.Split(deCommaStr, "円")
|
|
||||||
rm1 := strings.ReplaceAll(split[0], "(", "")
|
|
||||||
rm2 := strings.ReplaceAll(rm1, "他", "")
|
|
||||||
|
|
||||||
finalPrice, err := s.countTax(rm2)
|
|
||||||
if err != nil {
|
|
||||||
return zeroPrice, err
|
|
||||||
}
|
|
||||||
return finalPrice, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *Parser) countTax(priceStr string) (int32, error) {
|
|
||||||
intPrice, err := strconv.Atoi(priceStr)
|
|
||||||
if err != nil {
|
|
||||||
return zeroPrice, err
|
|
||||||
}
|
|
||||||
return int32(float64(intPrice) * taxMultiplier), nil
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -1,77 +0,0 @@
|
||||||
package mandarake
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestParser_processPrices(t *testing.T) {
|
|
||||||
type fields struct {
|
|
||||||
baseCtx context.Context
|
|
||||||
externalBrowser string
|
|
||||||
goroutinesNumber int
|
|
||||||
}
|
|
||||||
type args struct {
|
|
||||||
singlePrice string
|
|
||||||
rangedPrice string
|
|
||||||
}
|
|
||||||
|
|
||||||
var placeholderFields = fields{
|
|
||||||
baseCtx: context.Background(),
|
|
||||||
externalBrowser: "",
|
|
||||||
goroutinesNumber: 10,
|
|
||||||
}
|
|
||||||
|
|
||||||
//single := "18,000円 (税込 19,800円)"
|
|
||||||
//ranged := "(他15,000円~16,000円もあります)"
|
|
||||||
|
|
||||||
tests := []struct {
|
|
||||||
name string
|
|
||||||
fields fields
|
|
||||||
args args
|
|
||||||
want int32
|
|
||||||
}{
|
|
||||||
//Cases
|
|
||||||
{name: "Full success", fields: placeholderFields, args: args{
|
|
||||||
singlePrice: "18,000円 (税込 19,800円)",
|
|
||||||
rangedPrice: "(他15,000円~16,000円もあります)",
|
|
||||||
}, want: 16500},
|
|
||||||
|
|
||||||
{name: "Single price only success 1", fields: placeholderFields, args: args{
|
|
||||||
singlePrice: "18,000円 (税込 19,800円)",
|
|
||||||
rangedPrice: "",
|
|
||||||
}, want: 19800},
|
|
||||||
|
|
||||||
{name: "Single price only success 2", fields: placeholderFields, args: args{
|
|
||||||
singlePrice: "18,000円 (税込 19,800円)",
|
|
||||||
rangedPrice: "no numbers in this string",
|
|
||||||
}, want: 19800},
|
|
||||||
|
|
||||||
{name: "zero single price success 1", fields: placeholderFields, args: args{
|
|
||||||
singlePrice: "",
|
|
||||||
rangedPrice: "",
|
|
||||||
}, want: 0},
|
|
||||||
|
|
||||||
{name: "zero single price success 2", fields: placeholderFields, args: args{
|
|
||||||
singlePrice: "no numbers in this string",
|
|
||||||
rangedPrice: "",
|
|
||||||
}, want: 0},
|
|
||||||
|
|
||||||
{name: "zero single price success 3", fields: placeholderFields, args: args{
|
|
||||||
singlePrice: "no numbers in this string",
|
|
||||||
rangedPrice: "no numbers in this string",
|
|
||||||
}, want: 0},
|
|
||||||
}
|
|
||||||
for _, tt := range tests {
|
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
|
||||||
s := &Parser{
|
|
||||||
baseCtx: tt.fields.baseCtx,
|
|
||||||
externalBrowser: tt.fields.externalBrowser,
|
|
||||||
goroutinesNumber: tt.fields.goroutinesNumber,
|
|
||||||
}
|
|
||||||
if got := s.processPrices(tt.args.singlePrice, tt.args.rangedPrice); got != tt.want {
|
|
||||||
t.Errorf("processPrices() = %v, want %v", got, tt.want)
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -26,8 +26,10 @@ func (p *Processor) StartWork(receivedTasks []shared.TaskResponse) {
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
go func(origin string, tasks []shared.Task) {
|
go func(origin string, tasks []shared.Task) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
if p.handlers[origin] != nil {
|
||||||
log.Info("Running task handler for origin: ", origin)
|
log.Info("Running task handler for origin: ", origin)
|
||||||
p.handlers[origin].HandleTasks(tasks, p.out, p.state)
|
p.handlers[origin].HandleTasks(tasks, p.out, p.state)
|
||||||
|
}
|
||||||
}(origin, tasks)
|
}(origin, tasks)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue