diff --git a/cmd/main.go b/cmd/main.go index 92ae164..e244516 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -2,6 +2,9 @@ package main import ( "context" + log "github.com/sirupsen/logrus" + "net/http" + _ "net/http/pprof" "os" "os/signal" "syscall" @@ -18,6 +21,12 @@ func main() { logging.LogSetup(c.LogLevel) + if c.PprofEnabled { + go func() { + log.Println(http.ListenAndServe("localhost:6060", nil)) + }() + } + appl := app.New(c) appl.Run(ctx) diff --git a/config.env b/config.env index 7633de5..757bc25 100644 --- a/config.env +++ b/config.env @@ -2,6 +2,7 @@ APP_LOG_LEVEL=error APP_NUMCPUS=-1 APP_CHECK_PERIOD=6 EXTERNAL_BROWSER= +PPROF_ENABLED=false GRPC_SERVER_HOST=0.0.0.0 GRPC_SERVER_PORT=9060 @@ -14,6 +15,7 @@ GRPC_SURUGAYA_SCRAPPER_PORT=9070 TASK_RETRY_COUNT=3 TASK_RETRY_MINUTES=5 +TASK_TIMEOUT_MINUTES=5 ORIGIN_SURUGAYA_ENABLED=false ORIGIN_MANDARAKE_ENABLED=false diff --git a/config/config.go b/config/config.go index b19ffe0..6b1ec59 100644 --- a/config/config.go +++ b/config/config.go @@ -7,6 +7,7 @@ import ( ) type Config struct { + PprofEnabled bool LogLevel string NumCPUs int CheckPeriod int @@ -29,6 +30,7 @@ type GrpcConfig struct { type TasksConfig struct { RetryCount int RetryMinutes int + TaskTimeout int } type OriginEnabled struct { @@ -45,6 +47,7 @@ type MetricsConfig struct { func NewConfig() *Config { return &Config{ + PprofEnabled: getEnvBool("PPROF_ENABLED", true), LogLevel: getEnv("APP_LOG_LEVEL", "debug"), NumCPUs: getEnvInt("APP_NUMCPUS", -1), CheckPeriod: getEnvInt("APP_CHECK_PERIOD", 6), @@ -62,6 +65,7 @@ func NewConfig() *Config { TasksCfg: TasksConfig{ RetryCount: getEnvInt("TASK_RETRY_COUNT", 3), RetryMinutes: getEnvInt("TASK_RETRY_MINUTES", 5), + TaskTimeout: getEnvInt("TASK_TIMEOUT_MINUTES", 5), }, OriginEnabled: OriginEnabled{ diff --git a/internal/app/app.go b/internal/app/app.go index 832e338..4f27a7d 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -75,9 +75,6 @@ func (app *App) Run(ctx context.Context) { //main 
apiClient := newApiClient(app.config.GrpcCfg.ApiClientHost + ":" + app.config.GrpcCfg.ApiClientPort) - period := time.NewTicker(app.checkPeriod * time.Hour) - defer period.Stop() - sender := make(chan shared.TaskResult, app.numCPUs*10) defer close(sender) @@ -96,6 +93,7 @@ func (app *App) Run(ctx context.Context) { Enabled: app.config.OriginEnabled.Mandarake, ExternalBrowser: app.config.ExternalBrowser, GoroutinesNumber: app.numCPUs, + TaskTimeout: app.config.TasksCfg.TaskTimeout, }) } @@ -117,6 +115,9 @@ func (app *App) Run(ctx context.Context) { taskProcessor.StartWork(ctx, receivedTasks) } + period := time.NewTicker(app.checkPeriod * time.Hour) + defer period.Stop() + go func() { process() //immediate start for range period.C { @@ -140,7 +141,7 @@ func (app *App) Run(ctx context.Context) { l := len(sendData) if l > 0 { log.WithField("length", l).Debug("Sending parsed data") - app.network.SendResult(apiClient, sendData) + app.network.SendResult(ctx, apiClient, sendData) sendData = sendData[:0] } } diff --git a/internal/parsers/mandarake/handleTasks.go b/internal/parsers/mandarake/handleTasks.go index 1b566e7..8596747 100644 --- a/internal/parsers/mandarake/handleTasks.go +++ b/internal/parsers/mandarake/handleTasks.go @@ -10,14 +10,42 @@ import ( "task-processor/internal/shared" ) -func (s *Parser) HandleTasks(ctx context.Context, tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) { - log.Infof("%v %v handling tasks", logHeader, logWorker) - +func (s *Parser) setupBrowser(ctx context.Context) (string, error) { allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, s.externalBrowser) defer allocCancel() - sessionCtx, sessionCancel := chromedp.NewContext(allocCtx /* chromedp.WithLogf(log.Printf) */, chromedp.WithLogf(func(string, ...any) {})) - defer sessionCancel() + pageCtx, pageCancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(func(string, ...any) {})) + defer pageCancel() + + copyright := "No copyright div found." 
+ + if err := chromedp.Run(pageCtx, + chromedp.Navigate("https://www.mandarake.co.jp/"), + chromedp.WaitReady("body", chromedp.ByQuery), + chromedp.Text(`div.copyright`, &copyright, chromedp.ByQuery, chromedp.AtLeast(0)), + chromedp.Navigate("https://www.mandarake.co.jp/index2.html"), + chromedp.WaitReady("body", chromedp.ByQuery), + ); err != nil { + log.WithError(err).Error(logHeader + logGetPrice + "failed to get single price tag") + return copyright, err + } + + return copyright, nil +} + +func (s *Parser) HandleTasks(ctx context.Context, tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) { + log.Infof("%v Start handling tasks", logHeader) + log.Infof("%v Setting up browser", logHeader) + cr, err := s.setupBrowser(ctx) + if err != nil { + log.WithError(err).Error(logHeader + logGetPrice + "failed to setup browser") + } + log.WithField("Copyright message", cr).Infof("%v Finished setting up browser.", logHeader) + + log.Infof("%v %v processing tasks...", logHeader, logWorker) + + allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, s.externalBrowser) + defer allocCancel() receiver := make(chan shared.Task, len(tasks)) for _, task := range tasks { @@ -31,7 +59,7 @@ func (s *Parser) HandleTasks(ctx context.Context, tasks []shared.Task, sender ch wg.Add(1) go func() { defer wg.Done() - s.worker(sessionCtx, receiver, sender) + s.worker(allocCtx, receiver, sender) }() } wg.Wait() @@ -42,14 +70,20 @@ func (s *Parser) HandleTasks(ctx context.Context, tasks []shared.Task, sender ch func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender chan shared.TaskResult) { for task := range receiver { + taskCtx, taskCancel := chromedp.NewContext(ctx /* chromedp.WithLogf(log.Printf) */, chromedp.WithLogf(func(string, ...any) {})) + timeoutCtx, timeoutCancel := context.WithTimeout(taskCtx, s.taskTimeout) + log.WithField("task_uuid", task.MerchUuid).Infof("%v %v processing task", logHeader, logWorker) //price will be zeroPrice value in 
case of any error or if price not found - price := s.getMinimalPrice(ctx, task) + price := s.getMinimalPrice(timeoutCtx, task) sender <- shared.TaskResult{ MerchUuid: task.MerchUuid, Origin: task.Origin, Price: price, } + + timeoutCancel() + taskCancel() } } diff --git a/internal/parsers/mandarake/handler.go b/internal/parsers/mandarake/handler.go index cd6b272..0955206 100644 --- a/internal/parsers/mandarake/handler.go +++ b/internal/parsers/mandarake/handler.go @@ -2,6 +2,7 @@ package mandarake import ( log "github.com/sirupsen/logrus" + "time" ) const ( @@ -16,22 +17,25 @@ const ( type Parser struct { externalBrowser string goroutinesNumber int + taskTimeout time.Duration } type Deps struct { Enabled bool ExternalBrowser string GoroutinesNumber int + TaskTimeout int } func NewParser(deps Deps) *Parser { if !deps.Enabled { - log.Info(logHeader + "disabled") + log.Infof("%v disabled", logHeader) return nil } return &Parser{ externalBrowser: deps.ExternalBrowser, goroutinesNumber: deps.GoroutinesNumber, + taskTimeout: time.Minute * time.Duration(deps.TaskTimeout), } } diff --git a/internal/remote/interface.go b/internal/remote/interface.go index b34de33..210d214 100644 --- a/internal/remote/interface.go +++ b/internal/remote/interface.go @@ -8,5 +8,5 @@ import ( type Handler interface { RequestTasks(ctx context.Context, client pb.TaskProcessorClient) []shared.TaskResponse - SendResult(client pb.TaskProcessorClient, tasksDone []shared.TaskResult) + SendResult(ctx context.Context, client pb.TaskProcessorClient, tasksDone []shared.TaskResult) } diff --git a/internal/remote/send.go b/internal/remote/send.go index 6e9c148..046fadf 100644 --- a/internal/remote/send.go +++ b/internal/remote/send.go @@ -5,10 +5,14 @@ import ( log "github.com/sirupsen/logrus" "task-processor/internal/shared" pb "task-processor/proto/taskProcessor" + "time" ) -func (n *Network) SendResult(client pb.TaskProcessorClient, tasksDone []shared.TaskResult) { - stream, err := 
client.SendResult(context.Background()) +func (n *Network) SendResult(ctx context.Context, client pb.TaskProcessorClient, tasksDone []shared.TaskResult) { + sendCtx, cancel := context.WithTimeout(ctx, time.Second*60) + defer cancel() + + stream, err := client.SendResult(sendCtx) if err != nil { log.Fatalf("Error calling PostMerch: %v", err) } @@ -29,7 +33,12 @@ func (n *Network) SendResult(client pb.TaskProcessorClient, tasksDone []shared.T } } - if err = stream.CloseSend(); err != nil { - log.Fatalf("Error closing stream: %v", err) + //if err = stream.CloseSend(); err != nil { + // log.Fatalf("Error closing stream: %v", err) + //} + + _, err = stream.CloseAndRecv() + if err != nil { + log.Fatalf("Error receiving response: %v", err) } } diff --git a/pkg/router/handler.go b/pkg/router/handler.go index ddd1b91..0f924cc 100644 --- a/pkg/router/handler.go +++ b/pkg/router/handler.go @@ -21,7 +21,7 @@ type Deps struct { const pkgLogHeader string = "Router |" func NewHandler(deps Deps) *Handler { - engine := gin.Default() + engine := gin.New() if deps.GinMode == "release" { gin.SetMode(gin.ReleaseMode) @@ -32,7 +32,8 @@ func NewHandler(deps Deps) *Handler { } } - engine.GET("/", func(c *gin.Context) { c.JSON(200, gin.H{"msg": "v2"}) }) + logGroup := engine.Group("") + logGroup.GET("/", func(c *gin.Context) { c.JSON(200, gin.H{"msg": "v2"}) }) p := ginprometheus.NewPrometheus("gin") p.Use(engine)