diff --git a/cmd/main.go b/cmd/main.go index e244516..1fd8de8 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -1,33 +1,17 @@ package main import ( - "context" - log "github.com/sirupsen/logrus" - "net/http" - _ "net/http/pprof" - "os" - "os/signal" - "syscall" "task-processor/config" "task-processor/internal/app" "task-processor/internal/logging" ) func main() { - ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) - defer cancel() - c := config.NewConfig() logging.LogSetup(c.LogLevel) - if c.PprofEnabled { - go func() { - log.Println(http.ListenAndServe("localhost:6060", nil)) - }() - } - appl := app.New(c) - appl.Run(ctx) + appl.Run() } diff --git a/config.env b/config.env index 757bc25..7633de5 100644 --- a/config.env +++ b/config.env @@ -2,7 +2,6 @@ APP_LOG_LEVEL=error APP_NUMCPUS=-1 APP_CHECK_PERIOD=6 EXTERNAL_BROWSER= -PPROF_ENABLED=false GRPC_SERVER_HOST=0.0.0.0 GRPC_SERVER_PORT=9060 @@ -15,7 +14,6 @@ GRPC_SURUGAYA_SCRAPPER_PORT=9070 TASK_RETRY_COUNT=3 TASK_RETRY_MINUTES=5 -TASK_TIMEOUT_MINUTES=5 ORIGIN_SURUGAYA_ENABLED=false ORIGIN_MANDARAKE_ENABLED=false diff --git a/config/config.go b/config/config.go index 6b1ec59..b19ffe0 100644 --- a/config/config.go +++ b/config/config.go @@ -7,7 +7,6 @@ import ( ) type Config struct { - PprofEnabled bool LogLevel string NumCPUs int CheckPeriod int @@ -30,7 +29,6 @@ type GrpcConfig struct { type TasksConfig struct { RetryCount int RetryMinutes int - TaskTimeout int } type OriginEnabled struct { @@ -47,7 +45,6 @@ type MetricsConfig struct { func NewConfig() *Config { return &Config{ - PprofEnabled: getEnvBool("PPROF_ENABLED", true), LogLevel: getEnv("APP_LOG_LEVEL", "debug"), NumCPUs: getEnvInt("APP_NUMCPUS", -1), CheckPeriod: getEnvInt("APP_CHECK_PERIOD", 6), @@ -65,7 +62,6 @@ func NewConfig() *Config { TasksCfg: TasksConfig{ RetryCount: getEnvInt("TASK_RETRY_COUNT", 3), RetryMinutes: getEnvInt("TASK_RETRY_MINUTES", 5), - TaskTimeout: getEnvInt("TASK_TIMEOUT_MINUTES", 5), }, 
OriginEnabled: OriginEnabled{ diff --git a/internal/app/app.go b/internal/app/app.go index 4f27a7d..6353e7c 100644 --- a/internal/app/app.go +++ b/internal/app/app.go @@ -3,9 +3,11 @@ package app import ( "context" log "github.com/sirupsen/logrus" - "google.golang.org/grpc" "net" + "os" + "os/signal" "runtime" + "syscall" "task-processor/config" "task-processor/internal/appState" "task-processor/internal/parsers" @@ -26,8 +28,6 @@ type App struct { state *appState.State network *remote.Network numCPUs int - metricsSrv *router.Handler - taskApiSrv *grpc.Server } func New(c *config.Config) *App { @@ -38,14 +38,6 @@ func New(c *config.Config) *App { st := appState.NewState(numCPUs, c.CheckPeriod, c.TasksCfg.RetryCount, c.TasksCfg.RetryMinutes) - server := newServer(st) - - //metrics - mSrv := router.NewHandler(router.Deps{ - Addr: net.JoinHostPort(c.Metrics.Host, c.Metrics.Port), - GinMode: c.Metrics.GinMode, - }) - return &App{ config: c, checkPeriod: time.Duration(c.CheckPeriod), @@ -55,28 +47,33 @@ func New(c *config.Config) *App { state: st, network: remote.NewHandler(), numCPUs: numCPUs, - metricsSrv: mSrv, - taskApiSrv: server, } } -func (app *App) Run(ctx context.Context) { +func (app *App) Run() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + log.Info("Application start") - - addr := net.JoinHostPort(app.config.GrpcCfg.ServerHost, app.config.GrpcCfg.ServerPort) - log.WithFields(log.Fields{ - "Service address": addr, + "Service address": app.config.GrpcCfg.ServerHost + ":" + app.config.GrpcCfg.ServerPort, "Number of CPUs": app.numCPUs, }).Debug("App settings") - errChan := make(chan error, 16) + //metrics + mSrv := router.NewHandler(router.Deps{ + Addr: net.JoinHostPort(app.config.Metrics.Host, app.config.Metrics.Port), + GinMode: app.config.Metrics.GinMode, + }) //main + server := newServer(app) apiClient := newApiClient(app.config.GrpcCfg.ApiClientHost + ":" + app.config.GrpcCfg.ApiClientPort) + period := 
time.NewTicker(app.checkPeriod * time.Hour) + defer period.Stop() + sender := make(chan shared.TaskResult, app.numCPUs*10) - defer close(sender) // external scrapper surugayaScrapper := newSurugayaScrapperClient(app.config.GrpcCfg.SurugayaScrapperHost + ":" + app.config.GrpcCfg.SurugayaScrapperPort) @@ -85,15 +82,14 @@ func (app *App) Run(ctx context.Context) { handlers := make(map[string]parsers.TaskHandler) if app.config.OriginEnabled.Surugaya { - handlers[shared.OriginSurugaya] = parsers.NewSurugayaParser(surugayaScrapper) + handlers[shared.OriginSurugaya] = parsers.NewSurugayaParser(ctx, surugayaScrapper) } if app.config.OriginEnabled.Mandarake { - handlers[shared.OriginMandarake] = mandarake.NewParser(mandarake.Deps{ + handlers[shared.OriginMandarake] = mandarake.NewParser(mandarake.ParserDeps{ Enabled: app.config.OriginEnabled.Mandarake, ExternalBrowser: app.config.ExternalBrowser, GoroutinesNumber: app.numCPUs, - TaskTimeout: app.config.TasksCfg.TaskTimeout, }) } @@ -112,12 +108,9 @@ func (app *App) Run(ctx context.Context) { receivedTasks := app.network.RequestTasks(ctx, apiClient) log.WithField("length", len(receivedTasks)).Debug("End receiving") - taskProcessor.StartWork(ctx, receivedTasks) + taskProcessor.StartWork(receivedTasks) } - period := time.NewTicker(app.checkPeriod * time.Hour) - defer period.Stop() - go func() { process() //immediate start for range period.C { @@ -141,7 +134,7 @@ func (app *App) Run(ctx context.Context) { l := len(sendData) if l > 0 { log.WithField("length", l).Debug("Sending parsed data") - app.network.SendResult(ctx, apiClient, sendData) + app.network.SendResult(apiClient, sendData) sendData = sendData[:0] } } @@ -150,38 +143,37 @@ func (app *App) Run(ctx context.Context) { //start metrics server go func() { - if err := app.metricsSrv.Run(); err != nil { - errChan <- err + if err := mSrv.Run(); err != nil { + log.WithError(err).Error("Metrics server run failed") } }() //gRPC Server for status response go func() { - listener, 
err := net.Listen("tcp", addr) + listener, err := net.Listen("tcp", app.config.GrpcCfg.ServerHost+":"+app.config.GrpcCfg.ServerPort) if err != nil { - errChan <- err + log.Fatalf("failed to listen: %v", err) } - log.Infof("gRPC Server listening at %v", addr) - if err = app.taskApiSrv.Serve(listener); err != nil { - errChan <- err + log.Infof("gRPC Server listening at %v", app.config.GrpcCfg.ServerHost+":"+app.config.GrpcCfg.ServerPort) + if err := server.Serve(listener); err != nil { + log.Fatalf("failed to serve: %v", err) } }() - select { - case <-ctx.Done(): - app.shutdown(ctx) - case err := <-errChan: - log.WithError(err).Fatal("Application run error") - } -} - -func (app *App) shutdown(ctx context.Context) { - log.Info("Shutting down...") - - app.taskApiSrv.GracefulStop() - - if err := app.metricsSrv.Shutdown(ctx); err != nil { - log.WithError(err).Error("Failed to shutdown server") - } + go func() { + sigint := make(chan os.Signal, 1) + signal.Notify(sigint, os.Interrupt, syscall.SIGTERM) + <-sigint + log.Info("Shutting down...") + + period.Stop() + server.GracefulStop() + cancel() + if err := mSrv.Shutdown(ctx); err != nil { + log.WithError(err).Error("Failed to shutdown server") + } + }() + + <-ctx.Done() } diff --git a/internal/app/server.go b/internal/app/server.go index 9c9dadc..f837430 100644 --- a/internal/app/server.go +++ b/internal/app/server.go @@ -12,10 +12,10 @@ type Server struct { state *appState.State } -func newServer(state *appState.State) *grpc.Server { +func newServer(app *App) *grpc.Server { s := grpc.NewServer() srv := &Server{ - state: state, + state: app.state, } pb.RegisterTaskProcessorServer(s, srv) return s diff --git a/internal/parsers/interface.go b/internal/parsers/interface.go index 1a4c6fe..453d619 100644 --- a/internal/parsers/interface.go +++ b/internal/parsers/interface.go @@ -1,11 +1,10 @@ package parsers import ( - "context" "task-processor/internal/appState" "task-processor/internal/shared" ) type TaskHandler interface { 
- HandleTasks(ctx context.Context, tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) + HandleTasks(tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) } diff --git a/internal/parsers/mandarake/handleTasks.go b/internal/parsers/mandarake/handleTasks.go index 8596747..1920554 100644 --- a/internal/parsers/mandarake/handleTasks.go +++ b/internal/parsers/mandarake/handleTasks.go @@ -4,48 +4,15 @@ import ( "context" "github.com/chromedp/chromedp" log "github.com/sirupsen/logrus" - "runtime" "sync" "task-processor/internal/appState" "task-processor/internal/shared" ) -func (s *Parser) setupBrowser(ctx context.Context) (string, error) { - allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, s.externalBrowser) - defer allocCancel() +func (s *Parser) HandleTasks(tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) { + log.Debug(logHeader + logWorker + "handling tasks") - pageCtx, pageCancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(func(string, ...any) {})) - defer pageCancel() - - copyright := "No copyright div found." 
- - if err := chromedp.Run(pageCtx, - chromedp.Navigate("https://www.mandarake.co.jp/"), - chromedp.WaitReady("body", chromedp.ByQuery), - chromedp.Text(`div.copyright`, &copyright, chromedp.ByQuery, chromedp.AtLeast(0)), - chromedp.Navigate("https://www.mandarake.co.jp/index2.html"), - chromedp.WaitReady("body", chromedp.ByQuery), - ); err != nil { - log.WithError(err).Error(logHeader + logGetPrice + "failed to get single price tag") - return copyright, err - } - - return copyright, nil -} - -func (s *Parser) HandleTasks(ctx context.Context, tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) { - log.Infof("%v Start handling tasks", logHeader) - log.Infof("%v Setting up browser", logHeader) - cr, err := s.setupBrowser(ctx) - if err != nil { - log.WithError(err).Error(logHeader + logGetPrice + "failed to setup browser") - } - log.WithField("Copyright message", cr).Infof("%v Finished setting up browser.", logHeader) - - log.Infof("%v %v processing tasks...", logHeader, logWorker) - - allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, s.externalBrowser) - defer allocCancel() + allocCtx, allocCancel := chromedp.NewRemoteAllocator(s.baseCtx, s.externalBrowser) receiver := make(chan shared.Task, len(tasks)) for _, task := range tasks { @@ -53,37 +20,45 @@ func (s *Parser) HandleTasks(ctx context.Context, tasks []shared.Task, sender ch } close(receiver) - log.Debugf("%v gorutines before wait group: %v", logHeader, runtime.NumGoroutine()) wg := sync.WaitGroup{} for i := 0; i < s.goroutinesNumber; i++ { wg.Add(1) go func() { defer wg.Done() - s.worker(allocCtx, receiver, sender) + s.worker(allocCtx, receiver, sender, state) }() } wg.Wait() - - log.Debugf("%v gorutines after wait group: %v", logHeader, runtime.NumGoroutine()) - log.Infof(logHeader + logWorker + "finished handling tasks") + allocCancel() + log.Debug(logHeader + logWorker + "finished handling tasks") } -func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender 
chan shared.TaskResult) { +func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender chan shared.TaskResult, state *appState.State) { for task := range receiver { - taskCtx, taskCancel := chromedp.NewContext(ctx /* chromedp.WithLogf(log.Printf) */, chromedp.WithLogf(func(string, ...any) {})) - timeoutCtx, timeoutCancel := context.WithTimeout(taskCtx, s.taskTimeout) + log.WithField("task_uuid", task.MerchUuid).Debug(logHeader + logWorker + "processing task") - log.WithField("task_uuid", task.MerchUuid).Infof("%v %v processing task", logHeader, logWorker) + //pageCtx, pageCancel := chromedp.NewContext(ctx, chromedp.WithLogf(func(string, ...any) {})) + // + //price, err := s.getPrice(pageCtx, task) + //pageCancel() + + //price, err := s.getMinimalPrice(task) + //if err != nil { + // log.WithField("task_uuid", task.MerchUuid).Warn(logHeader + logWorker + logTaskWarning + "failed to process, zero price") + // sender <- shared.TaskResult{ + // MerchUuid: task.MerchUuid, + // Origin: task.Origin, + // Price: zeroPrice, + // } + // continue + //} //price will be zeroPrice value in case of any error or if price not found - price := s.getMinimalPrice(timeoutCtx, task) + price := s.getMinimalPrice(task) sender <- shared.TaskResult{ MerchUuid: task.MerchUuid, Origin: task.Origin, Price: price, } - - timeoutCancel() - taskCancel() } } diff --git a/internal/parsers/mandarake/handler.go b/internal/parsers/mandarake/handler.go index 0955206..92e24dd 100644 --- a/internal/parsers/mandarake/handler.go +++ b/internal/parsers/mandarake/handler.go @@ -1,41 +1,40 @@ package mandarake import ( + "context" log "github.com/sirupsen/logrus" - "time" ) const ( zeroPrice int32 = 0 taxMultiplier float64 = 1.1 - logHeader = "Mandarake parser |" - logWorker = "worker:" - logTaskWarning = "task warning:" - logGetPrice = "get price:" + logHeader = "Mandarake parser | " + logWorker = "worker: " + logTaskWarning = "task warning: " + logGetPrice = "get price: " ) type Parser 
struct { + baseCtx context.Context externalBrowser string goroutinesNumber int - taskTimeout time.Duration } -type Deps struct { +type ParserDeps struct { Enabled bool ExternalBrowser string GoroutinesNumber int - TaskTimeout int } -func NewParser(deps Deps) *Parser { +func NewParser(deps ParserDeps) *Parser { if !deps.Enabled { - log.Infof("%v disabled", logHeader) + log.Info(logHeader + "disabled") return nil } return &Parser{ + baseCtx: context.Background(), externalBrowser: deps.ExternalBrowser, goroutinesNumber: deps.GoroutinesNumber, - taskTimeout: time.Minute * time.Duration(deps.TaskTimeout), } } diff --git a/internal/parsers/mandarake/service.go b/internal/parsers/mandarake/service.go index 1e47a3c..3147fa1 100644 --- a/internal/parsers/mandarake/service.go +++ b/internal/parsers/mandarake/service.go @@ -41,7 +41,14 @@ func (s *Parser) getPrice(ctx context.Context, task shared.Task) (int32, error) return minimal, nil } -func (s *Parser) getMinimalPrice(sessionCtx context.Context, task shared.Task) int32 { +func (s *Parser) getMinimalPrice(task shared.Task) int32 { + ctx := context.Background() + allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, s.externalBrowser) + defer allocCancel() + + sessionCtx, sessionCancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf)) + defer sessionCancel() + var ( singlePrice string rangedPrice string diff --git a/internal/parsers/mandarake/service_test.go b/internal/parsers/mandarake/service_test.go index 736331a..c0f5a79 100644 --- a/internal/parsers/mandarake/service_test.go +++ b/internal/parsers/mandarake/service_test.go @@ -65,6 +65,7 @@ func TestParser_processPrices(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { s := &Parser{ + baseCtx: tt.fields.baseCtx, externalBrowser: tt.fields.externalBrowser, goroutinesNumber: tt.fields.goroutinesNumber, } diff --git a/internal/parsers/surugaya.go b/internal/parsers/surugaya.go index 1fecccf..9dc3c5f 100644 --- 
a/internal/parsers/surugaya.go +++ b/internal/parsers/surugaya.go @@ -11,19 +11,21 @@ import ( type SurugayaParser struct { scrapper sc.SurugayaScrapperClient + ctx context.Context } -func NewSurugayaParser(scrapper sc.SurugayaScrapperClient) *SurugayaParser { +func NewSurugayaParser(ctx context.Context, scrapper sc.SurugayaScrapperClient) *SurugayaParser { log.Debug("Surugaya parser init") return &SurugayaParser{ scrapper: scrapper, + ctx: ctx, } } -func (s *SurugayaParser) HandleTasks(ctx context.Context, tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) { +func (s *SurugayaParser) HandleTasks(tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) { log.WithField("count", len(tasks)).Debug("Handling Surugaya Tasks") - stream, err := s.scrapper.ProcessTasks(ctx) + stream, err := s.scrapper.ProcessTasks(s.ctx) if err != nil { log.WithField("err", err).Error("Error creating stream") return diff --git a/internal/processor/service.go b/internal/processor/service.go index 61e7fe0..4748566 100644 --- a/internal/processor/service.go +++ b/internal/processor/service.go @@ -1,14 +1,13 @@ package processor import ( - "context" log "github.com/sirupsen/logrus" "sync" "task-processor/internal/appState" "task-processor/internal/shared" ) -func (p *Processor) StartWork(ctx context.Context, receivedTasks []shared.TaskResponse) { +func (p *Processor) StartWork(receivedTasks []shared.TaskResponse) { log.Info("Starting work...") p.state.ResetCounters() @@ -28,7 +27,7 @@ func (p *Processor) StartWork(ctx context.Context, receivedTasks []shared.TaskRe go func(origin string, tasks []shared.Task) { defer wg.Done() log.Info("Running task handler for origin: ", origin) - p.handlers[origin].HandleTasks(ctx, tasks, p.out, p.state) + p.handlers[origin].HandleTasks(tasks, p.out, p.state) }(origin, tasks) } wg.Wait() diff --git a/internal/remote/interface.go b/internal/remote/interface.go index 210d214..b34de33 100644 --- 
a/internal/remote/interface.go +++ b/internal/remote/interface.go @@ -8,5 +8,5 @@ import ( type Handler interface { RequestTasks(ctx context.Context, client pb.TaskProcessorClient) []shared.TaskResponse - SendResult(ctx context.Context, client pb.TaskProcessorClient, tasksDone []shared.TaskResult) + SendResult(client pb.TaskProcessorClient, tasksDone []shared.TaskResult) } diff --git a/internal/remote/send.go b/internal/remote/send.go index 046fadf..6e9c148 100644 --- a/internal/remote/send.go +++ b/internal/remote/send.go @@ -5,14 +5,10 @@ import ( log "github.com/sirupsen/logrus" "task-processor/internal/shared" pb "task-processor/proto/taskProcessor" - "time" ) -func (n *Network) SendResult(ctx context.Context, client pb.TaskProcessorClient, tasksDone []shared.TaskResult) { - sendCtx, cancel := context.WithTimeout(ctx, time.Second*60) - defer cancel() - - stream, err := client.SendResult(sendCtx) +func (n *Network) SendResult(client pb.TaskProcessorClient, tasksDone []shared.TaskResult) { + stream, err := client.SendResult(context.Background()) if err != nil { log.Fatalf("Error calling PostMerch: %v", err) } @@ -33,12 +29,7 @@ func (n *Network) SendResult(ctx context.Context, client pb.TaskProcessorClient, } } - //if err = stream.CloseSend(); err != nil { - // log.Fatalf("Error closing stream: %v", err) - //} - - _, err = stream.CloseAndRecv() - if err != nil { - log.Fatalf("Error receiving response: %v", err) + if err = stream.CloseSend(); err != nil { + log.Fatalf("Error closing stream: %v", err) } } diff --git a/pkg/router/handler.go b/pkg/router/handler.go index 0f924cc..ddd1b91 100644 --- a/pkg/router/handler.go +++ b/pkg/router/handler.go @@ -21,7 +21,7 @@ type Deps struct { const pkgLogHeader string = "Router |" func NewHandler(deps Deps) *Handler { - engine := gin.New() + engine := gin.Default() if deps.GinMode == "release" { gin.SetMode(gin.ReleaseMode) @@ -32,8 +32,7 @@ func NewHandler(deps Deps) *Handler { } } - logGroup := engine.Group("") - 
logGroup.GET("/", func(c *gin.Context) { c.JSON(200, gin.H{"msg": "v2"}) }) + engine.GET("/", func(c *gin.Context) { c.JSON(200, gin.H{"msg": "v2"}) }) p := ginprometheus.NewPrometheus("gin") p.Use(engine)