Compare commits

...

10 commits

Author SHA1 Message Date
nquidox
1e331cada5 gin logger
All checks were successful
/ Make image (push) Successful in 1m14s
2026-03-18 20:34:33 +03:00
nquidox
532386222b setup browser 2026-03-18 20:34:22 +03:00
nquidox
a32b1ef69b CloseAndRecv in send
All checks were successful
/ Make image (push) Successful in 49s
2026-03-01 01:01:06 +03:00
nquidox
77c626a7c3 exclude metrics req from log
All checks were successful
/ Make image (push) Successful in 48s
2026-03-01 00:06:19 +03:00
nquidox
d90682e183 pprof env 2026-03-01 00:05:55 +03:00
nquidox
8395cf71b4 pprof
All checks were successful
/ Make image (push) Successful in 49s
2026-02-28 23:33:37 +03:00
nquidox
f466566505 page timeout added 2026-02-28 23:33:22 +03:00
nquidox
56309cafb9 env added 2026-02-28 23:32:47 +03:00
nquidox
13ebb27335 small refactor
All checks were successful
/ Make image (push) Successful in 53s
2026-02-28 10:53:33 +03:00
nquidox
c955615fb1 context fix 2026-02-28 10:53:02 +03:00
15 changed files with 162 additions and 104 deletions

View file

@ -1,17 +1,33 @@
package main
import (
"context"
log "github.com/sirupsen/logrus"
"net/http"
_ "net/http/pprof"
"os"
"os/signal"
"syscall"
"task-processor/config"
"task-processor/internal/app"
"task-processor/internal/logging"
)
func main() {
ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer cancel()
c := config.NewConfig()
logging.LogSetup(c.LogLevel)
if c.PprofEnabled {
go func() {
log.Println(http.ListenAndServe("localhost:6060", nil))
}()
}
appl := app.New(c)
appl.Run()
appl.Run(ctx)
}

View file

@ -2,6 +2,7 @@ APP_LOG_LEVEL=error
APP_NUMCPUS=-1
APP_CHECK_PERIOD=6
EXTERNAL_BROWSER=
PPROF_ENABLED=false
GRPC_SERVER_HOST=0.0.0.0
GRPC_SERVER_PORT=9060
@ -14,6 +15,7 @@ GRPC_SURUGAYA_SCRAPPER_PORT=9070
TASK_RETRY_COUNT=3
TASK_RETRY_MINUTES=5
TASK_TIMEOUT_MINUTES=5
ORIGIN_SURUGAYA_ENABLED=false
ORIGIN_MANDARAKE_ENABLED=false

View file

@ -7,6 +7,7 @@ import (
)
type Config struct {
PprofEnabled bool
LogLevel string
NumCPUs int
CheckPeriod int
@ -29,6 +30,7 @@ type GrpcConfig struct {
// TasksConfig holds the retry and timeout settings applied to parsing tasks.
// NewConfig fills it from the TASK_RETRY_COUNT, TASK_RETRY_MINUTES and
// TASK_TIMEOUT_MINUTES environment variables.
type TasksConfig struct {
// RetryCount is read from TASK_RETRY_COUNT (default 3) and handed to
// appState.NewState.
RetryCount int
// RetryMinutes is read from TASK_RETRY_MINUTES (default 5) and handed to
// appState.NewState.
RetryMinutes int
// TaskTimeout is a whole number of minutes read from TASK_TIMEOUT_MINUTES
// (default 5). The Mandarake parser multiplies it by time.Minute and uses
// the result as the timeout for each individual task.
TaskTimeout int
}
type OriginEnabled struct {
@ -45,6 +47,7 @@ type MetricsConfig struct {
func NewConfig() *Config {
return &Config{
PprofEnabled: getEnvBool("PPROF_ENABLED", true),
LogLevel: getEnv("APP_LOG_LEVEL", "debug"),
NumCPUs: getEnvInt("APP_NUMCPUS", -1),
CheckPeriod: getEnvInt("APP_CHECK_PERIOD", 6),
@ -62,6 +65,7 @@ func NewConfig() *Config {
TasksCfg: TasksConfig{
RetryCount: getEnvInt("TASK_RETRY_COUNT", 3),
RetryMinutes: getEnvInt("TASK_RETRY_MINUTES", 5),
TaskTimeout: getEnvInt("TASK_TIMEOUT_MINUTES", 5),
},
OriginEnabled: OriginEnabled{

View file

@ -3,11 +3,9 @@ package app
import (
"context"
log "github.com/sirupsen/logrus"
"google.golang.org/grpc"
"net"
"os"
"os/signal"
"runtime"
"syscall"
"task-processor/config"
"task-processor/internal/appState"
"task-processor/internal/parsers"
@ -28,6 +26,8 @@ type App struct {
state *appState.State
network *remote.Network
numCPUs int
metricsSrv *router.Handler
taskApiSrv *grpc.Server
}
func New(c *config.Config) *App {
@ -38,6 +38,14 @@ func New(c *config.Config) *App {
st := appState.NewState(numCPUs, c.CheckPeriod, c.TasksCfg.RetryCount, c.TasksCfg.RetryMinutes)
server := newServer(st)
//metrics
mSrv := router.NewHandler(router.Deps{
Addr: net.JoinHostPort(c.Metrics.Host, c.Metrics.Port),
GinMode: c.Metrics.GinMode,
})
return &App{
config: c,
checkPeriod: time.Duration(c.CheckPeriod),
@ -47,33 +55,28 @@ func New(c *config.Config) *App {
state: st,
network: remote.NewHandler(),
numCPUs: numCPUs,
metricsSrv: mSrv,
taskApiSrv: server,
}
}
func (app *App) Run() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
func (app *App) Run(ctx context.Context) {
log.Info("Application start")
addr := net.JoinHostPort(app.config.GrpcCfg.ServerHost, app.config.GrpcCfg.ServerPort)
log.WithFields(log.Fields{
"Service address": app.config.GrpcCfg.ServerHost + ":" + app.config.GrpcCfg.ServerPort,
"Service address": addr,
"Number of CPUs": app.numCPUs,
}).Debug("App settings")
//metrics
mSrv := router.NewHandler(router.Deps{
Addr: net.JoinHostPort(app.config.Metrics.Host, app.config.Metrics.Port),
GinMode: app.config.Metrics.GinMode,
})
errChan := make(chan error, 16)
//main
server := newServer(app)
apiClient := newApiClient(app.config.GrpcCfg.ApiClientHost + ":" + app.config.GrpcCfg.ApiClientPort)
period := time.NewTicker(app.checkPeriod * time.Hour)
defer period.Stop()
sender := make(chan shared.TaskResult, app.numCPUs*10)
defer close(sender)
// external scrapper
surugayaScrapper := newSurugayaScrapperClient(app.config.GrpcCfg.SurugayaScrapperHost + ":" + app.config.GrpcCfg.SurugayaScrapperPort)
@ -82,14 +85,15 @@ func (app *App) Run() {
handlers := make(map[string]parsers.TaskHandler)
if app.config.OriginEnabled.Surugaya {
handlers[shared.OriginSurugaya] = parsers.NewSurugayaParser(ctx, surugayaScrapper)
handlers[shared.OriginSurugaya] = parsers.NewSurugayaParser(surugayaScrapper)
}
if app.config.OriginEnabled.Mandarake {
handlers[shared.OriginMandarake] = mandarake.NewParser(mandarake.ParserDeps{
handlers[shared.OriginMandarake] = mandarake.NewParser(mandarake.Deps{
Enabled: app.config.OriginEnabled.Mandarake,
ExternalBrowser: app.config.ExternalBrowser,
GoroutinesNumber: app.numCPUs,
TaskTimeout: app.config.TasksCfg.TaskTimeout,
})
}
@ -108,9 +112,12 @@ func (app *App) Run() {
receivedTasks := app.network.RequestTasks(ctx, apiClient)
log.WithField("length", len(receivedTasks)).Debug("End receiving")
taskProcessor.StartWork(receivedTasks)
taskProcessor.StartWork(ctx, receivedTasks)
}
period := time.NewTicker(app.checkPeriod * time.Hour)
defer period.Stop()
go func() {
process() //immediate start
for range period.C {
@ -134,7 +141,7 @@ func (app *App) Run() {
l := len(sendData)
if l > 0 {
log.WithField("length", l).Debug("Sending parsed data")
app.network.SendResult(apiClient, sendData)
app.network.SendResult(ctx, apiClient, sendData)
sendData = sendData[:0]
}
}
@ -143,37 +150,38 @@ func (app *App) Run() {
//start metrics server
go func() {
if err := mSrv.Run(); err != nil {
log.WithError(err).Error("Metrics server run failed")
if err := app.metricsSrv.Run(); err != nil {
errChan <- err
}
}()
//gRPC Server for status response
go func() {
listener, err := net.Listen("tcp", app.config.GrpcCfg.ServerHost+":"+app.config.GrpcCfg.ServerPort)
listener, err := net.Listen("tcp", addr)
if err != nil {
log.Fatalf("failed to listen: %v", err)
errChan <- err
}
log.Infof("gRPC Server listening at %v", app.config.GrpcCfg.ServerHost+":"+app.config.GrpcCfg.ServerPort)
if err := server.Serve(listener); err != nil {
log.Fatalf("failed to serve: %v", err)
log.Infof("gRPC Server listening at %v", addr)
if err = app.taskApiSrv.Serve(listener); err != nil {
errChan <- err
}
}()
go func() {
sigint := make(chan os.Signal, 1)
signal.Notify(sigint, os.Interrupt, syscall.SIGTERM)
<-sigint
log.Info("Shutting down...")
period.Stop()
server.GracefulStop()
cancel()
if err := mSrv.Shutdown(ctx); err != nil {
log.WithError(err).Error("Failed to shutdown server")
}
}()
<-ctx.Done()
select {
case <-ctx.Done():
app.shutdown(ctx)
case err := <-errChan:
log.WithError(err).Fatal("Application run error")
}
}
// shutdown stops the gRPC task API server and then the metrics server.
//
// Run invokes shutdown only after ctx has been cancelled, so ctx itself cannot
// serve as the deadline for the metrics server: a context that is already done
// gives the server no time to finish in-flight requests and makes Shutdown
// report the cancellation as an error. A fresh, bounded context is used
// instead. The ctx parameter is kept so existing callers stay unchanged.
func (app *App) shutdown(ctx context.Context) {
	// Upper bound on how long the metrics server may take to drain.
	const metricsShutdownTimeout = 10 * time.Second

	log.Info("Shutting down...")

	// Blocks until pending RPCs on the task API server have completed.
	app.taskApiSrv.GracefulStop()

	// NOTE(review): presumably router.Handler.Shutdown forwards to
	// http.Server.Shutdown, which honours the context deadline — confirm.
	shutdownCtx, cancel := context.WithTimeout(context.Background(), metricsShutdownTimeout)
	defer cancel()

	if err := app.metricsSrv.Shutdown(shutdownCtx); err != nil {
		log.WithError(err).Error("Failed to shutdown server")
	}
}

View file

@ -12,10 +12,10 @@ type Server struct {
state *appState.State
}
func newServer(app *App) *grpc.Server {
func newServer(state *appState.State) *grpc.Server {
s := grpc.NewServer()
srv := &Server{
state: app.state,
state: state,
}
pb.RegisterTaskProcessorServer(s, srv)
return s

View file

@ -1,10 +1,11 @@
package parsers
import (
"context"
"task-processor/internal/appState"
"task-processor/internal/shared"
)
type TaskHandler interface {
HandleTasks(tasks []shared.Task, sender chan shared.TaskResult, state *appState.State)
HandleTasks(ctx context.Context, tasks []shared.Task, sender chan shared.TaskResult, state *appState.State)
}

View file

@ -4,15 +4,48 @@ import (
"context"
"github.com/chromedp/chromedp"
log "github.com/sirupsen/logrus"
"runtime"
"sync"
"task-processor/internal/appState"
"task-processor/internal/shared"
)
func (s *Parser) HandleTasks(tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) {
log.Debug(logHeader + logWorker + "handling tasks")
func (s *Parser) setupBrowser(ctx context.Context) (string, error) {
allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, s.externalBrowser)
defer allocCancel()
allocCtx, allocCancel := chromedp.NewRemoteAllocator(s.baseCtx, s.externalBrowser)
pageCtx, pageCancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(func(string, ...any) {}))
defer pageCancel()
copyright := "No copyright div found."
if err := chromedp.Run(pageCtx,
chromedp.Navigate("https://www.mandarake.co.jp/"),
chromedp.WaitReady("body", chromedp.ByQuery),
chromedp.Text(`div.copyright`, &copyright, chromedp.ByQuery, chromedp.AtLeast(0)),
chromedp.Navigate("https://www.mandarake.co.jp/index2.html"),
chromedp.WaitReady("body", chromedp.ByQuery),
); err != nil {
log.WithError(err).Error(logHeader + logGetPrice + "failed to get single price tag")
return copyright, err
}
return copyright, nil
}
func (s *Parser) HandleTasks(ctx context.Context, tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) {
log.Infof("%v Start handling tasks", logHeader)
log.Infof("%v Setting up browser", logHeader)
cr, err := s.setupBrowser(ctx)
if err != nil {
log.WithError(err).Error(logHeader + logGetPrice + "failed to setup browser")
}
log.WithField("Copyright message", cr).Infof("%v Finished setting up browser.", logHeader)
log.Infof("%v %v processing tasks...", logHeader, logWorker)
allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, s.externalBrowser)
defer allocCancel()
receiver := make(chan shared.Task, len(tasks))
for _, task := range tasks {
@ -20,45 +53,37 @@ func (s *Parser) HandleTasks(tasks []shared.Task, sender chan shared.TaskResult,
}
close(receiver)
log.Debugf("%v gorutines before wait group: %v", logHeader, runtime.NumGoroutine())
wg := sync.WaitGroup{}
for i := 0; i < s.goroutinesNumber; i++ {
wg.Add(1)
go func() {
defer wg.Done()
s.worker(allocCtx, receiver, sender, state)
s.worker(allocCtx, receiver, sender)
}()
}
wg.Wait()
allocCancel()
log.Debug(logHeader + logWorker + "finished handling tasks")
log.Debugf("%v gorutines after wait group: %v", logHeader, runtime.NumGoroutine())
log.Infof(logHeader + logWorker + "finished handling tasks")
}
func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender chan shared.TaskResult, state *appState.State) {
func (s *Parser) worker(ctx context.Context, receiver chan shared.Task, sender chan shared.TaskResult) {
for task := range receiver {
log.WithField("task_uuid", task.MerchUuid).Debug(logHeader + logWorker + "processing task")
taskCtx, taskCancel := chromedp.NewContext(ctx /* chromedp.WithLogf(log.Printf) */, chromedp.WithLogf(func(string, ...any) {}))
timeoutCtx, timeoutCancel := context.WithTimeout(taskCtx, s.taskTimeout)
//pageCtx, pageCancel := chromedp.NewContext(ctx, chromedp.WithLogf(func(string, ...any) {}))
//
//price, err := s.getPrice(pageCtx, task)
//pageCancel()
//price, err := s.getMinimalPrice(task)
//if err != nil {
// log.WithField("task_uuid", task.MerchUuid).Warn(logHeader + logWorker + logTaskWarning + "failed to process, zero price")
// sender <- shared.TaskResult{
// MerchUuid: task.MerchUuid,
// Origin: task.Origin,
// Price: zeroPrice,
// }
// continue
//}
log.WithField("task_uuid", task.MerchUuid).Infof("%v %v processing task", logHeader, logWorker)
//price will be zeroPrice value in case of any error or if price not found
price := s.getMinimalPrice(task)
price := s.getMinimalPrice(timeoutCtx, task)
sender <- shared.TaskResult{
MerchUuid: task.MerchUuid,
Origin: task.Origin,
Price: price,
}
timeoutCancel()
taskCancel()
}
}

View file

@ -1,40 +1,41 @@
package mandarake
import (
"context"
log "github.com/sirupsen/logrus"
"time"
)
const (
zeroPrice int32 = 0
taxMultiplier float64 = 1.1
logHeader = "Mandarake parser | "
logWorker = "worker: "
logTaskWarning = "task warning: "
logGetPrice = "get price: "
logHeader = "Mandarake parser |"
logWorker = "worker:"
logTaskWarning = "task warning:"
logGetPrice = "get price:"
)
type Parser struct {
baseCtx context.Context
externalBrowser string
goroutinesNumber int
taskTimeout time.Duration
}
type ParserDeps struct {
type Deps struct {
Enabled bool
ExternalBrowser string
GoroutinesNumber int
TaskTimeout int
}
func NewParser(deps ParserDeps) *Parser {
func NewParser(deps Deps) *Parser {
if !deps.Enabled {
log.Info(logHeader + "disabled")
log.Infof("%v disabled", logHeader)
return nil
}
return &Parser{
baseCtx: context.Background(),
externalBrowser: deps.ExternalBrowser,
goroutinesNumber: deps.GoroutinesNumber,
taskTimeout: time.Minute * time.Duration(deps.TaskTimeout),
}
}

View file

@ -41,14 +41,7 @@ func (s *Parser) getPrice(ctx context.Context, task shared.Task) (int32, error)
return minimal, nil
}
func (s *Parser) getMinimalPrice(task shared.Task) int32 {
ctx := context.Background()
allocCtx, allocCancel := chromedp.NewRemoteAllocator(ctx, s.externalBrowser)
defer allocCancel()
sessionCtx, sessionCancel := chromedp.NewContext(allocCtx, chromedp.WithLogf(log.Printf))
defer sessionCancel()
func (s *Parser) getMinimalPrice(sessionCtx context.Context, task shared.Task) int32 {
var (
singlePrice string
rangedPrice string

View file

@ -65,7 +65,6 @@ func TestParser_processPrices(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
s := &Parser{
baseCtx: tt.fields.baseCtx,
externalBrowser: tt.fields.externalBrowser,
goroutinesNumber: tt.fields.goroutinesNumber,
}

View file

@ -11,21 +11,19 @@ import (
type SurugayaParser struct {
scrapper sc.SurugayaScrapperClient
ctx context.Context
}
func NewSurugayaParser(ctx context.Context, scrapper sc.SurugayaScrapperClient) *SurugayaParser {
func NewSurugayaParser(scrapper sc.SurugayaScrapperClient) *SurugayaParser {
log.Debug("Surugaya parser init")
return &SurugayaParser{
scrapper: scrapper,
ctx: ctx,
}
}
func (s *SurugayaParser) HandleTasks(tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) {
func (s *SurugayaParser) HandleTasks(ctx context.Context, tasks []shared.Task, sender chan shared.TaskResult, state *appState.State) {
log.WithField("count", len(tasks)).Debug("Handling Surugaya Tasks")
stream, err := s.scrapper.ProcessTasks(s.ctx)
stream, err := s.scrapper.ProcessTasks(ctx)
if err != nil {
log.WithField("err", err).Error("Error creating stream")
return

View file

@ -1,13 +1,14 @@
package processor
import (
"context"
log "github.com/sirupsen/logrus"
"sync"
"task-processor/internal/appState"
"task-processor/internal/shared"
)
func (p *Processor) StartWork(receivedTasks []shared.TaskResponse) {
func (p *Processor) StartWork(ctx context.Context, receivedTasks []shared.TaskResponse) {
log.Info("Starting work...")
p.state.ResetCounters()
@ -27,7 +28,7 @@ func (p *Processor) StartWork(receivedTasks []shared.TaskResponse) {
go func(origin string, tasks []shared.Task) {
defer wg.Done()
log.Info("Running task handler for origin: ", origin)
p.handlers[origin].HandleTasks(tasks, p.out, p.state)
p.handlers[origin].HandleTasks(ctx, tasks, p.out, p.state)
}(origin, tasks)
}
wg.Wait()

View file

@ -8,5 +8,5 @@ import (
type Handler interface {
RequestTasks(ctx context.Context, client pb.TaskProcessorClient) []shared.TaskResponse
SendResult(client pb.TaskProcessorClient, tasksDone []shared.TaskResult)
SendResult(ctx context.Context, client pb.TaskProcessorClient, tasksDone []shared.TaskResult)
}

View file

@ -5,10 +5,14 @@ import (
log "github.com/sirupsen/logrus"
"task-processor/internal/shared"
pb "task-processor/proto/taskProcessor"
"time"
)
func (n *Network) SendResult(client pb.TaskProcessorClient, tasksDone []shared.TaskResult) {
stream, err := client.SendResult(context.Background())
func (n *Network) SendResult(ctx context.Context, client pb.TaskProcessorClient, tasksDone []shared.TaskResult) {
sendCtx, cancel := context.WithTimeout(ctx, time.Second*60)
defer cancel()
stream, err := client.SendResult(sendCtx)
if err != nil {
log.Fatalf("Error calling PostMerch: %v", err)
}
@ -29,7 +33,12 @@ func (n *Network) SendResult(client pb.TaskProcessorClient, tasksDone []shared.T
}
}
if err = stream.CloseSend(); err != nil {
log.Fatalf("Error closing stream: %v", err)
//if err = stream.CloseSend(); err != nil {
// log.Fatalf("Error closing stream: %v", err)
//}
_, err = stream.CloseAndRecv()
if err != nil {
log.Fatalf("Error receiving response: %v", err)
}
}

View file

@ -21,7 +21,7 @@ type Deps struct {
const pkgLogHeader string = "Router |"
func NewHandler(deps Deps) *Handler {
engine := gin.Default()
engine := gin.New()
if deps.GinMode == "release" {
gin.SetMode(gin.ReleaseMode)
@ -32,7 +32,8 @@ func NewHandler(deps Deps) *Handler {
}
}
engine.GET("/", func(c *gin.Context) { c.JSON(200, gin.H{"msg": "v2"}) })
logGroup := engine.Group("")
logGroup.GET("/", func(c *gin.Context) { c.JSON(200, gin.H{"msg": "v2"}) })
p := ginprometheus.NewPrometheus("gin")
p.Use(engine)