mirror of
https://github.com/muety/wakapi.git
synced 2025-12-05 22:20:24 -08:00
refactor(perf): user first heartbeats query
This commit is contained in:
@@ -134,20 +134,22 @@ func (r *HeartbeatRepository) GetLatestByFilters(user *models.User, filterMap ma
|
||||
|
||||
func (r *HeartbeatRepository) GetFirstByUsers() ([]*models.TimeByUser, error) {
|
||||
var result []*models.TimeByUser
|
||||
r.db.Model(&models.User{}).
|
||||
Select(utils.QuoteSql(r.db, "users.id as %s, min(time) as %s", "user", "time")).
|
||||
Joins("left join heartbeats on users.id = heartbeats.user_id").
|
||||
Group("users.id").
|
||||
r.db.Raw("with agg as (select " + utils.QuoteSql(r.db, "user_id, min(time) as %s", "time") + " from heartbeats group by user_id) " +
|
||||
"select " + utils.QuoteSql(r.db, "id as %s, time ", "user") +
|
||||
"from users " +
|
||||
"left join agg on agg.user_id = id " +
|
||||
"order by users.id").
|
||||
Scan(&result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
func (r *HeartbeatRepository) GetLastByUsers() ([]*models.TimeByUser, error) {
|
||||
var result []*models.TimeByUser
|
||||
r.db.Model(&models.User{}).
|
||||
Select(utils.QuoteSql(r.db, "users.id as %s, max(time) as %s", "user", "time")).
|
||||
Joins("left join heartbeats on users.id = heartbeats.user_id").
|
||||
Group("user").
|
||||
r.db.Raw("with agg as (select " + utils.QuoteSql(r.db, "user_id, max(time) as %s", "time") + " from heartbeats group by user_id) " +
|
||||
"select " + utils.QuoteSql(r.db, "id as %s, time ", "user") +
|
||||
"from users " +
|
||||
"left join agg on agg.user_id = id " +
|
||||
"order by users.id").
|
||||
Scan(&result)
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -68,14 +68,14 @@ func (srv *AggregationService) AggregateSummaries(userIds datastructure.Set[stri
|
||||
slog.Info("generating summaries")
|
||||
|
||||
// Get a map from user ids to the time of their latest summary or nil if none exists yet
|
||||
lastUserSummaryTimes, err := srv.summaryService.GetLatestByUser()
|
||||
lastUserSummaryTimes, err := srv.summaryService.GetLatestByUser() // TODO: build user-specific variant of this query for efficiency
|
||||
if err != nil {
|
||||
config.Log().Error("error occurred", "error", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Get a map from user ids to the time of their earliest heartbeats or nil if none exists yet
|
||||
firstUserHeartbeatTimes, err := srv.heartbeatService.GetFirstByUsers()
|
||||
firstUserHeartbeatTimes, err := srv.heartbeatService.GetFirstByUsers() // TODO: build user-specific variant of this query for efficiency
|
||||
if err != nil {
|
||||
config.Log().Error("error occurred", "error", err)
|
||||
return err
|
||||
|
||||
@@ -1,205 +1,205 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"github.com/duke-git/lancet/v2/slice"
|
||||
"github.com/muety/artifex/v2"
|
||||
"github.com/muety/wakapi/config"
|
||||
"github.com/muety/wakapi/models"
|
||||
"github.com/muety/wakapi/utils"
|
||||
"log/slog"
|
||||
"time"
|
||||
"github.com/duke-git/lancet/v2/slice"
|
||||
"github.com/muety/artifex/v2"
|
||||
"github.com/muety/wakapi/config"
|
||||
"github.com/muety/wakapi/models"
|
||||
"github.com/muety/wakapi/utils"
|
||||
"log/slog"
|
||||
"time"
|
||||
)
|
||||
|
||||
type HousekeepingService struct {
|
||||
config *config.Config
|
||||
userSrvc IUserService
|
||||
heartbeatSrvc IHeartbeatService
|
||||
summarySrvc ISummaryService
|
||||
queueDefault *artifex.Dispatcher
|
||||
queueWorkers *artifex.Dispatcher
|
||||
config *config.Config
|
||||
userSrvc IUserService
|
||||
heartbeatSrvc IHeartbeatService
|
||||
summarySrvc ISummaryService
|
||||
queueDefault *artifex.Dispatcher
|
||||
queueWorkers *artifex.Dispatcher
|
||||
}
|
||||
|
||||
func NewHousekeepingService(userService IUserService, heartbeatService IHeartbeatService, summaryService ISummaryService) *HousekeepingService {
|
||||
return &HousekeepingService{
|
||||
config: config.Get(),
|
||||
userSrvc: userService,
|
||||
heartbeatSrvc: heartbeatService,
|
||||
summarySrvc: summaryService,
|
||||
queueDefault: config.GetDefaultQueue(),
|
||||
queueWorkers: config.GetQueue(config.QueueHousekeeping),
|
||||
}
|
||||
return &HousekeepingService{
|
||||
config: config.Get(),
|
||||
userSrvc: userService,
|
||||
heartbeatSrvc: heartbeatService,
|
||||
summarySrvc: summaryService,
|
||||
queueDefault: config.GetDefaultQueue(),
|
||||
queueWorkers: config.GetQueue(config.QueueHousekeeping),
|
||||
}
|
||||
}
|
||||
|
||||
func (s *HousekeepingService) Schedule() {
|
||||
s.scheduleDataCleanups()
|
||||
s.scheduleInactiveUsersCleanup()
|
||||
if s.config.App.WarmCaches {
|
||||
s.scheduleProjectStatsCacheWarming()
|
||||
}
|
||||
s.scheduleDataCleanups()
|
||||
s.scheduleInactiveUsersCleanup()
|
||||
if s.config.App.WarmCaches {
|
||||
s.scheduleProjectStatsCacheWarming()
|
||||
}
|
||||
}
|
||||
|
||||
func (s *HousekeepingService) CleanUserDataBefore(user *models.User, before time.Time) error {
|
||||
slog.Warn("cleaning up user data older than", "userID", user.ID, "date", before)
|
||||
if s.config.App.DataCleanupDryRun {
|
||||
slog.Info("skipping actual data deletion for dry run", "userID", user.ID)
|
||||
return nil
|
||||
}
|
||||
slog.Warn("cleaning up user data older than", "userID", user.ID, "date", before)
|
||||
if s.config.App.DataCleanupDryRun {
|
||||
slog.Info("skipping actual data deletion for dry run", "userID", user.ID)
|
||||
return nil
|
||||
}
|
||||
|
||||
// clear old heartbeats
|
||||
if err := s.heartbeatSrvc.DeleteByUserBefore(user, before); err != nil {
|
||||
return err
|
||||
}
|
||||
// clear old heartbeats
|
||||
if err := s.heartbeatSrvc.DeleteByUserBefore(user, before); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// clear old summaries
|
||||
slog.Info("clearing summaries for user older than", "userID", user.ID, "date", before)
|
||||
if err := s.summarySrvc.DeleteByUserBefore(user.ID, before); err != nil {
|
||||
return err
|
||||
}
|
||||
// clear old summaries
|
||||
slog.Info("clearing summaries for user older than", "userID", user.ID, "date", before)
|
||||
if err := s.summarySrvc.DeleteByUserBefore(user.ID, before); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HousekeepingService) CleanInactiveUsers(before time.Time) error {
|
||||
slog.Info("cleaning up users inactive since", "date", before)
|
||||
users, err := s.userSrvc.GetAll()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
slog.Info("cleaning up users inactive since", "date", before)
|
||||
users, err := s.userSrvc.GetAll()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var i int
|
||||
for _, u := range users {
|
||||
if u.LastLoggedInAt.T().After(before) || u.HasData {
|
||||
continue
|
||||
}
|
||||
var i int
|
||||
for _, u := range users {
|
||||
if u.LastLoggedInAt.T().After(before) || u.HasData {
|
||||
continue
|
||||
}
|
||||
|
||||
slog.Warn("deleting user due to inactivity and no data", "userID", u.ID)
|
||||
if err := s.userSrvc.Delete(u); err != nil {
|
||||
config.Log().Error("failed to delete user", "userID", u.ID)
|
||||
} else {
|
||||
i++
|
||||
}
|
||||
}
|
||||
slog.Info("deleted users due to inactivity", "deletedCount", i, "totalCount", len(users))
|
||||
slog.Warn("deleting user due to inactivity and no data", "userID", u.ID)
|
||||
if err := s.userSrvc.Delete(u); err != nil {
|
||||
config.Log().Error("failed to delete user", "userID", u.ID)
|
||||
} else {
|
||||
i++
|
||||
}
|
||||
}
|
||||
slog.Info("deleted users due to inactivity", "deletedCount", i, "totalCount", len(users))
|
||||
|
||||
return nil
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HousekeepingService) WarmUserProjectStatsCache(user *models.User) error {
|
||||
slog.Info("pre-warming project stats cache for user", "userID", user.ID)
|
||||
if _, err := s.heartbeatSrvc.GetUserProjectStats(user, time.Time{}, utils.BeginOfToday(time.Local), nil, true); err != nil {
|
||||
config.Log().Error("failed to pre-warm project stats cache", "userID", user.ID, "error", err)
|
||||
}
|
||||
return nil
|
||||
slog.Info("pre-warming project stats cache for user", "userID", user.ID)
|
||||
if _, err := s.heartbeatSrvc.GetUserProjectStats(user, time.Time{}, utils.BeginOfToday(time.Local), nil, true); err != nil {
|
||||
config.Log().Error("failed to pre-warm project stats cache", "userID", user.ID, "error", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *HousekeepingService) runWarmProjectStatsCache() {
|
||||
// fetch active users
|
||||
users, err := s.userSrvc.GetActive(false)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to get active users for project stats cache warming", "error", err)
|
||||
return
|
||||
}
|
||||
// fetch active users
|
||||
users, err := s.userSrvc.GetActive(false)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to get active users for project stats cache warming", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
// fetch user heartbeat counts
|
||||
userHeartbeatCounts, err := s.heartbeatSrvc.CountByUsers(users)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to count user heartbeats for project stats cache warming", "error", err)
|
||||
return
|
||||
}
|
||||
// fetch user heartbeat counts
|
||||
userHeartbeatCounts, err := s.heartbeatSrvc.CountByUsers(users)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to count user heartbeats for project stats cache warming", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
// schedule jobs
|
||||
for _, c := range userHeartbeatCounts {
|
||||
// only warm cache for users with >= 100k heartbeats (where calculation is expected to take unbearably long)
|
||||
if c.Count < 100_000 {
|
||||
continue
|
||||
}
|
||||
// schedule jobs
|
||||
for _, c := range userHeartbeatCounts {
|
||||
// only warm cache for users with >= 100k heartbeats (where calculation is expected to take unbearably long)
|
||||
if c.Count < 100_000 {
|
||||
continue
|
||||
}
|
||||
|
||||
user, _ := slice.FindBy[*models.User](users, func(i int, u *models.User) bool {
|
||||
return u.ID == c.User
|
||||
})
|
||||
s.queueWorkers.Dispatch(func() {
|
||||
if err := s.WarmUserProjectStatsCache(user); err != nil {
|
||||
config.Log().Error("failed to pre-warm project stats cache", "userID", user.ID)
|
||||
}
|
||||
})
|
||||
}
|
||||
user, _ := slice.FindBy[*models.User](users, func(i int, u *models.User) bool {
|
||||
return u.ID == c.User
|
||||
})
|
||||
s.queueWorkers.Dispatch(func() {
|
||||
if err := s.WarmUserProjectStatsCache(user); err != nil {
|
||||
config.Log().Error("failed to pre-warm project stats cache", "userID", user.ID)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (s *HousekeepingService) runCleanData() {
|
||||
// fetch all users
|
||||
users, err := s.userSrvc.GetAll()
|
||||
if err != nil {
|
||||
config.Log().Error("failed to get users for data cleanup", "error", err)
|
||||
return
|
||||
}
|
||||
// fetch all users
|
||||
users, err := s.userSrvc.GetAll()
|
||||
if err != nil {
|
||||
config.Log().Error("failed to get users for data cleanup", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
// schedule jobs
|
||||
for _, u := range users {
|
||||
// don't clean data for subscribed users or when they otherwise have unlimited data access
|
||||
if u.MinDataAge().IsZero() {
|
||||
continue
|
||||
}
|
||||
// schedule jobs
|
||||
for _, u := range users {
|
||||
// don't clean data for subscribed users or when they otherwise have unlimited data access
|
||||
if u.MinDataAge().IsZero() {
|
||||
continue
|
||||
}
|
||||
|
||||
user := *u
|
||||
s.queueWorkers.Dispatch(func() {
|
||||
if err := s.CleanUserDataBefore(&user, user.MinDataAge()); err != nil {
|
||||
config.Log().Error("failed to clear old user data", "userID", user.ID)
|
||||
}
|
||||
})
|
||||
}
|
||||
user := *u
|
||||
s.queueWorkers.Dispatch(func() {
|
||||
if err := s.CleanUserDataBefore(&user, user.MinDataAge()); err != nil {
|
||||
config.Log().Error("failed to clear old user data", "userID", user.ID)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func (s *HousekeepingService) runCleanInactiveUsers() {
|
||||
s.queueWorkers.Dispatch(func() {
|
||||
if s.config.App.MaxInactiveMonths <= 0 {
|
||||
return
|
||||
}
|
||||
if err := s.CleanInactiveUsers(time.Now().AddDate(0, -s.config.App.MaxInactiveMonths, 0)); err != nil {
|
||||
config.Log().Error("failed to clean up inactive users", "error", err)
|
||||
}
|
||||
})
|
||||
s.queueWorkers.Dispatch(func() {
|
||||
if s.config.App.MaxInactiveMonths <= 0 {
|
||||
return
|
||||
}
|
||||
if err := s.CleanInactiveUsers(time.Now().AddDate(0, -s.config.App.MaxInactiveMonths, 0)); err != nil {
|
||||
config.Log().Error("failed to clean up inactive users", "error", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// individual scheduling functions
|
||||
|
||||
func (s *HousekeepingService) scheduleDataCleanups() {
|
||||
if s.config.App.DataRetentionMonths <= 0 {
|
||||
return
|
||||
}
|
||||
if s.config.App.DataRetentionMonths <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
slog.Info("scheduling data cleanup")
|
||||
slog.Info("scheduling data cleanup")
|
||||
|
||||
_, err := s.queueDefault.DispatchCron(s.runCleanData, s.config.App.DataCleanupTime)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to dispatch data cleanup jobs", "error", err)
|
||||
}
|
||||
_, err := s.queueDefault.DispatchCron(s.runCleanData, s.config.App.DataCleanupTime)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to dispatch data cleanup jobs", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *HousekeepingService) scheduleInactiveUsersCleanup() {
|
||||
if s.config.App.MaxInactiveMonths <= 0 {
|
||||
return
|
||||
}
|
||||
if s.config.App.MaxInactiveMonths <= 0 {
|
||||
return
|
||||
}
|
||||
|
||||
slog.Info("scheduling inactive users cleanup")
|
||||
slog.Info("scheduling inactive users cleanup")
|
||||
|
||||
_, err := s.queueDefault.DispatchCron(s.runCleanInactiveUsers, s.config.App.DataCleanupTime)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to dispatch inactive users cleanup job", "error", err)
|
||||
}
|
||||
_, err := s.queueDefault.DispatchCron(s.runCleanInactiveUsers, s.config.App.DataCleanupTime)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to dispatch inactive users cleanup job", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
func (s *HousekeepingService) scheduleProjectStatsCacheWarming() {
|
||||
slog.Info("scheduling project stats cache pre-warming")
|
||||
slog.Info("scheduling project stats cache pre-warming")
|
||||
|
||||
_, err := s.queueDefault.DispatchEvery(s.runWarmProjectStatsCache, 12*time.Hour)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to dispatch pre-warming project stats cache", "error", err)
|
||||
}
|
||||
_, err := s.queueDefault.DispatchEvery(s.runWarmProjectStatsCache, 12*time.Hour)
|
||||
if err != nil {
|
||||
config.Log().Error("failed to dispatch pre-warming project stats cache", "error", err)
|
||||
}
|
||||
|
||||
// run once initially, 1 min after start
|
||||
if !s.config.QuickStart {
|
||||
if err := s.queueDefault.DispatchIn(s.runWarmProjectStatsCache, 1*time.Minute); err != nil {
|
||||
config.Log().Error("failed to dispatch pre-warming project stats cache", "error", err)
|
||||
}
|
||||
}
|
||||
// run once initially, 1 min after start
|
||||
if !s.config.QuickStart {
|
||||
if err := s.queueDefault.DispatchIn(s.runWarmProjectStatsCache, 1*time.Minute); err != nil {
|
||||
config.Log().Error("failed to dispatch pre-warming project stats cache", "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -66,22 +66,16 @@ func (s stringWriter) WriteString(str string) (int, error) {
|
||||
|
||||
// QuoteDbIdentifier quotes a column name used in a query.
|
||||
func QuoteDbIdentifier(db *gorm.DB, identifier string) string {
|
||||
|
||||
builder := stringWriter{Builder: &strings.Builder{}}
|
||||
|
||||
db.Dialector.QuoteTo(builder, identifier)
|
||||
|
||||
return builder.Builder.String()
|
||||
}
|
||||
|
||||
// QuoteSql quotes a SQL statement with the given identifiers.
|
||||
func QuoteSql(db *gorm.DB, queryTemplate string, identifiers ...string) string {
|
||||
|
||||
quotedIdentifiers := make([]interface{}, len(identifiers))
|
||||
|
||||
for i, identifier := range identifiers {
|
||||
quotedIdentifiers[i] = QuoteDbIdentifier(db, identifier)
|
||||
}
|
||||
|
||||
return fmt.Sprintf(queryTemplate, quotedIdentifiers...)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user