fix(wip): duplicate leaderboard entries due to inconsistent language spelling

This commit is contained in:
Ferdinand Mütsch
2025-05-30 23:48:53 +02:00
parent ff287eaa12
commit d2d9a2cfa3
7 changed files with 1288 additions and 1206 deletions

View File

@@ -40,6 +40,21 @@ app:
ipynb: Python
svelte: Svelte
astro: Astro
# canonical spelling of language names; each key is the lower-cased language name with all non-alphanumeric characters removed
canonical_language_names:
'java': 'Java'
'ini': 'INI'
'xml': 'XML'
'jsx': 'JSX'
'tsx': 'TSX'
'php': 'PHP'
'yaml': 'YAML'
'toml': 'TOML'
'sql': 'SQL'
'css': 'CSS'
'scss': 'SCSS'
'jsp': 'JSP'
'svg': 'SVG'
'csv': 'CSV'
# url template for user avatar images (to be used with services like gravatar or dicebear)
# available variable placeholders are: username, username_hash, email, email_hash

View File

@@ -103,6 +103,7 @@ type appConfig struct {
DateFormat string `yaml:"date_format" default:"Mon, 02 Jan 2006" env:"WAKAPI_DATE_FORMAT"`
DateTimeFormat string `yaml:"datetime_format" default:"Mon, 02 Jan 2006 15:04" env:"WAKAPI_DATETIME_FORMAT"`
CustomLanguages map[string]string `yaml:"custom_languages"`
CanonicalLanguageNames map[string]string `yaml:"canonical_language_names"` // lower case, compacted representation -> canonical name
Colors map[string]map[string]string `yaml:"-"`
}
@@ -239,6 +240,13 @@ func (c *appConfig) GetCustomLanguages() map[string]string {
return utils.CloneStringMap(c.CustomLanguages, false)
}
// GetCanonicalLanguageNames returns the configured canonical language names
// (lower-case, compacted representation -> canonical name), passed through
// utils.CloneStringMap. When nothing is configured, a new empty map is
// returned, so the result is never nil on that path.
func (c *appConfig) GetCanonicalLanguageNames() map[string]string {
	if names := c.CanonicalLanguageNames; names != nil {
		return utils.CloneStringMap(names, false)
	}
	return map[string]string{}
}
// GetLanguageColors returns the "languages" entry of the color configuration,
// passed through utils.CloneStringMap with its second argument set to true.
func (c *appConfig) GetLanguageColors() map[string]string {
	languageColors := c.Colors["languages"]
	return utils.CloneStringMap(languageColors, true)
}

File diff suppressed because it is too large Load Diff

40
models/canonical_names.go Normal file
View File

@@ -0,0 +1,40 @@
package models
import (
"github.com/duke-git/lancet/v2/strutil"
"github.com/muety/wakapi/config"
"regexp"
"strings"
)
// Special treatment for system-wide entities (languages, editors, operating systems) that commonly
// cause confusion because different plugins send them with different capitalization, e.g. "JAVA".

// nonAlphanumericRegex matches every run of characters other than ASCII letters and digits;
// it is used to strip such characters when building lookup keys.
var nonAlphanumericRegex = regexp.MustCompile(`[^a-zA-Z0-9]+`)

// canonicalNames maps an entity type to its table of lookup key -> canonical name.
// It stays nil until initLookup has run.
var canonicalNames map[uint8]map[string]string
// initLookup builds the per-entity-type lookup tables and stores them in
// canonicalNames. Only languages get entries from the configuration; editors
// and operating systems are registered with empty tables.
func initLookup() {
	conf := config.Get()

	lookup := make(map[uint8]map[string]string, 3)
	lookup[SummaryLanguage] = conf.App.GetCanonicalLanguageNames()
	lookup[SummaryEditor] = map[string]string{}
	lookup[SummaryOS] = map[string]string{}

	// publish the fully built table in a single assignment
	canonicalNames = lookup
}
// CanonicalName normalizes the spelling of value for the given entity type.
//
// Entity types without a lookup table are returned unchanged. Otherwise, value
// is lower-cased and stripped of non-alphanumeric characters to form a lookup
// key; if the table has a canonical name for that key, it is returned. If not,
// the result of strutil.Capitalize(value) is returned, so that languages,
// editors and operating systems are still capitalized consistently.
//
// NOTE(review): the lookup tables are initialized lazily on first call without
// any synchronization — confirm this is safe for concurrent callers.
func CanonicalName(value string, entityType uint8) string {
	if canonicalNames == nil {
		initLookup()
	}

	names, known := canonicalNames[entityType]
	if !known {
		return value
	}

	lowered := strings.ToLower(value)
	key := nonAlphanumericRegex.ReplaceAllString(lowered, "")

	canonical, found := names[key]
	if !found {
		// no explicit canonical name configured, fall back to plain capitalization
		return strutil.Capitalize(value)
	}
	return canonical
}

View File

@@ -8,7 +8,6 @@ import (
"log/slog"
"github.com/duke-git/lancet/v2/strutil"
"github.com/gohugoio/hashstructure"
)
@@ -50,8 +49,9 @@ func (h *Heartbeat) Timely(maxAge time.Duration) bool {
}
func (h *Heartbeat) Sanitize() *Heartbeat {
h.OperatingSystem = strutil.Capitalize(h.OperatingSystem)
h.Editor = strutil.Capitalize(h.Editor)
h.OperatingSystem = CanonicalName(h.OperatingSystem, SummaryOS)
h.Editor = CanonicalName(h.Editor, SummaryEditor)
h.Language = CanonicalName(h.Language, SummaryLanguage)
if h.Category == "" && (h.Type == "domain" || h.Type == "url") {
h.Category = "browsing"
}

View File

@@ -76,6 +76,8 @@ func (srv *AliasService) GetAliasOrDefault(userId string, summaryType uint8, val
srv.MayInitializeUser(userId)
}
value = models.CanonicalName(value, summaryType) // currently only implemented for languages
match := func(aliasValue string, itemKey string) bool {
return wildmatch.NewWildMatch(aliasValue).IsMatch(itemKey)
}

View File

@@ -1,6 +1,7 @@
package services
import (
"github.com/muety/wakapi/config"
"github.com/muety/wakapi/mocks"
"github.com/muety/wakapi/models"
"github.com/stretchr/testify/assert"
@@ -16,6 +17,8 @@ type AliasServiceTestSuite struct {
}
func (suite *AliasServiceTestSuite) SetupSuite() {
config.Set(config.Empty())
suite.TestUserId = "johndoe@example.org"
aliases := []*models.Alias{
@@ -51,7 +54,8 @@ func (suite *AliasServiceTestSuite) TestAliasService_GetAliasOrDefault() {
result2, err2 := sut.GetAliasOrDefault(suite.TestUserId, models.SummaryProject, "wakapi")
result3, err3 := sut.GetAliasOrDefault(suite.TestUserId, models.SummaryProject, "anchr")
result4, err4 := sut.GetAliasOrDefault(suite.TestUserId, models.SummaryProject, "telepush-mobile")
result5, err5 := sut.GetAliasOrDefault(suite.TestUserId, models.SummaryLanguage, "telepush-mobile")
result5, err5 := sut.GetAliasOrDefault(suite.TestUserId, models.SummaryEntity, "telepush-mobile")
result6, err6 := sut.GetAliasOrDefault(suite.TestUserId, models.SummaryLanguage, "telepush-mobile")
assert.Equal(suite.T(), "wakapi", result1)
assert.Nil(suite.T(), err1)
@@ -63,4 +67,6 @@ func (suite *AliasServiceTestSuite) TestAliasService_GetAliasOrDefault() {
assert.Nil(suite.T(), err4)
assert.Equal(suite.T(), "telepush-mobile", result5)
assert.Nil(suite.T(), err5)
assert.Equal(suite.T(), "Telepush-mobile", result6) // not really scope of this test, but nevertheless: language shall always be capitaliized
assert.Nil(suite.T(), err6)
}