From ea4099918def119912d9b276fd7bbd871c44f962 Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Thu, 9 May 2024 11:32:18 -0700 Subject: [PATCH 01/12] Respect Co-authored-by trailers in the contributors graph --- services/repository/contributors_graph.go | 203 +++++++++++------ .../repository/contributors_graph_test.go | 204 +++++++++++++----- .../1c/faee861da76612fda8951b94bde7ba3726109f | Bin 0 -> 117 bytes .../26/442ad16af268ef4768a5a30db377e860f504a3 | 1 + .../ea/3b10c09130232e534cecfed470dbc0f8eb2eb3 | 2 + .../refs/heads/branch-with-co-author | 1 + 6 files changed, 287 insertions(+), 124 deletions(-) create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/objects/1c/faee861da76612fda8951b94bde7ba3726109f create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/objects/26/442ad16af268ef4768a5a30db377e860f504a3 create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/objects/ea/3b10c09130232e534cecfed470dbc0f8eb2eb3 create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/refs/heads/branch-with-co-author diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index b0748f8ee308d..474c49f62698f 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -8,6 +8,7 @@ import ( "context" "errors" "fmt" + "net/mail" "os" "strconv" "strings" @@ -53,10 +54,11 @@ type ContributorData struct { Weeks map[int64]*WeekData `json:"weeks"` } -// ExtendedCommitStats contains information for commit stats with author data +// ExtendedCommitStats contains information for commit stats with both author and coauthors data type ExtendedCommitStats struct { - Author *api.CommitUser `json:"author"` - Stats *api.CommitStats `json:"stats"` + Author *api.CommitUser `json:"author"` + CoAuthors []*api.CommitUser `json:"co_authors"` + Stats *api.CommitStats `json:"stats"` } const layout = time.DateOnly @@ -125,8 +127,7 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int _ = stdoutWriter.Close() }() - gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as", "--reverse") - // AddOptionFormat("--max-count=%d", limit) + gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as%n%(trailers:key=Co-authored-by,valueonly=true)", "--reverse") gitCmd.AddDynamicArguments(baseCommit.ID.String()) var extendedCommitStats []*ExtendedCommitStats @@ -150,6 +151,25 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int authorEmail := strings.TrimSpace(scanner.Text()) scanner.Scan() date := strings.TrimSpace(scanner.Text()) + + var coAuthors []*api.CommitUser + for scanner.Scan() { + line := scanner.Text() + if line == "" { + // There should be an empty line before we read the commit stats line. + break + } + coAuthorEmail, coAuthorName, err := parseCoAuthorTrailerValue(line) + if err != nil { + continue + } + coAuthor := &api.CommitUser{ + Identity: api.Identity{Name: coAuthorName, Email: coAuthorEmail}, + Date: date, + } + coAuthors = append(coAuthors, coAuthor) + } + scanner.Scan() stats := strings.TrimSpace(scanner.Text()) if authorName == "" || authorEmail == "" || date == "" || stats == "" { @@ -184,7 +204,8 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int }, Date: date, }, - Stats: &commitStats, + CoAuthors: coAuthors, + Stats: &commitStats, } extendedCommitStats = append(extendedCommitStats, res) } @@ -199,6 +220,31 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int return extendedCommitStats, nil } +var errSyntax error = errors.New("syntax error occurred") + +func parseCoAuthorTrailerValue(value string) (email, name string, err error) { + value = strings.TrimSpace(value) + if !strings.HasSuffix(value, ">") { + return "", "", errSyntax + } + if openEmailBracketIdx := strings.LastIndex(value, "<"); openEmailBracketIdx == -1 { + return "", "", errSyntax + } + + parts := strings.Split(value, "<") + if len(parts) < 2 { + return "", "", errSyntax + } + + email = strings.TrimRight(parts[1], ">") + if _, err := mail.ParseAddress(email); err != nil { + return "", "", err + } + name = strings.TrimSpace(parts[0]) + + return email, name, nil +} + func generateContributorStats(genDone chan struct{}, cache cache.StringCache, cacheKey string, repo *repo_model.Repository, revision string) { ctx := graceful.GetManager().HammerContext() @@ -222,8 +268,6 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca return } - layout := time.DateOnly - unknownUserAvatarLink := user_model.NewGhostUser().AvatarLinkWithSize(ctx, 0) contributorsCommitStats := make(map[string]*ContributorData) contributorsCommitStats["total"] = &ContributorData{ @@ -237,67 +281,16 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca if len(userEmail) == 0 { continue } - u, _ := user_model.GetUserByEmail(ctx, userEmail) - if u != nil { - // update userEmail with user's primary email address so - // that different mail addresses will linked to same account - userEmail = u.GetEmail() - } - // duplicated logic - if _, ok := contributorsCommitStats[userEmail]; !ok { - if u == nil { - avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0) - if avatarLink == "" { - avatarLink = unknownUserAvatarLink - } - contributorsCommitStats[userEmail] = &ContributorData{ - Name: v.Author.Name, - AvatarLink: avatarLink, - Weeks: make(map[int64]*WeekData), - } - } else { - contributorsCommitStats[userEmail] = &ContributorData{ - Name: u.DisplayName(), - Login: u.LowerName, - AvatarLink: u.AvatarLinkWithSize(ctx, 0), - HomeLink: u.HomeLink(), - Weeks: make(map[int64]*WeekData), - } - } - } - // Update user statistics - user := contributorsCommitStats[userEmail] - startingOfWeek, _ := findLastSundayBeforeDate(v.Author.Date) - - val, _ := time.Parse(layout, startingOfWeek) - week := val.UnixMilli() - - if user.Weeks[week] == nil { - user.Weeks[week] = &WeekData{ - Additions: 0, - Deletions: 0, - Commits: 0, - Week: week, - } - } - if total.Weeks[week] == nil { - total.Weeks[week] = &WeekData{ - Additions: 0, - Deletions: 0, - Commits: 0, - Week: week, - } + + authorData := getContributorData(ctx, contributorsCommitStats, v.Author, unknownUserAvatarLink) + date := v.Author.Date + stats := v.Stats + updateUserAndOverallStats(stats, date, authorData, total, false) + + for _, coAuthor := range v.CoAuthors { + coAuthorData := getContributorData(ctx, contributorsCommitStats, coAuthor, unknownUserAvatarLink) + updateUserAndOverallStats(stats, date, coAuthorData, total, true) } - user.Weeks[week].Additions += v.Stats.Additions - user.Weeks[week].Deletions += v.Stats.Deletions - user.Weeks[week].Commits++ - user.TotalCommits++ - - // Update overall statistics - total.Weeks[week].Additions += v.Stats.Additions - total.Weeks[week].Deletions += v.Stats.Deletions - total.Weeks[week].Commits++ - total.TotalCommits++ } _ = cache.PutJSON(cacheKey, contributorsCommitStats, contributorStatsCacheTimeout) @@ -306,3 +299,77 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca genDone <- struct{}{} } } + +func getContributorData(ctx context.Context, contributorsCommitStats map[string]*ContributorData, user *api.CommitUser, defaultUserAvatarLink string) *ContributorData { + userEmail := user.Email + u, _ := user_model.GetUserByEmail(ctx, userEmail) + if u != nil { + // update userEmail with user's primary email address so + // that different mail addresses will linked to same account + userEmail = u.GetEmail() + } + + if _, ok := contributorsCommitStats[userEmail]; !ok { + if u == nil { + avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0) + if avatarLink == "" { + avatarLink = defaultUserAvatarLink + } + contributorsCommitStats[userEmail] = &ContributorData{ + Name: user.Name, + AvatarLink: avatarLink, + Weeks: make(map[int64]*WeekData), + } + } else { + contributorsCommitStats[userEmail] = &ContributorData{ + Name: u.DisplayName(), + Login: u.LowerName, + AvatarLink: u.AvatarLinkWithSize(ctx, 0), + HomeLink: u.HomeLink(), + Weeks: make(map[int64]*WeekData), + } + } + } + return contributorsCommitStats[userEmail] +} + +func updateUserAndOverallStats(stats *api.CommitStats, commitDate string, user, total *ContributorData, isCoAuthor bool) { + startingOfWeek, _ := findLastSundayBeforeDate(commitDate) + + val, _ := time.Parse(layout, startingOfWeek) + week := val.UnixMilli() + + if user.Weeks[week] == nil { + user.Weeks[week] = &WeekData{ + Additions: 0, + Deletions: 0, + Commits: 0, + Week: week, + } + } + if total.Weeks[week] == nil { + total.Weeks[week] = &WeekData{ + Additions: 0, + Deletions: 0, + Commits: 0, + Week: week, + } + } + // Update user statistics + user.Weeks[week].Additions += stats.Additions + user.Weeks[week].Deletions += stats.Deletions + user.Weeks[week].Commits++ + user.TotalCommits++ + + if isCoAuthor { + // We would have or will count these additions/deletions/commits already when we encounter the original + // author of the commit. Let's avoid this duplication. + return + } + + // Update overall statistics + total.Weeks[week].Additions += stats.Additions + total.Weeks[week].Deletions += stats.Deletions + total.Weeks[week].Commits++ + total.TotalCommits++ +} diff --git a/services/repository/contributors_graph_test.go b/services/repository/contributors_graph_test.go index f22c115276ee3..d572d758c2bd3 100644 --- a/services/repository/contributors_graph_test.go +++ b/services/repository/contributors_graph_test.go @@ -20,66 +20,158 @@ func TestRepository_ContributorsGraph(t *testing.T) { assert.NoError(t, unittest.PrepareTestDatabase()) repo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{ID: 2}) assert.NoError(t, repo.LoadOwner(db.DefaultContext)) - mockCache, err := cache.NewStringCache(setting.Cache{}) - assert.NoError(t, err) - generateContributorStats(nil, mockCache, "key", repo, "404ref") - var data map[string]*ContributorData - _, getErr := mockCache.GetJSON("key", &data) - assert.NotNil(t, getErr) - assert.ErrorContains(t, getErr.ToError(), "object does not exist") + t.Run("non-existent revision", func(t *testing.T) { + mockCache, err := cache.NewStringCache(setting.Cache{}) + assert.NoError(t, err) + generateContributorStats(nil, mockCache, "key", repo, "404ref") + var data map[string]*ContributorData + _, getErr := mockCache.GetJSON("key", &data) + assert.NotNil(t, getErr) + assert.ErrorContains(t, getErr.ToError(), "object does not exist") + }) + t.Run("generate contributor stats", func(t *testing.T) { + mockCache, err := cache.NewStringCache(setting.Cache{}) + assert.NoError(t, err) + generateContributorStats(nil, mockCache, "key", repo, "master") + var data map[string]*ContributorData + exist, _ := mockCache.GetJSON("key", &data) + assert.True(t, exist) + var keys []string + for k := range data { + keys = append(keys, k) + } + slices.Sort(keys) + assert.EqualValues(t, []string{ + "ethantkoenig@gmail.com", + "jimmy.praet@telenet.be", + "jon@allspice.io", + "total", // generated summary + }, keys) - generateContributorStats(nil, mockCache, "key2", repo, "master") - exist, _ := mockCache.GetJSON("key2", &data) - assert.True(t, exist) - var keys []string - for k := range data { - keys = append(keys, k) - } - slices.Sort(keys) - assert.EqualValues(t, []string{ - "ethantkoenig@gmail.com", - "jimmy.praet@telenet.be", - "jon@allspice.io", - "total", // generated summary - }, keys) - - assert.EqualValues(t, &ContributorData{ - Name: "Ethan Koenig", - AvatarLink: "https://secure.gravatar.com/avatar/b42fb195faa8c61b8d88abfefe30e9e3?d=identicon", - TotalCommits: 1, - Weeks: map[int64]*WeekData{ - 1511654400000: { - Week: 1511654400000, // sunday 2017-11-26 - Additions: 3, - Deletions: 0, - Commits: 1, + assert.EqualValues(t, &ContributorData{ + Name: "Ethan Koenig", + AvatarLink: "https://secure.gravatar.com/avatar/b42fb195faa8c61b8d88abfefe30e9e3?d=identicon", + TotalCommits: 1, + Weeks: map[int64]*WeekData{ + 1511654400000: { + Week: 1511654400000, // sunday 2017-11-26 + Additions: 3, + Deletions: 0, + Commits: 1, + }, }, - }, - }, data["ethantkoenig@gmail.com"]) - assert.EqualValues(t, &ContributorData{ - Name: "Total", - AvatarLink: "", - TotalCommits: 3, - Weeks: map[int64]*WeekData{ - 1511654400000: { - Week: 1511654400000, // sunday 2017-11-26 (2017-11-26 20:31:18 -0800) - Additions: 3, - Deletions: 0, - Commits: 1, + }, data["ethantkoenig@gmail.com"]) + assert.EqualValues(t, &ContributorData{ + Name: "Total", + AvatarLink: "", + TotalCommits: 3, + Weeks: map[int64]*WeekData{ + 1511654400000: { + Week: 1511654400000, // sunday 2017-11-26 (2017-11-26 20:31:18 -0800) + Additions: 3, + Deletions: 0, + Commits: 1, + }, + 1607817600000: { + Week: 1607817600000, // sunday 2020-12-13 (2020-12-15 15:23:11 -0500) + Additions: 10, + Deletions: 0, + Commits: 1, + }, + 1624752000000: { + Week: 1624752000000, // sunday 2021-06-27 (2021-06-29 21:54:09 +0200) + Additions: 2, + Deletions: 0, + Commits: 1, + }, }, - 1607817600000: { - Week: 1607817600000, // sunday 2020-12-13 (2020-12-15 15:23:11 -0500) - Additions: 10, - Deletions: 0, - Commits: 1, + }, data["total"]) + }) + + t.Run("generate contributor stats with co-authored commit", func(t *testing.T) { + mockCache, err := cache.NewStringCache(setting.Cache{}) + assert.NoError(t, err) + generateContributorStats(nil, mockCache, "key", repo, "branch-with-co-author") + var data map[string]*ContributorData + exist, _ := mockCache.GetJSON("key", &data) + assert.True(t, exist) + var keys []string + for k := range data { + keys = append(keys, k) + } + slices.Sort(keys) + assert.EqualValues(t, []string{ + "ethantkoenig@gmail.com", + "fizzbuzz@example.com", + "foobar@example.com", + "jimmy.praet@telenet.be", + "jon@allspice.io", + "total", + }, keys) + + // make sure we can see the author of the commit + assert.EqualValues(t, &ContributorData{ + Name: "Foo Bar", + AvatarLink: "https://secure.gravatar.com/avatar/0d4907cea9d97688aa7a5e722d742f71?d=identicon", + TotalCommits: 1, + Weeks: map[int64]*WeekData{ + 1714867200000: { + Week: 1714867200000, // sunday 2024-05-05 + Additions: 1, + Deletions: 1, + Commits: 1, + }, }, - 1624752000000: { - Week: 1624752000000, // sunday 2021-06-27 (2021-06-29 21:54:09 +0200) - Additions: 2, - Deletions: 0, - Commits: 1, + }, data["foobar@example.com"]) + + // make sure that we can also see the co-author + assert.EqualValues(t, &ContributorData{ + Name: "Fizz Buzz", + AvatarLink: "https://secure.gravatar.com/avatar/474e3516254f43b2337011c4ac4de421?d=identicon", + TotalCommits: 1, + Weeks: map[int64]*WeekData{ + 1714867200000: { + Week: 1714867200000, // sunday 2024-05-05 + Additions: 1, + Deletions: 1, + Commits: 1, + }, }, - }, - }, data["total"]) + }, data["fizzbuzz@example.com"]) + + // let's also make sure we don't duplicate the additions/deletions/commits counts in the overall stats that week + assert.EqualValues(t, &ContributorData{ + Name: "Total", + AvatarLink: "", + TotalCommits: 4, + Weeks: map[int64]*WeekData{ + 1714867200000: { + Week: 1714867200000, // sunday 2024-05-05 + Additions: 1, + Deletions: 1, + Commits: 1, + }, + 1511654400000: { + Week: 1511654400000, // sunday 2017-11-26 + Additions: 3, + Deletions: 0, + Commits: 1, + }, + 1607817600000: { + Week: 1607817600000, // sunday 2020-12-13 + Additions: 10, + Deletions: 0, + Commits: 1, + }, + 1624752000000: { + Week: 1624752000000, // sunday 2021-06-27 + Additions: 2, + Deletions: 0, + Commits: 1, + }, + }, + }, data["total"]) + }) + } diff --git a/tests/gitea-repositories-meta/user2/repo2.git/objects/1c/faee861da76612fda8951b94bde7ba3726109f b/tests/gitea-repositories-meta/user2/repo2.git/objects/1c/faee861da76612fda8951b94bde7ba3726109f new file mode 100644 index 0000000000000000000000000000000000000000..a87c676a688f8e7c97b80c5c9cf9a97df35c1dd1 GIT binary patch literal 117 zcmV-*0E+*30V^p=O;s>7FlR6{FfcPQQSivmP1VayVR&UNaA2Z=vR<&yn}1gdZXfvZ zT5mH{Nls>7s$OwfIz#f*ZGH0ZcFXysR{PJ-UX-4tVZ#koQ<7R-qF0fd!=U!VY0p~o XUk(d*o1EVIPI2Dj*fv`L#2zjA6Y@DQ literal 0 HcmV?d00001 diff --git a/tests/gitea-repositories-meta/user2/repo2.git/objects/26/442ad16af268ef4768a5a30db377e860f504a3 b/tests/gitea-repositories-meta/user2/repo2.git/objects/26/442ad16af268ef4768a5a30db377e860f504a3 new file mode 100644 index 0000000000000..4645515be7bfa --- /dev/null +++ b/tests/gitea-repositories-meta/user2/repo2.git/objects/26/442ad16af268ef4768a5a30db377e860f504a3 @@ -0,0 +1 @@ +xuJ1E]+LIfPJ"vE'RzS\psJsmLJVAnT8*":oE{G%}+V6Pr>xCdE$vuVJQ*\J3VR)_ׅw#(i Cv+p޸33L1U Date: Thu, 9 May 2024 13:52:11 -0700 Subject: [PATCH 02/12] CI feedback --- services/repository/contributors_graph.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index 474c49f62698f..e9e9dd1d2ba6e 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -220,7 +220,7 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int return extendedCommitStats, nil } -var errSyntax error = errors.New("syntax error occurred") +var errSyntax = errors.New("syntax error occurred") func parseCoAuthorTrailerValue(value string) (email, name string, err error) { value = strings.TrimSpace(value) From 9ac9327c57faf14df91be3f5b7a15e04b0b3da62 Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Thu, 9 May 2024 14:01:57 -0700 Subject: [PATCH 03/12] Fix lint problem in test code --- services/repository/contributors_graph_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/services/repository/contributors_graph_test.go b/services/repository/contributors_graph_test.go index d572d758c2bd3..17bbcbf51c4e0 100644 --- a/services/repository/contributors_graph_test.go +++ b/services/repository/contributors_graph_test.go @@ -173,5 +173,4 @@ func TestRepository_ContributorsGraph(t *testing.T) { }, }, data["total"]) }) - } From cce78121ffddf3798fb7be50b1d832041beb7065 Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Sat, 11 May 2024 16:02:08 -0700 Subject: [PATCH 04/12] Have co-author parsing be more robust and some refactoring --- services/repository/contributors_graph.go | 22 ++++++++++--------- .../repository/contributors_graph_test.go | 1 - 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index e9e9dd1d2ba6e..c22225f716887 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -159,7 +159,7 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int // There should be an empty line before we read the commit stats line. break } - coAuthorEmail, coAuthorName, err := parseCoAuthorTrailerValue(line) + coAuthorName, coAuthorEmail, err := parseCoAuthorTrailerValue(line) if err != nil { continue } @@ -222,27 +222,29 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int var errSyntax = errors.New("syntax error occurred") -func parseCoAuthorTrailerValue(value string) (email, name string, err error) { +func parseCoAuthorTrailerValue(value string) (name, email string, err error) { value = strings.TrimSpace(value) if !strings.HasSuffix(value, ">") { return "", "", errSyntax } - if openEmailBracketIdx := strings.LastIndex(value, "<"); openEmailBracketIdx == -1 { - return "", "", errSyntax - } - parts := strings.Split(value, "<") - if len(parts) < 2 { + closedBracketIdx := len(value) - 1 + openEmailBracketIdx := strings.LastIndex(value, "<") + if openEmailBracketIdx == -1 { return "", "", errSyntax } - email = strings.TrimRight(parts[1], ">") + email = value[openEmailBracketIdx+1 : closedBracketIdx] if _, err := mail.ParseAddress(email); err != nil { return "", "", err } - name = strings.TrimSpace(parts[0]) - return email, name, nil + name = strings.TrimSpace(value[:openEmailBracketIdx]) + if len(name) == 0 { + return "", "", errSyntax + } + + return name, email, nil } func generateContributorStats(genDone chan struct{}, cache cache.StringCache, cacheKey string, repo *repo_model.Repository, revision string) { diff --git a/services/repository/contributors_graph_test.go b/services/repository/contributors_graph_test.go index 17bbcbf51c4e0..77e412df3e045 100644 --- a/services/repository/contributors_graph_test.go +++ b/services/repository/contributors_graph_test.go @@ -88,7 +88,6 @@ func TestRepository_ContributorsGraph(t *testing.T) { }, }, data["total"]) }) - t.Run("generate contributor stats with co-authored commit", func(t *testing.T) { mockCache, err := cache.NewStringCache(setting.Cache{}) assert.NoError(t, err) From 25a9f121b5f6777414f65d5693226de083d78e92 Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Sat, 11 May 2024 16:09:40 -0700 Subject: [PATCH 05/12] Shrink openEmailBracketIdx -> openBracketIdx --- services/repository/contributors_graph.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index c22225f716887..e1423d5989672 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -229,17 +229,17 @@ func parseCoAuthorTrailerValue(value string) (name, email string, err error) { } closedBracketIdx := len(value) - 1 - openEmailBracketIdx := strings.LastIndex(value, "<") - if openEmailBracketIdx == -1 { + openBracketIdx := strings.LastIndex(value, "<") + if openBracketIdx == -1 { return "", "", errSyntax } - email = value[openEmailBracketIdx+1 : closedBracketIdx] + email = value[openBracketIdx+1 : closedBracketIdx] if _, err := mail.ParseAddress(email); err != nil { return "", "", err } - name = strings.TrimSpace(value[:openEmailBracketIdx]) + name = strings.TrimSpace(value[:openBracketIdx]) if len(name) == 0 { return "", "", errSyntax } From 8e3209b09a284f1354809636ec63672c2aec5300 Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Mon, 13 May 2024 22:35:17 -0700 Subject: [PATCH 06/12] Make commit trailer parsing public and add parameterized tests --- modules/util/commit_trailers.go | 42 +++++++++++++++++++++++ modules/util/commit_trailers_test.go | 37 ++++++++++++++++++++ services/repository/contributors_graph.go | 31 ++--------------- 3 files changed, 81 insertions(+), 29 deletions(-) create mode 100644 modules/util/commit_trailers.go create mode 100644 modules/util/commit_trailers_test.go diff --git a/modules/util/commit_trailers.go b/modules/util/commit_trailers.go new file mode 100644 index 0000000000000..9c6547d0cd67c --- /dev/null +++ b/modules/util/commit_trailers.go @@ -0,0 +1,42 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package util + +import ( + "errors" + "net/mail" + "strings" +) + +var ErrInvalidCommitTrailerValueSyntax = errors.New("syntax error occurred while parsing a commit trailer value") + +// ParseCommitTrailerValueWithAuthor parses a commit trailer value that contains author data. +// Note that it only parses the value and does not consider the trailer key i.e. we just +// parse something like the following: +// +// Foo Bar +func ParseCommitTrailerValueWithAuthor(value string) (name, email string, err error) { + value = strings.TrimSpace(value) + if !strings.HasSuffix(value, ">") { + return "", "", ErrInvalidCommitTrailerValueSyntax + } + + closedBracketIdx := len(value) - 1 + openBracketIdx := strings.LastIndex(value, "<") + if openBracketIdx == -1 { + return "", "", ErrInvalidCommitTrailerValueSyntax + } + + email = value[openBracketIdx+1 : closedBracketIdx] + if _, err := mail.ParseAddress(email); err != nil { + return "", "", ErrInvalidCommitTrailerValueSyntax + } + + name = strings.TrimSpace(value[:openBracketIdx]) + if len(name) == 0 { + return "", "", ErrInvalidCommitTrailerValueSyntax + } + + return name, email, nil +} diff --git a/modules/util/commit_trailers_test.go b/modules/util/commit_trailers_test.go new file mode 100644 index 0000000000000..3ba4b64ec98ed --- /dev/null +++ b/modules/util/commit_trailers_test.go @@ -0,0 +1,37 @@ +package util + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestParseCommitTrailerValueWithAuthor(t *testing.T) { + cases := []struct { + input string + shouldBeError bool + expectedName string + expectedEmail string + }{ + {"Foo Bar ", true, "", ""}, + {"Foo Bar <>", true, "", ""}, + {"Foo Bar ", true, "", ""}, + {"", true, "", ""}, + {" ", true, "", ""}, + {"Foo Bar ", false, "Foo Bar", "foobar@example.com"}, + {" Foo Bar ", false, "Foo Bar", "foobar@example.com"}, + // Account for edge case where name contains an open bracket. + {" Foo < Bar ", false, "Foo < Bar", "foobar@example.com"}, + } + + for n, c := range cases { + name, email, err := ParseCommitTrailerValueWithAuthor(c.input) + if c.shouldBeError { + assert.Error(t, err, "case %d should be a syntax error", n) + } else { + assert.Equal(t, c.expectedName, name, "case %d should have correct name", n) + assert.Equal(t, c.expectedEmail, email, "case %d should have correct email", n) + } + } +} diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index e1423d5989672..b91d64e3b7184 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -8,7 +8,6 @@ import ( "context" "errors" "fmt" - "net/mail" "os" "strconv" "strings" @@ -24,6 +23,7 @@ import ( "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" api "code.gitea.io/gitea/modules/structs" + "code.gitea.io/gitea/modules/util" ) const ( @@ -159,7 +159,7 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int // There should be an empty line before we read the commit stats line. break } - coAuthorName, coAuthorEmail, err := parseCoAuthorTrailerValue(line) + coAuthorName, coAuthorEmail, err := util.ParseCommitTrailerValueWithAuthor(line) if err != nil { continue } @@ -220,33 +220,6 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int return extendedCommitStats, nil } -var errSyntax = errors.New("syntax error occurred") - -func parseCoAuthorTrailerValue(value string) (name, email string, err error) { - value = strings.TrimSpace(value) - if !strings.HasSuffix(value, ">") { - return "", "", errSyntax - } - - closedBracketIdx := len(value) - 1 - openBracketIdx := strings.LastIndex(value, "<") - if openBracketIdx == -1 { - return "", "", errSyntax - } - - email = value[openBracketIdx+1 : closedBracketIdx] - if _, err := mail.ParseAddress(email); err != nil { - return "", "", err - } - - name = strings.TrimSpace(value[:openBracketIdx]) - if len(name) == 0 { - return "", "", errSyntax - } - - return name, email, nil -} - func generateContributorStats(genDone chan struct{}, cache cache.StringCache, cacheKey string, repo *repo_model.Repository, revision string) { ctx := graceful.GetManager().HammerContext() From fd0bf3b56317dfdac0ae4a578d04113a4cd936cb Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Mon, 13 May 2024 23:12:19 -0700 Subject: [PATCH 07/12] Handle duplicated Co-authored-by entries --- services/repository/contributors_graph.go | 5 ++ .../repository/contributors_graph_test.go | 84 ++++++++++++++++++ .../79/a64f10ca2ea21fa2a94102751f0dd9f2c15ce8 | 1 + .../92/6513a3660f218d0d9446ff5f7f7a491eaa8273 | Bin 0 -> 82 bytes .../e5/e7105cf5863a4f6aec6e1bc77719fdf90eb864 | 1 + .../branch-with-duplicated-co-author-entries | 1 + 6 files changed, 92 insertions(+) create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/objects/79/a64f10ca2ea21fa2a94102751f0dd9f2c15ce8 create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/objects/92/6513a3660f218d0d9446ff5f7f7a491eaa8273 create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/objects/e5/e7105cf5863a4f6aec6e1bc77719fdf90eb864 create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/refs/heads/branch-with-duplicated-co-author-entries diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index b91d64e3b7184..41c9c4f8ab4bc 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -153,6 +153,7 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int date := strings.TrimSpace(scanner.Text()) var coAuthors []*api.CommitUser + emailSet := map[string]bool{} for scanner.Scan() { line := scanner.Text() if line == "" { @@ -163,6 +164,10 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int if err != nil { continue } + if _, exists := emailSet[coAuthorEmail]; exists { + continue + } + emailSet[coAuthorEmail] = true coAuthor := &api.CommitUser{ Identity: api.Identity{Name: coAuthorName, Email: coAuthorEmail}, Date: date, diff --git a/services/repository/contributors_graph_test.go b/services/repository/contributors_graph_test.go index 77e412df3e045..b2df72fbfc13e 100644 --- a/services/repository/contributors_graph_test.go +++ b/services/repository/contributors_graph_test.go @@ -172,4 +172,88 @@ func TestRepository_ContributorsGraph(t *testing.T) { }, }, data["total"]) }) + t.Run("generate contributor stats with commit that has duplicate co-authored lines", func(t *testing.T) { + mockCache, err := cache.NewStringCache(setting.Cache{}) + assert.NoError(t, err) + generateContributorStats(nil, mockCache, "key", repo, "branch-with-duplicated-co-author-entries") + var data map[string]*ContributorData + exist, _ := mockCache.GetJSON("key", &data) + assert.True(t, exist) + var keys []string + for k := range data { + keys = append(keys, k) + } + slices.Sort(keys) + assert.EqualValues(t, []string{ + "ethantkoenig@gmail.com", + "fizzbuzz@example.com", + "foobar@example.com", + "jimmy.praet@telenet.be", + "jon@allspice.io", + "total", + }, keys) + + // make sure we can see the author of the commit + assert.EqualValues(t, &ContributorData{ + Name: "Foo Bar", + AvatarLink: "https://secure.gravatar.com/avatar/0d4907cea9d97688aa7a5e722d742f71?d=identicon", + TotalCommits: 1, + Weeks: map[int64]*WeekData{ + 1715472000000: { + Week: 1715472000000, // sunday 2024-05-12 + Additions: 1, + Deletions: 0, + Commits: 1, + }, + }, + }, data["foobar@example.com"]) + + // make sure that we can also see the co-author and that we don't see duplicated additions/deletions/commits + assert.EqualValues(t, &ContributorData{ + Name: "Fizz Buzz", + AvatarLink: "https://secure.gravatar.com/avatar/474e3516254f43b2337011c4ac4de421?d=identicon", + TotalCommits: 1, + Weeks: map[int64]*WeekData{ + 1715472000000: { + Week: 1715472000000, // sunday 2024-05-12 + Additions: 1, + Deletions: 0, + Commits: 1, + }, + }, + }, data["fizzbuzz@example.com"]) + + // let's also make sure we don't duplicate the additions/deletions/commits counts in the overall stats that week + assert.EqualValues(t, &ContributorData{ + Name: "Total", + AvatarLink: "", + TotalCommits: 4, + Weeks: map[int64]*WeekData{ + 1715472000000: { + Week: 1715472000000, // sunday 2024-05-12 + Additions: 1, + Deletions: 0, + Commits: 1, + }, + 1511654400000: { + Week: 1511654400000, // sunday 2017-11-26 + Additions: 3, + Deletions: 0, + Commits: 1, + }, + 1607817600000: { + Week: 1607817600000, // sunday 2020-12-13 + Additions: 10, + Deletions: 0, + Commits: 1, + }, + 1624752000000: { + Week: 1624752000000, // sunday 2021-06-27 + Additions: 2, + Deletions: 0, + Commits: 1, + }, + }, + }, data["total"]) + }) } diff --git a/tests/gitea-repositories-meta/user2/repo2.git/objects/79/a64f10ca2ea21fa2a94102751f0dd9f2c15ce8 b/tests/gitea-repositories-meta/user2/repo2.git/objects/79/a64f10ca2ea21fa2a94102751f0dd9f2c15ce8 new file mode 100644 index 0000000000000..ccc9a3fcb28ba --- /dev/null +++ b/tests/gitea-repositories-meta/user2/repo2.git/objects/79/a64f10ca2ea21fa2a94102751f0dd9f2c15ce8 @@ -0,0 +1 @@ +x͎J0]) tHO # k7MnbcJȀʅ{7s|RaƻZ)fI#] &n(M$/NIڀ6Z`k}9 1grOLH#9 Amv 8k}$j?nxC`e3C<>Cs-$ShU>qLPeMg$$UmB3w98Avq&4 oFFjQuH!&quA*oUUD3Gt4SXz>iUzC~xWas8)mMC%o03${ni*5EFzW@LL literal 0 HcmV?d00001 diff --git a/tests/gitea-repositories-meta/user2/repo2.git/objects/e5/e7105cf5863a4f6aec6e1bc77719fdf90eb864 b/tests/gitea-repositories-meta/user2/repo2.git/objects/e5/e7105cf5863a4f6aec6e1bc77719fdf90eb864 new file mode 100644 index 0000000000000..2b9c574bb18d5 --- /dev/null +++ b/tests/gitea-repositories-meta/user2/repo2.git/objects/e5/e7105cf5863a4f6aec6e1bc77719fdf90eb864 @@ -0,0 +1 @@ +x+)JMU040g040031QMMa*8_w*OUMP%9yze Sɿ-Z??{Qz 7TMIjq^En=k9,mrz;ycl˸* \ No newline at end of file diff --git a/tests/gitea-repositories-meta/user2/repo2.git/refs/heads/branch-with-duplicated-co-author-entries b/tests/gitea-repositories-meta/user2/repo2.git/refs/heads/branch-with-duplicated-co-author-entries new file mode 100644 index 0000000000000..c17567e1fbc75 --- /dev/null +++ b/tests/gitea-repositories-meta/user2/repo2.git/refs/heads/branch-with-duplicated-co-author-entries @@ -0,0 +1 @@ +79a64f10ca2ea21fa2a94102751f0dd9f2c15ce8 From 6c713da4b1dd6a5550b953a9652835a4119a554a Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Mon, 13 May 2024 23:15:48 -0700 Subject: [PATCH 08/12] Add missing license comment --- modules/util/commit_trailers_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/util/commit_trailers_test.go b/modules/util/commit_trailers_test.go index 3ba4b64ec98ed..3b6a7eeb1c6a5 100644 --- a/modules/util/commit_trailers_test.go +++ b/modules/util/commit_trailers_test.go @@ -1,3 +1,5 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT package util import ( From 78fecfc32eac124bed0bd09d575980ade8f97c64 Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Mon, 13 May 2024 23:17:00 -0700 Subject: [PATCH 09/12] Add a space between license comment and package keyword --- modules/util/commit_trailers_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/util/commit_trailers_test.go b/modules/util/commit_trailers_test.go index 3b6a7eeb1c6a5..fe9b64d896203 100644 --- a/modules/util/commit_trailers_test.go +++ b/modules/util/commit_trailers_test.go @@ -1,5 +1,6 @@ // Copyright 2024 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT + package util import ( From 22d8652f6464942b492f69baa35b637e4d8ceb3b Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Fri, 25 Oct 2024 22:48:32 -0700 Subject: [PATCH 10/12] Parse commit trailers using `mail.ParseAddress()` --- modules/util/commit_trailers.go | 24 +++++++----------------- modules/util/commit_trailers_test.go | 5 +++-- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/modules/util/commit_trailers.go b/modules/util/commit_trailers.go index 9c6547d0cd67c..62b3c8305e75f 100644 --- a/modules/util/commit_trailers.go +++ b/modules/util/commit_trailers.go @@ -6,7 +6,6 @@ package util import ( "errors" "net/mail" - "strings" ) var ErrInvalidCommitTrailerValueSyntax = errors.New("syntax error occurred while parsing a commit trailer value") @@ -17,26 +16,17 @@ var ErrInvalidCommitTrailerValueSyntax = errors.New("syntax error occurred while // // Foo Bar func ParseCommitTrailerValueWithAuthor(value string) (name, email string, err error) { - value = strings.TrimSpace(value) - if !strings.HasSuffix(value, ">") { - return "", "", ErrInvalidCommitTrailerValueSyntax + addr, err := mail.ParseAddress(value) + if err != nil { + return name, email, err } - closedBracketIdx := len(value) - 1 - openBracketIdx := strings.LastIndex(value, "<") - if openBracketIdx == -1 { - return "", "", ErrInvalidCommitTrailerValueSyntax + if addr.Name == "" { + return name, email, errors.New("commit trailer missing name") } - email = value[openBracketIdx+1 : closedBracketIdx] - if _, err := mail.ParseAddress(email); err != nil { - return "", "", ErrInvalidCommitTrailerValueSyntax - } - - name = strings.TrimSpace(value[:openBracketIdx]) - if len(name) == 0 { - return "", "", ErrInvalidCommitTrailerValueSyntax - } + name = addr.Name + email = addr.Address return name, email, nil } diff --git a/modules/util/commit_trailers_test.go b/modules/util/commit_trailers_test.go index fe9b64d896203..f5ab2e6449e38 100644 --- a/modules/util/commit_trailers_test.go +++ b/modules/util/commit_trailers_test.go @@ -24,8 +24,6 @@ func TestParseCommitTrailerValueWithAuthor(t *testing.T) { {" ", true, "", ""}, {"Foo Bar ", false, "Foo Bar", "foobar@example.com"}, {" Foo Bar ", false, "Foo Bar", "foobar@example.com"}, - // Account for edge case where name contains an open bracket. - {" Foo < Bar ", false, "Foo < Bar", "foobar@example.com"}, } for n, c := range cases { @@ -33,6 +31,9 @@ func TestParseCommitTrailerValueWithAuthor(t *testing.T) { if c.shouldBeError { assert.Error(t, err, "case %d should be a syntax error", n) } else { + if err != nil { + assert.Fail(t, "did not expect an error: %v", err) + } assert.Equal(t, c.expectedName, name, "case %d should have correct name", n) assert.Equal(t, c.expectedEmail, email, "case %d should have correct email", n) } From b8802097af006890846f7c752f5b5d45fe40d070 Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Fri, 25 Oct 2024 23:31:30 -0700 Subject: [PATCH 11/12] Add check for co-author email = author email --- services/repository/contributors_graph.go | 4 ++++ .../ab/60139e57bea4f9dc090a72cf97b7236d152a4b | Bin 0 -> 166 bytes .../f7/eee4e802f39ec0dadc5f872c86d4ee440b741d | Bin 0 -> 206 bytes .../branch-with-duplicated-co-author-entries | 2 +- 4 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/objects/ab/60139e57bea4f9dc090a72cf97b7236d152a4b create mode 100644 tests/gitea-repositories-meta/user2/repo2.git/objects/f7/eee4e802f39ec0dadc5f872c86d4ee440b741d diff --git a/services/repository/contributors_graph.go b/services/repository/contributors_graph.go index 41c9c4f8ab4bc..fa2d2360ec1e0 100644 --- a/services/repository/contributors_graph.go +++ b/services/repository/contributors_graph.go @@ -161,6 +161,10 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int break } coAuthorName, coAuthorEmail, err := util.ParseCommitTrailerValueWithAuthor(line) + if authorEmail == coAuthorEmail { + // Authors shouldn't be co-authors too. + continue + } if err != nil { continue } diff --git a/tests/gitea-repositories-meta/user2/repo2.git/objects/ab/60139e57bea4f9dc090a72cf97b7236d152a4b b/tests/gitea-repositories-meta/user2/repo2.git/objects/ab/60139e57bea4f9dc090a72cf97b7236d152a4b new file mode 100644 index 0000000000000000000000000000000000000000..5c4919fb1bdd8644a53c2e48bc84cf1683027a97 GIT binary patch literal 166 zcmV;X09pTd0fo*%3PLdq1W?yKMfRek`T3~@5q04`(*B5owNfj1e0u}8!@OZ~E#=;U zd|35n21&iz!bYaVJS{=8Wn!PABxB1l;%+9(?xoG11LCYXh`&w~i3S@bFSe5~1wr(} zGp2b=IQ{fnZE&d-&e~vqtu<*o+E>f#VcT5G0lW~$D9jPoNQiEv<6XCZE>Mibhyoi7 U!@1M4K(}|_e#^uC09~I$mL=&?)Bpeg literal 0 HcmV?d00001 diff --git a/tests/gitea-repositories-meta/user2/repo2.git/objects/f7/eee4e802f39ec0dadc5f872c86d4ee440b741d b/tests/gitea-repositories-meta/user2/repo2.git/objects/f7/eee4e802f39ec0dadc5f872c86d4ee440b741d new file mode 100644 index 0000000000000000000000000000000000000000..1e3c7faf58db56aa27e93e2f5e56685ba13971c0 GIT binary patch literal 206 zcmV;<05Sh~0o9PfN<%RaMP27B<_BI$@{&wPib#dx5BLF-WP*jpJVFZEuQ#9@H@bE+ zTrT&(VN$F0JirBWH@XOj7c|LnRl|68FKWjk~-J;25-7NjyfG zbIM7RX0)jgMzm|l##?q#@pZhm4yV@O$Q=%)waEQZ-nniU*`-z=z@YIVc%lG{My=Is zr`?c0IZI@nHu%k{mo4)Ucy3j8H7oVhmM;OxOZ>Qp)A@FT<8_`xnK91vL*-upzr0d? I0-?Q|(ach0^8f$< literal 0 HcmV?d00001 diff --git a/tests/gitea-repositories-meta/user2/repo2.git/refs/heads/branch-with-duplicated-co-author-entries b/tests/gitea-repositories-meta/user2/repo2.git/refs/heads/branch-with-duplicated-co-author-entries index c17567e1fbc75..f8c5eee90dbda 100644 --- a/tests/gitea-repositories-meta/user2/repo2.git/refs/heads/branch-with-duplicated-co-author-entries +++ b/tests/gitea-repositories-meta/user2/repo2.git/refs/heads/branch-with-duplicated-co-author-entries @@ -1 +1 @@ -79a64f10ca2ea21fa2a94102751f0dd9f2c15ce8 +f7eee4e802f39ec0dadc5f872c86d4ee440b741d From f2e114e5cf54b002d5b1023d3651ec696ba94b5a Mon Sep 17 00:00:00 2001 From: Kemal Zebari Date: Fri, 25 Oct 2024 23:34:25 -0700 Subject: [PATCH 12/12] Remove error object that I no longer use --- modules/util/commit_trailers.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/util/commit_trailers.go b/modules/util/commit_trailers.go index 62b3c8305e75f..4852e2b5c4614 100644 --- a/modules/util/commit_trailers.go +++ b/modules/util/commit_trailers.go @@ -8,8 +8,6 @@ import ( "net/mail" ) -var ErrInvalidCommitTrailerValueSyntax = errors.New("syntax error occurred while parsing a commit trailer value") - // ParseCommitTrailerValueWithAuthor parses a commit trailer value that contains author data. // Note that it only parses the value and does not consider the trailer key i.e. we just // parse something like the following: