Skip to content

Respect Co-authored-by trailers in the contributors graph #30925

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
203 changes: 135 additions & 68 deletions services/repository/contributors_graph.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"context"
"errors"
"fmt"
"net/mail"
"os"
"strconv"
"strings"
Expand Down Expand Up @@ -53,10 +54,11 @@ type ContributorData struct {
Weeks map[int64]*WeekData `json:"weeks"`
}

// ExtendedCommitStats contains information for commit stats with author data
// ExtendedCommitStats contains information for commit stats with both author and coauthors data
type ExtendedCommitStats struct {
Author *api.CommitUser `json:"author"`
Stats *api.CommitStats `json:"stats"`
Author *api.CommitUser `json:"author"`
CoAuthors []*api.CommitUser `json:"co_authors"`
Stats *api.CommitStats `json:"stats"`
}

const layout = time.DateOnly
Expand Down Expand Up @@ -125,8 +127,7 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
_ = stdoutWriter.Close()
}()

gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as", "--reverse")
// AddOptionFormat("--max-count=%d", limit)
gitCmd := git.NewCommand(repo.Ctx, "log", "--shortstat", "--no-merges", "--pretty=format:---%n%aN%n%aE%n%as%n%(trailers:key=Co-authored-by,valueonly=true)", "--reverse")
Copy link
Member

@silverwind silverwind May 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked compat on this trailers option and it does appear to be present in git 2.27 at least which is the oldest version where git's site has docs for, so it's likely fine to use:

https://git-scm.com/docs/git-log/2.27.0

gitCmd.AddDynamicArguments(baseCommit.ID.String())

var extendedCommitStats []*ExtendedCommitStats
Expand All @@ -150,6 +151,25 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
authorEmail := strings.TrimSpace(scanner.Text())
scanner.Scan()
date := strings.TrimSpace(scanner.Text())

var coAuthors []*api.CommitUser
for scanner.Scan() {
line := scanner.Text()
if line == "" {
// There should be an empty line before we read the commit stats line.
break
}
coAuthorEmail, coAuthorName, err := parseCoAuthorTrailerValue(line)
if err != nil {
continue
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to have a log here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure how frequent it is for commits to have invalid Co-authored-by trailers but if it won't cause a problem for admins we could have this as maybe an INFO/DEBUG level log

}
coAuthor := &api.CommitUser{
Identity: api.Identity{Name: coAuthorName, Email: coAuthorEmail},
Date: date,
}
coAuthors = append(coAuthors, coAuthor)
}

scanner.Scan()
stats := strings.TrimSpace(scanner.Text())
if authorName == "" || authorEmail == "" || date == "" || stats == "" {
Expand Down Expand Up @@ -184,7 +204,8 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
},
Date: date,
},
Stats: &commitStats,
CoAuthors: coAuthors,
Stats: &commitStats,
}
extendedCommitStats = append(extendedCommitStats, res)
}
Expand All @@ -199,6 +220,31 @@ func getExtendedCommitStats(repo *git.Repository, revision string /*, limit int
return extendedCommitStats, nil
}

var errSyntax = errors.New("syntax error occurred")

func parseCoAuthorTrailerValue(value string) (email, name string, err error) {
value = strings.TrimSpace(value)
if !strings.HasSuffix(value, ">") {
return "", "", errSyntax
}
if openEmailBracketIdx := strings.LastIndex(value, "<"); openEmailBracketIdx == -1 {
return "", "", errSyntax
}

parts := strings.Split(value, "<")
if len(parts) < 2 {
return "", "", errSyntax
}

email = strings.TrimRight(parts[1], ">")
if _, err := mail.ParseAddress(email); err != nil {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since users can add anything between the <> brackets I believed that we could try to throw away lines that contain invalid email addresses.

Though while testing by creating blank repos and merging the commit history of existing projects into these repos I observed that some emails associated to bots will be rejected (e.g. the pre-commit bot on GitHub). I guess this happens because these emails don't follow RFC 5322. Not sure if I should do this validation check.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Depends on how we parse other trailer lines I suppose. I would make it consistent with those.

return "", "", err
}
name = strings.TrimSpace(parts[0])

return email, name, nil
}

func generateContributorStats(genDone chan struct{}, cache cache.StringCache, cacheKey string, repo *repo_model.Repository, revision string) {
ctx := graceful.GetManager().HammerContext()

Expand All @@ -222,8 +268,6 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca
return
}

layout := time.DateOnly

unknownUserAvatarLink := user_model.NewGhostUser().AvatarLinkWithSize(ctx, 0)
contributorsCommitStats := make(map[string]*ContributorData)
contributorsCommitStats["total"] = &ContributorData{
Expand All @@ -237,67 +281,16 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca
if len(userEmail) == 0 {
continue
}
u, _ := user_model.GetUserByEmail(ctx, userEmail)
if u != nil {
// update userEmail with user's primary email address so
// that different mail addresses will linked to same account
userEmail = u.GetEmail()
}
// duplicated logic
if _, ok := contributorsCommitStats[userEmail]; !ok {
if u == nil {
avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0)
if avatarLink == "" {
avatarLink = unknownUserAvatarLink
}
contributorsCommitStats[userEmail] = &ContributorData{
Name: v.Author.Name,
AvatarLink: avatarLink,
Weeks: make(map[int64]*WeekData),
}
} else {
contributorsCommitStats[userEmail] = &ContributorData{
Name: u.DisplayName(),
Login: u.LowerName,
AvatarLink: u.AvatarLinkWithSize(ctx, 0),
HomeLink: u.HomeLink(),
Weeks: make(map[int64]*WeekData),
}
}
}
// Update user statistics
user := contributorsCommitStats[userEmail]
startingOfWeek, _ := findLastSundayBeforeDate(v.Author.Date)

val, _ := time.Parse(layout, startingOfWeek)
week := val.UnixMilli()

if user.Weeks[week] == nil {
user.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}
}
if total.Weeks[week] == nil {
total.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}

authorData := getContributorData(ctx, contributorsCommitStats, v.Author, unknownUserAvatarLink)
date := v.Author.Date
stats := v.Stats
updateUserAndOverallStats(stats, date, authorData, total, false)

for _, coAuthor := range v.CoAuthors {
coAuthorData := getContributorData(ctx, contributorsCommitStats, coAuthor, unknownUserAvatarLink)
updateUserAndOverallStats(stats, date, coAuthorData, total, true)
}
user.Weeks[week].Additions += v.Stats.Additions
user.Weeks[week].Deletions += v.Stats.Deletions
user.Weeks[week].Commits++
user.TotalCommits++

// Update overall statistics
total.Weeks[week].Additions += v.Stats.Additions
total.Weeks[week].Deletions += v.Stats.Deletions
total.Weeks[week].Commits++
total.TotalCommits++
}

_ = cache.PutJSON(cacheKey, contributorsCommitStats, contributorStatsCacheTimeout)
Expand All @@ -306,3 +299,77 @@ func generateContributorStats(genDone chan struct{}, cache cache.StringCache, ca
genDone <- struct{}{}
}
}

func getContributorData(ctx context.Context, contributorsCommitStats map[string]*ContributorData, user *api.CommitUser, defaultUserAvatarLink string) *ContributorData {
userEmail := user.Email
u, _ := user_model.GetUserByEmail(ctx, userEmail)
if u != nil {
// update userEmail with user's primary email address so
// that different mail addresses will linked to same account
userEmail = u.GetEmail()
}

if _, ok := contributorsCommitStats[userEmail]; !ok {
if u == nil {
avatarLink := avatars.GenerateEmailAvatarFastLink(ctx, userEmail, 0)
if avatarLink == "" {
avatarLink = defaultUserAvatarLink
}
contributorsCommitStats[userEmail] = &ContributorData{
Name: user.Name,
AvatarLink: avatarLink,
Weeks: make(map[int64]*WeekData),
}
} else {
contributorsCommitStats[userEmail] = &ContributorData{
Name: u.DisplayName(),
Login: u.LowerName,
AvatarLink: u.AvatarLinkWithSize(ctx, 0),
HomeLink: u.HomeLink(),
Weeks: make(map[int64]*WeekData),
}
}
}
return contributorsCommitStats[userEmail]
}

func updateUserAndOverallStats(stats *api.CommitStats, commitDate string, user, total *ContributorData, isCoAuthor bool) {
startingOfWeek, _ := findLastSundayBeforeDate(commitDate)

val, _ := time.Parse(layout, startingOfWeek)
week := val.UnixMilli()

if user.Weeks[week] == nil {
user.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}
}
if total.Weeks[week] == nil {
total.Weeks[week] = &WeekData{
Additions: 0,
Deletions: 0,
Commits: 0,
Week: week,
}
}
// Update user statistics
user.Weeks[week].Additions += stats.Additions
user.Weeks[week].Deletions += stats.Deletions
user.Weeks[week].Commits++
user.TotalCommits++

if isCoAuthor {
// We would have or will count these additions/deletions/commits already when we encounter the original
// author of the commit. Let's avoid this duplication.
return
}

// Update overall statistics
total.Weeks[week].Additions += stats.Additions
total.Weeks[week].Deletions += stats.Deletions
total.Weeks[week].Commits++
total.TotalCommits++
}
Loading