Skip to content
This repository was archived by the owner on Sep 30, 2024. It is now read-only.

Commit adbd4b4

Browse files
authored
fix/frontend/graphql: log and add trace events for all non-not-found errors that occur when fetching permission syncs (#63302)
Closes https://linear.app/sourcegraph/issue/SRC-421/customer-reported-an-issue-with-user-permissions-stats This PR ensures that any non not-found errors that occur when fetching permission sync jobs are logged and added as error events in the trace. Before, these errors were silently swallowed which makes things exceptionally hard to debug. This PR only changes the monitoring around this query - **it doesn't change the underlying behavior.** **Note**: I spent a while reading up on how to communicate partial errors in our graphql API, but it seems like there isn't a clear consensus. - graph-gophers/graphql-go#414 - https://spec.graphql.org/October2021/#sel-HAHlBNJDPEBAAADFBU-_D According to the above, it seems like ideally we should be returning `null` in the graphQL for permission sync jobs that have an error. However, when I tried to do this I begain seeing nil panics all over the place. It's hard for me to know if the underlying library supports this or if we are just holding it wrong. 😥 ## Test plan Unit tests ## Changelog - Our graphqlAPI now logs and traces any non-not-found errors that occur when fetching permission sync jobs (as opposed to being silently swallowed).
1 parent fe56852 commit adbd4b4

File tree

5 files changed

+249
-19
lines changed

5 files changed

+249
-19
lines changed

cmd/frontend/internal/authz/resolvers/BUILD.bazel

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ go_library(
3131
"//internal/gqlutil",
3232
"//internal/licensing",
3333
"//internal/observation",
34+
"//internal/trace",
3435
"//internal/types",
3536
"//lib/errors",
3637
"//lib/pointers",

cmd/frontend/internal/authz/resolvers/permissions_sync_jobs.go

+30-9
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,41 @@ import (
55

66
"github.com/graph-gophers/graphql-go"
77
"github.com/graph-gophers/graphql-go/relay"
8+
"github.com/sourcegraph/log"
89

910
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend"
1011
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/graphqlutil"
1112
"github.com/sourcegraph/sourcegraph/internal/api"
1213
"github.com/sourcegraph/sourcegraph/internal/database"
14+
"github.com/sourcegraph/sourcegraph/internal/errcode"
1315
"github.com/sourcegraph/sourcegraph/internal/gitserver"
1416
"github.com/sourcegraph/sourcegraph/internal/gqlutil"
17+
"github.com/sourcegraph/sourcegraph/internal/trace"
1518
"github.com/sourcegraph/sourcegraph/lib/errors"
1619
"github.com/sourcegraph/sourcegraph/lib/pointers"
1720
)
1821

1922
const permissionsSyncJobIDKind = "PermissionsSyncJob"
2023

21-
func NewPermissionsSyncJobsResolver(db database.DB, args graphqlbackend.ListPermissionsSyncJobsArgs) (*graphqlutil.ConnectionResolver[graphqlbackend.PermissionsSyncJobResolver], error) {
24+
func NewPermissionsSyncJobsResolver(logger log.Logger, db database.DB, args graphqlbackend.ListPermissionsSyncJobsArgs) (*graphqlutil.ConnectionResolver[graphqlbackend.PermissionsSyncJobResolver], error) {
2225
store := &permissionsSyncJobConnectionStore{
23-
db: db,
24-
args: args,
26+
logger: logger.Scoped("permissions_sync_jobs_resolver"),
27+
db: db,
28+
args: args,
2529
}
2630

2731
if args.UserID != nil && args.RepoID != nil {
2832
return nil, errors.New("please provide either userID or repoID, but not both.")
2933
}
3034

31-
return graphqlutil.NewConnectionResolver[graphqlbackend.PermissionsSyncJobResolver](store, &args.ConnectionResolverArgs, nil)
35+
return graphqlutil.NewConnectionResolver[graphqlbackend.PermissionsSyncJobResolver](
36+
store, &args.ConnectionResolverArgs, nil)
3237
}
3338

3439
type permissionsSyncJobConnectionStore struct {
35-
db database.DB
36-
args graphqlbackend.ListPermissionsSyncJobsArgs
40+
db database.DB
41+
args graphqlbackend.ListPermissionsSyncJobsArgs
42+
logger log.Logger
3743
}
3844

3945
func (s *permissionsSyncJobConnectionStore) ComputeTotal(ctx context.Context) (int32, error) {
@@ -52,20 +58,35 @@ func (s *permissionsSyncJobConnectionStore) ComputeNodes(ctx context.Context, ar
5258
}
5359

5460
resolvers := make([]graphqlbackend.PermissionsSyncJobResolver, 0, len(jobs))
61+
var errs []error
5562
for _, job := range jobs {
5663
syncSubject, err := s.resolveSubject(ctx, job)
5764
if err != nil {
58-
// NOTE(naman): async cleaning of repos might make repo record unavailable.
59-
// That will break the api, as subject will not be resolved. In this case
60-
// it is better to not bubble up the error but return the remaining nodes.
65+
if !errcode.IsNotFound(err) {
66+
// We don't surface errors for deleted repositories or users. It's frequently the case that asynchronous
67+
// cleanup jobs will delete repositories and users that are referenced by permissions sync jobs.
68+
errs = append(errs, errors.Wrapf(err, "resolving permissions sync job subject for job %d", job.ID))
69+
}
70+
6171
continue
6272
}
73+
6374
resolvers = append(resolvers, &permissionsSyncJobResolver{
6475
db: s.db,
6576
job: job,
6677
syncSubject: syncSubject,
6778
})
6879
}
80+
81+
if len(errs) > 0 {
82+
s.logger.Warn("Failed to resolve permissions sync job subjects", log.Error(errors.Append(nil, errs...)))
83+
84+
tr := trace.FromContext(ctx)
85+
for _, e := range errs {
86+
tr.RecordError(e)
87+
}
88+
}
89+
6990
return resolvers, nil
7091
}
7192

cmd/frontend/internal/authz/resolvers/permissions_sync_jobs_test.go

+203-3
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,18 @@ package resolvers
22

33
import (
44
"context"
5+
"strings"
56
"testing"
67

8+
"github.com/sourcegraph/log/logtest"
9+
"github.com/stretchr/testify/require"
10+
711
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend"
812
"github.com/sourcegraph/sourcegraph/cmd/frontend/graphqlbackend/graphqlutil"
913
"github.com/sourcegraph/sourcegraph/internal/database"
1014
"github.com/sourcegraph/sourcegraph/internal/database/dbmocks"
1115
"github.com/sourcegraph/sourcegraph/internal/types"
12-
"github.com/stretchr/testify/require"
16+
"github.com/sourcegraph/sourcegraph/lib/errors"
1317
)
1418

1519
func TestPermissionSyncJobsResolver(t *testing.T) {
@@ -24,8 +28,9 @@ func TestPermissionSyncJobsResolver(t *testing.T) {
2428
jobsStore.ListFunc.SetDefaultReturn([]*database.PermissionSyncJob{}, nil)
2529

2630
db.PermissionSyncJobsFunc.SetDefaultReturn(jobsStore)
31+
logger := logtest.NoOp(t)
2732

28-
resolver, err := NewPermissionsSyncJobsResolver(db, graphqlbackend.ListPermissionsSyncJobsArgs{ConnectionResolverArgs: args})
33+
resolver, err := NewPermissionsSyncJobsResolver(logger, db, graphqlbackend.ListPermissionsSyncJobsArgs{ConnectionResolverArgs: args})
2934
require.NoError(t, err)
3035
jobs, err := resolver.Nodes(ctx)
3136
require.NoError(t, err)
@@ -42,12 +47,207 @@ func TestPermissionSyncJobsResolver(t *testing.T) {
4247

4348
db.PermissionSyncJobsFunc.SetDefaultReturn(jobsStore)
4449
db.ReposFunc.SetDefaultReturn(repoStore)
50+
logger := logtest.NoOp(t)
4551

46-
resolver, err := NewPermissionsSyncJobsResolver(db, graphqlbackend.ListPermissionsSyncJobsArgs{ConnectionResolverArgs: args})
52+
resolver, err := NewPermissionsSyncJobsResolver(logger, db, graphqlbackend.ListPermissionsSyncJobsArgs{ConnectionResolverArgs: args})
4753
require.NoError(t, err)
4854
jobs, err := resolver.Nodes(ctx)
4955
require.NoError(t, err)
5056
require.Len(t, jobs, 1)
5157
require.Equal(t, marshalPermissionsSyncJobID(1), jobs[0].ID())
5258
})
59+
60+
t.Run("logs non-not-found errors", func(t *testing.T) {
61+
t.Run("repository error", func(t *testing.T) {
62+
// Create a mock database
63+
db := dbmocks.NewMockDB()
64+
65+
// Create a mock permissions sync job store
66+
jobStore := dbmocks.NewMockPermissionSyncJobStore()
67+
68+
// Set up the expectations for the job store
69+
job := &database.PermissionSyncJob{ID: 1, RepositoryID: 1}
70+
jobStore.ListFunc.SetDefaultReturn([]*database.PermissionSyncJob{job}, nil)
71+
72+
db.PermissionSyncJobsFunc.SetDefaultReturn(jobStore)
73+
74+
unexpectedError := errors.New("this is a test error")
75+
76+
reposStore := dbmocks.NewMockRepoStore()
77+
reposStore.GetFunc.SetDefaultReturn(nil, unexpectedError)
78+
db.ReposFunc.SetDefaultReturn(reposStore)
79+
80+
logger, dumpLogs := logtest.Captured(t)
81+
// Create a permissions sync job connection store
82+
resolver, err := NewPermissionsSyncJobsResolver(logger, db, graphqlbackend.ListPermissionsSyncJobsArgs{ConnectionResolverArgs: args})
83+
require.NoError(t, err)
84+
85+
nodes, err := resolver.Nodes(ctx)
86+
require.NoError(t, err) // Assert that no error is returned
87+
require.Equal(t, 0, len(nodes))
88+
89+
// Assert that the error is logged
90+
logEntries := dumpLogs()
91+
var foundLogEntry bool
92+
for _, entry := range logEntries {
93+
errText, ok := entry.Fields["error"].(string)
94+
95+
if ok && strings.Contains(errText, unexpectedError.Error()) {
96+
foundLogEntry = true
97+
break
98+
}
99+
}
100+
101+
require.True(t, foundLogEntry, "Expected log entry not found")
102+
})
103+
104+
t.Run("user error", func(t *testing.T) {
105+
// Create a mock database
106+
db := dbmocks.NewMockDB()
107+
108+
// Create a mock permissions sync job store
109+
jobStore := dbmocks.NewMockPermissionSyncJobStore()
110+
111+
// Set up the expectations for the job store
112+
job := &database.PermissionSyncJob{ID: 1, UserID: 1}
113+
jobStore.ListFunc.SetDefaultReturn([]*database.PermissionSyncJob{job}, nil)
114+
115+
db.PermissionSyncJobsFunc.SetDefaultReturn(jobStore)
116+
117+
unexpectedError := errors.New("this is a test error")
118+
119+
userStore := dbmocks.NewMockUserStore()
120+
userStore.GetByIDFunc.SetDefaultReturn(nil, unexpectedError)
121+
db.UsersFunc.SetDefaultReturn(userStore)
122+
123+
logger, dumpLogs := logtest.Captured(t)
124+
// Create a permissions sync job connection store
125+
resolver, err := NewPermissionsSyncJobsResolver(logger, db, graphqlbackend.ListPermissionsSyncJobsArgs{ConnectionResolverArgs: args})
126+
require.NoError(t, err)
127+
128+
nodes, err := resolver.Nodes(ctx)
129+
require.NoError(t, err)
130+
require.Equal(t, 0, len(nodes))
131+
132+
// Assert that the error is logged
133+
logEntries := dumpLogs()
134+
var foundLogEntry bool
135+
for _, entry := range logEntries {
136+
errText, ok := entry.Fields["error"].(string)
137+
138+
if ok && strings.Contains(errText, unexpectedError.Error()) {
139+
foundLogEntry = true
140+
break
141+
}
142+
}
143+
144+
require.True(t, foundLogEntry, "Expected log entry not found")
145+
})
146+
})
147+
148+
t.Run("skips over not found errors", func(t *testing.T) {
149+
t.Run("repository error", func(t *testing.T) {
150+
151+
// Create a mock
152+
db := dbmocks.NewMockDB()
153+
154+
// Create a mock permissions sync job store
155+
jobStore := dbmocks.NewMockPermissionSyncJobStore()
156+
157+
// Set up the expectations for the job store
158+
job := &database.PermissionSyncJob{ID: 1, RepositoryID: 1}
159+
jobStore.ListFunc.SetDefaultReturn([]*database.PermissionSyncJob{job}, nil)
160+
161+
db.PermissionSyncJobsFunc.SetDefaultReturn(jobStore)
162+
163+
notFoundError := testNotFoundError{}
164+
165+
repoStore := dbmocks.NewMockRepoStore()
166+
repoStore.GetFunc.SetDefaultReturn(nil, notFoundError)
167+
db.ReposFunc.SetDefaultReturn(repoStore)
168+
169+
logger, dumpLogs := logtest.Captured(t)
170+
// Create a permissions sync job connection store
171+
resolver, err := NewPermissionsSyncJobsResolver(logger, db, graphqlbackend.ListPermissionsSyncJobsArgs{ConnectionResolverArgs: args})
172+
require.NoError(t, err)
173+
174+
nodes, err := resolver.Nodes(ctx)
175+
require.NoError(t, err) // Assert that no error is returned
176+
require.Equal(t, 0, len(nodes))
177+
178+
foundError := false
179+
for _, entry := range dumpLogs() {
180+
if strings.Contains(entry.Message, notFoundError.Error()) {
181+
foundError = true
182+
break
183+
}
184+
185+
errText, ok := entry.Fields["error"].(string)
186+
if ok && strings.Contains(errText, notFoundError.Error()) {
187+
foundError = true
188+
break
189+
}
190+
}
191+
192+
require.False(t, foundError, "Unexpected error found in logs")
193+
})
194+
195+
t.Run("user error", func(t *testing.T) {
196+
197+
// Create a mock
198+
db := dbmocks.NewMockDB()
199+
200+
// Create a mock permissions sync job store
201+
jobStore := dbmocks.NewMockPermissionSyncJobStore()
202+
203+
// Set up the expectations for the job store
204+
job := &database.PermissionSyncJob{ID: 1, RepositoryID: 1}
205+
jobStore.ListFunc.SetDefaultReturn([]*database.PermissionSyncJob{job}, nil)
206+
207+
db.PermissionSyncJobsFunc.SetDefaultReturn(jobStore)
208+
209+
notFoundError := testNotFoundError{}
210+
211+
userStore := dbmocks.NewMockRepoStore()
212+
userStore.GetFunc.SetDefaultReturn(nil, notFoundError)
213+
db.ReposFunc.SetDefaultReturn(userStore)
214+
215+
logger, dumpLogs := logtest.Captured(t)
216+
// Create a permissions sync job connection store
217+
resolver, err := NewPermissionsSyncJobsResolver(logger, db, graphqlbackend.ListPermissionsSyncJobsArgs{ConnectionResolverArgs: args})
218+
require.NoError(t, err)
219+
220+
nodes, err := resolver.Nodes(ctx)
221+
require.NoError(t, err) // Assert that no error is returned
222+
require.Equal(t, 0, len(nodes))
223+
224+
foundError := false
225+
for _, entry := range dumpLogs() {
226+
if strings.Contains(entry.Message, notFoundError.Error()) {
227+
foundError = true
228+
break
229+
}
230+
231+
errText, ok := entry.Fields["error"].(string)
232+
if ok && strings.Contains(errText, notFoundError.Error()) {
233+
foundError = true
234+
break
235+
}
236+
}
237+
238+
require.False(t, foundError, "Unexpected error found in logs")
239+
})
240+
})
241+
}
242+
243+
type testNotFoundError struct{}
244+
245+
func (testNotFoundError) NotFound() bool {
246+
return true
53247
}
248+
249+
func (testNotFoundError) Error() string {
250+
return "this is a test not found error"
251+
}
252+
253+
var _ error = testNotFoundError{}

cmd/frontend/internal/authz/resolvers/resolver.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ func (r *Resolver) PermissionsSyncJobs(ctx context.Context, args graphqlbackend.
663663
return nil, err
664664
}
665665

666-
return NewPermissionsSyncJobsResolver(r.db, args)
666+
return NewPermissionsSyncJobsResolver(r.logger, r.db, args)
667667
}
668668

669669
func (r *Resolver) PermissionsSyncingStats(ctx context.Context) (graphqlbackend.PermissionsSyncingStatsResolver, error) {

0 commit comments

Comments
 (0)