@@ -7,6 +7,7 @@ package languagestats
7
7
8
8
import (
9
9
"bytes"
10
+ "context"
10
11
"io"
11
12
12
13
"code.gitea.io/gitea/modules/analyze"
@@ -18,8 +19,8 @@ import (
18
19
"github.com/go-enry/go-enry/v2"
19
20
)
20
21
21
- // GetLanguageStats calculates language stats for git repository at specified commit
22
- func GetLanguageStats ( repo * git.Repository , commitID string ) (map [string ]int64 , error ) {
22
+ // CalcLanguageStats calculates language stats for the git repository at the specified commit.
23
+ func CalcLanguageStats ( ctx context. Context , repo * git.Repository , commitID string ) (map [string ]int64 , error ) {
23
24
// We will feed the commit IDs in order into cat-file --batch, followed by blobs as necessary.
24
25
// so let's create a batch stdin and stdout
25
26
batchStdinWriter , batchReader , cancel , err := repo .CatFileBatch (repo .Ctx )
@@ -59,11 +60,6 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
59
60
60
61
tree := commit .Tree
61
62
62
- entries , err := tree .ListEntriesRecursiveWithSize ()
63
- if err != nil {
64
- return nil , err
65
- }
66
-
67
63
checker , err := attribute .NewBatchChecker (repo , commitID , attribute .LinguistAttributes )
68
64
if err != nil {
69
65
return nil , err
@@ -82,18 +78,12 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
82
78
firstExcludedLanguage := ""
83
79
firstExcludedLanguageSize := int64 (0 )
84
80
85
- for _ , f := range entries {
86
- select {
87
- case <- repo .Ctx .Done ():
88
- return sizes , repo .Ctx .Err ()
89
- default :
90
- }
91
-
81
+ if err := tree .IterateEntriesRecursive (ctx , func (ctx context.Context , f * git.TreeEntry ) error {
92
82
contentBuf .Reset ()
93
83
content = contentBuf .Bytes ()
94
84
95
85
if f .Size () == 0 {
96
- continue
86
+ return nil
97
87
}
98
88
99
89
isVendored := optional .None [bool ]()
@@ -104,19 +94,19 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
104
94
attrLinguistGenerated := optional .None [bool ]()
105
95
if err == nil {
106
96
if isVendored = attrs .GetVendored (); isVendored .ValueOrDefault (false ) {
107
- continue
97
+ return nil
108
98
}
109
99
110
100
if attrLinguistGenerated = attrs .GetGenerated (); attrLinguistGenerated .ValueOrDefault (false ) {
111
- continue
101
+ return nil
112
102
}
113
103
114
104
if isDocumentation = attrs .GetDocumentation (); isDocumentation .ValueOrDefault (false ) {
115
- continue
105
+ return nil
116
106
}
117
107
118
108
if isDetectable = attrs .GetDetectable (); ! isDetectable .ValueOrDefault (true ) {
119
- continue
109
+ return nil
120
110
}
121
111
122
112
if hasLanguage := attrs .GetLanguage (); hasLanguage .Value () != "" {
@@ -130,27 +120,27 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
130
120
131
121
// this language will always be added to the size
132
122
sizes [language ] += f .Size ()
133
- continue
123
+ return nil
134
124
}
135
125
}
136
126
137
127
if (! isVendored .Has () && analyze .IsVendor (f .Name ())) ||
138
128
enry .IsDotFile (f .Name ()) ||
139
129
(! isDocumentation .Has () && enry .IsDocumentation (f .Name ())) ||
140
130
enry .IsConfiguration (f .Name ()) {
141
- continue
131
+ return nil
142
132
}
143
133
144
134
// If content can not be read or file is too big just do detection by filename
145
135
146
136
if f .Size () <= bigFileSize {
147
137
if err := writeID (f .ID .String ()); err != nil {
148
- return nil , err
138
+ return err
149
139
}
150
140
_ , _ , size , err := git .ReadBatchLine (batchReader )
151
141
if err != nil {
152
142
log .Debug ("Error reading blob: %s Err: %v" , f .ID .String (), err )
153
- return nil , err
143
+ return err
154
144
}
155
145
156
146
sizeToRead := size
@@ -162,11 +152,11 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
162
152
163
153
_ , err = contentBuf .ReadFrom (io .LimitReader (batchReader , sizeToRead ))
164
154
if err != nil {
165
- return nil , err
155
+ return err
166
156
}
167
157
content = contentBuf .Bytes ()
168
158
if err := git .DiscardFull (batchReader , discard ); err != nil {
169
- return nil , err
159
+ return err
170
160
}
171
161
}
172
162
@@ -178,14 +168,14 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
178
168
isGenerated = enry .IsGenerated (f .Name (), content )
179
169
}
180
170
if isGenerated {
181
- continue
171
+ return nil
182
172
}
183
173
184
174
// FIXME: Why can't we split this and the IsGenerated tests to avoid reading the blob unless absolutely necessary?
185
175
// - e.g. do all the detection tests using the filename first, before reading content.
186
176
language := analyze .GetCodeLanguage (f .Name (), content )
187
177
if language == "" {
188
- continue
178
+ return nil
189
179
}
190
180
191
181
// group languages, such as Pug -> HTML; SCSS -> CSS
@@ -206,6 +196,9 @@ func GetLanguageStats(repo *git.Repository, commitID string) (map[string]int64,
206
196
firstExcludedLanguage = language
207
197
firstExcludedLanguageSize += f .Size ()
208
198
}
199
+ return nil
200
+ }, git.TrustedCmdArgs {"--long" }); err != nil {
201
+ return sizes , err
209
202
}
210
203
211
204
// If there are no included languages add the first excluded language
0 commit comments