Skip to content

Commit 2d931f3

Browse files
authored
switch to go-git implementation (#400)
1 parent 9c789fe commit 2d931f3

10 files changed

Lines changed: 1158 additions & 711 deletions

File tree

Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,3 +33,11 @@ update-snapshots:
3333
update-vulndb:
3434
go test -tags build_platform_vuln_database -run TestPopulateBuildPlatformVulnDatabase -timeout 10m ./opa/
3535
opa fmt -w opa/rego/external/build_platform.rego
36+
37+
# bench-org runs BenchmarkAnalyzeOrg from ./bench/analyze/ with a single
# iteration per run (-benchtime=1x), repeated three times (-count=3), under a
# 30-minute test timeout.
.PHONY: bench-org
38+
bench-org:
39+
go test -bench=BenchmarkAnalyzeOrg -benchtime=1x -count=3 -timeout=30m ./bench/analyze/
40+
41+
# bench-repo runs BenchmarkAnalyzeRepo from ./bench/analyze/ with a single
# iteration per run (-benchtime=1x), repeated three times (-count=3), under a
# 10-minute test timeout.
.PHONY: bench-repo
42+
bench-repo:
43+
go test -bench=BenchmarkAnalyzeRepo -benchtime=1x -count=3 -timeout=10m ./bench/analyze/

analyze/analyze.go

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ type GitClient interface {
7373
GetRepoHeadBranchName(ctx context.Context, repoPath string) (string, error)
7474
GetUniqWorkflowsBranches(ctx context.Context, clonePath string) (map[string][]models.BranchInfo, error)
7575
BlobMatches(ctx context.Context, clonePath string, blobsha string, regex *regexp.Regexp) (bool, []byte, error)
76+
ListFiles(clonePath string, extensions []string) (map[string][]byte, error)
77+
Cleanup(clonePath string)
7678
}
7779

7880
func NewAnalyzer(scmClient ScmClient, gitClient GitClient, formatter Formatter, config *models.Config, opaClient *opa.Opa) *Analyzer {
@@ -164,20 +166,37 @@ func (a *Analyzer) AnalyzeOrg(ctx context.Context, org string, numberOfGoroutine
164166
defer goRoutineLimitSem.Release(1)
165167
defer reposWg.Done()
166168
repoNameWithOwner := repo.GetRepoIdentifier()
167-
tempDir, err := a.cloneRepoToTemp(ctx, repo.BuildGitURL(a.ScmClient.GetProviderBaseURL()), a.ScmClient.GetToken(), "HEAD")
169+
repoKey, err := a.cloneRepo(ctx, repo.BuildGitURL(a.ScmClient.GetProviderBaseURL()), a.ScmClient.GetToken(), "HEAD")
168170
if err != nil {
169171
log.Error().Err(err).Str("repo", repoNameWithOwner).Msg("failed to clone repo")
170172
return
171173
}
172-
defer os.RemoveAll(tempDir)
174+
defer a.GitClient.Cleanup(repoKey)
173175

174-
pkg, err := a.GeneratePackageInsights(ctx, tempDir, repo, "HEAD")
176+
pkg, err := a.GeneratePackageInsights(ctx, repoKey, repo, "HEAD")
175177
if err != nil {
176178
log.Error().Err(err).Str("repo", repoNameWithOwner).Msg("failed to generate package insights")
177179
return
178180
}
179181

180-
scannedPkg, err := inventory.ScanPackage(ctx, *pkg, tempDir)
182+
files, err := a.GitClient.ListFiles(repoKey, []string{".yml", ".yaml"})
183+
if err != nil {
184+
log.Error().Err(err).Str("repo", repoNameWithOwner).Msg("failed to list files")
185+
return
186+
}
187+
188+
memScanner := &scanner.InventoryScannerMem{
189+
Files: files,
190+
Parsers: []scanner.MemParser{
191+
scanner.NewGithubActionsMetadataParser(),
192+
scanner.NewGithubActionWorkflowParser(),
193+
scanner.NewAzurePipelinesParser(),
194+
scanner.NewGitlabCiParser(),
195+
scanner.NewPipelineAsCodeTektonParser(),
196+
},
197+
}
198+
199+
scannedPkg, err := inventory.ScanPackageScanner(ctx, *pkg, memScanner)
181200
if err != nil {
182201
log.Error().Err(err).Str("repo", repoNameWithOwner).Msg("failed to scan package")
183202
return
@@ -245,16 +264,16 @@ func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string,
245264
_ = bar.RenderBlank()
246265

247266
repoUrl := repo.BuildGitURL(a.ScmClient.GetProviderBaseURL())
248-
tempDir, err := a.FetchConeToTemp(ctx, repoUrl, a.ScmClient.GetToken(), "refs/heads/*:refs/remotes/origin/*", ".github/workflows")
267+
repoKey, err := a.fetchCone(ctx, repoUrl, a.ScmClient.GetToken(), "refs/heads/*:refs/remotes/origin/*", ".github/workflows")
249268
if err != nil {
250269
return nil, fmt.Errorf("failed to fetch cone: %w", err)
251270
}
252-
defer os.RemoveAll(tempDir)
271+
defer a.GitClient.Cleanup(repoKey)
253272

254273
bar.Describe("Listing unique workflows")
255274
_ = bar.Add(1)
256275

257-
workflows, err := a.GitClient.GetUniqWorkflowsBranches(ctx, tempDir)
276+
workflows, err := a.GitClient.GetUniqWorkflowsBranches(ctx, repoKey)
258277
if err != nil {
259278
return nil, fmt.Errorf("failed to get unique workflow: %w", err)
260279
}
@@ -267,7 +286,7 @@ func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string,
267286
if numberOfGoroutines != nil {
268287
maxGoroutines = *numberOfGoroutines
269288
}
270-
semaphore := semaphore.NewWeighted(int64(maxGoroutines))
289+
sem := semaphore.NewWeighted(int64(maxGoroutines))
271290
m := sync.Mutex{}
272291
type file struct {
273292
path string
@@ -291,15 +310,15 @@ func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string,
291310
blobShas = append(blobShas, sha)
292311
}
293312
for _, blobSha := range blobShas {
294-
if err := semaphore.Acquire(ctx, 1); err != nil {
313+
if err := sem.Acquire(ctx, 1); err != nil {
295314
errChan <- fmt.Errorf("failed to acquire semaphore: %w", err)
296315
break
297316
}
298317
wgProducer.Add(1)
299318
go func(blobSha string) {
300319
defer wgProducer.Done()
301-
defer semaphore.Release(1)
302-
match, content, err := a.GitClient.BlobMatches(ctx, tempDir, blobSha, regex)
320+
defer sem.Release(1)
321+
match, content, err := a.GitClient.BlobMatches(ctx, repoKey, blobSha, regex)
303322
if err != nil {
304323
errChan <- fmt.Errorf("failed to blob match %s: %w", blobSha, err)
305324
return
@@ -326,7 +345,7 @@ func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string,
326345

327346
bar.Describe("Scanning package")
328347
_ = bar.Add(1)
329-
pkg, err := a.GeneratePackageInsights(ctx, tempDir, repo, "HEAD")
348+
pkg, err := a.GeneratePackageInsights(ctx, repoKey, repo, "HEAD")
330349
if err != nil {
331350
return nil, fmt.Errorf("failed to generate package insight: %w", err)
332351
}
@@ -412,21 +431,37 @@ func (a *Analyzer) AnalyzeRepo(ctx context.Context, repoString string, ref strin
412431
bar := a.ProgressBar(2, "Cloning repository")
413432
_ = bar.RenderBlank()
414433

415-
tempDir, err := a.cloneRepoToTemp(ctx, repo.BuildGitURL(a.ScmClient.GetProviderBaseURL()), a.ScmClient.GetToken(), ref)
434+
repoKey, err := a.cloneRepo(ctx, repo.BuildGitURL(a.ScmClient.GetProviderBaseURL()), a.ScmClient.GetToken(), ref)
416435
if err != nil {
417436
return nil, err
418437
}
419-
defer os.RemoveAll(tempDir)
438+
defer a.GitClient.Cleanup(repoKey)
420439

421440
bar.Describe("Analyzing repository")
422441
_ = bar.Add(1)
423442

424-
pkg, err := a.GeneratePackageInsights(ctx, tempDir, repo, ref)
443+
pkg, err := a.GeneratePackageInsights(ctx, repoKey, repo, ref)
425444
if err != nil {
426445
return nil, err
427446
}
428447

429-
scannedPackage, err := inventory.ScanPackage(ctx, *pkg, tempDir)
448+
files, err := a.GitClient.ListFiles(repoKey, []string{".yml", ".yaml"})
449+
if err != nil {
450+
return nil, fmt.Errorf("failed to list files: %w", err)
451+
}
452+
453+
memScanner := &scanner.InventoryScannerMem{
454+
Files: files,
455+
Parsers: []scanner.MemParser{
456+
scanner.NewGithubActionsMetadataParser(),
457+
scanner.NewGithubActionWorkflowParser(),
458+
scanner.NewAzurePipelinesParser(),
459+
scanner.NewGitlabCiParser(),
460+
scanner.NewPipelineAsCodeTektonParser(),
461+
},
462+
}
463+
464+
scannedPackage, err := inventory.ScanPackageScanner(ctx, *pkg, memScanner)
430465
if err != nil {
431466
return nil, err
432467
}
@@ -669,32 +704,22 @@ func (a *Analyzer) GeneratePackageInsights(ctx context.Context, tempDir string,
669704
return pkg, nil
670705
}
671706

672-
func (a *Analyzer) FetchConeToTemp(ctx context.Context, gitURL, token, ref string, cone string) (string, error) {
673-
tempDir, err := os.MkdirTemp("", TEMP_DIR_PREFIX)
707+
func (a *Analyzer) fetchCone(ctx context.Context, gitURL, token, ref string, cone string) (string, error) {
708+
key := fmt.Sprintf("repo:%s:cone:%d", gitURL, time.Now().UnixNano())
709+
err := a.GitClient.FetchCone(ctx, key, gitURL, token, ref, cone)
674710
if err != nil {
675-
return "", fmt.Errorf("failed to create temp directory: %w", err)
711+
return "", fmt.Errorf("failed to fetch cone: %w", err)
676712
}
677-
678-
err = a.GitClient.FetchCone(ctx, tempDir, gitURL, token, ref, cone)
679-
if err != nil {
680-
os.RemoveAll(tempDir) // Clean up if cloning fails
681-
return "", fmt.Errorf("failed to clone repo: %w", err)
682-
}
683-
return tempDir, nil
713+
return key, nil
684714
}
685715

686-
func (a *Analyzer) cloneRepoToTemp(ctx context.Context, gitURL string, token string, ref string) (string, error) {
687-
tempDir, err := os.MkdirTemp("", TEMP_DIR_PREFIX)
688-
if err != nil {
689-
return "", fmt.Errorf("failed to create temp directory: %w", err)
690-
}
691-
692-
err = a.GitClient.Clone(ctx, tempDir, gitURL, token, ref)
716+
func (a *Analyzer) cloneRepo(ctx context.Context, gitURL string, token string, ref string) (string, error) {
717+
key := fmt.Sprintf("repo:%s:%d", gitURL, time.Now().UnixNano())
718+
err := a.GitClient.Clone(ctx, key, gitURL, token, ref)
693719
if err != nil {
694-
os.RemoveAll(tempDir) // Clean up if cloning fails
695720
return "", fmt.Errorf("failed to clone repo: %w", err)
696721
}
697-
return tempDir, nil
722+
return key, nil
698723
}
699724

700725
func (a *Analyzer) ProgressBar(maxValue int64, description string) *progressbar.ProgressBar {

bench/analyze/bench_test.go

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
package analyze_test
2+
3+
import (
4+
"context"
5+
"os"
6+
"runtime"
7+
"syscall"
8+
"testing"
9+
10+
"github.com/boostsecurityio/poutine/analyze"
11+
"github.com/boostsecurityio/poutine/formatters/noop"
12+
"github.com/boostsecurityio/poutine/models"
13+
"github.com/boostsecurityio/poutine/opa"
14+
"github.com/boostsecurityio/poutine/providers/gitops"
15+
"github.com/boostsecurityio/poutine/providers/scm"
16+
"github.com/rs/zerolog"
17+
)
18+
19+
// timevalToMs converts a syscall.Timeval to milliseconds as a float64 by
// combining its seconds field (Sec * 1000) and its microseconds field
// (Usec / 1000).
func timevalToMs(tv syscall.Timeval) float64 {
20+
return float64(tv.Sec)*1000 + float64(tv.Usec)/1000
21+
}
22+
23+
// normalizeMaxRSS scales a raw Rusage.Maxrss value so that callers can treat
// the result as a byte count: on Linux the value is multiplied by 1024, on
// every other GOOS it is returned unchanged.
//
// NOTE(review): this presumes Linux reports ru_maxrss in kilobytes and that
// all other platforms report bytes. That matches macOS; confirm before
// running the benchmarks on other operating systems.
func normalizeMaxRSS(rssValue int64) float64 {
24+
if runtime.GOOS == "linux" {
25+
return float64(rssValue) * 1024
26+
}
27+
return float64(rssValue)
28+
}
29+
30+
// resourceSnapshot bundles the resource counters captured at one point in
// time by captureSnapshot and consumed by reportMetrics.
type resourceSnapshot struct {
31+
// mem holds the Go runtime memory statistics (runtime.ReadMemStats).
mem runtime.MemStats
32+
// ruSelf holds the getrusage result for RUSAGE_SELF.
ruSelf syscall.Rusage
33+
// ruChild holds the getrusage result for RUSAGE_CHILDREN.
ruChild syscall.Rusage
34+
}
35+
36+
// captureSnapshot reads the current Go memory statistics and the rusage
// counters for this process (RUSAGE_SELF) and its children (RUSAGE_CHILDREN)
// and returns them together.
func captureSnapshot() resourceSnapshot {
37+
var s resourceSnapshot
38+
runtime.ReadMemStats(&s.mem)
39+
// Getrusage errors are deliberately discarded; if a call fails the
// corresponding Rusage field is left at its zero value.
_ = syscall.Getrusage(syscall.RUSAGE_SELF, &s.ruSelf)
40+
_ = syscall.Getrusage(syscall.RUSAGE_CHILDREN, &s.ruChild)
41+
return s
42+
}
43+
44+
// reportMetrics publishes custom benchmark metrics on b derived from two
// snapshots taken before and after the measured work.
//
// CPU metrics are deltas (after minus before) of user plus system time, in
// milliseconds, for the process itself and for its children. The memory
// metrics (max RSS, HeapInuse, Sys) are taken from the after snapshot alone,
// not as a difference, and are divided by 1024*1024 to report megabytes.
func reportMetrics(b *testing.B, before, after resourceSnapshot) {
45+
// User + system CPU time consumed between the two snapshots.
cpuSelfMs := (timevalToMs(after.ruSelf.Utime) - timevalToMs(before.ruSelf.Utime)) +
46+
(timevalToMs(after.ruSelf.Stime) - timevalToMs(before.ruSelf.Stime))
47+
cpuChildrenMs := (timevalToMs(after.ruChild.Utime) - timevalToMs(before.ruChild.Utime)) +
48+
(timevalToMs(after.ruChild.Stime) - timevalToMs(before.ruChild.Stime))
49+
50+
b.ReportMetric(cpuSelfMs, "cpu-self-ms/op")
51+
b.ReportMetric(cpuChildrenMs, "cpu-children-ms/op")
52+
// Maxrss is normalized by normalizeMaxRSS before the MB conversion.
b.ReportMetric(normalizeMaxRSS(after.ruSelf.Maxrss)/1024/1024, "rss-self-MB/op")
53+
b.ReportMetric(normalizeMaxRSS(after.ruChild.Maxrss)/1024/1024, "rss-children-MB/op")
54+
b.ReportMetric(float64(after.mem.HeapInuse)/1024/1024, "heap-inuse-MB/op")
55+
b.ReportMetric(float64(after.mem.Sys)/1024/1024, "sys-MB/op")
56+
}
57+
58+
// setupAnalyzer builds the analyze.Analyzer used by the benchmarks.
//
// It reads the GH_TOKEN environment variable and skips the benchmark when it
// is empty. Otherwise it raises the global zerolog level to Warn, creates a
// "github" SCM client for the given command, loads the default config,
// creates an OPA client, and wires them together with a git client from
// gitops.NewGitClient(nil) and the noop formatter. Any construction error
// aborts the benchmark via b.Fatalf.
func setupAnalyzer(b *testing.B, command string) *analyze.Analyzer {
59+
token := os.Getenv("GH_TOKEN")
60+
if token == "" {
61+
b.Skip("GH_TOKEN not set, skipping benchmark")
62+
}
63+
64+
// Note: this changes the process-wide zerolog level, not just this benchmark's.
zerolog.SetGlobalLevel(zerolog.WarnLevel)
65+
66+
ctx := context.Background()
67+
68+
// The third argument is passed as an empty string; presumably it is an
// optional base URL/domain override — TODO confirm against scm.NewScmClient.
scmClient, err := scm.NewScmClient(ctx, "github", "", token, command)
69+
if err != nil {
70+
b.Fatalf("failed to create SCM client: %v", err)
71+
}
72+
73+
config := models.DefaultConfig()
74+
// NOTE(review): Quiet is explicitly set to false; presumably this leaves
// progress output enabled during the benchmark — confirm this is intended.
config.Quiet = false
75+
76+
opaClient, err := opa.NewOpa(ctx, config)
77+
if err != nil {
78+
b.Fatalf("failed to create OPA client: %v", err)
79+
}
80+
81+
return analyze.NewAnalyzer(scmClient, gitops.NewGitClient(nil), &noop.Format{}, config, opaClient)
82+
}
83+
84+
// BenchmarkAnalyzeOrg measures Analyzer.AnalyzeOrg against the "microsoft"
// organization, passing 2 as the goroutine count. It is skipped by
// setupAnalyzer when GH_TOKEN is not set.
//
// Besides the standard timing and allocation figures, each iteration reports
// the custom CPU/memory metrics from reportMetrics plus the number of
// packages returned ("repos/op").
func BenchmarkAnalyzeOrg(b *testing.B) {
85+
analyzer := setupAnalyzer(b, "analyze_org")
86+
ctx := context.Background()
87+
threads := 2
88+
89+
b.ReportAllocs()
90+
// Exclude analyzer construction from the timed region.
b.ResetTimer()
91+
92+
for i := 0; i < b.N; i++ {
93+
// Force a collection so the "before" snapshot starts from a settled heap.
runtime.GC()
94+
before := captureSnapshot()
95+
96+
packages, err := analyzer.AnalyzeOrg(ctx, "microsoft", &threads)
97+
if err != nil {
98+
b.Fatalf("AnalyzeOrg failed: %v", err)
99+
}
100+
101+
after := captureSnapshot()
102+
// Metrics are reported on every iteration, not aggregated across b.N.
reportMetrics(b, before, after)
103+
b.ReportMetric(float64(len(packages)), "repos/op")
104+
}
105+
}
106+
107+
// BenchmarkAnalyzeRepo measures Analyzer.AnalyzeRepo against the
// "messypoutine/gha-playground" repository at ref "HEAD". It is skipped by
// setupAnalyzer when GH_TOKEN is not set.
//
// Besides the standard timing and allocation figures, each iteration reports
// the custom CPU/memory metrics from reportMetrics plus the number of
// findings in the result ("findings/op").
func BenchmarkAnalyzeRepo(b *testing.B) {
108+
analyzer := setupAnalyzer(b, "analyze_repo")
109+
ctx := context.Background()
110+
111+
b.ReportAllocs()
112+
// Exclude analyzer construction from the timed region.
b.ResetTimer()
113+
114+
for i := 0; i < b.N; i++ {
115+
// Force a collection so the "before" snapshot starts from a settled heap.
runtime.GC()
116+
before := captureSnapshot()
117+
118+
pkg, err := analyzer.AnalyzeRepo(ctx, "messypoutine/gha-playground", "HEAD")
119+
if err != nil {
120+
b.Fatalf("AnalyzeRepo failed: %v", err)
121+
}
122+
123+
after := captureSnapshot()
124+
// Metrics are reported on every iteration, not aggregated across b.N.
reportMetrics(b, before, after)
125+
b.ReportMetric(float64(len(pkg.FindingsResults.Findings)), "findings/op")
126+
}
127+
}

0 commit comments

Comments
 (0)