Skip to content

Commit 0134c3b

Browse files
authored
fix: improve git URL parsing (#486)
* Removes the dependency on git-urls for git URL parsing/validation * Uses go-git's transport.NewEndpoint instead, with some additional massaging to keep our current behaviour in line. * Fixes the SSH server implementation in gittest that was causing us to be unable to properly test SSH clones. * Fixes another test flake I ran into in log.
1 parent 55a0ba3 commit 0134c3b

11 files changed

Lines changed: 201 additions & 65 deletions

File tree

docs/git-auth.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,27 @@
1-
# Git Authentication
1+
# Supported URL Formats
2+
3+
Envbuilder supports three distinct types of Git URLs:
4+
5+
1) Valid URLs with scheme (e.g. `https://user:password@host.tld:12345/path/to/repo`)
6+
2) SCP-like URLs (e.g. `git@host.tld:path/to/repo.git`)
7+
3) Filesystem URLs (require the `git` executable to be available)
8+
9+
Based on the type of URL, one of two authentication methods will be used:
10+
11+
| Git URL format | GIT_USERNAME | GIT_PASSWORD | Auth Method |
12+
| ------------------------|--------------|--------------|-------------|
13+
| https?://host.tld/repo | Not Set | Not Set | None |
14+
| https?://host.tld/repo | Not Set | Set | HTTP Basic |
15+
| https?://host.tld/repo | Set | Not Set | HTTP Basic |
16+
| https?://host.tld/repo | Set | Set | HTTP Basic |
17+
| ssh://host.tld/repo | - | - | SSH |
18+
| git://host.tld/repo | - | - | SSH |
19+
| file://path/to/repo | - | - | None |
20+
| path/to/repo | - | - | None |
21+
| user@host.tld:path/repo | - | - | SSH |
22+
| All other formats | - | - | SSH |
23+
24+
# Authentication Methods
225

326
Two methods of authentication are supported:
427

git/git.go

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@ import (
1010
"os"
1111
"strings"
1212

13+
"github.com/coder/envbuilder/internal/ebutil"
1314
"github.com/coder/envbuilder/options"
1415

15-
giturls "github.com/chainguard-dev/git-urls"
1616
"github.com/go-git/go-billy/v5"
1717
"github.com/go-git/go-git/v5"
1818
"github.com/go-git/go-git/v5/plumbing"
@@ -49,18 +49,17 @@ type CloneRepoOptions struct {
4949
//
5050
// The bool returned states whether the repository was cloned or not.
5151
func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOptions) (bool, error) {
52-
parsed, err := giturls.Parse(opts.RepoURL)
52+
parsed, err := ebutil.ParseRepoURL(opts.RepoURL)
5353
if err != nil {
5454
return false, fmt.Errorf("parse url %q: %w", opts.RepoURL, err)
5555
}
56-
logf("Parsed Git URL as %q", parsed.Redacted())
5756

5857
thinPack := true
5958

6059
if !opts.ThinPack {
6160
thinPack = false
6261
logf("ThinPack options is false, Marking thin-pack as unsupported")
63-
} else if parsed.Hostname() == "dev.azure.com" {
62+
} else if parsed.Host == "dev.azure.com" {
6463
// Azure DevOps requires capabilities multi_ack / multi_ack_detailed,
6564
// which are not fully implemented and by default are included in
6665
// transport.UnsupportedCapabilities.
@@ -92,12 +91,9 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt
9291
if err != nil {
9392
return false, fmt.Errorf("mkdir %q: %w", opts.Path, err)
9493
}
95-
reference := parsed.Fragment
96-
if reference == "" && opts.SingleBranch {
97-
reference = "refs/heads/main"
94+
if parsed.Reference == "" && opts.SingleBranch {
95+
parsed.Reference = "refs/heads/main"
9896
}
99-
parsed.RawFragment = ""
100-
parsed.Fragment = ""
10197
fs, err := opts.Storage.Chroot(opts.Path)
10298
if err != nil {
10399
return false, fmt.Errorf("chroot %q: %w", opts.Path, err)
@@ -120,10 +116,10 @@ func CloneRepo(ctx context.Context, logf func(string, ...any), opts CloneRepoOpt
120116
}
121117

122118
_, err = git.CloneContext(ctx, gitStorage, fs, &git.CloneOptions{
123-
URL: parsed.String(),
119+
URL: parsed.Cleaned,
124120
Auth: opts.RepoAuth,
125121
Progress: opts.Progress,
126-
ReferenceName: plumbing.ReferenceName(reference),
122+
ReferenceName: plumbing.ReferenceName(parsed.Reference),
127123
InsecureSkipTLS: opts.Insecure,
128124
Depth: opts.Depth,
129125
SingleBranch: opts.SingleBranch,
@@ -245,18 +241,23 @@ func LogHostKeyCallback(logger func(string, ...any)) gossh.HostKeyCallback {
245241
// If SSH_KNOWN_HOSTS is not set, the SSH auth method will be configured
246242
// to accept and log all host keys. Otherwise, host key checking will be
247243
// performed as usual.
244+
//
245+
// Git URL formats may only consist of the following:
246+
// 1. A valid URL with a scheme
247+
// 2. An SCP-like URL (e.g. git@host.tld:path/to/repo.git)
248+
// 3. Local filesystem paths (require `git` executable)
248249
func SetupRepoAuth(logf func(string, ...any), options *options.Options) transport.AuthMethod {
249250
if options.GitURL == "" {
250251
logf("❔ No Git URL supplied!")
251252
return nil
252253
}
253-
parsedURL, err := giturls.Parse(options.GitURL)
254+
parsedURL, err := ebutil.ParseRepoURL(options.GitURL)
254255
if err != nil {
255256
logf("❌ Failed to parse Git URL: %s", err.Error())
256257
return nil
257258
}
258259

259-
if parsedURL.Scheme == "http" || parsedURL.Scheme == "https" {
260+
if parsedURL.Protocol == "http" || parsedURL.Protocol == "https" {
260261
// Special case: no auth
261262
if options.GitUsername == "" && options.GitPassword == "" {
262263
logf("👤 Using no authentication!")
@@ -272,7 +273,7 @@ func SetupRepoAuth(logf func(string, ...any), options *options.Options) transpor
272273
}
273274
}
274275

275-
if parsedURL.Scheme == "file" {
276+
if parsedURL.Protocol == "file" {
276277
// go-git will try to fallback to using the `git` command for local
277278
// filesystem clones. However, it's more likely than not that the
278279
// `git` command is not present in the container image. Log a warning

git/git_test.go

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,12 @@ import (
44
"context"
55
"crypto/ed25519"
66
"encoding/base64"
7-
"fmt"
87
"io"
98
"net/http/httptest"
109
"net/url"
1110
"os"
1211
"path/filepath"
13-
"regexp"
12+
"strings"
1413
"testing"
1514

1615
"github.com/coder/envbuilder/git"
@@ -76,6 +75,18 @@ func TestCloneRepo(t *testing.T) {
7675
password: "",
7776
expectClone: true,
7877
},
78+
{
79+
name: "whitespace in URL",
80+
mungeURL: func(u *string) {
81+
if u == nil {
82+
t.Errorf("expected non-nil URL")
83+
return
84+
}
85+
*u = " " + *u + " "
86+
t.Logf("munged URL: %q", *u)
87+
},
88+
expectClone: true,
89+
},
7990
} {
8091
tc := tc
8192
t.Run(tc.name, func(t *testing.T) {
@@ -87,6 +98,9 @@ func TestCloneRepo(t *testing.T) {
8798
_ = gittest.NewRepo(t, srvFS, gittest.Commit(t, "README.md", "Hello, world!", "Wow!"))
8899
authMW := mwtest.BasicAuthMW(tc.srvUsername, tc.srvPassword)
89100
srv := httptest.NewServer(authMW(gittest.NewServer(srvFS)))
101+
if tc.mungeURL != nil {
102+
tc.mungeURL(&srv.URL)
103+
}
90104
clientFS := memfs.New()
91105
// A repo already exists!
92106
_ = gittest.NewRepo(t, clientFS)
@@ -106,6 +120,9 @@ func TestCloneRepo(t *testing.T) {
106120
_ = gittest.NewRepo(t, srvFS, gittest.Commit(t, "README.md", "Hello, world!", "Wow!"))
107121
authMW := mwtest.BasicAuthMW(tc.srvUsername, tc.srvPassword)
108122
srv := httptest.NewServer(authMW(gittest.NewServer(srvFS)))
123+
if tc.mungeURL != nil {
124+
tc.mungeURL(&srv.URL)
125+
}
109126
clientFS := memfs.New()
110127

111128
cloned, err := git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{
@@ -129,7 +146,7 @@ func TestCloneRepo(t *testing.T) {
129146
require.Equal(t, "Hello, world!", readme)
130147
gitConfig := mustRead(t, clientFS, "/workspace/.git/config")
131148
// Ensure we do not modify the git URL that folks pass in.
132-
require.Regexp(t, fmt.Sprintf(`(?m)^\s+url\s+=\s+%s\s*$`, regexp.QuoteMeta(srv.URL)), gitConfig)
149+
require.Contains(t, gitConfig, strings.TrimSpace(srv.URL))
133150
})
134151

135152
// In-URL-style auth e.g. http://user:password@host:port
@@ -139,15 +156,19 @@ func TestCloneRepo(t *testing.T) {
139156
_ = gittest.NewRepo(t, srvFS, gittest.Commit(t, "README.md", "Hello, world!", "Wow!"))
140157
authMW := mwtest.BasicAuthMW(tc.srvUsername, tc.srvPassword)
141158
srv := httptest.NewServer(authMW(gittest.NewServer(srvFS)))
142-
143159
authURL, err := url.Parse(srv.URL)
144160
require.NoError(t, err)
145161
authURL.User = url.UserPassword(tc.username, tc.password)
162+
cloneURL := authURL.String()
163+
if tc.mungeURL != nil {
164+
tc.mungeURL(&cloneURL)
165+
}
166+
146167
clientFS := memfs.New()
147168

148169
cloned, err := git.CloneRepo(context.Background(), t.Logf, git.CloneRepoOptions{
149170
Path: "/workspace",
150-
RepoURL: authURL.String(),
171+
RepoURL: cloneURL,
151172
Storage: clientFS,
152173
})
153174
require.Equal(t, tc.expectClone, cloned)
@@ -162,7 +183,7 @@ func TestCloneRepo(t *testing.T) {
162183
require.Equal(t, "Hello, world!", readme)
163184
gitConfig := mustRead(t, clientFS, "/workspace/.git/config")
164185
// Ensure we do not modify the git URL that folks pass in.
165-
require.Regexp(t, fmt.Sprintf(`(?m)^\s+url\s+=\s+%s\s*$`, regexp.QuoteMeta(authURL.String())), gitConfig)
186+
require.Contains(t, gitConfig, strings.TrimSpace(cloneURL))
166187
})
167188
})
168189
}
@@ -238,10 +259,9 @@ func TestShallowCloneRepo(t *testing.T) {
238259
func TestCloneRepoSSH(t *testing.T) {
239260
t.Parallel()
240261

241-
t.Run("AuthSuccess", func(t *testing.T) {
262+
t.Run("Success", func(t *testing.T) {
242263
t.Parallel()
243264

244-
// TODO: test the rest of the cloning flow. This just tests successful auth.
245265
tmpDir := t.TempDir()
246266
srvFS := osfs.New(tmpDir, osfs.WithChrootOS())
247267

@@ -264,10 +284,9 @@ func TestCloneRepoSSH(t *testing.T) {
264284
},
265285
},
266286
})
267-
// TODO: ideally, we want to test the entire cloning flow.
268-
// For now, this indicates successful ssh key auth.
269-
require.ErrorContains(t, err, "repository not found")
270-
require.False(t, cloned)
287+
require.NoError(t, err)
288+
require.True(t, cloned)
289+
require.Equal(t, "Hello, world!", mustRead(t, clientFS, "/workspace/README.md"))
271290
})
272291

273292
t.Run("AuthFailure", func(t *testing.T) {
@@ -399,12 +418,12 @@ func TestSetupRepoAuth(t *testing.T) {
399418
// Anything that is not https:// or http:// is treated as SSH.
400419
kPath := writeTestPrivateKey(t)
401420
opts := &options.Options{
402-
GitURL: "git://git@host.tld:repo/path",
421+
GitURL: "git://git@host.tld:12345/path",
403422
GitSSHPrivateKeyPath: kPath,
404423
}
405424
auth := git.SetupRepoAuth(t.Logf, opts)
406425
_, ok := auth.(*gitssh.PublicKeys)
407-
require.True(t, ok)
426+
require.True(t, ok, "expected SSH auth for git:// URL")
408427
})
409428

410429
t.Run("SSH/GitUsername", func(t *testing.T) {
@@ -422,7 +441,7 @@ func TestSetupRepoAuth(t *testing.T) {
422441
t.Run("SSH/PrivateKey", func(t *testing.T) {
423442
kPath := writeTestPrivateKey(t)
424443
opts := &options.Options{
425-
GitURL: "ssh://git@host.tld:repo/path",
444+
GitURL: "ssh://git@host.tld/repo/path",
426445
GitSSHPrivateKeyPath: kPath,
427446
}
428447
auth := git.SetupRepoAuth(t.Logf, opts)
@@ -436,7 +455,7 @@ func TestSetupRepoAuth(t *testing.T) {
436455

437456
t.Run("SSH/Base64PrivateKey", func(t *testing.T) {
438457
opts := &options.Options{
439-
GitURL: "ssh://git@host.tld:repo/path",
458+
GitURL: "ssh://git@host.tld/repo/path",
440459
GitSSHPrivateKeyBase64: base64EncodeTestPrivateKey(),
441460
}
442461
auth := git.SetupRepoAuth(t.Logf, opts)
@@ -452,7 +471,7 @@ func TestSetupRepoAuth(t *testing.T) {
452471

453472
t.Run("SSH/NoAuthMethods", func(t *testing.T) {
454473
opts := &options.Options{
455-
GitURL: "ssh://git@host.tld:repo/path",
474+
GitURL: "git@host.tld:repo/path",
456475
}
457476
auth := git.SetupRepoAuth(t.Logf, opts)
458477
require.Nil(t, auth) // TODO: actually test SSH_AUTH_SOCK
@@ -481,6 +500,28 @@ func TestSetupRepoAuth(t *testing.T) {
481500
auth := git.SetupRepoAuth(t.Logf, opts)
482501
require.Nil(t, auth)
483502
})
503+
504+
t.Run("Whitespace", func(t *testing.T) {
505+
kPath := writeTestPrivateKey(t)
506+
opts := &options.Options{
507+
GitURL: "ssh://git@host.tld/repo path",
508+
GitSSHPrivateKeyPath: kPath,
509+
}
510+
auth := git.SetupRepoAuth(t.Logf, opts)
511+
_, ok := auth.(*gitssh.PublicKeys)
512+
require.True(t, ok)
513+
})
514+
515+
t.Run("LeadingTrailingWhitespace", func(t *testing.T) {
516+
kPath := writeTestPrivateKey(t)
517+
opts := &options.Options{
518+
GitURL: " ssh://git@host.tld/repo/path ",
519+
GitSSHPrivateKeyPath: kPath,
520+
}
521+
auth := git.SetupRepoAuth(t.Logf, opts)
522+
_, ok := auth.(*gitssh.PublicKeys)
523+
require.True(t, ok)
524+
})
484525
}
485526

486527
func mustRead(t *testing.T, fs billy.Filesystem, path string) string {

go.mod

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ require (
1313
cdr.dev/slog v1.6.2-0.20240126064726-20367d4aede6
1414
github.com/GoogleContainerTools/kaniko v1.9.2
1515
github.com/breml/rootcerts v0.2.10
16-
github.com/chainguard-dev/git-urls v1.0.2
1716
github.com/coder/coder/v2 v2.10.1-0.20240704130443-c2d44d16a352
1817
github.com/coder/retry v1.5.1
1918
github.com/coder/serpent v0.8.0

go.sum

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,8 +152,6 @@ github.com/certifi/gocertifi v0.0.0-20200922220541-2c3bb06c6054/go.mod h1:sGbDF6
152152
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
153153
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
154154
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
155-
github.com/chainguard-dev/git-urls v1.0.2 h1:pSpT7ifrpc5X55n4aTTm7FFUE+ZQHKiqpiwNkJrVcKQ=
156-
github.com/chainguard-dev/git-urls v1.0.2/go.mod h1:rbGgj10OS7UgZlbzdUQIQpT0k/D4+An04HJY7Ol+Y/o=
157155
github.com/charmbracelet/lipgloss v0.8.0 h1:IS00fk4XAHcf8uZKc3eHeMUTCxUH6NkaTrdyCQk84RU=
158156
github.com/charmbracelet/lipgloss v0.8.0/go.mod h1:p4eYUZZJ/0oXTuCQKFF8mqyKCz0ja6y+7DniDDw5KKU=
159157
github.com/chenzhuoyu/base64x v0.0.0-20230717121745-296ad89f973d h1:77cEq6EriyTZ0g/qfRdp61a3Uu/AWrgIq2s0ClJV1g0=

integration/integration_test.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -434,14 +434,16 @@ func TestGitSSHAuth(t *testing.T) {
434434
_ = gittest.NewRepo(t, srvFS, gittest.Commit(t, "Dockerfile", "FROM "+testImageAlpine, "Initial commit"))
435435
tr := gittest.NewServerSSH(t, srvFS, signer.PublicKey())
436436

437-
_, err = runEnvbuilder(t, runOpts{env: []string{
437+
ctr, err := runEnvbuilder(t, runOpts{env: []string{
438438
envbuilderEnv("DOCKERFILE_PATH", "Dockerfile"),
439-
envbuilderEnv("GIT_URL", tr.String()+"."),
439+
envbuilderEnv("GIT_URL", tr.String()),
440440
envbuilderEnv("GIT_SSH_PRIVATE_KEY_BASE64", base64Key),
441441
}})
442-
// TODO: Ensure it actually clones but this does mean we have
443-
// successfully authenticated.
444-
require.ErrorContains(t, err, "repository not found")
442+
require.NoError(t, err)
443+
dockerFilePath := execContainer(t, ctr, "find /workspaces -name Dockerfile")
444+
require.NotEmpty(t, dockerFilePath)
445+
dockerFile := execContainer(t, ctr, "cat "+dockerFilePath)
446+
require.Contains(t, dockerFile, testImageAlpine)
445447
})
446448

447449
t.Run("Base64/Failure", func(t *testing.T) {

0 commit comments

Comments
 (0)