From 550f3190a2ba8bbbc48729855ea76f5918eb5485 Mon Sep 17 00:00:00 2001 From: Yolean k8s-qa Date: Tue, 12 May 2026 07:55:46 +0000 Subject: [PATCH] feat(images): list --context=<name> queries the cluster's containerd `y-cluster images list` is overloaded to also read from a cluster's k8s.io containerd namespace. The --context flag is mutually exclusive with the positional YAML input -- same conceptual question ("what images are in this scope?"), different source. Output is one row per stored manifest (digest-aliases of the same content fold together, the bare sha256:<hex> config-digest row containerd writes is filtered out), sorted by descending compressed size by default. --sort=name switches to alphabetical; --format=json emits [{ref, digest, size_bytes, size_human}] for piping. The implementation parses `ctr -n k8s.io image list` tabular output -- the same format pkg/images/load.go already relies on -- and reads the SIZE column back from its IEC units. This is stable across the containerd versions y-cluster targets without depending on ctr's --format json shape. When many rows fold into one, refRank picks the most informative form: name:tag@digest > name@digest > name:tag > anything else. A hostport ":port" before the last slash isn't counted as a tag (the standard containerd-ref edge case stripTag already handles). 
Refs specs/y-cluster/FEATURE_REQUEST_IMAGES_LIST_FROM_CLUSTER.md --- cmd/y-cluster/images.go | 102 +++++++++++++++-- cmd/y-cluster/images_test.go | 54 +++++++++ pkg/images/list_cluster.go | 194 ++++++++++++++++++++++++++++++++ pkg/images/list_cluster_test.go | 147 ++++++++++++++++++++++++ 4 files changed, 486 insertions(+), 11 deletions(-) create mode 100644 pkg/images/list_cluster.go create mode 100644 pkg/images/list_cluster_test.go diff --git a/cmd/y-cluster/images.go b/cmd/y-cluster/images.go index 0266fec..a0b4989 100644 --- a/cmd/y-cluster/images.go +++ b/cmd/y-cluster/images.go @@ -2,6 +2,7 @@ package main import ( "context" + "encoding/json" "fmt" "io" "os" @@ -31,24 +32,48 @@ func imagesCmd() *cobra.Command { } func imagesListCmd() *cobra.Command { + var contextName string + var format string + var sortKey string + cmd := &cobra.Command{ - Use: "list ", - Short: "Print every container image referenced by a Kubernetes YAML stream", - Long: `Read a YAML stream and print every image reference found in any -PodSpec (Deployment, StatefulSet, DaemonSet, Job, CronJob, ReplicaSet, -Pod). Output is sorted, deduplicated, one ref per line — suitable for -piping to xargs or a downstream tool. + Use: "list [|-]", + Short: "Print images referenced by a YAML stream or stored in a cluster", + Long: `Two input modes; mutually exclusive. -Input source is a positional argument: +YAML mode (positional argument): read the file at path - read stdin + Prints every image reference found in any PodSpec + (Deployment, StatefulSet, DaemonSet, Job, CronJob, ReplicaSet, + Pod). Output is sorted, deduplicated, one ref per line — + suitable for piping to xargs or a downstream tool. 
+ Pipe a kustomize build through it: + kubectl kustomize ./base | y-cluster images list - -To extract images from a kustomize tree, pipe the build through: - kubectl kustomize ./base | y-cluster images list - +Cluster mode (--context=): + Queries the cluster's containerd k8s.io namespace and prints + one row per stored manifest, sorted by descending compressed + size by default. Digest-aliases of the same manifest are + collapsed (no double-count). Use this to answer "what's + taking the space in this appliance qcow2". + Default output is a SIZE/IMAGE table; --format=json emits + [{ref, digest, size_bytes, size_human}]. --sort=name switches + to alphabetical. -Exit codes: 0 on success, 1 on YAML parse / I/O error, 2 on usage.`, - Args: cobra.ExactArgs(1), +Exit codes: 0 on success, 1 on YAML parse / I/O / cluster +error, 2 on usage.`, + Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { + if len(args) > 0 && contextName != "" { + return fmt.Errorf("--context is mutually exclusive with positional input") + } + if len(args) == 0 && contextName == "" { + return fmt.Errorf("specify a positional input (|-) or --context=") + } + if contextName != "" { + return runListFromCluster(cmd, contextName, format, sortKey) + } r, closer, err := openYAMLInput(args[0], cmd.InOrStdin()) if err != nil { return err @@ -65,9 +90,64 @@ Exit codes: 0 on success, 1 on YAML parse / I/O error, 2 on usage.`, return nil }, } + cmd.Flags().StringVar(&contextName, "context", "", "kubeconfig context — query the cluster's containerd (mutex with positional input)") + cmd.Flags().StringVar(&format, "format", "table", "cluster mode output format: table|json") + cmd.Flags().StringVar(&sortKey, "sort", "size", "cluster mode sort key: size (desc) | name (asc)") return cmd } +func runListFromCluster(cmd *cobra.Command, contextName, format, sortKey string) error { + ctx := cmd.Context() + lr, err := cluster.Lookup(ctx, "", contextName) + if err != nil { + return err + } + 
rows, err := images.ListFromCluster(ctx, lr) + if err != nil { + return err + } + switch sortKey { + case "", "size": + images.SortClusterImagesBySizeDesc(rows) + case "name": + images.SortClusterImagesByName(rows) + default: + return fmt.Errorf("--sort: unknown value %q (size|name)", sortKey) + } + out := cmd.OutOrStdout() + switch format { + case "", "table": + return writeClusterImagesTable(out, rows) + case "json": + enc := json.NewEncoder(out) + enc.SetIndent("", " ") + return enc.Encode(rows) + default: + return fmt.Errorf("--format: unknown value %q (table|json)", format) + } +} + +// writeClusterImagesTable renders one row per stored manifest +// as "SIZE IMAGE", with the SIZE column padded to the widest +// value so refs line up. Matches the spec's example output. +func writeClusterImagesTable(w io.Writer, rows []images.ClusterImage) error { + sizeWidth := len("SIZE") + for _, r := range rows { + if l := len(r.SizeHuman); l > sizeWidth { + sizeWidth = l + } + } + if _, err := fmt.Fprintf(w, "%-*s %s\n", sizeWidth, "SIZE", "IMAGE"); err != nil { + return err + } + for _, r := range rows { + if _, err := fmt.Fprintf(w, "%-*s %s\n", sizeWidth, r.SizeHuman, r.Ref); err != nil { + return err + } + } + return nil +} + func imagesCacheCmd() *cobra.Command { var cacheDir string diff --git a/cmd/y-cluster/images_test.go b/cmd/y-cluster/images_test.go index 812b6b2..dbc7725 100644 --- a/cmd/y-cluster/images_test.go +++ b/cmd/y-cluster/images_test.go @@ -68,6 +68,60 @@ func TestImagesListCmd_FileNotFound(t *testing.T) { } } +// TestImagesListCmd_PositionalAndContextMutex pins the mutex +// rule: a positional input and --context can't both be set, +// because they pick incompatible input sources (YAML stream vs +// containerd ground truth). 
+func TestImagesListCmd_PositionalAndContextMutex(t *testing.T) { + cmd := rootCmd() + cmd.SetArgs([]string{"images", "list", "--context=local", "/some/path.yaml"}) + err := cmd.Execute() + if err == nil { + t.Fatal("expected error for positional + --context combination") + } + if !strings.Contains(err.Error(), "mutually exclusive") { + t.Errorf("error should mention mutex: %v", err) + } +} + +// TestImagesListCmd_ContextUnknownPropagates: a --context that +// the kubeconfig doesn't know about should surface the cluster +// lookup error rather than swallowing it. +func TestImagesListCmd_ContextUnknownPropagates(t *testing.T) { + cmd := rootCmd() + cmd.SetArgs([]string{"images", "list", "--context=does-not-exist"}) + if err := cmd.Execute(); err == nil { + t.Fatal("expected cluster-lookup error for unknown --context") + } +} + +// TestImagesListCmd_BadFormat / _BadSort pin the validation +// of the cluster-mode formatting knobs. Errors should fire on +// the flag value, NOT on the unreachable cluster -- but a +// non-existent context happens to error first; we assert that +// the flag values themselves are at least accepted without a +// flag-parse error (cobra would error before our RunE runs). +func TestImagesListCmd_FlagsAccepted(t *testing.T) { + for _, args := range [][]string{ + {"images", "list", "--context=does-not-exist", "--format=table"}, + {"images", "list", "--context=does-not-exist", "--format=json"}, + {"images", "list", "--context=does-not-exist", "--sort=size"}, + {"images", "list", "--context=does-not-exist", "--sort=name"}, + } { + cmd := rootCmd() + cmd.SetArgs(args) + // We expect a cluster-lookup error, not a flag-parse error. 
+ err := cmd.Execute() + if err == nil { + t.Errorf("%v: expected cluster-lookup error", args) + continue + } + if strings.Contains(err.Error(), "unknown flag") { + t.Errorf("%v: cobra rejected a flag we own: %v", args, err) + } + } +} + func TestImagesCacheCmd_RequiresRef(t *testing.T) { cmd := rootCmd() cmd.SetArgs([]string{"images", "cache"}) diff --git a/pkg/images/list_cluster.go b/pkg/images/list_cluster.go new file mode 100644 index 0000000..0e8c02c --- /dev/null +++ b/pkg/images/list_cluster.go @@ -0,0 +1,194 @@ +package images + +import ( + "bytes" + "context" + "fmt" + "sort" + "strconv" + "strings" + + "github.com/Yolean/y-cluster/pkg/cluster" +) + +// ClusterImage is one stored manifest in a cluster's containerd +// k8s.io namespace, after digest-alias collapse. Multiple +// (ref, digest) rows in `ctr image list` that share the same +// manifest digest fold into one ClusterImage with the most +// informative ref form selected. +type ClusterImage struct { + Ref string `json:"ref"` + Digest string `json:"digest"` + SizeBytes int64 `json:"size_bytes"` + SizeHuman string `json:"size_human"` +} + +// ListFromCluster queries the cluster's k8s.io containerd +// namespace and returns one row per stored manifest. ctr's +// tabular output is used over `--format json` because it's +// stable across the containerd versions y-cluster targets; +// the size column is parsed back from its IEC-formatted form. +func ListFromCluster(ctx context.Context, lr *cluster.LookupResult) ([]ClusterImage, error) { + var stdout, stderr bytes.Buffer + if err := cluster.RunCtr(ctx, lr, []string{"-n", "k8s.io", "image", "list"}, nil, &stdout, &stderr); err != nil { + return nil, fmt.Errorf("ctr image list: %s: %w", stderr.String(), err) + } + return parseClusterImageList(stdout.String()), nil +} + +// parseClusterImageList is the pure parser exposed for tests. +// Skips header, blank lines, and the bare `sha256:` +// config-digest rows ctr writes alongside the canonical rows. 
+// Collapses by digest: many refs sharing the same manifest +// digest fold into one ClusterImage, with the most informative +// ref form winning (preferred order in betterRef). +func parseClusterImageList(output string) []ClusterImage { + byDigest := map[string]*ClusterImage{} + for _, line := range strings.Split(output, "\n") { + fields := strings.Fields(line) + if len(fields) == 0 || fields[0] == "REF" { + continue + } + ref := fields[0] + if strings.HasPrefix(ref, "sha256:") { + continue + } + // Locate the digest by the sha256: prefix; the SIZE + // column lives two whitespace tokens after it (number, + // unit). Anchoring on the digest is robust to ctr column + // reorderings. + var digest string + sizeNumIdx := -1 + for i, f := range fields[1:] { + if strings.HasPrefix(f, "sha256:") && len(f) == 7+64 { + digest = f + sizeNumIdx = i + 1 + 1 + break + } + } + if digest == "" { + continue + } + var size int64 + if sizeNumIdx >= 0 && sizeNumIdx+1 < len(fields) { + size = parseHumanSize(fields[sizeNumIdx], fields[sizeNumIdx+1]) + } + if existing, ok := byDigest[digest]; ok { + if refRank(ref) > refRank(existing.Ref) { + existing.Ref = ref + } + if size > existing.SizeBytes { + existing.SizeBytes = size + existing.SizeHuman = formatHumanSize(size) + } + continue + } + byDigest[digest] = &ClusterImage{ + Ref: ref, + Digest: digest, + SizeBytes: size, + SizeHuman: formatHumanSize(size), + } + } + out := make([]ClusterImage, 0, len(byDigest)) + for _, v := range byDigest { + out = append(out, *v) + } + return out +} + +// refRank prefers refs that name more identifying detail: +// name:tag@digest > name@digest > name:tag > anything else. +// Used to pick the canonical ref when many rows fold into one. 
+func refRank(ref string) int { + hasAt := strings.Contains(ref, "@") + tail := ref + if slash := strings.LastIndex(ref, "/"); slash >= 0 { + tail = ref[slash+1:] + } + if at := strings.Index(tail, "@"); at >= 0 { + tail = tail[:at] + } + hasTag := strings.Contains(tail, ":") + rank := 0 + if hasAt { + rank += 2 + } + if hasTag { + rank++ + } + return rank +} + +// parseHumanSize converts ctr's IEC-formatted size column +// ("263.7 MiB", "1.5 GiB") to bytes. Tolerates "-" / "0" for +// unknown size and a small set of metric units in case +// containerd ever switches. +func parseHumanSize(num, unit string) int64 { + if num == "" || num == "-" { + return 0 + } + f, err := strconv.ParseFloat(num, 64) + if err != nil { + return 0 + } + var mul float64 + switch unit { + case "B", "": + mul = 1 + case "KiB": + mul = 1024 + case "MiB": + mul = 1024 * 1024 + case "GiB": + mul = 1024 * 1024 * 1024 + case "TiB": + mul = 1024 * 1024 * 1024 * 1024 + case "kB": + mul = 1000 + case "MB": + mul = 1000 * 1000 + case "GB": + mul = 1000 * 1000 * 1000 + case "TB": + mul = 1000 * 1000 * 1000 * 1000 + default: + return 0 + } + return int64(f * mul) +} + +// formatHumanSize renders a byte count in the same IEC shape +// ctr uses ("263.7 MiB"). Bytes pass through as " B". +func formatHumanSize(b int64) string { + if b < 1024 { + return fmt.Sprintf("%d B", b) + } + f := float64(b) + for _, u := range []string{"KiB", "MiB", "GiB", "TiB"} { + f /= 1024 + if f < 1024 { + return fmt.Sprintf("%.1f %s", f, u) + } + } + return fmt.Sprintf("%.1f PiB", f/1024) +} + +// SortClusterImagesBySizeDesc sorts in place by SizeBytes +// descending, ties broken by Ref ascending so the output is +// deterministic across runs of an unchanged cluster. 
+func SortClusterImagesBySizeDesc(rows []ClusterImage) { + sort.SliceStable(rows, func(i, j int) bool { + if rows[i].SizeBytes != rows[j].SizeBytes { + return rows[i].SizeBytes > rows[j].SizeBytes + } + return rows[i].Ref < rows[j].Ref + }) +} + +// SortClusterImagesByName sorts in place by Ref ascending. +func SortClusterImagesByName(rows []ClusterImage) { + sort.SliceStable(rows, func(i, j int) bool { + return rows[i].Ref < rows[j].Ref + }) +} diff --git a/pkg/images/list_cluster_test.go b/pkg/images/list_cluster_test.go new file mode 100644 index 0000000..86b6dbf --- /dev/null +++ b/pkg/images/list_cluster_test.go @@ -0,0 +1,147 @@ +package images + +import ( + "strings" + "testing" +) + +// TestParseClusterImageList_CollapsesByDigest pins the +// digest-collapse policy on a representative `ctr image list` +// capture: the same image manifest appears under tag, digest, +// and the alias the loader writes back. The parser must fold +// all three into one row with the most-informative ref form. +// The bare `sha256:` config-digest row must NOT show up. 
+func TestParseClusterImageList_CollapsesByDigest(t *testing.T) { + manifest := "sha256:" + strings.Repeat("a", 64) + config := "sha256:" + strings.Repeat("b", 64) + in := strings.Join([]string{ + "REF\tTYPE\tDIGEST\tSIZE\tPLATFORMS\tLABELS", + "ghcr.io/yolean/headless-chrome:abc123\tapplication/vnd.oci.image.index.v1+json\t" + manifest + "\t553.0 MiB\tlinux/amd64\t-", + "ghcr.io/yolean/headless-chrome@" + manifest + "\tapplication/vnd.oci.image.index.v1+json\t" + manifest + "\t553.0 MiB\tlinux/amd64\t-", + "ghcr.io/yolean/headless-chrome:abc123@" + manifest + "\tapplication/vnd.oci.image.index.v1+json\t" + manifest + "\t553.0 MiB\tlinux/amd64\t-", + config + "\tapplication/vnd.docker.distribution.manifest.v2+json\t" + manifest + "\t553.0 MiB\tlinux/amd64\t-", + }, "\n") + "\n" + + got := parseClusterImageList(in) + if len(got) != 1 { + t.Fatalf("expected 1 collapsed row, got %d: %+v", len(got), got) + } + want := "ghcr.io/yolean/headless-chrome:abc123@" + manifest + if got[0].Ref != want { + t.Errorf("canonical ref: got %q, want %q (most-informative form should win)", got[0].Ref, want) + } + if got[0].Digest != manifest { + t.Errorf("digest: got %q, want %q", got[0].Digest, manifest) + } + if got[0].SizeBytes != 553*1024*1024 { + t.Errorf("size: got %d bytes, want %d (553 MiB)", got[0].SizeBytes, 553*1024*1024) + } +} + +// TestParseClusterImageList_MultipleImages confirms the +// happy path where each image has its own digest. 
+func TestParseClusterImageList_MultipleImages(t *testing.T) { + in := "REF\tTYPE\tDIGEST\tSIZE\tPLATFORMS\tLABELS\n" + + "ghcr.io/foo/a:v1\tx\tsha256:1111111111111111111111111111111111111111111111111111111111111111\t10.0 MiB\tlinux/amd64\t-\n" + + "ghcr.io/foo/b:v1\tx\tsha256:2222222222222222222222222222222222222222222222222222222222222222\t2.0 GiB\tlinux/amd64\t-\n" + got := parseClusterImageList(in) + if len(got) != 2 { + t.Fatalf("expected 2 rows, got %d", len(got)) + } +} + +// TestParseClusterImageList_SkipsConfigDigestRows pins the +// regression guard from ISSUE_IMAGES_LOAD_MANGLES_CONFIG_DIGEST_REFS: +// the bare `sha256:` row must not surface in the listing. +func TestParseClusterImageList_SkipsConfigDigestRows(t *testing.T) { + in := "REF\tTYPE\tDIGEST\tSIZE\tPLATFORMS\tLABELS\n" + + "sha256:dc863b8391abb7c2d3e4f5a6b7c8d9e0f1a2b3c4d5e6f70819253647586978a9\tx\tsha256:1111111111111111111111111111111111111111111111111111111111111111\t10.0 MiB\tlinux/amd64\t-\n" + got := parseClusterImageList(in) + if len(got) != 0 { + t.Fatalf("expected 0 rows (config-digest filtered), got %d: %+v", len(got), got) + } +} + +func TestRefRank(t *testing.T) { + cases := []struct { + ref string + wantHigh int + }{ + // tag+digest beats digest beats tag beats neither + {"ghcr.io/foo/bar:v1@sha256:abc", 3}, + {"ghcr.io/foo/bar@sha256:abc", 2}, + {"ghcr.io/foo/bar:v1", 1}, + {"ghcr.io/foo/bar", 0}, + // hostport ":port" before the last slash should NOT count as a tag + {"registry.example:5000/foo/bar", 0}, + {"registry.example:5000/foo/bar:v1", 1}, + {"registry.example:5000/foo/bar:v1@sha256:abc", 3}, + } + for _, c := range cases { + if got := refRank(c.ref); got != c.wantHigh { + t.Errorf("refRank(%q) = %d, want %d", c.ref, got, c.wantHigh) + } + } +} + +func TestParseHumanSize(t *testing.T) { + mib := func(f float64) int64 { return int64(f * float64(1024*1024)) } + gib := func(f float64) int64 { return int64(f * float64(1024*1024*1024)) } + cases := []struct { + num, unit 
string + want int64 + }{ + {"0", "B", 0}, + {"1024", "B", 1024}, + {"1.0", "KiB", 1024}, + {"263.7", "MiB", mib(263.7)}, + {"1.5", "GiB", gib(1.5)}, + {"-", "", 0}, + {"", "", 0}, + {"nope", "MiB", 0}, + {"1", "??", 0}, + } + for _, c := range cases { + if got := parseHumanSize(c.num, c.unit); got != c.want { + t.Errorf("parseHumanSize(%q, %q) = %d, want %d", c.num, c.unit, got, c.want) + } + } +} + +func TestFormatHumanSize(t *testing.T) { + cases := []struct { + in int64 + want string + }{ + {0, "0 B"}, + {1023, "1023 B"}, + {1024, "1.0 KiB"}, + {1024 * 1024, "1.0 MiB"}, + {int64(553.0 * 1024 * 1024), "553.0 MiB"}, + {1024 * 1024 * 1024, "1.0 GiB"}, + } + for _, c := range cases { + if got := formatHumanSize(c.in); got != c.want { + t.Errorf("formatHumanSize(%d) = %q, want %q", c.in, got, c.want) + } + } +} + +// TestSortClusterImagesBySizeDesc pins the deterministic +// ordering: size descending, ref ascending as the tiebreaker +// so an unchanged cluster always prints the same lines. +func TestSortClusterImagesBySizeDesc(t *testing.T) { + rows := []ClusterImage{ + {Ref: "small", SizeBytes: 1}, + {Ref: "z-mid", SizeBytes: 100}, + {Ref: "a-mid", SizeBytes: 100}, + {Ref: "big", SizeBytes: 1000}, + } + SortClusterImagesBySizeDesc(rows) + want := []string{"big", "a-mid", "z-mid", "small"} + for i, w := range want { + if rows[i].Ref != w { + t.Errorf("row %d: got %q, want %q", i, rows[i].Ref, w) + } + } +}