---
title: Custom Sampling
weight: 5
bookToc: true
---

# Custom Sampling Parameters

Demonstrates how the temperature, top-K, top-P, and repetition-penalty sampling parameters affect text generation. The program generates a completion for the same prompt three times with different sampling configurations so you can compare the outputs.

## Usage

```bash
go run ./docs/cookbook/05-custom-sampling/ --model path/to/model.gguf
```
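
The program also accepts optional `--device` and `--prompt` flags (both defined in the full code below), so you can, for example, run the same comparison on a different prompt:

```bash
go run ./docs/cookbook/05-custom-sampling/ \
  --model path/to/model.gguf \
  --prompt "Write a haiku about garbage collection."
```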

## Full Code

```go
// Recipe 05: Custom Sampling Parameters
//
// Demonstrates how the temperature, top-K, top-P, and repetition-penalty
// sampling parameters affect text generation. The program generates a
// completion for the same prompt three times with different sampling
// configurations so you can compare the outputs.
//
// Usage:
//
//	go run ./docs/cookbook/05-custom-sampling/ --model path/to/model.gguf
package main

import (
	"context"
	"flag"
	"fmt"
	"os"

	"github.com/zerfoo/zerfoo/inference"
)

func main() {
	modelPath := flag.String("model", "", "path to GGUF model file")
	device := flag.String("device", "cpu", `compute device: "cpu", "cuda"`)
	prompt := flag.String("prompt", "Write a haiku about concurrency.", "generation prompt")
	flag.Parse()

	if *modelPath == "" {
		fmt.Fprintln(os.Stderr, "usage: custom-sampling --model <model.gguf>")
		os.Exit(1)
	}

	model, err := inference.LoadFile(*modelPath, inference.WithDevice(*device))
	if err != nil {
		fmt.Fprintf(os.Stderr, "load: %v\n", err)
		os.Exit(1)
	}
	defer model.Close()

	ctx := context.Background()

	// Configuration 1: Greedy decoding (temperature=0).
	// Deterministic output -- always picks the highest-probability token.
	fmt.Println("=== Greedy (temperature=0) ===")
	text, err := model.Generate(ctx, *prompt,
		inference.WithMaxTokens(64),
		inference.WithTemperature(0),
	)
	if err != nil {
		fmt.Fprintf(os.Stderr, "generate: %v\n", err)
		os.Exit(1)
	}
	fmt.Println(text)

	// Configuration 2: Creative (high temperature + top-P nucleus sampling).
	// Produces more varied, surprising output.
	fmt.Println("\n=== Creative (temp=1.2, top-P=0.9) ===")
	text, err = model.Generate(ctx, *prompt,
		inference.WithMaxTokens(64),
		inference.WithTemperature(1.2),
		inference.WithTopP(0.9),
	)
	if err != nil {
		fmt.Fprintf(os.Stderr, "generate: %v\n", err)
		os.Exit(1)
	}
	fmt.Println(text)

	// Configuration 3: Focused (low temperature + top-K).
	// Picks from a narrow set of likely tokens for coherent output.
	fmt.Println("\n=== Focused (temp=0.3, top-K=10) ===")
	text, err = model.Generate(ctx, *prompt,
		inference.WithMaxTokens(64),
		inference.WithTemperature(0.3),
		inference.WithTopK(10),
	)
	if err != nil {
		fmt.Fprintf(os.Stderr, "generate: %v\n", err)
		os.Exit(1)
	}
	fmt.Println(text)
}
```

## How It Works

The recipe runs three generation passes with the same prompt but different sampling strategies:

| Configuration | Settings | Behavior |
|---------------|----------|----------|
| **Greedy** | `temperature=0` | Deterministic -- always picks the highest-probability token. Produces the most predictable output. |
| **Creative** | `temperature=1.2, top-P=0.9` | High temperature flattens the probability distribution, making unlikely tokens more probable. Top-P (nucleus sampling) truncates the distribution to the smallest set of tokens whose cumulative probability exceeds 0.9. |
| **Focused** | `temperature=0.3, top-K=10` | Low temperature sharpens the distribution toward high-probability tokens. Top-K limits selection to the 10 most likely tokens. Produces coherent, on-topic output. |
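
The sketch below makes these effects concrete. It is a standalone toy, independent of the `inference` package: the token names and logit values are invented for illustration, and the filtering logic is the standard textbook formulation rather than zerfoo's actual implementation.

```go
package main

import (
	"fmt"
	"math"
)

type candidate struct {
	token string
	logit float64
}

// softmax converts logits to probabilities after dividing them by the
// temperature. Low temperature sharpens the distribution; high temperature
// flattens it. (Temperature 0 is handled separately as greedy argmax.)
func softmax(cands []candidate, temperature float64) []float64 {
	probs := make([]float64, len(cands))
	var sum float64
	for i, c := range cands {
		probs[i] = math.Exp(c.logit / temperature)
		sum += probs[i]
	}
	for i := range probs {
		probs[i] /= sum
	}
	return probs
}

func names(cands []candidate) []string {
	out := make([]string, len(cands))
	for i, c := range cands {
		out[i] = c.token
	}
	return out
}

func main() {
	// A toy next-token distribution, already sorted by logit (highest first).
	cands := []candidate{
		{"goroutine", 4.0}, {"channel", 3.5}, {"thread", 2.0},
		{"mutex", 1.0}, {"banana", -1.0},
	}

	// Temperature reshapes the distribution before any truncation.
	for _, temp := range []float64{0.3, 1.2} {
		probs := softmax(cands, temp)
		fmt.Printf("temperature=%.1f:", temp)
		for i, c := range cands {
			fmt.Printf(" %s=%.3f", c.token, probs[i])
		}
		fmt.Println()
	}

	// Top-K keeps only the K most likely candidates.
	k := 3
	fmt.Println("top-K=3 keeps:", names(cands[:k]))

	// Top-P (nucleus) keeps the smallest prefix whose cumulative probability
	// reaches p, computed on the temperature-scaled distribution.
	p := 0.9
	probs := softmax(cands, 1.2)
	cum, cut := 0.0, len(cands)
	for i, pr := range probs {
		cum += pr
		if cum >= p {
			cut = i + 1
			break
		}
	}
	fmt.Println("top-P=0.9 keeps:", names(cands[:cut]))
}
```

Repetition penalty, mentioned at the top of the recipe, is a further control that these three configurations leave at its default: a common formulation scales down the logits of tokens that already appear in the context by the penalty factor (values slightly above 1.0, such as 1.1, discourage verbatim repetition) before the steps above are applied.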

## See Also

- [Basic Text Generation](/docs/cookbooks/basic-text-generation) -- simple generation with default sampling
- [Structured JSON Output](/docs/cookbooks/structured-json-output) -- constrain output format with grammar-guided decoding