Skip to content

Commit 66292e4

Browse files
committed
docs(cookbooks): add cookbooks 01-06
1 parent c8b04c0 commit 66292e4

6 files changed

Lines changed: 708 additions & 0 deletions

File tree

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
---
2+
title: Basic Text Generation
3+
weight: 1
4+
bookToc: true
5+
---
6+
7+
# Basic Text Generation
8+
9+
Load a GGUF model and generate a text completion with a single function call. This is the simplest way to run inference with Zerfoo.
10+
11+
## Usage
12+
13+
```bash
14+
go run ./docs/cookbook/01-basic-text-generation/ --model path/to/model.gguf
15+
go run ./docs/cookbook/01-basic-text-generation/ --model google/gemma-3-1b
16+
```
17+
18+
## Full Code
19+
20+
```go
21+
// Recipe 01: Basic Text Generation
22+
//
23+
// Load a GGUF model and generate a text completion with a single function call.
24+
// This is the simplest way to run inference with Zerfoo.
25+
//
26+
// Usage:
27+
//
28+
// go run ./docs/cookbook/01-basic-text-generation/ --model path/to/model.gguf
29+
// go run ./docs/cookbook/01-basic-text-generation/ --model google/gemma-3-1b
30+
package main
31+
32+
import (
33+
"context"
34+
"flag"
35+
"fmt"
36+
"os"
37+
38+
"github.com/zerfoo/zerfoo"
39+
)
40+
41+
func main() {
42+
modelPath := flag.String("model", "", "path to GGUF model file or HuggingFace model ID")
43+
prompt := flag.String("prompt", "Explain goroutines in one paragraph.", "generation prompt")
44+
flag.Parse()
45+
46+
if *modelPath == "" {
47+
fmt.Fprintln(os.Stderr, "usage: basic-text-generation --model <path-or-id> [--prompt <text>]")
48+
os.Exit(1)
49+
}
50+
51+
// Load the model. Accepts a local GGUF path or a HuggingFace model ID
52+
// like "google/gemma-3-1b". Remote models are downloaded and cached.
53+
m, err := zerfoo.Load(*modelPath)
54+
if err != nil {
55+
fmt.Fprintf(os.Stderr, "load: %v\n", err)
56+
os.Exit(1)
57+
}
58+
defer m.Close()
59+
60+
// Generate a completion. The result includes the generated text,
61+
// token count, and wall-clock duration.
62+
result, err := m.Generate(context.Background(), *prompt,
63+
zerfoo.WithGenMaxTokens(256),
64+
zerfoo.WithGenTemperature(0.7),
65+
)
66+
if err != nil {
67+
fmt.Fprintf(os.Stderr, "generate: %v\n", err)
68+
os.Exit(1)
69+
}
70+
71+
fmt.Println(result.Text)
72+
fmt.Fprintf(os.Stderr, "\n[%d tokens in %s]\n", result.TokenCount, result.Duration)
73+
}
74+
```
75+
76+
## How It Works
77+
78+
1. **Model loading** -- `zerfoo.Load` accepts either a local GGUF file path or a HuggingFace model ID (e.g. `"google/gemma-3-1b"`). Remote models are downloaded and cached automatically.
79+
2. **Generation** -- `m.Generate` runs autoregressive decoding with the given prompt. The `WithGenMaxTokens` and `WithGenTemperature` options control output length and sampling randomness.
80+
3. **Result** -- The returned `result` contains `Text` (the generated string), `TokenCount`, and `Duration` for performance tracking.
81+
82+
## See Also
83+
84+
- [Quick Start](/docs/getting-started/quickstart) -- minimal setup guide
85+
- [Streaming Chat](/docs/cookbooks/streaming-chat) -- stream tokens as they are generated
86+
- [Custom Sampling](/docs/cookbooks/custom-sampling) -- explore temperature, top-K, and top-P
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
---
2+
title: Custom Sampling
3+
weight: 5
4+
bookToc: true
5+
---
6+
7+
# Custom Sampling Parameters
8+
9+
Demonstrate how temperature, top-K, top-P, and repetition penalty affect text generation. The program generates the same prompt three times with different sampling configurations so you can compare the outputs.
10+
11+
## Usage
12+
13+
```bash
14+
go run ./docs/cookbook/05-custom-sampling/ --model path/to/model.gguf
15+
```
16+
17+
## Full Code
18+
19+
```go
20+
// Recipe 05: Custom Sampling Parameters
21+
//
22+
// Demonstrate how temperature, top-K, top-P, and repetition penalty affect
23+
// text generation. The program generates the same prompt three times with
24+
// different sampling configurations so you can compare the outputs.
25+
//
26+
// Usage:
27+
//
28+
// go run ./docs/cookbook/05-custom-sampling/ --model path/to/model.gguf
29+
package main
30+
31+
import (
32+
"context"
33+
"flag"
34+
"fmt"
35+
"os"
36+
37+
"github.com/zerfoo/zerfoo/inference"
38+
)
39+
40+
func main() {
41+
modelPath := flag.String("model", "", "path to GGUF model file")
42+
device := flag.String("device", "cpu", `compute device: "cpu", "cuda"`)
43+
prompt := flag.String("prompt", "Write a haiku about concurrency.", "generation prompt")
44+
flag.Parse()
45+
46+
if *modelPath == "" {
47+
fmt.Fprintln(os.Stderr, "usage: custom-sampling --model <model.gguf>")
48+
os.Exit(1)
49+
}
50+
51+
model, err := inference.LoadFile(*modelPath, inference.WithDevice(*device))
52+
if err != nil {
53+
fmt.Fprintf(os.Stderr, "load: %v\n", err)
54+
os.Exit(1)
55+
}
56+
defer model.Close()
57+
58+
ctx := context.Background()
59+
60+
// Configuration 1: Greedy decoding (temperature=0).
61+
// Deterministic output -- always picks the highest-probability token.
62+
fmt.Println("=== Greedy (temperature=0) ===")
63+
text, err := model.Generate(ctx, *prompt,
64+
inference.WithMaxTokens(64),
65+
inference.WithTemperature(0),
66+
)
67+
if err != nil {
68+
fmt.Fprintf(os.Stderr, "generate: %v\n", err)
69+
os.Exit(1)
70+
}
71+
fmt.Println(text)
72+
73+
// Configuration 2: Creative (high temperature + top-P nucleus sampling).
74+
// Produces more varied, surprising output.
75+
fmt.Println("\n=== Creative (temp=1.2, top-P=0.9) ===")
76+
text, err = model.Generate(ctx, *prompt,
77+
inference.WithMaxTokens(64),
78+
inference.WithTemperature(1.2),
79+
inference.WithTopP(0.9),
80+
)
81+
if err != nil {
82+
fmt.Fprintf(os.Stderr, "generate: %v\n", err)
83+
os.Exit(1)
84+
}
85+
fmt.Println(text)
86+
87+
// Configuration 3: Focused (low temperature + top-K).
88+
// Picks from a narrow set of likely tokens for coherent output.
89+
fmt.Println("\n=== Focused (temp=0.3, top-K=10) ===")
90+
text, err = model.Generate(ctx, *prompt,
91+
inference.WithMaxTokens(64),
92+
inference.WithTemperature(0.3),
93+
inference.WithTopK(10),
94+
)
95+
if err != nil {
96+
fmt.Fprintf(os.Stderr, "generate: %v\n", err)
97+
os.Exit(1)
98+
}
99+
fmt.Println(text)
100+
}
101+
```
102+
103+
## How It Works
104+
105+
The recipe runs three generation passes with the same prompt but different sampling strategies:
106+
107+
| Configuration | Settings | Behavior |
108+
|---------------|----------|----------|
109+
| **Greedy** | `temperature=0` | Deterministic -- always picks the highest-probability token. Produces the most predictable output. |
110+
| **Creative** | `temperature=1.2, top-P=0.9` | High temperature flattens the probability distribution, making unlikely tokens more probable. Top-P (nucleus sampling) truncates the distribution to the smallest set of tokens whose cumulative probability exceeds 0.9. |
111+
| **Focused** | `temperature=0.3, top-K=10` | Low temperature sharpens the distribution toward high-probability tokens. Top-K limits selection to the 10 most likely tokens. Produces coherent, on-topic output. |
112+
113+
## See Also
114+
115+
- [Basic Text Generation](/docs/cookbooks/basic-text-generation) -- simple generation with default sampling
116+
- [Structured JSON Output](/docs/cookbooks/structured-json-output) -- constrain output format with grammar-guided decoding
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
---
2+
title: Embedding Similarity
3+
weight: 3
4+
bookToc: true
5+
---
6+
7+
# Embedding and Cosine Similarity
8+
9+
Compute text embeddings and rank a corpus of documents by relevance to a query using cosine similarity. This is the retrieval building block for semantic search and RAG systems.
10+
11+
## Usage
12+
13+
```bash
14+
go run ./docs/cookbook/03-embedding-similarity/ --model path/to/model.gguf
15+
go run ./docs/cookbook/03-embedding-similarity/ --model path/to/model.gguf --query "memory management"
16+
```
17+
18+
## Full Code
19+
20+
```go
21+
// Recipe 03: Embedding and Cosine Similarity
22+
//
23+
// Compute text embeddings and rank a corpus of documents by relevance to a
24+
// query using cosine similarity. This is the retrieval building block for
25+
// semantic search and RAG systems.
26+
//
27+
// Usage:
28+
//
29+
// go run ./docs/cookbook/03-embedding-similarity/ --model path/to/model.gguf
30+
// go run ./docs/cookbook/03-embedding-similarity/ --model path/to/model.gguf --query "memory management"
31+
package main
32+
33+
import (
34+
"flag"
35+
"fmt"
36+
"os"
37+
"sort"
38+
39+
"github.com/zerfoo/zerfoo"
40+
)
41+
42+
// corpus is a small, fixed document set used to demonstrate
// embedding-based ranking; entry order determines display indices.
var corpus = []string{
	"Go's garbage collector is a concurrent, tri-color, mark-sweep collector.",
	"Goroutines are multiplexed onto OS threads by the Go runtime scheduler.",
	"The sync.Mutex type provides mutual exclusion for shared state.",
	"Go modules use go.mod and go.sum to manage versioned dependencies.",
	"Channels are the primary mechanism for goroutine communication.",
	"The context package carries deadlines and cancellation signals.",
	"Go interfaces are satisfied implicitly without an implements keyword.",
	"The testing package supports automated unit and benchmark tests.",
}
53+
54+
func main() {
55+
modelPath := flag.String("model", "", "path to GGUF model file or HuggingFace model ID")
56+
query := flag.String("query", "How does Go handle concurrency?", "search query")
57+
topN := flag.Int("top", 3, "number of results to display")
58+
flag.Parse()
59+
60+
if *modelPath == "" {
61+
fmt.Fprintln(os.Stderr, "usage: embedding-similarity --model <path> [--query <text>]")
62+
os.Exit(1)
63+
}
64+
65+
m, err := zerfoo.Load(*modelPath)
66+
if err != nil {
67+
fmt.Fprintf(os.Stderr, "load: %v\n", err)
68+
os.Exit(1)
69+
}
70+
defer m.Close()
71+
72+
// Embed all documents in the corpus.
73+
corpusEmbeds, err := m.Embed(corpus)
74+
if err != nil {
75+
fmt.Fprintf(os.Stderr, "embed corpus: %v\n", err)
76+
os.Exit(1)
77+
}
78+
79+
// Embed the query.
80+
queryEmbeds, err := m.Embed([]string{*query})
81+
if err != nil {
82+
fmt.Fprintf(os.Stderr, "embed query: %v\n", err)
83+
os.Exit(1)
84+
}
85+
qe := queryEmbeds[0]
86+
87+
// Rank documents by cosine similarity.
88+
type result struct {
89+
doc string
90+
score float32
91+
}
92+
results := make([]result, len(corpus))
93+
for i, emb := range corpusEmbeds {
94+
results[i] = result{corpus[i], qe.CosineSimilarity(emb)}
95+
}
96+
sort.Slice(results, func(i, j int) bool { return results[i].score > results[j].score })
97+
98+
fmt.Printf("Query: %q\n\n", *query)
99+
n := *topN
100+
if n > len(results) {
101+
n = len(results)
102+
}
103+
for i := 0; i < n; i++ {
104+
fmt.Printf(" %d. [%.4f] %s\n", i+1, results[i].score, results[i].doc)
105+
}
106+
}
107+
```
108+
109+
## How It Works
110+
111+
1. **Embedding** -- `m.Embed` takes a slice of strings and returns a slice of embedding vectors. Each vector captures the semantic meaning of the input text.
112+
2. **Cosine similarity** -- The `CosineSimilarity` method computes the cosine of the angle between two embedding vectors. Values closer to 1.0 indicate higher semantic similarity.
113+
3. **Ranking** -- Documents are sorted by their similarity score to the query, and the top N results are displayed.
114+
115+
This pattern is the foundation for retrieval-augmented generation (RAG): embed your document corpus, embed the user query, retrieve the most relevant documents, and pass them as context to the generation step.
116+
117+
## See Also
118+
119+
- [Basic Text Generation](/docs/cookbooks/basic-text-generation) -- generate text completions
120+
- [OpenAI Server](/docs/cookbooks/openai-server) -- the `/v1/embeddings` endpoint serves embeddings over HTTP

0 commit comments

Comments
 (0)