Skip to content

Commit 7291598

Browse files
author
simuleite
committed
Keep Sonic Same With LLM Tool
1 parent 3792bab commit 7291598

3 files changed

Lines changed: 258 additions & 131 deletions

File tree

internal/cmd/cli/extract_symbol.go

Lines changed: 167 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -18,65 +18,113 @@ import (
1818
"encoding/json"
1919
"fmt"
2020
"os"
21-
"path/filepath"
21+
"strings"
2222

2323
"github.com/bytedance/sonic"
2424
"github.com/cloudwego/abcoder/lang/utils"
25+
"github.com/cloudwego/abcoder/lang/uniast"
2526
"github.com/spf13/cobra"
2627
)
2728

28-
const indexDir = ".index"
29+
// buildNameToLocations 从 JSON 数据构建 NameToLocations
30+
// 如果 pathFilter 不为空,则只收集匹配前缀的 file
31+
// 返回: name -> type -> fileSet (去重)
32+
func buildNameToLocations(data []byte, pathFilter string) (map[string]map[string]map[string]bool, error) {
33+
// 一次性反序列化整个 Modules
34+
var result struct {
35+
Modules map[string]*uniast.Module `json:"modules"`
36+
}
37+
if err := sonic.Unmarshal(data, &result); err != nil {
38+
return nil, err
39+
}
2940

30-
type SymbolIndex struct {
31-
Mtime int64 `json:"mtime"`
32-
Data map[string][]NameMatch `json:"data"` // name -> []NameMatch
33-
}
41+
// name -> type -> files (去重)
42+
nameToTypeFiles := make(map[string]map[string]map[string]bool)
3443

35-
type NameMatch struct {
36-
File string `json:"file"`
37-
Type string `json:"type"`
38-
}
44+
// 遍历所有模块
45+
for _, mod := range result.Modules {
46+
// 跳过外部模块
47+
if mod.IsExternal() {
48+
continue
49+
}
3950

40-
// saveSymbolIndex 保存符号索引到 ~/.asts/.index/{repo}.idx
41-
func saveSymbolIndex(astsDir, repoName, repoFile string, data map[string][]NameMatch) error {
42-
// 获取 repo 文件的 mtime
43-
info, err := os.Stat(repoFile)
44-
if err != nil {
45-
return fmt.Errorf("stat repo file: %w", err)
46-
}
47-
mtime := info.ModTime().UnixMilli()
48-
49-
// 检查现有索引
50-
idxPath := filepath.Join(astsDir, indexDir, repoName+".idx")
51-
if _, err := os.Stat(idxPath); err == nil {
52-
// 读取现有索引的 mtime
53-
if oldData, err := os.ReadFile(idxPath); err == nil {
54-
var oldIdx SymbolIndex
55-
if json.Unmarshal(oldData, &oldIdx) == nil && oldIdx.Mtime == mtime {
56-
return nil // mtime 一致,无需更新
51+
// 遍历所有包
52+
for _, pkg := range mod.Packages {
53+
// 提取 Functions
54+
for name, fn := range pkg.Functions {
55+
if pathFilter != "" && !strings.HasPrefix(fn.File, pathFilter) {
56+
continue
57+
}
58+
if nameToTypeFiles[name] == nil {
59+
nameToTypeFiles[name] = make(map[string]map[string]bool)
60+
}
61+
if nameToTypeFiles[name]["FUNC"] == nil {
62+
nameToTypeFiles[name]["FUNC"] = make(map[string]bool)
63+
}
64+
nameToTypeFiles[name]["FUNC"][fn.File] = true
65+
}
66+
67+
// 提取 Types
68+
for name, typ := range pkg.Types {
69+
if pathFilter != "" && !strings.HasPrefix(typ.FileLine.File, pathFilter) {
70+
continue
71+
}
72+
if nameToTypeFiles[name] == nil {
73+
nameToTypeFiles[name] = make(map[string]map[string]bool)
74+
}
75+
if nameToTypeFiles[name]["TYPE"] == nil {
76+
nameToTypeFiles[name]["TYPE"] = make(map[string]bool)
77+
}
78+
nameToTypeFiles[name]["TYPE"][typ.FileLine.File] = true
79+
}
80+
81+
// 提取 Vars
82+
for name, v := range pkg.Vars {
83+
if pathFilter != "" && !strings.HasPrefix(v.FileLine.File, pathFilter) {
84+
continue
85+
}
86+
if nameToTypeFiles[name] == nil {
87+
nameToTypeFiles[name] = make(map[string]map[string]bool)
88+
}
89+
if nameToTypeFiles[name]["VAR"] == nil {
90+
nameToTypeFiles[name]["VAR"] = make(map[string]bool)
91+
}
92+
nameToTypeFiles[name]["VAR"][v.FileLine.File] = true
5793
}
5894
}
5995
}
6096

61-
// 创建索引
62-
idx := SymbolIndex{
63-
Mtime: mtime,
64-
Data: data,
65-
}
97+
return nameToTypeFiles, nil
98+
}
6699

67-
// 写入 .tmp 再 rename
68-
idxPathTmp := idxPath + ".tmp"
69-
b, err := json.Marshal(idx)
100+
// saveNameToLocations 写回 NameToLocations 到 JSON 文件
101+
func saveNameToLocations(repoFile string, nameToLocs map[string][]string) error {
102+
data, err := os.ReadFile(repoFile)
70103
if err != nil {
71-
return fmt.Errorf("marshal index: %w", err)
104+
return err
72105
}
73-
if err := utils.MustWriteFile(idxPathTmp, b); err != nil {
74-
return fmt.Errorf("write index: %w", err)
106+
107+
// 使用标准库 JSON 反序列化
108+
var result map[string]interface{}
109+
if err := json.Unmarshal(data, &result); err != nil {
110+
return err
75111
}
76-
if err := os.Rename(idxPathTmp, idxPath); err != nil {
77-
return fmt.Errorf("rename index: %w", err)
112+
113+
// 添加 NameToLocations
114+
result["NameToLocations"] = nameToLocs
115+
116+
// 重新Marshal(保持缩进格式)
117+
prettyJSON, err := json.MarshalIndent(result, "", " ")
118+
if err != nil {
119+
return err
78120
}
79-
return nil
121+
122+
// 写入 .tmp 再 rename
123+
tmpPath := repoFile + ".tmp"
124+
if err := utils.MustWriteFile(tmpPath, prettyJSON); err != nil {
125+
return err
126+
}
127+
return os.Rename(tmpPath, repoFile)
80128
}
81129

82130
type Symbol struct {
@@ -116,86 +164,100 @@ Only extracts filepath + name (no content), for use with search_node.`,
116164
return fmt.Errorf("failed to read repo file: %w", err)
117165
}
118166

119-
// 获取所有 mod keys(只遍历 keys)
120-
modKeys, err := getModuleKeys(data)
121-
if err != nil {
122-
return err
123-
}
124-
125-
var files = make(map[string]map[string][]string)
126-
var indexData = make(map[string][]NameMatch)
127-
128-
// 遍历所有模块
129-
for _, modPath := range modKeys {
130-
// 跳过外部模块
131-
isExtVal, _ := sonic.Get(data, "Modules", modPath, "IsExternal")
132-
if isExt, _ := isExtVal.Bool(); isExt {
133-
continue
167+
// 方式1: 优先用 sonic 读取 NameToLocations
168+
nameToLocsVal, err := sonic.Get(data, "NameToLocations")
169+
if err == nil && nameToLocsVal.Exists() {
170+
if verbose {
171+
fmt.Fprintf(os.Stderr, "[VERBOSE] using existing NameToLocations\n")
134172
}
135173

136-
// 获取所有 package keys(只遍历 keys)
137-
pkgKeys, err := getPackageKeys(data, modPath)
138-
if err != nil {
139-
continue
140-
}
174+
// 获取所有 name keys
175+
nameToLocsMap, _ := nameToLocsVal.Map()
141176

142-
// 遍历所有包
143-
for _, pkgPath := range pkgKeys {
144-
// 提取 Functions: 只读取 Name + File(极致按需)
145-
if results, err := iterSymbolNameFile(data, modPath, pkgPath, "Functions"); err == nil {
146-
for _, r := range results {
147-
name, file := r[0], r[1]
148-
if files[file] == nil {
149-
files[file] = map[string][]string{
150-
"FUNC": {},
151-
"TYPE": {},
152-
"VAR": {},
177+
// 转换为输出格式: file -> type -> names
178+
files := make(map[string]map[string][]string)
179+
for name := range nameToLocsMap {
180+
filesVal, _ := sonic.Get(data, "NameToLocations", name, "Files")
181+
if filesVal.Exists() {
182+
fileList, err := filesVal.Array()
183+
if err == nil {
184+
for _, f := range fileList {
185+
fileStr, _ := f.(string)
186+
if files[fileStr] == nil {
187+
files[fileStr] = map[string][]string{
188+
"FUNC": {},
189+
"TYPE": {},
190+
"VAR": {},
191+
}
153192
}
193+
// NameToLocations 不区分类型,都归为 FUNC
194+
files[fileStr]["FUNC"] = append(files[fileStr]["FUNC"], name)
154195
}
155-
files[file]["FUNC"] = append(files[file]["FUNC"], name)
156-
indexData[name] = append(indexData[name], NameMatch{File: file, Type: "FUNC"})
157196
}
158197
}
198+
}
159199

160-
// 提取 Types
161-
if results, err := iterSymbolNameFile(data, modPath, pkgPath, "Types"); err == nil {
162-
for _, r := range results {
163-
name, file := r[0], r[1]
164-
if files[file] == nil {
165-
files[file] = map[string][]string{
166-
"FUNC": {},
167-
"TYPE": {},
168-
"VAR": {},
169-
}
170-
}
171-
files[file]["TYPE"] = append(files[file]["TYPE"], name)
172-
indexData[name] = append(indexData[name], NameMatch{File: file, Type: "TYPE"})
173-
}
200+
result := ExtractResult{
201+
RepoName: repoName,
202+
Files: files,
203+
}
204+
b, _ := json.MarshalIndent(result, "", " ")
205+
fmt.Fprintf(os.Stdout, "%s\n", b)
206+
return nil
207+
}
208+
209+
// 方式2: 没有 NameToLocations,遍历提取并写回 JSON
210+
if verbose {
211+
fmt.Fprintf(os.Stderr, "[VERBOSE] building NameToLocations\n")
212+
}
213+
214+
// 使用公共函数构建
215+
nameToTypeFiles, err := buildNameToLocations(data, "")
216+
if err != nil {
217+
return err
218+
}
219+
220+
// 转换为 NameToLocations 格式: name -> []file
221+
// 拍平 type,只保留 files
222+
nameToLocsMap := make(map[string][]string)
223+
for name, typeFiles := range nameToTypeFiles {
224+
fileSet := make(map[string]bool)
225+
for _, files := range typeFiles {
226+
for file := range files {
227+
fileSet[file] = true
174228
}
229+
}
230+
var fileList []string
231+
for file := range fileSet {
232+
fileList = append(fileList, file)
233+
}
234+
nameToLocsMap[name] = fileList
235+
}
175236

176-
// 提取 Vars
177-
if results, err := iterSymbolNameFile(data, modPath, pkgPath, "Vars"); err == nil {
178-
for _, r := range results {
179-
name, file := r[0], r[1]
180-
if files[file] == nil {
181-
files[file] = map[string][]string{
182-
"FUNC": {},
183-
"TYPE": {},
184-
"VAR": {},
185-
}
237+
// 写回 JSON
238+
if err := saveNameToLocations(repoFile, nameToLocsMap); err != nil {
239+
fmt.Fprintf(os.Stderr, "Warning: failed to save NameToLocations: %v\n", err)
240+
} else if verbose {
241+
fmt.Fprintf(os.Stderr, "[VERBOSE] saved NameToLocations to %s\n", repoFile)
242+
}
243+
244+
// 转换为输出格式: file -> type -> names
245+
files := make(map[string]map[string][]string)
246+
for name, typeFiles := range nameToTypeFiles {
247+
for typ, fileSet := range typeFiles {
248+
for file := range fileSet {
249+
if files[file] == nil {
250+
files[file] = map[string][]string{
251+
"FUNC": {},
252+
"TYPE": {},
253+
"VAR": {},
186254
}
187-
files[file]["VAR"] = append(files[file]["VAR"], name)
188-
indexData[name] = append(indexData[name], NameMatch{File: file, Type: "VAR"})
189255
}
256+
files[file][typ] = append(files[file][typ], name)
190257
}
191258
}
192259
}
193260

194-
// 保存索引文件
195-
if err := saveSymbolIndex(astsDir, repoName, repoFile, indexData); err != nil {
196-
fmt.Fprintf(os.Stderr, "Warning: failed to save index: %v\n", err)
197-
}
198-
199261
// 过滤掉空的 TYPE 和 VAR
200262
for file, types := range files {
201263
if len(types["TYPE"]) == 0 {
@@ -204,7 +266,6 @@ Only extracts filepath + name (no content), for use with search_node.`,
204266
if len(types["VAR"]) == 0 {
205267
delete(types, "VAR")
206268
}
207-
// 如果 FUNC 也空,删除整个文件
208269
if len(types["FUNC"]) == 0 {
209270
delete(files, file)
210271
}

0 commit comments

Comments
 (0)