Skip to content

Commit f5cd9a6

Browse files
authored
feat: add --include flag to modelfile generate (#473)
* feat: migrate PathFilter to doublestar, add include pattern support

  - Migrate exclude matching from filepath.Match to doublestar.PathMatch for consistency with pkg/backend/fetch.go
  - Rename patterns field to excludePatterns, add includePatterns field
  - Add MatchInclude() for matching include patterns against relative paths
  - Add ShouldDescend() for directory descent via direct match or prefix match
  - Add IncludePatterns field to GenerateConfig
  - Validate patterns with doublestar.PathMatch on construction
  - `**` now supports recursive directory matching in exclude patterns

  Signed-off-by: Zhao Chen <zhaochen.zju@gmail.com>
  Signed-off-by: Zhao Chen <winters.zc@antgroup.com>

* feat: add --include flag to modelfile generate command

  Bind --include flag with doublestar glob syntax. Help text documents pattern syntax, matching base (relative to workspace root), and risk warning for broad patterns.

  Signed-off-by: Zhao Chen <zhaochen.zju@gmail.com>
  Signed-off-by: Zhao Chen <winters.zc@antgroup.com>

* feat: integrate include pattern logic into generateByWorkspace

  Update filter logic: directory exclude is absolute, isSkippable entries can be rescued by include patterns via ShouldDescend and MatchInclude, rescued files still go through exclude check.

  Signed-off-by: Zhao Chen <zhaochen.zju@gmail.com>
  Signed-off-by: Zhao Chen <winters.zc@antgroup.com>

* test: add integration tests for --include flag

  Cover recursive hidden files, specific directory include, include with exclude override, regression (no include), multiple patterns, and selective directory entry.

  Signed-off-by: Zhao Chen <zhaochen.zju@gmail.com>
  Signed-off-by: Zhao Chen <winters.zc@antgroup.com>

* style: apply linter formatting fixes

  Signed-off-by: Zhao Chen <zhaochen.zju@gmail.com>
  Signed-off-by: Zhao Chen <winters.zc@antgroup.com>

* docs: add --include examples to modelfile generate help

  Signed-off-by: Zhao Chen <zhaochen.zju@gmail.com>
  Signed-off-by: Zhao Chen <winters.zc@antgroup.com>

---------

Signed-off-by: Zhao Chen <zhaochen.zju@gmail.com>
Signed-off-by: Zhao Chen <winters.zc@antgroup.com>
1 parent d5762b8 commit f5cd9a6

7 files changed

Lines changed: 541 additions & 84 deletions

File tree

cmd/modelfile/generate.go

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,16 @@ Full URLs with domain names will auto-detect the provider.`,
6464
modctl modelfile generate ./my-model-dir --output ./output/modelfile.yaml
6565
6666
# Generate with metadata overrides
67-
modctl modelfile generate ./my-model-dir --name my-custom-model --family llama3`,
67+
modctl modelfile generate ./my-model-dir --name my-custom-model --family llama3
68+
69+
# Include hidden files at any depth
70+
modctl modelfile generate ./my-model-dir --include "**/.*"
71+
72+
# Include a specific hidden directory
73+
modctl modelfile generate ./my-model-dir --include ".weights/**"
74+
75+
# Include hidden files but exclude sensitive ones
76+
modctl modelfile generate ./my-model-dir --include "**/.*" --exclude "**/.env"`,
6877
Args: cobra.MaximumNArgs(1),
6978
DisableAutoGenTag: true,
7079
SilenceUsage: true,
@@ -112,6 +121,10 @@ func init() {
112121
flags.StringVarP(&generateConfig.Provider, "provider", "p", "", "explicitly specify the provider for short-form URLs (huggingface, modelscope)")
113122
flags.StringVar(&generateConfig.DownloadDir, "download-dir", "", "custom directory for downloading models (default: system temp directory)")
114123
flags.StringArrayVar(&generateConfig.ExcludePatterns, "exclude", []string{}, "specify glob patterns to exclude files/directories (e.g. *.log, checkpoints/*)")
124+
flags.StringArrayVar(&generateConfig.IncludePatterns, "include", []string{},
125+
"glob patterns to include files/directories that are normally skipped (e.g. hidden files).\n"+
126+
"Uses doublestar syntax (*, **, ?, [...], {a,b}), matching against relative paths from workspace root.\n"+
127+
"Note: broad patterns like **/.* may include large directories (.git) or sensitive files (.env)")
115128

116129
// Mark the ignore-unrecognized-file-types flag as deprecated and hidden
117130
flags.MarkDeprecated("ignore-unrecognized-file-types", "this flag will be removed in the next release")

pkg/config/modelfile/modelfile.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@ type GenerateConfig struct {
4343
Provider string // Explicit provider for short-form URLs (e.g., "huggingface", "modelscope")
4444
DownloadDir string // Custom directory for downloading models (optional)
4545
ExcludePatterns []string
46+
IncludePatterns []string
4647
}
4748

4849
func NewGenerateConfig() *GenerateConfig {
@@ -63,6 +64,7 @@ func NewGenerateConfig() *GenerateConfig {
6364
Provider: "",
6465
DownloadDir: "",
6566
ExcludePatterns: []string{},
67+
IncludePatterns: []string{},
6668
}
6769
}
6870

pkg/modelfile/constants.go

Lines changed: 44 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -65,12 +65,12 @@ var (
6565
// PyTorch formats.
6666
"*.bin", // General binary format
6767
"*.bin.*", // Sharded binary files (e.g., model.bin.1)
68-
"*.pt", // PyTorch model
69-
"*.pth", // PyTorch model (alternative extension)
70-
"*.mar", // PyTorch Model Archive
71-
"*.pte", // PyTorch ExecuTorch format
72-
"*.pt2", // PyTorch 2.0 export format
73-
"*.ptl", // PyTorch Mobile format
68+
"*.pt", // PyTorch model
69+
"*.pth", // PyTorch model (alternative extension)
70+
"*.mar", // PyTorch Model Archive
71+
"*.pte", // PyTorch ExecuTorch format
72+
"*.pt2", // PyTorch 2.0 export format
73+
"*.ptl", // PyTorch Mobile format
7474

7575
// TensorFlow formats.
7676
"*.tflite", // TensorFlow Lite
@@ -85,16 +85,16 @@ var (
8585
// GGML formats.
8686
"*.gguf", // GGML Universal Format
8787
"*.gguf.*", // Partitioned GGUF files
88-
"*.ggml", // GGML format (legacy)
89-
"*.ggmf", // GGMF format (deprecated)
90-
"*.ggjt", // GGJT format (deprecated)
91-
"*.q4_0", // GGML Q4_0 quantization
92-
"*.q4_1", // GGML Q4_1 quantization
93-
"*.q5_0", // GGML Q5_0 quantization
94-
"*.q5_1", // GGML Q5_1 quantization
95-
"*.q8_0", // GGML Q8_0 quantization
96-
"*.f16", // GGML F16 format
97-
"*.f32", // GGML F32 format
88+
"*.ggml", // GGML format (legacy)
89+
"*.ggmf", // GGMF format (deprecated)
90+
"*.ggjt", // GGJT format (deprecated)
91+
"*.q4_0", // GGML Q4_0 quantization
92+
"*.q4_1", // GGML Q4_1 quantization
93+
"*.q5_0", // GGML Q5_0 quantization
94+
"*.q5_1", // GGML Q5_1 quantization
95+
"*.q8_0", // GGML Q8_0 quantization
96+
"*.f16", // GGML F16 format
97+
"*.f32", // GGML F32 format
9898

9999
// checkpoint formats.
100100
"*.ckpt", // Checkpoint format
@@ -109,37 +109,37 @@ var (
109109
"*.vocab", // Vocabulary files (when binary)
110110

111111
// Other ML frameworks.
112-
"*.ot", // OpenVINO format
113-
"*.engine", // TensorRT format
114-
"*.trt", // TensorRT format (alternative extension)
115-
"*.onnx", // Open Neural Network Exchange format
116-
"*.msgpack", // MessagePack serialization
117-
"*.model", // Some NLP frameworks
118-
"*.pkl", // Pickle format
119-
"*.pickle", // Pickle format (alternative extension)
120-
"*.keras", // Keras native format
121-
"*.joblib", // Joblib serialization (scikit-learn)
122-
"*.npy", // NumPy array format
123-
"*.npz", // NumPy compressed archive
124-
"*.nc", // NetCDF format
125-
"*.mlmodel", // Apple Core ML format
126-
"*.coreml", // Apple Core ML format (alternative)
127-
"*.mleap", // MLeap format (Spark ML)
128-
"*.surml", // SurrealML format
129-
"*.llamafile", // Llamafile format
112+
"*.ot", // OpenVINO format
113+
"*.engine", // TensorRT format
114+
"*.trt", // TensorRT format (alternative extension)
115+
"*.onnx", // Open Neural Network Exchange format
116+
"*.msgpack", // MessagePack serialization
117+
"*.model", // Some NLP frameworks
118+
"*.pkl", // Pickle format
119+
"*.pickle", // Pickle format (alternative extension)
120+
"*.keras", // Keras native format
121+
"*.joblib", // Joblib serialization (scikit-learn)
122+
"*.npy", // NumPy array format
123+
"*.npz", // NumPy compressed archive
124+
"*.nc", // NetCDF format
125+
"*.mlmodel", // Apple Core ML format
126+
"*.coreml", // Apple Core ML format (alternative)
127+
"*.mleap", // MLeap format (Spark ML)
128+
"*.surml", // SurrealML format
129+
"*.llamafile", // Llamafile format
130130
"*.llamafile.*", // Llamafile variants
131-
"*.caffemodel", // Caffe model format
132-
"*.prototxt", // Caffe model definition
133-
"*.dlc", // Qualcomm Deep Learning Container
134-
"*.circle", // Samsung Circle format
135-
"*.nb", // Neural Network Binary format
131+
"*.caffemodel", // Caffe model format
132+
"*.prototxt", // Caffe model definition
133+
"*.dlc", // Qualcomm Deep Learning Container
134+
"*.circle", // Samsung Circle format
135+
"*.nb", // Neural Network Binary format
136136

137137
// Data and dataset formats.
138-
"*.arrow", // Apache Arrow columnar format
139-
"*.parquet", // Apache Parquet columnar format
140-
"*.ftz", // FastText compressed model
141-
"*.ark", // Kaldi ark format (speech/audio models)
142-
"*.db", // Database files (LMDB, etc.)
138+
"*.arrow", // Apache Arrow columnar format
139+
"*.parquet", // Apache Parquet columnar format
140+
"*.ftz", // FastText compressed model
141+
"*.ark", // Kaldi ark format (speech/audio models)
142+
"*.db", // Database files (LMDB, etc.)
143143
}
144144

145145
// Code file patterns - supported script and notebook files.

pkg/modelfile/modelfile.go

Lines changed: 24 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,7 @@ func (mf *modelfile) generateByWorkspace(config *configmodelfile.GenerateConfig)
258258
var totalSize int64
259259

260260
// Initialize the path filter from the exclude and include patterns
261-
filter, err := NewPathFilter(config.ExcludePatterns...)
261+
filter, err := NewPathFilter(config.ExcludePatterns, config.IncludePatterns)
262262
if err != nil {
263263
return err
264264
}
@@ -277,12 +277,32 @@ func (mf *modelfile) generateByWorkspace(config *configmodelfile.GenerateConfig)
277277
return err
278278
}
279279

280-
// Skip hidden, skippable, and excluded files/directories.
281-
if isSkippable(filename) || filter.Match(relPath) {
280+
// Directory exclude is absolute — cannot be reversed by --include.
281+
if info.IsDir() && filter.Match(relPath) {
282+
return filepath.SkipDir
283+
}
284+
285+
// Check skipPatterns — include can rescue skippable entries.
286+
if isSkippable(filename) {
282287
if info.IsDir() {
283-
return filepath.SkipDir
288+
if filter.ShouldDescend(relPath) {
289+
// Rescued by --include, enter directory
290+
} else {
291+
return filepath.SkipDir
292+
}
293+
} else {
294+
if !filter.MatchInclude(relPath) {
295+
return nil
296+
}
297+
// Rescued file still goes through exclude check below
284298
}
299+
}
285300

301+
// Exclude check for non-skippable files (and include-rescued files).
302+
if filter.Match(relPath) {
303+
if info.IsDir() {
304+
return filepath.SkipDir
305+
}
286306
return nil
287307
}
288308

pkg/modelfile/modelfile_test.go

Lines changed: 135 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -676,6 +676,141 @@ func TestNewModelfileByWorkspace(t *testing.T) {
676676
expectCodes: []string{"valid_dir/model.py"},
677677
expectName: "skip-test",
678678
},
679+
{
680+
name: "include all hidden files with recursive pattern",
681+
setupFiles: map[string]string{
682+
"config.json": "",
683+
"model.bin": "",
684+
".hidden_config.json": "",
685+
".hidden_dir/model.bin": "",
686+
".hidden_dir/.nested.py": "",
687+
"normal_dir/.hidden_code.py": "",
688+
"normal_dir/visible.py": "",
689+
},
690+
setupDirs: []string{
691+
".hidden_dir",
692+
"normal_dir",
693+
},
694+
config: &configmodelfile.GenerateConfig{
695+
Name: "include-all-hidden",
696+
IncludePatterns: []string{"**/.*"},
697+
},
698+
expectError: false,
699+
expectConfigs: []string{"config.json", ".hidden_config.json"},
700+
expectModels: []string{"model.bin", ".hidden_dir/model.bin"},
701+
expectCodes: []string{".hidden_dir/.nested.py", "normal_dir/.hidden_code.py", "normal_dir/visible.py"},
702+
expectName: "include-all-hidden",
703+
},
704+
{
705+
name: "include specific hidden directory",
706+
setupFiles: map[string]string{
707+
"config.json": "",
708+
"model.bin": "",
709+
".weights/extra.bin": "",
710+
".weights/data.bin": "",
711+
".other/secret.py": "",
712+
},
713+
setupDirs: []string{
714+
".weights",
715+
".other",
716+
},
717+
config: &configmodelfile.GenerateConfig{
718+
Name: "include-weights-dir",
719+
IncludePatterns: []string{".weights/**"},
720+
},
721+
expectError: false,
722+
expectConfigs: []string{"config.json"},
723+
expectModels: []string{"model.bin", ".weights/extra.bin", ".weights/data.bin"},
724+
expectCodes: []string{},
725+
expectName: "include-weights-dir",
726+
},
727+
{
728+
name: "include with exclude override",
729+
setupFiles: map[string]string{
730+
"config.json": "",
731+
"model.bin": "",
732+
".hidden.py": "",
733+
".env": "",
734+
"sub/.secret.yml": "",
735+
},
736+
setupDirs: []string{
737+
"sub",
738+
},
739+
config: &configmodelfile.GenerateConfig{
740+
Name: "include-exclude",
741+
IncludePatterns: []string{"**/.*"},
742+
ExcludePatterns: []string{"**/.env"},
743+
},
744+
expectError: false,
745+
expectConfigs: []string{"config.json", "sub/.secret.yml"},
746+
expectModels: []string{"model.bin"},
747+
expectCodes: []string{".hidden.py"},
748+
expectName: "include-exclude",
749+
},
750+
{
751+
name: "no include patterns regression",
752+
setupFiles: map[string]string{
753+
"config.json": "",
754+
"model.bin": "",
755+
".hidden_file": "",
756+
".hidden_dir/x.py": "",
757+
},
758+
setupDirs: []string{
759+
".hidden_dir",
760+
},
761+
config: &configmodelfile.GenerateConfig{
762+
Name: "no-include-regression",
763+
},
764+
expectError: false,
765+
expectConfigs: []string{"config.json"},
766+
expectModels: []string{"model.bin"},
767+
expectCodes: []string{},
768+
expectName: "no-include-regression",
769+
},
770+
{
771+
name: "multiple include patterns",
772+
setupFiles: map[string]string{
773+
"config.json": "",
774+
"model.bin": "",
775+
".hidden.py": "",
776+
"__pycache__/cache.pyc": "",
777+
},
778+
setupDirs: []string{
779+
"__pycache__",
780+
},
781+
config: &configmodelfile.GenerateConfig{
782+
Name: "multi-include",
783+
IncludePatterns: []string{".*", "**/__pycache__/**"},
784+
},
785+
expectError: false,
786+
expectConfigs: []string{"config.json"},
787+
expectModels: []string{"model.bin"},
788+
expectCodes: []string{".hidden.py", "__pycache__/cache.pyc"},
789+
expectName: "multi-include",
790+
},
791+
{
792+
name: "skippable dirs not matching include are still skipped",
793+
setupFiles: map[string]string{
794+
"config.json": "",
795+
"model.bin": "",
796+
".git/objects/pack": "",
797+
".weights/model.bin": "",
798+
},
799+
setupDirs: []string{
800+
".git",
801+
".git/objects",
802+
".weights",
803+
},
804+
config: &configmodelfile.GenerateConfig{
805+
Name: "selective-include",
806+
IncludePatterns: []string{".weights/**"},
807+
},
808+
expectError: false,
809+
expectConfigs: []string{"config.json"},
810+
expectModels: []string{"model.bin", ".weights/model.bin"},
811+
expectCodes: []string{},
812+
expectName: "selective-include",
813+
},
679814
}
680815

681816
assert := assert.New(t)

0 commit comments

Comments
 (0)