Skip to content

Commit 6fdb61c

Browse files
authored
Improve language detection and return used language in the metrics (#121)
* Improve language detection and return used language in the metrics
* Add .m extension in c/c++ parser
1 parent f3ea9d8 commit 6fdb61c

8 files changed

Lines changed: 215 additions & 141 deletions

File tree

src/language.rs

Lines changed: 0 additions & 18 deletions
This file was deleted.

src/languages.rs

Lines changed: 63 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -8,21 +8,24 @@ use crate::*;
88

99
mk_langs!(
1010
// 1) Name for enum
11-
// 2) Empty struct name to implement
12-
// 3) Parser name
13-
// 4) tree-sitter function to call to get a Language
14-
// 5) file extensions
15-
// 6) emacs modes
11+
// 2) Display name
12+
// 3) Empty struct name to implement
13+
// 4) Parser name
14+
// 5) tree-sitter function to call to get a Language
15+
// 6) file extensions
16+
// 7) emacs modes
1617
(
1718
Mozjs,
19+
"javascript",
1820
MozjsCode,
1921
MozjsParser,
2022
tree_sitter_mozjs,
2123
[js, jsm],
22-
["js"]
24+
["js", "js2"]
2325
),
2426
(
2527
Javascript,
28+
"javascript",
2629
JavascriptCode,
2730
JavascriptParser,
2831
tree_sitter_javascript,
@@ -31,15 +34,17 @@ mk_langs!(
3134
),
3235
(
3336
Java,
37+
"java",
3438
JavaCode,
3539
JavaParser,
3640
tree_sitter_java,
3741
[java],
3842
["java"]
3943
),
40-
(Go, GoCode, GoParser, tree_sitter_go, [go], ["go"]),
44+
(Go, "go", GoCode, GoParser, tree_sitter_go, [go], ["go"]),
4145
(
4246
Html,
47+
"html",
4348
HtmlCode,
4449
HtmlParser,
4550
tree_sitter_html,
@@ -48,33 +53,61 @@ mk_langs!(
4853
),
4954
(
5055
CSharp,
56+
"c#",
5157
CSharpCode,
5258
CSharpParser,
5359
tree_sitter_c_sharp,
5460
[cs],
5561
["csharp", "c#"]
5662
),
57-
(Rust, RustCode, RustParser, tree_sitter_rust, [rs], ["rust"]),
58-
(Css, CssCode, CssParser, tree_sitter_css, [css], ["css"]),
63+
(
64+
Rust,
65+
"rust",
66+
RustCode,
67+
RustParser,
68+
tree_sitter_rust,
69+
[rs],
70+
["rust"]
71+
),
72+
(
73+
Css,
74+
"css",
75+
CssCode,
76+
CssParser,
77+
tree_sitter_css,
78+
[css],
79+
["css"]
80+
),
5981
(
6082
Cpp,
83+
"c/c++",
6184
CppCode,
6285
CppParser,
6386
tree_sitter_cpp,
64-
[cpp, cxx, cc, hxx, hpp, c, h, hh, inc],
65-
["c++", "c"]
87+
[cpp, cxx, cc, hxx, hpp, c, h, hh, inc, mm, m],
88+
["c++", "c", "objc", "objc++", "objective-c++", "objective-c"]
6689
),
6790
(
6891
Python,
92+
"python",
6993
PythonCode,
7094
PythonParser,
7195
tree_sitter_python,
7296
[py],
7397
["python"]
7498
),
75-
(Tsx, TsxCode, TsxParser, tree_sitter_tsx, [tsx], []),
99+
(
100+
Tsx,
101+
"typescript",
102+
TsxCode,
103+
TsxParser,
104+
tree_sitter_tsx,
105+
[tsx],
106+
[]
107+
),
76108
(
77109
Typescript,
110+
"typescript",
78111
TypescriptCode,
79112
TypescriptParser,
80113
tree_sitter_typescript,
@@ -83,6 +116,7 @@ mk_langs!(
83116
),
84117
(
85118
Ccomment,
119+
"ccomment",
86120
CcommentCode,
87121
CcommentParser,
88122
tree_sitter_ccomment,
@@ -91,10 +125,27 @@ mk_langs!(
91125
),
92126
(
93127
Preproc,
128+
"preproc",
94129
PreprocCode,
95130
PreprocParser,
96131
tree_sitter_preproc,
97132
[],
98133
[]
99134
)
100135
);
136+
137+
pub(crate) mod fake {
138+
pub fn get_true(ext: &str, mode: &str) -> Option<String> {
139+
if ext == "m"
140+
|| ext == "mm"
141+
|| mode == "objc"
142+
|| mode == "objc++"
143+
|| mode == "objective-c++"
144+
|| mode == "objective-c"
145+
{
146+
Some("obj-c/c++".to_string())
147+
} else {
148+
None
149+
}
150+
}
151+
}

src/lib.rs

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -60,9 +60,6 @@ pub mod c_macro;
6060
pub mod preproc;
6161
pub use crate::preproc::*;
6262

63-
mod language;
64-
pub use crate::language::*;
65-
6663
mod languages;
6764
pub use crate::languages::*;
6865

src/macros.rs

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ macro_rules! mk_extern {
2525
#[macro_export]
2626
macro_rules! mk_enum {
2727
( $( $camel:ident ),* ) => {
28-
#[derive(Clone, Debug, IntoEnumIterator, PartialEq)]
28+
#[derive(Clone, Copy, Debug, IntoEnumIterator, PartialEq)]
2929
pub enum LANG {
3030
$(
3131
$camel,
@@ -35,13 +35,24 @@ macro_rules! mk_enum {
3535
}
3636

3737
#[macro_export]
38-
macro_rules! mk_get_language {
39-
( $( ($camel:ident, $name:ident) ),* ) => {
40-
pub fn get_language(lang: &LANG) -> Language {
41-
unsafe {
42-
match lang {
38+
macro_rules! mk_impl_lang {
39+
( $( ($camel:ident, $name:ident, $display: expr) ),* ) => {
40+
impl LANG {
41+
42+
pub fn get_language(&self) -> Language {
43+
unsafe {
44+
match self {
45+
$(
46+
LANG::$camel => $name(),
47+
)*
48+
}
49+
}
50+
}
51+
52+
pub fn get_name(&self) -> &'static str {
53+
match self {
4354
$(
44-
LANG::$camel => $name(),
55+
LANG::$camel => $display,
4556
)*
4657
}
4758
}
@@ -126,10 +137,10 @@ macro_rules! mk_code {
126137

127138
#[macro_export]
128139
macro_rules! mk_langs {
129-
( $( ($camel:ident, $code:ident, $parser:ident, $name:ident, [ $( $ext:ident ),* ], [ $( $emacs_mode:expr ),* ]) ),* ) => {
140+
( $( ($camel:ident, $display: expr, $code:ident, $parser:ident, $name:ident, [ $( $ext:ident ),* ], [ $( $emacs_mode:expr ),* ]) ),* ) => {
130141
mk_extern!($( $name ),*);
131142
mk_enum!($( $camel ),*);
132-
mk_get_language!($( ($camel, $name) ),*);
143+
mk_impl_lang!($( ($camel, $name, $display) ),*);
133144
mk_action!($( ($camel, $parser) ),*);
134145
mk_extensions!($( ($camel, [ $( $ext ),* ]) ),*);
135146
mk_emacs_mode!($( ($camel, [ $( $emacs_mode ),* ]) ),*);

src/main.rs

Lines changed: 27 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ struct Config {
3636
}
3737

3838
struct JobItem {
39-
language: LANG,
39+
language: Option<LANG>,
4040
path: PathBuf,
4141
cfg: Config,
4242
}
@@ -60,19 +60,24 @@ fn mk_globset(elems: clap::Values) -> GlobSet {
6060
}
6161
}
6262

63-
fn act_on_file(language: LANG, path: PathBuf, cfg: Config) -> std::io::Result<()> {
63+
fn act_on_file(language: Option<LANG>, path: PathBuf, cfg: Config) -> std::io::Result<()> {
64+
let source = read_file_with_eol(&path)?;
65+
let language = if let Some(language) = language {
66+
language
67+
} else if let Some(language) = guess_language(&source, &path).0 {
68+
language
69+
} else {
70+
return Ok(());
71+
};
72+
6473
let pr = cfg.preproc;
6574
if cfg.dump {
66-
let source = read_file_with_eol(&path)?;
67-
let language = guess_language(&source).0.unwrap_or(language);
6875
let cfg = DumpCfg {
6976
line_start: cfg.line_start,
7077
line_end: cfg.line_end,
7178
};
7279
action::<Dump>(&language, source, &path, pr, cfg)
7380
} else if cfg.metrics {
74-
let source = read_file_with_eol(&path)?;
75-
let language = guess_language(&source).0.unwrap_or(language);
7681
let cfg = MetricsCfg {
7782
path,
7883
output_path: if cfg.output.is_empty() {
@@ -83,30 +88,19 @@ fn act_on_file(language: LANG, path: PathBuf, cfg: Config) -> std::io::Result<()
8388
};
8489
action::<Metrics>(&language, source, &cfg.path.clone(), pr, cfg)
8590
} else if cfg.comments {
86-
let source = read_file_with_eol(&path)?;
87-
let language = guess_language(&source).0.unwrap_or(language);
88-
let lang = get_language_for_file(&path);
8991
let cfg = CommentRmCfg {
9092
in_place: cfg.in_place,
9193
path,
9294
};
93-
if let Some(lang) = lang {
94-
if lang == LANG::Cpp {
95-
action::<CommentRm>(&LANG::Ccomment, source, &cfg.path.clone(), pr, cfg)
96-
} else {
97-
action::<CommentRm>(&language, source, &cfg.path.clone(), pr, cfg)
98-
}
95+
if language == LANG::Cpp {
96+
action::<CommentRm>(&LANG::Ccomment, source, &cfg.path.clone(), pr, cfg)
9997
} else {
10098
action::<CommentRm>(&language, source, &cfg.path.clone(), pr, cfg)
10199
}
102100
} else if cfg.function {
103-
let source = read_file_with_eol(&path)?;
104-
let language = guess_language(&source).0.unwrap_or(language);
105101
let cfg = FunctionCfg { path: path.clone() };
106102
action::<Function>(&language, source, &path, pr, cfg)
107103
} else if !cfg.find_filter.is_empty() {
108-
let source = read_file_with_eol(&path)?;
109-
let language = guess_language(&source).0.unwrap_or(language);
110104
let cfg = FindCfg {
111105
path: Some(path.clone()),
112106
filters: cfg.find_filter,
@@ -115,24 +109,20 @@ fn act_on_file(language: LANG, path: PathBuf, cfg: Config) -> std::io::Result<()
115109
};
116110
action::<Find>(&language, source, &path, pr, cfg)
117111
} else if cfg.count_lock.is_some() {
118-
let source = read_file_with_eol(&path)?;
119-
let language = guess_language(&source).0.unwrap_or(language);
120112
let cfg = CountCfg {
121113
path: Some(path.clone()),
122114
filters: cfg.count_filter,
123115
stats: cfg.count_lock.unwrap().clone(),
124116
};
125117
action::<Count>(&language, source, &path, pr, cfg)
126118
} else if cfg.preproc_lock.is_some() {
127-
if let Some(lang) = get_language_for_file(&path) {
128-
if lang == LANG::Cpp {
129-
let source = read_file_with_eol(&path)?;
130-
preprocess(
131-
&PreprocParser::new(source, &path, None),
132-
&path,
133-
cfg.preproc_lock.unwrap().clone(),
134-
);
135-
}
119+
if language == LANG::Cpp {
120+
let source = read_file_with_eol(&path)?;
121+
preprocess(
122+
&PreprocParser::new(source, &path, None),
123+
&path,
124+
cfg.preproc_lock.unwrap().clone(),
125+
);
136126
}
137127
Ok(())
138128
} else {
@@ -154,21 +144,13 @@ fn consumer(receiver: JobReceiver) {
154144
}
155145

156146
fn send_file(path: PathBuf, cfg: &Config, language: &Option<LANG>, sender: &JobSender) {
157-
let language = if language.is_none() {
158-
get_language_for_file(&path)
159-
} else {
160-
language.clone()
161-
};
162-
163-
if let Some(language) = language {
164-
sender
165-
.send(Some(JobItem {
166-
language,
167-
path,
168-
cfg: cfg.clone(),
169-
}))
170-
.unwrap();
171-
}
147+
sender
148+
.send(Some(JobItem {
149+
language: language.clone(),
150+
path,
151+
cfg: cfg.clone(),
152+
}))
153+
.unwrap();
172154
}
173155

174156
fn is_hidden(entry: &DirEntry) -> bool {

0 commit comments

Comments
 (0)