Skip to content

Commit 236cede

Browse files
committed
[metrics] add guessed language from emacs/vim headers
1 parent 14d3bbc commit 236cede

6 files changed

Lines changed: 88 additions & 31 deletions

File tree

Cargo.lock

Lines changed: 38 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ serde_json = "^1.0"
3333
walkdir = "^2.2"
3434

3535
[dev-dependencies]
36+
pretty_assertions = "^0.6"
3637

3738
[profile.release]
3839
opt-level = 3

src/main.rs

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -64,15 +64,15 @@ fn act_on_file(language: LANG, path: PathBuf, cfg: Config) -> std::io::Result<()
6464
let pr = cfg.preproc;
6565
if cfg.dump {
6666
let source = read_file_with_eol(&path)?;
67-
let language = guess_language(&source).unwrap_or(language);
67+
let language = guess_language(&source).0.unwrap_or(language);
6868
let cfg = DumpCfg {
6969
line_start: cfg.line_start,
7070
line_end: cfg.line_end,
7171
};
7272
action::<Dump>(&language, source, &path, pr, cfg)
7373
} else if cfg.metrics {
7474
let source = read_file_with_eol(&path)?;
75-
let language = guess_language(&source).unwrap_or(language);
75+
let language = guess_language(&source).0.unwrap_or(language);
7676
let cfg = MetricsCfg {
7777
path,
7878
output_path: if cfg.output.is_empty() {
@@ -84,7 +84,7 @@ fn act_on_file(language: LANG, path: PathBuf, cfg: Config) -> std::io::Result<()
8484
action::<Metrics>(&language, source, &cfg.path.clone(), pr, cfg)
8585
} else if cfg.comments {
8686
let source = read_file_with_eol(&path)?;
87-
let language = guess_language(&source).unwrap_or(language);
87+
let language = guess_language(&source).0.unwrap_or(language);
8888
let lang = get_language_for_file(&path);
8989
let cfg = CommentRmCfg {
9090
in_place: cfg.in_place,
@@ -101,12 +101,12 @@ fn act_on_file(language: LANG, path: PathBuf, cfg: Config) -> std::io::Result<()
101101
}
102102
} else if cfg.function {
103103
let source = read_file_with_eol(&path)?;
104-
let language = guess_language(&source).unwrap_or(language);
104+
let language = guess_language(&source).0.unwrap_or(language);
105105
let cfg = FunctionCfg { path: path.clone() };
106106
action::<Function>(&language, source, &path, pr, cfg)
107107
} else if !cfg.find_filter.is_empty() {
108108
let source = read_file_with_eol(&path)?;
109-
let language = guess_language(&source).unwrap_or(language);
109+
let language = guess_language(&source).0.unwrap_or(language);
110110
let cfg = FindCfg {
111111
path: Some(path.clone()),
112112
filters: cfg.find_filter,
@@ -116,7 +116,7 @@ fn act_on_file(language: LANG, path: PathBuf, cfg: Config) -> std::io::Result<()
116116
action::<Find>(&language, source, &path, pr, cfg)
117117
} else if cfg.count_lock.is_some() {
118118
let source = read_file_with_eol(&path)?;
119-
let language = guess_language(&source).unwrap_or(language);
119+
let language = guess_language(&source).0.unwrap_or(language);
120120
let cfg = CountCfg {
121121
path: Some(path.clone()),
122122
filters: cfg.count_filter,

src/tools.rs

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -63,19 +63,19 @@ pub fn get_language_for_file(path: &PathBuf) -> Option<LANG> {
6363
}
6464
}
6565

66-
pub fn get_language_with_mode(lang: &[u8]) -> Option<LANG> {
66+
pub fn get_language_with_mode(lang: &[u8]) -> (Option<LANG>, String) {
6767
if let Some(lang) = std::str::from_utf8(lang)
6868
.ok()
6969
.map(|l| l.to_lowercase())
7070
.as_ref()
7171
{
72-
get_from_emacs_mode(lang)
72+
(get_from_emacs_mode(lang), lang.to_string())
7373
} else {
74-
None
74+
(None, "".to_string())
7575
}
7676
}
7777

78-
pub fn guess_language(buf: &[u8]) -> Option<LANG> {
78+
pub fn guess_language(buf: &[u8]) -> (Option<LANG>, String) {
7979
// we just try to use the emacs info (if there)
8080
lazy_static! {
8181
// comment containing coding info are useful
@@ -106,7 +106,7 @@ pub fn guess_language(buf: &[u8]) -> Option<LANG> {
106106
}
107107
}
108108

109-
None
109+
(None, "".to_string())
110110
}
111111

112112
pub fn normalize_path<P: AsRef<Path>>(path: P) -> Option<PathBuf> {
@@ -246,21 +246,24 @@ mod tests {
246246
#[test]
247247
fn test_guess_language() {
248248
let buf = b"// -*- foo: bar; mode: c++; hello: world\n";
249-
assert_eq!(guess_language(buf), Some(LANG::Cpp));
249+
assert_eq!(guess_language(buf), (Some(LANG::Cpp), "c++".to_string()));
250250

251251
let buf = b"// -*- c++ -*-\n";
252-
assert_eq!(guess_language(buf), Some(LANG::Cpp));
252+
assert_eq!(guess_language(buf), (Some(LANG::Cpp), "c++".to_string()));
253253

254254
let buf = b"// -*- foo: bar; bar-mode: c++; hello: world\n";
255-
assert_eq!(guess_language(buf), None);
255+
assert_eq!(guess_language(buf), (None, "".to_string()));
256256

257257
let buf = b"/* hello world */\n";
258-
assert_eq!(guess_language(buf), None);
258+
assert_eq!(guess_language(buf), (None, "".to_string()));
259259

260260
let buf = b"\n\n\n\n\n\n\n\n\n// vim: set ts=4 ft=c++\n\n\n";
261-
assert_eq!(guess_language(buf), Some(LANG::Cpp));
261+
assert_eq!(guess_language(buf), (Some(LANG::Cpp), "c++".to_string()));
262262

263263
let buf = b"\n\n\n\n\n\n\n\n\n\n\n\n";
264-
assert_eq!(guess_language(buf), None);
264+
assert_eq!(guess_language(buf), (None, "".to_string()));
265+
266+
let buf = b"// -*- foo: bar; mode: Objective-C++; hello: world\n";
267+
assert_eq!(guess_language(buf), (None, "objective-c++".to_string()));
265268
}
266269
}

src/web/metrics.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ pub struct WebMetricsPayload {
1616
#[derive(Debug, Serialize)]
1717
pub struct WebMetricsResponse<'a> {
1818
pub id: String,
19+
pub guessed_language: String,
1920
pub spaces: Option<FuncSpace<'a>>,
2021
}
2122

@@ -31,6 +32,7 @@ pub struct WebMetricsCfg {
3132
pub id: String,
3233
pub path: PathBuf,
3334
pub unit: bool,
35+
pub guessed_language: String,
3436
}
3537

3638
impl Callback for WebMetricsCallback {
@@ -50,6 +52,11 @@ impl Callback for WebMetricsCallback {
5052
spaces
5153
};
5254

53-
serde_json::to_value(WebMetricsResponse { id: cfg.id, spaces }).unwrap()
55+
serde_json::to_value(WebMetricsResponse {
56+
id: cfg.id,
57+
guessed_language: cfg.guessed_language,
58+
spaces,
59+
})
60+
.unwrap()
5461
}
5562
}

src/web/server.rs

Lines changed: 21 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ fn ast_parser(item: web::Json<AstPayload>, _req: HttpRequest) -> HttpResponse {
3232
span: payload.span,
3333
};
3434
let buf = payload.code.into_bytes();
35-
let language = guess_language(&buf).unwrap_or(language);
35+
let language = guess_language(&buf).0.unwrap_or(language);
3636

3737
// TODO: the 4th arg should be preproc data
3838
HttpResponse::Ok().json(action::<AstCallback>(
@@ -56,7 +56,7 @@ fn comment_removal_json(item: web::Json<WebCommentPayload>, _req: HttpRequest) -
5656
if let Some(language) = language {
5757
let cfg = WebCommentCfg { id: payload.id };
5858
let buf = payload.code.into_bytes();
59-
let language = guess_language(&buf).unwrap_or(language);
59+
let language = guess_language(&buf).0.unwrap_or(language);
6060
let language = if language == LANG::Cpp {
6161
LANG::Ccomment
6262
} else {
@@ -81,7 +81,7 @@ fn comment_removal_plain(code: Bytes, info: Query<WebCommentInfo>) -> HttpRespon
8181
let language = get_language_for_file(&PathBuf::from(&info.file_name));
8282
if let Some(language) = language {
8383
let buf = code.to_vec();
84-
let language = guess_language(&buf).unwrap_or(language);
84+
let language = guess_language(&buf).0.unwrap_or(language);
8585
let cfg = WebCommentCfg { id: "".to_string() };
8686
let res = action::<WebCommentCallback>(&language, buf, &PathBuf::from(""), None, cfg);
8787
if let Some(res_code) = res.code {
@@ -105,13 +105,15 @@ fn metrics_json(item: web::Json<WebMetricsPayload>, _req: HttpRequest) -> HttpRe
105105
let language = get_language_for_file(&path);
106106
let payload = item.into_inner();
107107
if let Some(language) = language {
108+
let buf = payload.code.into_bytes();
109+
let (guessed_language, guessed_name) = guess_language(&buf);
110+
let language = guessed_language.unwrap_or(language);
108111
let cfg = WebMetricsCfg {
109112
id: payload.id,
110113
path,
111114
unit: payload.unit,
115+
guessed_language: guessed_name,
112116
};
113-
let buf = payload.code.into_bytes();
114-
let language = guess_language(&buf).unwrap_or(language);
115117
HttpResponse::Ok().json(action::<WebMetricsCallback>(
116118
&language,
117119
buf,
@@ -132,14 +134,16 @@ fn metrics_plain(code: Bytes, info: Query<WebMetricsInfo>) -> HttpResponse {
132134
let language = get_language_for_file(&path);
133135
if let Some(language) = language {
134136
let buf = code.to_vec();
135-
let language = guess_language(&buf).unwrap_or(language);
137+
let (guessed_language, guessed_name) = guess_language(&buf);
138+
let language = guessed_language.unwrap_or(language);
136139
let cfg = WebMetricsCfg {
137140
id: "".to_string(),
138141
path,
139142
unit: info
140143
.unit
141144
.as_ref()
142145
.map_or(false, |s| s == "1" || s == "true"),
146+
guessed_language: guessed_name,
143147
};
144148
HttpResponse::Ok().json(action::<WebMetricsCallback>(
145149
&language,
@@ -162,7 +166,7 @@ fn function_json(item: web::Json<WebFunctionPayload>, _req: HttpRequest) -> Http
162166
if let Some(language) = language {
163167
let cfg = WebFunctionCfg { id: payload.id };
164168
let buf = payload.code.into_bytes();
165-
let language = guess_language(&buf).unwrap_or(language);
169+
let language = guess_language(&buf).0.unwrap_or(language);
166170
HttpResponse::Ok().json(action::<WebFunctionCallback>(
167171
&language,
168172
buf,
@@ -183,7 +187,7 @@ fn function_plain(code: Bytes, info: Query<WebFunctionInfo>) -> HttpResponse {
183187
let language = get_language_for_file(&path);
184188
if let Some(language) = language {
185189
let buf = code.to_vec();
186-
let language = guess_language(&buf).unwrap_or(language);
190+
let language = guess_language(&buf).0.unwrap_or(language);
187191
let cfg = WebFunctionCfg { id: "".to_string() };
188192
HttpResponse::Ok().json(action::<WebFunctionCallback>(
189193
&language,
@@ -266,6 +270,7 @@ pub fn run(host: String, port: u16, n_threads: usize) -> std::io::Result<()> {
266270
mod tests {
267271
use actix_web::{http::header::ContentType, http::StatusCode, test};
268272
use bytes::Bytes;
273+
use pretty_assertions::assert_eq;
269274
use serde_json::value::Value;
270275

271276
use super::*;
@@ -592,17 +597,18 @@ mod tests {
592597
.set_json(&WebMetricsPayload {
593598
id: "1234".to_string(),
594599
file_name: "test.py".to_string(),
595-
code: "def foo():\n pass\n".to_string(),
600+
code: "# -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-\ndef foo():\n pass\n".to_string(),
596601
unit: false,
597602
})
598603
.to_request();
599604

600605
let res: Value = test::read_response_json(&mut app, req).await;
601606
let expected = json!({
602607
"id": "1234",
608+
"guessed_language": "objective-c++",
603609
"spaces": {"kind": "unit",
604610
"start_line": 1,
605-
"end_line": 2,
611+
"end_line": 3,
606612
"metrics": {"cyclomatic": 1.0,
607613
"nargs": 0.,
608614
"nexits": 0.,
@@ -618,11 +624,11 @@ mod tests {
618624
"unique_operands": 1.0,
619625
"unique_operators": 2.0,
620626
"volume": 4.754_887_502_163_468},
621-
"loc": {"cloc": 0.0, "lloc": 2.0, "sloc": 2.0}},
627+
"loc": {"cloc": 0.0, "lloc": 3.0, "sloc": 3.0}},
622628
"name": "test.py",
623629
"spaces": [{"kind": "function",
624-
"start_line": 1,
625-
"end_line": 2,
630+
"start_line": 2,
631+
"end_line": 3,
626632
"metrics": {"cyclomatic": 1.0,
627633
"nargs": 0.,
628634
"nexits": 0.,
@@ -665,6 +671,7 @@ mod tests {
665671
let res: Value = test::read_response_json(&mut app, req).await;
666672
let expected = json!({
667673
"id": "1234",
674+
"guessed_language": "",
668675
"spaces": {"kind": "unit",
669676
"start_line": 1,
670677
"end_line": 2,
@@ -706,6 +713,7 @@ mod tests {
706713
let res: Value = test::read_response_json(&mut app, req).await;
707714
let expected = json!({
708715
"id": "",
716+
"guessed_language": "",
709717
"spaces": {"kind": "unit",
710718
"start_line": 1,
711719
"end_line": 2,

0 commit comments

Comments
 (0)