|
# -*- encoding: utf-8 -*-
# Copyright (c) 2021-2022 THL A29 Limited
#
# This source code file is made available under MIT License
# See LICENSE for details
# ==============================================================================

"""regexscanner: regular-expression matching analysis tool."""
| 10 | + |
| 11 | +import os |
| 12 | +import sys |
| 13 | +import yaml |
| 14 | +import shutil |
| 15 | +import json |
| 16 | + |
| 17 | +from node.app import settings |
| 18 | +from task.codelintmodel import CodeLintModel |
| 19 | +from task.scmmgr import SCMMgr |
| 20 | +from util.pathlib import PathMgr |
| 21 | +from util.textutil import CODE_EXT |
| 22 | +from task.basic.datahandler.filter import REVISION_FILTER, PATH_FILTER |
| 23 | +from util.configlib import ConfigReader |
| 24 | +from util.exceptions import AnalyzeTaskError |
| 25 | +from util.pathfilter import FilterPathUtil |
| 26 | +from util.subprocc import SubProcController |
| 27 | +from task.authcheck.check_license import __lu__ |
| 28 | +from util.logutil import LogPrinter |
| 29 | + |
# Bind the imported LogPrinter object as this module's logger; it is used
# directly (logger.info / logger.debug / logger.error), not instantiated here.
logger = LogPrinter
| 31 | + |
| 32 | + |
class RegexScanner(CodeLintModel):
    """Code-lint task that drives the external RegexScanner binary.

    The task collects the files to scan, materialises the requested rules as
    YAML files in the working directory, runs the tool found under
    ``REGEXSCANNER_HOME`` and converts its JSON report into issue dicts.
    """

    def __init__(self, params):
        """Initialise the base model and resolve the tool location.

        :param params: task parameters, forwarded to ``CodeLintModel``
        """
        CodeLintModel.__init__(self, params)
        # os.environ.get returns None when REGEXSCANNER_HOME is not set.
        self.tool_home = os.environ.get("REGEXSCANNER_HOME")
        # The class name doubles as the executable name (see get_cmd).
        self.tool_name = self.__class__.__name__

    @staticmethod
    def __split_paths(raw):
        """Split a ``;``-separated string into stripped, non-empty items."""
        if not raw:
            return []
        return [part.strip() for part in raw.split(";") if part.strip()]

    def __add_rules(self, work_dir, rules):
        """Copy the bundled YAML rule files that define any of ``rules``.

        Every ``.yaml``/``.yml`` file below ``<tool_home>/rules`` whose
        top-level ``rules`` list contains an entry whose ``name`` is in
        ``rules`` is copied into a freshly created ``<work_dir>/config_rules``
        directory, keeping its path relative to the bundled rules directory.

        :param work_dir: task working directory
        :param rules: names of the rules to pick from the bundled rule files
        :return: path of the re-created ``config_rules`` directory
        """
        rules_path = os.path.join(self.tool_home, "rules")
        yaml_suffixes = (".yaml", ".yml")
        rule_files = [
            os.path.join(dirpath, filename)
            for dirpath, _, files in os.walk(rules_path)
            for filename in files
            if filename.lower().endswith(yaml_suffixes)
        ]

        # Start from an empty directory so files of an earlier run are dropped.
        config_rules_path = os.path.join(work_dir, "config_rules")
        if os.path.exists(config_rules_path):
            shutil.rmtree(config_rules_path)
        os.mkdir(config_rules_path)

        for rule_file in rule_files:
            # Read as UTF-8 explicitly instead of relying on the locale
            # default, which breaks on non-ASCII rule text on some platforms.
            with open(rule_file, "r", encoding="utf-8") as fp:
                data = yaml.safe_load(fp)
            if not isinstance(data, dict):
                continue
            declared = data.get("rules") or []
            selected = any(
                isinstance(rule_data, dict) and rule_data.get("name") in rules
                for rule_data in declared
            )
            if not selected:
                continue
            target = os.path.join(
                config_rules_path, os.path.relpath(rule_file, rules_path)
            )
            os.makedirs(os.path.dirname(target), exist_ok=True)
            shutil.copy(rule_file, target)
        return config_rules_path

    def __format_rules(self, work_dir, rule_list):
        """Write the rule configuration consumed by the tool.

        Rules that carry ``params`` are parsed as a ``[regexcheck]`` config
        section and dumped to ``regexscanner_rules.yaml``. Rules without
        ``params`` are looked up by name among the bundled rule files. Rules
        whose params contain no ``regex`` value are logged and skipped.

        :param work_dir: task working directory
        :param rule_list: list of rule dicts; ``name`` is required and
            ``params`` is optional
        :return: path of the directory holding all rule files for this run
        """
        rules = {"rules": []}
        no_params_rules = []
        for rule in rule_list:
            rule_name = rule["name"]
            rule_params = rule.get("params")
            if not rule_params:
                logger.error(f"{rule_name}规则参数为空, 检查已存在的规则.")
                no_params_rules.append(rule_name)
                continue
            # Params may omit the section header; add it so they parse as a
            # config section.
            if "[regexcheck]" not in rule_params:
                rule_params = "[regexcheck]\r\n" + rule_params
            rule_params_dict = ConfigReader(cfg_string=rule_params).read("regexcheck")

            reg_exp = rule_params_dict.get("regex", "")
            if not reg_exp:
                logger.error(f"{rule_name}规则参数有误,未填写正则表达式,跳过该规则.")
                continue

            # Per-rule path filters (regular expressions), separated by ';'.
            exclude_paths = self.__split_paths(rule_params_dict.get("exclude", ""))
            include_paths = self.__split_paths(rule_params_dict.get("include", ""))

            # Case-insensitive flags: True|true|False|false are all accepted.
            ignore_comment = rule_params_dict.get("ignore_comment", "False").lower() == "true"
            file_scan = rule_params_dict.get("file_scan", "False").lower() == "true"
            msg = rule_params_dict.get("msg", "发现不规范代码: %s")
            rules["rules"].append({
                "name": rule_name,
                "regex": reg_exp,
                "excludes": exclude_paths,
                "includes": include_paths,
                "message": msg,
                "ignorecomment": ignore_comment,
                "filescan": file_scan,
                "severity": "error",
            })

        # __add_rules re-creates the directory, so it must run before the
        # generated rule file is written into it.
        config_rules_path = self.__add_rules(work_dir, no_params_rules)
        rules_path = os.path.join(config_rules_path, "regexscanner_rules.yaml")
        with open(rules_path, "w", encoding="utf-8") as f:
            yaml.dump(rules, f)
        return config_rules_path

    def analyze(self, params):
        """Run the regexscanner analysis task.

        :param params: task parameters; the keys read here are
            ``source_dir``, ``work_dir``, ``incr_scan`` (whether to scan
            incrementally), ``rules`` (names of the enabled rules) and
            ``rule_list`` (rule dicts with ``name`` and optional ``params``)
        :return: list of issue dicts with the keys ``path``, ``line``,
            ``column``, ``msg``, ``rule`` and ``refs``; empty when there is
            nothing to scan or the tool reports nothing
        :raises AnalyzeTaskError: when the tool produced no result file
        """
        source_dir = params["source_dir"]
        work_dir = params["work_dir"]
        incr_scan = params["incr_scan"]
        rules = params["rules"]

        files_path = os.path.join(work_dir, "regexscanner_paths.txt")
        output_path = os.path.join(work_dir, "regexscanner_result.json")

        logger.info('获取需要分析的文件')
        if incr_scan:
            diffs = SCMMgr(params).get_scm_diff()
            toscans = [
                os.path.join(source_dir, diff.path)
                for diff in diffs
                if diff.path.lower().endswith(CODE_EXT) and diff.state != "del"
            ]
        else:
            toscans = PathMgr().get_dir_files(source_dir, CODE_EXT)

        # Apply the include/exclude path filters; relpos is the offset at
        # which the path relative to source_dir starts.
        relpos = len(source_dir) + 1
        toscans = FilterPathUtil(params).get_include_files(toscans, relpos)

        if not toscans:
            logger.debug("To-be-scanned files is empty ")
            return []
        logger.debug("files to scan: %d" % len(toscans))
        with open(files_path, "w", encoding="UTF-8") as f:
            f.write("\n".join(toscans))

        # Write the rule files.
        config_rules_path = self.__format_rules(work_dir, params["rule_list"])

        # Remove a report left by an earlier run in the same work_dir so the
        # existence check below really reflects this run.
        if os.path.exists(output_path):
            os.remove(output_path)

        # Run the analysis tool.
        options = [
            "--filelist=%s" % files_path,
            "--project-root=%s" % source_dir,
            "--ruleset=%s" % config_rules_path,
            "--output-format=json",
            "--output=%s" % output_path,
        ]
        scan_cmd = self.get_cmd(options)
        logger.info(f"scan_cmd: {' '.join(scan_cmd)}")

        subproc = SubProcController(
            scan_cmd, stdout_line_callback=logger.info, stderr_line_callback=logger.info)
        subproc.wait()

        if not os.path.exists(output_path):
            logger.info("没有生成结果文件")
            raise AnalyzeTaskError("工具执行错误")

        # JSON text is UTF-8; do not depend on the locale default encoding.
        with open(output_path, "r", encoding="utf-8") as f:
            outputs_data = json.load(f)
        if not outputs_data:
            return []

        issues = []
        for item in outputs_data:
            # Keep only findings of rules that are enabled for this task.
            if item["rule"] not in rules:
                continue
            issues.append({
                "path": item["path"],
                "line": item["line"],
                "column": item["column"],
                "msg": item["msg"],
                "rule": item["rule"],
                "refs": [],
            })

        logger.debug(issues)
        return issues

    def get_cmd(self, args):
        """Build the command line used to launch the tool.

        :param args: list of command-line options for the tool
        :return: the value of ``__lu__().format_cmd(tool_path, args)``
        """
        platform_name = settings.PLATFORMS[sys.platform]
        tool_path = os.path.join(self.tool_home, "bin", platform_name, self.tool_name)
        if platform_name == "windows":
            tool_path = f"{tool_path}.exe"
        return __lu__().format_cmd(tool_path, args)

    def set_filter_type_list(self):
        """Select the result filters applied to this tool's issues.

        Override point for choosing filter types. Available types:
            1. NO_FILTER        no filtering
            2. DIFF_FILTER      drop issues in code files that were not modified
            3. REVISION_FILTER  filter by the starting revision
            4. PATH_FILTER      filter by the user-configured allow/deny path lists
        Several types may be combined; NO_FILTER disables filtering.

        :return: list of filter types, here ``[REVISION_FILTER, PATH_FILTER]``
        """
        # File selection and incremental diff handling already happen in
        # analyze(), so DIFF_FILTER is not requested again.
        return [REVISION_FILTER, PATH_FILTER]
| 213 | + |
| 214 | + |
# Module-level alias exposing the tool class under the name ``tool``
# (presumably the name the task loader looks up; verify against the caller).
tool = RegexScanner

if __name__ == '__main__':
    # Nothing to do when the module is executed directly.
    pass
0 commit comments