Skip to content

Commit de6d505

Browse files
fabianbs96 and mxHuber authored
TaintConfig Serialization (#672)
* LLVMTaintConfigYAML class * basic structure * beginning of restructuring * compiles, untested * Constructor init * minor bug fixes * new start with better approach * basic working version * removed unnecessary includes and functions * new TaintConfigData structure * fully refactored, doesn't compile * added func/var structs * compiling version, tests fail * only 3 unittests fail now * fixed a bug with sink values causing a crash * all unittests pass * review fixes * one faulty unittest remaining * all unittests pass + myphasartool revert * pre-commit stuff * review changes + unittest fixed * added static to handle functions * cleanup * minor * Pin swift version * Remove unnecessary forward declaration --------- Co-authored-by: mxHuber <huber.maximilian.leo@gmail.com>
1 parent 82a89f6 commit de6d505

7 files changed

Lines changed: 323 additions & 195 deletions

File tree

include/phasar/PhasarLLVM/TaintConfig/LLVMTaintConfig.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
namespace psr {
2020
class LLVMTaintConfig;
2121
class LLVMProjectIRDB;
22+
struct TaintConfigData;
2223

2324
template <> struct TaintConfigTraits<LLVMTaintConfig> {
2425
using n_t = const llvm::Instruction *;
@@ -31,7 +32,7 @@ class LLVMTaintConfig : public TaintConfigBase<LLVMTaintConfig> {
3132

3233
public:
3334
explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &Code,
34-
const nlohmann::json &Config);
35+
const psr::TaintConfigData &Config);
3536
explicit LLVMTaintConfig(const psr::LLVMProjectIRDB &AnnotatedCode);
3637
explicit LLVMTaintConfig(
3738
TaintDescriptionCallBackTy SourceCB, TaintDescriptionCallBackTy SinkCB,
@@ -93,7 +94,7 @@ class LLVMTaintConfig : public TaintConfigBase<LLVMTaintConfig> {
9394
// --- utilities
9495

9596
void addAllFunctions(const LLVMProjectIRDB &IRDB,
96-
const nlohmann::json &Config);
97+
const TaintConfigData &Config);
9798

9899
// --- data members
99100

include/phasar/PhasarLLVM/TaintConfig/TaintConfigBase.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,23 +10,22 @@
1010
#ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H
1111
#define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGBASE_H
1212

13+
#include "phasar/PhasarLLVM/TaintConfig/TaintConfigData.h"
1314
#include "phasar/Utils/Nullable.h"
1415

1516
#include "llvm/ADT/FunctionExtras.h"
1617
#include "llvm/ADT/Twine.h"
1718
#include "llvm/Support/Compiler.h"
1819
#include "llvm/Support/raw_ostream.h"
1920

20-
#include "nlohmann/json.hpp"
21-
2221
#include <map>
2322
#include <set>
2423
#include <type_traits>
2524
#include <utility>
2625

2726
namespace psr {
2827

29-
enum class TaintCategory { Source, Sink, Sanitizer, None };
28+
enum class TaintCategory { None, Source, Sink, Sanitizer };
3029

3130
[[nodiscard]] llvm::StringRef to_string(TaintCategory Cat) noexcept;
3231
[[nodiscard]] TaintCategory toTaintCategory(llvm::StringRef Str) noexcept;
@@ -159,8 +158,9 @@ template <typename Derived> class TaintConfigBase {
159158
//===----------------------------------------------------------------------===//
160159
// Miscellaneous helper functions
161160

162-
nlohmann::json parseTaintConfig(const llvm::Twine &Path);
163-
std::optional<nlohmann::json> parseTaintConfigOrNull(const llvm::Twine &Path);
161+
[[nodiscard]] TaintConfigData parseTaintConfig(const llvm::Twine &Path);
162+
[[nodiscard]] std::optional<TaintConfigData>
163+
parseTaintConfigOrNull(const llvm::Twine &Path) noexcept;
164164

165165
} // namespace psr
166166

include/phasar/PhasarLLVM/TaintConfig/TaintConfigData.h

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/******************************************************************************
2+
* Copyright (c) 2023 Fabian Schiebel.
3+
* All rights reserved. This program and the accompanying materials are made
4+
* available under the terms of LICENSE.txt.
5+
*
6+
* Contributors:
7+
* Maximilian Leo Huber and others
8+
*****************************************************************************/
9+
10+
#ifndef PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H
11+
#define PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H
12+
13+
#include <string>
14+
#include <vector>
15+
16+
namespace psr {
17+
enum class TaintCategory;
18+
19+
struct FunctionData {
20+
FunctionData() noexcept = default;
21+
22+
std::string Name;
23+
TaintCategory ReturnCat{};
24+
std::vector<uint32_t> SourceValues;
25+
std::vector<uint32_t> SinkValues;
26+
std::vector<uint32_t> SanitizerValues;
27+
bool HasAllSinkParam = false;
28+
};
29+
30+
struct VariableData {
31+
VariableData() noexcept = default;
32+
33+
size_t Line{};
34+
std::string Name;
35+
std::string Scope;
36+
TaintCategory Cat{};
37+
};
38+
39+
struct TaintConfigData {
40+
std::vector<FunctionData> Functions;
41+
std::vector<VariableData> Variables;
42+
};
43+
44+
} // namespace psr
45+
46+
#endif // PHASAR_PHASARLLVM_TAINTCONFIG_TAINTCONFIGDATA_H

lib/PhasarLLVM/TaintConfig/LLVMTaintConfig.cpp

Lines changed: 88 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@
1515
#include "phasar/PhasarLLVM/Utils/Annotation.h"
1616
#include "phasar/PhasarLLVM/Utils/LLVMShorthands.h"
1717
#include "phasar/Utils/Logger.h"
18-
#include "phasar/Utils/NlohmannLogging.h"
1918

2019
#include "llvm/IR/DebugInfo.h"
2120
#include "llvm/IR/Function.h"
2221
#include "llvm/IR/InstIterator.h"
2322
#include "llvm/IR/IntrinsicInst.h"
2423

24+
#include <string>
25+
2526
namespace psr {
2627

2728
static llvm::SmallVector<const llvm::Function *>
@@ -59,9 +60,9 @@ findAllFunctionDefs(const LLVMProjectIRDB &IRDB, llvm::StringRef Name) {
5960
}
6061

6162
void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB,
62-
const nlohmann::json &Config) {
63-
for (const auto &FunDesc : Config["functions"]) {
64-
auto Name = FunDesc["name"].get<std::string>();
63+
const TaintConfigData &Config) {
64+
for (const auto &FunDesc : Config.Functions) {
65+
const auto &Name = FunDesc.Name;
6566

6667
auto FnDefs = findAllFunctionDefs(IRDB, Name);
6768

@@ -72,127 +73,105 @@ void LLVMTaintConfig::addAllFunctions(const LLVMProjectIRDB &IRDB,
7273

7374
const auto *Fun = FnDefs[0];
7475

75-
// handle a function's parameters
76-
if (FunDesc.contains("params")) {
77-
auto Params = FunDesc["params"];
78-
if (Params.contains("source")) {
79-
for (unsigned Idx : Params["source"]) {
80-
if (Idx >= Fun->arg_size()) {
81-
llvm::errs()
82-
<< "ERROR: The source-function parameter index is out of "
83-
"bounds: "
84-
<< Idx << "\n";
85-
// Use 'continue' instead of 'break' to get error messages for the
86-
// remaining parameters as well
87-
continue;
88-
}
89-
addTaintCategory(Fun->getArg(Idx), TaintCategory::Source);
90-
}
76+
// handle a function's source parameters
77+
for (const auto &Idx : FunDesc.SourceValues) {
78+
if (Idx >= Fun->arg_size()) {
79+
llvm::errs() << "ERROR: The source-function parameter index is out of "
80+
"bounds: "
81+
<< Idx << "\n";
82+
// Use 'continue' instead of 'break' to get error messages for the
83+
// remaining parameters as well
84+
continue;
9185
}
92-
if (Params.contains("sink")) {
93-
for (const auto &Idx : Params["sink"]) {
94-
if (Idx.is_number()) {
95-
if (Idx >= Fun->arg_size()) {
96-
llvm::errs()
97-
<< "ERROR: The source-function parameter index is out of "
98-
"bounds: "
99-
<< Idx << "\n";
100-
continue;
101-
}
102-
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink);
103-
} else if (Idx.is_string()) {
104-
const auto Sinks = Idx.get<std::string>();
105-
if (Sinks == "all") {
106-
for (const auto &Arg : Fun->args()) {
107-
addTaintCategory(&Arg, TaintCategory::Sink);
108-
}
109-
}
110-
}
111-
}
86+
87+
addTaintCategory(Fun->getArg(Idx), TaintCategory::Source);
88+
}
89+
for (const auto &Idx : FunDesc.SinkValues) {
90+
if (Idx >= Fun->arg_size()) {
91+
llvm::errs() << "ERROR: The sink-function parameter index is out of "
92+
"bounds: "
93+
<< Idx << "\n";
94+
continue;
11295
}
113-
if (Params.contains("sanitizer")) {
114-
for (unsigned Idx : Params["sanitizer"]) {
115-
if (Idx >= Fun->arg_size()) {
116-
llvm::errs()
117-
<< "ERROR: The source-function parameter index is out of "
118-
"bounds: "
119-
<< Idx << "\n";
120-
continue;
121-
}
122-
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer);
123-
}
96+
97+
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sink);
98+
}
99+
100+
if (FunDesc.HasAllSinkParam) {
101+
for (const auto &Arg : Fun->args()) {
102+
addTaintCategory(&Arg, TaintCategory::Sink);
124103
}
125104
}
126-
// handle a function's return value
127-
if (FunDesc.contains("ret")) {
128-
for (const auto &User : Fun->users()) {
129-
addTaintCategory(User, FunDesc["ret"].get<std::string>());
105+
106+
for (const auto &Idx : FunDesc.SanitizerValues) {
107+
if (Idx >= Fun->arg_size()) {
108+
llvm::errs()
109+
<< "ERROR: The sanitizer-function parameter index is out of "
110+
"bounds: "
111+
<< Idx << "\n";
112+
continue;
130113
}
114+
addTaintCategory(Fun->getArg(Idx), TaintCategory::Sanitizer);
115+
}
116+
// handle a function's return value
117+
for (const auto &User : Fun->users()) {
118+
addTaintCategory(User, FunDesc.ReturnCat);
131119
}
132120
}
133121
}
134122

135123
LLVMTaintConfig::LLVMTaintConfig(const psr::LLVMProjectIRDB &Code,
136-
const nlohmann::json &Config) {
124+
const TaintConfigData &Config) {
137125
// handle functions
138-
if (Config.contains("functions")) {
139-
addAllFunctions(Code, Config);
140-
}
126+
addAllFunctions(Code, Config);
141127

142128
// handle variables
143-
if (Config.contains("variables")) {
144-
// scope can be a function name or a struct.
145-
std::unordered_map<const llvm::Type *, const nlohmann::json>
146-
StructConfigMap;
147-
148-
// read all struct types from config
149-
for (const auto &VarDesc : Config["variables"]) {
150-
llvm::DebugInfoFinder DIF;
151-
const auto *M = Code.getModule();
152-
153-
DIF.processModule(*M);
154-
for (const auto &Ty : DIF.types()) {
155-
if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type &&
156-
Ty->getName().equals(VarDesc["scope"].get<std::string>())) {
157-
for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) {
158-
StructConfigMap.insert(
159-
std::pair<const llvm::Type *, const nlohmann::json>(
160-
LlvmStructTy, VarDesc));
161-
}
129+
// scope can be a function name or a struct.
130+
std::unordered_map<const llvm::Type *, const std::string> StructConfigMap;
131+
132+
// read all struct types from config
133+
size_t Counter = 0;
134+
for (const auto &VarDesc : Config.Variables) {
135+
llvm::DebugInfoFinder DIF;
136+
const auto *M = Code.getModule();
137+
138+
DIF.processModule(*M);
139+
for (const auto &Ty : DIF.types()) {
140+
if (Ty->getTag() == llvm::dwarf::DW_TAG_structure_type &&
141+
Ty->getName().equals(VarDesc.Scope)) {
142+
for (const auto &LlvmStructTy : M->getIdentifiedStructTypes()) {
143+
StructConfigMap.insert(
144+
std::pair<const llvm::Type *, const std::string>(LlvmStructTy,
145+
VarDesc.Name));
162146
}
163147
}
164-
DIF.reset();
165148
}
166-
167-
// add corresponding Allocas or getElementPtr instructions to the taint
168-
// category
169-
for (const auto &VarDesc : Config["variables"]) {
170-
for (const auto &Fun : Code.getAllFunctions()) {
171-
for (const auto &I : llvm::instructions(Fun)) {
172-
if (const auto *DbgDeclare =
173-
llvm::dyn_cast<llvm::DbgDeclareInst>(&I)) {
174-
const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable();
175-
// matching line number with for Allocas
176-
if (LocalVar->getName().equals(
177-
VarDesc["name"].get<std::string>()) &&
178-
LocalVar->getLine() == VarDesc["line"].get<unsigned int>()) {
179-
addTaintCategory(DbgDeclare->getAddress(),
180-
VarDesc["cat"].get<std::string>());
181-
}
182-
} else if (!StructConfigMap.empty()) {
183-
// Ignorning line numbers for getElementPtr instructions
184-
if (const auto *Gep = llvm::dyn_cast<llvm::GetElementPtrInst>(&I)) {
185-
const auto *StType = llvm::dyn_cast<llvm::StructType>(
186-
Gep->getPointerOperandType()->getPointerElementType());
187-
if (StType && StructConfigMap.count(StType)) {
188-
const auto VarDesc = StructConfigMap.at(StType);
189-
auto VarName = VarDesc["name"].get<std::string>();
190-
// using substr to cover the edge case in which same variable
191-
// name is present as a local variable and also as a struct
192-
// member variable. (Ex. JsonConfig/fun_member_02.cpp)
193-
if (Gep->getName().substr(0, VarName.size()).equals(VarName)) {
194-
addTaintCategory(Gep, VarDesc["cat"].get<std::string>());
195-
}
149+
DIF.reset();
150+
}
151+
// add corresponding Allocas or getElementPtr instructions to the taint
152+
// category
153+
for (const auto &VarDesc : Config.Variables) {
154+
for (const auto &Fun : Code.getAllFunctions()) {
155+
for (const auto &I : llvm::instructions(Fun)) {
156+
if (const auto *DbgDeclare = llvm::dyn_cast<llvm::DbgDeclareInst>(&I)) {
157+
const llvm::DILocalVariable *LocalVar = DbgDeclare->getVariable();
158+
// matching line number with for Allocas
159+
if (LocalVar->getName().equals(VarDesc.Name) &&
160+
LocalVar->getLine() == VarDesc.Line) {
161+
addTaintCategory(DbgDeclare->getAddress(), VarDesc.Cat);
162+
}
163+
} else if (!StructConfigMap.empty()) {
164+
// Ignorning line numbers for getElementPtr instructions
165+
if (const auto *Gep = llvm::dyn_cast<llvm::GetElementPtrInst>(&I)) {
166+
const auto *StType = llvm::dyn_cast<llvm::StructType>(
167+
Gep->getPointerOperandType()->getPointerElementType());
168+
if (StType && StructConfigMap.count(StType)) {
169+
auto VarName = StructConfigMap.at(StType);
170+
// using substr to cover the edge case in which same variable
171+
// name is present as a local variable and also as a struct
172+
// member variable. (Ex. JsonConfig/fun_member_02.cpp)
173+
if (Gep->getName().substr(0, VarName.size()).equals(VarName)) {
174+
addTaintCategory(Gep, VarDesc.Cat);
196175
}
197176
}
198177
}

0 commit comments

Comments
 (0)