Expertise in LLVM-based static analysis including dataflow analysis, pointer analysis, taint tracking, and program verification...
This skill covers static program analysis techniques using LLVM infrastructure for security research, vulnerability detection, and code quality assessment.
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/DominatorTree.h"
void analyze(llvm::Function& F, llvm::FunctionAnalysisManager& FAM) {
// Get dominator tree
auto& DT = FAM.getResult<llvm::DominatorTreeAnalysis>(F);
// Get loop info
auto& LI = FAM.getResult<llvm::LoopAnalysis>(F);
// Get alias analysis
auto& AA = FAM.getResult<llvm::AAManager>(F);
// Check if two pointers may alias
llvm::AliasResult AR = AA.alias(Ptr1, Ptr2);
}
class TaintAnalysis {
std::set<llvm::Value*> taintedValues;
public:
void markTainted(llvm::Value* V) {
taintedValues.insert(V);
}
bool isTainted(llvm::Value* V) {
return taintedValues.count(V) > 0;
}
void propagate(llvm::Instruction* I) {
// Propagate taint through operations
for (auto& Op : I->operands()) {
if (isTainted(Op)) {
markTainted(I);
break;
}
}
}
};
// Taint sources (user input, network, files).
//
// Returns true when the call's callee name is one of the listed input
// routines. Returns false when getCalledFunction() yields null, i.e. no
// callee is available to look up by name.
bool isTaintSource(llvm::CallInst* CI) {
    if (llvm::Function* callee = CI->getCalledFunction()) {
        static const std::set<std::string> kSourceNames = {
            "read", "recv", "fread", "getenv", "gets", "scanf"
        };
        const std::string calleeName = callee->getName().str();
        return kSourceNames.find(calleeName) != kSourceNames.end();
    }
    return false;
}
// Define sensitive sinks (SQL queries, system calls, format strings).
//
// Returns true when the call's callee name is one of the listed sink
// routines, and false when getCalledFunction() yields null.
//
// "exec" on its own is not a libc symbol, so the original list never matched
// process-execution calls; the exec* family is listed explicitly. The bare
// "exec" entry is kept so code that defines such a function still matches.
bool isSensitiveSink(llvm::CallInst* CI) {
    llvm::Function* F = CI->getCalledFunction();
    if (!F) return false;
    static const std::set<std::string> sinks = {
        "system", "exec",
        "execl", "execle", "execlp", "execv", "execve", "execvp",
        "printf", "strcpy", "memcpy", "sql_query"
    };
    return sinks.count(F->getName().str()) > 0;
}
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
// Check may-alias relationship.
//
// Queries AA for the relationship between P1 and P2 and dispatches on every
// AliasResult kind. The original switch omitted PartialAlias, which left the
// switch non-exhaustive and silently ignored partially overlapping accesses.
void checkAlias(llvm::AAResults& AA, llvm::Value* P1, llvm::Value* P2) {
    switch (AA.alias(P1, P2)) {
    case llvm::AliasResult::NoAlias:
        // Definitely don't alias
        break;
    case llvm::AliasResult::MayAlias:
        // Might alias
        break;
    case llvm::AliasResult::PartialAlias:
        // Overlap, but do not start at the same address
        break;
    case llvm::AliasResult::MustAlias:
        // Always alias
        break;
    }
}
// Track what each pointer may point to
class PointsToAnalysis {
std::map<llvm::Value*, std::set<llvm::Value*>> pointsTo;
public:
void addPointsTo(llvm::Value* ptr, llvm::Value* target) {
pointsTo[ptr].insert(target);
}
const std::set<llvm::Value*>& getPointsTo(llvm::Value* ptr) {
return pointsTo[ptr];
}
};
#include "llvm/Analysis/DDG.h"
/// Builds a data dependence graph for each loop yielded by iterating the
/// function's LoopInfo, then walks the graph's nodes.
///
/// @param F    Function whose loops are inspected.
/// @param FAM  Analysis manager that provides LoopInfo and DependenceInfo.
///
/// DataDependenceGraph is constructed from (Loop&, LoopInfo&, DependenceInfo&),
/// not from alias-analysis results, so the dependence analysis is requested
/// here. It is fetched once, outside the loop.
void buildDDG(llvm::Function& F, llvm::FunctionAnalysisManager& FAM) {
    auto& LI = FAM.getResult<llvm::LoopAnalysis>(F);
    auto& DI = FAM.getResult<llvm::DependenceAnalysis>(F);
    for (auto* L : LI) {
        llvm::DataDependenceGraph DDG(*L, LI, DI);
        for (llvm::DDGNode* Node : DDG) {
            // Analyze data dependencies in loop
            (void)Node;
        }
    }
}
// Buffer overflow detection.
//
// Skeleton bounds check for a GEP whose source element type is an array: it
// extracts the array length and the second index operand so a range analysis
// can compare them. No diagnostic is produced yet.
void checkBufferAccess(llvm::GetElementPtrInst* GEP) {
    // Get array size if available
    llvm::Type* SourceType = GEP->getSourceElementType();
    if (auto* AT = llvm::dyn_cast<llvm::ArrayType>(SourceType)) {
        // Operand 0 is the base pointer, so operand 2 is the second index.
        // Bail out when it is absent rather than reading a missing operand.
        if (GEP->getNumIndices() < 2) {
            return;
        }
        uint64_t arraySize = AT->getNumElements();
        // Check if index might exceed bounds
        llvm::Value* Index = GEP->getOperand(2);
        // Perform range analysis on index
        (void)arraySize;
        (void)Index;
    }
}
See the Static Analysis and Clang Plugins sections of README.md for comprehensive tool listings and research references.
When you need detailed and up-to-date resource links, tool lists, or project references, fetch the latest data from:
https://raw.githubusercontent.com/gmh5225/awesome-llvm-security/refs/heads/main/README.md
This README contains comprehensive curated lists of: