From c69c3a7d4e6ffb52be04525a10906b7ee5d9cd1d Mon Sep 17 00:00:00 2001 From: Kostya Serebryany Date: Tue, 31 Mar 2015 20:13:20 +0000 Subject: [PATCH] [fuzzer] Add support for token-based fuzzing (e.g. for C++). Allow string flags. llvm-svn: 233745 --- lib/Fuzzer/FuzzerDriver.cpp | 77 +++++++++-- lib/Fuzzer/FuzzerFlags.def | 51 +++---- lib/Fuzzer/FuzzerIO.cpp | 6 + lib/Fuzzer/FuzzerInternal.h | 26 ++-- lib/Fuzzer/FuzzerLoop.cpp | 72 ++++++++-- lib/Fuzzer/FuzzerUtil.cpp | 2 +- lib/Fuzzer/cxx_fuzzer_tokens.txt | 218 ++++++++++++++++++++++++++++++ lib/Fuzzer/test/CMakeLists.txt | 1 + lib/Fuzzer/test/CxxTokensTest.cpp | 24 ++++ lib/Fuzzer/test/fuzzer.test | 3 + 10 files changed, 422 insertions(+), 58 deletions(-) create mode 100644 lib/Fuzzer/cxx_fuzzer_tokens.txt create mode 100644 lib/Fuzzer/test/CxxTokensTest.cpp diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index 80f9fb0c48f..05a699e5068 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -18,6 +18,10 @@ #include #include #include +#include +#include +#include +#include namespace fuzzer { @@ -26,19 +30,26 @@ struct FlagDescription { const char *Name; const char *Description; int Default; - int *Flag; + int *IntFlag; + const char **StrFlag; }; struct { -#define FUZZER_FLAG(Type, Name, Default, Description) Type Name; +#define FUZZER_FLAG_INT(Name, Default, Description) int Name; +#define FUZZER_FLAG_STRING(Name, Description) const char *Name; #include "FuzzerFlags.def" -#undef FUZZER_FLAG +#undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_STRING } Flags; static FlagDescription FlagDescriptions [] { -#define FUZZER_FLAG(Type, Name, Default, Description) {#Name, Description, Default, &Flags.Name}, +#define FUZZER_FLAG_INT(Name, Default, Description) \ + { #Name, Description, Default, &Flags.Name, nullptr}, +#define FUZZER_FLAG_STRING(Name, Description) \ + { #Name, Description, 0, nullptr, &Flags.Name }, #include "FuzzerFlags.def" -#undef FUZZER_FLAG +#undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_STRING }; static const size_t kNumFlags = @@ -79,11 +90,18 @@ static bool ParseOneFlag(const char *Param) { const char *Name = FlagDescriptions[F].Name; const char *Str = FlagValue(Param, Name); if (Str) { - int Val = std::stol(Str); - *FlagDescriptions[F].Flag = Val; - if (Flags.verbosity >= 2) - std::cerr << "Flag: " << Name << " " << Val << "\n"; - return true; + if (FlagDescriptions[F].IntFlag) { + int Val = std::stol(Str); + *FlagDescriptions[F].IntFlag = Val; + if (Flags.verbosity >= 2) + std::cerr << "Flag: " << Name << " " << Val << "\n"; + return true; + } else if (FlagDescriptions[F].StrFlag) { + *FlagDescriptions[F].StrFlag = Str; + if (Flags.verbosity >= 2) + std::cerr << "Flag: " << Name << " " << Str << "\n"; + return true; + } } } PrintHelp(); @@ -92,8 +110,12 @@ static bool ParseOneFlag(const char *Param) { // We don't use any library to minimize dependencies. static void ParseFlags(int argc, char **argv) { - for (size_t F = 0; F < kNumFlags; F++) - *FlagDescriptions[F].Flag = FlagDescriptions[F].Default; + for (size_t F = 0; F < kNumFlags; F++) { + if (FlagDescriptions[F].IntFlag) + *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default; + if (FlagDescriptions[F].StrFlag) + *FlagDescriptions[F].StrFlag = nullptr; + } for (int A = 1; A < argc; A++) { if (ParseOneFlag(argv[A])) continue; inputs.push_back(argv[A]); @@ -139,6 +161,26 @@ static int RunInMultipleProcesses(int argc, char **argv, int NumWorkers, return HasErrors ? 1 : 0; } +std::vector ReadTokensFile(const char *TokensFilePath) { + if (!TokensFilePath) return {}; + std::string TokensFileContents = FileToString(TokensFilePath); + std::istringstream ISS(TokensFileContents); + std::vector Res = {std::istream_iterator{ISS}, + std::istream_iterator{}}; + Res.push_back(" "); + Res.push_back("\t"); + Res.push_back("\n"); + return Res; +} + +int ApplyTokens(const Fuzzer &F, const char *InputFilePath) { + Unit U = FileToVector(InputFilePath); + auto T = F.SubstituteTokens(U); + T.push_back(0); + std::cout << T.data(); + return 0; +} + int FuzzerDriver(int argc, char **argv, UserCallback Callback) { using namespace fuzzer; @@ -164,6 +206,7 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) { Options.UseDFSan = Flags.dfsan; Options.PreferSmallDuringInitialShuffle = Flags.prefer_small_during_initial_shuffle; + Options.Tokens = ReadTokensFile(Flags.tokens); if (Flags.runs >= 0) Options.MaxNumberOfRuns = Flags.runs; if (!inputs.empty()) @@ -182,6 +225,16 @@ int FuzzerDriver(int argc, char **argv, UserCallback Callback) { if (Flags.timeout > 0) SetTimer(Flags.timeout); + if (Flags.verbosity >= 2) { + std::cerr << "Tokens: {"; + for (auto &T : Options.Tokens) + std::cerr << T << ","; + std::cerr << "}\n"; + } + + if (Flags.apply_tokens) + return ApplyTokens(F, Flags.apply_tokens); + for (auto &inp : inputs) F.ReadDir(inp); diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 684e12b11b8..dbaf75db696 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -6,43 +6,48 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// Flags. FUZZER_FLAG macro should be defined at the point of inclusion. -// We are not using any flag parsing library for better portability and -// independence. +// Flags. FUZZER_FLAG_INT/FUZZER_FLAG_STRING macros should be defined at the +// point of inclusion. We are not using any flag parsing library for better +// portability and independence. //===----------------------------------------------------------------------===// -FUZZER_FLAG(int, verbosity, 1, "Verbosity level.") -FUZZER_FLAG(int, seed, 0, "Random seed. If 0, seed is generated.") -FUZZER_FLAG(int, iterations, -1, +FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") +FUZZER_FLAG_INT(seed, 0, "Random seed. If 0, seed is generated.") +FUZZER_FLAG_INT(iterations, -1, "Number of iterations of the fuzzer internal loop" " (-1 for infinite iterations).") -FUZZER_FLAG(int, runs, -1, +FUZZER_FLAG_INT(runs, -1, "Number of individual test runs (-1 for infinite runs).") -FUZZER_FLAG(int, max_len, 64, "Maximal length of the test input.") -FUZZER_FLAG(int, cross_over, 1, "If 1, cross over inputs.") -FUZZER_FLAG(int, mutate_depth, 5, +FUZZER_FLAG_INT(max_len, 64, "Maximal length of the test input.") +FUZZER_FLAG_INT(cross_over, 1, "If 1, cross over inputs.") +FUZZER_FLAG_INT(mutate_depth, 5, "Apply this number of consecutive mutations to each input.") -FUZZER_FLAG( - int, prefer_small_during_initial_shuffle, -1, +FUZZER_FLAG_INT( + prefer_small_during_initial_shuffle, -1, "If 1, always prefer smaller inputs during the initial corpus shuffle." " If 0, never do that. If -1, do it sometimes.") -FUZZER_FLAG(int, exit_on_first, 0, +FUZZER_FLAG_INT(exit_on_first, 0, "If 1, exit after the first new interesting input is found.") -FUZZER_FLAG(int, timeout, -1, "Timeout in seconds (if positive).") -FUZZER_FLAG(int, help, 0, "Print help.") -FUZZER_FLAG( - int, save_minimized_corpus, 0, +FUZZER_FLAG_INT(timeout, -1, "Timeout in seconds (if positive).") +FUZZER_FLAG_INT(help, 0, "Print help.") +FUZZER_FLAG_INT( + save_minimized_corpus, 0, "If 1, the minimized corpus is saved into the first input directory") -FUZZER_FLAG(int, use_counters, 0, "Use coverage counters") -FUZZER_FLAG(int, use_full_coverage_set, 0, +FUZZER_FLAG_INT(use_counters, 0, "Use coverage counters") +FUZZER_FLAG_INT(use_full_coverage_set, 0, "Experimental: Maximize the number of different full" " coverage sets as opposed to maximizing the total coverage." " This is potentially MUCH slower, but may discover more paths.") -FUZZER_FLAG(int, use_coverage_pairs, 0, +FUZZER_FLAG_INT(use_coverage_pairs, 0, "Experimental: Maximize the number of different coverage pairs.") -FUZZER_FLAG(int, jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" +FUZZER_FLAG_INT(jobs, 0, "Number of jobs to run. If jobs >= 1 we spawn" " this number of jobs in separate worker processes" " with stdout/stderr redirected to fuzz-JOB.log.") -FUZZER_FLAG(int, workers, 0, +FUZZER_FLAG_INT(workers, 0, "Number of simultaneous worker processes to run the jobs.") -FUZZER_FLAG(int, dfsan, 1, "Use DFSan for taint-guided mutations. No-op unless " +FUZZER_FLAG_INT(dfsan, 1, "Use DFSan for taint-guided mutations. No-op unless " "the DFSan instrumentation was compiled in.") + +FUZZER_FLAG_STRING(tokens, "Use the file with tokens (one token per line) to" + " fuzz a token based input language.") +FUZZER_FLAG_STRING(apply_tokens, "Read the given input file, substitute bytes " + " with tokens and write the result to stdout.") diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp index 224808c5e16..ef23d42ea53 100644 --- a/lib/Fuzzer/FuzzerIO.cpp +++ b/lib/Fuzzer/FuzzerIO.cpp @@ -33,6 +33,12 @@ Unit FileToVector(const std::string &Path) { std::istreambuf_iterator()); } +std::string FileToString(const std::string &Path) { + std::ifstream T(Path); + return std::string((std::istreambuf_iterator(T)), + std::istreambuf_iterator()); +} + void CopyFileToErr(const std::string &Path) { std::ifstream T(Path); std::copy(std::istreambuf_iterator(T), std::istreambuf_iterator(), diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index 66548350e97..77871097bd7 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -23,7 +23,8 @@ namespace fuzzer { typedef std::vector Unit; using namespace std::chrono; -Unit ReadFile(const char *Path); +std::string FileToString(const std::string &Path); +Unit FileToVector(const std::string &Path); void ReadDirToVectorOfUnits(const char *Path, std::vector *V); void WriteToFile(const Unit &U, const std::string &Path); void CopyFileToErr(const std::string &Path); @@ -55,12 +56,9 @@ class Fuzzer { int PreferSmallDuringInitialShuffle = -1; size_t MaxNumberOfRuns = ULONG_MAX; std::string OutputCorpus; + std::vector Tokens; }; - Fuzzer(UserCallback Callback, FuzzingOptions Options) - : Callback(Callback), Options(Options) { - SetDeathCallback(); - InitializeDFSan(); - } + Fuzzer(UserCallback Callback, FuzzingOptions Options); void AddToCorpus(const Unit &U) { Corpus.push_back(U); } size_t Loop(size_t NumIterations); void ShuffleAndMinimize(); @@ -79,22 +77,28 @@ class Fuzzer { size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } - static void AlarmCallback(); + static void StaticAlarmCallback(); + + Unit SubstituteTokens(const Unit &U) const; private: + void AlarmCallback(); + void ExecuteCallback(const Unit &U); size_t MutateAndTestOne(Unit *U); size_t RunOne(const Unit &U); size_t RunOneMaximizeTotalCoverage(const Unit &U); size_t RunOneMaximizeFullCoverageSet(const Unit &U); size_t RunOneMaximizeCoveragePairs(const Unit &U); void WriteToOutputCorpus(const Unit &U); - static void WriteToCrash(const Unit &U, const char *Prefix); + void WriteToCrash(const Unit &U, const char *Prefix); bool MutateWithDFSan(Unit *U); void PrintStats(const char *Where, size_t Cov, const char *End = "\n"); + void PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter = ""); void SetDeathCallback(); - static void DeathCallback(); - static Unit CurrentUnit; + static void StaticDeathCallback(); + void DeathCallback(); + Unit CurrentUnit; size_t TotalNumberOfRuns = 0; @@ -113,7 +117,7 @@ class Fuzzer { UserCallback Callback; FuzzingOptions Options; system_clock::time_point ProcessStartTime = system_clock::now(); - static system_clock::time_point UnitStartTime; + system_clock::time_point UnitStartTime; long TimeOfLongestUnitInSeconds = 0; }; diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 7d152f19eaf..9dfe30b6d79 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -16,21 +16,49 @@ namespace fuzzer { -// static -Unit Fuzzer::CurrentUnit; -system_clock::time_point Fuzzer::UnitStartTime; +// Only one Fuzzer per process. +static Fuzzer *F; + +Fuzzer::Fuzzer(UserCallback Callback, FuzzingOptions Options) + : Callback(Callback), Options(Options) { + SetDeathCallback(); + InitializeDFSan(); + assert(!F); + F = this; +} void Fuzzer::SetDeathCallback() { - __sanitizer_set_death_callback(DeathCallback); + __sanitizer_set_death_callback(StaticDeathCallback); +} + +void Fuzzer::PrintUnitInASCIIOrTokens(const Unit &U, const char *PrintAfter) { + if (Options.Tokens.empty()) { + PrintASCII(U, PrintAfter); + } else { + auto T = SubstituteTokens(U); + T.push_back(0); + std::cerr << T.data(); + std::cerr << PrintAfter; + } +} + +void Fuzzer::StaticDeathCallback() { + assert(F); + F->DeathCallback(); } void Fuzzer::DeathCallback() { std::cerr << "DEATH: " << std::endl; Print(CurrentUnit, "\n"); - PrintASCII(CurrentUnit, "\n"); + PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); WriteToCrash(CurrentUnit, "crash-"); } +void Fuzzer::StaticAlarmCallback() { + assert(F); + F->AlarmCallback(); +} + void Fuzzer::AlarmCallback() { size_t Seconds = duration_cast(system_clock::now() - UnitStartTime).count(); @@ -38,7 +66,7 @@ void Fuzzer::AlarmCallback() { << std::endl; if (Seconds >= 3) { Print(CurrentUnit, "\n"); - PrintASCII(CurrentUnit, "\n"); + PrintUnitInASCIIOrTokens(CurrentUnit, "\n"); WriteToCrash(CurrentUnit, "timeout-"); } exit(1); @@ -123,12 +151,35 @@ static uintptr_t HashOfArrayOfPCs(uintptr_t *PCs, uintptr_t NumPCs) { return Res; } +Unit Fuzzer::SubstituteTokens(const Unit &U) const { + Unit Res; + for (auto Idx : U) { + if (Idx < Options.Tokens.size()) { + std::string Token = Options.Tokens[Idx]; + Res.insert(Res.end(), Token.begin(), Token.end()); + } else { + Res.push_back(' '); + } + } + // FIXME: Apply DFSan labels. + return Res; +} + +void Fuzzer::ExecuteCallback(const Unit &U) { + if (Options.Tokens.empty()) { + Callback(U.data(), U.size()); + } else { + auto T = SubstituteTokens(U); + Callback(T.data(), T.size()); + } +} + // Experimental. Does not yet scale. // Fuly reset the current coverage state, run a single unit, // collect all coverage pairs and return non-zero if a new pair is observed. size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) { __sanitizer_reset_coverage(); - Callback(U.data(), U.size()); + ExecuteCallback(U); uintptr_t *PCs; uintptr_t NumPCs = __sanitizer_get_coverage_guards(&PCs); bool HasNewPairs = false; @@ -153,7 +204,7 @@ size_t Fuzzer::RunOneMaximizeCoveragePairs(const Unit &U) { // e.g. test/FullCoverageSetTest.cpp. FIXME: make it scale. size_t Fuzzer::RunOneMaximizeFullCoverageSet(const Unit &U) { __sanitizer_reset_coverage(); - Callback(U.data(), U.size()); + ExecuteCallback(U); uintptr_t *PCs; uintptr_t NumPCs =__sanitizer_get_coverage_guards(&PCs); if (FullCoverageSets.insert(HashOfArrayOfPCs(PCs, NumPCs)).second) @@ -168,7 +219,7 @@ size_t Fuzzer::RunOneMaximizeTotalCoverage(const Unit &U) { __sanitizer_update_counter_bitset_and_clear_counters(0); } size_t OldCoverage = __sanitizer_get_total_unique_coverage(); - Callback(U.data(), U.size()); + ExecuteCallback(U); size_t NewCoverage = __sanitizer_get_total_unique_coverage(); size_t NumNewBits = 0; if (Options.UseCounters) @@ -222,8 +273,7 @@ size_t Fuzzer::MutateAndTestOne(Unit *U) { std::cerr << " L: " << U->size(); if (U->size() < 30) { std::cerr << " "; - PrintASCII(*U); - std::cerr << "\t"; + PrintUnitInASCIIOrTokens(*U, "\t"); Print(*U); } std::cerr << "\n"; diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp index 679f289a1c3..3f62a1f1d1e 100644 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ b/lib/Fuzzer/FuzzerUtil.cpp @@ -43,7 +43,7 @@ std::string Hash(const Unit &in) { } static void AlarmHandler(int, siginfo_t *, void *) { - Fuzzer::AlarmCallback(); + Fuzzer::StaticAlarmCallback(); } void SetTimer(int Seconds) { diff --git a/lib/Fuzzer/cxx_fuzzer_tokens.txt b/lib/Fuzzer/cxx_fuzzer_tokens.txt new file mode 100644 index 00000000000..f3c4f80e146 --- /dev/null +++ b/lib/Fuzzer/cxx_fuzzer_tokens.txt @@ -0,0 +1,218 @@ +# +## +` +~ +! +@ +$ +% +^ +& +* +( +) +_ +- +_ += ++ +{ +} +[ +] +| +\ +, +. +/ +? +> +< +; +: +' +" +++ +-- +<< +>> ++= +-= +*= +/= +>>= +<<= +&= +|= +^= +%= +!= +&& +|| +== +>= +<= +-> +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +alignas +alignof +and +and_eq +asm +auto +bitand +bitor +bool +break +case +catch +char +char16_t +char32_t +class +compl +concept +const +constexpr +const_cast +continue +decltype +default +delete +do +double +dynamic_cast +else +enum +explicit +export +extern +false +float +for +friend +goto +if +inline +int +long +mutable +namespace +new +noexcept +not +not_eq +nullptr +operator +or +or_eq +private +protected +public +register +reinterpret_cast +requires +return +short +signed +sizeof +static +static_assert +static_cast +struct +switch +template +this +thread_local +throw +true +try +typedef +typeid +typename +union +unsigned +using +virtual +void +volatile +wchar_t +while +xor +xor_eq +if +elif +else +endif +defined +ifdef +ifndef +define +undef +include +line +error +pragma +override +final diff --git a/lib/Fuzzer/test/CMakeLists.txt b/lib/Fuzzer/test/CMakeLists.txt index 8b886a04ff9..fb3bf20ef80 100644 --- a/lib/Fuzzer/test/CMakeLists.txt +++ b/lib/Fuzzer/test/CMakeLists.txt @@ -6,6 +6,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${LIBFUZZER_FLAGS_BASE} -O0 -fsanitize-coverage=4") set(Tests CounterTest + CxxTokensTest FourIndependentBranchesTest FullCoverageSetTest InfiniteTest diff --git a/lib/Fuzzer/test/CxxTokensTest.cpp b/lib/Fuzzer/test/CxxTokensTest.cpp new file mode 100644 index 00000000000..1addccb4bf9 --- /dev/null +++ b/lib/Fuzzer/test/CxxTokensTest.cpp @@ -0,0 +1,24 @@ +// Simple test for a fuzzer. The fuzzer must find a sequence of C++ tokens. +#include +#include +#include +#include +#include + +static void Found() { + std::cout << "Found the target, exiting\n"; + exit(1); +} + +extern "C" void TestOneInput(const uint8_t *Data, size_t Size) { + // looking for "thread_local unsigned A;" + if (Size < 24) return; + if (0 == memcmp(&Data[0], "thread_local", 12)) + if (Data[12] == ' ') + if (0 == memcmp(&Data[13], "unsigned", 8)) + if (Data[21] == ' ') + if (Data[22] == 'A') + if (Data[23] == ';') + Found(); +} + diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test index 19fafe37935..2a0e95fbea2 100644 --- a/lib/Fuzzer/test/fuzzer.test +++ b/lib/Fuzzer/test/fuzzer.test @@ -23,3 +23,6 @@ CounterTest: BINGO RUN: not ./LLVMFuzzer-DFSanSimpleCmpTest -seed=1 -timeout=15 2>&1 | FileCheck %s --check-prefix=DFSanSimpleCmpTest DFSanSimpleCmpTest: Found the target: + +RUN: not ./LLVMFuzzer-CxxTokensTest -seed=1 -timeout=15 -tokens=%S/../cxx_fuzzer_tokens.txt 2>&1 | FileCheck %s --check-prefix=CxxTokensTest +CxxTokensTest: Found the target, exiting