Skip to content

Commit 4cb262b

Browse files
xinhaoyuan authored and copybara-github committed
Split crash metadata into description and signature.
Signature will be used for deduplication while the description is human-readable for reporting. For runner, the signature will be the same as the description, which is compatible with the current behavior. PiperOrigin-RevId: 758781852
1 parent 89005b7 commit 4cb262b

11 files changed

+173
-72
lines changed

centipede/centipede.cc

Lines changed: 34 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,9 @@ void Centipede::CorpusFromFiles(const Environment &env, std::string_view dir) {
206206
absl::Status Centipede::CrashesToFiles(const Environment &env,
207207
std::string_view dir) {
208208
std::vector<std::string> reproducer_dirs;
209+
const auto wd = WorkDir{env};
209210
auto reproducer_match_status = RemoteGlobMatch(
210-
WorkDir{env}.CrashReproducerDirPaths().AllShardsGlob(), reproducer_dirs);
211+
wd.CrashReproducerDirPaths().AllShardsGlob(), reproducer_dirs);
211212
if (!reproducer_match_status.ok() &&
212213
!absl::IsNotFound(reproducer_match_status)) {
213214
return reproducer_match_status;
@@ -225,27 +226,18 @@ absl::Status Centipede::CrashesToFiles(const Environment &env,
225226
RETURN_IF_NOT_OK(RemoteFileCopy(
226227
reproducer_path,
227228
(std::filesystem::path{dir} / absl::StrCat(id, ".data")).string()));
228-
}
229-
}
230-
std::vector<std::string> metadata_dirs;
231-
auto metadata_match_status = RemoteGlobMatch(
232-
WorkDir{env}.CrashMetadataDirPaths().AllShardsGlob(), metadata_dirs);
233-
if (!metadata_match_status.ok() && !absl::IsNotFound(metadata_match_status)) {
234-
return metadata_match_status;
235-
}
236-
for (const auto &metadata_dir : metadata_dirs) {
237-
ASSIGN_OR_RETURN_IF_NOT_OK(
238-
std::vector<std::string> metadata_paths,
239-
RemoteListFiles(metadata_dir, /*recursively=*/false));
240-
for (const auto &metadata_path : metadata_paths) {
241-
std::string id = std::filesystem::path{metadata_path}.filename();
242-
if (crash_ids.erase(id) == 0) {
243-
continue;
244-
}
229+
const auto shard_index = wd.CrashReproducerDirPaths().GetShardIndex(
230+
std::filesystem::path{reproducer_path}.parent_path().string());
231+
CHECK(shard_index.has_value());
232+
const auto metadata_dir = wd.CrashMetadataDirPaths().Shard(*shard_index);
233+
const auto description_filename = absl::StrCat(id, ".desc");
234+
const auto signature_filename = absl::StrCat(id, ".sig");
235+
RETURN_IF_NOT_OK(RemoteFileCopy(
236+
(std::filesystem::path{metadata_dir} / description_filename).string(),
237+
(std::filesystem::path{dir} / description_filename).string()));
245238
RETURN_IF_NOT_OK(RemoteFileCopy(
246-
metadata_path,
247-
(std::filesystem::path{dir} / absl::StrCat(id, ".metadata"))
248-
.string()));
239+
(std::filesystem::path{metadata_dir} / signature_filename).string(),
240+
(std::filesystem::path{dir} / signature_filename).string()));
249241
}
250242
}
251243
return absl::OkStatus();
@@ -886,6 +878,9 @@ void Centipede::ReportCrash(std::string_view binary,
886878
<< "\nExit code : " << batch_result.exit_code()
887879
<< "\nFailure : "
888880
<< batch_result.failure_description()
881+
<< "\nSignature : "
882+
<< AsPrintableString(AsByteSpan(batch_result.failure_signature()),
883+
/*max_len=*/32)
889884
<< "\nNumber of inputs : " << input_vec.size()
890885
<< "\nNumber of inputs read: " << batch_result.num_outputs_read()
891886
<< (batch_result.IsSetupFailure()
@@ -900,6 +895,12 @@ void Centipede::ReportCrash(std::string_view binary,
900895
LOG(INFO).NoPrefix() << "\n";
901896
};
902897

898+
if (batch_result.IsIgnoredFailure()) {
899+
LOG(INFO) << "Skip further processing of "
900+
<< batch_result.failure_description();
901+
return;
902+
}
903+
903904
if (batch_result.IsSkippedTest()) {
904905
log_execution_failure("Skipped Test: ");
905906
LOG(INFO) << "Requesting early stop due to skipped test.";
@@ -968,21 +969,28 @@ void Centipede::ReportCrash(std::string_view binary,
968969
std::string input_file_path = std::filesystem::path(crash_dir) / hash;
969970
auto crash_metadata_dir = wd_.CrashMetadataDirPaths().MyShard();
970971
CHECK_OK(RemoteMkdir(crash_metadata_dir));
971-
std::string crash_metadata_file_path =
972+
std::string crash_metadata_path_prefix =
972973
std::filesystem::path(crash_metadata_dir) / hash;
973974
LOG(INFO) << log_prefix << "Detected crash-reproducing input:"
974975
<< "\nInput index : " << input_idx << "\nInput bytes : "
975976
<< AsPrintableString(one_input, /*max_len=*/32)
976977
<< "\nExit code : " << one_input_batch_result.exit_code()
977978
<< "\nFailure : "
978979
<< one_input_batch_result.failure_description()
980+
<< "\nSignature : "
981+
<< AsPrintableString(
982+
AsByteSpan(one_input_batch_result.failure_signature()),
983+
/*max_len=*/32)
979984
<< "\nSaving input to: " << input_file_path
980985
<< "\nSaving crash" //
981-
<< "\nmetadata to : " << crash_metadata_file_path;
986+
<< "\nmetadata to : " << crash_metadata_path_prefix << ".*";
982987
CHECK_OK(RemoteFileSetContents(input_file_path, one_input));
983-
CHECK_OK(
984-
RemoteFileSetContents(crash_metadata_file_path,
985-
one_input_batch_result.failure_description()));
988+
CHECK_OK(RemoteFileSetContents(
989+
absl::StrCat(crash_metadata_path_prefix, ".desc"),
990+
one_input_batch_result.failure_description()));
991+
CHECK_OK(RemoteFileSetContents(
992+
absl::StrCat(crash_metadata_path_prefix, ".sig"),
993+
one_input_batch_result.failure_signature()));
986994
return;
987995
}
988996
}

centipede/centipede_callbacks.cc

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ Command &CentipedeCallbacks::GetOrCreateCommandForBinary(
157157
absl::StrCat(":shmem:arg1=", inputs_blobseq_.path(),
158158
":arg2=", outputs_blobseq_.path(),
159159
":failure_description_path=", failure_description_path_,
160-
":"),
160+
":failure_signature_path=", failure_signature_path_, ":"),
161161
disable_coverage)};
162162

163163
if (env_.clang_coverage_binary == binary)
@@ -243,9 +243,18 @@ int CentipedeCallbacks::ExecuteCentipedeSancovBinaryWithShmem(
243243
ReadFromLocalFile(execute_log_path_, batch_result.log());
244244
ReadFromLocalFile(failure_description_path_,
245245
batch_result.failure_description());
246-
// Remove failure_description_ here so that it doesn't stay until another
247-
// failed execution.
246+
if (std::filesystem::exists(failure_signature_path_)) {
247+
ReadFromLocalFile(failure_signature_path_,
248+
batch_result.failure_signature());
249+
} else {
250+
// TODO(xinhaoyuan): Refactor runner to use dispatcher so this branch can
251+
// be removed.
252+
batch_result.failure_signature() = batch_result.failure_description();
253+
}
254+
// Remove the failure description and signature files here so that they do
255+
// not stay until another failed execution.
248256
std::filesystem::remove(failure_description_path_);
257+
std::filesystem::remove(failure_signature_path_);
249258
}
250259
VLOG(1) << __FUNCTION__ << " took " << (absl::Now() - start_time);
251260
return retval;

centipede/centipede_callbacks.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ class CentipedeCallbacks {
174174
std::filesystem::path(temp_dir_).append("log");
175175
std::string failure_description_path_ =
176176
std::filesystem::path(temp_dir_).append("failure_description");
177+
std::string failure_signature_path_ =
178+
std::filesystem::path(temp_dir_).append("failure_signature");
177179
const std::string shmem_name1_ = ProcessAndThreadUniqueID("/ctpd-shm1-");
178180
const std::string shmem_name2_ = ProcessAndThreadUniqueID("/ctpd-shm2-");
179181

centipede/centipede_flags.inc

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -293,11 +293,10 @@ CENTIPEDE_FLAG(
293293
// NOTE: the help text below is built from adjacent string literals that the
// compiler concatenates verbatim, so every piece must carry its own separating
// space. The piece ending in `ID` previously lacked one, which rendered as
// "`ID`will be saved" in the flag's help output.
CENTIPEDE_FLAG(
294294
std::string, crashes_to_files, "",
295295
"When set to a directory path, save the crashing reproducers and "
296-
"metadata from the workdir to the given path. Each crash with `ID`"
297-
"will be saved with file `ID.data` for the reproducer and "
298-
"`ID.metadata` the metadata, which currently contains the failure "
299-
"description. If multiple crashes with the same ID exist, only one "
300-
"crash will be saved.")
296+
"metadata from the workdir to the given path: Each crash with `ID` "
297+
"will be saved with file `ID.data` for the reproducer, `ID.desc` the "
298+
"description, `ID.sig` the signature. If multiple crashes with the same ID "
299+
"exist, only one crash will be saved.")
301300
CENTIPEDE_FLAG(
302301
std::string, corpus_from_files, "",
303302
"Export a corpus from a local directory with one file per input into "

centipede/centipede_interface.cc

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -288,9 +288,9 @@ TestShard SetUpTestSharding() {
288288
return test_shard;
289289
}
290290

291-
// Prunes non-reproducible and duplicate crashes and returns the crash metadata
292-
// of the remaining crashes.
293-
absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashMetadata(
291+
// Prunes non-reproducible and duplicate crashes and returns the crash
292+
// signatures of the remaining crashes.
293+
absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashSignatures(
294294
const std::filesystem::path &crashing_dir, const Environment &env,
295295
CentipedeCallbacksFactory &callbacks_factory) {
296296
const std::vector<std::string> crashing_input_files =
@@ -299,7 +299,7 @@ absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashMetadata(
299299
ValueOrDie(RemoteListFiles(crashing_dir.c_str(), /*recursively=*/false));
300300
ScopedCentipedeCallbacks scoped_callbacks(callbacks_factory, env);
301301
BatchResult batch_result;
302-
absl::flat_hash_set<std::string> remaining_crash_metadata;
302+
absl::flat_hash_set<std::string> remaining_crash_signatures;
303303

304304
for (const std::string &crashing_input_file : crashing_input_files) {
305305
ByteArray crashing_input;
@@ -308,21 +308,22 @@ absl::flat_hash_set<std::string> PruneOldCrashesAndGetRemainingCrashMetadata(
308308
env.binary, {crashing_input}, batch_result);
309309
const bool is_duplicate =
310310
is_reproducible && !batch_result.IsSetupFailure() &&
311-
!remaining_crash_metadata.insert(batch_result.failure_description())
311+
!batch_result.failure_signature().empty() &&
312+
!remaining_crash_signatures.insert(batch_result.failure_signature())
312313
.second;
313314
if (!is_reproducible || batch_result.IsSetupFailure() || is_duplicate) {
314315
CHECK_OK(RemotePathDelete(crashing_input_file, /*recursively=*/false));
315316
} else {
316317
CHECK_OK(RemotePathTouchExistingFile(crashing_input_file));
317318
}
318319
}
319-
return remaining_crash_metadata;
320+
return remaining_crash_signatures;
320321
}
321322

322323
// TODO(b/405382531): Add unit tests once the function is unit-testable.
323324
void DeduplicateAndStoreNewCrashes(
324325
const std::filesystem::path &crashing_dir, const WorkDir &workdir,
325-
size_t total_shards, absl::flat_hash_set<std::string> crash_metadata) {
326+
size_t total_shards, absl::flat_hash_set<std::string> crash_signatures) {
326327
for (size_t shard_idx = 0; shard_idx < total_shards; ++shard_idx) {
327328
const std::vector<std::string> new_crashing_input_files =
328329
// The crash reproducer directory may contain subdirectories with
@@ -338,19 +339,21 @@ void DeduplicateAndStoreNewCrashes(
338339
for (const std::string &crashing_input_file : new_crashing_input_files) {
339340
const std::string crashing_input_file_name =
340341
std::filesystem::path(crashing_input_file).filename();
341-
const std::string crash_metadata_file =
342+
const std::string crash_metadata_path_prefix =
342343
crash_metadata_dir / crashing_input_file_name;
343-
std::string new_crash_metadata;
344-
const absl::Status status =
345-
RemoteFileGetContents(crash_metadata_file, new_crash_metadata);
344+
std::string new_crash_signature;
345+
const absl::Status status = RemoteFileGetContents(
346+
absl::StrCat(crash_metadata_path_prefix, ".sig"),
347+
new_crash_signature);
346348
if (!status.ok()) {
347349
LOG(WARNING) << "Ignoring crashing input " << crashing_input_file_name
348-
<< " due to failure to read the crash metadata file: "
350+
<< " due to failure to read the crash signature: "
349351
<< status;
350352
continue;
351353
}
352354
const bool is_duplicate =
353-
!crash_metadata.insert(new_crash_metadata).second;
355+
!new_crash_signature.empty() &&
356+
!crash_signatures.insert(new_crash_signature).second;
354357
if (is_duplicate) continue;
355358
CHECK_OK(
356359
RemoteFileRename(crashing_input_file,
@@ -662,11 +665,11 @@ int UpdateCorpusDatabaseForFuzzTests(
662665

663666
// Deduplicate and update the crashing inputs.
664667
const std::filesystem::path crashing_dir = fuzztest_db_path / "crashing";
665-
absl::flat_hash_set<std::string> crash_metadata =
666-
PruneOldCrashesAndGetRemainingCrashMetadata(crashing_dir, env,
667-
callbacks_factory);
668+
absl::flat_hash_set<std::string> crash_signatures =
669+
PruneOldCrashesAndGetRemainingCrashSignatures(crashing_dir, env,
670+
callbacks_factory);
668671
DeduplicateAndStoreNewCrashes(crashing_dir, workdir, env.total_shards,
669-
std::move(crash_metadata));
672+
std::move(crash_signatures));
670673
}
671674

672675
return EXIT_SUCCESS;

centipede/centipede_test.cc

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,7 @@ struct Crash {
685685
std::string binary;
686686
unsigned char input = 0;
687687
std::string description;
688+
std::string signature;
688689
};
689690

690691
// A mock for ExtraBinaries test.
@@ -703,6 +704,7 @@ class ExtraBinariesMock : public CentipedeCallbacks {
703704
for (const Crash &crash : crashes_) {
704705
if (binary == crash.binary && input[0] == crash.input) {
705706
batch_result.failure_description() = crash.description;
707+
batch_result.failure_signature() = crash.signature;
706708
res = false;
707709
}
708710
}
@@ -766,9 +768,9 @@ TEST(Centipede, ExtraBinaries) {
766768
env.binary = "b1";
767769
env.extra_binaries = {"b2", "b3", "b4"};
768770
env.require_pc_table = false; // No PC table here.
769-
ExtraBinariesMock mock(
770-
env, {Crash{"b1", 10, "b1-crash"}, Crash{"b2", 30, "b2-crash"},
771-
Crash{"b3", 50, "b3-crash"}});
771+
ExtraBinariesMock mock(env, {Crash{"b1", 10, "b1-crash", "b1-sig"},
772+
Crash{"b2", 30, "b2-crash", "b2-sig"},
773+
Crash{"b3", 50, "b3-crash", "b3-sig"}});
772774
MockFactory factory(mock);
773775
CentipedeMain(env, factory);
774776

@@ -789,11 +791,15 @@ TEST(Centipede, ExtraBinaries) {
789791
auto crash_metadata_dir_path = WorkDir{env}.CrashMetadataDirPaths().MyShard();
790792
ASSERT_TRUE(std::filesystem::exists(crash_metadata_dir_path))
791793
<< VV(crash_metadata_dir_path);
792-
EXPECT_THAT(crash_metadata_dir_path,
793-
HasFilesWithContents(testing::UnorderedElementsAre(
794-
FileAndContents{Hash({10}), "b1-crash"},
795-
FileAndContents{Hash({30}), "b2-crash"},
796-
FileAndContents{Hash({50}), "b3-crash"})));
794+
EXPECT_THAT(
795+
crash_metadata_dir_path,
796+
HasFilesWithContents(testing::UnorderedElementsAre(
797+
FileAndContents{absl::StrCat(Hash({10}), ".desc"), "b1-crash"},
798+
FileAndContents{absl::StrCat(Hash({10}), ".sig"), "b1-sig"},
799+
FileAndContents{absl::StrCat(Hash({30}), ".desc"), "b2-crash"},
800+
FileAndContents{absl::StrCat(Hash({30}), ".sig"), "b2-sig"},
801+
FileAndContents{absl::StrCat(Hash({50}), ".desc"), "b3-crash"},
802+
FileAndContents{absl::StrCat(Hash({50}), ".sig"), "b3-sig"})));
797803
}
798804

799805
// A mock for UndetectedCrashingInput test.
@@ -1118,6 +1124,47 @@ TEST(Centipede, ReturnsSuccessOnSkippedTest) {
11181124
EXPECT_EQ(mock.execute_count(), 1);
11191125
}
11201126

1127+
// Test-only callbacks whose every `Execute()` call fails with an exit code of
// `EXIT_FAILURE` and a failure description that starts with
// "IGNORED FAILURE:". Also counts how many times `Execute()` was invoked so a
// test can check how many batches were run.
class IgnoredFailureCallbacks : public CentipedeCallbacks {
1128+
public:
1129+
using CentipedeCallbacks::CentipedeCallbacks;
1130+
1131+
// Records the call, resizes `batch_result` to one slot per input, then marks
// the whole batch as failed with an "IGNORED FAILURE:" description.
// Always returns false; `binary` is not inspected.
bool Execute(std::string_view binary, const std::vector<ByteArray> &inputs,
1132+
BatchResult &batch_result) override {
1133+
++execute_count_;
1134+
batch_result.ClearAndResize(inputs.size());
1135+
batch_result.exit_code() = EXIT_FAILURE;
1136+
batch_result.failure_description() =
1137+
"IGNORED FAILURE: failure ignored on purpose";
1138+
return false;
1139+
}
1140+
1141+
// Ignores `inputs`. Presumably yields `num_mutants` copies of the one-byte
// input {0} via the (count, value) vector constructor -- TODO confirm
// against the definition of `ByteArray`.
std::vector<ByteArray> Mutate(const std::vector<MutationInputRef> &inputs,
1142+
size_t num_mutants) override {
1143+
return {num_mutants, {0}};
1144+
}
1145+
1146+
// Number of times `Execute()` has been called on this object.
int execute_count() const { return execute_count_; }
1147+
1148+
private:
1149+
// Incremented at the start of every `Execute()` call.
int execute_count_ = 0;
1150+
};
1151+
1152+
// Checks that when every execution reports an "IGNORED FAILURE:" description
// (see `IgnoredFailureCallbacks`), `CentipedeMain()` still returns
// `EXIT_SUCCESS` and keeps executing batches until the run budget is used up.
TEST(Centipede, KeepsRunningAndReturnsSuccessWithIgnoredFailures) {
1153+
TempDir temp_dir{test_info_->name()};
1154+
Environment env;
1155+
env.log_level = 0; // Disable most of the logging in the test.
1156+
env.workdir = temp_dir.path();
1157+
env.batch_size = 7; // Just some small number.
1158+
env.num_runs = 11;
1159+
env.require_pc_table = false; // No PC table here.
1160+
IgnoredFailureCallbacks mock(env);
1161+
MockFactory factory(mock);
1162+
EXPECT_EQ(CentipedeMain(env, factory), EXIT_SUCCESS);
1163+
// 11 runs with a batch size of 7 need two batches (7 + 4), in addition to
// the batch executed on the seeds.
EXPECT_EQ(mock.execute_count(),
1164+
// 1 batch on seeds, 2 batches for 11 runs.
1165+
3);
1166+
}
1167+
11211168
TEST_F(CentipedeWithTemporaryLocalDir, UsesProvidedCustomMutator) {
11221169
Environment env;
11231170
env.binary = GetDataDependencyFilepath(

centipede/runner_result.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,14 @@ bool BatchResult::Read(BlobSequence &blobseq) {
124124
return true;
125125
}
126126

127+
// Returns true when the batch finished with a non-success exit code and its
// failure description begins with the "IGNORED FAILURE:" prefix.
bool BatchResult::IsIgnoredFailure() const {
128+
constexpr std::string_view kIgnoredFailurePrefix = "IGNORED FAILURE:";
129+
// `substr` clamps the requested length to the size of the view, so a
// description shorter than the prefix simply compares unequal.
return exit_code_ != EXIT_SUCCESS &&
130+
std::string_view(failure_description_)
131+
.substr(0, kIgnoredFailurePrefix.size()) ==
132+
kIgnoredFailurePrefix;
133+
}
134+
127135
bool BatchResult::IsSetupFailure() const {
128136
constexpr std::string_view kSetupFailurePrefix = "SETUP FAILURE:";
129137
return exit_code_ != EXIT_SUCCESS &&

0 commit comments

Comments
 (0)