Add response outputs to profile export
matthewkotila committed Feb 28, 2024
1 parent 820539b commit 8601412
Showing 15 changed files with 212 additions and 99 deletions.
6 changes: 5 additions & 1 deletion src/c++/library/common.h
@@ -1,4 +1,4 @@
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -530,6 +530,10 @@ class InferResult {
/// \return Error object indicating the success or failure.
virtual Error IsNullResponse(bool* is_null_response) const = 0;

/// Get output for this response.
/// \return Error object indicating the success or failure.
virtual Error Output(std::string& output) const = 0;

/// Get the result data as a vector of strings. The vector will
/// receive a copy of result data. An error will be generated if
/// the datatype of output is not 'BYTES'.
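For context, here is a minimal usage sketch, not part of this commit, showing how client code might call the new accessor once a response has been received. It assumes the gRPC client headers and a model whose output tensor is named "text_output", which is the only name Output() currently recognizes; PrintTextOutput() is a hypothetical helper.

// Sketch only: PrintTextOutput() is illustrative and not part of the library.
#include <iostream>
#include <string>

#include "grpc_client.h"

namespace tc = triton::client;

void
PrintTextOutput(tc::InferResult* result)
{
  std::string output;
  tc::Error err = result->Output(output);
  if (!err.IsOk()) {
    std::cerr << "Failed to read output: " << err.Message() << std::endl;
    return;
  }
  // 'output' stays empty when the response has no "text_output" tensor.
  std::cout << output << std::endl;
}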
22 changes: 21 additions & 1 deletion src/c++/library/grpc_client.cc
@@ -1,4 +1,4 @@
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -206,6 +206,7 @@ class InferResultGrpc : public InferResult {
size_t* byte_size) const override;
Error IsFinalResponse(bool* is_final_response) const override;
Error IsNullResponse(bool* is_null_response) const override;
Error Output(std::string& output) const override;
Error StringData(
const std::string& output_name,
std::vector<std::string>* string_result) const override;
@@ -349,6 +350,25 @@ InferResultGrpc::IsNullResponse(bool* is_null_response) const
return Error::Success;
}

Error
InferResultGrpc::Output(std::string& output) const
{
// Only supports LLM outputs with name 'text_output' currently
if (output_name_to_buffer_map_.find("text_output") ==
output_name_to_buffer_map_.end()) {
return Error::Success;
}

const uint8_t* buf{nullptr};
size_t byte_size{0};
Error e{RawData("text_output", &buf, &byte_size)};
if (e.IsOk() == false) {
return e;
}
output.assign(reinterpret_cast<const char*>(buf), byte_size);
return Error::Success;
}

Error
InferResultGrpc::StringData(
const std::string& output_name,
22 changes: 21 additions & 1 deletion src/c++/library/http_client.cc
@@ -1,4 +1,4 @@
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -758,6 +758,7 @@ class InferResultHttp : public InferResult {
size_t* byte_size) const override;
Error IsFinalResponse(bool* is_final_response) const override;
Error IsNullResponse(bool* is_null_response) const override;
Error Output(std::string& output) const override;
Error StringData(
const std::string& output_name,
std::vector<std::string>* string_result) const override;
@@ -980,6 +981,25 @@ InferResultHttp::IsNullResponse(bool* is_null_response) const
return Error::Success;
}

Error
InferResultHttp::Output(std::string& output) const
{
// Only supports LLM outputs with name 'text_output' currently
if (output_name_to_buffer_map_.find("text_output") ==
output_name_to_buffer_map_.end()) {
return Error::Success;
}

const uint8_t* buf{nullptr};
size_t byte_size{0};
Error e{RawData("text_output", &buf, &byte_size)};
if (e.IsOk() == false) {
return e;
}
output.assign(reinterpret_cast<const char*>(buf), byte_size);
return Error::Success;
}

Error
InferResultHttp::StringData(
const std::string& output_name,
9 changes: 8 additions & 1 deletion src/c++/perf_analyzer/client_backend/client_backend.h
@@ -1,4 +1,4 @@
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -642,6 +642,13 @@ class InferResult {
{
return Error("InferResult::IsNullResponse() not implemented");
};

/// Get output for this response.
/// \return Error object indicating the success or failure.
virtual Error Output(std::string& output) const
{
return Error("InferResult::Output() not implemented");
}
};

}}} // namespace triton::perfanalyzer::clientbackend
@@ -1,4 +1,4 @@
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -842,6 +842,13 @@ TritonInferResult::IsNullResponse(bool* is_null_response) const
return Error::Success;
}

Error
TritonInferResult::Output(std::string& output) const
{
RETURN_IF_TRITON_ERROR(result_->Output(output));
return Error::Success;
}

//==============================================================================

}}}} // namespace triton::perfanalyzer::clientbackend::tritonremote
@@ -1,4 +1,4 @@
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -333,6 +333,8 @@ class TritonInferResult : public InferResult {
Error IsFinalResponse(bool* is_final_response) const override;
/// See InferResult::IsNullResponse()
Error IsNullResponse(bool* is_null_response) const override;
/// See InferResult::Output()
Error Output(std::string& output) const override;

private:
std::unique_ptr<tc::InferResult> result_;
19 changes: 13 additions & 6 deletions src/c++/perf_analyzer/infer_context.cc
@@ -1,4 +1,4 @@
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -149,8 +149,10 @@ InferContext::SendRequest(
&results, *(infer_data_.options_), infer_data_.valid_inputs_,
infer_data_.outputs_);
thread_stat_->idle_timer.Stop();
std::string output{""};
if (results != nullptr) {
if (thread_stat_->status_.IsOk()) {
results->Output(output);
thread_stat_->status_ = ValidateOutputs(results);
}
delete results;
@@ -167,7 +169,7 @@ InferContext::SendRequest(
std::lock_guard<std::mutex> lock(thread_stat_->mu_);
auto total = end_time_sync - start_time_sync;
thread_stat_->request_records_.emplace_back(RequestRecord(
start_time_sync, std::move(end_time_syncs),
start_time_sync, std::move(end_time_syncs), {output},
infer_data_.options_->sequence_end_, delayed, sequence_id, false));
thread_stat_->status_ =
infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
@@ -258,7 +260,11 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
if (thread_stat_->cb_status_.IsOk() == false) {
return;
}
it->second.response_times_.push_back(std::chrono::system_clock::now());
it->second.response_timestamps_.push_back(
std::chrono::system_clock::now());
std::string response_output{""};
result->Output(response_output);
it->second.response_outputs_.push_back(response_output);
num_responses_++;
if (is_null_response == true) {
it->second.has_null_last_response_ = true;
@@ -271,9 +277,10 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
if (is_final_response) {
has_received_final_response_ = is_final_response;
thread_stat_->request_records_.emplace_back(
it->second.start_time_, it->second.response_times_,
it->second.sequence_end_, it->second.delayed_,
it->second.sequence_id_, it->second.has_null_last_response_);
it->second.start_time_, it->second.response_timestamps_,
it->second.response_outputs_, it->second.sequence_end_,
it->second.delayed_, it->second.sequence_id_,
it->second.has_null_last_response_);
infer_backend_->ClientInferStat(&(thread_stat_->contexts_stat_[id_]));
thread_stat_->cb_status_ = ValidateOutputs(result);
async_req_map_.erase(request_id);
13 changes: 7 additions & 6 deletions src/c++/perf_analyzer/inference_profiler.cc
@@ -1,4 +1,4 @@
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -1282,11 +1282,12 @@ InferenceProfiler::ValidLatencyMeasurement(
uint64_t request_end_ns;

if (request_record.has_null_last_response_ == false) {
request_end_ns = CHRONO_TO_NANOS(request_record.response_times_.back());
} else if (request_record.response_times_.size() > 1) {
size_t last_response_idx{request_record.response_times_.size() - 2};
request_end_ns =
CHRONO_TO_NANOS(request_record.response_times_[last_response_idx]);
CHRONO_TO_NANOS(request_record.response_timestamps_.back());
} else if (request_record.response_timestamps_.size() > 1) {
size_t last_response_idx{request_record.response_timestamps_.size() - 2};
request_end_ns = CHRONO_TO_NANOS(
request_record.response_timestamps_[last_response_idx]);
} else {
erase_indices.push_back(i);
continue;
@@ -1297,7 +1298,7 @@
if ((request_end_ns >= valid_range.first) &&
(request_end_ns <= valid_range.second)) {
valid_latencies->push_back(request_end_ns - request_start_ns);
response_count += request_record.response_times_.size();
response_count += request_record.response_timestamps_.size();
if (request_record.has_null_last_response_) {
response_count--;
}
40 changes: 29 additions & 11 deletions src/c++/perf_analyzer/profile_data_exporter.cc
@@ -1,4 +1,4 @@
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -122,25 +122,43 @@ ProfileDataExporter::AddRequests(
request.AddMember("sequence_id", sequence_id, document_.GetAllocator());
}

rapidjson::Value responses(rapidjson::kArrayType);
AddResponses(responses, raw_request.response_times_);
rapidjson::Value response_timestamps(rapidjson::kArrayType);
AddResponseTimestamps(
response_timestamps, raw_request.response_timestamps_);
request.AddMember(
"response_timestamps", responses, document_.GetAllocator());
"response_timestamps", response_timestamps, document_.GetAllocator());

rapidjson::Value response_outputs(rapidjson::kArrayType);
AddResponseOutputs(response_outputs, raw_request.response_outputs_);
request.AddMember(
"response_outputs", response_outputs, document_.GetAllocator());

requests.PushBack(request, document_.GetAllocator());
}
entry.AddMember("requests", requests, document_.GetAllocator());
}

void
ProfileDataExporter::AddResponses(
rapidjson::Value& responses,
ProfileDataExporter::AddResponseTimestamps(
rapidjson::Value& timestamps_json,
const std::vector<std::chrono::time_point<std::chrono::system_clock>>&
response_times)
timestamps)
{
for (auto& timestamp : timestamps) {
rapidjson::Value timestamp_json;
timestamp_json.SetUint64(timestamp.time_since_epoch().count());
timestamps_json.PushBack(timestamp_json, document_.GetAllocator());
}
}

void
ProfileDataExporter::AddResponseOutputs(
rapidjson::Value& outputs_json, const std::vector<std::string>& outputs)
{
for (auto& response : response_times) {
rapidjson::Value time;
time.SetUint64(response.time_since_epoch().count());
responses.PushBack(time, document_.GetAllocator());
for (auto& output : outputs) {
rapidjson::Value output_json;
output_json.SetString(output.c_str(), output.size());
outputs_json.PushBack(output_json, document_.GetAllocator());
}
}

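As a point of reference, a minimal sketch, not from this commit, of reading the two per-request arrays back out of an exported profile with rapidjson. The top-level "experiments" and "requests" layout is assumed from the surrounding exporter code, and ParseProfile() is a hypothetical helper; timestamps are system_clock ticks since the epoch (nanoseconds on most platforms) and outputs are the captured "text_output" strings.

// Sketch only: assumes the exported JSON has the shape
// { "experiments": [ { "requests": [ { "response_timestamps": [...],
//   "response_outputs": [...] }, ... ] }, ... ], ... }.
#include <iostream>
#include <string>

#include <rapidjson/document.h>

void
ParseProfile(const std::string& json_text)
{
  rapidjson::Document doc;
  doc.Parse(json_text.c_str());
  for (const auto& experiment : doc["experiments"].GetArray()) {
    for (const auto& request : experiment["requests"].GetArray()) {
      const auto& timestamps = request["response_timestamps"].GetArray();
      const auto& outputs = request["response_outputs"].GetArray();
      // The two arrays are written pairwise, one entry per response.
      for (rapidjson::SizeType i = 0; i < outputs.Size(); i++) {
        std::cout << timestamps[i].GetUint64() << ": "
                  << outputs[i].GetString() << std::endl;
      }
    }
  }
}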
10 changes: 6 additions & 4 deletions src/c++/perf_analyzer/profile_data_exporter.h
@@ -1,4 +1,4 @@
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -69,10 +69,12 @@ class ProfileDataExporter {
void AddRequests(
rapidjson::Value& entry, rapidjson::Value& requests,
const Experiment& raw_experiment);
void AddResponses(
rapidjson::Value& responses,
void AddResponseTimestamps(
rapidjson::Value& timestamps_json,
const std::vector<std::chrono::time_point<std::chrono::system_clock>>&
response_times);
timestamps);
void AddResponseOutputs(
rapidjson::Value& outputs_json, const std::vector<std::string>& outputs);
void AddWindowBoundaries(
rapidjson::Value& entry, rapidjson::Value& window_boundaries,
const Experiment& raw_experiment);
25 changes: 14 additions & 11 deletions src/c++/perf_analyzer/request_record.h
@@ -1,4 +1,4 @@
// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -34,24 +34,27 @@ namespace triton { namespace perfanalyzer {

/// A record of an individual request
struct RequestRecord {
RequestRecord() = default;
RequestRecord(
std::chrono::time_point<std::chrono::system_clock> start_time,
std::chrono::time_point<std::chrono::system_clock> start_time =
std::chrono::time_point<std::chrono::system_clock>(),
std::vector<std::chrono::time_point<std::chrono::system_clock>>
response_times,
bool sequence_end, bool delayed, uint64_t sequence_id,
bool has_null_last_response)
: start_time_(start_time), response_times_(response_times),
sequence_end_(sequence_end), delayed_(delayed),
sequence_id_(sequence_id),
response_timestamps = {},
std::vector<std::string> response_outputs = {}, bool sequence_end = true,
bool delayed = false, uint64_t sequence_id = 0,
bool has_null_last_response = false)
: start_time_(start_time), response_timestamps_(response_timestamps),
response_outputs_(response_outputs), sequence_end_(sequence_end),
delayed_(delayed), sequence_id_(sequence_id),
has_null_last_response_(has_null_last_response)
{
}
// The timestamp of when the request was started.
std::chrono::time_point<std::chrono::system_clock> start_time_;
// Collection of response times
// Collection of response timestamps
std::vector<std::chrono::time_point<std::chrono::system_clock>>
response_times_;
response_timestamps_;
// Collection of response outputs
std::vector<std::string> response_outputs_;
// Whether or not the request is at the end of a sequence.
bool sequence_end_;
// Whether or not the request is delayed as per schedule.
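For reference, a hedged sketch, not from this commit, of constructing a RequestRecord with the new response_outputs_ member populated; MakeExampleRecord() and the literal values are illustrative only. Because every constructor parameter now has a default, callers such as tests can supply only the leading fields they care about.

// Sketch only: values are illustrative.
#include <chrono>
#include <string>
#include <vector>

#include "request_record.h"

triton::perfanalyzer::RequestRecord
MakeExampleRecord()
{
  using std::chrono::system_clock;
  auto start{system_clock::now()};
  std::vector<std::chrono::time_point<system_clock>> response_timestamps{
      system_clock::now(), system_clock::now()};
  std::vector<std::string> response_outputs{"Hello", " world!"};
  return triton::perfanalyzer::RequestRecord(
      start, response_timestamps, response_outputs,
      /*sequence_end=*/true, /*delayed=*/false,
      /*sequence_id=*/0, /*has_null_last_response=*/false);
}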