Skip to content

Commit

Permalink
Merge pull request #257 from Tom94/perf
Browse files Browse the repository at this point in the history
Various performance improvements
  • Loading branch information
Tom94 authored Feb 24, 2025
2 parents 5e742b1 + 2a1bdb1 commit 8a89453
Show file tree
Hide file tree
Showing 10 changed files with 196 additions and 121 deletions.
3 changes: 3 additions & 0 deletions dependencies/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ if (TEV_USE_LIBHEIF)

add_library(lcms2 STATIC ${LCMS_SRCS} ${LCMS_HDRS})
set_target_properties(lcms2 PROPERTIES PUBLIC_HEADER "${LCMS_HDRS}")

# We don't use the SSE2 components of CMS in tev; disable to simplify ARM compilation
target_compile_definitions(lcms2 PRIVATE -DCMS_DONT_USE_SSE2=1)
target_include_directories(lcms2 PUBLIC
"${CMAKE_CURRENT_SOURCE_DIR}/Little-CMS/include"
# "${CMAKE_CURRENT_SOURCE_DIR}/Little-CMS/plugins/fast_float/include"
Expand Down
58 changes: 37 additions & 21 deletions include/tev/Channel.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,45 +25,41 @@ class Channel {

static nanogui::Color color(std::string fullChannel);

Channel(const std::string& name, const nanogui::Vector2i& size);
Channel(
const std::string& name,
const nanogui::Vector2i& size,
std::shared_ptr<std::vector<float>> data = nullptr,
size_t dataOffset = 0,
size_t dataStride = 1
);

const std::string& name() const { return mName; }

const std::vector<float>& data() const { return mData; }

float eval(size_t index) const {
if (index >= mData.size()) {
return 0;
}

return mData[index];
}

float eval(nanogui::Vector2i index) const {
if (index.x() < 0 || index.x() >= mSize.x() || index.y() < 0 || index.y() >= mSize.y()) {
return 0;
}

return mData[index.x() + index.y() * (size_t)mSize.x()];
return at(index.x() + (size_t)index.y() * (size_t)mSize.x());
}

float& at(size_t index) { return mData[index]; }

float at(size_t index) const { return mData[index]; }

float& at(nanogui::Vector2i index) { return at(index.x() + index.y() * (size_t)mSize.x()); }

float at(nanogui::Vector2i index) const { return at(index.x() + index.y() * (size_t)mSize.x()); }

size_t numPixels() const { return mData.size(); }
size_t numPixels() const { return (size_t)mSize.x() * mSize.y(); }

const nanogui::Vector2i& size() const { return mSize; }

std::tuple<float, float, float> minMaxMean() const {
float min = std::numeric_limits<float>::infinity();
float max = -std::numeric_limits<float>::infinity();
float mean = 0;
for (float f : mData) {

const size_t nPixels = numPixels();
for (size_t i = 0; i < nPixels; ++i) {
const float f = at(i);

mean += f;
if (f < min) {
min = f;
Expand All @@ -74,21 +70,41 @@ class Channel {
}
}

return {min, max, mean / numPixels()};
return {min, max, mean / nPixels};
}

Task<void> divideByAsync(const Channel& other, int priority);

Task<void> multiplyWithAsync(const Channel& other, int priority);

void setZero() { memset(mData.data(), 0, mData.size() * sizeof(float)); }
// Resets every sample of this channel to zero.
void setZero() {
    const size_t sampleCount = numPixels();

    // With a stride of 1 the samples form one contiguous run of floats that can be cleared in bulk.
    if (mDataStride == 1) {
        memset(data(), 0, sampleCount * sizeof(float));
        return;
    }

    // Otherwise go through at(), which applies the stride, so that only this channel's samples are written.
    for (size_t sample = 0; sample != sampleCount; ++sample) {
        at(sample) = 0.0f;
    }
}

void updateTile(int x, int y, int width, int height, const std::vector<float>& newData);

float& at(size_t index) { return data()[index * mDataStride]; }

float at(size_t index) const { return data()[index * mDataStride]; }

// Pointer to this channel's first sample: the start of the underlying buffer advanced by mDataOffset floats.
// Sample i is located at data()[i * mDataStride] (see at()), so data()[i] is only the i-th sample when the stride is 1.
// Note that the returned pointer is mutable even though this method is const.
float* data() const { return mData->data() + mDataOffset; }

size_t offset() const { return mDataOffset; }
size_t stride() const { return mDataStride; }

private:
std::string mName;
nanogui::Vector2i mSize;
std::vector<float> mData;
std::shared_ptr<std::vector<float>> mData;
size_t mDataOffset;
size_t mDataStride;
};

} // namespace tev
2 changes: 1 addition & 1 deletion include/tev/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@

// Throws a std::runtime_error carrying the fmt-formatted `description` (plus optional format arguments) when `cond`
// evaluates to false.
//
// The body is wrapped in do { ... } while (0) so that the macro expands to exactly one statement. Without the wrapper,
// an `else` written after `if (x) TEV_ASSERT(...);` would silently attach to the macro's hidden `if`. Call sites must
// terminate the macro with a semicolon.
#define TEV_ASSERT(cond, description, ...)                                      \
    do {                                                                        \
        if (UNLIKELY(!(cond))) {                                                \
            throw std::runtime_error{fmt::format(description, ##__VA_ARGS__)};  \
        }                                                                       \
    } while (0)

#ifndef TEV_VERSION
# define TEV_VERSION "undefined"
Expand Down
10 changes: 10 additions & 0 deletions include/tev/Image.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,16 @@ class Image {
bool hasChannel(const std::string& channelName) const { return mData.hasChannel(channelName); }

const Channel* channel(const std::string& channelName) const { return mData.channel(channelName); }
std::vector<const Channel*> channels(const std::vector<std::string>& channelNames) const {
std::vector<const Channel*> result;
for (const auto& channelName : channelNames) {
result.push_back(channel(channelName));
}

return result;
}

bool isInterleavedRgba(const std::vector<std::string>& channelNames) const;

nanogui::Texture* texture(const std::string& channelGroupName);
nanogui::Texture* texture(const std::vector<std::string>& channelNames);
Expand Down
15 changes: 12 additions & 3 deletions src/Channel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#include <tev/Channel.h>
#include <tev/ThreadPool.h>

#include <numeric>
#include <memory>

using namespace nanogui;
using namespace std;
Expand Down Expand Up @@ -42,8 +42,17 @@ Color Channel::color(string channel) {
return Color(1.0f, 1.0f);
}

Channel::Channel(const std::string& name, const nanogui::Vector2i& size) : mName{name}, mSize{size} {
mData.resize((size_t)mSize.x() * mSize.y());
// Creates a channel called `name` that covers `size` pixels.
//
// If `data` is non-null, the channel refers to that shared buffer and stores `dataOffset` and `dataStride` as given.
// No check is made here that the buffer is large enough for `size` with that offset and stride.
//
// If `data` is null, the channel allocates its own zero-initialized buffer of size.x() * size.y() floats; in that case
// `dataOffset` and `dataStride` are ignored and 0 and 1 are used instead.
Channel::Channel(const string& name, const nanogui::Vector2i& size, shared_ptr<vector<float>> data, size_t dataOffset, size_t dataStride) :
    mName{name}, mSize{size} {
    if (data) {
        // `data` is already our own by-value copy. Swapping it into the still-empty member takes over that reference
        // directly instead of copying it (which would cost an extra atomic refcount increment and decrement).
        mData.swap(data);
        mDataOffset = dataOffset;
        mDataStride = dataStride;
    } else {
        mData = make_shared<vector<float>>((size_t)size.x() * size.y());
        mDataOffset = 0;
        mDataStride = 1;
    }
}

Task<void> Channel::divideByAsync(const Channel& other, int priority) {
Expand Down
62 changes: 43 additions & 19 deletions src/Image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,26 @@ string Image::shortName() const {
return result;
}

// Reports whether the named channels already sit in memory as interleaved 4-float samples, i.e. whether the i-th
// channel starts exactly i floats after the first channel and every channel has a stride of 4.
//
// Up to four names are inspected. Checking stops at the first name that is absent from `channelNames` or for which
// channel() returns null. The result is then true if at least the first channel exists and every channel inspected
// up to that point matched the layout; it is false if the first channel is missing or any inspected channel deviates.
bool Image::isInterleavedRgba(const vector<string>& channelNames) const {
    const float* interleavedData = nullptr;
    for (size_t i = 0; i < 4; ++i) {
        // Callers may pass fewer than four names; indexing past the end would be undefined behavior.
        // Treat names that are not there the same way as names without a channel.
        if (i >= channelNames.size()) {
            break;
        }

        const auto* chan = channel(channelNames[i]);
        if (!chan) {
            break;
        }

        if (i == 0) {
            interleavedData = chan->data();
        }

        // Channel i must begin i floats after channel 0 and step over 4 floats per sample.
        if (interleavedData != chan->data() - i || chan->stride() != 4) {
            return false;
        }
    }

    return interleavedData != nullptr;
}

Texture* Image::texture(const string& channelGroupName) { return texture(channelsInGroup(channelGroupName)); }

Texture* Image::texture(const vector<string>& channelNames) {
Expand Down Expand Up @@ -264,33 +284,37 @@ Texture* Image::texture(const vector<string>& channelNames) {
);
auto& texture = mTextures.at(lookup).nanoguiTexture;

auto numPixels = this->numPixels();
vector<float> data(numPixels * 4);
// Check if channel layout is already interleaved. If yes, can directly copy onto GPU!
if (isInterleavedRgba(channelNames)) {
texture->upload((uint8_t*)channel(channelNames[0])->data());
} else {
auto numPixels = this->numPixels();
vector<float> data = vector<float>(numPixels * 4);

vector<Task<void>> tasks;
for (size_t i = 0; i < 4; ++i) {
float defaultVal = i == 3 ? 1 : 0;
if (i < channelNames.size()) {
const auto* chan = channel(channelNames[i]);
if (!chan) {
tasks.emplace_back(ThreadPool::global().parallelForAsync<size_t>(
0, numPixels, [&data, defaultVal, i](size_t j) { data[j * 4 + i] = defaultVal; }, std::numeric_limits<int>::max()
));
vector<Task<void>> tasks;
for (size_t i = 0; i < 4; ++i) {
float defaultVal = i == 3 ? 1 : 0;
if (i < channelNames.size()) {
const auto* chan = channel(channelNames[i]);
if (!chan) {
tasks.emplace_back(ThreadPool::global().parallelForAsync<size_t>(
0, numPixels, [&data, defaultVal, i](size_t j) { data[j * 4 + i] = defaultVal; }, std::numeric_limits<int>::max()
));
} else {
tasks.emplace_back(ThreadPool::global().parallelForAsync<size_t>(
0, numPixels, [chan, &data, i](size_t j) { data[j * 4 + i] = chan->at(j); }, std::numeric_limits<int>::max()
));
}
} else {
const auto& channelData = chan->data();
tasks.emplace_back(ThreadPool::global().parallelForAsync<size_t>(
0, numPixels, [&channelData, &data, i](size_t j) { data[j * 4 + i] = channelData[j]; }, std::numeric_limits<int>::max()
0, numPixels, [&data, defaultVal, i](size_t j) { data[j * 4 + i] = defaultVal; }, std::numeric_limits<int>::max()
));
}
} else {
tasks.emplace_back(ThreadPool::global().parallelForAsync<size_t>(
0, numPixels, [&data, defaultVal, i](size_t j) { data[j * 4 + i] = defaultVal; }, std::numeric_limits<int>::max()
));
}
waitAll(tasks);
texture->upload((uint8_t*)data.data());
}
waitAll(tasks);

texture->upload((uint8_t*)data.data());
texture->generate_mipmap();
return texture.get();
}
Expand Down
Loading

0 comments on commit 8a89453

Please sign in to comment.