From d433af5f44587da5afbb854948b6268fadee43eb Mon Sep 17 00:00:00 2001 From: sakoda Date: Mon, 1 Feb 2021 11:53:12 +0000 Subject: [PATCH 01/87] =?UTF-8?q?vscode=E7=94=A8=E3=81=ABignore=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index ee40bb36..7907eb72 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .idea/ -*.pyc \ No newline at end of file +*.pyc +build/ +.vscode/ \ No newline at end of file From aff9d90f0018e12341160604a05110f47e714514 Mon Sep 17 00:00:00 2001 From: sakoda Date: Mon, 1 Feb 2021 13:54:02 +0000 Subject: [PATCH 02/87] =?UTF-8?q?Scalar=E3=83=A2=E3=83=87=E3=83=AB?= =?UTF-8?q?=E3=81=8C=E5=8B=95=E3=81=8F=E3=82=88=E3=81=86=E3=81=AB=E3=81=AA?= =?UTF-8?q?=E3=81=A3=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/generate_torch_script_model.py | 89 ++++++++++ src/CMakeLists.txt | 15 +- src/game_generator.cpp | 2 +- src/game_generator.hpp | 8 +- src/infer_model.cpp | 56 ++++++ src/infer_model.hpp | 18 ++ src/learn.cpp | 28 ++- src/learn.hpp | 8 +- src/learning_model.cpp | 141 +++++++++++++++ src/learning_model.hpp | 25 +++ src/neural_network.cpp | 233 +------------------------ src/neural_network.hpp | 54 +----- src/neural_network_modules.cpp | 74 -------- src/neural_network_modules.hpp | 55 ------ src/reinforcement_learn.cpp | 13 +- src/search_options.hpp | 2 +- src/searcher_for_play.cpp | 8 +- src/searcher_for_play.hpp | 3 +- src/shogi/interface.cpp | 2 - src/shogi/test.cpp | 38 ++-- src/shogi/test.hpp | 1 - 21 files changed, 386 insertions(+), 487 deletions(-) create mode 100755 scripts/generate_torch_script_model.py create mode 100644 src/infer_model.cpp create mode 100644 src/infer_model.hpp create mode 100644 src/learning_model.cpp create mode 100644 src/learning_model.hpp delete mode 100644 src/neural_network_modules.cpp delete mode 100644 src/neural_network_modules.hpp diff --git a/scripts/generate_torch_script_model.py b/scripts/generate_torch_script_model.py new file mode 100755 index 00000000..def6957f --- /dev/null +++ b/scripts/generate_torch_script_model.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.jit + +INPUT_CHANNEL_NUM = 42 +BOARD_SIZE = 9 +BLOCK_NUM = 10 +CHANNEL_NUM = 128 +KERNEL_SIZE = 3 +REDUCTION = 8 +POLICY_CHANNEL_NUM = 27 +VALUE_HIDDEN_NUM = 256 + +class Conv2DwithBatchNorm(nn.Module): + def __init__(self, input_ch, output_ch, kernel_size): + super(Conv2DwithBatchNorm, self).__init__() + self.conv_ = nn.Conv2d(input_ch, output_ch, kernel_size, padding=kernel_size // 2) + self.norm_ = nn.BatchNorm2d(output_ch) + + def forward(self, x): + t = self.conv_.forward(x) + t = self.norm_.forward(t) + return t + +class ResidualBlock(nn.Module): + def __init__(self, channel_num, kernel_size, reduction): + super(ResidualBlock, self).__init__() + self.conv_and_norm0_ = Conv2DwithBatchNorm(channel_num, channel_num, kernel_size) + self.conv_and_norm1_ = Conv2DwithBatchNorm(channel_num, channel_num, kernel_size) + self.linear0_ = nn.Linear(channel_num, channel_num // reduction, bias=False) + self.linear1_ = nn.Linear(channel_num // reduction, channel_num, bias=False) + + def forward(self, x): + t = x + t = self.conv_and_norm0_.forward(t) + t = F.relu(t) + t = self.conv_and_norm1_.forward(t) + + y = F.avg_pool2d(t, [t.shape[2], t.shape[3]]) + y = y.view([-1, t.shape[1]]) + y = self.linear0_.forward(y) + y = F.relu(y) + y = self.linear1_.forward(y) + y = torch.sigmoid(y) + y = y.view([-1, t.shape[1], 1, 1]) + t = t * y + + t = F.relu(x + t) + return t + + +class NeuralNetwork(nn.Module): + def __init__(self): + super(NeuralNetwork, self).__init__() + + self.first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, CHANNEL_NUM, 3) + self.blocks = nn.Sequential() + for i in range(BLOCK_NUM): + self.blocks.add_module(f"block{i}", ResidualBlock(CHANNEL_NUM, KERNEL_SIZE, REDUCTION)) + self.policy_conv_ = nn.Conv2d(CHANNEL_NUM, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) + self.value_conv_and_norm_ = Conv2DwithBatchNorm(CHANNEL_NUM, CHANNEL_NUM, 1) + self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * CHANNEL_NUM, VALUE_HIDDEN_NUM) + self.value_linear1_ = nn.Linear(VALUE_HIDDEN_NUM, 1) + + def forward(self, x): + x = self.first_conv_and_norm_.forward(x) + x = F.relu(x) + + x = self.blocks.forward(x) + policy = self.policy_conv_.forward(x) + + value = self.value_conv_and_norm_.forward(x) + value = F.relu(value) + value = value.view([-1, BOARD_SIZE * BOARD_SIZE * CHANNEL_NUM]) + value = self.value_linear0_.forward(value) + value = F.relu(value) + value = self.value_linear1_.forward(value) + value = torch.sigmoid(value) + + return policy, value + + +model = NeuralNetwork() +input_data = torch.randn([8, INPUT_CHANNEL_NUM, BOARD_SIZE, BOARD_SIZE]) +script_model = torch.jit.trace(model, input_data) +# script_model = torch.jit.script(model) +script_model.save(f"./sca_bl{BLOCK_NUM}_ch{CHANNEL_NUM}.model") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 730a5945..dc11ff30 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -3,7 +3,7 @@ project(Miacis) #共通する設定 set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_FLAGS "-march=native") +set(CMAKE_CXX_FLAGS "-march=native -I/root/trtorch/include/") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") list(APPEND CMAKE_PREFIX_PATH ../../libtorch-1.7.0) @@ -14,7 +14,6 @@ set(SRCS hash_table.cpp replay_buffer.cpp game_generator.cpp neural_network.cpp - neural_network_modules.cpp learn.cpp supervised_learn.cpp reinforcement_learn.cpp @@ -24,7 +23,9 @@ set(SRCS hash_table.cpp hyperparameter_loader.cpp segment_tree.cpp common.cpp - timer.cpp + timer.cpp + learning_model.cpp + infer_model.cpp ) #将棋用のバイナリをコンパイルするのに必要なソース @@ -65,8 +66,8 @@ if (MSVC) target_link_libraries(Miacis_othello_scalar "${TORCH_LIBRARIES}") target_link_libraries(Miacis_othello_categorical "${TORCH_LIBRARIES}") else() - target_link_libraries(Miacis_shogi_scalar pthread stdc++fs "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_shogi_categorical pthread stdc++fs "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_othello_scalar pthread stdc++fs "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_othello_categorical pthread stdc++fs "${TORCH_LIBRARIES}") + target_link_libraries(Miacis_shogi_scalar pthread stdc++fs "${TORCH_LIBRARIES}" /root/trtorch/lib/libtrtorch.so /root/trtorch/lib/libtrtorchrt.so) + target_link_libraries(Miacis_shogi_categorical pthread stdc++fs "${TORCH_LIBRARIES}" /root/trtorch/lib/libtrtorch.so /root/trtorch/lib/libtrtorchrt.so) + target_link_libraries(Miacis_othello_scalar pthread stdc++fs "${TORCH_LIBRARIES}" /root/trtorch/lib/libtrtorch.so /root/trtorch/lib/libtrtorchrt.so) + target_link_libraries(Miacis_othello_categorical pthread stdc++fs "${TORCH_LIBRARIES}" /root/trtorch/lib/libtrtorch.so /root/trtorch/lib/libtrtorchrt.so) endif() \ No newline at end of file diff --git a/src/game_generator.cpp b/src/game_generator.cpp index c6a2e238..0c235cd4 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -78,7 +78,7 @@ void GameGenerator::evalWithGPU(int64_t thread_id) { gpu_mutex.lock(); torch::NoGradGuard no_grad_guard; std::pair, std::vector> result = - neural_network_->policyAndValueBatch(gpu_queues_[thread_id].inputs); + neural_network_.policyAndValueBatch(gpu_queues_[thread_id].inputs); gpu_mutex.unlock(); const std::vector& policies = result.first; const std::vector& values = result.second; diff --git a/src/game_generator.hpp b/src/game_generator.hpp index 9b900118..b4d157ac 100644 --- a/src/game_generator.hpp +++ b/src/game_generator.hpp @@ -6,6 +6,7 @@ #include "search_options.hpp" #include "searcher.hpp" #include "searcher_for_mate.hpp" +#include "infer_model.hpp" #include #include #include @@ -16,11 +17,10 @@ class GameGenerator { public: GameGenerator(const SearchOptions& search_options, int64_t worker_num, float Q_dist_lambda, int64_t noise_mode, - float noise_epsilon, float noise_alpha, ReplayBuffer& rb, NeuralNetwork nn) + float noise_epsilon, float noise_alpha, ReplayBuffer& rb, InferModel& nn) : stop_signal(false), search_options_(search_options), worker_num_(worker_num), Q_dist_lambda_(Q_dist_lambda), noise_mode_(noise_mode), noise_epsilon_(noise_epsilon), noise_alpha_(noise_alpha), replay_buffer_(rb), - neural_network_(std::move(nn)), gpu_queues_(search_options_.thread_num_per_gpu) { - neural_network_->eval(); + neural_network_(nn), gpu_queues_(search_options_.thread_num_per_gpu) { assert(0 <= noise_mode_ && noise_mode_ < NOISE_MODE_SIZE); }; @@ -70,7 +70,7 @@ class GameGenerator { ReplayBuffer& replay_buffer_; //局面評価に用いるネットワーク - NeuralNetwork neural_network_; + InferModel& neural_network_; //評価要求を受け付けるQueue std::vector gpu_queues_; diff --git a/src/infer_model.cpp b/src/infer_model.cpp new file mode 100644 index 00000000..6d786206 --- /dev/null +++ b/src/infer_model.cpp @@ -0,0 +1,56 @@ +#include "infer_model.hpp" +#include "common.hpp" +#include "include_switch.hpp" +#include +#include + +void InferModel::load(const std::string& model_path, int64_t gpu_id) { + module_ = torch::jit::load(model_path); + device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); + module_.to(torch::kCUDA, torch::kHalf); + module_.eval(); + + torch::Tensor in_min = torch::randn({ 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, torch::kCUDA).to(torch::kHalf); + torch::Tensor in_opt = torch::randn({ 128, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, torch::kCUDA).to(torch::kHalf); + torch::Tensor in_max = torch::randn({ 256, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, torch::kCUDA).to(torch::kHalf); + + //trtorch + trtorch::CompileSpec::InputRange range(in_min.sizes(), in_opt.sizes(), in_max.sizes()); + trtorch::CompileSpec info({ range }); + info.op_precision = torch::kHalf; + module_ = trtorch::CompileGraph(module_, info); +} + +std::pair, std::vector> InferModel::policyAndValueBatch(const std::vector& inputs) { + torch::Tensor x = torch::tensor(inputs).to(device_).to(torch::kHalf); + x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); + auto out = module_.forward({ x }); + auto tuple = out.toTuple(); + torch::Tensor policy = tuple->elements()[0].toTensor(); + torch::Tensor value = tuple->elements()[1].toTensor(); + + uint64_t batch_size = inputs.size() / (SQUARE_NUM * INPUT_CHANNEL_NUM); + + std::vector policies(batch_size); + std::vector values(batch_size); + + //CPUに持ってくる + policy = policy.cpu(); + torch::Half* p = policy.data_ptr(); + for (uint64_t i = 0; i < batch_size; i++) { + policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); + } + +#ifdef USE_CATEGORICAL + torch::Tensor value = torch::softmax(y.second, 1).cpu(); + torch::Half* value_p = value.data_ptr(); + for (uint64_t i = 0; i < batch_size; i++) { + std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); + } +#else + //CPUに持ってくる + value = value.cpu(); + std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); +#endif + return std::make_pair(policies, values); +} \ No newline at end of file diff --git a/src/infer_model.hpp b/src/infer_model.hpp new file mode 100644 index 00000000..a33b7ad5 --- /dev/null +++ b/src/infer_model.hpp @@ -0,0 +1,18 @@ +#ifndef INFER_MODEL_HPP +#define INFER_MODEL_HPP + +#include "neural_network.hpp" +#include + +class InferModel { +public: + InferModel() : device_(torch::kCPU) {} + void load(const std::string& model_path, int64_t gpu_id); + std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); + +private: + torch::jit::Module module_; + torch::Device device_; +}; + +#endif \ No newline at end of file diff --git a/src/learn.cpp b/src/learn.cpp index 33cb8ef7..8e6c3947 100644 --- a/src/learn.cpp +++ b/src/learn.cpp @@ -6,7 +6,7 @@ #include #include -std::array validation(NeuralNetwork nn, const std::vector& valid_data, uint64_t batch_size) { +std::array validation(LearningModel& model, const std::vector& valid_data, uint64_t batch_size) { torch::NoGradGuard no_grad_guard; std::array losses{}; for (uint64_t index = 0; index < valid_data.size();) { @@ -15,7 +15,7 @@ std::array validation(NeuralNetwork nn, const std::vector< curr_data.push_back(valid_data[index++]); } - std::array loss = nn->validLoss(curr_data); + std::array loss = model.validLoss(curr_data); for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { losses[i] += loss[i].sum().item(); } @@ -59,12 +59,6 @@ std::vector loadData(const std::string& file_path, bool data_augme return data_buffer; } -void initParams() { - NeuralNetwork nn; - torch::save(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - std::cout << "初期化したパラメータを" << NeuralNetworkImpl::DEFAULT_MODEL_NAME << "に出力" << std::endl; -} - LearnManager::LearnManager(const std::string& learn_name) { assert(learn_name == "supervised" || learn_name == "reinforcement"); HyperparameterLoader settings(learn_name + "_learn_settings.txt"); @@ -82,7 +76,8 @@ LearnManager::LearnManager(const std::string& learn_name) { torch::optim::SGDOptions sgd_option(learn_rate_); sgd_option.momentum(settings.get("momentum")); sgd_option.weight_decay(settings.get("weight_decay")); - optimizer_ = std::make_unique(neural_network->parameters(), sgd_option); + std::vector parameters; + optimizer_ = std::make_unique(neural_network.parameters(), sgd_option); //学習推移のログファイル train_log_.open(learn_name + "_train_log.txt"); @@ -94,11 +89,10 @@ LearnManager::LearnManager(const std::string& learn_name) { } //評価関数読み込み - torch::load(neural_network, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - neural_network->setGPU(0); + neural_network.load(DEFAULT_MODEL_NAME, 0); //学習前のパラメータを出力 - torch::save(neural_network, NeuralNetworkImpl::MODEL_PREFIX + "_before_learn.model"); + neural_network.save(MODEL_PREFIX + "_before_learn.model"); //パラメータの保存間隔 save_interval_ = settings.get("save_interval"); @@ -130,7 +124,7 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d //学習 optimizer_->zero_grad(); - std::array loss = neural_network->loss(curr_data); + std::array loss = neural_network.loss(curr_data); torch::Tensor loss_sum = torch::zeros({ batch_size }); for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { loss_sum += coefficients_[i] * loss[i].cpu(); @@ -174,7 +168,7 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d optimizer_->zero_grad(); //再計算 - loss = neural_network->loss(curr_data); + loss = neural_network.loss(curr_data); loss_sum = torch::zeros({ batch_size }); for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { loss_sum += coefficients_[i] * loss[i].cpu(); @@ -210,9 +204,9 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d if (stem_num % validation_interval_ == 0) { //validation_lossを計算 - neural_network->eval(); + neural_network.eval(); std::array valid_loss = validation(neural_network, valid_data_, batch_size); - neural_network->train(); + neural_network.train(); float sum_loss = 0; for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { sum_loss += coefficients_[i] * valid_loss[i]; @@ -228,7 +222,7 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d //パラメータをステップ付きで保存 if (stem_num % save_interval_ == 0) { - torch::save(neural_network, NeuralNetworkImpl::MODEL_PREFIX + "_" + std::to_string(stem_num) + ".model"); + neural_network.save(MODEL_PREFIX + "_" + std::to_string(stem_num) + ".model"); } //学習率の変化はoptimizer_->defaults();を使えそうな気がする diff --git a/src/learn.hpp b/src/learn.hpp index fc8dca05..6ac9f788 100644 --- a/src/learn.hpp +++ b/src/learn.hpp @@ -2,6 +2,7 @@ #define MIACIS_LEARN_HPP #include "neural_network.hpp" +#include "learning_model.hpp" #include "timer.hpp" //標準出力とファイルストリームに同時に出力するためのクラス @@ -54,7 +55,7 @@ class LearnManager { torch::Tensor learnOneStep(const std::vector& curr_data, int64_t stem_num); //学習するモデル。強化学習時に定期的な同期を挟むためにpublicに置く - NeuralNetwork neural_network; + LearningModel neural_network; private: //Optimizer @@ -104,10 +105,7 @@ class LearnManager { std::vector loadData(const std::string& file_path, bool data_augmentation, float rate_threshold); //validationを行う関数 -std::array validation(NeuralNetwork nn, const std::vector& valid_data, uint64_t batch_size); - -//パラメータを初期化 -void initParams(); +std::array validation(LearningModel& model, const std::vector& valid_data, uint64_t batch_size); //棋譜からの教師あり学習 void supervisedLearn(); diff --git a/src/learning_model.cpp b/src/learning_model.cpp new file mode 100644 index 00000000..cabd3982 --- /dev/null +++ b/src/learning_model.cpp @@ -0,0 +1,141 @@ +#include "learning_model.hpp" +#include "common.hpp" +#include "include_switch.hpp" +#include + +void LearningModel::load(const std::string& model_path, int64_t gpu_id) { + module_ = torch::jit::load(model_path); + device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); + module_.to(torch::kCUDA); +} + +void LearningModel::save(const std::string& model_path) { + module_.save(model_path); +} + +torch::Tensor LearningModel::encode(const std::vector& inputs) const { + torch::Tensor x = torch::tensor(inputs).to(device_); + x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); + return x; +} + +std::array LearningModel::loss(const std::vector& data) { + static Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + value_teachers.push_back(data[i].value); + } + + torch::Tensor input_tensor = encode(inputs); + auto out = module_.forward({ input_tensor }); + auto tuple = out.toTuple(); + torch::Tensor policy = tuple->elements()[0].toTensor(); + torch::Tensor value = tuple->elements()[1].toTensor(); + + torch::Tensor policy_logits = policy.view({ -1, POLICY_DIM }); + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(policy_logits, 1), 1, false); + +#ifdef USE_CATEGORICAL + torch::Tensor categorical_target = torch::tensor(value_teachers).to(device_); + torch::Tensor value_loss = torch::nll_loss(torch::log_softmax(y.second, 1), categorical_target); +#else + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); + value = value.view(-1); +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif +#endif + + return { policy_loss, value_loss }; +} + +std::array LearningModel::validLoss(const std::vector& data) { +#ifdef USE_CATEGORICAL + Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + if (data[i].value != 0 && data[i].value != BIN_SIZE - 1) { + std::cerr << "Categoricalの検証データは現状のところValueが-1 or 1でないといけない" << std::endl; + std::exit(1); + } + value_teachers.push_back(data[i].value == 0 ? MIN_SCORE : MAX_SCORE); + } + + std::pair y = forward(inputs); + torch::Tensor logits = y.first.view({ -1, POLICY_DIM }); + + torch::Tensor policy_target = + (fp16_ ? torch::tensor(policy_teachers).to(device_, torch::kHalf) : torch::tensor(policy_teachers).to(device_)) + .view({ -1, POLICY_DIM }); + + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(logits, 1), 1, false); + + //Valueの分布を取得 + torch::Tensor value_cat = torch::softmax(y.second, 1); + + //i番目の要素が示す値はMIN_SCORE + (i + 0.5) * VALUE_WIDTH + std::vector each_value; + for (int64_t i = 0; i < BIN_SIZE; i++) { + each_value.emplace_back(MIN_SCORE + (i + 0.5) * VALUE_WIDTH); + } + torch::Tensor each_value_tensor = torch::tensor(each_value).to(device_); + + //Categorical分布と内積を取ることで期待値を求める + torch::Tensor value = (each_value_tensor * value_cat).sum(1); + + torch::Tensor value_t = + (fp16_ ? torch::tensor(value_teachers).to(device_, torch::kHalf) : torch::tensor(value_teachers).to(device_)); + +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif + + return { policy_loss, value_loss }; +#else + //Scalarモデルの場合はloss関数と同じ + return loss(data); +#endif +} + +std::vector LearningModel::parameters() { + std::vector parameters; + for (auto p : module_.parameters()) { + parameters.push_back(p); + } + return parameters; +} diff --git a/src/learning_model.hpp b/src/learning_model.hpp new file mode 100644 index 00000000..e7cfafc8 --- /dev/null +++ b/src/learning_model.hpp @@ -0,0 +1,25 @@ +#ifndef LEARNING_MODEL_HPP +#define LEARNING_MODEL_HPP + +#include "neural_network.hpp" +#include + +class LearningModel { +public: + LearningModel() : device_(torch::kCPU) {} + void load(const std::string& model_path, int64_t gpu_id); + void save(const std::string& model_path); + torch::Tensor encode(const std::vector& inputs) const; + std::array loss(const std::vector& data); + std::array validLoss(const std::vector& data); + std::vector parameters(); + + void train() { module_.train() ;} + void eval() { module_.eval(); } + +private: + torch::jit::Module module_; + torch::Device device_; +}; + +#endif \ No newline at end of file diff --git a/src/neural_network.cpp b/src/neural_network.cpp index 7e372e2e..ef212df4 100644 --- a/src/neural_network.cpp +++ b/src/neural_network.cpp @@ -15,236 +15,9 @@ static constexpr int32_t REDUCTION = 8; static constexpr int32_t VALUE_HIDDEN_NUM = 256; #ifdef USE_CATEGORICAL -const std::string NeuralNetworkImpl::MODEL_PREFIX = "cat_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); +const std::string MODEL_PREFIX = "cat_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); #else -const std::string NeuralNetworkImpl::MODEL_PREFIX = "sca_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); +const std::string MODEL_PREFIX = "sca_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); #endif //デフォルトで読み書きするファイル名 -const std::string NeuralNetworkImpl::DEFAULT_MODEL_NAME = NeuralNetworkImpl::MODEL_PREFIX + ".model"; - -NeuralNetworkImpl::NeuralNetworkImpl() : device_(torch::kCUDA), fp16_(false), state_blocks_(BLOCK_NUM, nullptr) { - state_first_conv_and_norm_ = - register_module("state_first_conv_and_norm_", Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, CHANNEL_NUM, KERNEL_SIZE)); - for (int32_t i = 0; i < BLOCK_NUM; i++) { - state_blocks_[i] = - register_module("state_blocks_" + std::to_string(i), ResidualBlock(CHANNEL_NUM, KERNEL_SIZE, REDUCTION)); - } -#ifdef REPRESENTATION_DROPOUT - representation_dropout_ = register_module("representation_dropout_", torch::nn::Dropout2d()); -#endif - policy_conv_ = register_module( - "policy_conv_", torch::nn::Conv2d(torch::nn::Conv2dOptions(CHANNEL_NUM, POLICY_CHANNEL_NUM, 1).padding(0).bias(true))); - value_conv_and_norm_ = register_module("value_conv_and_norm_", Conv2DwithBatchNorm(CHANNEL_NUM, CHANNEL_NUM, 1)); - value_linear0_ = register_module("value_linear0_", torch::nn::Linear(SQUARE_NUM * CHANNEL_NUM, VALUE_HIDDEN_NUM)); - value_linear1_ = register_module("value_linear1_", torch::nn::Linear(VALUE_HIDDEN_NUM, BIN_SIZE)); -} - -torch::Tensor NeuralNetworkImpl::encode(const std::vector& inputs) { - torch::Tensor x = (fp16_ ? torch::tensor(inputs).to(device_, torch::kHalf) : torch::tensor(inputs).to(device_)); - x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); - x = state_first_conv_and_norm_->forward(x); - x = activation(x); - - for (ResidualBlock& block : state_blocks_) { - x = block->forward(x); - } - -#ifdef REPRESENTATION_DROPOUT - x = representation_dropout_->forward(x); -#endif - return x; -} - -std::pair NeuralNetworkImpl::decode(const torch::Tensor& representation) { - //policy - torch::Tensor policy = policy_conv_->forward(representation); - - //value - torch::Tensor value = value_conv_and_norm_->forward(representation); - value = activation(value); - value = value.view({ -1, SQUARE_NUM * CHANNEL_NUM }); - value = value_linear0_->forward(value); - value = activation(value); - value = value_linear1_->forward(value); - -#ifndef USE_CATEGORICAL -#ifdef USE_SIGMOID - value = torch::sigmoid(value); -#else - value = torch::tanh(value); -#endif -#endif - - return { policy, value }; -} - -std::pair NeuralNetworkImpl::forward(const std::vector& inputs) { - return decode(encode(inputs)); -} - -std::pair, std::vector> -NeuralNetworkImpl::policyAndValueBatch(const std::vector& inputs) { - std::pair y = forward(inputs); - - uint64_t batch_size = inputs.size() / (SQUARE_NUM * INPUT_CHANNEL_NUM); - - std::vector policies(batch_size); - std::vector values(batch_size); - - //CPUに持ってくる - torch::Tensor policy = y.first.cpu(); - if (fp16_) { - torch::Half* p = policy.data_ptr(); - for (uint64_t i = 0; i < batch_size; i++) { - policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); - } - } else { - float* p = policy.data_ptr(); - for (uint64_t i = 0; i < batch_size; i++) { - policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); - } - } - -#ifdef USE_CATEGORICAL - torch::Tensor value = torch::softmax(y.second, 1).cpu(); - if (fp16_) { - torch::Half* value_p = value.data_ptr(); - for (uint64_t i = 0; i < batch_size; i++) { - std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); - } - } else { - float* value_p = value.data_ptr(); - for (uint64_t i = 0; i < batch_size; i++) { - std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); - } - } -#else - //CPUに持ってくる - torch::Tensor value = y.second.cpu(); - if (fp16_) { - std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); - } else { - std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); - } -#endif - return { policies, values }; -} - -std::array NeuralNetworkImpl::loss(const std::vector& data) { - static Position pos; - std::vector inputs; - std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); - std::vector value_teachers; - - for (uint64_t i = 0; i < data.size(); i++) { - pos.fromStr(data[i].position_str); - - //入力 - const std::vector feature = pos.makeFeature(); - inputs.insert(inputs.end(), feature.begin(), feature.end()); - - //policyの教師信号 - for (const std::pair& e : data[i].policy) { - policy_teachers[i * POLICY_DIM + e.first] = e.second; - } - - //valueの教師信号 - value_teachers.push_back(data[i].value); - } - - std::pair y = forward(inputs); - torch::Tensor logits = y.first.view({ -1, POLICY_DIM }); - - torch::Tensor policy_target = - (fp16_ ? torch::tensor(policy_teachers).to(device_, torch::kHalf) : torch::tensor(policy_teachers).to(device_)) - .view({ -1, POLICY_DIM }); - - torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(logits, 1), 1, false); - -#ifdef USE_CATEGORICAL - torch::Tensor categorical_target = torch::tensor(value_teachers).to(device_); - torch::Tensor value_loss = torch::nll_loss(torch::log_softmax(y.second, 1), categorical_target); -#else - torch::Tensor value_t = - (fp16_ ? torch::tensor(value_teachers).to(device_, torch::kHalf) : torch::tensor(value_teachers).to(device_)); - torch::Tensor value = y.second.view(-1); -#ifdef USE_SIGMOID - torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); -#else - torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); -#endif -#endif - - return { policy_loss, value_loss }; -} - -std::array NeuralNetworkImpl::validLoss(const std::vector& data) { -#ifdef USE_CATEGORICAL - Position pos; - std::vector inputs; - std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); - std::vector value_teachers; - - for (uint64_t i = 0; i < data.size(); i++) { - pos.fromStr(data[i].position_str); - - //入力 - const std::vector feature = pos.makeFeature(); - inputs.insert(inputs.end(), feature.begin(), feature.end()); - - //policyの教師信号 - for (const std::pair& e : data[i].policy) { - policy_teachers[i * POLICY_DIM + e.first] = e.second; - } - - //valueの教師信号 - if (data[i].value != 0 && data[i].value != BIN_SIZE - 1) { - std::cerr << "Categoricalの検証データは現状のところValueが-1 or 1でないといけない" << std::endl; - std::exit(1); - } - value_teachers.push_back(data[i].value == 0 ? MIN_SCORE : MAX_SCORE); - } - - std::pair y = forward(inputs); - torch::Tensor logits = y.first.view({ -1, POLICY_DIM }); - - torch::Tensor policy_target = - (fp16_ ? torch::tensor(policy_teachers).to(device_, torch::kHalf) : torch::tensor(policy_teachers).to(device_)) - .view({ -1, POLICY_DIM }); - - torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(logits, 1), 1, false); - - //Valueの分布を取得 - torch::Tensor value_cat = torch::softmax(y.second, 1); - - //i番目の要素が示す値はMIN_SCORE + (i + 0.5) * VALUE_WIDTH - std::vector each_value; - for (int64_t i = 0; i < BIN_SIZE; i++) { - each_value.emplace_back(MIN_SCORE + (i + 0.5) * VALUE_WIDTH); - } - torch::Tensor each_value_tensor = torch::tensor(each_value).to(device_); - - //Categorical分布と内積を取ることで期待値を求める - torch::Tensor value = (each_value_tensor * value_cat).sum(1); - - torch::Tensor value_t = - (fp16_ ? torch::tensor(value_teachers).to(device_, torch::kHalf) : torch::tensor(value_teachers).to(device_)); - -#ifdef USE_SIGMOID - torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); -#else - torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); -#endif - - return { policy_loss, value_loss }; -#else - //Scalarモデルの場合はloss関数と同じ - return loss(data); -#endif -} - -void NeuralNetworkImpl::setGPU(int16_t gpu_id, bool fp16) { - device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); - fp16_ = fp16; - (fp16_ ? to(device_, torch::kHalf) : to(device_, torch::kFloat)); -} \ No newline at end of file +const std::string DEFAULT_MODEL_NAME = MODEL_PREFIX + ".model"; \ No newline at end of file diff --git a/src/neural_network.hpp b/src/neural_network.hpp index e1e90e33..a02249e1 100644 --- a/src/neural_network.hpp +++ b/src/neural_network.hpp @@ -1,8 +1,8 @@ #ifndef MIACIS_NEURAL_NETWORK_HPP #define MIACIS_NEURAL_NETWORK_HPP -#include "neural_network_modules.hpp" #include "types.hpp" +#include //型のエイリアス using PolicyType = std::vector; @@ -25,61 +25,15 @@ struct LearningData { ValueTeacherType value; }; +extern const std::string MODEL_PREFIX; +extern const std::string DEFAULT_MODEL_NAME; + //損失の種類 enum LossType { POLICY_LOSS_INDEX, VALUE_LOSS_INDEX, LOSS_TYPE_NUM }; //各損失の名前を示す文字列 const std::array LOSS_TYPE_NAME{ "policy", "value" }; -//#define REPRESENTATION_DROPOUT - -//使用する全体のニューラルネットワーク -class NeuralNetworkImpl : public torch::nn::Module { -public: - NeuralNetworkImpl(); - - //入力として局面の特徴量を並べたvectorを受け取ってPolicyとValueに対応するTensorを返す関数 - std::pair forward(const std::vector& inputs); - - //複数局面の特徴量を1次元vectorにしたものを受け取ってそれぞれに対する評価を返す関数 - std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); - - //学習データについて損失を返す関数 - std::array loss(const std::vector& data); - - //データに対して検証損失を返す関数 - std::array validLoss(const std::vector& data); - - //GPUにネットワークを送る関数 - void setGPU(int16_t gpu_id, bool fp16 = false); - - //評価パラメータを読み書きするファイルのprefix - static const std::string MODEL_PREFIX; - - //デフォルトで読み書きするファイル名 - static const std::string DEFAULT_MODEL_NAME; - -private: - torch::Tensor encode(const std::vector& inputs); - std::pair decode(const torch::Tensor& representation); - - torch::Device device_; - bool fp16_; - - Conv2DwithBatchNorm state_first_conv_and_norm_{ nullptr }; - std::vector state_blocks_; - -#ifdef REPRESENTATION_DROPOUT - torch::nn::Dropout2d representation_dropout_{ nullptr }; -#endif - - torch::nn::Conv2d policy_conv_{ nullptr }; - Conv2DwithBatchNorm value_conv_and_norm_{ nullptr }; - torch::nn::Linear value_linear0_{ nullptr }; - torch::nn::Linear value_linear1_{ nullptr }; -}; -TORCH_MODULE(NeuralNetwork); - //Categorical分布に対する操作 #ifdef USE_CATEGORICAL inline int32_t valueToIndex(float value) { return std::min((int32_t)((value - MIN_SCORE) / VALUE_WIDTH), BIN_SIZE - 1); } diff --git a/src/neural_network_modules.cpp b/src/neural_network_modules.cpp deleted file mode 100644 index e98f6c4d..00000000 --- a/src/neural_network_modules.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "neural_network_modules.hpp" - -#ifdef USE_SEPARABLE_CONV -SeparableConvImpl::SeparableConvImpl(int64_t input_ch, int64_t output_ch, int64_t kernel_size) { - depth_wise_conv_ = - register_module("depth_wise_conv_", torch::nn::Conv2d(torch::nn::Conv2dOptions(input_ch, input_ch, kernel_size) - .with_bias(false) - .padding(kernel_size / 2) - .groups(input_ch))); - point_wise_conv_ = register_module( - "point_wise_conv_", torch::nn::Conv2d(torch::nn::Conv2dOptions(input_ch, output_ch, 1).with_bias(false).padding(0))); -} - -torch::Tensor SeparableConvImpl::forward(const torch::Tensor& x) { return point_wise_conv_(depth_wise_conv_(x)); } -#endif - -Conv2DwithBatchNormImpl::Conv2DwithBatchNormImpl(int64_t input_ch, int64_t output_ch, int64_t kernel_size) { -#ifdef USE_SEPARABLE_CONV - conv_ = register_module("conv_", SeparableConv(input_ch, output_ch, kernel_size)); -#else - conv_ = register_module( - "conv_", - torch::nn::Conv2d(torch::nn::Conv2dOptions(input_ch, output_ch, kernel_size).bias(false).padding(kernel_size / 2))); -#endif - norm_ = register_module("norm_", torch::nn::BatchNorm2d(output_ch)); -} - -torch::Tensor Conv2DwithBatchNormImpl::forward(const torch::Tensor& x) { - torch::Tensor t = x; - t = conv_->forward(t); - t = norm_->forward(t); - return t; -} - -ResidualBlockImpl::ResidualBlockImpl(int64_t channel_num, int64_t kernel_size, int64_t reduction) { - conv_and_norm0_ = register_module("conv_and_norm0_", Conv2DwithBatchNorm(channel_num, channel_num, kernel_size)); - conv_and_norm1_ = register_module("conv_and_norm1_", Conv2DwithBatchNorm(channel_num, channel_num, kernel_size)); - linear0_ = register_module("linear0_", - torch::nn::Linear(torch::nn::LinearOptions(channel_num, channel_num / reduction).bias(false))); - linear1_ = register_module("linear1_", - torch::nn::Linear(torch::nn::LinearOptions(channel_num / reduction, channel_num).bias(false))); -} - -torch::Tensor ResidualBlockImpl::forward(const torch::Tensor& x) { - torch::Tensor t = x; - - t = conv_and_norm0_->forward(t); - t = activation(t); - t = conv_and_norm1_->forward(t); - - //SENet構造 - torch::Tensor y = torch::avg_pool2d(t, { t.size(2), t.size(3) }); - y = y.view({ -1, t.size(1) }); - y = linear0_->forward(y); - y = activation(y); - y = linear1_->forward(y); - y = torch::sigmoid(y); - y = y.view({ -1, t.size(1), 1, 1 }); - t = t * y; - - t = activation(x + t); - return t; -} - -torch::Tensor activation(const torch::Tensor& x) { - //ReLU - return torch::relu(x); - - //Mish - //return x * torch::tanh(torch::softplus(x)); - - //Swish - //return x * torch::sigmoid(x); -} \ No newline at end of file diff --git a/src/neural_network_modules.hpp b/src/neural_network_modules.hpp deleted file mode 100644 index cae15f42..00000000 --- a/src/neural_network_modules.hpp +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef MIACIS_NEURAL_NETWORK_MODULES_HPP -#define MIACIS_NEURAL_NETWORK_MODULES_HPP - -#include - -//#define USE_SEPARABLE_CONV - -#ifdef USE_SEPARABLE_CONV -//Separable Conv -//1回の3×3畳み込みをDepth-wise ConvとPoint-wise Convに分解することで効率化 -class SeparableConvImpl : public torch::nn::Module { -public: - SeparableConvImpl(int64_t input_ch, int64_t output_ch, int64_t kernel_size); - torch::Tensor forward(const torch::Tensor& x); - -private: - torch::nn::Conv2d depth_wise_conv_{ nullptr }; - torch::nn::Conv2d point_wise_conv_{ nullptr }; -}; -TORCH_MODULE(SeparableConv); -#endif - -//畳み込みとBatchNormalizationをまとめたユニット -class Conv2DwithBatchNormImpl : public torch::nn::Module { -public: - Conv2DwithBatchNormImpl(int64_t input_ch, int64_t output_ch, int64_t kernel_size); - torch::Tensor forward(const torch::Tensor& x); - -private: -#ifdef USE_SEPARABLE_CONV - SeparableConv conv_{ nullptr }; -#else - torch::nn::Conv2d conv_{ nullptr }; -#endif - torch::nn::BatchNorm2d norm_{ nullptr }; -}; -TORCH_MODULE(Conv2DwithBatchNorm); - -//残差ブロック:SENetの構造を利用 -class ResidualBlockImpl : public torch::nn::Module { -public: - ResidualBlockImpl(int64_t channel_num, int64_t kernel_size, int64_t reduction); - torch::Tensor forward(const torch::Tensor& x); - -private: - Conv2DwithBatchNorm conv_and_norm0_{ nullptr }; - Conv2DwithBatchNorm conv_and_norm1_{ nullptr }; - torch::nn::Linear linear0_{ nullptr }; - torch::nn::Linear linear1_{ nullptr }; -}; -TORCH_MODULE(ResidualBlock); - -torch::Tensor activation(const torch::Tensor& x); - -#endif //MIACIS_NEURAL_NETWORK_MODULES_HPP \ No newline at end of file diff --git a/src/reinforcement_learn.cpp b/src/reinforcement_learn.cpp index c22bb98c..0d334bb7 100644 --- a/src/reinforcement_learn.cpp +++ b/src/reinforcement_learn.cpp @@ -1,6 +1,8 @@ #include "game_generator.hpp" #include "hyperparameter_loader.hpp" #include "learn.hpp" +#include "infer_model.hpp" +#include void reinforcementLearn() { // clang-format off @@ -58,12 +60,11 @@ void reinforcementLearn() { //GPUの数だけネットワーク,自己対局生成器を生成 size_t gpu_num = torch::getNumGPUs(); - std::vector neural_networks(gpu_num); + std::vector neural_networks(gpu_num); std::vector> generators(gpu_num); std::vector gen_threads; for (uint64_t i = 0; i < gpu_num; i++) { - torch::load(neural_networks[i], NeuralNetworkImpl::DEFAULT_MODEL_NAME); - neural_networks[i]->setGPU(static_cast(i), search_options.use_fp16); + neural_networks[i].load(DEFAULT_MODEL_NAME, static_cast(i)); generators[i] = std::make_unique(search_options, worker_num_per_thread, Q_dist_lambda, noise_mode, noise_epsilon, noise_alpha, replay_buffer, neural_networks[i]); gen_threads.emplace_back([&generators, i]() { generators[i]->genGames(); }); @@ -97,7 +98,7 @@ void reinforcementLearn() { //一定間隔でActorのパラメータをLearnerと同期 if (step_num % update_interval == 0) { //学習パラメータを保存 - torch::save(learn_manager.neural_network, NeuralNetworkImpl::DEFAULT_MODEL_NAME); + learn_manager.neural_network.save(DEFAULT_MODEL_NAME); //各ネットワークで保存されたパラメータを読み込み for (uint64_t i = 0; i < gpu_num; i++) { @@ -107,9 +108,7 @@ void reinforcementLearn() { //ロードするときは一度fp32に直さないとエラーになる //もっと良いやり方はありそうだがなぁ - neural_networks[i]->setGPU(i, false); - torch::load(neural_networks[i], NeuralNetworkImpl::DEFAULT_MODEL_NAME); - neural_networks[i]->setGPU(static_cast(i), search_options.use_fp16); + neural_networks[i].load(DEFAULT_MODEL_NAME, static_cast(i)); if (i > 0) { generators[i]->gpu_mutex.unlock(); } diff --git a/src/search_options.hpp b/src/search_options.hpp index 5e65d2cf..a1c7f78f 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -61,7 +61,7 @@ struct SearchOptions { spin_options.emplace("print_policy_num", SpinOption(print_policy_num = 0, 0, 593)); spin_options.emplace("remained_turn_divisor", SpinOption(remained_turn_divisor = 1, 1, MAX)); spin_options.emplace("hold_moves_num", SpinOption(hold_moves_num = 32, 1, 593)); - filename_options.emplace("model_name", FilenameOption(model_name = NeuralNetworkImpl::DEFAULT_MODEL_NAME)); + filename_options.emplace("model_name", FilenameOption(model_name = DEFAULT_MODEL_NAME)); filename_options.emplace("book_file_name", FilenameOption(book_file_name = "book.txt")); // clang-format on } diff --git a/src/searcher_for_play.cpp b/src/searcher_for_play.cpp index 320842b5..56350acb 100644 --- a/src/searcher_for_play.cpp +++ b/src/searcher_for_play.cpp @@ -16,9 +16,7 @@ SearcherForPlay::SearcherForPlay(const SearchOptions& search_options) //GPUを準備 for (int64_t i = 0; i < search_options.gpu_num; i++) { neural_networks_.emplace_back(); - torch::load(neural_networks_[i], search_options_.model_name); - neural_networks_[i]->setGPU(i, search_options_.use_fp16); - neural_networks_[i]->eval(); + neural_networks_[i].load(search_options_.model_name, i); } //GPUに対するmutexを準備 @@ -98,7 +96,7 @@ Move SearcherForPlay::think(Position& root, int64_t time_limit) { } torch::NoGradGuard no_grad_guard; std::pair, std::vector> y = - neural_networks_[0]->policyAndValueBatch(gpu_queues_[0][0].inputs); + neural_networks_[0].policyAndValueBatch(gpu_queues_[0][0].inputs); //ルートノードへ書き込み curr_node.nn_policy.resize(curr_node.moves.size()); @@ -256,7 +254,7 @@ void SearcherForPlay::workerThreadFunc(Position root, int64_t gpu_id, int64_t th torch::NoGradGuard no_grad_guard; gpu_mutexes_[gpu_id].lock(); std::pair, std::vector> y = - neural_networks_[gpu_id]->policyAndValueBatch(gpu_queue.inputs); + neural_networks_[gpu_id].policyAndValueBatch(gpu_queue.inputs); gpu_mutexes_[gpu_id].unlock(); //書き込み diff --git a/src/searcher_for_play.hpp b/src/searcher_for_play.hpp index 27837629..ca6302ac 100644 --- a/src/searcher_for_play.hpp +++ b/src/searcher_for_play.hpp @@ -4,6 +4,7 @@ #include "neural_network.hpp" #include "searcher.hpp" #include "searcher_for_mate.hpp" +#include "infer_model.hpp" #include #include @@ -46,7 +47,7 @@ class SearcherForPlay { HashTable hash_table_; //GPUは複数 - std::vector neural_networks_; + std::vector neural_networks_; std::vector gpu_mutexes_; //1つのGPUに対してgpu_queue,searcherを複数 diff --git a/src/shogi/interface.cpp b/src/shogi/interface.cpp index 29ef24f0..e61281e7 100644 --- a/src/shogi/interface.cpp +++ b/src/shogi/interface.cpp @@ -19,7 +19,6 @@ Interface::Interface() : searcher_(nullptr) { command_["gameover"] = [this] { gameover(); }; //メンバ関数以外 - command_["initParams"] = initParams; command_["cleanGames"] = cleanGames; command_["supervisedLearn"] = supervisedLearn; command_["reinforcementLearn"] = reinforcementLearn; @@ -35,7 +34,6 @@ Interface::Interface() : searcher_(nullptr) { command_["checkBook"] = checkBook; command_["makeBook"] = makeBook; command_["searchWithLog"] = searchWithLog; - command_["convertModelToCPU"] = convertModelToCPU; // clang-format on } diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index f7c0b3ee..20a391a2 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -1,5 +1,6 @@ #include "test.hpp" #include "../game_generator.hpp" +#include "../infer_model.hpp" #include "../searcher_for_play.hpp" #include "book.hpp" @@ -12,10 +13,8 @@ void test() { search_options.thread_num_per_gpu = 1; search_options.search_batch_size = 1; search_options.output_log_file = true; - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - nn->setGPU(0); - nn->eval(); + InferModel nn; + nn.load(DEFAULT_MODEL_NAME, 0); SearcherForPlay searcher(search_options); Position pos; @@ -82,8 +81,8 @@ void infiniteTest() { } void checkGenSpeed() { - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); + InferModel nn; + nn.load(DEFAULT_MODEL_NAME, 0); constexpr int64_t buffer_size = 1048576; SearchOptions search_options; @@ -96,9 +95,6 @@ void checkGenSpeed() { constexpr float noise_epsilon = 0.25; constexpr float noise_alpha = 0.15; - nn->setGPU(0, search_options.use_fp16); - nn->eval(); - int64_t total_worker_num = 0; std::cout << "total_worker_num(デフォルトは128): "; std::cin >> total_worker_num; @@ -263,10 +259,9 @@ void checkVal() { std::cout << "data.size() = " << data.size() << std::endl; //ネットワークの準備 - NeuralNetwork nn; - torch::load(nn, model_file); - nn->setGPU(0); - nn->eval(); + LearningModel nn; + nn.load(model_file, 0); + nn.eval(); std::array v = validation(nn, data, batch_size); std::cout << std::fixed << std::setprecision(4); @@ -280,10 +275,8 @@ void checkPredictSpeed() { constexpr int64_t REPEAT_NUM = 1000; std::cout << std::fixed; - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - nn->setGPU(0); - nn->eval(); + InferModel nn; + nn.load(DEFAULT_MODEL_NAME, 0); for (int64_t batch_size = 1; batch_size <= 4096; batch_size *= 2) { //バッチサイズ分入力を取得 @@ -307,7 +300,7 @@ void checkPredictSpeed() { for (int64_t i = 0; i < REPEAT_NUM; i++) { auto start = std::chrono::steady_clock::now(); torch::NoGradGuard no_grad_guard; - nn->policyAndValueBatch(pos.makeFeature()); + nn.policyAndValueBatch(pos.makeFeature()); auto end = std::chrono::steady_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); time += elapsed.count(); @@ -464,13 +457,4 @@ void searchWithLog() { } } -void convertModelToCPU() { - //ネットワークの準備 - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - nn->to(torch::kCPU); - torch::save(nn, NeuralNetworkImpl::MODEL_PREFIX + "_cpu.model"); - std::cout << "finish convertModelToCPU" << std::endl; -} - } // namespace Shogi \ No newline at end of file diff --git a/src/shogi/test.hpp b/src/shogi/test.hpp index 36d161de..a5e981cf 100644 --- a/src/shogi/test.hpp +++ b/src/shogi/test.hpp @@ -16,7 +16,6 @@ void checkMirror(); void checkBook(); void makeBook(); void searchWithLog(); -void convertModelToCPU(); } // namespace Shogi From fb1a01b16ac8d0b3b70e0aa71f7e462950b76cdb Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 15:02:28 +0900 Subject: [PATCH 03/87] =?UTF-8?q?Dockerfile=E3=82=92=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E3=80=82=E6=B4=97=E7=B7=B4=E3=81=95=E3=82=8C=E3=81=A6=E3=81=AA?= =?UTF-8?q?=E3=81=84=E3=81=8C=E3=81=A8=E3=82=8A=E3=81=82=E3=81=88=E3=81=9A?= =?UTF-8?q?=E5=8B=95=E3=81=8F=E3=81=93=E3=81=A8=E3=81=AF=E7=A2=BA=E8=AA=8D?= =?UTF-8?q?=E3=81=97=E3=81=A6=E3=81=82=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/Dockerfile | 94 +++++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 35 deletions(-) diff --git a/scripts/Dockerfile b/scripts/Dockerfile index 00273207..477b23fe 100644 --- a/scripts/Dockerfile +++ b/scripts/Dockerfile @@ -1,41 +1,65 @@ -FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 -MAINTAINER Shintaro Sakoda - -RUN set -x && \ - : "必要なものをインストール" && \ - apt-get update && \ - apt-get install sudo -y && \ - sudo apt-get install git -y && \ - sudo apt-get install vim -y && \ - sudo apt-get install cmake -y && \ - sudo apt-get install python3 -y && \ - sudo apt-get install python3-pip -y && \ - sudo apt-get install p7zip-full -y && \ - sudo apt-get install wget -y && \ - sudo apt-get install curl -y && \ - sudo apt-get install zip -y && \ - sudo apt-get install unzip -y && \ - pip3 install natsort && \ - : "日本語の導入" && \ - sudo apt-get install language-pack-ja-base language-pack-ja -y && \ - echo "export LANG='ja_JP.UTF-8'" >> ~/.bashrc && \ - : "Miacisの取得" && \ - cd ~ && \ - git clone https://github.com/SakodaShintaro/Miacis && \ - : "libtorchの取得" && \ - ./Miacis/scripts/download_libtorch.sh && \ - : "ビルド更新スクリプトの準備" && \ - mkdir Miacis/src/cmake-build-release && \ - cd Miacis/src/cmake-build-release && \ - echo "git fetch" > update.sh && \ +FROM nvcr.io/nvidia/pytorch:20.10-py3 + +RUN apt-get update && apt-get install -y curl gnupg && rm -rf /var/lib/apt/lists/* + +RUN curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \ +echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list + +RUN apt-get update && apt-get install -y bazel-3.7.1 && rm -rf /var/lib/apt/lists/* +RUN ln -s /usr/bin/bazel-3.7.1 /usr/bin/bazel + +RUN pip install notebook + +# trtorchの導入 +WORKDIR /opt +RUN git clone https://github.com/NVIDIA/TRTorch trtorch +WORKDIR /opt/trtorch +RUN git checkout b228bf239aadd5f104af38ea64416bdda5f0aa57 +RUN cp /opt/trtorch/docker/WORKSPACE.cu.docker /opt/trtorch/WORKSPACE + +# Workaround for bazel expecting both static and shared versions, we only use shared libraries inside container +RUN cp /usr/lib/x86_64-linux-gnu/libnvinfer.so /usr/lib/x86_64-linux-gnu/libnvinfer_static.a + +WORKDIR /opt/trtorch +RUN bazel build //:libtrtorch --compilation_mode opt + +WORKDIR /opt/trtorch/py + +RUN pip install ipywidgets +RUN jupyter nbextension enable --py widgetsnbextension + +# Locale is not set by default +RUN apt-get update && apt-get install -y locales ninja-build && rm -rf /var/lib/apt/lists/* && locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 +RUN python3 setup.py install --use-cxx11-abi + +RUN conda init bash + +ENV LD_LIBRARY_PATH /opt/conda/lib/python3.6/site-packages/torch/lib:$LD_LIBRARY_PATh + + +# ここから自分の設定 +# 言語の設定 +RUN apt-get update && apt-get install -y language-pack-ja-base language-pack-ja && rm -rf /var/lib/apt/lists/* +ENV LANG='ja_JP.UTF-8' + +# trtorchを適切な場所へ展開 +WORKDIR /root +RUN tar xvf /opt/trtorch/bazel-bin/libtrtorch.tar.gz . + +# Miacisの導入 +RUN git clone https://github.com/SakodaShintaro/Miacis +RUN ./Miacis/scripts/download_libtorch.sh +WORKDIR /root/Miacis/src/cmake-build-release +RUN echo "git fetch" > update.sh && \ echo "git reset --hard origin/master" >> update.sh && \ echo "cmake -DCMAKE_BUILD_TYPE=Release .." >> update.sh && \ echo "make -j$(nproc)" >> update.sh && \ chmod +x update.sh && \ - ./update.sh && \ - : "dotfilesの取得" && \ - cd ~ && \ - git clone https://github.com/SakodaShintaro/dotfiles && \ - ./dotfiles/setup.sh + ./update.sh +# dotfileの導入 WORKDIR /root +RUN git clone https://github.com/SakodaShintaro/dotfiles && ./dotfiles/setup.sh \ No newline at end of file From 3ac37a4338141d588a30a8228f2a27a00ae104fa Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 10:54:33 +0000 Subject: [PATCH 04/87] =?UTF-8?q?value=E3=81=AE=E6=9C=80=E5=BE=8C=E3=82=92?= =?UTF-8?q?sigmoid=E3=81=8B=E3=82=89tanh=E3=81=AB=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/generate_torch_script_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_torch_script_model.py b/scripts/generate_torch_script_model.py index def6957f..8949b261 100755 --- a/scripts/generate_torch_script_model.py +++ b/scripts/generate_torch_script_model.py @@ -77,7 +77,7 @@ def forward(self, x): value = self.value_linear0_.forward(value) value = F.relu(value) value = self.value_linear1_.forward(value) - value = torch.sigmoid(value) + value = torch.tanh(value) return policy, value From 29c762189c2333de515b74e2fb5edfc20ad0d6bd Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 11:36:24 +0000 Subject: [PATCH 05/87] =?UTF-8?q?optimizer=E3=81=AE=E7=94=9F=E6=88=90?= =?UTF-8?q?=E3=81=A8=E3=83=A2=E3=83=87=E3=83=AB=E3=81=AE=E8=AA=AD=E3=81=BF?= =?UTF-8?q?=E8=BE=BC=E3=81=BF=E3=81=AE=E9=A0=86=E7=95=AA=E3=82=92=E5=A4=89?= =?UTF-8?q?=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/learn.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/learn.cpp b/src/learn.cpp index 8e6c3947..6f8eed6e 100644 --- a/src/learn.cpp +++ b/src/learn.cpp @@ -6,7 +6,8 @@ #include #include -std::array validation(LearningModel& model, const std::vector& valid_data, uint64_t batch_size) { +std::array validation(LearningModel& model, const std::vector& valid_data, + uint64_t batch_size) { torch::NoGradGuard no_grad_guard; std::array losses{}; for (uint64_t index = 0; index < valid_data.size();) { @@ -71,14 +72,6 @@ LearnManager::LearnManager(const std::string& learn_name) { coefficients_[i] = settings.get(LOSS_TYPE_NAME[i] + "_loss_coeff"); } - //optimizerの準備 - learn_rate_ = settings.get("learn_rate"); - torch::optim::SGDOptions sgd_option(learn_rate_); - sgd_option.momentum(settings.get("momentum")); - sgd_option.weight_decay(settings.get("weight_decay")); - std::vector parameters; - optimizer_ = std::make_unique(neural_network.parameters(), sgd_option); - //学習推移のログファイル train_log_.open(learn_name + "_train_log.txt"); valid_log_.open(learn_name + "_valid_log.txt"); @@ -94,6 +87,14 @@ LearnManager::LearnManager(const std::string& learn_name) { //学習前のパラメータを出力 neural_network.save(MODEL_PREFIX + "_before_learn.model"); + //optimizerの準備 + learn_rate_ = settings.get("learn_rate"); + torch::optim::SGDOptions sgd_option(learn_rate_); + sgd_option.momentum(settings.get("momentum")); + sgd_option.weight_decay(settings.get("weight_decay")); + std::vector parameters; + optimizer_ = std::make_unique(neural_network.parameters(), sgd_option); + //パラメータの保存間隔 save_interval_ = settings.get("save_interval"); From c465c2d58346030903d3664cbb2d279a6da3f17d Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 11:39:14 +0000 Subject: [PATCH 06/87] =?UTF-8?q?initParmas=E3=82=92=E5=BB=83=E6=AD=A2?= =?UTF-8?q?=E3=81=97=E3=81=9F=E3=81=AE=E3=81=A7=E3=81=9D=E3=82=8C=E3=81=AB?= =?UTF-8?q?=E5=90=88=E3=82=8F=E3=81=9B=E3=81=A6=E5=AD=A6=E7=BF=92=E3=82=B9?= =?UTF-8?q?=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/reinforcement_learn.sh | 3 ++- scripts/supervised_learn.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/reinforcement_learn.sh b/scripts/reinforcement_learn.sh index 8e3e99a4..c3bf2f74 100755 --- a/scripts/reinforcement_learn.sh +++ b/scripts/reinforcement_learn.sh @@ -1,6 +1,7 @@ git show -s >git_commit_id.txt git diff >>git_commit_id.txt -echo -e "initParams\nreinforcementLearn\nquit\n" | ./Miacis_* +~/Miacis/script/generate_torch_script_model.py +echo -e "reinforcementLearn\nquit\n" | ./Miacis_* zip -rq learn_kifu.zip learn_kifu rm -rf learn_kifu scp -r $(pwd) sakoda:~/learn_result/reinforcement/ diff --git a/scripts/supervised_learn.sh b/scripts/supervised_learn.sh index 959afde6..dd5d9672 100755 --- a/scripts/supervised_learn.sh +++ b/scripts/supervised_learn.sh @@ -1,4 +1,5 @@ git show -s > git_commit_id.txt git diff >> git_commit_id.txt -echo -e "initParams\nsupervisedLearn\nquit\n" | ./Miacis_* +~/Miacis/script/generate_torch_script_model.py +echo -e "supervisedLearn\nquit\n" | ./Miacis_* scp -r `pwd` sakoda:~/learn_result/supervised/ \ No newline at end of file From 3891525d92a6c1f0e9931039f5eb4bf69130693e Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 11:40:16 +0000 Subject: [PATCH 07/87] fix typo --- scripts/reinforcement_learn.sh | 2 +- scripts/supervised_learn.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/reinforcement_learn.sh b/scripts/reinforcement_learn.sh index c3bf2f74..03c4c6b1 100755 --- a/scripts/reinforcement_learn.sh +++ b/scripts/reinforcement_learn.sh @@ -1,6 +1,6 @@ git show -s >git_commit_id.txt git diff >>git_commit_id.txt -~/Miacis/script/generate_torch_script_model.py +~/Miacis/scripts/generate_torch_script_model.py echo -e "reinforcementLearn\nquit\n" | ./Miacis_* zip -rq learn_kifu.zip learn_kifu rm -rf learn_kifu diff --git a/scripts/supervised_learn.sh b/scripts/supervised_learn.sh index dd5d9672..58a9512c 100755 --- a/scripts/supervised_learn.sh +++ b/scripts/supervised_learn.sh @@ -1,5 +1,5 @@ git show -s > git_commit_id.txt git diff >> git_commit_id.txt -~/Miacis/script/generate_torch_script_model.py +~/Miacis/scripts/generate_torch_script_model.py echo -e "supervisedLearn\nquit\n" | ./Miacis_* scp -r `pwd` sakoda:~/learn_result/supervised/ \ No newline at end of file From cc39e3620a60569ca3b106676bbabcf248aaadc5 Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 12:20:51 +0000 Subject: [PATCH 08/87] =?UTF-8?q?shogi=E3=81=AEcategorical=E3=81=AB?= =?UTF-8?q?=E3=82=82=E5=AF=BE=E5=BF=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/generate_torch_script_model.py | 84 ++++++++++++++++++++------ src/infer_model.cpp | 4 +- src/learning_model.cpp | 20 +++--- src/neural_network.cpp | 12 +++- 4 files changed, 88 insertions(+), 32 deletions(-) diff --git a/scripts/generate_torch_script_model.py b/scripts/generate_torch_script_model.py index 8949b261..fd7e22ac 100755 --- a/scripts/generate_torch_script_model.py +++ b/scripts/generate_torch_script_model.py @@ -3,15 +3,29 @@ import torch.nn as nn import torch.nn.functional as F import torch.jit +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("-game", choices=["shogi", "othello"]) +parser.add_argument("-value_type", choices=["sca", "cat"]) +parser.add_argument("--block_num", type=int, default=10) +parser.add_argument("--channel_num", type=int, default=128) +args = parser.parse_args() -INPUT_CHANNEL_NUM = 42 -BOARD_SIZE = 9 -BLOCK_NUM = 10 -CHANNEL_NUM = 128 -KERNEL_SIZE = 3 REDUCTION = 8 -POLICY_CHANNEL_NUM = 27 -VALUE_HIDDEN_NUM = 256 +KERNEL_SIZE = 3 + +if args.game == "shogi": + INPUT_CHANNEL_NUM = 42 + BOARD_SIZE = 9 + POLICY_CHANNEL_NUM = 27 + VALUE_HIDDEN_NUM = 256 +elif args.game == "othello": + INPUT_CHANNEL_NUM = 2 + BOARD_SIZE = 8 + POLICY_CHANNEL_NUM = 2 + VALUE_HIDDEN_NUM = 256 + class Conv2DwithBatchNorm(nn.Module): def __init__(self, input_ch, output_ch, kernel_size): @@ -51,17 +65,17 @@ def forward(self, x): return t -class NeuralNetwork(nn.Module): - def __init__(self): - super(NeuralNetwork, self).__init__() +class ScalarNetwork(nn.Module): + def __init__(self, channel_num): + super(ScalarNetwork, self).__init__() - self.first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, CHANNEL_NUM, 3) + self.first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, channel_num, 3) self.blocks = nn.Sequential() - for i in range(BLOCK_NUM): - self.blocks.add_module(f"block{i}", ResidualBlock(CHANNEL_NUM, KERNEL_SIZE, REDUCTION)) - self.policy_conv_ = nn.Conv2d(CHANNEL_NUM, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) - self.value_conv_and_norm_ = Conv2DwithBatchNorm(CHANNEL_NUM, CHANNEL_NUM, 1) - self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * CHANNEL_NUM, VALUE_HIDDEN_NUM) + for i in range(args.block_num): + self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, KERNEL_SIZE, REDUCTION)) + self.policy_conv_ = nn.Conv2d(channel_num, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) + self.value_conv_and_norm_ = Conv2DwithBatchNorm(channel_num, channel_num, 1) + self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * channel_num, VALUE_HIDDEN_NUM) self.value_linear1_ = nn.Linear(VALUE_HIDDEN_NUM, 1) def forward(self, x): @@ -73,7 +87,7 @@ def forward(self, x): value = self.value_conv_and_norm_.forward(x) value = F.relu(value) - value = value.view([-1, BOARD_SIZE * BOARD_SIZE * CHANNEL_NUM]) + value = value.view([-1, args.channel_num * BOARD_SIZE * BOARD_SIZE]) value = self.value_linear0_.forward(value) value = F.relu(value) value = self.value_linear1_.forward(value) @@ -81,9 +95,41 @@ def forward(self, x): return policy, value +class CategoricalNetwork(nn.Module): + def __init__(self, channel_num): + super(CategoricalNetwork, self).__init__() + + self.first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, channel_num, 3) + self.blocks = nn.Sequential() + for i in range(args.block_num): + self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, KERNEL_SIZE, REDUCTION)) + self.policy_conv_ = nn.Conv2d(channel_num, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) + self.value_conv_and_norm_ = Conv2DwithBatchNorm(channel_num, channel_num, 1) + self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * channel_num, VALUE_HIDDEN_NUM) + self.value_linear1_ = nn.Linear(VALUE_HIDDEN_NUM, 51) + + def forward(self, x): + x = self.first_conv_and_norm_.forward(x) + x = F.relu(x) + + x = self.blocks.forward(x) + policy = self.policy_conv_.forward(x) + + value = self.value_conv_and_norm_.forward(x) + value = F.relu(value) + value = value.view([-1, args.channel_num * BOARD_SIZE * BOARD_SIZE]) + value = self.value_linear0_.forward(value) + value = F.relu(value) + value = self.value_linear1_.forward(value) + + return policy, value -model = NeuralNetwork() +model = None +if args.value_type == "sca": + model = ScalarNetwork(args.channel_num) +elif args.value_type == "cat": + model = CategoricalNetwork(args.channel_num) input_data = torch.randn([8, INPUT_CHANNEL_NUM, BOARD_SIZE, BOARD_SIZE]) script_model = torch.jit.trace(model, input_data) # script_model = torch.jit.script(model) -script_model.save(f"./sca_bl{BLOCK_NUM}_ch{CHANNEL_NUM}.model") +script_model.save(f"./{args.game}_{args.value_type}_bl{args.block_num}_ch{args.channel_num}.model") diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 6d786206..15f0e6bc 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -27,7 +27,7 @@ std::pair, std::vector> InferModel::policyAnd auto out = module_.forward({ x }); auto tuple = out.toTuple(); torch::Tensor policy = tuple->elements()[0].toTensor(); - torch::Tensor value = tuple->elements()[1].toTensor(); + torch::Tensor value_logit = tuple->elements()[1].toTensor(); uint64_t batch_size = inputs.size() / (SQUARE_NUM * INPUT_CHANNEL_NUM); @@ -42,7 +42,7 @@ std::pair, std::vector> InferModel::policyAnd } #ifdef USE_CATEGORICAL - torch::Tensor value = torch::softmax(y.second, 1).cpu(); + torch::Tensor value = torch::softmax(value_logit, 1).cpu(); torch::Half* value_p = value.data_ptr(); for (uint64_t i = 0; i < batch_size; i++) { std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); diff --git a/src/learning_model.cpp b/src/learning_model.cpp index cabd3982..4bfa19f8 100644 --- a/src/learning_model.cpp +++ b/src/learning_model.cpp @@ -53,7 +53,7 @@ std::array LearningModel::loss(const std::vector LearningModel::validLoss(const std::vec value_teachers.push_back(data[i].value == 0 ? MIN_SCORE : MAX_SCORE); } - std::pair y = forward(inputs); - torch::Tensor logits = y.first.view({ -1, POLICY_DIM }); + torch::Tensor input_tensor = encode(inputs); + auto out = module_.forward({ input_tensor }); + auto tuple = out.toTuple(); + torch::Tensor policy_logit = tuple->elements()[0].toTensor(); + torch::Tensor value_logit = tuple->elements()[1].toTensor(); - torch::Tensor policy_target = - (fp16_ ? torch::tensor(policy_teachers).to(device_, torch::kHalf) : torch::tensor(policy_teachers).to(device_)) - .view({ -1, POLICY_DIM }); + torch::Tensor logits = policy_logit.view({ -1, POLICY_DIM }); + + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(logits, 1), 1, false); //Valueの分布を取得 - torch::Tensor value_cat = torch::softmax(y.second, 1); + torch::Tensor value_cat = torch::softmax(value_logit, 1); //i番目の要素が示す値はMIN_SCORE + (i + 0.5) * VALUE_WIDTH std::vector each_value; @@ -116,8 +119,7 @@ std::array LearningModel::validLoss(const std::vec //Categorical分布と内積を取ることで期待値を求める torch::Tensor value = (each_value_tensor * value_cat).sum(1); - torch::Tensor value_t = - (fp16_ ? torch::tensor(value_teachers).to(device_, torch::kHalf) : torch::tensor(value_teachers).to(device_)); + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); #ifdef USE_SIGMOID torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); diff --git a/src/neural_network.cpp b/src/neural_network.cpp index ef212df4..850568dd 100644 --- a/src/neural_network.cpp +++ b/src/neural_network.cpp @@ -15,9 +15,17 @@ static constexpr int32_t REDUCTION = 8; static constexpr int32_t VALUE_HIDDEN_NUM = 256; #ifdef USE_CATEGORICAL -const std::string MODEL_PREFIX = "cat_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); +#ifdef SHOGI +const std::string MODEL_PREFIX = "shogi_cat_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); +#else +const std::string MODEL_PREFIX = "othello_cat_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); +#endif #else -const std::string MODEL_PREFIX = "sca_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); +#ifdef SHOGI +const std::string MODEL_PREFIX = "shogi_sca_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); +#else +const std::string MODEL_PREFIX = "othello_sca_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); +#endif #endif //デフォルトで読み書きするファイル名 const std::string DEFAULT_MODEL_NAME = MODEL_PREFIX + ".model"; \ No newline at end of file From dd86412723b309c195ba55f699d0ff4ab42b7794 Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 12:22:38 +0000 Subject: [PATCH 09/87] =?UTF-8?q?=E5=90=8D=E5=89=8D=E3=81=AE=E8=A1=9D?= =?UTF-8?q?=E7=AA=81=E3=81=AB=E5=AF=BE=E5=87=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 15f0e6bc..4bc7c1f8 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -27,7 +27,7 @@ std::pair, std::vector> InferModel::policyAnd auto out = module_.forward({ x }); auto tuple = out.toTuple(); torch::Tensor policy = tuple->elements()[0].toTensor(); - torch::Tensor value_logit = tuple->elements()[1].toTensor(); + torch::Tensor value = tuple->elements()[1].toTensor(); uint64_t batch_size = inputs.size() / (SQUARE_NUM * INPUT_CHANNEL_NUM); @@ -42,7 +42,7 @@ std::pair, std::vector> InferModel::policyAnd } #ifdef USE_CATEGORICAL - torch::Tensor value = torch::softmax(value_logit, 1).cpu(); + value = torch::softmax(value, 1).cpu(); torch::Half* value_p = value.data_ptr(); for (uint64_t i = 0; i < batch_size; i++) { std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); From ea70d712c5fba9c997952f6bd97fc807de792b72 Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 12:37:28 +0000 Subject: [PATCH 10/87] =?UTF-8?q?=E3=82=AA=E3=82=BB=E3=83=AD=E3=81=A7?= =?UTF-8?q?=E3=82=82=E3=82=B3=E3=83=B3=E3=83=91=E3=82=A4=E3=83=AB=E5=8F=AF?= =?UTF-8?q?=E8=83=BD=E3=81=AB=E3=81=AA=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/othello/interface.cpp | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/othello/interface.cpp b/src/othello/interface.cpp index 2e448af3..846bc2a7 100644 --- a/src/othello/interface.cpp +++ b/src/othello/interface.cpp @@ -22,7 +22,6 @@ Interface::Interface() : searcher_(nullptr) { command_["quit"] = [this] { quit(); }; //メンバ関数以外 - command_["initParams"] = initParams; command_["supervisedLearn"] = supervisedLearn; command_["reinforcementLearn"] = reinforcementLearn; // clang-format on @@ -107,10 +106,8 @@ void Interface::test() { search_options.thread_num_per_gpu = 1; search_options.search_batch_size = 1; search_options.output_log_file = true; - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - nn->setGPU(0); - nn->eval(); + InferModel nn; + nn.load(DEFAULT_MODEL_NAME, 0); SearcherForPlay searcher(search_options); Position pos; @@ -289,9 +286,8 @@ void Interface::quit() { void Interface::outputValue() { root_.init(); std::ofstream ofs("value_output.txt"); - NeuralNetwork nn; - torch::load(nn, options_.model_name); - nn->setGPU(0); + InferModel nn; + nn.load(options_.model_name, 0); std::uniform_real_distribution dist(0.0, 1.0); @@ -300,7 +296,7 @@ void Interface::outputValue() { std::vector feature = root_.makeFeature(); root_.print(); - std::pair, std::vector> y = nn->policyAndValueBatch(feature); + std::pair, std::vector> y = nn.policyAndValueBatch(feature); PolicyType policy; std::vector moves = root_.generateAllMoves(); for (const Move& move : moves) { From 3cc6abfce5f2849eeecc165402546853667e1eac Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 13:48:03 +0000 Subject: [PATCH 11/87] =?UTF-8?q?device=5F=E3=82=92=E4=BD=BF=E3=81=A3?= =?UTF-8?q?=E3=81=A6=E3=81=84=E3=81=AA=E3=81=8B=E3=81=A3=E3=81=9F=E3=83=9F?= =?UTF-8?q?=E3=82=B9=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 8 ++++---- src/learning_model.cpp | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 4bc7c1f8..a7739d03 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -7,12 +7,12 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id) { module_ = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); - module_.to(torch::kCUDA, torch::kHalf); + module_.to(device_, torch::kHalf); module_.eval(); - torch::Tensor in_min = torch::randn({ 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, torch::kCUDA).to(torch::kHalf); - torch::Tensor in_opt = torch::randn({ 128, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, torch::kCUDA).to(torch::kHalf); - torch::Tensor in_max = torch::randn({ 256, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, torch::kCUDA).to(torch::kHalf); + torch::Tensor in_min = torch::randn({ 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, device_).to(torch::kHalf); + torch::Tensor in_opt = torch::randn({ 128, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, device_).to(torch::kHalf); + torch::Tensor in_max = torch::randn({ 256, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, device_).to(torch::kHalf); //trtorch trtorch::CompileSpec::InputRange range(in_min.sizes(), in_opt.sizes(), in_max.sizes()); diff --git a/src/learning_model.cpp b/src/learning_model.cpp index 4bfa19f8..ce439dd7 100644 --- a/src/learning_model.cpp +++ b/src/learning_model.cpp @@ -6,7 +6,7 @@ void LearningModel::load(const std::string& model_path, int64_t gpu_id) { module_ = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); - module_.to(torch::kCUDA); + module_.to(device_); } void LearningModel::save(const std::string& model_path) { From e76f7a572fe0a2dce396586571f89ab49653a58f Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Feb 2021 22:34:48 +0000 Subject: [PATCH 12/87] =?UTF-8?q?info=E3=81=ABgpu=5Fid=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index a7739d03..af907210 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -18,6 +18,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id) { trtorch::CompileSpec::InputRange range(in_min.sizes(), in_opt.sizes(), in_max.sizes()); trtorch::CompileSpec info({ range }); info.op_precision = torch::kHalf; + info.device.gpu_id = gpu_id; module_ = trtorch::CompileGraph(module_, info); } From b95e5fd0f7c512d5bf24dbeab0580172c7d61ed3 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Feb 2021 01:01:24 +0000 Subject: [PATCH 13/87] =?UTF-8?q?=E3=81=97=E3=81=B0=E3=82=89=E3=81=8F?= =?UTF-8?q?=E3=81=AF2015=E3=81=AE=E3=81=BF=E3=82=92=E3=83=80=E3=82=A6?= =?UTF-8?q?=E3=83=B3=E3=83=AD=E3=83=BC=E3=83=89=E3=81=99=E3=82=8B=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/download_floodgate_kifu.sh | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/scripts/download_floodgate_kifu.sh b/scripts/download_floodgate_kifu.sh index 0caff412..69bdef50 100755 --- a/scripts/download_floodgate_kifu.sh +++ b/scripts/download_floodgate_kifu.sh @@ -1,3 +1,5 @@ +echo "\$1(1番目の引数): $1" + # どこに保存するかの基準位置($0 = ./の2つ上がMiacisと同階層なのでそこに置く) root_dir=$(dirname "$0")/../../data @@ -5,20 +7,20 @@ root_dir=$(dirname "$0")/../../data download_path=${root_dir}/floodgate_kifu mkdir -p "${download_path}" wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2015.7z" -wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2016.7z" -wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2017.7z" -wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2018.7z" -wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2019.7z" +# wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2016.7z" +# wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2017.7z" +# wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2018.7z" +# wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2019.7z" # 学習用データ(2016年以降) -train_path=${download_path}/train -mkdir -p "${train_path}" -7z e "${download_path}"/wdoor2016.7z -o"${train_path}" -7z e "${download_path}"/wdoor2017.7z -o"${train_path}" -7z e "${download_path}"/wdoor2018.7z -o"${train_path}" -7z e "${download_path}"/wdoor2019.7z -o"${train_path}" +# train_path=${download_path}/train +# mkdir -p "${train_path}" +# 7z e "${download_path}"/wdoor2016.7z -o"${train_path}" +# 7z e "${download_path}"/wdoor2017.7z -o"${train_path}" +# 7z e "${download_path}"/wdoor2018.7z -o"${train_path}" +# 7z e "${download_path}"/wdoor2019.7z -o"${train_path}" # 検証用データ(2015年) valid_path=${download_path}/valid mkdir -p "${valid_path}" -7z e "${download_path}"/wdoor2015.7z -o"${valid_path}" +7z e "${download_path}"/wdoor2015.7z -o"${valid_path}" \ No newline at end of file From abf1e62de91842bd0861fe0c0a8024ee0694d304 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Feb 2021 01:12:31 +0000 Subject: [PATCH 14/87] =?UTF-8?q?=E5=90=84=E3=82=B9=E3=83=AC=E3=83=83?= =?UTF-8?q?=E3=83=89=E3=81=A7load=E3=82=92=E8=A1=8C=E3=81=86=E7=94=A8?= =?UTF-8?q?=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.cpp | 10 ++++++++++ src/game_generator.hpp | 12 +++++++++--- src/reinforcement_learn.cpp | 10 ++++------ src/shogi/test.cpp | 3 +-- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/src/game_generator.cpp b/src/game_generator.cpp index 0c235cd4..e6964313 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -2,6 +2,10 @@ #include void GameGenerator::genGames() { + //まず最初のロード + neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_); + need_load = false; + //生成スレッドを生成 std::vector threads; for (int64_t i = 0; i < search_options_.thread_num_per_gpu; i++) { @@ -48,6 +52,12 @@ void GameGenerator::genSlave(int64_t thread_id) { for (int32_t i = 0; i < worker_num_; i++) { workers[i]->backup(); } + + //パラメータをロードし直す必要があれば実行 + if (need_load) { + neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_); + need_load = false; + } } } diff --git a/src/game_generator.hpp b/src/game_generator.hpp index b4d157ac..31908418 100644 --- a/src/game_generator.hpp +++ b/src/game_generator.hpp @@ -17,10 +17,10 @@ class GameGenerator { public: GameGenerator(const SearchOptions& search_options, int64_t worker_num, float Q_dist_lambda, int64_t noise_mode, - float noise_epsilon, float noise_alpha, ReplayBuffer& rb, InferModel& nn) + float noise_epsilon, float noise_alpha, ReplayBuffer& rb, int64_t gpu_id) : stop_signal(false), search_options_(search_options), worker_num_(worker_num), Q_dist_lambda_(Q_dist_lambda), noise_mode_(noise_mode), noise_epsilon_(noise_epsilon), noise_alpha_(noise_alpha), replay_buffer_(rb), - neural_network_(nn), gpu_queues_(search_options_.thread_num_per_gpu) { + neural_network_(), gpu_id_(gpu_id), gpu_queues_(search_options_.thread_num_per_gpu) { assert(0 <= noise_mode_ && noise_mode_ < NOISE_MODE_SIZE); }; @@ -30,6 +30,9 @@ class GameGenerator { //排他制御用のmutex。AlphaZeroTrainerから触れるようにpublicに置いている std::mutex gpu_mutex; + //評価パラメータの読み込みが必要かどうかのシグナル + bool need_load; + //停止信号。止めたいときは外部からこれをtrueにする bool stop_signal; @@ -70,7 +73,10 @@ class GameGenerator { ReplayBuffer& replay_buffer_; //局面評価に用いるネットワーク - InferModel& neural_network_; + InferModel neural_network_; + + //CUDAがスレッドごとに紐付くのでgpu_id_を明に保持する必要がある + int64_t gpu_id_; //評価要求を受け付けるQueue std::vector gpu_queues_; diff --git a/src/reinforcement_learn.cpp b/src/reinforcement_learn.cpp index 0d334bb7..8ead163f 100644 --- a/src/reinforcement_learn.cpp +++ b/src/reinforcement_learn.cpp @@ -60,13 +60,11 @@ void reinforcementLearn() { //GPUの数だけネットワーク,自己対局生成器を生成 size_t gpu_num = torch::getNumGPUs(); - std::vector neural_networks(gpu_num); std::vector> generators(gpu_num); std::vector gen_threads; for (uint64_t i = 0; i < gpu_num; i++) { - neural_networks[i].load(DEFAULT_MODEL_NAME, static_cast(i)); generators[i] = std::make_unique(search_options, worker_num_per_thread, Q_dist_lambda, noise_mode, - noise_epsilon, noise_alpha, replay_buffer, neural_networks[i]); + noise_epsilon, noise_alpha, replay_buffer, i); gen_threads.emplace_back([&generators, i]() { generators[i]->genGames(); }); } @@ -106,9 +104,9 @@ void reinforcementLearn() { generators[i]->gpu_mutex.lock(); } - //ロードするときは一度fp32に直さないとエラーになる - //もっと良いやり方はありそうだがなぁ - neural_networks[i].load(DEFAULT_MODEL_NAME, static_cast(i)); + //パラメータをロードするべきというシグナルを出す + generators[i]->need_load = true; + if (i > 0) { generators[i]->gpu_mutex.unlock(); } diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index 20a391a2..251e3d41 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -112,8 +112,7 @@ void checkGenSpeed() { for (search_options.search_batch_size = 2; search_options.search_batch_size <= 4; search_options.search_batch_size *= 2) { ReplayBuffer buffer(0, buffer_size, 1, 1.0, 1.0, false); auto start = std::chrono::steady_clock::now(); - GameGenerator generator(search_options, worker_num, Q_dist_lambda, noise_mode, noise_epsilon, noise_alpha, buffer, - nn); + GameGenerator generator(search_options, worker_num, Q_dist_lambda, noise_mode, noise_epsilon, noise_alpha, buffer, 0); std::thread t(&GameGenerator::genGames, &generator); for (int64_t i = 0; i < num; i++) { std::this_thread::sleep_for(std::chrono::seconds(sec)); From fa4a8644a50929d266e8aff642079bb57f1685b8 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Feb 2021 03:23:04 +0000 Subject: [PATCH 15/87] =?UTF-8?q?set=5Fdevice=E3=82=92=E8=A1=8C=E3=81=86?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/game_generator.cpp b/src/game_generator.cpp index e6964313..2bc8a479 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -1,5 +1,6 @@ #include "game_generator.hpp" #include +#include void GameGenerator::genGames() { //まず最初のロード @@ -19,6 +20,9 @@ void GameGenerator::genGames() { } void GameGenerator::genSlave(int64_t thread_id) { + //スレッドごとにCUDAをセットしておかないとエラーが出る + trtorch::set_device(gpu_id_); + //Workerを準備 std::vector> workers(worker_num_); for (int32_t i = 0; i < worker_num_; i++) { @@ -54,10 +58,12 @@ void GameGenerator::genSlave(int64_t thread_id) { } //パラメータをロードし直す必要があれば実行 + gpu_mutex.lock(); if (need_load) { neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_); need_load = false; } + gpu_mutex.unlock(); } } From 12c4210756a8531db5705f95503842ef7253d747 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Feb 2021 04:46:24 +0000 Subject: [PATCH 16/87] =?UTF-8?q?=E3=83=A2=E3=83=87=E3=83=AB=E7=94=9F?= =?UTF-8?q?=E6=88=90=E3=82=B9=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88=E3=82=92?= =?UTF-8?q?=E6=94=B9=E8=89=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/generate_torch_script_model.py | 82 +++++++++++++++----------- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/scripts/generate_torch_script_model.py b/scripts/generate_torch_script_model.py index fd7e22ac..4f849b6c 100755 --- a/scripts/generate_torch_script_model.py +++ b/scripts/generate_torch_script_model.py @@ -14,17 +14,16 @@ REDUCTION = 8 KERNEL_SIZE = 3 +VALUE_HIDDEN_NUM = 256 if args.game == "shogi": INPUT_CHANNEL_NUM = 42 BOARD_SIZE = 9 POLICY_CHANNEL_NUM = 27 - VALUE_HIDDEN_NUM = 256 elif args.game == "othello": INPUT_CHANNEL_NUM = 2 BOARD_SIZE = 8 POLICY_CHANNEL_NUM = 2 - VALUE_HIDDEN_NUM = 256 class Conv2DwithBatchNorm(nn.Module): @@ -38,6 +37,7 @@ def forward(self, x): t = self.norm_.forward(t) return t + class ResidualBlock(nn.Module): def __init__(self, channel_num, kernel_size, reduction): super(ResidualBlock, self).__init__() @@ -65,65 +65,77 @@ def forward(self, x): return t -class ScalarNetwork(nn.Module): +class Encoder(nn.Module): def __init__(self, channel_num): - super(ScalarNetwork, self).__init__() - + super(Encoder, self).__init__() self.first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, channel_num, 3) self.blocks = nn.Sequential() for i in range(args.block_num): self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, KERNEL_SIZE, REDUCTION)) - self.policy_conv_ = nn.Conv2d(channel_num, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) - self.value_conv_and_norm_ = Conv2DwithBatchNorm(channel_num, channel_num, 1) - self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * channel_num, VALUE_HIDDEN_NUM) - self.value_linear1_ = nn.Linear(VALUE_HIDDEN_NUM, 1) def forward(self, x): x = self.first_conv_and_norm_.forward(x) x = F.relu(x) - x = self.blocks.forward(x) + return x + + +class PolicyHead(nn.Module): + def __init__(self, channel_num): + super(PolicyHead, self).__init__() + self.policy_conv_ = nn.Conv2d(channel_num, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) + + def forward(self, x): policy = self.policy_conv_.forward(x) + return policy + + +class ValueHead(nn.Module): + def __init__(self, channel_num, unit_num): + super(ValueHead, self).__init__() + self.value_conv_and_norm_ = Conv2DwithBatchNorm(channel_num, channel_num, 1) + self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * channel_num, VALUE_HIDDEN_NUM) + self.value_linear1_ = nn.Linear(VALUE_HIDDEN_NUM, unit_num) + def forward(self, x): value = self.value_conv_and_norm_.forward(x) value = F.relu(value) value = value.view([-1, args.channel_num * BOARD_SIZE * BOARD_SIZE]) value = self.value_linear0_.forward(value) value = F.relu(value) value = self.value_linear1_.forward(value) - value = torch.tanh(value) + return value - return policy, value -class CategoricalNetwork(nn.Module): +class ScalarNetwork(nn.Module): def __init__(self, channel_num): - super(CategoricalNetwork, self).__init__() - - self.first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, channel_num, 3) - self.blocks = nn.Sequential() - for i in range(args.block_num): - self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, KERNEL_SIZE, REDUCTION)) - self.policy_conv_ = nn.Conv2d(channel_num, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) - self.value_conv_and_norm_ = Conv2DwithBatchNorm(channel_num, channel_num, 1) - self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * channel_num, VALUE_HIDDEN_NUM) - self.value_linear1_ = nn.Linear(VALUE_HIDDEN_NUM, 51) + super(ScalarNetwork, self).__init__() + self.encoder_ = Encoder(channel_num) + self.policy_head_ = PolicyHead(channel_num) + self.value_head_ = ValueHead(channel_num, 1) def forward(self, x): - x = self.first_conv_and_norm_.forward(x) - x = F.relu(x) + x = self.encoder_.forward(x) + policy = self.policy_head_.forward(x) + value = self.value_head_.forward(x) + value = torch.tanh(value) + return policy, value - x = self.blocks.forward(x) - policy = self.policy_conv_.forward(x) - value = self.value_conv_and_norm_.forward(x) - value = F.relu(value) - value = value.view([-1, args.channel_num * BOARD_SIZE * BOARD_SIZE]) - value = self.value_linear0_.forward(value) - value = F.relu(value) - value = self.value_linear1_.forward(value) +class CategoricalNetwork(nn.Module): + def __init__(self, channel_num): + super(CategoricalNetwork, self).__init__() + self.encoder_ = Encoder(channel_num) + self.policy_head_ = PolicyHead(channel_num) + self.value_head_ = ValueHead(channel_num, 51) + def forward(self, x): + x = self.encoder_.forward(x) + policy = self.policy_head_.forward(x) + value = self.value_head_.forward(x) return policy, value + model = None if args.value_type == "sca": model = ScalarNetwork(args.channel_num) @@ -132,4 +144,6 @@ def forward(self, x): input_data = torch.randn([8, INPUT_CHANNEL_NUM, BOARD_SIZE, BOARD_SIZE]) script_model = torch.jit.trace(model, input_data) # script_model = torch.jit.script(model) -script_model.save(f"./{args.game}_{args.value_type}_bl{args.block_num}_ch{args.channel_num}.model") +model_path = f"./{args.game}_{args.value_type}_bl{args.block_num}_ch{args.channel_num}.model" +script_model.save(model_path) +print(f"{model_path}にパラメータを保存") From e2b704f5d7efdd746cd289d1f010b3c6c6ab9f1d Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Feb 2021 06:06:38 +0000 Subject: [PATCH 17/87] =?UTF-8?q?=E7=84=A1=E9=A7=84=E3=81=ABtensor?= =?UTF-8?q?=E3=82=92=E4=BD=9C=E3=81=A3=E3=81=A6=E3=81=9D=E3=81=93=E3=81=8B?= =?UTF-8?q?=E3=82=89=E3=82=B5=E3=82=A4=E3=82=BA=E3=82=92=E5=8F=96=E3=82=8A?= =?UTF-8?q?=E5=87=BA=E3=81=97=E3=81=A6=E3=81=84=E3=81=9F=E9=83=A8=E5=88=86?= =?UTF-8?q?=E3=82=92=E7=9B=B4=E6=8E=A5=E3=82=B5=E3=82=A4=E3=82=BA=E3=81=AE?= =?UTF-8?q?vector=E3=82=92=E4=BD=9C=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index af907210..d4bd3bee 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -10,12 +10,12 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id) { module_.to(device_, torch::kHalf); module_.eval(); - torch::Tensor in_min = torch::randn({ 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, device_).to(torch::kHalf); - torch::Tensor in_opt = torch::randn({ 128, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, device_).to(torch::kHalf); - torch::Tensor in_max = torch::randn({ 256, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }, device_).to(torch::kHalf); + std::vector in_min = { 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; + std::vector in_opt = { 128, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; + std::vector in_max = { 256, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; //trtorch - trtorch::CompileSpec::InputRange range(in_min.sizes(), in_opt.sizes(), in_max.sizes()); + trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); trtorch::CompileSpec info({ range }); info.op_precision = torch::kHalf; info.device.gpu_id = gpu_id; From 5da466aa9bad1acddda68049b03ae5d293769c20 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Feb 2021 06:08:56 +0000 Subject: [PATCH 18/87] =?UTF-8?q?=E4=B8=8D=E8=A6=81=E3=81=AA=E5=A4=89?= =?UTF-8?q?=E6=95=B0=E3=82=92=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/neural_network.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/neural_network.cpp b/src/neural_network.cpp index 850568dd..a28a5c98 100644 --- a/src/neural_network.cpp +++ b/src/neural_network.cpp @@ -10,9 +10,6 @@ static constexpr int32_t CHANNEL_NUM = 128; static constexpr int32_t BLOCK_NUM = 5; static constexpr int32_t CHANNEL_NUM = 64; #endif -static constexpr int32_t KERNEL_SIZE = 3; -static constexpr int32_t REDUCTION = 8; -static constexpr int32_t VALUE_HIDDEN_NUM = 256; #ifdef USE_CATEGORICAL #ifdef SHOGI From c7fd954ac4520cc97d5aad3cc577dbe46de73d4e Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Feb 2021 12:46:31 +0000 Subject: [PATCH 19/87] =?UTF-8?q?=E3=81=A8=E3=82=8A=E3=81=82=E3=81=88?= =?UTF-8?q?=E3=81=9A=E3=83=AD=E3=83=BC=E3=82=AB=E3=83=AB=E3=81=ABjit::Modu?= =?UTF-8?q?le=E3=82=92=E4=BD=9C=E3=82=8B=E5=BD=A2=E3=81=A7=E8=A9=A6?= =?UTF-8?q?=E3=81=97=E3=81=A6=E3=81=BF=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index d4bd3bee..5144925e 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -5,10 +5,10 @@ #include void InferModel::load(const std::string& model_path, int64_t gpu_id) { - module_ = torch::jit::load(model_path); + torch::jit::Module module = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); - module_.to(device_, torch::kHalf); - module_.eval(); + module.to(device_, torch::kHalf); + module.eval(); std::vector in_min = { 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; std::vector in_opt = { 128, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; @@ -19,7 +19,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id) { trtorch::CompileSpec info({ range }); info.op_precision = torch::kHalf; info.device.gpu_id = gpu_id; - module_ = trtorch::CompileGraph(module_, info); + module_ = trtorch::CompileGraph(module, info); } std::pair, std::vector> InferModel::policyAndValueBatch(const std::vector& inputs) { From c0ecc7d6386031e4ff74877996b64d596d3031cf Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Feb 2021 13:02:51 +0000 Subject: [PATCH 20/87] =?UTF-8?q?ConvAndNorm=E3=81=AEbias=E3=81=8Ctrue?= =?UTF-8?q?=E3=81=AB=E3=81=AA=E3=81=A3=E3=81=A6=E3=81=84=E3=81=9F=E3=81=AE?= =?UTF-8?q?=E3=82=92=E4=BF=AE=E6=AD=A3=E3=80=82=E3=81=9D=E3=82=8C=E3=81=A0?= =?UTF-8?q?=E3=81=91=E3=81=A7=E3=81=9D=E3=82=93=E3=81=AA=E3=81=AB=E5=A4=89?= =?UTF-8?q?=E3=82=8F=E3=82=8B=E3=81=AE=E3=81=8B=E3=81=AA=EF=BC=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/generate_torch_script_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/generate_torch_script_model.py b/scripts/generate_torch_script_model.py index 4f849b6c..540fc490 100755 --- a/scripts/generate_torch_script_model.py +++ b/scripts/generate_torch_script_model.py @@ -29,7 +29,7 @@ class Conv2DwithBatchNorm(nn.Module): def __init__(self, input_ch, output_ch, kernel_size): super(Conv2DwithBatchNorm, self).__init__() - self.conv_ = nn.Conv2d(input_ch, output_ch, kernel_size, padding=kernel_size // 2) + self.conv_ = nn.Conv2d(input_ch, output_ch, kernel_size, bias=False, padding=kernel_size // 2) self.norm_ = nn.BatchNorm2d(output_ch) def forward(self, x): From 750ffc255d3588148a77857e3f202ef1fa8dfbd9 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Feb 2021 13:25:04 +0000 Subject: [PATCH 21/87] =?UTF-8?q?p7zip-full=E3=81=AE=E3=82=A4=E3=83=B3?= =?UTF-8?q?=E3=82=B9=E3=83=88=E3=83=BC=E3=83=AB=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/Dockerfile b/scripts/Dockerfile index 477b23fe..53628ecf 100644 --- a/scripts/Dockerfile +++ b/scripts/Dockerfile @@ -45,6 +45,9 @@ ENV LD_LIBRARY_PATH /opt/conda/lib/python3.6/site-packages/torch/lib:$LD_LIBRARY RUN apt-get update && apt-get install -y language-pack-ja-base language-pack-ja && rm -rf /var/lib/apt/lists/* ENV LANG='ja_JP.UTF-8' +# 必要なもののインストール +RUN apt-get update && apt-get install -y p7zip-full && rm -rf /var/lib/apt/lists/* + # trtorchを適切な場所へ展開 WORKDIR /root RUN tar xvf /opt/trtorch/bazel-bin/libtrtorch.tar.gz . From 820e73e8c262fa7fe1d98e785b09d38f8c5508f2 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 4 Feb 2021 01:00:26 +0000 Subject: [PATCH 22/87] =?UTF-8?q?InferModel=E3=82=92load=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=81=AE=E3=81=AF=E3=82=B9=E3=83=AC=E3=83=83=E3=83=89id=3D0?= =?UTF-8?q?=E3=81=AE=E3=82=82=E3=81=AE=E3=81=AB=E4=BB=BB=E3=81=9B=E3=82=8B?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/game_generator.cpp b/src/game_generator.cpp index 2bc8a479..f8675d6e 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -58,12 +58,13 @@ void GameGenerator::genSlave(int64_t thread_id) { } //パラメータをロードし直す必要があれば実行 - gpu_mutex.lock(); - if (need_load) { + //全スレッドが読み込もうとする必要はないので代表してid=0のスレッドに任せる + if (need_load && thread_id == 0) { + gpu_mutex.lock(); neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_); need_load = false; + gpu_mutex.unlock(); } - gpu_mutex.unlock(); } } From 025b6d5b21731e2d41fe6cddb9197c1aa91b9d58 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 4 Feb 2021 19:30:14 +0900 Subject: [PATCH 23/87] =?UTF-8?q?TRTorch=E3=81=AE=E8=A8=AD=E5=AE=9A?= =?UTF-8?q?=E9=83=A8=E5=88=86=E3=81=AA=E3=81=A9CMake=E3=82=92=E6=B4=97?= =?UTF-8?q?=E7=B7=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/CMakeLists.txt | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index dc11ff30..0f552fac 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,15 +1,20 @@ cmake_minimum_required(VERSION 3.10) project(Miacis) -#共通する設定 +# 共通する設定 set(CMAKE_CXX_STANDARD 17) -set(CMAKE_CXX_FLAGS "-march=native -I/root/trtorch/include/") +set(CMAKE_CXX_FLAGS "-march=native") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") +# LibTorch list(APPEND CMAKE_PREFIX_PATH ../../libtorch-1.7.0) find_package(Torch REQUIRED) -#各ゲームで共通する部分のソース +# TRTorch +set(TRTORCH_INCLUDE ../../trtorch/include) +file(GLOB TRTORCH_LIBRARIES ../../trtorch/lib/*.so) + +# 各ゲームで共通する部分のソース set(SRCS hash_table.cpp replay_buffer.cpp game_generator.cpp @@ -28,7 +33,7 @@ set(SRCS hash_table.cpp infer_model.cpp ) -#将棋用のバイナリをコンパイルするのに必要なソース +# 将棋用のバイナリをコンパイルするのに必要なソース set(SHOGI_SRCS shogi/main.cpp shogi/test.cpp shogi/bitboard.cpp @@ -40,7 +45,7 @@ set(SHOGI_SRCS shogi/main.cpp shogi/game.cpp shogi/book.cpp) -#オセロ用のバイナリをコンパイルするのに必要なソース +# オセロ用のバイナリをコンパイルするのに必要なソース set(OTHELLO_SRCS othello/main.cpp othello/position.cpp othello/square.cpp @@ -48,26 +53,27 @@ set(OTHELLO_SRCS othello/main.cpp othello/interface.cpp othello/game.cpp) +# 実行ファイルの定義 add_executable(Miacis_shogi_scalar ${SRCS} ${SHOGI_SRCS}) add_executable(Miacis_shogi_categorical ${SRCS} ${SHOGI_SRCS}) target_compile_definitions(Miacis_shogi_scalar PUBLIC SHOGI) -target_compile_definitions(Miacis_shogi_categorical PUBLIC SHOGI) -target_compile_definitions(Miacis_shogi_categorical PUBLIC USE_CATEGORICAL) +target_compile_definitions(Miacis_shogi_categorical PUBLIC SHOGI USE_CATEGORICAL) add_executable(Miacis_othello_scalar ${SRCS} ${OTHELLO_SRCS}) add_executable(Miacis_othello_categorical ${SRCS} ${OTHELLO_SRCS}) target_compile_definitions(Miacis_othello_scalar PUBLIC OTHELLO) -target_compile_definitions(Miacis_othello_categorical PUBLIC OTHELLO) -target_compile_definitions(Miacis_othello_categorical PUBLIC USE_CATEGORICAL) +target_compile_definitions(Miacis_othello_categorical PUBLIC OTHELLO USE_CATEGORICAL) + +set(BIN Miacis_shogi_scalar Miacis_shogi_categorical Miacis_othello_scalar Miacis_othello_categorical) if (MSVC) - target_link_libraries(Miacis_shogi_scalar "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_shogi_categorical "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_othello_scalar "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_othello_categorical "${TORCH_LIBRARIES}") + foreach(bin IN ITEMS ${BIN}) + target_link_libraries(${bin} ${TORCH_LIBRARIES} ${TRTORCH_LIBRARIES}) + target_include_directories(${bin} PRIVATE ${TRTORCH_INCLUDE}) + endforeach() else() - target_link_libraries(Miacis_shogi_scalar pthread stdc++fs "${TORCH_LIBRARIES}" /root/trtorch/lib/libtrtorch.so /root/trtorch/lib/libtrtorchrt.so) - target_link_libraries(Miacis_shogi_categorical pthread stdc++fs "${TORCH_LIBRARIES}" /root/trtorch/lib/libtrtorch.so /root/trtorch/lib/libtrtorchrt.so) - target_link_libraries(Miacis_othello_scalar pthread stdc++fs "${TORCH_LIBRARIES}" /root/trtorch/lib/libtrtorch.so /root/trtorch/lib/libtrtorchrt.so) - target_link_libraries(Miacis_othello_categorical pthread stdc++fs "${TORCH_LIBRARIES}" /root/trtorch/lib/libtrtorch.so /root/trtorch/lib/libtrtorchrt.so) + foreach(bin IN ITEMS ${BIN}) + target_link_libraries(${bin} pthread stdc++fs ${TORCH_LIBRARIES} ${TRTORCH_LIBRARIES}) + target_include_directories(${bin} PRIVATE ${TRTORCH_INCLUDE}) + endforeach() endif() \ No newline at end of file From 7a14d9a7622a37b5115fea7bc642e2af94959353 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 4 Feb 2021 19:33:22 +0900 Subject: [PATCH 24/87] =?UTF-8?q?CMakeLists.txt=E3=81=A7=E5=90=84=E3=82=B2?= =?UTF-8?q?=E3=83=BC=E3=83=A0=E3=82=84=E5=85=B1=E9=80=9A=E9=83=A8=E5=88=86?= =?UTF-8?q?=E3=81=AE=E3=82=B3=E3=83=BC=E3=83=89=E3=82=92=E5=8F=96=E5=BE=97?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=81=9F=E3=82=81=E3=81=ABGLOB=E3=82=92?= =?UTF-8?q?=E5=88=A9=E7=94=A8=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4=E3=80=82=E3=82=AD=E3=83=A3=E3=83=83=E3=82=B7?= =?UTF-8?q?=E3=83=A5=E3=81=8C=E6=AE=8B=E3=81=A3=E3=81=A6=E3=81=84=E3=81=9F?= =?UTF-8?q?=E3=82=8A=E3=81=99=E3=82=8B=E3=81=A8=E6=83=B3=E5=AE=9A=E5=A4=96?= =?UTF-8?q?=E3=81=AE=E6=8C=99=E5=8B=95=E3=81=AB=E3=81=AA=E3=82=8A=E3=81=9D?= =?UTF-8?q?=E3=81=86=E3=81=AA=E3=81=AE=E3=81=A7=E6=B3=A8=E6=84=8F=E3=81=99?= =?UTF-8?q?=E3=82=8B=E3=81=93=E3=81=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/CMakeLists.txt | 37 ++++--------------------------------- 1 file changed, 4 insertions(+), 33 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0f552fac..72fc37d6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -15,43 +15,13 @@ set(TRTORCH_INCLUDE ../../trtorch/include) file(GLOB TRTORCH_LIBRARIES ../../trtorch/lib/*.so) # 各ゲームで共通する部分のソース -set(SRCS hash_table.cpp - replay_buffer.cpp - game_generator.cpp - neural_network.cpp - learn.cpp - supervised_learn.cpp - reinforcement_learn.cpp - searcher.cpp - searcher_for_play.cpp - searcher_for_mate.cpp - hyperparameter_loader.cpp - segment_tree.cpp - common.cpp - timer.cpp - learning_model.cpp - infer_model.cpp - ) +file(GLOB SRCS ./*.cpp) # 将棋用のバイナリをコンパイルするのに必要なソース -set(SHOGI_SRCS shogi/main.cpp - shogi/test.cpp - shogi/bitboard.cpp - shogi/move.cpp - shogi/piece.cpp - shogi/position.cpp - shogi/square.cpp - shogi/interface.cpp - shogi/game.cpp - shogi/book.cpp) +file(GLOB SHOGI_SRCS ./shogi/*.cpp) # オセロ用のバイナリをコンパイルするのに必要なソース -set(OTHELLO_SRCS othello/main.cpp - othello/position.cpp - othello/square.cpp - othello/piece.cpp - othello/interface.cpp - othello/game.cpp) +file(GLOB OTHELLO_SRCS ./othello/*.cpp) # 実行ファイルの定義 add_executable(Miacis_shogi_scalar ${SRCS} ${SHOGI_SRCS}) @@ -64,6 +34,7 @@ add_executable(Miacis_othello_categorical ${SRCS} ${OTHELLO_SRCS}) target_compile_definitions(Miacis_othello_scalar PUBLIC OTHELLO) target_compile_definitions(Miacis_othello_categorical PUBLIC OTHELLO USE_CATEGORICAL) +# foreachで回すためにリスト化 set(BIN Miacis_shogi_scalar Miacis_shogi_categorical Miacis_othello_scalar Miacis_othello_categorical) if (MSVC) From 001627e66b59d65a58629e343b72b95158b1d4bc Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 4 Feb 2021 19:34:33 +0900 Subject: [PATCH 25/87] =?UTF-8?q?return=E3=81=99=E3=82=8BTensor=E3=82=92?= =?UTF-8?q?=E3=83=87=E3=82=BF=E3=83=83=E3=83=81=E3=81=97=E3=81=A6=E3=81=8A?= =?UTF-8?q?=E3=81=8F=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/learn.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/learn.cpp b/src/learn.cpp index 6f8eed6e..ef25dc5d 100644 --- a/src/learn.cpp +++ b/src/learn.cpp @@ -245,5 +245,5 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d } } - return loss_sum; + return loss_sum.detach(); } \ No newline at end of file From da612cdbfd99560bb1dc3a0cebcc4d956a5193b8 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 4 Feb 2021 19:34:55 +0900 Subject: [PATCH 26/87] =?UTF-8?q?=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88?= =?UTF-8?q?=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/game_generator.hpp b/src/game_generator.hpp index 31908418..e162a65b 100644 --- a/src/game_generator.hpp +++ b/src/game_generator.hpp @@ -27,7 +27,7 @@ class GameGenerator { //生成してリプレイバッファに送り続ける関数 void genGames(); - //排他制御用のmutex。AlphaZeroTrainerから触れるようにpublicに置いている + //排他制御用のmutex。強化学習時に外部からアクセスできるようpublicに置いている std::mutex gpu_mutex; //評価パラメータの読み込みが必要かどうかのシグナル From edff1b2279b99aff60bbc8351826b8a9ba79fe97 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 4 Feb 2021 19:35:36 +0900 Subject: [PATCH 27/87] =?UTF-8?q?=E3=82=AF=E3=83=AA=E3=83=86=E3=82=A3?= =?UTF-8?q?=E3=82=AB=E3=83=AB=E3=82=BB=E3=82=AF=E3=82=B7=E3=83=A7=E3=83=B3?= =?UTF-8?q?=E3=82=92=E7=9F=AD=E3=81=8F=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/reinforcement_learn.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/src/reinforcement_learn.cpp b/src/reinforcement_learn.cpp index 8ead163f..87a514bc 100644 --- a/src/reinforcement_learn.cpp +++ b/src/reinforcement_learn.cpp @@ -1,7 +1,7 @@ #include "game_generator.hpp" #include "hyperparameter_loader.hpp" -#include "learn.hpp" #include "infer_model.hpp" +#include "learn.hpp" #include void reinforcementLearn() { @@ -89,6 +89,9 @@ void reinforcementLearn() { //1ステップ学習し、損失を取得 torch::Tensor loss_sum = learn_manager.learnOneStep(curr_data, step_num); + //GPUを解放 + generators.front()->gpu_mutex.unlock(); + //replay_bufferのpriorityを更新 std::vector loss_vec(loss_sum.data_ptr(), loss_sum.data_ptr() + batch_size); replay_buffer.update(loss_vec); @@ -100,22 +103,15 @@ void reinforcementLearn() { //各ネットワークで保存されたパラメータを読み込み for (uint64_t i = 0; i < gpu_num; i++) { - if (i > 0) { - generators[i]->gpu_mutex.lock(); - } + generators[i]->gpu_mutex.lock(); //パラメータをロードするべきというシグナルを出す generators[i]->need_load = true; - if (i > 0) { - generators[i]->gpu_mutex.unlock(); - } + generators[i]->gpu_mutex.unlock(); } } - //GPUを解放 - generators.front()->gpu_mutex.unlock(); - //学習スレッドを眠らせることで擬似的にActorの数を増やす std::this_thread::sleep_for(std::chrono::milliseconds(sleep_msec)); } From ad06880a713eb0540f8bcba8b05ced31ea5eba2e Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 5 Feb 2021 07:59:15 +0900 Subject: [PATCH 28/87] =?UTF-8?q?=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB?= =?UTF-8?q?=E3=83=88=E5=80=A4=E3=82=92shogi,=20cat=E3=81=AB=E6=8C=87?= =?UTF-8?q?=E5=AE=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/generate_torch_script_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate_torch_script_model.py b/scripts/generate_torch_script_model.py index 540fc490..d5d9fca8 100755 --- a/scripts/generate_torch_script_model.py +++ b/scripts/generate_torch_script_model.py @@ -6,8 +6,8 @@ import argparse parser = argparse.ArgumentParser() -parser.add_argument("-game", choices=["shogi", "othello"]) -parser.add_argument("-value_type", choices=["sca", "cat"]) +parser.add_argument("-game", default="shogi", choices=["shogi", "othello"]) +parser.add_argument("-value_type", default="cat", choices=["sca", "cat"]) parser.add_argument("--block_num", type=int, default=10) parser.add_argument("--channel_num", type=int, default=128) args = parser.parse_args() From f84a570920a49845c4059e381dbf4ee57695d48b Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 5 Feb 2021 10:40:59 +0900 Subject: [PATCH 29/87] =?UTF-8?q?Segmentation=20Fault=E3=81=8C=E8=B5=B7?= =?UTF-8?q?=E3=81=93=E3=82=8B=E3=81=A7=E3=81=8D=E3=82=8B=E3=81=A0=E3=81=91?= =?UTF-8?q?=E5=B0=8F=E3=81=95=E3=81=84=E9=96=A2=E6=95=B0=E3=82=92=E4=BD=9C?= =?UTF-8?q?=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/shogi/interface.cpp | 1 + src/shogi/test.cpp | 38 ++++++++++++++++++++++++++++++++++++++ src/shogi/test.hpp | 1 + 3 files changed, 40 insertions(+) diff --git a/src/shogi/interface.cpp b/src/shogi/interface.cpp index e61281e7..66acaa7d 100644 --- a/src/shogi/interface.cpp +++ b/src/shogi/interface.cpp @@ -34,6 +34,7 @@ Interface::Interface() : searcher_(nullptr) { command_["checkBook"] = checkBook; command_["makeBook"] = makeBook; command_["searchWithLog"] = searchWithLog; + command_["testLoad"] = testLoad; // clang-format on } diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index 251e3d41..52f05f38 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -456,4 +456,42 @@ void searchWithLog() { } } +void testLoad() { + constexpr int64_t LOOP_NUM = 20; + + //時間計測開始 + Timer timer; + timer.start(); + int64_t pre = 0; + //通常試行 + std::cout << "通常の試行" << std::endl; + for (int64_t num = 0; num < LOOP_NUM; num++) { + InferModel model; + model.load(DEFAULT_MODEL_NAME, 0); + int64_t ela = timer.elapsedSeconds(); + int64_t curr = ela - pre; + pre = ela; + std::cout << std::setw(3) << num + 1 << "回目終了, 今回" << curr << "秒, 平均" << ela / (num + 1.0) << "秒" << std::endl; + } + + //スレッドを作成しての試行 + timer.start(); + pre = 0; + std::cout << "スレッドを作成しての試行" << std::endl; + for (int64_t num = 0; num < LOOP_NUM; num++) { + std::thread thread([]() { + InferModel model; + model.load(DEFAULT_MODEL_NAME, 0); + }); + thread.join(); + int64_t ela = timer.elapsedSeconds(); + int64_t curr = ela - pre; + pre = ela; + std::cout << std::setw(3) << num + 1 << "回目終了, 今回" << curr << "秒, 平均" << ela / (num + 1.0) << "秒" << std::endl; + } + + std::cout << "finish testLoad" << std::endl; + std::exit(0); +} + } // namespace Shogi \ No newline at end of file diff --git a/src/shogi/test.hpp b/src/shogi/test.hpp index a5e981cf..f424092f 100644 --- a/src/shogi/test.hpp +++ b/src/shogi/test.hpp @@ -16,6 +16,7 @@ void checkMirror(); void checkBook(); void makeBook(); void searchWithLog(); +void testLoad(); } // namespace Shogi From a31bd053ff91cd0edc8e17f840e775d2831a3ca6 Mon Sep 17 00:00:00 2001 From: sakoda Date: Sat, 6 Feb 2021 20:05:47 +0900 Subject: [PATCH 30/87] =?UTF-8?q?=E3=82=B3=E3=83=9F=E3=83=83=E3=83=88ID?= =?UTF-8?q?=E3=82=92=E6=8C=87=E5=AE=9A=E3=81=99=E3=82=8B=E3=82=88=E3=81=86?= =?UTF-8?q?=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/download_YaneuraOu.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/download_YaneuraOu.sh b/scripts/download_YaneuraOu.sh index 251b3ebe..f6ac79e0 100755 --- a/scripts/download_YaneuraOu.sh +++ b/scripts/download_YaneuraOu.sh @@ -9,6 +9,7 @@ root_dir=../.. # GitHubからソースコードをダウンロード git clone https://github.com/yaneurao/YaneuraOu ${root_dir}/YaneuraOu cd ${root_dir}/YaneuraOu/source +git checkout 1048b6a1874ead3e385e5c62064cf790a9bdebea # デフォルトではclangを使うようになっているがg++を使いたいのでMakefileを書き換える sed -i -e "s/#COMPILER = g++/COMPILER = g++/g" Makefile From c28eb1cb82e6303368dd25d35a8aebbb4cebb19c Mon Sep 17 00:00:00 2001 From: sakoda Date: Sat, 6 Feb 2021 20:37:26 +0900 Subject: [PATCH 31/87] =?UTF-8?q?vsYaneuraou.py=E3=81=A7=E4=BD=BF=E3=81=86?= =?UTF-8?q?natsort=E3=81=AE=E3=82=A4=E3=83=B3=E3=82=B9=E3=83=88=E3=83=BC?= =?UTF-8?q?=E3=83=AB=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/Dockerfile b/scripts/Dockerfile index 53628ecf..0f9c6b89 100644 --- a/scripts/Dockerfile +++ b/scripts/Dockerfile @@ -47,6 +47,7 @@ ENV LANG='ja_JP.UTF-8' # 必要なもののインストール RUN apt-get update && apt-get install -y p7zip-full && rm -rf /var/lib/apt/lists/* +RUN pip install natsort # trtorchを適切な場所へ展開 WORKDIR /root From 6bc1093d37027afa6af9968652a19e3d38269f50 Mon Sep 17 00:00:00 2001 From: sakoda Date: Sun, 7 Feb 2021 13:38:48 +0900 Subject: [PATCH 32/87] =?UTF-8?q?=E5=8F=A4=E3=81=84=E3=83=91=E3=83=A9?= =?UTF-8?q?=E3=83=A1=E3=83=BC=E3=82=BF=E3=82=92TorchScript=E3=81=B8?= =?UTF-8?q?=E3=82=B3=E3=83=B3=E3=83=90=E3=83=BC=E3=83=88=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=82=B9=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88=E3=82=92=E5=AE=9F?= =?UTF-8?q?=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/convert_old_model_file.py | 151 ++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100755 scripts/convert_old_model_file.py diff --git a/scripts/convert_old_model_file.py b/scripts/convert_old_model_file.py new file mode 100755 index 00000000..d17dd03c --- /dev/null +++ b/scripts/convert_old_model_file.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 + +# !/usr/bin/env python3 +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.jit +import argparse +import glob +import os +from natsort import natsorted + +parser = argparse.ArgumentParser() +parser.add_argument("-source_dir", type=str, required=True) +parser.add_argument("-game", default="shogi", choices=["shogi", "othello"]) +parser.add_argument("-value_type", default="cat", choices=["sca", "cat"]) +parser.add_argument("--block_num", type=int, default=10) +parser.add_argument("--channel_num", type=int, default=128) +args = parser.parse_args() + +REDUCTION = 8 +KERNEL_SIZE = 3 +VALUE_HIDDEN_NUM = 256 + +if args.game == "shogi": + INPUT_CHANNEL_NUM = 42 + BOARD_SIZE = 9 + POLICY_CHANNEL_NUM = 27 +elif args.game == "othello": + INPUT_CHANNEL_NUM = 2 + BOARD_SIZE = 8 + POLICY_CHANNEL_NUM = 2 + + +class Conv2DwithBatchNorm(nn.Module): + def __init__(self, input_ch, output_ch, kernel_size): + super(Conv2DwithBatchNorm, self).__init__() + self.conv_ = nn.Conv2d(input_ch, output_ch, kernel_size, bias=False, padding=kernel_size // 2) + self.norm_ = nn.BatchNorm2d(output_ch) + + def forward(self, x): + t = self.conv_.forward(x) + t = self.norm_.forward(t) + return t + + +class ResidualBlock(nn.Module): + def __init__(self, channel_num, kernel_size, reduction): + super(ResidualBlock, self).__init__() + self.conv_and_norm0_ = Conv2DwithBatchNorm(channel_num, channel_num, kernel_size) + self.conv_and_norm1_ = Conv2DwithBatchNorm(channel_num, channel_num, kernel_size) + self.linear0_ = nn.Linear(channel_num, channel_num // reduction, bias=False) + self.linear1_ = nn.Linear(channel_num // reduction, channel_num, bias=False) + + def forward(self, x): + t = x + t = self.conv_and_norm0_.forward(t) + t = F.relu(t) + t = self.conv_and_norm1_.forward(t) + + y = F.avg_pool2d(t, [t.shape[2], t.shape[3]]) + y = y.view([-1, t.shape[1]]) + y = self.linear0_.forward(y) + y = F.relu(y) + y = self.linear1_.forward(y) + y = torch.sigmoid(y) + y = y.view([-1, t.shape[1], 1, 1]) + t = t * y + + t = F.relu(x + t) + return t + + +class CategoricalNetwork(nn.Module): + def __init__(self, channel_num): + super(CategoricalNetwork, self).__init__() + self.state_first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, channel_num, 3) + self.blocks = nn.Sequential() + for i in range(args.block_num): + self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, KERNEL_SIZE, REDUCTION)) + self.policy_conv_ = nn.Conv2d(channel_num, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) + + self.value_conv_and_norm_ = Conv2DwithBatchNorm(channel_num, channel_num, 1) + self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * channel_num, VALUE_HIDDEN_NUM) + self.value_linear1_ = nn.Linear(VALUE_HIDDEN_NUM, 51) + + def forward(self, x): + x = self.state_first_conv_and_norm_.forward(x) + x = F.relu(x) + x = self.blocks.forward(x) + + policy = self.policy_conv_.forward(x) + + value = self.value_conv_and_norm_.forward(x) + value = F.relu(value) + value = value.view([-1, args.channel_num * BOARD_SIZE * BOARD_SIZE]) + value = self.value_linear0_.forward(value) + value = F.relu(value) + value = self.value_linear1_.forward(value) + + return policy, value + + +def load_conv_and_norm(dst, src): + dst.conv_.weight.data = src.conv_.weight.data + dst.norm_.weight.data = src.norm_.weight.data + dst.norm_.bias.data = src.norm_.bias.data + dst.norm_.running_mean = src.norm_.running_mean + dst.norm_.running_var = src.norm_.running_var + + +# インスタンス生成 +model = CategoricalNetwork(args.channel_num) + +# ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う +model_names = natsorted(glob.glob(f"{args.source_dir}/*.model")) + +for source_model_name in model_names: + source = torch.jit.load("/home/sakoda/learn_result/supervised/20201029_cat/cat_bl10_ch128_3000000.model").cpu() + + # state_first + load_conv_and_norm(model.state_first_conv_and_norm_, source.state_first_conv_and_norm_) + + # block + for i, v in enumerate(model.__dict__["_modules"]["blocks"]): + source_m = source.__dict__["_modules"][f"state_blocks_{i}"] + load_conv_and_norm(v.conv_and_norm0_, source_m.conv_and_norm0_) + load_conv_and_norm(v.conv_and_norm1_, source_m.conv_and_norm1_) + v.linear0_.weight.data = source_m.linear0_.weight.data + v.linear1_.weight.data = source_m.linear1_.weight.data + + # policy_conv + model.policy_conv_.weight.data = source.policy_conv_.weight.data + model.policy_conv_.bias.data = source.policy_conv_.bias.data + + # value_conv_norm_ + load_conv_and_norm(model.value_conv_and_norm_, source.value_conv_and_norm_) + + # value_linear + model.value_linear0_.weight.data = source.value_linear0_.weight.data + model.value_linear0_.bias.data = source.value_linear0_.bias.data + model.value_linear1_.weight.data = source.value_linear1_.weight.data + model.value_linear1_.bias.data = source.value_linear1_.bias.data + + input_data = torch.ones([1, INPUT_CHANNEL_NUM, BOARD_SIZE, BOARD_SIZE]) + model.eval() + script_model = torch.jit.trace(model, input_data) + # script_model = torch.jit.script(model) + model_path = f"{args.game}_{os.path.basename(source_model_name)}" + script_model.save(model_path) + print(f"{model_path}にパラメータを保存") From a377c6bb52382b6ff033b5eb59268c235c747fcc Mon Sep 17 00:00:00 2001 From: sakoda Date: Sun, 7 Feb 2021 15:26:09 +0900 Subject: [PATCH 33/87] =?UTF-8?q?=E9=96=93=E9=81=95=E3=81=A3=E3=81=A6?= =?UTF-8?q?=E3=83=91=E3=82=B9=E3=82=92=E3=83=86=E3=82=B9=E3=83=88=E6=99=82?= =?UTF-8?q?=E3=81=AE=E3=82=82=E3=81=AE=E3=81=AB=E3=81=97=E3=81=9F=E3=81=BE?= =?UTF-8?q?=E3=81=BE=E3=81=A0=E3=81=A3=E3=81=9F=E3=83=9F=E3=82=B9=E3=82=92?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/convert_old_model_file.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/convert_old_model_file.py b/scripts/convert_old_model_file.py index d17dd03c..89fb0a7b 100755 --- a/scripts/convert_old_model_file.py +++ b/scripts/convert_old_model_file.py @@ -116,7 +116,7 @@ def load_conv_and_norm(dst, src): model_names = natsorted(glob.glob(f"{args.source_dir}/*.model")) for source_model_name in model_names: - source = torch.jit.load("/home/sakoda/learn_result/supervised/20201029_cat/cat_bl10_ch128_3000000.model").cpu() + source = torch.jit.load(source_model_name).cpu() # state_first load_conv_and_norm(model.state_first_conv_and_norm_, source.state_first_conv_and_norm_) From a08f943f180562b79ebcf3157bf22b0ed67d7a73 Mon Sep 17 00:00:00 2001 From: sakoda Date: Mon, 8 Feb 2021 09:56:17 +0900 Subject: [PATCH 34/87] =?UTF-8?q?=E3=83=A2=E3=83=87=E3=83=AB=E5=90=8D?= =?UTF-8?q?=E3=81=8B=E3=82=89=E8=87=AA=E5=8B=95=E7=9A=84=E3=81=AB=E3=83=96?= =?UTF-8?q?=E3=83=AD=E3=83=83=E3=82=AF=E6=95=B0,=E3=83=81=E3=83=A3?= =?UTF-8?q?=E3=83=B3=E3=83=8D=E3=83=AB=E6=95=B0=E3=82=92=E8=AA=AD=E3=81=BF?= =?UTF-8?q?=E5=8F=96=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/convert_old_model_file.py | 52 ++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/scripts/convert_old_model_file.py b/scripts/convert_old_model_file.py index 89fb0a7b..e307cff1 100755 --- a/scripts/convert_old_model_file.py +++ b/scripts/convert_old_model_file.py @@ -8,14 +8,12 @@ import argparse import glob import os +import re from natsort import natsorted parser = argparse.ArgumentParser() parser.add_argument("-source_dir", type=str, required=True) parser.add_argument("-game", default="shogi", choices=["shogi", "othello"]) -parser.add_argument("-value_type", default="cat", choices=["sca", "cat"]) -parser.add_argument("--block_num", type=int, default=10) -parser.add_argument("--channel_num", type=int, default=128) args = parser.parse_args() REDUCTION = 8 @@ -71,12 +69,38 @@ def forward(self, x): return t +def load_conv_and_norm(dst, src): + dst.conv_.weight.data = src.conv_.weight.data + dst.norm_.weight.data = src.norm_.weight.data + dst.norm_.bias.data = src.norm_.bias.data + dst.norm_.running_mean = src.norm_.running_mean + dst.norm_.running_var = src.norm_.running_var + + +# ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う +source_model_names = natsorted(glob.glob(f"{args.source_dir}/*.model")) + +# 1番目のモデル名からブロック数,チャンネル数を読み取る.これらは1ディレクトリ内で共通だという前提 +basename_without_ext = os.path.splitext(os.path.basename(source_model_names[0]))[0] +parts = basename_without_ext.split("_") +block_num = None +channel_num = None +for p in parts: + if "bl" in p: + block_num = int(re.sub("\\D", "", p)) + elif "ch" in p: + channel_num = int(re.sub("\\D", "", p)) + + +# 上で取得したブロック数, チャンネル数を前提にクラスを定義 +# forwardでもchannel_numを使うのでコンストラクタの引数として渡すと面倒なので定義を遅らせて外部の変数を利用する形に +# self.channel_num などに保存するのも、余計な変数をクラスに保持させたくない(というかTorchScript化の際にエラーになったような) class CategoricalNetwork(nn.Module): - def __init__(self, channel_num): + def __init__(self): super(CategoricalNetwork, self).__init__() self.state_first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, channel_num, 3) self.blocks = nn.Sequential() - for i in range(args.block_num): + for i in range(block_num): self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, KERNEL_SIZE, REDUCTION)) self.policy_conv_ = nn.Conv2d(channel_num, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) @@ -93,7 +117,7 @@ def forward(self, x): value = self.value_conv_and_norm_.forward(x) value = F.relu(value) - value = value.view([-1, args.channel_num * BOARD_SIZE * BOARD_SIZE]) + value = value.view([-1, channel_num * BOARD_SIZE * BOARD_SIZE]) value = self.value_linear0_.forward(value) value = F.relu(value) value = self.value_linear1_.forward(value) @@ -101,21 +125,11 @@ def forward(self, x): return policy, value -def load_conv_and_norm(dst, src): - dst.conv_.weight.data = src.conv_.weight.data - dst.norm_.weight.data = src.norm_.weight.data - dst.norm_.bias.data = src.norm_.bias.data - dst.norm_.running_mean = src.norm_.running_mean - dst.norm_.running_var = src.norm_.running_var - - # インスタンス生成 -model = CategoricalNetwork(args.channel_num) - -# ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う -model_names = natsorted(glob.glob(f"{args.source_dir}/*.model")) +model = CategoricalNetwork() -for source_model_name in model_names: +# 各モデルファイルのパラメータをコピーしてTorchScriptとして保存 +for source_model_name in source_model_names: source = torch.jit.load(source_model_name).cpu() # state_first From 45c5a27353205cc97268384392ead56aaf779175 Mon Sep 17 00:00:00 2001 From: sakoda Date: Mon, 8 Feb 2021 12:27:26 +0900 Subject: [PATCH 35/87] =?UTF-8?q?load=E6=99=82=E3=81=ABopt=5Fbatch=5Fsize?= =?UTF-8?q?=E3=82=92=E6=8C=87=E5=AE=9A=E3=81=99=E3=82=8B=E3=82=88=E3=81=86?= =?UTF-8?q?=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.cpp | 4 ++-- src/infer_model.cpp | 6 +++--- src/infer_model.hpp | 2 +- src/othello/interface.cpp | 4 ++-- src/searcher_for_play.cpp | 2 +- src/shogi/test.cpp | 15 +++++++-------- 6 files changed, 16 insertions(+), 17 deletions(-) diff --git a/src/game_generator.cpp b/src/game_generator.cpp index f8675d6e..d4364024 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -4,7 +4,7 @@ void GameGenerator::genGames() { //まず最初のロード - neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_); + neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size); need_load = false; //生成スレッドを生成 @@ -61,7 +61,7 @@ void GameGenerator::genSlave(int64_t thread_id) { //全スレッドが読み込もうとする必要はないので代表してid=0のスレッドに任せる if (need_load && thread_id == 0) { gpu_mutex.lock(); - neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_); + neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size); need_load = false; gpu_mutex.unlock(); } diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 5144925e..7716e96a 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -4,15 +4,15 @@ #include #include -void InferModel::load(const std::string& model_path, int64_t gpu_id) { +void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size) { torch::jit::Module module = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); module.to(device_, torch::kHalf); module.eval(); std::vector in_min = { 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; - std::vector in_opt = { 128, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; - std::vector in_max = { 256, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; + std::vector in_opt = { opt_batch_size, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; + std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; //trtorch trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); diff --git a/src/infer_model.hpp b/src/infer_model.hpp index a33b7ad5..a00beb86 100644 --- a/src/infer_model.hpp +++ b/src/infer_model.hpp @@ -7,7 +7,7 @@ class InferModel { public: InferModel() : device_(torch::kCPU) {} - void load(const std::string& model_path, int64_t gpu_id); + void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size); std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); private: diff --git a/src/othello/interface.cpp b/src/othello/interface.cpp index 846bc2a7..b50708ae 100644 --- a/src/othello/interface.cpp +++ b/src/othello/interface.cpp @@ -107,7 +107,7 @@ void Interface::test() { search_options.search_batch_size = 1; search_options.output_log_file = true; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0); + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size); SearcherForPlay searcher(search_options); Position pos; @@ -287,7 +287,7 @@ void Interface::outputValue() { root_.init(); std::ofstream ofs("value_output.txt"); InferModel nn; - nn.load(options_.model_name, 0); + nn.load(options_.model_name, 0, 1); std::uniform_real_distribution dist(0.0, 1.0); diff --git a/src/searcher_for_play.cpp b/src/searcher_for_play.cpp index 56350acb..f929db61 100644 --- a/src/searcher_for_play.cpp +++ b/src/searcher_for_play.cpp @@ -16,7 +16,7 @@ SearcherForPlay::SearcherForPlay(const SearchOptions& search_options) //GPUを準備 for (int64_t i = 0; i < search_options.gpu_num; i++) { neural_networks_.emplace_back(); - neural_networks_[i].load(search_options_.model_name, i); + neural_networks_[i].load(search_options_.model_name, i, search_options.search_batch_size); } //GPUに対するmutexを準備 diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index 52f05f38..148e79c6 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -14,7 +14,7 @@ void test() { search_options.search_batch_size = 1; search_options.output_log_file = true; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0); + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size); SearcherForPlay searcher(search_options); Position pos; @@ -81,9 +81,6 @@ void infiniteTest() { } void checkGenSpeed() { - InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0); - constexpr int64_t buffer_size = 1048576; SearchOptions search_options; search_options.search_limit = 800; @@ -272,12 +269,13 @@ void checkVal() { void checkPredictSpeed() { Position pos; constexpr int64_t REPEAT_NUM = 1000; + constexpr int64_t BATCH_SIZE = 4096; std::cout << std::fixed; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0); + nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE); - for (int64_t batch_size = 1; batch_size <= 4096; batch_size *= 2) { + for (int64_t batch_size = 1; batch_size <= BATCH_SIZE; batch_size *= 2) { //バッチサイズ分入力を取得 std::vector input; for (int64_t k = 0; k < batch_size; k++) { @@ -458,6 +456,7 @@ void searchWithLog() { void testLoad() { constexpr int64_t LOOP_NUM = 20; + constexpr int64_t BATCH_SIZE = 128; //時間計測開始 Timer timer; @@ -467,7 +466,7 @@ void testLoad() { std::cout << "通常の試行" << std::endl; for (int64_t num = 0; num < LOOP_NUM; num++) { InferModel model; - model.load(DEFAULT_MODEL_NAME, 0); + model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE); int64_t ela = timer.elapsedSeconds(); int64_t curr = ela - pre; pre = ela; @@ -481,7 +480,7 @@ void testLoad() { for (int64_t num = 0; num < LOOP_NUM; num++) { std::thread thread([]() { InferModel model; - model.load(DEFAULT_MODEL_NAME, 0); + model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE); }); thread.join(); int64_t ela = timer.elapsedSeconds(); From a45aed49ec96f17899fd7f5ddd075b46e8f6a701 Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 9 Feb 2021 09:59:31 +0900 Subject: [PATCH 36/87] =?UTF-8?q?YaneuraOu=E3=81=AEcommit=20id=E3=82=92?= =?UTF-8?q?=E6=96=B0=E3=81=97=E3=81=84=E3=82=82=E3=81=AE=E3=81=AB=E6=9B=B4?= =?UTF-8?q?=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/download_Suisho.sh | 5 +---- scripts/download_YaneuraOu.sh | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/download_Suisho.sh b/scripts/download_Suisho.sh index 8ba588dc..cc6a2769 100755 --- a/scripts/download_Suisho.sh +++ b/scripts/download_Suisho.sh @@ -9,10 +9,7 @@ root_dir=../.. # GitHubからソースコードをダウンロード git clone https://github.com/yaneurao/YaneuraOu ${root_dir}/Suisho cd ${root_dir}/Suisho/source - -# Gitの特定コミットへ移動 -# (dlshogiが対局していたものに合わせる cf. https://tadaoyamaoka.hatenablog.com/entry/2020/08/10/220411) -git checkout 276faf80d51dd6cae053112db8021171d5dbf4e8 +git checkout b0a3a2a4f7565bbefb85999368df15e9c90c621f # デフォルトではclangを使うようになっているがg++を使いたいのでMakefileを書き換える sed -i -e "s/#COMPILER = g++/COMPILER = g++/g" Makefile diff --git a/scripts/download_YaneuraOu.sh b/scripts/download_YaneuraOu.sh index f6ac79e0..61d94a8e 100755 --- a/scripts/download_YaneuraOu.sh +++ b/scripts/download_YaneuraOu.sh @@ -9,7 +9,7 @@ root_dir=../.. # GitHubからソースコードをダウンロード git clone https://github.com/yaneurao/YaneuraOu ${root_dir}/YaneuraOu cd ${root_dir}/YaneuraOu/source -git checkout 1048b6a1874ead3e385e5c62064cf790a9bdebea +git checkout b0a3a2a4f7565bbefb85999368df15e9c90c621f # デフォルトではclangを使うようになっているがg++を使いたいのでMakefileを書き換える sed -i -e "s/#COMPILER = g++/COMPILER = g++/g" Makefile From 3c09d5671875a87cb197ad6066c67c417f5fa618 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 11 Feb 2021 11:36:06 +0900 Subject: [PATCH 37/87] =?UTF-8?q?SENNICHITE=E3=82=92=E9=99=A4=E5=A4=96?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/shogi/game.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/shogi/game.cpp b/src/shogi/game.cpp index 0249fffa..cc1d3dfa 100644 --- a/src/shogi/game.cpp +++ b/src/shogi/game.cpp @@ -59,14 +59,12 @@ std::tuple loadCSAOneGame(std::ifstream& ifs, float rate_threshold) if (buf.substr(0, 6) == "%TORYO") { game.result = (pos.color() == BLACK ? MIN_SCORE : MAX_SCORE); ok = true; - } else if (buf.substr(0, 11) == "%SENNICHITE") { - game.result = (MAX_SCORE + MIN_SCORE) / 2; - ok = true; } else if (buf.substr(0, 6) == "%KACHI") { game.result = (pos.color() == BLACK ? MAX_SCORE : MIN_SCORE); ok = true; - } else if (buf.substr(0, 7) == "%CHUDAN" || buf.substr(0, 16) == "%+ILLEGAL_ACTION" || - buf.substr(0, 16) == "%-ILLEGAL_ACTION" || buf.substr(0, 8) == "%TIME_UP") { + } else if (buf.substr(0, 11) == "%SENNICHITE" || buf.substr(0, 7) == "%CHUDAN" || + buf.substr(0, 16) == "%+ILLEGAL_ACTION" || buf.substr(0, 16) == "%-ILLEGAL_ACTION" || + buf.substr(0, 8) == "%TIME_UP") { //ダメな対局であったというフラグを返す return std::make_tuple(game, false); } else { From d6e3ded3934d37c790880b5cf0cdf4d97934b9ce Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 11 Feb 2021 18:19:03 +0900 Subject: [PATCH 38/87] =?UTF-8?q?=E3=81=A8=E3=82=8A=E3=81=82=E3=81=88?= =?UTF-8?q?=E3=81=9AINT8=E3=81=A7=E3=81=AE=E6=8E=A2=E7=B4=A2=E3=81=8C?= =?UTF-8?q?=E5=8F=AF=E8=83=BD=E3=81=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dataset.cpp | 36 ++++++++++++++++++++++++++++++++++++ src/dataset.hpp | 21 +++++++++++++++++++++ src/infer_model.cpp | 19 ++++++++++++++++--- 3 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 src/dataset.cpp create mode 100644 src/dataset.hpp diff --git a/src/dataset.cpp b/src/dataset.cpp new file mode 100644 index 00000000..f588bc9b --- /dev/null +++ b/src/dataset.cpp @@ -0,0 +1,36 @@ +#include "dataset.hpp" +#include "include_switch.hpp" +#include "learn.hpp" +#include "neural_network.hpp" + +MyDataset::MyDataset(const std::string& root) { + std::vector data = loadData(root, false, 3200); + Position pos; + std::vector value_teachers; + + for (const LearningData& datum : data) { + pos.fromStr(datum.position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + std::vector inputs; + inputs.insert(inputs.end(), feature.begin(), feature.end()); + data_.push_back(torch::tensor(inputs).view({ 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH })); + + //policyの教師信号 + std::vector policy_teachers(POLICY_DIM, 0.0); + for (const std::pair& e : datum.policy) { + policy_teachers[e.first] = e.second; + } + targets_.push_back(torch::tensor(policy_teachers)); + + //valueの教師信号 + value_teachers.push_back(datum.value); + } +} + +torch::data::Example<> MyDataset::get(size_t index) { + return { data_[index].clone().to(torch::kCUDA), targets_[index].clone().to(torch::kCUDA) }; +} + +c10::optional MyDataset::size() const { return data_.size(); } \ No newline at end of file diff --git a/src/dataset.hpp b/src/dataset.hpp new file mode 100644 index 00000000..77d27603 --- /dev/null +++ b/src/dataset.hpp @@ -0,0 +1,21 @@ +#ifndef MIACIS_DATASET_HPP +#define MIACIS_DATASET_HPP + +#include + +// The MyDataset Dataset +class MyDataset : public torch::data::datasets::Dataset { +public: + explicit MyDataset(const std::string& root); + + // Returns the pair at index in the dataset + torch::data::Example<> get(size_t index) override; + + // The size of the dataset + c10::optional size() const override; + +private: + std::vector data_, targets_; +}; + +#endif //MIACIS_DATASET_HPP \ No newline at end of file diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 7716e96a..3e421266 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -1,7 +1,9 @@ #include "infer_model.hpp" #include "common.hpp" +#include "dataset.hpp" #include "include_switch.hpp" #include +#include #include void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size) { @@ -14,16 +16,27 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt std::vector in_opt = { opt_batch_size, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; + auto dataset = MyDataset("/home/sakoda/data/floodgate_kifu/small").map(torch::data::transforms::Stack<>()); + auto dataloader = + torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(32).workers(1)); + + const std::string name = "calibration_cache_file.txt"; + auto calibrator = trtorch::ptq::make_int8_calibrator<>(std::move(dataloader), name, true); + //trtorch trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); trtorch::CompileSpec info({ range }); - info.op_precision = torch::kHalf; + info.op_precision = torch::kI8; info.device.gpu_id = gpu_id; + info.ptq_calibrator = calibrator; + info.workspace_size = (1ull << 29); + info.max_batch_size = opt_batch_size * 2; + module_ = trtorch::CompileGraph(module, info); } std::pair, std::vector> InferModel::policyAndValueBatch(const std::vector& inputs) { - torch::Tensor x = torch::tensor(inputs).to(device_).to(torch::kHalf); + torch::Tensor x = torch::tensor(inputs).to(device_); x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); auto out = module_.forward({ x }); auto tuple = out.toTuple(); @@ -37,7 +50,7 @@ std::pair, std::vector> InferModel::policyAnd //CPUに持ってくる policy = policy.cpu(); - torch::Half* p = policy.data_ptr(); + float* p = policy.data_ptr(); for (uint64_t i = 0; i < batch_size; i++) { policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); } From cdc176dcf97c62f98d5d9fcca40736389c8911b1 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 12 Feb 2021 08:26:20 +0900 Subject: [PATCH 39/87] =?UTF-8?q?validation.py=E3=82=92=E5=B0=91=E3=81=97?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/validation.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/validation.py b/scripts/validation.py index 26f7acfc..e8f8378f 100755 --- a/scripts/validation.py +++ b/scripts/validation.py @@ -8,7 +8,8 @@ parser = argparse.ArgumentParser() parser.add_argument("--kifu_path", type=str, default="/root/data/floodgate_kifu/valid") -parser.add_argument("--batch_size", type=int, default=4096) +parser.add_argument("--batch_size", type=int, default=512) +parser.add_argument("--init_model_step", type=int, default=0) args = parser.parse_args() # カレントディレクトリ内にある{prefix}_{step}.modelを評価する @@ -18,7 +19,7 @@ curr_path += "/" # 結果を書き込むファイルを取得 -f = open(curr_path + "validation_loss.txt", "w") +f = open(curr_path + "validation_loss.txt", "a") # ディレクトリにある以下のprefixを持ったパラメータを用いて検証損失の計算を行う model_names = natsorted(glob.glob(curr_path + "*0.model")) @@ -35,6 +36,10 @@ # 最後に出てくるアンダーバーから.modelの直前までにステップ数が記録されているという前提 step = int(model_name[model_name.rfind("_") + 1:model_name.find(".model")]) + # args.init_model_stepより小さいものは調べない + if step < args.init_model_step: + continue + scalar_or_categorical = "scalar" if "sca" in model_name else "categorical" miacis_path = f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}" command = f"checkVal\n{args.kifu_path}\n{args.batch_size}\n{model_name}\nquit" From 2f83705b8b9dab90f86333310aa48baa9d895c51 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 12 Feb 2021 13:45:47 +0900 Subject: [PATCH 40/87] =?UTF-8?q?=E5=AE=89=E5=85=A8=E6=80=A7=E3=81=AE?= =?UTF-8?q?=E9=AB=98=E3=81=9D=E3=81=86=E3=81=AA=E9=AB=98=E9=80=9F=E5=8C=96?= =?UTF-8?q?=E3=82=92=E5=B0=8E=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/searcher.cpp | 12 ++++++------ src/searcher.hpp | 10 +++++++++- src/shogi/position.cpp | 12 +++++++----- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/src/searcher.cpp b/src/searcher.cpp index 7deaa785..c5ad2614 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -2,8 +2,7 @@ int32_t Searcher::selectMaxUcbChild(const HashEntry& node) const { #ifdef USE_CATEGORICAL - int32_t best_index = std::max_element(node.N.begin(), node.N.end()) - node.N.begin(); - float best_value = expOfValueDist(hash_table_.QfromNextValue(node, best_index)); + float best_value = expOfValueDist(node.value); int32_t best_value_index = std::min(valueToIndex(best_value) + 1, BIN_SIZE - 1); int32_t reversed_best_value_index = BIN_SIZE - best_value_index; #endif @@ -12,23 +11,24 @@ int32_t Searcher::selectMaxUcbChild(const HashEntry& node) const { float max_value = INT_MIN; const int32_t sum = node.sum_N + node.virtual_sum_N; + const float U_numerator = std::sqrt(sum + 1); for (uint64_t i = 0; i < node.moves.size(); i++) { - float U = std::sqrt(sum + 1) / (node.N[i] + node.virtual_N[i] + 1); + float U = U_numerator / (node.N[i] + node.virtual_N[i] + 1); assert(U >= 0.0); #ifdef USE_CATEGORICAL float P = 0.0; if (node.child_indices[i] == HashTable::NOT_EXPANDED) { - P = (node.N[i] == 0 ? search_options_.FPU_x1000 / 1000.0 : 1); + P = (node.N[i] == 0 ? fpu_ : 1); } else { std::unique_lock lock(hash_table_[node.child_indices[i]].mutex); for (int32_t j = 0; j < reversed_best_value_index; j++) { P += hash_table_[node.child_indices[i]].value[j]; } } - float ucb = search_options_.C_PUCT_x1000 / 1000.0 * node.nn_policy[i] * U + search_options_.P_coeff_x1000 / 1000.0 * P; + float ucb = c_puct_ * node.nn_policy[i] * U + p_coeff_ * P; if (search_options_.Q_coeff_x1000 > 0) { - ucb += search_options_.Q_coeff_x1000 / 1000.0 * hash_table_.expQfromNext(node, i); + ucb += q_coeff_ * hash_table_.expQfromNext(node, i); } #else float Q = (node.N[i] == 0 ? search_options_.FPU_x1000 / 1000.0 : hash_table_.QfromNextValue(node, i)); diff --git a/src/searcher.hpp b/src/searcher.hpp index 7ea7ed0d..00305fce 100644 --- a/src/searcher.hpp +++ b/src/searcher.hpp @@ -24,7 +24,9 @@ struct BackupQueue { class Searcher { public: explicit Searcher(const SearchOptions& search_options, HashTable& hash_table, GPUQueue& gpu_queue) - : hash_table_(hash_table), search_options_(search_options), gpu_queue_(gpu_queue) {} + : hash_table_(hash_table), search_options_(search_options), gpu_queue_(gpu_queue), + fpu_(search_options_.FPU_x1000 / 1000.0), c_puct_(search_options_.C_PUCT_x1000 / 1000.0), + p_coeff_(search_options_.P_coeff_x1000 / 1000.0), q_coeff_(search_options_.Q_coeff_x1000 / 1000.0) {} //再帰しない探索関数 void select(Position& pos); @@ -58,6 +60,12 @@ class Searcher { //バックアップ要求を貯めるキュー。これは各インスタンスが生成して保持する BackupQueue backup_queue_; + + //select時の定数 + const float fpu_; + const float c_puct_; + const float p_coeff_; + const float q_coeff_; }; #endif //MIACIS_SEARCHER_HPP \ No newline at end of file diff --git a/src/shogi/position.cpp b/src/shogi/position.cpp index 891ffa63..196bf92b 100644 --- a/src/shogi/position.cpp +++ b/src/shogi/position.cpp @@ -1085,11 +1085,13 @@ bool Position::isLastMoveDropPawn() const { return (lastMove().isDrop() && kind( bool Position::isFinish(float& score, bool check_repeat) { //詰みの確認 - std::vector moves = generateAllMoves(); - if (moves.empty()) { - //打ち歩詰めなら手番側(詰まされた側)が勝ち、そうでないなら手番側が負け - score = isLastMoveDropPawn() ? MAX_SCORE : MIN_SCORE; - return true; + if (is_checked_) { + std::vector moves = generateAllMoves(); + if (moves.empty()) { + //打ち歩詰めなら手番側(詰まされた側)が勝ち、そうでないなら手番側が負け + score = isLastMoveDropPawn() ? MAX_SCORE : MIN_SCORE; + return true; + } } //千日手の確認 From e3f35c5f80ca39162d8aa4a8f3467e3119d61a88 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 12 Feb 2021 20:01:43 +0900 Subject: [PATCH 41/87] =?UTF-8?q?checkPredictSpeed=E3=82=92=E6=94=B9?= =?UTF-8?q?=E8=89=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/shogi/test.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index 148e79c6..9d561e56 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -269,7 +269,7 @@ void checkVal() { void checkPredictSpeed() { Position pos; constexpr int64_t REPEAT_NUM = 1000; - constexpr int64_t BATCH_SIZE = 4096; + constexpr int64_t BATCH_SIZE = 512; std::cout << std::fixed; InferModel nn; @@ -291,20 +291,17 @@ void checkPredictSpeed() { } } - std::cout << input.size() << std::endl; - float time = 0.0; for (int64_t i = 0; i < REPEAT_NUM; i++) { auto start = std::chrono::steady_clock::now(); torch::NoGradGuard no_grad_guard; - nn.policyAndValueBatch(pos.makeFeature()); + nn.policyAndValueBatch(input); auto end = std::chrono::steady_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); time += elapsed.count(); } - std::cout << "batch_size = " << std::setw(5) << batch_size << ", " << time / REPEAT_NUM << " microsec / batch" - << std::endl; + std::cout << std::setw(5) << batch_size << "\t" << time / REPEAT_NUM << "\tmicrosec/batch" << std::endl; } } From d3f51d14e8338abacec965099c7a48186bef43ce Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 12 Feb 2021 20:19:38 +0900 Subject: [PATCH 42/87] =?UTF-8?q?=E3=82=AF=E3=83=A9=E3=82=B9=E5=90=8D?= =?UTF-8?q?=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dataset.cpp | 22 ++++++---------------- src/dataset.hpp | 7 ++----- src/infer_model.cpp | 2 +- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/dataset.cpp b/src/dataset.cpp index f588bc9b..2d281c31 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -3,34 +3,24 @@ #include "learn.hpp" #include "neural_network.hpp" -MyDataset::MyDataset(const std::string& root) { +CalibrationDataset::CalibrationDataset(const std::string& root) { std::vector data = loadData(root, false, 3200); Position pos; - std::vector value_teachers; for (const LearningData& datum : data) { pos.fromStr(datum.position_str); //入力 - const std::vector feature = pos.makeFeature(); - std::vector inputs; - inputs.insert(inputs.end(), feature.begin(), feature.end()); + std::vector inputs = pos.makeFeature(); data_.push_back(torch::tensor(inputs).view({ 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH })); - //policyの教師信号 - std::vector policy_teachers(POLICY_DIM, 0.0); - for (const std::pair& e : datum.policy) { - policy_teachers[e.first] = e.second; - } - targets_.push_back(torch::tensor(policy_teachers)); - - //valueの教師信号 - value_teachers.push_back(datum.value); + //targetの方は使わないのでダミーの適当な値を入れる + targets_.push_back(torch::tensor({ 0 })); } } -torch::data::Example<> MyDataset::get(size_t index) { +torch::data::Example<> CalibrationDataset::get(size_t index) { return { data_[index].clone().to(torch::kCUDA), targets_[index].clone().to(torch::kCUDA) }; } -c10::optional MyDataset::size() const { return data_.size(); } \ No newline at end of file +c10::optional CalibrationDataset::size() const { return data_.size(); } \ No newline at end of file diff --git a/src/dataset.hpp b/src/dataset.hpp index 77d27603..ab2c14b5 100644 --- a/src/dataset.hpp +++ b/src/dataset.hpp @@ -3,15 +3,12 @@ #include -// The MyDataset Dataset -class MyDataset : public torch::data::datasets::Dataset { +class CalibrationDataset : public torch::data::datasets::Dataset { public: - explicit MyDataset(const std::string& root); + explicit CalibrationDataset(const std::string& root); - // Returns the pair at index in the dataset torch::data::Example<> get(size_t index) override; - // The size of the dataset c10::optional size() const override; private: diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 3e421266..0dc8dc58 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -16,7 +16,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt std::vector in_opt = { opt_batch_size, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; - auto dataset = MyDataset("/home/sakoda/data/floodgate_kifu/small").map(torch::data::transforms::Stack<>()); + auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/small").map(torch::data::transforms::Stack<>()); auto dataloader = torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(32).workers(1)); From 48be0d881c55abac1cceb2782b8d278675e90863 Mon Sep 17 00:00:00 2001 From: sakoda Date: Sun, 14 Feb 2021 16:46:07 +0900 Subject: [PATCH 43/87] =?UTF-8?q?INT8=E3=81=B8=E3=81=AECalibration?= =?UTF-8?q?=E3=81=8C=E6=AD=A3=E5=B8=B8=E3=81=AB=E5=8B=95=E3=81=8F=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=AB=E3=81=AA=E3=81=A3=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dataset.cpp | 8 ++++++++ src/infer_model.cpp | 8 ++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/dataset.cpp b/src/dataset.cpp index 2d281c31..9de10ce4 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -16,7 +16,15 @@ CalibrationDataset::CalibrationDataset(const std::string& root) { //targetの方は使わないのでダミーの適当な値を入れる targets_.push_back(torch::tensor({ 0 })); + + //データ数をどれくらい取れば良いのかはよくわからない + //とりあえず多めに4000データ取ることにする + if (data_.size() >= 4000) { + break; + } } + + std::cout << data_.size() << " " << targets_.size() << std::endl; } torch::data::Example<> CalibrationDataset::get(size_t index) { diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 0dc8dc58..785a491e 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -9,19 +9,19 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size) { torch::jit::Module module = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); - module.to(device_, torch::kHalf); + module.to(device_); module.eval(); std::vector in_min = { 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; std::vector in_opt = { opt_batch_size, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; - auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/small").map(torch::data::transforms::Stack<>()); + auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/valid").map(torch::data::transforms::Stack<>()); auto dataloader = torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(32).workers(1)); const std::string name = "calibration_cache_file.txt"; - auto calibrator = trtorch::ptq::make_int8_calibrator<>(std::move(dataloader), name, true); + auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(dataloader), name, true); //trtorch trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); @@ -57,7 +57,7 @@ std::pair, std::vector> InferModel::policyAnd #ifdef USE_CATEGORICAL value = torch::softmax(value, 1).cpu(); - torch::Half* value_p = value.data_ptr(); + float* value_p = value.data_ptr(); for (uint64_t i = 0; i < batch_size; i++) { std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); } From 59f8c64c8d759523b8c770a9865f84277bf23b0a Mon Sep 17 00:00:00 2001 From: sakoda Date: Sun, 14 Feb 2021 19:42:15 +0900 Subject: [PATCH 44/87] =?UTF-8?q?=E6=A4=9C=E8=A8=BC=E3=81=AE=E7=B5=90?= =?UTF-8?q?=E6=9E=9C1000=E3=83=87=E3=83=BC=E3=82=BF=E3=81=8C=E4=B8=80?= =?UTF-8?q?=E7=95=AA=E8=89=AF=E3=81=84=E7=B5=90=E6=9E=9C=E3=81=A8=E3=81=AA?= =?UTF-8?q?=E3=81=A3=E3=81=9F=E3=81=AE=E3=81=A7=E3=81=9D=E3=82=8C=E3=82=92?= =?UTF-8?q?=E6=8E=A1=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dataset.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/dataset.cpp b/src/dataset.cpp index 9de10ce4..2c9762f1 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -17,14 +17,11 @@ CalibrationDataset::CalibrationDataset(const std::string& root) { //targetの方は使わないのでダミーの適当な値を入れる targets_.push_back(torch::tensor({ 0 })); - //データ数をどれくらい取れば良いのかはよくわからない - //とりあえず多めに4000データ取ることにする - if (data_.size() >= 4000) { + //先頭1000データのみを用いる + if (data_.size() >= 1000) { break; } } - - std::cout << data_.size() << " " << targets_.size() << std::endl; } torch::data::Example<> CalibrationDataset::get(size_t index) { From 05fd17332ba7d16d62b1c59c3f7e944094eed02a Mon Sep 17 00:00:00 2001 From: sakoda Date: Sun, 14 Feb 2021 19:46:14 +0900 Subject: [PATCH 45/87] =?UTF-8?q?Calibration=E6=99=82=E3=81=AE=E3=83=90?= =?UTF-8?q?=E3=83=83=E3=83=81=E3=82=B5=E3=82=A4=E3=82=BA=E3=82=92128?= =?UTF-8?q?=E3=81=AB=E3=81=99=E3=82=8B=E3=81=93=E3=81=A8=E3=81=A7=E5=AE=89?= =?UTF-8?q?=E5=AE=9A=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E3=81=AA?= =?UTF-8?q?=E3=81=A3=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 785a491e..ddc50cae 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -18,7 +18,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/valid").map(torch::data::transforms::Stack<>()); auto dataloader = - torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(32).workers(1)); + torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(128).workers(1)); const std::string name = "calibration_cache_file.txt"; auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(dataloader), name, true); From 81bc15b7fbbadbc4130d1cbda880b4950982017b Mon Sep 17 00:00:00 2001 From: sakoda Date: Mon, 15 Feb 2021 10:39:17 +0900 Subject: [PATCH 46/87] =?UTF-8?q?generate=5Ftorch=5Fscript=5Fmodel.py?= =?UTF-8?q?=E3=82=92=E6=94=B9=E8=89=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/generate_torch_script_model.py | 102 +++++++++++++------------ 1 file changed, 53 insertions(+), 49 deletions(-) diff --git a/scripts/generate_torch_script_model.py b/scripts/generate_torch_script_model.py index d5d9fca8..0a2e1c18 100755 --- a/scripts/generate_torch_script_model.py +++ b/scripts/generate_torch_script_model.py @@ -5,26 +5,6 @@ import torch.jit import argparse -parser = argparse.ArgumentParser() -parser.add_argument("-game", default="shogi", choices=["shogi", "othello"]) -parser.add_argument("-value_type", default="cat", choices=["sca", "cat"]) -parser.add_argument("--block_num", type=int, default=10) -parser.add_argument("--channel_num", type=int, default=128) -args = parser.parse_args() - -REDUCTION = 8 -KERNEL_SIZE = 3 -VALUE_HIDDEN_NUM = 256 - -if args.game == "shogi": - INPUT_CHANNEL_NUM = 42 - BOARD_SIZE = 9 - POLICY_CHANNEL_NUM = 27 -elif args.game == "othello": - INPUT_CHANNEL_NUM = 2 - BOARD_SIZE = 8 - POLICY_CHANNEL_NUM = 2 - class Conv2DwithBatchNorm(nn.Module): def __init__(self, input_ch, output_ch, kernel_size): @@ -66,12 +46,12 @@ def forward(self, x): class Encoder(nn.Module): - def __init__(self, channel_num): + def __init__(self, input_channel_num, block_num, channel_num, kernel_size=3, reduction=8): super(Encoder, self).__init__() - self.first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, channel_num, 3) + self.first_conv_and_norm_ = Conv2DwithBatchNorm(input_channel_num, channel_num, 3) self.blocks = nn.Sequential() - for i in range(args.block_num): - self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, KERNEL_SIZE, REDUCTION)) + for i in range(block_num): + self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, kernel_size, reduction)) def forward(self, x): x = self.first_conv_and_norm_.forward(x) @@ -81,9 +61,9 @@ def forward(self, x): class PolicyHead(nn.Module): - def __init__(self, channel_num): + def __init__(self, channel_num, policy_channel_num): super(PolicyHead, self).__init__() - self.policy_conv_ = nn.Conv2d(channel_num, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) + self.policy_conv_ = nn.Conv2d(channel_num, policy_channel_num, 1, bias=True, padding=0) def forward(self, x): policy = self.policy_conv_.forward(x) @@ -91,16 +71,17 @@ def forward(self, x): class ValueHead(nn.Module): - def __init__(self, channel_num, unit_num): + def __init__(self, channel_num, board_size, unit_num, hidden_size=256): super(ValueHead, self).__init__() self.value_conv_and_norm_ = Conv2DwithBatchNorm(channel_num, channel_num, 1) - self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * channel_num, VALUE_HIDDEN_NUM) - self.value_linear1_ = nn.Linear(VALUE_HIDDEN_NUM, unit_num) + self.hidden_size = channel_num * board_size * board_size + self.value_linear0_ = nn.Linear(self.hidden_size, hidden_size) + self.value_linear1_ = nn.Linear(hidden_size, unit_num) def forward(self, x): value = self.value_conv_and_norm_.forward(x) value = F.relu(value) - value = value.view([-1, args.channel_num * BOARD_SIZE * BOARD_SIZE]) + value = value.view([-1, self.hidden_size]) value = self.value_linear0_.forward(value) value = F.relu(value) value = self.value_linear1_.forward(value) @@ -108,11 +89,11 @@ def forward(self, x): class ScalarNetwork(nn.Module): - def __init__(self, channel_num): + def __init__(self, input_channel_num, block_num, channel_num, policy_channel_num, board_size): super(ScalarNetwork, self).__init__() - self.encoder_ = Encoder(channel_num) - self.policy_head_ = PolicyHead(channel_num) - self.value_head_ = ValueHead(channel_num, 1) + self.encoder_ = Encoder(input_channel_num, block_num, channel_num) + self.policy_head_ = PolicyHead(channel_num, policy_channel_num) + self.value_head_ = ValueHead(channel_num, board_size, 1) def forward(self, x): x = self.encoder_.forward(x) @@ -123,11 +104,11 @@ def forward(self, x): class CategoricalNetwork(nn.Module): - def __init__(self, channel_num): + def __init__(self, input_channel_num, block_num, channel_num, policy_channel_num, board_size): super(CategoricalNetwork, self).__init__() - self.encoder_ = Encoder(channel_num) - self.policy_head_ = PolicyHead(channel_num) - self.value_head_ = ValueHead(channel_num, 51) + self.encoder_ = Encoder(input_channel_num, block_num, channel_num) + self.policy_head_ = PolicyHead(channel_num, policy_channel_num) + self.value_head_ = ValueHead(channel_num, board_size, 51) def forward(self, x): x = self.encoder_.forward(x) @@ -136,14 +117,37 @@ def forward(self, x): return policy, value -model = None -if args.value_type == "sca": - model = ScalarNetwork(args.channel_num) -elif args.value_type == "cat": - model = CategoricalNetwork(args.channel_num) -input_data = torch.randn([8, INPUT_CHANNEL_NUM, BOARD_SIZE, BOARD_SIZE]) -script_model = torch.jit.trace(model, input_data) -# script_model = torch.jit.script(model) -model_path = f"./{args.game}_{args.value_type}_bl{args.block_num}_ch{args.channel_num}.model" -script_model.save(model_path) -print(f"{model_path}にパラメータを保存") +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-game", default="shogi", choices=["shogi", "othello"]) + parser.add_argument("-value_type", default="cat", choices=["sca", "cat"]) + parser.add_argument("--block_num", type=int, default=10) + parser.add_argument("--channel_num", type=int, default=128) + args = parser.parse_args() + + if args.game == "shogi": + input_channel_num = 42 + board_size = 9 + policy_channel_num = 27 + elif args.game == "othello": + input_channel_num = 2 + board_size = 8 + policy_channel_num = 2 + else: + exit(1) + + model = None + if args.value_type == "sca": + model = ScalarNetwork(input_channel_num, args.block_num, args.channel_num, policy_channel_num, board_size) + elif args.value_type == "cat": + model = CategoricalNetwork(input_channel_num, args.block_num, args.channel_num, policy_channel_num, board_size) + input_data = torch.randn([8, input_channel_num, board_size, board_size]) + # script_model = torch.jit.trace(model, input_data) + script_model = torch.jit.script(model) + model_path = f"./{args.game}_{args.value_type}_bl{args.block_num}_ch{args.channel_num}.model" + script_model.save(model_path) + print(f"{model_path}にパラメータを保存") + + +if __name__ == "__main__": + main() From d72b12a34fbf461590efc3f7dbe92bffb81bbaae Mon Sep 17 00:00:00 2001 From: sakoda Date: Mon, 15 Feb 2021 11:13:29 +0900 Subject: [PATCH 47/87] =?UTF-8?q?convert=5Fold=5Fmodel=5Ffile.py=E3=81=A7?= =?UTF-8?q?=E3=82=82generate=5Ftorch=5Fscript=5Fmodel.py=E3=81=A7=E5=AE=9A?= =?UTF-8?q?=E7=BE=A9=E3=81=97=E3=81=9F=E3=83=8D=E3=83=83=E3=83=88=E3=83=AF?= =?UTF-8?q?=E3=83=BC=E3=82=AF=E3=82=92=E5=88=A9=E7=94=A8=E3=81=99=E3=82=8B?= =?UTF-8?q?=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/convert_old_model_file.py | 138 ++++++------------------------ 1 file changed, 28 insertions(+), 110 deletions(-) diff --git a/scripts/convert_old_model_file.py b/scripts/convert_old_model_file.py index e307cff1..adb02536 100755 --- a/scripts/convert_old_model_file.py +++ b/scripts/convert_old_model_file.py @@ -1,74 +1,12 @@ #!/usr/bin/env python3 - -# !/usr/bin/env python3 -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.jit -import argparse import glob import os import re from natsort import natsorted - -parser = argparse.ArgumentParser() -parser.add_argument("-source_dir", type=str, required=True) -parser.add_argument("-game", default="shogi", choices=["shogi", "othello"]) -args = parser.parse_args() - -REDUCTION = 8 -KERNEL_SIZE = 3 -VALUE_HIDDEN_NUM = 256 - -if args.game == "shogi": - INPUT_CHANNEL_NUM = 42 - BOARD_SIZE = 9 - POLICY_CHANNEL_NUM = 27 -elif args.game == "othello": - INPUT_CHANNEL_NUM = 2 - BOARD_SIZE = 8 - POLICY_CHANNEL_NUM = 2 - - -class Conv2DwithBatchNorm(nn.Module): - def __init__(self, input_ch, output_ch, kernel_size): - super(Conv2DwithBatchNorm, self).__init__() - self.conv_ = nn.Conv2d(input_ch, output_ch, kernel_size, bias=False, padding=kernel_size // 2) - self.norm_ = nn.BatchNorm2d(output_ch) - - def forward(self, x): - t = self.conv_.forward(x) - t = self.norm_.forward(t) - return t - - -class ResidualBlock(nn.Module): - def __init__(self, channel_num, kernel_size, reduction): - super(ResidualBlock, self).__init__() - self.conv_and_norm0_ = Conv2DwithBatchNorm(channel_num, channel_num, kernel_size) - self.conv_and_norm1_ = Conv2DwithBatchNorm(channel_num, channel_num, kernel_size) - self.linear0_ = nn.Linear(channel_num, channel_num // reduction, bias=False) - self.linear1_ = nn.Linear(channel_num // reduction, channel_num, bias=False) - - def forward(self, x): - t = x - t = self.conv_and_norm0_.forward(t) - t = F.relu(t) - t = self.conv_and_norm1_.forward(t) - - y = F.avg_pool2d(t, [t.shape[2], t.shape[3]]) - y = y.view([-1, t.shape[1]]) - y = self.linear0_.forward(y) - y = F.relu(y) - y = self.linear1_.forward(y) - y = torch.sigmoid(y) - y = y.view([-1, t.shape[1], 1, 1]) - t = t * y - - t = F.relu(x + t) - return t +from generate_torch_script_model import * +# batch_normがある場合はちょっと特殊なので関数として切り出しておく def load_conv_and_norm(dst, src): dst.conv_.weight.data = src.conv_.weight.data dst.norm_.weight.data = src.norm_.weight.data @@ -77,6 +15,20 @@ def load_conv_and_norm(dst, src): dst.norm_.running_var = src.norm_.running_var +parser = argparse.ArgumentParser() +parser.add_argument("--source_dir", type=str, required=True) +parser.add_argument("--game", default="shogi", choices=["shogi", "othello"]) +args = parser.parse_args() + +if args.game == "shogi": + input_channel_num = 42 + board_size = 9 + policy_channel_num = 27 +elif args.game == "othello": + input_channel_num = 2 + board_size = 8 + policy_channel_num = 2 + # ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う source_model_names = natsorted(glob.glob(f"{args.source_dir}/*.model")) @@ -91,52 +43,18 @@ def load_conv_and_norm(dst, src): elif "ch" in p: channel_num = int(re.sub("\\D", "", p)) - -# 上で取得したブロック数, チャンネル数を前提にクラスを定義 -# forwardでもchannel_numを使うのでコンストラクタの引数として渡すと面倒なので定義を遅らせて外部の変数を利用する形に -# self.channel_num などに保存するのも、余計な変数をクラスに保持させたくない(というかTorchScript化の際にエラーになったような) -class CategoricalNetwork(nn.Module): - def __init__(self): - super(CategoricalNetwork, self).__init__() - self.state_first_conv_and_norm_ = Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, channel_num, 3) - self.blocks = nn.Sequential() - for i in range(block_num): - self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, KERNEL_SIZE, REDUCTION)) - self.policy_conv_ = nn.Conv2d(channel_num, POLICY_CHANNEL_NUM, 1, bias=True, padding=0) - - self.value_conv_and_norm_ = Conv2DwithBatchNorm(channel_num, channel_num, 1) - self.value_linear0_ = nn.Linear(BOARD_SIZE * BOARD_SIZE * channel_num, VALUE_HIDDEN_NUM) - self.value_linear1_ = nn.Linear(VALUE_HIDDEN_NUM, 51) - - def forward(self, x): - x = self.state_first_conv_and_norm_.forward(x) - x = F.relu(x) - x = self.blocks.forward(x) - - policy = self.policy_conv_.forward(x) - - value = self.value_conv_and_norm_.forward(x) - value = F.relu(value) - value = value.view([-1, channel_num * BOARD_SIZE * BOARD_SIZE]) - value = self.value_linear0_.forward(value) - value = F.relu(value) - value = self.value_linear1_.forward(value) - - return policy, value - - # インスタンス生成 -model = CategoricalNetwork() +model = CategoricalNetwork(input_channel_num, block_num, channel_num, policy_channel_num, board_size) # 各モデルファイルのパラメータをコピーしてTorchScriptとして保存 for source_model_name in source_model_names: source = torch.jit.load(source_model_name).cpu() - # state_first - load_conv_and_norm(model.state_first_conv_and_norm_, source.state_first_conv_and_norm_) + # first_conv + load_conv_and_norm(model.encoder_.first_conv_and_norm_, source.state_first_conv_and_norm_) # block - for i, v in enumerate(model.__dict__["_modules"]["blocks"]): + for i, v in enumerate(model.encoder_.__dict__["_modules"]["blocks"]): source_m = source.__dict__["_modules"][f"state_blocks_{i}"] load_conv_and_norm(v.conv_and_norm0_, source_m.conv_and_norm0_) load_conv_and_norm(v.conv_and_norm1_, source_m.conv_and_norm1_) @@ -144,19 +62,19 @@ def forward(self, x): v.linear1_.weight.data = source_m.linear1_.weight.data # policy_conv - model.policy_conv_.weight.data = source.policy_conv_.weight.data - model.policy_conv_.bias.data = source.policy_conv_.bias.data + model.policy_head_.policy_conv_.weight.data = source.policy_conv_.weight.data + model.policy_head_.policy_conv_.bias.data = source.policy_conv_.bias.data # value_conv_norm_ - load_conv_and_norm(model.value_conv_and_norm_, source.value_conv_and_norm_) + load_conv_and_norm(model.value_head_.value_conv_and_norm_, source.value_conv_and_norm_) # value_linear - model.value_linear0_.weight.data = source.value_linear0_.weight.data - model.value_linear0_.bias.data = source.value_linear0_.bias.data - model.value_linear1_.weight.data = source.value_linear1_.weight.data - model.value_linear1_.bias.data = source.value_linear1_.bias.data + model.value_head_.value_linear0_.weight.data = source.value_linear0_.weight.data + model.value_head_.value_linear0_.bias.data = source.value_linear0_.bias.data + model.value_head_.value_linear1_.weight.data = source.value_linear1_.weight.data + model.value_head_.value_linear1_.bias.data = source.value_linear1_.bias.data - input_data = torch.ones([1, INPUT_CHANNEL_NUM, BOARD_SIZE, BOARD_SIZE]) + input_data = torch.ones([1, input_channel_num, board_size, board_size]) model.eval() script_model = torch.jit.trace(model, input_data) # script_model = torch.jit.script(model) From 856a1b42435e3ccda1077373becb11b7a612f79b Mon Sep 17 00:00:00 2001 From: sakoda Date: Mon, 15 Feb 2021 11:14:09 +0900 Subject: [PATCH 48/87] =?UTF-8?q?=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB?= =?UTF-8?q?=E3=83=88=E3=81=A7=E3=81=AFSAM=E3=82=AA=E3=83=97=E3=83=86?= =?UTF-8?q?=E3=82=A3=E3=83=9E=E3=82=A4=E3=82=B6=E3=82=92=E4=B8=8D=E4=BD=BF?= =?UTF-8?q?=E7=94=A8=E3=81=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 2 +- setting/supervised_learn_settings.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index 74fee238..5fd6a266 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -37,7 +37,7 @@ save_interval 50000 validation_interval 50000 sleep_msec -1 noise_mode 0 -use_sam_optim 1 +use_sam_optim 0 # Shogi init_buffer_by_kifu 0 diff --git a/setting/supervised_learn_settings.txt b/setting/supervised_learn_settings.txt index 95187ac0..f496a703 100644 --- a/setting/supervised_learn_settings.txt +++ b/setting/supervised_learn_settings.txt @@ -15,7 +15,7 @@ learn_rate_decay_step2 1200000 learn_rate_decay_step3 1800000 learn_rate_decay_step4 2400000 learn_rate_decay_period 100000 -use_sam_optim 1 +use_sam_optim 0 # Shogi(AobaZero) load_multi_dir 1 From 7500301e99f89dd951fa00894f926547899c3cbe Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 16 Feb 2021 17:41:01 +0900 Subject: [PATCH 49/87] =?UTF-8?q?testLoad=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 2 +- src/shogi/test.cpp | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index ddc50cae..2cfc8686 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -21,7 +21,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(128).workers(1)); const std::string name = "calibration_cache_file.txt"; - auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(dataloader), name, true); + auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(dataloader), name, false); //trtorch trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index 9d561e56..4c070830 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -453,7 +453,7 @@ void searchWithLog() { void testLoad() { constexpr int64_t LOOP_NUM = 20; - constexpr int64_t BATCH_SIZE = 128; + constexpr int64_t BATCH_SIZE = 256; //時間計測開始 Timer timer; @@ -474,12 +474,18 @@ void testLoad() { timer.start(); pre = 0; std::cout << "スレッドを作成しての試行" << std::endl; + const int64_t gpu_num = torch::getNumGPUs(); for (int64_t num = 0; num < LOOP_NUM; num++) { - std::thread thread([]() { - InferModel model; - model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE); - }); - thread.join(); + std::vector threads; + for (int64_t i = 0; i < gpu_num; i++) { + threads.emplace_back([&]() { + InferModel model; + model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE); + }); + } + for (int64_t i = 0; i < gpu_num; i++) { + threads[i].join(); + } int64_t ela = timer.elapsedSeconds(); int64_t curr = ela - pre; pre = ela; From a7108a33268974b34ab91a785f4e5d083c2b2893 Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 16 Feb 2021 17:43:22 +0900 Subject: [PATCH 50/87] =?UTF-8?q?testLoad=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/shogi/test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index 4c070830..b99a402e 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -461,7 +461,7 @@ void testLoad() { int64_t pre = 0; //通常試行 std::cout << "通常の試行" << std::endl; - for (int64_t num = 0; num < LOOP_NUM; num++) { + for (int64_t num = 0; num < 0; num++) { InferModel model; model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE); int64_t ela = timer.elapsedSeconds(); @@ -480,7 +480,7 @@ void testLoad() { for (int64_t i = 0; i < gpu_num; i++) { threads.emplace_back([&]() { InferModel model; - model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE); + model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE); }); } for (int64_t i = 0; i < gpu_num; i++) { From fb3a918f0df79d50724537cf8861cc456f07ff8e Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 16 Feb 2021 20:33:01 +0900 Subject: [PATCH 51/87] =?UTF-8?q?dataloader=E3=81=AEbatch=5Fsize=E3=82=92o?= =?UTF-8?q?pt=5Fbatch=5Fsize=E3=81=AB=E5=90=88=E3=82=8F=E3=81=9B=E3=82=8B?= =?UTF-8?q?=E3=81=A8=E4=B8=8A=E6=89=8B=E3=81=8F=E3=81=84=E3=81=8F=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 2cfc8686..e3fc4a31 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -18,7 +18,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/valid").map(torch::data::transforms::Stack<>()); auto dataloader = - torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(128).workers(1)); + torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(opt_batch_size).workers(1)); const std::string name = "calibration_cache_file.txt"; auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(dataloader), name, false); From cd4fcd0ff87d6bb24b2f79c73b4d3267de61862b Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 16 Feb 2021 20:33:38 +0900 Subject: [PATCH 52/87] =?UTF-8?q?testLoad=E3=81=AB=E3=81=8A=E3=81=84?= =?UTF-8?q?=E3=81=A6=E5=8F=82=E7=85=A7=E3=81=A7=E3=81=AF=E3=81=AA=E3=81=8F?= =?UTF-8?q?=E3=82=B3=E3=83=94=E3=83=BC=E3=81=A7=E3=82=AD=E3=83=A3=E3=83=97?= =?UTF-8?q?=E3=83=81=E3=83=A3=E3=81=99=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/shogi/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index b99a402e..e734466d 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -478,7 +478,7 @@ void testLoad() { for (int64_t num = 0; num < LOOP_NUM; num++) { std::vector threads; for (int64_t i = 0; i < gpu_num; i++) { - threads.emplace_back([&]() { + threads.emplace_back([i]() { InferModel model; model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE); }); From e7eac4d973942d62556186f408dbfed54dbdc246 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 17 Feb 2021 11:06:38 +0900 Subject: [PATCH 53/87] =?UTF-8?q?Dataset=E3=82=AF=E3=83=A9=E3=82=B9?= =?UTF-8?q?=E3=81=AE=E3=82=B3=E3=83=B3=E3=82=B9=E3=83=88=E3=83=A9=E3=82=AF?= =?UTF-8?q?=E3=82=BF=E3=81=ABdata=5Fnum=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/dataset.cpp | 6 +++--- src/dataset.hpp | 2 +- src/infer_model.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/dataset.cpp b/src/dataset.cpp index 2c9762f1..fa519c9e 100644 --- a/src/dataset.cpp +++ b/src/dataset.cpp @@ -3,7 +3,7 @@ #include "learn.hpp" #include "neural_network.hpp" -CalibrationDataset::CalibrationDataset(const std::string& root) { +CalibrationDataset::CalibrationDataset(const std::string& root, int64_t data_num) { std::vector data = loadData(root, false, 3200); Position pos; @@ -17,8 +17,8 @@ CalibrationDataset::CalibrationDataset(const std::string& root) { //targetの方は使わないのでダミーの適当な値を入れる targets_.push_back(torch::tensor({ 0 })); - //先頭1000データのみを用いる - if (data_.size() >= 1000) { + //全データだと多いので、先頭からいくつかのみを用いる + if (data_.size() >= data_num) { break; } } diff --git a/src/dataset.hpp b/src/dataset.hpp index ab2c14b5..8e74a481 100644 --- a/src/dataset.hpp +++ b/src/dataset.hpp @@ -5,7 +5,7 @@ class CalibrationDataset : public torch::data::datasets::Dataset { public: - explicit CalibrationDataset(const std::string& root); + explicit CalibrationDataset(const std::string& root, int64_t data_num); torch::data::Example<> get(size_t index) override; diff --git a/src/infer_model.cpp b/src/infer_model.cpp index e3fc4a31..94210479 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -16,7 +16,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt std::vector in_opt = { opt_batch_size, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; - auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/valid").map(torch::data::transforms::Stack<>()); + auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/valid", 2000).map(torch::data::transforms::Stack<>()); auto dataloader = torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(opt_batch_size).workers(1)); From 46de982d5fab0524f1283abf0f0671c6785c5d33 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 17 Feb 2021 11:10:59 +0900 Subject: [PATCH 54/87] =?UTF-8?q?using=20namespace=20torch::data=E3=82=92?= =?UTF-8?q?=E7=94=A8=E3=81=84=E3=82=8B=E3=81=93=E3=81=A8=E3=81=A7=E8=A8=98?= =?UTF-8?q?=E8=BF=B0=E3=82=92=E7=B0=A1=E7=95=A5=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 94210479..26e55975 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -16,9 +16,9 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt std::vector in_opt = { opt_batch_size, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; - auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/valid", 2000).map(torch::data::transforms::Stack<>()); - auto dataloader = - torch::data::make_data_loader(std::move(dataset), torch::data::DataLoaderOptions().batch_size(opt_batch_size).workers(1)); + using namespace torch::data; + auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/valid", 2000).map(transforms::Stack<>()); + auto dataloader = make_data_loader(std::move(dataset), DataLoaderOptions().batch_size(opt_batch_size).workers(1)); const std::string name = "calibration_cache_file.txt"; auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(dataloader), name, false); From e8e679d37295d94e26daf7e4fac507fd6a8a55cb Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 17 Feb 2021 11:25:18 +0900 Subject: [PATCH 55/87] =?UTF-8?q?SearchOption=E3=81=ABcalibration=5Fkifu?= =?UTF-8?q?=5Fpath=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.cpp | 13 +++++++------ src/infer_model.cpp | 5 +++-- src/infer_model.hpp | 2 +- src/othello/interface.cpp | 5 +++-- src/search_options.hpp | 2 ++ src/searcher_for_play.cpp | 3 ++- src/shogi/test.cpp | 14 +++++++++----- 7 files changed, 27 insertions(+), 17 deletions(-) diff --git a/src/game_generator.cpp b/src/game_generator.cpp index d4364024..0554f25c 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -4,7 +4,8 @@ void GameGenerator::genGames() { //まず最初のロード - neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size); + neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, + search_options_.calibration_kifu_path); need_load = false; //生成スレッドを生成 @@ -61,7 +62,8 @@ void GameGenerator::genSlave(int64_t thread_id) { //全スレッドが読み込もうとする必要はないので代表してid=0のスレッドに任せる if (need_load && thread_id == 0) { gpu_mutex.lock(); - neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size); + neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, + search_options_.calibration_kifu_path); need_load = false; gpu_mutex.unlock(); } @@ -208,10 +210,9 @@ OneTurnElement GenerateWorker::resultForCurrPos() { //選択回数が0ならMIN_SCORE //選択回数が0ではないのに未展開なら詰み探索が詰みを発見したということなのでMAX_SCORE //その他は普通に計算 - Q_dist[i] = - (N[i] == 0 ? MIN_SCORE - : root_node.child_indices[i] == HashTable::NOT_EXPANDED ? MAX_SCORE - : hash_table_.expQfromNext(root_node, i)); + Q_dist[i] = (N[i] == 0 ? MIN_SCORE + : root_node.child_indices[i] == HashTable::NOT_EXPANDED ? MAX_SCORE + : hash_table_.expQfromNext(root_node, i)); } Q_dist = softmax(Q_dist, std::max(search_options_.temperature_x1000 / 1000.0f, 1e-4f)); diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 26e55975..243275a3 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -6,7 +6,8 @@ #include #include -void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size) { +void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, + const std::string& calibration_kifu_path) { torch::jit::Module module = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); module.to(device_); @@ -17,7 +18,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; using namespace torch::data; - auto dataset = CalibrationDataset("/home/sakoda/data/floodgate_kifu/valid", 2000).map(transforms::Stack<>()); + auto dataset = CalibrationDataset(calibration_kifu_path, 2000).map(transforms::Stack<>()); auto dataloader = make_data_loader(std::move(dataset), DataLoaderOptions().batch_size(opt_batch_size).workers(1)); const std::string name = "calibration_cache_file.txt"; diff --git a/src/infer_model.hpp b/src/infer_model.hpp index a00beb86..d266ad3c 100644 --- a/src/infer_model.hpp +++ b/src/infer_model.hpp @@ -7,7 +7,7 @@ class InferModel { public: InferModel() : device_(torch::kCPU) {} - void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size); + void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path); std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); private: diff --git a/src/othello/interface.cpp b/src/othello/interface.cpp index b50708ae..ff8e5732 100644 --- a/src/othello/interface.cpp +++ b/src/othello/interface.cpp @@ -107,7 +107,7 @@ void Interface::test() { search_options.search_batch_size = 1; search_options.output_log_file = true; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size); + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path); SearcherForPlay searcher(search_options); Position pos; @@ -286,8 +286,9 @@ void Interface::quit() { void Interface::outputValue() { root_.init(); std::ofstream ofs("value_output.txt"); + SearchOptions search_option; InferModel nn; - nn.load(options_.model_name, 0, 1); + nn.load(options_.model_name, 0, 1, search_option.calibration_kifu_path); std::uniform_real_distribution dist(0.0, 1.0); diff --git a/src/search_options.hpp b/src/search_options.hpp index a1c7f78f..cc44699f 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -63,6 +63,7 @@ struct SearchOptions { spin_options.emplace("hold_moves_num", SpinOption(hold_moves_num = 32, 1, 593)); filename_options.emplace("model_name", FilenameOption(model_name = DEFAULT_MODEL_NAME)); filename_options.emplace("book_file_name", FilenameOption(book_file_name = "book.txt")); + filename_options.emplace("calibration_kifu_path", FilenameOption(calibration_kifu_path = "../../../data/floodgate_kifu/valid")); // clang-format on } bool USI_Ponder; @@ -93,6 +94,7 @@ struct SearchOptions { int64_t hold_moves_num; std::string model_name; std::string book_file_name; + std::string calibration_kifu_path; std::map check_options; std::map spin_options; diff --git a/src/searcher_for_play.cpp b/src/searcher_for_play.cpp index f929db61..9da1ae4c 100644 --- a/src/searcher_for_play.cpp +++ b/src/searcher_for_play.cpp @@ -16,7 +16,8 @@ SearcherForPlay::SearcherForPlay(const SearchOptions& search_options) //GPUを準備 for (int64_t i = 0; i < search_options.gpu_num; i++) { neural_networks_.emplace_back(); - neural_networks_[i].load(search_options_.model_name, i, search_options.search_batch_size); + neural_networks_[i].load(search_options_.model_name, i, search_options.search_batch_size, + search_options.calibration_kifu_path); } //GPUに対するmutexを準備 diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index e734466d..4c5b04e8 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -14,7 +14,7 @@ void test() { search_options.search_batch_size = 1; search_options.output_log_file = true; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size); + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path); SearcherForPlay searcher(search_options); Position pos; @@ -272,8 +272,10 @@ void checkPredictSpeed() { constexpr int64_t BATCH_SIZE = 512; std::cout << std::fixed; + SearchOptions search_options; + InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE); + nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path); for (int64_t batch_size = 1; batch_size <= BATCH_SIZE; batch_size *= 2) { //バッチサイズ分入力を取得 @@ -455,6 +457,8 @@ void testLoad() { constexpr int64_t LOOP_NUM = 20; constexpr int64_t BATCH_SIZE = 256; + SearchOptions search_options; + //時間計測開始 Timer timer; timer.start(); @@ -463,7 +467,7 @@ void testLoad() { std::cout << "通常の試行" << std::endl; for (int64_t num = 0; num < 0; num++) { InferModel model; - model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE); + model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path); int64_t ela = timer.elapsedSeconds(); int64_t curr = ela - pre; pre = ela; @@ -478,9 +482,9 @@ void testLoad() { for (int64_t num = 0; num < LOOP_NUM; num++) { std::vector threads; for (int64_t i = 0; i < gpu_num; i++) { - threads.emplace_back([i]() { + threads.emplace_back([i, search_options]() { InferModel model; - model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE); + model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE, search_options.calibration_kifu_path); }); } for (int64_t i = 0; i < gpu_num; i++) { From d539b36e7fd8055a874b981b42590ab45b3443cf Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 17 Feb 2021 16:15:03 +0900 Subject: [PATCH 56/87] =?UTF-8?q?checkValInfer=E3=82=92=E5=AE=9F=E8=A3=85?= =?UTF-8?q?=E3=80=82=E4=B8=8D=E8=A6=81=E3=81=AB=E3=81=AA=E3=81=A3=E3=81=9F?= =?UTF-8?q?=E3=82=89=E5=89=8A=E9=99=A4=E3=81=97=E3=82=88=E3=81=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 106 ++++++++++++++++++++++++++++++++++++++++ src/infer_model.hpp | 1 + src/learn.cpp | 7 ++- src/learn.hpp | 6 ++- src/shogi/interface.cpp | 1 + src/shogi/test.cpp | 29 +++++++++++ src/shogi/test.hpp | 1 + 7 files changed, 147 insertions(+), 4 deletions(-) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 243275a3..8986dbbe 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -68,4 +68,110 @@ std::pair, std::vector> InferModel::policyAnd std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); #endif return std::make_pair(policies, values); +} + +std::array InferModel::validLoss(const std::vector& data) { +#ifdef USE_CATEGORICAL + Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + if (data[i].value != 0 && data[i].value != BIN_SIZE - 1) { + std::cerr << "Categoricalの検証データは現状のところValueが-1 or 1でないといけない" << std::endl; + std::exit(1); + } + value_teachers.push_back(data[i].value == 0 ? MIN_SCORE : MAX_SCORE); + } + + torch::Tensor x = torch::tensor(inputs).to(device_); + x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); + auto out = module_.forward({ x }); + auto tuple = out.toTuple(); + torch::Tensor policy_logit = tuple->elements()[0].toTensor(); + torch::Tensor value_logit = tuple->elements()[1].toTensor(); + + torch::Tensor logits = policy_logit.view({ -1, POLICY_DIM }); + + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); + + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(logits, 1), 1, false); + + //Valueの分布を取得 + torch::Tensor value_cat = torch::softmax(value_logit, 1); + + //i番目の要素が示す値はMIN_SCORE + (i + 0.5) * VALUE_WIDTH + std::vector each_value; + for (int64_t i = 0; i < BIN_SIZE; i++) { + each_value.emplace_back(MIN_SCORE + (i + 0.5) * VALUE_WIDTH); + } + torch::Tensor each_value_tensor = torch::tensor(each_value).to(device_); + + //Categorical分布と内積を取ることで期待値を求める + torch::Tensor value = (each_value_tensor * value_cat).sum(1); + + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); + +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif + return { policy_loss, value_loss }; + +#else + static Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + value_teachers.push_back(data[i].value); + } + + torch::Tensor input_tensor = encode(inputs); + auto out = module_.forward({ input_tensor }); + auto tuple = out.toTuple(); + torch::Tensor policy = tuple->elements()[0].toTensor(); + torch::Tensor value = tuple->elements()[1].toTensor(); + + torch::Tensor policy_logits = policy.view({ -1, POLICY_DIM }); + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(policy_logits, 1), 1, false); + + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); + value = value.view(-1); +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif + + return { policy_loss, value_loss }; +#endif } \ No newline at end of file diff --git a/src/infer_model.hpp b/src/infer_model.hpp index d266ad3c..3726d4f7 100644 --- a/src/infer_model.hpp +++ b/src/infer_model.hpp @@ -9,6 +9,7 @@ class InferModel { InferModel() : device_(torch::kCPU) {} void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path); std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); + std::array validLoss(const std::vector& data); private: torch::jit::Module module_; diff --git a/src/learn.cpp b/src/learn.cpp index ef25dc5d..a64c92ef 100644 --- a/src/learn.cpp +++ b/src/learn.cpp @@ -6,8 +6,8 @@ #include #include -std::array validation(LearningModel& model, const std::vector& valid_data, - uint64_t batch_size) { +template +std::array validation(ModelType& model, const std::vector& valid_data, uint64_t batch_size) { torch::NoGradGuard no_grad_guard; std::array losses{}; for (uint64_t index = 0; index < valid_data.size();) { @@ -30,6 +30,9 @@ std::array validation(LearningModel& model, const std::vec return losses; } +template std::array validation(InferModel& model, const std::vector& valid_data, + uint64_t batch_size); + std::vector loadData(const std::string& file_path, bool data_augmentation, float rate_threshold) { //棋譜を読み込めるだけ読み込む std::vector games = loadGames(file_path, rate_threshold); diff --git a/src/learn.hpp b/src/learn.hpp index 6ac9f788..b891e8ee 100644 --- a/src/learn.hpp +++ b/src/learn.hpp @@ -1,8 +1,9 @@ #ifndef MIACIS_LEARN_HPP #define MIACIS_LEARN_HPP -#include "neural_network.hpp" +#include "infer_model.hpp" #include "learning_model.hpp" +#include "neural_network.hpp" #include "timer.hpp" //標準出力とファイルストリームに同時に出力するためのクラス @@ -105,7 +106,8 @@ class LearnManager { std::vector loadData(const std::string& file_path, bool data_augmentation, float rate_threshold); //validationを行う関数 -std::array validation(LearningModel& model, const std::vector& valid_data, uint64_t batch_size); +template +std::array validation(ModelType& model, const std::vector& valid_data, uint64_t batch_size); //棋譜からの教師あり学習 void supervisedLearn(); diff --git a/src/shogi/interface.cpp b/src/shogi/interface.cpp index 66acaa7d..4b14d236 100644 --- a/src/shogi/interface.cpp +++ b/src/shogi/interface.cpp @@ -29,6 +29,7 @@ Interface::Interface() : searcher_(nullptr) { command_["checkGenSpeed"] = checkGenSpeed; command_["checkPredictSpeed"] = checkPredictSpeed; command_["checkVal"] = checkVal; + command_["checkValInfer"] = checkValInfer; command_["checkDoAndUndo"] = checkDoAndUndo; command_["checkMirror"] = checkMirror; command_["checkBook"] = checkBook; diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index 4c5b04e8..e563a6b4 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -266,6 +266,35 @@ void checkVal() { } } +void checkValInfer() { + //データを取得 + std::string path; + std::cout << "validation kifu path : "; + std::cin >> path; + int64_t batch_size; + std::cout << "batch_size : "; + std::cin >> batch_size; + std::string model_file; + std::cout << "model_file : "; + std::cin >> model_file; + std::string calibration_kifu_path; + std::cout << "calibration_kifu_path : "; + std::cin >> calibration_kifu_path; + + std::vector data = loadData(path, false, 3000); + std::cout << "data.size() = " << data.size() << std::endl; + + //ネットワークの準備 + InferModel nn; + nn.load(model_file, 0, batch_size, calibration_kifu_path); + + std::array v = validation(nn, data, batch_size); + std::cout << std::fixed << std::setprecision(4); + for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { + std::cout << v[i] << " \n"[i == LOSS_TYPE_NUM - 1]; + } +} + void checkPredictSpeed() { Position pos; constexpr int64_t REPEAT_NUM = 1000; diff --git a/src/shogi/test.hpp b/src/shogi/test.hpp index f424092f..fff57d69 100644 --- a/src/shogi/test.hpp +++ b/src/shogi/test.hpp @@ -10,6 +10,7 @@ void checkSearchSpeed2(); void checkGenSpeed(); void checkPredictSpeed(); void checkVal(); +void checkValInfer(); void checkSegmentTree(); void checkDoAndUndo(); void checkMirror(); From ce0629da6d2f855fec5b2d6c4d20a01dc0f9c29e Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 17 Feb 2021 21:45:54 +0900 Subject: [PATCH 57/87] =?UTF-8?q?calibration=5Fdata=5Fnum=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.cpp | 4 ++-- src/infer_model.cpp | 9 +++++---- src/infer_model.hpp | 3 ++- src/search_options.hpp | 2 ++ src/searcher_for_play.cpp | 2 +- src/shogi/test.cpp | 25 ++++++++++++++++--------- 6 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/game_generator.cpp b/src/game_generator.cpp index 0554f25c..1d82a549 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -5,7 +5,7 @@ void GameGenerator::genGames() { //まず最初のロード neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, - search_options_.calibration_kifu_path); + search_options_.calibration_kifu_path, search_options_.calibration_data_num); need_load = false; //生成スレッドを生成 @@ -63,7 +63,7 @@ void GameGenerator::genSlave(int64_t thread_id) { if (need_load && thread_id == 0) { gpu_mutex.lock(); neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, - search_options_.calibration_kifu_path); + search_options_.calibration_kifu_path, search_options_.calibration_data_num); need_load = false; gpu_mutex.unlock(); } diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 8986dbbe..71dcbf4b 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -7,7 +7,7 @@ #include void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, - const std::string& calibration_kifu_path) { + const std::string& calibration_kifu_path, int64_t calibration_data_num) { torch::jit::Module module = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); module.to(device_); @@ -18,7 +18,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; using namespace torch::data; - auto dataset = CalibrationDataset(calibration_kifu_path, 2000).map(transforms::Stack<>()); + auto dataset = CalibrationDataset(calibration_kifu_path, calibration_data_num).map(transforms::Stack<>()); auto dataloader = make_data_loader(std::move(dataset), DataLoaderOptions().batch_size(opt_batch_size).workers(1)); const std::string name = "calibration_cache_file.txt"; @@ -154,8 +154,9 @@ std::array InferModel::validLoss(const std::vector value_teachers.push_back(data[i].value); } - torch::Tensor input_tensor = encode(inputs); - auto out = module_.forward({ input_tensor }); + torch::Tensor x = torch::tensor(inputs).to(device_); + x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); + auto out = module_.forward({ x }); auto tuple = out.toTuple(); torch::Tensor policy = tuple->elements()[0].toTensor(); torch::Tensor value = tuple->elements()[1].toTensor(); diff --git a/src/infer_model.hpp b/src/infer_model.hpp index 3726d4f7..bbf3918c 100644 --- a/src/infer_model.hpp +++ b/src/infer_model.hpp @@ -7,7 +7,8 @@ class InferModel { public: InferModel() : device_(torch::kCPU) {} - void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path); + void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path, + int64_t calibration_data_num); std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); std::array validLoss(const std::vector& data); diff --git a/src/search_options.hpp b/src/search_options.hpp index cc44699f..1468e629 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -61,6 +61,7 @@ struct SearchOptions { spin_options.emplace("print_policy_num", SpinOption(print_policy_num = 0, 0, 593)); spin_options.emplace("remained_turn_divisor", SpinOption(remained_turn_divisor = 1, 1, MAX)); spin_options.emplace("hold_moves_num", SpinOption(hold_moves_num = 32, 1, 593)); + spin_options.emplace("calibration_data_num", SpinOption(calibration_data_num = 1, 128, MAX)); filename_options.emplace("model_name", FilenameOption(model_name = DEFAULT_MODEL_NAME)); filename_options.emplace("book_file_name", FilenameOption(book_file_name = "book.txt")); filename_options.emplace("calibration_kifu_path", FilenameOption(calibration_kifu_path = "../../../data/floodgate_kifu/valid")); @@ -92,6 +93,7 @@ struct SearchOptions { int64_t print_policy_num; int64_t remained_turn_divisor; int64_t hold_moves_num; + int64_t calibration_data_num; std::string model_name; std::string book_file_name; std::string calibration_kifu_path; diff --git a/src/searcher_for_play.cpp b/src/searcher_for_play.cpp index 9da1ae4c..b497dab7 100644 --- a/src/searcher_for_play.cpp +++ b/src/searcher_for_play.cpp @@ -17,7 +17,7 @@ SearcherForPlay::SearcherForPlay(const SearchOptions& search_options) for (int64_t i = 0; i < search_options.gpu_num; i++) { neural_networks_.emplace_back(); neural_networks_[i].load(search_options_.model_name, i, search_options.search_batch_size, - search_options.calibration_kifu_path); + search_options.calibration_kifu_path, search_options.calibration_data_num); } //GPUに対するmutexを準備 diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index e563a6b4..b0c8743c 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -14,7 +14,8 @@ void test() { search_options.search_batch_size = 1; search_options.output_log_file = true; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path); + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path, + search_options.calibration_data_num); SearcherForPlay searcher(search_options); Position pos; @@ -286,13 +287,18 @@ void checkValInfer() { //ネットワークの準備 InferModel nn; - nn.load(model_file, 0, batch_size, calibration_kifu_path); - std::array v = validation(nn, data, batch_size); - std::cout << std::fixed << std::setprecision(4); - for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { - std::cout << v[i] << " \n"[i == LOSS_TYPE_NUM - 1]; + for (int64_t calibration_data_num = batch_size; calibration_data_num <= (batch_size << 5); calibration_data_num *= 2) { + nn.load(model_file, 0, batch_size, calibration_kifu_path, calibration_data_num); + + std::array v = validation(nn, data, batch_size); + std::cout << std::fixed << std::setprecision(4); + std::cout << std::setw(10) << calibration_data_num << " "; + for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { + std::cout << v[i] << " \n"[i == LOSS_TYPE_NUM - 1]; + } } + std::cout << "finish checkValInfer" << std::endl; } void checkPredictSpeed() { @@ -304,7 +310,7 @@ void checkPredictSpeed() { SearchOptions search_options; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path); + nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path, search_options.calibration_data_num); for (int64_t batch_size = 1; batch_size <= BATCH_SIZE; batch_size *= 2) { //バッチサイズ分入力を取得 @@ -496,7 +502,7 @@ void testLoad() { std::cout << "通常の試行" << std::endl; for (int64_t num = 0; num < 0; num++) { InferModel model; - model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path); + model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path, search_options.calibration_data_num); int64_t ela = timer.elapsedSeconds(); int64_t curr = ela - pre; pre = ela; @@ -513,7 +519,8 @@ void testLoad() { for (int64_t i = 0; i < gpu_num; i++) { threads.emplace_back([i, search_options]() { InferModel model; - model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE, search_options.calibration_kifu_path); + model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE, search_options.calibration_kifu_path, + search_options.calibration_data_num); }); } for (int64_t i = 0; i < gpu_num; i++) { From 1476cb9cd8e07e47bab827a724fa1e56c6c564dd Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 17 Feb 2021 22:39:59 +0900 Subject: [PATCH 58/87] =?UTF-8?q?calibration=5Fdata=5Fnum=E3=81=AE?= =?UTF-8?q?=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB=E3=83=88=E3=81=A8min?= =?UTF-8?q?=E3=81=8C=E9=80=86=E3=81=A0=E3=81=A3=E3=81=9F=E3=81=AE=E3=82=92?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/search_options.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/search_options.hpp b/src/search_options.hpp index 1468e629..39550299 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -61,7 +61,7 @@ struct SearchOptions { spin_options.emplace("print_policy_num", SpinOption(print_policy_num = 0, 0, 593)); spin_options.emplace("remained_turn_divisor", SpinOption(remained_turn_divisor = 1, 1, MAX)); spin_options.emplace("hold_moves_num", SpinOption(hold_moves_num = 32, 1, 593)); - spin_options.emplace("calibration_data_num", SpinOption(calibration_data_num = 1, 128, MAX)); + spin_options.emplace("calibration_data_num", SpinOption(calibration_data_num = 128, 1, MAX)); filename_options.emplace("model_name", FilenameOption(model_name = DEFAULT_MODEL_NAME)); filename_options.emplace("book_file_name", FilenameOption(book_file_name = "book.txt")); filename_options.emplace("calibration_kifu_path", FilenameOption(calibration_kifu_path = "../../../data/floodgate_kifu/valid")); From af864ff3edc23c445ca639edabd9c9baf1ae98c4 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 18 Feb 2021 21:41:19 +0900 Subject: [PATCH 59/87] =?UTF-8?q?calibration=5Fdata=5Fnum=E3=82=92?= =?UTF-8?q?=E5=89=8A=E9=99=A4=E3=80=82opt=5Fbatch=5Fsize=E3=81=AE2?= =?UTF-8?q?=E5=80=8D=E3=81=A7=E8=89=AF=E3=81=95=E3=81=9D=E3=81=86=E3=81=AA?= =?UTF-8?q?=E3=81=AE=E3=81=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.cpp | 4 ++-- src/infer_model.cpp | 4 ++-- src/infer_model.hpp | 3 +-- src/search_options.hpp | 2 -- src/searcher_for_play.cpp | 2 +- src/shogi/test.cpp | 12 +++++------- 6 files changed, 11 insertions(+), 16 deletions(-) diff --git a/src/game_generator.cpp b/src/game_generator.cpp index 1d82a549..0554f25c 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -5,7 +5,7 @@ void GameGenerator::genGames() { //まず最初のロード neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, - search_options_.calibration_kifu_path, search_options_.calibration_data_num); + search_options_.calibration_kifu_path); need_load = false; //生成スレッドを生成 @@ -63,7 +63,7 @@ void GameGenerator::genSlave(int64_t thread_id) { if (need_load && thread_id == 0) { gpu_mutex.lock(); neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, - search_options_.calibration_kifu_path, search_options_.calibration_data_num); + search_options_.calibration_kifu_path); need_load = false; gpu_mutex.unlock(); } diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 71dcbf4b..bb0a3959 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -7,7 +7,7 @@ #include void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, - const std::string& calibration_kifu_path, int64_t calibration_data_num) { + const std::string& calibration_kifu_path) { torch::jit::Module module = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); module.to(device_); @@ -18,7 +18,7 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; using namespace torch::data; - auto dataset = CalibrationDataset(calibration_kifu_path, calibration_data_num).map(transforms::Stack<>()); + auto dataset = CalibrationDataset(calibration_kifu_path, opt_batch_size * 2).map(transforms::Stack<>()); auto dataloader = make_data_loader(std::move(dataset), DataLoaderOptions().batch_size(opt_batch_size).workers(1)); const std::string name = "calibration_cache_file.txt"; diff --git a/src/infer_model.hpp b/src/infer_model.hpp index bbf3918c..3726d4f7 100644 --- a/src/infer_model.hpp +++ b/src/infer_model.hpp @@ -7,8 +7,7 @@ class InferModel { public: InferModel() : device_(torch::kCPU) {} - void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path, - int64_t calibration_data_num); + void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path); std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); std::array validLoss(const std::vector& data); diff --git a/src/search_options.hpp b/src/search_options.hpp index 39550299..cc44699f 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -61,7 +61,6 @@ struct SearchOptions { spin_options.emplace("print_policy_num", SpinOption(print_policy_num = 0, 0, 593)); spin_options.emplace("remained_turn_divisor", SpinOption(remained_turn_divisor = 1, 1, MAX)); spin_options.emplace("hold_moves_num", SpinOption(hold_moves_num = 32, 1, 593)); - spin_options.emplace("calibration_data_num", SpinOption(calibration_data_num = 128, 1, MAX)); filename_options.emplace("model_name", FilenameOption(model_name = DEFAULT_MODEL_NAME)); filename_options.emplace("book_file_name", FilenameOption(book_file_name = "book.txt")); filename_options.emplace("calibration_kifu_path", FilenameOption(calibration_kifu_path = "../../../data/floodgate_kifu/valid")); @@ -93,7 +92,6 @@ struct SearchOptions { int64_t print_policy_num; int64_t remained_turn_divisor; int64_t hold_moves_num; - int64_t calibration_data_num; std::string model_name; std::string book_file_name; std::string calibration_kifu_path; diff --git a/src/searcher_for_play.cpp b/src/searcher_for_play.cpp index b497dab7..9da1ae4c 100644 --- a/src/searcher_for_play.cpp +++ b/src/searcher_for_play.cpp @@ -17,7 +17,7 @@ SearcherForPlay::SearcherForPlay(const SearchOptions& search_options) for (int64_t i = 0; i < search_options.gpu_num; i++) { neural_networks_.emplace_back(); neural_networks_[i].load(search_options_.model_name, i, search_options.search_batch_size, - search_options.calibration_kifu_path, search_options.calibration_data_num); + search_options.calibration_kifu_path); } //GPUに対するmutexを準備 diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index b0c8743c..20fc0605 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -14,8 +14,7 @@ void test() { search_options.search_batch_size = 1; search_options.output_log_file = true; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path, - search_options.calibration_data_num); + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path); SearcherForPlay searcher(search_options); Position pos; @@ -289,7 +288,7 @@ void checkValInfer() { InferModel nn; for (int64_t calibration_data_num = batch_size; calibration_data_num <= (batch_size << 5); calibration_data_num *= 2) { - nn.load(model_file, 0, batch_size, calibration_kifu_path, calibration_data_num); + nn.load(model_file, 0, batch_size, calibration_kifu_path); std::array v = validation(nn, data, batch_size); std::cout << std::fixed << std::setprecision(4); @@ -310,7 +309,7 @@ void checkPredictSpeed() { SearchOptions search_options; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path, search_options.calibration_data_num); + nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path); for (int64_t batch_size = 1; batch_size <= BATCH_SIZE; batch_size *= 2) { //バッチサイズ分入力を取得 @@ -502,7 +501,7 @@ void testLoad() { std::cout << "通常の試行" << std::endl; for (int64_t num = 0; num < 0; num++) { InferModel model; - model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path, search_options.calibration_data_num); + model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path); int64_t ela = timer.elapsedSeconds(); int64_t curr = ela - pre; pre = ela; @@ -519,8 +518,7 @@ void testLoad() { for (int64_t i = 0; i < gpu_num; i++) { threads.emplace_back([i, search_options]() { InferModel model; - model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE, search_options.calibration_kifu_path, - search_options.calibration_data_num); + model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE, search_options.calibration_kifu_path); }); } for (int64_t i = 0; i < gpu_num; i++) { From cb3a7b9db5f48b5eb5ae0b1e6f5aad519f378ca5 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 18 Feb 2021 21:42:43 +0900 Subject: [PATCH 60/87] =?UTF-8?q?load=E3=81=AE=E3=81=9F=E3=81=B3=E3=81=AB3?= =?UTF-8?q?0=E7=A7=92=E3=82=B9=E3=83=AA=E3=83=BC=E3=83=97=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 2 +- src/reinforcement_learn.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index 74fee238..73aa269d 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -23,7 +23,7 @@ learn_rate_decay_step2 2000000 learn_rate_decay_step3 2000000 learn_rate_decay_step4 2000000 learn_rate_decay_period 100000 -update_interval 500 +update_interval 1000 batch_size_per_gen 2 worker_num_per_thread 64 max_stack_size 1048576 diff --git a/src/reinforcement_learn.cpp b/src/reinforcement_learn.cpp index 87a514bc..27dddf95 100644 --- a/src/reinforcement_learn.cpp +++ b/src/reinforcement_learn.cpp @@ -110,6 +110,9 @@ void reinforcementLearn() { generators[i]->gpu_mutex.unlock(); } + + //loadに30秒ほどかかるのでその期間スリープ + std::this_thread::sleep_for(std::chrono::seconds(30)); } //学習スレッドを眠らせることで擬似的にActorの数を増やす From eca2e50c992e35d058e8856e409bf620ef009022 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 18 Feb 2021 21:43:04 +0900 Subject: [PATCH 61/87] =?UTF-8?q?SAM=E3=82=92=E3=83=87=E3=83=95=E3=82=A9?= =?UTF-8?q?=E3=83=AB=E3=83=88=E3=81=A7=E3=82=AA=E3=83=95=E3=81=AB=E5=A4=89?= =?UTF-8?q?=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 2 +- setting/supervised_learn_settings.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index 73aa269d..d5356a8d 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -37,7 +37,7 @@ save_interval 50000 validation_interval 50000 sleep_msec -1 noise_mode 0 -use_sam_optim 1 +use_sam_optim 0 # Shogi init_buffer_by_kifu 0 diff --git a/setting/supervised_learn_settings.txt b/setting/supervised_learn_settings.txt index 95187ac0..f496a703 100644 --- a/setting/supervised_learn_settings.txt +++ b/setting/supervised_learn_settings.txt @@ -15,7 +15,7 @@ learn_rate_decay_step2 1200000 learn_rate_decay_step3 1800000 learn_rate_decay_step4 2400000 learn_rate_decay_period 100000 -use_sam_optim 1 +use_sam_optim 0 # Shogi(AobaZero) load_multi_dir 1 From da2f1372595b1910321e8d97d03bd423f15ff225 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 18 Feb 2021 21:46:12 +0900 Subject: [PATCH 62/87] =?UTF-8?q?calibration=5Fkifu=5Fpath=E3=82=92?= =?UTF-8?q?=E8=A8=AD=E5=AE=9A=E3=81=AB=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 1 + src/reinforcement_learn.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index d5356a8d..adc779b5 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -38,6 +38,7 @@ validation_interval 50000 sleep_msec -1 noise_mode 0 use_sam_optim 0 +calibration_kifu_path /root/data/floodgate_kifu/valid # Shogi init_buffer_by_kifu 0 diff --git a/src/reinforcement_learn.cpp b/src/reinforcement_learn.cpp index 27dddf95..e9de5379 100644 --- a/src/reinforcement_learn.cpp +++ b/src/reinforcement_learn.cpp @@ -36,6 +36,7 @@ void reinforcementLearn() { bool data_augmentation = settings.get("data_augmentation"); bool Q_search = settings.get("Q_search"); std::string train_kifu_path = settings.get("train_kifu_path"); + search_options.calibration_kifu_path = settings.get("calibration_kifu_path"); // clang-format on //学習クラスを生成 From 00696218a24b4d60a3306b33e4a32eef5152d317 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 19 Feb 2021 00:24:59 +0900 Subject: [PATCH 63/87] =?UTF-8?q?=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB?= =?UTF-8?q?=E3=83=88=E3=83=81=E3=83=A3=E3=83=B3=E3=83=8D=E3=83=AB=E6=95=B0?= =?UTF-8?q?=E3=82=92256=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/generate_torch_script_model.py | 2 +- src/neural_network.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate_torch_script_model.py b/scripts/generate_torch_script_model.py index 0a2e1c18..efe70e8e 100755 --- a/scripts/generate_torch_script_model.py +++ b/scripts/generate_torch_script_model.py @@ -122,7 +122,7 @@ def main(): parser.add_argument("-game", default="shogi", choices=["shogi", "othello"]) parser.add_argument("-value_type", default="cat", choices=["sca", "cat"]) parser.add_argument("--block_num", type=int, default=10) - parser.add_argument("--channel_num", type=int, default=128) + parser.add_argument("--channel_num", type=int, default=256) args = parser.parse_args() if args.game == "shogi": diff --git a/src/neural_network.cpp b/src/neural_network.cpp index a28a5c98..53362640 100644 --- a/src/neural_network.cpp +++ b/src/neural_network.cpp @@ -5,7 +5,7 @@ //ネットワークの設定 #ifdef SHOGI static constexpr int32_t BLOCK_NUM = 10; -static constexpr int32_t CHANNEL_NUM = 128; +static constexpr int32_t CHANNEL_NUM = 256; #elif defined(OTHELLO) static constexpr int32_t BLOCK_NUM = 5; static constexpr int32_t CHANNEL_NUM = 64; From 2776b17f5a7ae6d7255091199ffe0a487920dda9 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 19 Feb 2021 14:47:12 +0900 Subject: [PATCH 64/87] =?UTF-8?q?supervised=5Flearn.cpp=E3=81=AB=E3=81=8A?= =?UTF-8?q?=E3=81=84=E3=81=A6=E4=BD=BF=E7=94=A8=E3=83=87=E3=83=BC=E3=82=BF?= =?UTF-8?q?=E3=81=AE=E8=A8=98=E9=8C=B2=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/supervised_learn.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/supervised_learn.cpp b/src/supervised_learn.cpp index b47b14d1..9a54bb52 100644 --- a/src/supervised_learn.cpp +++ b/src/supervised_learn.cpp @@ -32,6 +32,11 @@ void supervisedLearn() { //データを取得 std::vector train_data = loadData(train_kifu_path, data_augmentation, train_rate_threshold); + //どのEpochでどのデータを使っているかを記録する + std::ofstream epoch_log("epoch_log.txt"); + epoch_log << "dir_path.size() = " << dir_paths.size() << std::endl; + epoch_log << "0 " << train_data.size() << std::endl; + //学習クラスを生成 LearnManager learn_manager("supervised"); @@ -53,6 +58,7 @@ void supervisedLearn() { if (load_multi_dir) { train_data = loadData(dir_paths[epoch % dir_paths.size()], data_augmentation, train_rate_threshold); + epoch_log << epoch << " " << train_data.size() << std::endl; } } From 107e7502916b8a8d89f193e5cc980b0531829742 Mon Sep 17 00:00:00 2001 From: sakoda Date: Sun, 21 Feb 2021 09:40:08 +0900 Subject: [PATCH 65/87] =?UTF-8?q?Dockerfile=E3=82=92=E5=B0=91=E3=81=97?= =?UTF-8?q?=E3=81=A0=E3=81=91=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/Dockerfile b/scripts/Dockerfile index 0f9c6b89..a2e511d8 100644 --- a/scripts/Dockerfile +++ b/scripts/Dockerfile @@ -37,7 +37,7 @@ RUN python3 setup.py install --use-cxx11-abi RUN conda init bash -ENV LD_LIBRARY_PATH /opt/conda/lib/python3.6/site-packages/torch/lib:$LD_LIBRARY_PATh +ENV LD_LIBRARY_PATH /opt/conda/lib/python3.6/site-packages/torch/lib:$LD_LIBRARY_PATH # ここから自分の設定 @@ -46,7 +46,7 @@ RUN apt-get update && apt-get install -y language-pack-ja-base language-pack-ja ENV LANG='ja_JP.UTF-8' # 必要なもののインストール -RUN apt-get update && apt-get install -y p7zip-full && rm -rf /var/lib/apt/lists/* +RUN apt-get update && apt-get install -y p7zip-full zip && rm -rf /var/lib/apt/lists/* RUN pip install natsort # trtorchを適切な場所へ展開 From f0eda013f68f08185059e5b582823f9d4e0b622a Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 23 Feb 2021 11:49:09 +0900 Subject: [PATCH 66/87] =?UTF-8?q?=E3=83=A1=E3=83=A2=E3=83=AA=E5=91=A8?= =?UTF-8?q?=E3=82=8A=E3=81=AE=E5=95=8F=E9=A1=8C=E3=81=B8=E9=83=A8=E5=88=86?= =?UTF-8?q?=E7=9A=84=E3=81=AB=E5=AF=BE=E5=87=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/othello/position.hpp | 3 +++ src/searcher.cpp | 13 +++++++++++++ src/searcher_for_play.cpp | 2 +- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/othello/position.hpp b/src/othello/position.hpp index 6d8b0e5c..db16df9e 100644 --- a/src/othello/position.hpp +++ b/src/othello/position.hpp @@ -43,6 +43,9 @@ class Position { //終了判定 bool isFinish(float& score, bool check_repeat = true) const; + //ループ判定:将棋の方で使うのでオセロでも持っておく + static bool isRepeating(float& score) { return false; } + //ハッシュ static void initHashSeed(); diff --git a/src/searcher.cpp b/src/searcher.cpp index c5ad2614..1264591f 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -182,6 +182,19 @@ Index Searcher::expand(Position& pos, std::stack& indices, std::stack search_options_.draw_turn) { + float dummy_score; + if (!pos.isRepeating(dummy_score) || pos.turnNumber() > search_options_.draw_turn) { + //この局面にはどう到達しても絶対に終わりなので指し手情報などを消して良い + curr_node.moves.clear(); + curr_node.moves.shrink_to_fit(); + curr_node.child_indices.clear(); + curr_node.child_indices.shrink_to_fit(); + curr_node.N.clear(); + curr_node.N.shrink_to_fit(); + curr_node.virtual_N.clear(); + curr_node.virtual_N.shrink_to_fit(); + } + #ifdef USE_CATEGORICAL curr_node.value = onehotDist(finish_score); #else diff --git a/src/searcher_for_play.cpp b/src/searcher_for_play.cpp index 9da1ae4c..41dfe9ad 100644 --- a/src/searcher_for_play.cpp +++ b/src/searcher_for_play.cpp @@ -11,7 +11,7 @@ struct MoveWithScore { SearcherForPlay::SearcherForPlay(const SearchOptions& search_options) : stop_signal(false), search_options_(search_options), - hash_table_(search_options.USI_Hash * 1024 * 1024 / (60 * search_options.hold_moves_num)), + hash_table_(search_options.USI_Hash * 1024 * 1024 / (120 * search_options.hold_moves_num)), mate_searcher_(hash_table_, search_options) { //GPUを準備 for (int64_t i = 0; i < search_options.gpu_num; i++) { From e457329d7338cd73ebdb3eda994af274ed5add47 Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 23 Feb 2021 11:54:22 +0900 Subject: [PATCH 67/87] =?UTF-8?q?=E5=AE=A3=E8=A8=80=E5=8B=9D=E3=81=A1?= =?UTF-8?q?=E3=81=AE=E3=81=A8=E3=81=8D=E3=81=ABPonder=E3=81=B8=E7=AA=81?= =?UTF-8?q?=E5=85=A5=E3=81=97=E3=81=A6=E8=90=BD=E3=81=A1=E3=82=8B=E3=83=90?= =?UTF-8?q?=E3=82=B0=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/shogi/interface.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shogi/interface.cpp b/src/shogi/interface.cpp index 4b14d236..f277376d 100644 --- a/src/shogi/interface.cpp +++ b/src/shogi/interface.cpp @@ -202,7 +202,7 @@ void Interface::go() { Move best_move = (root_.canWinDeclare() ? DECLARE_MOVE : searcher_->think(root_, time_limit - search_options_.byoyomi_margin)); std::cout << "bestmove " << best_move << std::endl; - if (search_options_.USI_Ponder && best_move != NULL_MOVE) { + if (search_options_.USI_Ponder && best_move != NULL_MOVE && best_move != DECLARE_MOVE) { root_.doMove(best_move); float score{}; if (!root_.isFinish(score) && root_.turnNumber() <= search_options_.draw_turn) { From 1e47692feb4de8ad62b8e1906b622c995f4acd87 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 26 Feb 2021 11:20:32 +0900 Subject: [PATCH 68/87] =?UTF-8?q?Dockerfile=E3=81=AB=E3=81=8A=E3=81=91?= =?UTF-8?q?=E3=82=8BTRTorch=E3=81=AE=E3=82=B3=E3=83=9F=E3=83=83=E3=83=88?= =?UTF-8?q?=E3=82=92=E8=AB=B8=E3=80=85=E3=81=AE=E3=83=90=E3=82=B0=E3=81=8C?= =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E3=81=95=E3=82=8C=E3=81=9F=E6=99=82=E7=82=B9?= =?UTF-8?q?=E3=81=AE=E3=82=82=E3=81=AE=E3=81=AB=E3=82=A2=E3=83=83=E3=83=97?= =?UTF-8?q?=E3=83=87=E3=83=BC=E3=83=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Dockerfile b/scripts/Dockerfile index a2e511d8..e0f82d90 100644 --- a/scripts/Dockerfile +++ b/scripts/Dockerfile @@ -14,7 +14,7 @@ RUN pip install notebook WORKDIR /opt RUN git clone https://github.com/NVIDIA/TRTorch trtorch WORKDIR /opt/trtorch -RUN git checkout b228bf239aadd5f104af38ea64416bdda5f0aa57 +RUN git checkout 721b071f7166e1826183f28305823f406eac4807 RUN cp /opt/trtorch/docker/WORKSPACE.cu.docker /opt/trtorch/WORKSPACE # Workaround for bazel expecting both static and shared versions, we only use shared libraries inside container From ff38b0ba4fed26115625f98b213f06f82987e3e7 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 26 Feb 2021 20:55:56 +0900 Subject: [PATCH 69/87] =?UTF-8?q?use=5Ffp16=E3=81=AE=E3=82=AA=E3=83=97?= =?UTF-8?q?=E3=82=B7=E3=83=A7=E3=83=B3=E3=82=92=E3=81=A1=E3=82=83=E3=82=93?= =?UTF-8?q?=E3=81=A8=E4=BD=BF=E3=81=86=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89?= =?UTF-8?q?=E6=9B=B4(=E3=83=87=E3=83=95=E3=82=A9=E3=83=AB=E3=83=88?= =?UTF-8?q?=E3=81=AFfalse=E3=81=AB=E8=A8=AD=E5=AE=9A)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.cpp | 4 +-- src/infer_model.cpp | 65 ++++++++++++++++++++++++++------------- src/infer_model.hpp | 4 ++- src/othello/interface.cpp | 5 +-- src/search_options.hpp | 2 +- src/searcher_for_play.cpp | 2 +- src/shogi/test.cpp | 14 ++++++--- 7 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/game_generator.cpp b/src/game_generator.cpp index 0554f25c..0af12386 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -5,7 +5,7 @@ void GameGenerator::genGames() { //まず最初のロード neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, - search_options_.calibration_kifu_path); + search_options_.calibration_kifu_path, search_options_.use_fp16); need_load = false; //生成スレッドを生成 @@ -63,7 +63,7 @@ void GameGenerator::genSlave(int64_t thread_id) { if (need_load && thread_id == 0) { gpu_mutex.lock(); neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, - search_options_.calibration_kifu_path); + search_options_.calibration_kifu_path, search_options_.use_fp16); need_load = false; gpu_mutex.unlock(); } diff --git a/src/infer_model.cpp b/src/infer_model.cpp index bb0a3959..fb15ad4a 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -7,7 +7,7 @@ #include void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, - const std::string& calibration_kifu_path) { + const std::string& calibration_kifu_path, bool use_fp16) { torch::jit::Module module = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); module.to(device_); @@ -17,28 +17,39 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt std::vector in_opt = { opt_batch_size, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; - using namespace torch::data; - auto dataset = CalibrationDataset(calibration_kifu_path, opt_batch_size * 2).map(transforms::Stack<>()); - auto dataloader = make_data_loader(std::move(dataset), DataLoaderOptions().batch_size(opt_batch_size).workers(1)); - - const std::string name = "calibration_cache_file.txt"; - auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(dataloader), name, false); - - //trtorch - trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); - trtorch::CompileSpec info({ range }); - info.op_precision = torch::kI8; - info.device.gpu_id = gpu_id; - info.ptq_calibrator = calibrator; - info.workspace_size = (1ull << 29); - info.max_batch_size = opt_batch_size * 2; - - module_ = trtorch::CompileGraph(module, info); + use_fp16_ = use_fp16; + if (use_fp16_) { + trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); + trtorch::CompileSpec info({ range }); + info.op_precision = torch::kHalf; + info.device.gpu_id = gpu_id; + module_ = trtorch::CompileGraph(module, info); + } else { + using namespace torch::data; + auto dataset = CalibrationDataset(calibration_kifu_path, opt_batch_size * 2).map(transforms::Stack<>()); + auto dataloader = make_data_loader(std::move(dataset), DataLoaderOptions().batch_size(opt_batch_size).workers(1)); + + const std::string name = "calibration_cache_file.txt"; + auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(dataloader), name, false); + + trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); + trtorch::CompileSpec info({ range }); + info.op_precision = torch::kI8; + info.device.gpu_id = gpu_id; + info.ptq_calibrator = calibrator; + info.workspace_size = (1ull << 29); + info.max_batch_size = opt_batch_size * 2; + + module_ = trtorch::CompileGraph(module, info); + } } std::pair, std::vector> InferModel::policyAndValueBatch(const std::vector& inputs) { torch::Tensor x = torch::tensor(inputs).to(device_); x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); + if (use_fp16_) { + x = x.to(torch::kFloat16); + } auto out = module_.forward({ x }); auto tuple = out.toTuple(); torch::Tensor policy = tuple->elements()[0].toTensor(); @@ -51,13 +62,23 @@ std::pair, std::vector> InferModel::policyAnd //CPUに持ってくる policy = policy.cpu(); - float* p = policy.data_ptr(); - for (uint64_t i = 0; i < batch_size; i++) { - policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); + if (use_fp16_) { + torch::Half* p = policy.data_ptr(); + for (uint64_t i = 0; i < batch_size; i++) { + policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); + } + } else { + float* p = policy.data_ptr(); + for (uint64_t i = 0; i < batch_size; i++) { + policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); + } } #ifdef USE_CATEGORICAL value = torch::softmax(value, 1).cpu(); + + //valueの方はfp16化してもなぜかHalfではなくFloatとして返ってくる + //ひょっとしたらTRTorchのバグかも float* value_p = value.data_ptr(); for (uint64_t i = 0; i < batch_size; i++) { std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); @@ -65,7 +86,7 @@ std::pair, std::vector> InferModel::policyAnd #else //CPUに持ってくる value = value.cpu(); - std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); + std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); #endif return std::make_pair(policies, values); } diff --git a/src/infer_model.hpp b/src/infer_model.hpp index 3726d4f7..413776be 100644 --- a/src/infer_model.hpp +++ b/src/infer_model.hpp @@ -7,13 +7,15 @@ class InferModel { public: InferModel() : device_(torch::kCPU) {} - void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path); + void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path, + bool use_fp16); std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); std::array validLoss(const std::vector& data); private: torch::jit::Module module_; torch::Device device_; + bool use_fp16_; }; #endif \ No newline at end of file diff --git a/src/othello/interface.cpp b/src/othello/interface.cpp index ff8e5732..a7d59a1d 100644 --- a/src/othello/interface.cpp +++ b/src/othello/interface.cpp @@ -107,7 +107,8 @@ void Interface::test() { search_options.search_batch_size = 1; search_options.output_log_file = true; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path); + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path, + search_options.use_fp16); SearcherForPlay searcher(search_options); Position pos; @@ -288,7 +289,7 @@ void Interface::outputValue() { std::ofstream ofs("value_output.txt"); SearchOptions search_option; InferModel nn; - nn.load(options_.model_name, 0, 1, search_option.calibration_kifu_path); + nn.load(options_.model_name, 0, 1, search_option.calibration_kifu_path, search_option.use_fp16); std::uniform_real_distribution dist(0.0, 1.0); diff --git a/src/search_options.hpp b/src/search_options.hpp index cc44699f..c1497128 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -32,7 +32,7 @@ struct SearchOptions { // clang-format off check_options.emplace("USI_Ponder", CheckOption(USI_Ponder = false)); check_options.emplace("leave_root", CheckOption(leave_root = true)); - check_options.emplace("use_fp16", CheckOption(use_fp16 = true)); + check_options.emplace("use_fp16", CheckOption(use_fp16 = false)); check_options.emplace("use_book", CheckOption(use_book = false)); check_options.emplace("print_info", CheckOption(print_info = true)); check_options.emplace("output_log_file", CheckOption(output_log_file = false)); diff --git a/src/searcher_for_play.cpp b/src/searcher_for_play.cpp index 41dfe9ad..2c8cbdcd 100644 --- a/src/searcher_for_play.cpp +++ b/src/searcher_for_play.cpp @@ -17,7 +17,7 @@ SearcherForPlay::SearcherForPlay(const SearchOptions& search_options) for (int64_t i = 0; i < search_options.gpu_num; i++) { neural_networks_.emplace_back(); neural_networks_[i].load(search_options_.model_name, i, search_options.search_batch_size, - search_options.calibration_kifu_path); + search_options.calibration_kifu_path, search_options.use_fp16); } //GPUに対するmutexを準備 diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index 20fc0605..ba324b32 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -14,7 +14,8 @@ void test() { search_options.search_batch_size = 1; search_options.output_log_file = true; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path); + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path, + search_options.use_fp16); SearcherForPlay searcher(search_options); Position pos; @@ -280,6 +281,9 @@ void checkValInfer() { std::string calibration_kifu_path; std::cout << "calibration_kifu_path : "; std::cin >> calibration_kifu_path; + bool use_fp16; + std::cout << "fp16 : "; + std::cin >> use_fp16; std::vector data = loadData(path, false, 3000); std::cout << "data.size() = " << data.size() << std::endl; @@ -288,7 +292,7 @@ void checkValInfer() { InferModel nn; for (int64_t calibration_data_num = batch_size; calibration_data_num <= (batch_size << 5); calibration_data_num *= 2) { - nn.load(model_file, 0, batch_size, calibration_kifu_path); + nn.load(model_file, 0, batch_size, calibration_kifu_path, use_fp16); std::array v = validation(nn, data, batch_size); std::cout << std::fixed << std::setprecision(4); @@ -309,7 +313,7 @@ void checkPredictSpeed() { SearchOptions search_options; InferModel nn; - nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path); + nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path, search_options.use_fp16); for (int64_t batch_size = 1; batch_size <= BATCH_SIZE; batch_size *= 2) { //バッチサイズ分入力を取得 @@ -501,7 +505,7 @@ void testLoad() { std::cout << "通常の試行" << std::endl; for (int64_t num = 0; num < 0; num++) { InferModel model; - model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path); + model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path, search_options.use_fp16); int64_t ela = timer.elapsedSeconds(); int64_t curr = ela - pre; pre = ela; @@ -518,7 +522,7 @@ void testLoad() { for (int64_t i = 0; i < gpu_num; i++) { threads.emplace_back([i, search_options]() { InferModel model; - model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE, search_options.calibration_kifu_path); + model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE, search_options.calibration_kifu_path, search_options.use_fp16); }); } for (int64_t i = 0; i < gpu_num; i++) { From 974c39a106267ef7f1602fc3004232e41fbc0d85 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 26 Feb 2021 20:57:29 +0900 Subject: [PATCH 70/87] =?UTF-8?q?Dockerfile=E3=81=A7Miacis=E3=82=92make?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=82=B9=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88?= =?UTF-8?q?=E3=81=AEtarget=E3=81=AB=E3=81=AFshogi=5Fcategorical=E3=81=AE?= =?UTF-8?q?=E3=81=BF=E3=82=92=E6=8C=87=E5=AE=9A=E3=81=99=E3=82=8B=E3=82=88?= =?UTF-8?q?=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/Dockerfile b/scripts/Dockerfile index e0f82d90..b84d7a79 100644 --- a/scripts/Dockerfile +++ b/scripts/Dockerfile @@ -60,7 +60,7 @@ WORKDIR /root/Miacis/src/cmake-build-release RUN echo "git fetch" > update.sh && \ echo "git reset --hard origin/master" >> update.sh && \ echo "cmake -DCMAKE_BUILD_TYPE=Release .." >> update.sh && \ - echo "make -j$(nproc)" >> update.sh && \ + echo "make -j$(nproc) Miacis_shogi_categorical" >> update.sh && \ chmod +x update.sh && \ ./update.sh From 1ad6ccd736a44e362a71fc356a232a7ab0dffa34 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 26 Feb 2021 23:10:39 +0900 Subject: [PATCH 71/87] =?UTF-8?q?GPU=E3=81=AE=E3=82=AF=E3=83=AA=E3=83=86?= =?UTF-8?q?=E3=82=A3=E3=82=AB=E3=83=AB=E3=82=BB=E3=82=AF=E3=82=B7=E3=83=A7?= =?UTF-8?q?=E3=83=B3=E3=82=92=E7=B4=B0=E3=81=8B=E3=81=8F=E5=88=86=E9=9B=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/game_generator.cpp | 5 +++-- src/infer_model.cpp | 28 +++++++++++++++++++++------- src/infer_model.hpp | 3 +++ src/searcher_for_play.cpp | 4 ++-- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/src/game_generator.cpp b/src/game_generator.cpp index 0af12386..dd427406 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -96,9 +96,10 @@ void GameGenerator::evalWithGPU(int64_t thread_id) { //順伝播計算 gpu_mutex.lock(); torch::NoGradGuard no_grad_guard; - std::pair, std::vector> result = - neural_network_.policyAndValueBatch(gpu_queues_[thread_id].inputs); + std::tuple output = neural_network_.infer(gpu_queues_[thread_id].inputs); gpu_mutex.unlock(); + std::pair, std::vector> result = neural_network_.decode(output); + const std::vector& policies = result.first; const std::vector& values = result.second; diff --git a/src/infer_model.cpp b/src/infer_model.cpp index fb15ad4a..296bb266 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -45,6 +45,10 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt } std::pair, std::vector> InferModel::policyAndValueBatch(const std::vector& inputs) { + return decode(infer(inputs)); +} + +std::tuple InferModel::infer(const std::vector& inputs) { torch::Tensor x = torch::tensor(inputs).to(device_); x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); if (use_fp16_) { @@ -55,13 +59,27 @@ std::pair, std::vector> InferModel::policyAnd torch::Tensor policy = tuple->elements()[0].toTensor(); torch::Tensor value = tuple->elements()[1].toTensor(); - uint64_t batch_size = inputs.size() / (SQUARE_NUM * INPUT_CHANNEL_NUM); + //CPUに持ってくる + policy = policy.cpu(); + + //valueはcategoricalのときだけはsoftmaxをかけてからcpuへ +#ifdef USE_CATEGORICAL + value = torch::softmax(value, 1).cpu(); +#else + value = value.cpu(); +#endif + + return std::make_tuple(policy, value); +} + +std::pair, std::vector> +InferModel::decode(const std::tuple& output) const { + const auto& [policy, value] = output; + uint64_t batch_size = policy.size(0); std::vector policies(batch_size); std::vector values(batch_size); - //CPUに持ってくる - policy = policy.cpu(); if (use_fp16_) { torch::Half* p = policy.data_ptr(); for (uint64_t i = 0; i < batch_size; i++) { @@ -75,8 +93,6 @@ std::pair, std::vector> InferModel::policyAnd } #ifdef USE_CATEGORICAL - value = torch::softmax(value, 1).cpu(); - //valueの方はfp16化してもなぜかHalfではなくFloatとして返ってくる //ひょっとしたらTRTorchのバグかも float* value_p = value.data_ptr(); @@ -84,8 +100,6 @@ std::pair, std::vector> InferModel::policyAnd std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); } #else - //CPUに持ってくる - value = value.cpu(); std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); #endif return std::make_pair(policies, values); diff --git a/src/infer_model.hpp b/src/infer_model.hpp index 413776be..e488125d 100644 --- a/src/infer_model.hpp +++ b/src/infer_model.hpp @@ -10,6 +10,9 @@ class InferModel { void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path, bool use_fp16); std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); + std::tuple infer(const std::vector& inputs); + std::pair, std::vector> + decode(const std::tuple& output) const; std::array validLoss(const std::vector& data); private: diff --git a/src/searcher_for_play.cpp b/src/searcher_for_play.cpp index 2c8cbdcd..48b765e8 100644 --- a/src/searcher_for_play.cpp +++ b/src/searcher_for_play.cpp @@ -254,9 +254,9 @@ void SearcherForPlay::workerThreadFunc(Position root, int64_t gpu_id, int64_t th if (!gpu_queue.inputs.empty()) { torch::NoGradGuard no_grad_guard; gpu_mutexes_[gpu_id].lock(); - std::pair, std::vector> y = - neural_networks_[gpu_id].policyAndValueBatch(gpu_queue.inputs); + std::tuple output = neural_networks_[gpu_id].infer(gpu_queue.inputs); gpu_mutexes_[gpu_id].unlock(); + std::pair, std::vector> y = neural_networks_[gpu_id].decode(output); //書き込み for (uint64_t i = 0; i < gpu_queue.indices.size(); i++) { From c57812a68c8b00e2e11234438e5e8ef9de49bd15 Mon Sep 17 00:00:00 2001 From: sakoda Date: Sat, 27 Feb 2021 18:32:00 +0900 Subject: [PATCH 72/87] =?UTF-8?q?=E7=B5=82=E4=BA=86=E5=88=A4=E5=AE=9A?= =?UTF-8?q?=E3=81=8C=E9=96=93=E9=81=95=E3=81=A3=E3=81=A6=E3=81=84=E3=81=9F?= =?UTF-8?q?=E3=81=AE=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/download_AobaZero_kifu.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/download_AobaZero_kifu.sh b/scripts/download_AobaZero_kifu.sh index e49c9668..80fe285f 100755 --- a/scripts/download_AobaZero_kifu.sh +++ b/scripts/download_AobaZero_kifu.sh @@ -27,7 +27,7 @@ while read row; do # THRESHOLDより大きいものだけをダウンロード # 2重にダウンロードしないように存在判定を入れる - if [ "${file_number}" -ge ${START_FILE_ID} ] && [ "${file_number}" -lt ${START_FILE_ID} ] && [ ! -f "${root_dir}/${file_name}" ]; then + if [ "${file_number}" -ge ${START_FILE_ID} ] && [ "${file_number}" -lt ${END_FILE_ID} ] && [ ! -f "${root_dir}/${file_name}" ]; then # ダウンロード curl -sc /tmp/cookie "https://drive.google.com/uc?export=download&id=${file_id}" >/dev/null CODE="$(awk '/_warning_/ {print $NF}' /tmp/cookie)" From f12465b9ea775186914a128d1ae7214718fa0975 Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Mar 2021 10:44:50 +0900 Subject: [PATCH 73/87] =?UTF-8?q?=E4=BD=99=E8=A8=88=E3=81=AAvsYaneuraOu?= =?UTF-8?q?=E7=B3=BB=E3=81=AE=E3=82=B9=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88?= =?UTF-8?q?=E3=82=92=E5=89=8A=E9=99=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/vsYaneuraOu_PO800.py | 129 ---------------------------- scripts/vsYaneuraOu_with_Optuna.py | 130 ----------------------------- 2 files changed, 259 deletions(-) delete mode 100755 scripts/vsYaneuraOu_PO800.py delete mode 100755 scripts/vsYaneuraOu_with_Optuna.py diff --git a/scripts/vsYaneuraOu_PO800.py b/scripts/vsYaneuraOu_PO800.py deleted file mode 100755 index 0371f071..00000000 --- a/scripts/vsYaneuraOu_PO800.py +++ /dev/null @@ -1,129 +0,0 @@ -#! /usr/bin/env python3 -import os -import sys - -# Ayaneをインポート -script_dir = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(script_dir + "/../../Ayane/source") -import shogi.Ayane as ayane - -# その他必要なものをインポート -import time -import glob -from natsort import natsorted -from collections import defaultdict -import argparse -from calc_elo_rate import calc_elo_rate - -parser = argparse.ArgumentParser() -parser.add_argument("--Threads", type=int, default=1) -parser.add_argument("--NodesLimit", type=int, default=100000) -parser.add_argument("--game_num", type=int, default=500) -args = parser.parse_args() - -# 対局数(先後行うので偶数でなければならない) -assert args.game_num % 2 == 0 - -# 勝ち,負け,引き分けの結果を示す定数 -WIN = 0 -DRAW = 1 -LOSE = 2 - -# Ayaneにおける結果をここでの結果に変換する辞書 -result_converter = { ayane.GameResult.BLACK_WIN: WIN, - ayane.GameResult.WHITE_WIN: LOSE, - ayane.GameResult.DRAW : DRAW, - ayane.GameResult.MAX_MOVES: DRAW } - - -# インスタンス生成 -server = ayane.AyaneruServer() - -# サーバの設定 -server.error_print = True -server.set_time_setting(f"byoyomi {10000000}") -server.moves_to_draw = 320 - -# YaneuraOuの設定 -server.engines[1].set_engine_options({"USI_Ponder": "false", - "Threads": args.Threads, - "NodesLimit": args.NodesLimit, - "USI_Hash": 1024, - "BookMoves": 0, - "NetworkDelay": 0, - "NetworkDelay2": 0 - }) -server.engines[1].connect(script_dir + "/../../YaneuraOu/bin/YaneuraOu-by-gcc") - -# カレントディレクトリ内にある{prefix}_{step}.modelを評価する -curr_path = os.getcwd() -# ディレクトリ名が"/"で終わっていることの確認 -if curr_path[-1] != "/": - curr_path += "/" - -# 結果を書き込むファイルを取得 -f = open(curr_path + "result_PO800.txt", mode="a") -f.write(f"YaneuraOu Threads = {args.Threads} NodesLimit = {args.NodesLimit}\n") - -# ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う -model_names = natsorted(glob.glob(curr_path + "*0.model")) -assert len(model_names) > 0 - -# パラメータを探索 -for temperature_x1000 in range(0, 10, 1): - # Miacisを準備 - server.engines[0].set_engine_options({"random_turn": 320, - "temperature_x1000": temperature_x1000, - "print_interval": 10000000, - "USI_Hash": 4096, - "search_limit": 800, - "gpu_num": 1, - "thread_num_per_gpu": 1, - "search_batch_size": 4, - "C_PUCT_x1000": 1500, - "model_name": model_names[-1]}) - scalar_or_categorical = "scalar" if "sca" in model_names[-1] else "categorical" - server.engines[0].connect(f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}") - - # 戦績を初期化 - total_num = [0, 0, 0] - - # 棋譜の集合を初期化 - sfens = defaultdict(int) - - # iが偶数のときMiacis先手 - for i in range(args.game_num): - # 対局を実行 - server.game_start() - while not server.game_result.is_gameover(): - time.sleep(1) - - # 重複を確認 - if sfens[server.sfen] > 0: - # 同じ棋譜が2回生成された場合は記録しない - print(f"\n重複:", server.sfen) - else: - # 結果を記録 - result = result_converter[server.game_result] - total_num[result if not server.flip_turn else LOSE - result] += 1 - - sfens[server.sfen] += 1 - - # ここまでの結果を文字列化 - winning_rate = (total_num[WIN] + 0.5 * total_num[DRAW]) / sum(total_num) - elo_rate = calc_elo_rate(winning_rate) - result_str = f"temperature_x1000={temperature_x1000:2d} {total_num[WIN]:3d}勝 {total_num[DRAW]:3d}引き分け {total_num[LOSE]:3d}敗 勝率 {100 * winning_rate:4.1f}% 相対レート {elo_rate:6.1f}" - - sys.stdout.write("\033[2K\033[G") - print(result_str, end="\n" if i == args.game_num - 1 else "") - sys.stdout.flush() - - # 手番反転 - server.flip_turn = not server.flip_turn - - # ファイルに書き込み - f.write(result_str + "\n") - f.flush() - - -server.terminate() diff --git a/scripts/vsYaneuraOu_with_Optuna.py b/scripts/vsYaneuraOu_with_Optuna.py deleted file mode 100755 index bf28e840..00000000 --- a/scripts/vsYaneuraOu_with_Optuna.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys - -# Ayaneをインポート -script_dir = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(script_dir + "/../../Ayane/source") -import shogi.Ayane as ayane - -# その他必要なものをインポート -import time -import glob -import optuna -from natsort import natsorted -from collections import defaultdict -import argparse -from calc_elo_rate import calc_elo_rate - -parser = argparse.ArgumentParser() -parser.add_argument("--time1", type=int, default=1000) -parser.add_argument("--time2", type=int, default=200) -parser.add_argument("--Threads", type=int, default=4) -parser.add_argument("--game_num", type=int, default=100) -args = parser.parse_args() - -# 対局数(先後行うので偶数でなければならない) -assert args.game_num % 2 == 0 - -# 勝ち,負け,引き分けの結果を示す定数 -WIN = 0 -DRAW = 1 -LOSE = 2 - -# Ayaneにおける結果をここでの結果に変換する辞書 -result_converter = { ayane.GameResult.BLACK_WIN: WIN, - ayane.GameResult.WHITE_WIN: LOSE, - ayane.GameResult.DRAW : DRAW, - ayane.GameResult.MAX_MOVES: DRAW } - -# インスタンス生成 -server = ayane.AyaneruServer() -server.error_print = True -server.set_time_setting(f"byoyomi1p {args.time1} byoyomi2p {args.time2}") -server.moves_to_draw = 320 - -# YaneuraOuの設定 -server.engines[1].set_engine_options({"USI_Ponder": "false", - "Threads": args.Threads, - "USI_Hash": 1024, - "BookMoves": 0, - "NetworkDelay": 0, - "NetworkDelay2": 0 - }) -server.engines[1].connect(script_dir + "/../../YaneuraOu/bin/YaneuraOu-by-gcc") - -# カレントディレクトリ内にある最終ステップのパラメータを用いて対局を行う -model_name = natsorted(glob.glob(os.getcwd() + "/*0.model"))[-1] - - -def objective(trial): - # パラメータを準備 - if trial.id == 0: - # 現状わかっている最適パラメータで実行 - C_PUCT_x1000 = trial.suggest_int("C_PUCT_x1000", 2500, 2500) - Q_coeff_x1000 = trial.suggest_int("Q_coeff_x1000", 0, 0) - # P_coeff_x1000 = trial.suggest_int("P_coeff_x1000", 0, 10000) - search_batch_size = trial.suggest_int("search_batch_size", 64, 64) - else: - C_PUCT_x1000 = trial.suggest_int("C_PUCT_x1000", 1000, 5000) - Q_coeff_x1000 = trial.suggest_int("Q_coeff_x1000", 0, 10000) - # P_coeff_x1000 = trial.suggest_int("P_coeff_x1000", 0, 10000) - search_batch_size = trial.suggest_int("search_batch_size", 8, 512) - - # Miacisを準備 - server.engines[0].set_engine_options({"random_turn": 30, - "print_interval": 10000000, - "USI_Hash": 8192, - "C_PUCT_x1000": C_PUCT_x1000, - "Q_coeff_x1000": Q_coeff_x1000, - "search_batch_size": search_batch_size, - "model_name": model_name}) - scalar_or_categorical = "scalar" if "sca" in model_name else "categorical" - server.engines[0].connect(f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}") - - # 戦績を初期化 - total_num = [0, 0, 0] - - # 棋譜の集合を初期化 - sfens = defaultdict(int) - - # iが偶数のときMiacis先手 - for i in range(args.game_num): - # 対局を実行 - server.game_start() - while not server.game_result.is_gameover(): - time.sleep(1) - - # 重複を確認 - if sfens[server.sfen] > 0: - # 同じ棋譜が2回生成された場合は記録しない - print(f"\n重複:", server.sfen) - else: - # 結果を記録 - result = result_converter[server.game_result] - total_num[result if not server.flip_turn else LOSE - result] += 1 - - sfens[server.sfen] += 1 - - # ここまでの結果を文字列化 - winning_rate = (total_num[WIN] + 0.5 * total_num[DRAW]) / sum(total_num) - elo_rate = calc_elo_rate(winning_rate) - result_str = f"{total_num[WIN]:3d}勝 {total_num[DRAW]:3d}引き分け {total_num[LOSE]:3d}敗 勝率 {100 * winning_rate:4.1f}% 相対レート {elo_rate:6.1f}" - - sys.stdout.write("\033[2K\033[G") - print(result_str, end="\n" if i == args.game_num - 1 else "") - sys.stdout.flush() - - # 手番反転 - server.flip_turn = not server.flip_turn - return elo_rate - - -study = optuna.create_study(direction="maximize", study_name="vsYaneuraOu", storage="sqlite:///optuna_result.db", load_if_exists=True) -study.optimize(objective, n_trials=100) - -server.terminate() - -print(study.best_params) -print(study.best_value) -print(len(study.trials)) From 7bf7471ea414a5ebe20a50900a19e1cc2e7ac64e Mon Sep 17 00:00:00 2001 From: sakoda Date: Tue, 2 Mar 2021 11:25:10 +0900 Subject: [PATCH 74/87] =?UTF-8?q?global=5Fstep=E3=82=92=E3=83=95=E3=82=A1?= =?UTF-8?q?=E3=82=A4=E3=83=AB=E3=81=AB=E6=9B=B8=E3=81=8D=E5=87=BA=E3=81=99?= =?UTF-8?q?=E3=82=88=E3=81=86=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/supervised_learn.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/supervised_learn.cpp b/src/supervised_learn.cpp index 9a54bb52..bc3d2b19 100644 --- a/src/supervised_learn.cpp +++ b/src/supervised_learn.cpp @@ -35,7 +35,7 @@ void supervisedLearn() { //どのEpochでどのデータを使っているかを記録する std::ofstream epoch_log("epoch_log.txt"); epoch_log << "dir_path.size() = " << dir_paths.size() << std::endl; - epoch_log << "0 " << train_data.size() << std::endl; + epoch_log << "0 0 " << train_data.size() << std::endl; //学習クラスを生成 LearnManager learn_manager("supervised"); @@ -58,7 +58,7 @@ void supervisedLearn() { if (load_multi_dir) { train_data = loadData(dir_paths[epoch % dir_paths.size()], data_augmentation, train_rate_threshold); - epoch_log << epoch << " " << train_data.size() << std::endl; + epoch_log << epoch << " " << global_step << " " << train_data.size() << std::endl; } } From 9bf52c4bb3625d92b9a3f0c26e46c0b19d224942 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Mar 2021 10:50:21 +0900 Subject: [PATCH 75/87] =?UTF-8?q?use=5Ffp16=E3=81=AF=E3=83=87=E3=83=95?= =?UTF-8?q?=E3=82=A9=E3=83=AB=E3=83=88=E3=81=A7=E3=82=AA=E3=83=95=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index adc779b5..94840977 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -11,7 +11,7 @@ Q_dist_lambda 1.0 noise_epsilon 0.25 noise_alpha 0.15 C_PUCT 2.5 -use_fp16 1 +use_fp16 0 draw_turn 320 random_turn 320 batch_size 512 From da2118ef6c6d2ed696d69b872195c0307751ed13 Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Mar 2021 10:56:01 +0900 Subject: [PATCH 76/87] =?UTF-8?q?InferModel=E3=81=AEload=E3=81=AB=E6=8E=92?= =?UTF-8?q?=E4=BB=96=E5=88=B6=E5=BE=A1=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/infer_model.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/infer_model.cpp b/src/infer_model.cpp index 296bb266..0005d369 100644 --- a/src/infer_model.cpp +++ b/src/infer_model.cpp @@ -8,6 +8,10 @@ void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path, bool use_fp16) { + //マルチGPU環境で同時にloadすると時々Segmentation Faultが発生するので排他制御を入れる + static std::mutex load_mutex; + std::lock_guard guard(load_mutex); + torch::jit::Module module = torch::jit::load(model_path); device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); module.to(device_); From 571312d7ed531da6d6622f9ae72b73f631b357ea Mon Sep 17 00:00:00 2001 From: sakoda Date: Wed, 3 Mar 2021 10:58:46 +0900 Subject: [PATCH 77/87] =?UTF-8?q?=E5=BC=B7=E5=8C=96=E5=AD=A6=E7=BF=92?= =?UTF-8?q?=E3=81=AE=E3=82=AA=E3=83=97=E3=82=B7=E3=83=A7=E3=83=B3=E3=81=AB?= =?UTF-8?q?wait=5Fsec=5Fper=5Fload=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 1 + src/reinforcement_learn.cpp | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index 94840977..8e225e65 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -37,6 +37,7 @@ save_interval 50000 validation_interval 50000 sleep_msec -1 noise_mode 0 +wait_sec_per_load 90 use_sam_optim 0 calibration_kifu_path /root/data/floodgate_kifu/valid diff --git a/src/reinforcement_learn.cpp b/src/reinforcement_learn.cpp index e9de5379..d6ad9702 100644 --- a/src/reinforcement_learn.cpp +++ b/src/reinforcement_learn.cpp @@ -33,6 +33,7 @@ void reinforcementLearn() { int64_t sleep_msec = settings.get("sleep_msec"); int64_t init_buffer_by_kifu = settings.get("init_buffer_by_kifu"); int64_t noise_mode = settings.get("noise_mode"); + int64_t wait_sec_per_load = settings.get("wait_sec_per_load"); bool data_augmentation = settings.get("data_augmentation"); bool Q_search = settings.get("Q_search"); std::string train_kifu_path = settings.get("train_kifu_path"); @@ -112,8 +113,8 @@ void reinforcementLearn() { generators[i]->gpu_mutex.unlock(); } - //loadに30秒ほどかかるのでその期間スリープ - std::this_thread::sleep_for(std::chrono::seconds(30)); + //int8の場合は特にloadで時間がかかるのでその期間スリープ + std::this_thread::sleep_for(std::chrono::seconds(wait_sec_per_load)); } //学習スレッドを眠らせることで擬似的にActorの数を増やす From 858f6d819a1388882e88a8cf3276388163112377 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 5 Mar 2021 13:03:08 +0900 Subject: [PATCH 78/87] =?UTF-8?q?=E5=8B=BE=E9=85=8D=E3=82=AF=E3=83=AA?= =?UTF-8?q?=E3=83=83=E3=83=94=E3=83=B3=E3=82=B0=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 1 + setting/supervised_learn_settings.txt | 1 + src/learn.cpp | 6 ++++++ src/learn.hpp | 3 +++ 4 files changed, 11 insertions(+) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index 8e225e65..0ca06b31 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -39,6 +39,7 @@ sleep_msec -1 noise_mode 0 wait_sec_per_load 90 use_sam_optim 0 +clip_grad_norm 10 calibration_kifu_path /root/data/floodgate_kifu/valid # Shogi diff --git a/setting/supervised_learn_settings.txt b/setting/supervised_learn_settings.txt index f496a703..fd39428e 100644 --- a/setting/supervised_learn_settings.txt +++ b/setting/supervised_learn_settings.txt @@ -16,6 +16,7 @@ learn_rate_decay_step3 1800000 learn_rate_decay_step4 2400000 learn_rate_decay_period 100000 use_sam_optim 0 +clip_grad_norm 10 # Shogi(AobaZero) load_multi_dir 1 diff --git a/src/learn.cpp b/src/learn.cpp index a64c92ef..6b31f03d 100644 --- a/src/learn.cpp +++ b/src/learn.cpp @@ -118,6 +118,9 @@ LearnManager::LearnManager(const std::string& learn_name) { //SAM use_sam_optim_ = settings.get("use_sam_optim"); + //clip_grad_norm_の値 + clip_grad_norm_ = settings.get("clip_grad_norm"); + //学習開始時間の設定 timer_.start(); } @@ -194,6 +197,9 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d } } + //勾配をクリップ + torch::nn::utils::clip_grad_norm_(neural_network.parameters(), clip_grad_norm_); + //パラメータを更新 optimizer_->step(); diff --git a/src/learn.hpp b/src/learn.hpp index b891e8ee..86ce4b0c 100644 --- a/src/learn.hpp +++ b/src/learn.hpp @@ -100,6 +100,9 @@ class LearnManager { //Sharpness-Aware Minimizationを行うかどうか bool use_sam_optim_; + + //勾配クリッピングの値 + float clip_grad_norm_; }; //教師データを読み込む関数 From 11b0e44b1001c138be4ea1c1d0f02bedbbcbe4e5 Mon Sep 17 00:00:00 2001 From: sakoda Date: Fri, 12 Mar 2021 13:50:36 +0900 Subject: [PATCH 79/87] =?UTF-8?q?Mixup=E3=82=92=E5=86=8D=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 1 + setting/supervised_learn_settings.txt | 1 + src/learn.cpp | 6 ++- src/learn.hpp | 3 ++ src/learning_model.cpp | 63 ++++++++++++++++++++++++ src/learning_model.hpp | 6 ++- 6 files changed, 78 insertions(+), 2 deletions(-) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index 0ca06b31..7959f2e6 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -6,6 +6,7 @@ policy_loss_coeff 1.0 value_loss_coeff 1.0 lambda 0.75 per_alpha 2.0 +mixup_alpha 0.0 Q_dist_temperature 0.01 Q_dist_lambda 1.0 noise_epsilon 0.25 diff --git a/setting/supervised_learn_settings.txt b/setting/supervised_learn_settings.txt index fd39428e..47e083c0 100644 --- a/setting/supervised_learn_settings.txt +++ b/setting/supervised_learn_settings.txt @@ -2,6 +2,7 @@ learn_rate 0.025 min_learn_rate 0.0025 momentum 0.9 weight_decay 0.0001 +mixup_alpha 0.0 policy_loss_coeff 1.0 value_loss_coeff 1.0 data_augmentation 1 diff --git a/src/learn.cpp b/src/learn.cpp index 6b31f03d..9151af2e 100644 --- a/src/learn.cpp +++ b/src/learn.cpp @@ -115,6 +115,9 @@ LearnManager::LearnManager(const std::string& learn_name) { learn_rate_decay_period_ = settings.get("learn_rate_decay_period"); min_learn_rate_ = settings.get("min_learn_rate"); + //mixupの混合比を決定する値 + mixup_alpha_ = settings.get("mixup_alpha"); + //SAM use_sam_optim_ = settings.get("use_sam_optim"); @@ -131,7 +134,8 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d //学習 optimizer_->zero_grad(); - std::array loss = neural_network.loss(curr_data); + std::array loss = + (mixup_alpha_ == 0 ? neural_network.loss(curr_data) : neural_network.mixUpLoss(curr_data, mixup_alpha_)); torch::Tensor loss_sum = torch::zeros({ batch_size }); for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { loss_sum += coefficients_[i] * loss[i].cpu(); diff --git a/src/learn.hpp b/src/learn.hpp index 86ce4b0c..a8851571 100644 --- a/src/learn.hpp +++ b/src/learn.hpp @@ -98,6 +98,9 @@ class LearnManager { //Cosine annealing時の最小値 float min_learn_rate_; + //mixupを行う場合の混合比を決定する値 + float mixup_alpha_; + //Sharpness-Aware Minimizationを行うかどうか bool use_sam_optim_; diff --git a/src/learning_model.cpp b/src/learning_model.cpp index ce439dd7..5c74123a 100644 --- a/src/learning_model.cpp +++ b/src/learning_model.cpp @@ -134,6 +134,69 @@ std::array LearningModel::validLoss(const std::vec #endif } +std::array LearningModel::mixUpLoss(const std::vector& data, float alpha) { + std::gamma_distribution gamma_dist(alpha); + float gamma1 = gamma_dist(engine), gamma2 = gamma_dist(engine); + float beta = gamma1 / (gamma1 + gamma2); + + static Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + value_teachers.push_back(data[i].value); + } + + torch::Tensor input_tensor = encode(inputs); + + //入力時のmixup + input_tensor = beta * input_tensor + (1 - beta) * input_tensor.roll(1, 0); + + auto out = module_.forward({ input_tensor }); + auto tuple = out.toTuple(); + torch::Tensor policy = tuple->elements()[0].toTensor(); + torch::Tensor value = tuple->elements()[1].toTensor(); + + torch::Tensor policy_logits = policy.view({ -1, POLICY_DIM }); + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); + + //教師データのmixup + policy_target = beta * policy_target + (1 - beta) * policy_target.roll(1, 0); + + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(policy_logits, 1), 1, false); + +#ifdef USE_CATEGORICAL + torch::Tensor categorical_target = torch::tensor(value_teachers).to(device_); + torch::Tensor value_loss1 = torch::nll_loss(torch::log_softmax(value, 1), categorical_target); + torch::Tensor value_loss2 = torch::nll_loss(torch::log_softmax(value, 1), categorical_target.roll(1, 0)); + torch::Tensor value_loss = beta * value_loss1 + (1 - beta) * value_loss2; +#else + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); + value_t = beta * value_t + (1 - beta) * value_t.roll(1, 0); + value = value.view(-1); +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif +#endif + + return { policy_loss, value_loss }; +} + std::vector LearningModel::parameters() { std::vector parameters; for (auto p : module_.parameters()) { diff --git a/src/learning_model.hpp b/src/learning_model.hpp index e7cfafc8..52e72937 100644 --- a/src/learning_model.hpp +++ b/src/learning_model.hpp @@ -12,9 +12,13 @@ class LearningModel { torch::Tensor encode(const std::vector& inputs) const; std::array loss(const std::vector& data); std::array validLoss(const std::vector& data); + + //MixUpを行って損失を返す関数 + std::array mixUpLoss(const std::vector& data, float alpha); + std::vector parameters(); - void train() { module_.train() ;} + void train() { module_.train(); } void eval() { module_.eval(); } private: From 0021f7491eb07404532c42d700b4ae9afa219773 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 18 Mar 2021 13:31:31 +0900 Subject: [PATCH 80/87] =?UTF-8?q?=E3=83=97=E3=83=AD=E3=83=83=E3=83=88?= =?UTF-8?q?=E3=82=B9=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88=E3=82=92=E4=BF=AE?= =?UTF-8?q?=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/plot_reinforcement_result.py | 27 ++++++++------------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/scripts/plot_reinforcement_result.py b/scripts/plot_reinforcement_result.py index ba7abcc4..d4a4dc2b 100755 --- a/scripts/plot_reinforcement_result.py +++ b/scripts/plot_reinforcement_result.py @@ -9,11 +9,10 @@ TIME = 0 STEP = 1 -SUM_LOSS = 2 -POLICY_LOSS = 3 -VALUE_LOSS = 4 -ELO_RATE = 5 -ELEMENT_NUM = 6 +POLICY_LOSS = 2 +VALUE_LOSS = 3 +ELO_RATE = 4 +ELEMENT_NUM = 5 # ディレクトリの名前をコマンドライン引数として受け取る parser = argparse.ArgumentParser() @@ -38,7 +37,7 @@ data = [list() for _ in range(ELEMENT_NUM)] # まず損失のデータを取得する - loss_file_name = dir_name + "/alphazero_valid_log.txt" + loss_file_name = dir_name + "/reinforcement_valid_log.txt" if not os.path.exists(loss_file_name): print("There is not a such file : ", loss_file_name) break @@ -64,17 +63,6 @@ hour = float(e[0]) + float(e[1]) / 60 + float(e[2]) / 3600 data[i].append(hour) - # 対局結果はresult.txtにある - result_file_name = dir_name + "/result.txt" - if os.path.exists(result_file_name): - for line in open(result_file_name): - # 空白区切りで"相対レート"という要素の次にレートが記録されていることを前提とする - elements = line.strip().split() - if "相対レート" in elements: - data[ELO_RATE].append(float(elements[elements.index("相対レート") + 1])) - else: - print("There is not a such file : ", result_file_name) - all_data.append(data) # timeという名前にしているが時間で換算した方がわかりやすいので名前を変える @@ -82,7 +70,7 @@ # グラフの描画 for i in [STEP, TIME]: # x軸 - for j in [POLICY_LOSS, VALUE_LOSS, ELO_RATE]: # y軸 + for j in [POLICY_LOSS, VALUE_LOSS]: # y軸 plt.xlabel(label[i]) plt.ylabel(label[j]) @@ -90,7 +78,8 @@ for k, data in enumerate(all_data): d = len(data[i]) // len(data[j]) plt.plot(data[i][d - 1::d], data[j], label=args.labels[k], marker=markers[k]) - texts.append(plt.text(data[i][-1] * 1.01, data[j][-1], args.labels[k], color=plt.get_cmap("tab10")(k))) + if len(all_data) > 1: + texts.append(plt.text(data[i][-1] * 1.01, data[j][-1], args.labels[k], color=plt.get_cmap("tab10")(k))) texts.sort(key=lambda text: text.get_position()[1]) pre_y = -float("inf") margin = (plt.ylim()[1] - plt.ylim()[0]) / 30 From ecc86bfeab88b9dcc26873a11b733bb04a3c3e66 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 18 Mar 2021 13:33:04 +0900 Subject: [PATCH 81/87] =?UTF-8?q?=E5=AD=A6=E7=BF=92=E8=A8=AD=E5=AE=9A?= =?UTF-8?q?=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index 7959f2e6..b114a47a 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -25,7 +25,7 @@ learn_rate_decay_step3 2000000 learn_rate_decay_step4 2000000 learn_rate_decay_period 100000 update_interval 1000 -batch_size_per_gen 2 +batch_size_per_gen 1 worker_num_per_thread 64 max_stack_size 1048576 first_wait 1048576 From 6c2498c8f6996538015686db695d4478aa9a0351 Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 18 Mar 2021 13:39:42 +0900 Subject: [PATCH 82/87] =?UTF-8?q?min=5Flearn=5Frate=E3=82=AA=E3=83=97?= =?UTF-8?q?=E3=82=B7=E3=83=A7=E3=83=B3=E3=82=92=E5=89=8A=E9=99=A4=E3=81=97?= =?UTF-8?q?=E3=81=A60=E3=81=AB=E3=81=BE=E3=81=A7=E4=B8=8B=E3=81=92?= =?UTF-8?q?=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- setting/reinforcement_learn_settings.txt | 1 - setting/supervised_learn_settings.txt | 1 - src/learn.cpp | 3 +-- src/learn.hpp | 3 --- 4 files changed, 1 insertion(+), 7 deletions(-) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index b114a47a..69eb0106 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -1,5 +1,4 @@ learn_rate 0.025 -min_learn_rate 0.0025 momentum 0.9 weight_decay 0.0 policy_loss_coeff 1.0 diff --git a/setting/supervised_learn_settings.txt b/setting/supervised_learn_settings.txt index 47e083c0..c0c6dea8 100644 --- a/setting/supervised_learn_settings.txt +++ b/setting/supervised_learn_settings.txt @@ -1,5 +1,4 @@ learn_rate 0.025 -min_learn_rate 0.0025 momentum 0.9 weight_decay 0.0001 mixup_alpha 0.0 diff --git a/src/learn.cpp b/src/learn.cpp index 9151af2e..1ec134f8 100644 --- a/src/learn.cpp +++ b/src/learn.cpp @@ -113,7 +113,6 @@ LearnManager::LearnManager(const std::string& learn_name) { learn_rate_decay_step3_ = settings.get("learn_rate_decay_step3"); learn_rate_decay_step4_ = settings.get("learn_rate_decay_step4"); learn_rate_decay_period_ = settings.get("learn_rate_decay_period"); - min_learn_rate_ = settings.get("min_learn_rate"); //mixupの混合比を決定する値 mixup_alpha_ = settings.get("mixup_alpha"); @@ -250,7 +249,7 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d //Cosine annealing int64_t curr_step = stem_num % learn_rate_decay_period_; (dynamic_cast(optimizer_->param_groups().front().options())).lr() = - min_learn_rate_ + 0.5 * (learn_rate_ - min_learn_rate_) * (1 + cos(acos(-1) * curr_step / learn_rate_decay_period_)); + 0.5 * learn_rate_ * (1 + cos(acos(-1) * curr_step / learn_rate_decay_period_)); } else if (learn_rate_decay_mode_ == 3) { //指数的な減衰 if (stem_num % learn_rate_decay_period_ == 0) { diff --git a/src/learn.hpp b/src/learn.hpp index a8851571..178e9f73 100644 --- a/src/learn.hpp +++ b/src/learn.hpp @@ -95,9 +95,6 @@ class LearnManager { //その他周期的なスケジューリングの周期 int64_t learn_rate_decay_period_; - //Cosine annealing時の最小値 - float min_learn_rate_; - //mixupを行う場合の混合比を決定する値 float mixup_alpha_; From 029a44d5b18b7d0e6e7ee8a87519a4ca803cef1d Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 25 Mar 2021 10:27:01 +0900 Subject: [PATCH 83/87] =?UTF-8?q?=E5=AD=A6=E7=BF=92=E7=B5=90=E6=9E=9C?= =?UTF-8?q?=E3=81=AE=E3=83=97=E3=83=AD=E3=83=83=E3=83=88=E3=82=B9=E3=82=AF?= =?UTF-8?q?=E3=83=AA=E3=83=97=E3=83=88=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/plot_reinforcement_result.py | 7 ++----- scripts/plot_reinforcement_result_othello.py | 7 ++----- scripts/plot_supervised_result.py | 22 +++++++++----------- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/scripts/plot_reinforcement_result.py b/scripts/plot_reinforcement_result.py index d4a4dc2b..23e25ae9 100755 --- a/scripts/plot_reinforcement_result.py +++ b/scripts/plot_reinforcement_result.py @@ -16,12 +16,9 @@ # ディレクトリの名前をコマンドライン引数として受け取る parser = argparse.ArgumentParser() -parser.add_argument("-dirs", type=(lambda x: x.split())) -parser.add_argument("--labels", type=(lambda x: x.split()), default=None) +parser.add_argument("--dirs", type=(lambda x: x.split()), required=True) +parser.add_argument("--labels", type=(lambda x: x.split()), required=True) args = parser.parse_args() -if args.labels is None: - args.labels = [""] - assert len(args.dirs) == len(args.labels) # 3次元 diff --git a/scripts/plot_reinforcement_result_othello.py b/scripts/plot_reinforcement_result_othello.py index 938092ff..4695418b 100755 --- a/scripts/plot_reinforcement_result_othello.py +++ b/scripts/plot_reinforcement_result_othello.py @@ -13,12 +13,9 @@ # ディレクトリの名前をコマンドライン引数として受け取る parser = argparse.ArgumentParser() -parser.add_argument("-dirs", type=(lambda x: x.split())) -parser.add_argument("--labels", type=(lambda x: x.split("%")), default=None) +parser.add_argument("--dirs", type=(lambda x: x.split()), required=True) +parser.add_argument("--labels", type=(lambda x: x.split()), required=True) args = parser.parse_args() -if args.labels is None: - args.labels = [""] - assert len(args.dirs) == len(args.labels) # 3次元 diff --git a/scripts/plot_supervised_result.py b/scripts/plot_supervised_result.py index 21b21763..e845d57e 100755 --- a/scripts/plot_supervised_result.py +++ b/scripts/plot_supervised_result.py @@ -7,12 +7,9 @@ # ディレクトリの名前をコマンドライン引数として受け取る parser = argparse.ArgumentParser() -parser.add_argument("-dirs", type=(lambda x: x.split())) -parser.add_argument("--labels", type=(lambda x: x.split()), default=None) +parser.add_argument("--dirs", type=(lambda x: x.split()), required=True) +parser.add_argument("--labels", type=(lambda x: x.split()), required=True) args = parser.parse_args() -if args.labels is None: - args.labels = [""] - assert len(args.dirs) == len(args.labels) @@ -34,11 +31,10 @@ def get_labels_and_data(file_name): TIME = 0 -EPOCH = 1 -STEP = 2 -POLICY_LOSS = 3 -VALUE_LOSS = 4 -ELO_RATE = 5 +STEP = 1 +POLICY_LOSS = 2 +VALUE_LOSS = 3 +ELO_RATE = 4 train_labels = None train_data = list() @@ -90,7 +86,8 @@ def get_labels_and_data(file_name): plt.plot(data[x], data[y], label=name) plt.xlabel(train_labels[x]) plt.ylabel(train_labels[y]) - plt.legend() + if len(args.labels) > 1: + plt.legend() plt.savefig("compare_train_" + train_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) plt.clf() @@ -99,7 +96,8 @@ def get_labels_and_data(file_name): plt.plot(data[x], data[y], label=name) plt.xlabel(valid_labels[x]) plt.ylabel(valid_labels[y]) - plt.legend() + if len(args.labels) > 1: + plt.legend() plt.savefig("compare_valid_" + valid_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) plt.clf() From d4f5d7c7a263ae5236e3503f1387dd1449c7d6ac Mon Sep 17 00:00:00 2001 From: sakoda Date: Thu, 25 Mar 2021 12:12:41 +0900 Subject: [PATCH 84/87] =?UTF-8?q?=E6=95=99=E5=B8=AB=E3=81=82=E3=82=8A?= =?UTF-8?q?=E5=AD=A6=E7=BF=92=E3=80=81=E5=BC=B7=E5=8C=96=E5=AD=A6=E7=BF=92?= =?UTF-8?q?=E3=81=A7=E6=B1=8E=E7=94=A8=E3=81=AA=E3=83=97=E3=83=AD=E3=83=83?= =?UTF-8?q?=E3=83=88=E3=82=B9=E3=82=AF=E3=83=AA=E3=83=97=E3=83=88=E3=82=92?= =?UTF-8?q?=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/plot_loss.py | 121 ++++++++++++++++++++++++++++++ scripts/plot_supervised_result.py | 6 +- 2 files changed, 125 insertions(+), 2 deletions(-) create mode 100755 scripts/plot_loss.py diff --git a/scripts/plot_loss.py b/scripts/plot_loss.py new file mode 100755 index 00000000..5fb31568 --- /dev/null +++ b/scripts/plot_loss.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +import matplotlib.pyplot as plt +import numpy as np +import os +import japanize_matplotlib +import argparse + +# ディレクトリの名前をコマンドライン引数として受け取る +parser = argparse.ArgumentParser() +parser.add_argument("--dirs", type=(lambda x: x.split()), required=True) +parser.add_argument("--labels", type=(lambda x: x.split()), required=True) +parser.add_argument("--prefix", type=str, required=True) +args = parser.parse_args() +assert len(args.dirs) == len(args.labels) + + +def get_labels_and_data(file_name): + f = open(file_name) + labels = [_ for _ in f.readline().strip().split("\t")] + data = [list() for _ in range(len(labels))] + for line in f: + line = line.strip() + line = line.split("\t") + for i in range(len(line)): + try: + data[i].append(float(line[i])) + except: + e = line[i].split(":") + hour = float(e[0]) + float(e[1]) / 60 + float(e[2]) / 3600 + data[i].append(hour) + return labels, data + + +TIME = 0 +STEP = 1 +POLICY_LOSS = 2 +VALUE_LOSS = 3 +ELO_RATE = 4 + +train_labels = None +train_data = list() +valid_labels = None +valid_data = list() +battle_result = list() + +for dir_name in args.dirs: + if dir_name[-1] != "/": + dir_name += "/" + train_labels, t_data = get_labels_and_data(dir_name + f"{args.prefix}_train_log.txt") + # trainデータは1ステップごとに記録されていて多すぎるのでSKIP個になるようにまとめて平均を取る + SKIP = 200 + for i in range(len(t_data)): + t_data[i] = np.array(t_data[i]).reshape(SKIP, -1).mean(axis=1) + train_data.append(t_data) + valid_labels, v_data = get_labels_and_data(dir_name + f"{args.prefix}_valid_log.txt") + valid_data.append(v_data) + + # 対局結果を取得 + # 対局結果はresult.txtにある + result_file_name = dir_name + "/result.txt" + if not os.path.exists(result_file_name): + print("result.txt does not exist in ", dir_name) + continue + + steps = list() + rates = list() + for line in open(result_file_name): + # 空白区切りで"相対レート"という要素の次にレートが記録されていることを前提とする + elements = line.strip().split() + for e in elements: + if "ステップ" in e: + steps.append(int(e.replace("ステップ", ""))) + if "相対レート" in elements: + rates.append(float(elements[elements.index("相対レート") + 1])) + + c = zip(steps, rates) + c = sorted(c) + steps, rates = zip(*c) + + battle_result.append((steps, rates)) + +# policy, valueそれぞれプロット +for x in [STEP]: + for y in [POLICY_LOSS, VALUE_LOSS]: + # train + for name, data in zip(args.labels, train_data): + plt.plot(data[x], data[y], label=name) + plt.xlabel(train_labels[x]) + plt.ylabel(train_labels[y]) + if len(args.labels) > 1: + plt.legend() + plt.savefig("compare_train_" + train_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) + plt.clf() + + # valid + for name, data in zip(args.labels, valid_data): + plt.plot(data[x], data[y], label=name, marker=".") + plt.xlabel(valid_labels[x]) + plt.ylabel(valid_labels[y]) + if len(args.labels) > 1: + plt.legend() + plt.savefig("compare_valid_" + valid_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) + plt.clf() + + # train and valid + for name, data in zip(args.labels, train_data): + plt.plot(data[x], data[y], label="train_" + name, linestyle="dashed") + for name, data in zip(args.labels, valid_data): + plt.plot(data[x], data[y], label="valid_" + name, marker=".") + plt.xlabel(train_labels[x]) + plt.ylabel(train_labels[y]) + plt.legend() + plt.savefig("compare_train_and_valid_" + train_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) + plt.clf() + +# 対局結果をプロット +for name, data in zip(args.labels, battle_result): + plt.plot(data[0], data[1], label=name) +plt.legend() +plt.savefig("compare_battle_result.png", bbox_inches="tight", pad_inches=0.1) +plt.clf() diff --git a/scripts/plot_supervised_result.py b/scripts/plot_supervised_result.py index e845d57e..5f3bed47 100755 --- a/scripts/plot_supervised_result.py +++ b/scripts/plot_supervised_result.py @@ -12,6 +12,8 @@ args = parser.parse_args() assert len(args.dirs) == len(args.labels) +prefix = "supervised" + def get_labels_and_data(file_name): f = open(file_name) @@ -45,13 +47,13 @@ def get_labels_and_data(file_name): for dir_name in args.dirs: if dir_name[-1] != "/": dir_name += "/" - train_labels, t_data = get_labels_and_data(dir_name + "supervised_train_log.txt") + train_labels, t_data = get_labels_and_data(dir_name + f"{prefix}_train_log.txt") # trainデータは1ステップごとに記録されていて多すぎるのでSKIP個になるようにまとめて平均を取る SKIP = 200 for i in range(len(t_data)): t_data[i] = np.array(t_data[i]).reshape(SKIP, -1).mean(axis=1) train_data.append(t_data) - valid_labels, v_data = get_labels_and_data(dir_name + "supervised_valid_log.txt") + valid_labels, v_data = get_labels_and_data(dir_name + f"{prefix}_valid_log.txt") valid_data.append(v_data) # 対局結果を取得 From 756a582291eb8bbb48ca37d3cfbeba1ab324450f Mon Sep 17 00:00:00 2001 From: sakoda Date: Sat, 27 Mar 2021 09:42:55 +0900 Subject: [PATCH 85/87] =?UTF-8?q?vsYaneuraOu.py=E3=81=8B=E3=82=89Thread?= =?UTF-8?q?=E3=82=AA=E3=83=97=E3=82=B7=E3=83=A7=E3=83=B3=E3=82=92=E3=81=AA?= =?UTF-8?q?=E3=81=8F=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/vsYaneuraOu.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/vsYaneuraOu.py b/scripts/vsYaneuraOu.py index b64c0984..a99d7e37 100755 --- a/scripts/vsYaneuraOu.py +++ b/scripts/vsYaneuraOu.py @@ -18,7 +18,6 @@ parser = argparse.ArgumentParser() parser.add_argument("--time1", type=int, default=1000) parser.add_argument("--time2", type=int, default=400) -parser.add_argument("--Threads", type=int, default=4) parser.add_argument("--NodesLimit", type=int, default=0) parser.add_argument("--game_num", type=int, default=500) parser.add_argument("--init_model_step", type=int, default=0) @@ -53,7 +52,6 @@ # YaneuraOuの設定 server.engines[1].set_engine_options({"USI_Ponder": "false", - "Threads": args.Threads, "NodesLimit": args.NodesLimit, "USI_Hash": 1024, "BookMoves": 0, @@ -76,7 +74,7 @@ f.write("~/Miacis/scripts/vsYaneuraOu.py") for w in sys.argv: f.write(" " + w) -f.write(f"\nMiacis time = {args.time1}, YaneuraOu time = {args.time2}, YaneuraOu Threads = {args.Threads}\n") +f.write(f"\nMiacis time = {args.time1}, YaneuraOu time = {args.time2}\n") # ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う model_names = natsorted(glob.glob(curr_path + "*0.model")) From c9ddc0f577eeab97a28e472bcf2614bb55bb08e5 Mon Sep 17 00:00:00 2001 From: sakoda Date: Sat, 27 Mar 2021 09:52:18 +0900 Subject: [PATCH 86/87] =?UTF-8?q?=E5=AF=BE=E5=B1=80=E8=A8=AD=E5=AE=9A?= =?UTF-8?q?=E3=82=92=E3=81=A7=E3=81=8D=E3=82=8B=E3=81=A0=E3=81=91=E5=B9=B3?= =?UTF-8?q?=E7=AD=89=E3=81=AB=E8=BF=91=E3=81=A5=E3=81=91=E3=82=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/vsYaneuraOu.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/scripts/vsYaneuraOu.py b/scripts/vsYaneuraOu.py index a99d7e37..1b54fbf4 100755 --- a/scripts/vsYaneuraOu.py +++ b/scripts/vsYaneuraOu.py @@ -17,9 +17,9 @@ parser = argparse.ArgumentParser() parser.add_argument("--time1", type=int, default=1000) -parser.add_argument("--time2", type=int, default=400) +parser.add_argument("--time2", type=int, default=1000) parser.add_argument("--NodesLimit", type=int, default=0) -parser.add_argument("--game_num", type=int, default=500) +parser.add_argument("--game_num", type=int, default=1000) parser.add_argument("--init_model_step", type=int, default=0) parser.add_argument("--reverse", action="store_true") parser.add_argument("--option", type=str, default=None) @@ -30,15 +30,18 @@ # 対局数(先後行うので偶数でなければならない) assert args.game_num % 2 == 0 +# ハッシュサイズ(共通) +hash_size = 2048 + # 勝ち,負け,引き分けの結果を示す定数 -WIN = 0 +WIN = 0 DRAW = 1 LOSE = 2 # Ayaneにおける結果をここでの結果に変換する辞書 -result_converter = { ayane.GameResult.BLACK_WIN: WIN, - ayane.GameResult.WHITE_WIN: LOSE, - ayane.GameResult.DRAW : DRAW, +result_converter = {ayane.GameResult.BLACK_WIN: WIN, + ayane.GameResult.WHITE_WIN: LOSE, + ayane.GameResult.DRAW: DRAW, ayane.GameResult.MAX_MOVES: DRAW } @@ -53,7 +56,7 @@ # YaneuraOuの設定 server.engines[1].set_engine_options({"USI_Ponder": "false", "NodesLimit": args.NodesLimit, - "USI_Hash": 1024, + "USI_Hash": hash_size, "BookMoves": 0, "NetworkDelay": 0, "NetworkDelay2": 0 @@ -71,10 +74,7 @@ # 結果を書き込むファイルを取得 f = open(curr_path + "result.txt", mode="a") -f.write("~/Miacis/scripts/vsYaneuraOu.py") -for w in sys.argv: - f.write(" " + w) -f.write(f"\nMiacis time = {args.time1}, YaneuraOu time = {args.time2}\n") +f.write(f"\ntime1 = {args.time1}, time2 = {args.time2}, NodesLimit = {args.NodesLimit}\n") # ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う model_names = natsorted(glob.glob(curr_path + "*0.model")) @@ -96,7 +96,7 @@ # Miacisを準備 server.engines[0].set_engine_options({"random_turn": 30, "print_interval": 10000000, - "USI_Hash": 4096, + "USI_Hash": hash_size, "model_name": model_name}) scalar_or_categorical = "scalar" if "sca" in model_name else "categorical" server.engines[0].connect(f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}") @@ -146,7 +146,7 @@ # Miacisを準備 server.engines[0].set_engine_options({"random_turn": 30, "print_interval": 10000000, - "USI_Hash": 4096, + "USI_Hash": hash_size, args.option: parameter, "model_name": model_names[-1]}) scalar_or_categorical = "scalar" if "sca" in model_names[-1] else "categorical" From b6de9898702ad101938ff699e4c710ee8030d960 Mon Sep 17 00:00:00 2001 From: sakoda Date: Sat, 27 Mar 2021 10:03:34 +0900 Subject: [PATCH 87/87] =?UTF-8?q?=E6=88=A6=E7=B8=BE=E3=82=92=E5=BC=95?= =?UTF-8?q?=E6=95=B0=E3=81=A7=E4=B8=8E=E3=81=88=E3=81=A6=E9=80=94=E4=B8=AD?= =?UTF-8?q?=E3=81=8B=E3=82=89=E5=86=8D=E9=96=8B=E3=81=99=E3=82=8B=E6=A9=9F?= =?UTF-8?q?=E8=83=BD=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/vsYaneuraOu.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/scripts/vsYaneuraOu.py b/scripts/vsYaneuraOu.py index 1b54fbf4..7ba3a997 100755 --- a/scripts/vsYaneuraOu.py +++ b/scripts/vsYaneuraOu.py @@ -25,6 +25,7 @@ parser.add_argument("--option", type=str, default=None) parser.add_argument("--parameters", type=(lambda x: list(map(int, x.split())))) parser.add_argument("--Suisho", action="store_true") +parser.add_argument("--total_num", type=(lambda x: list(map(int, x.split()))), default=[0, 0, 0]) args = parser.parse_args() # 対局数(先後行うので偶数でなければならない) @@ -102,13 +103,16 @@ server.engines[0].connect(f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}") # 戦績を初期化 - total_num = [0, 0, 0] + total_num = args.total_num + + # 引数で初期化するのは最初だけにしたいのでここで[0, 0, 0]を入れてしまう + args.total_num = [0, 0, 0] # 棋譜の集合を初期化 sfens = defaultdict(int) # iが偶数のときMiacis先手 - for i in range(args.game_num): + for i in range(sum(total_num), args.game_num): # 対局を実行 server.game_start() while not server.game_result.is_gameover(): @@ -153,7 +157,10 @@ server.engines[0].connect(f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}") # 戦績を初期化 - total_num = [0, 0, 0] + total_num = args.total_num + + # 引数で初期化するのは最初だけにしたいのでここで[0, 0, 0]を入れてしまう + args.total_num = [0, 0, 0] # 棋譜の集合を初期化 sfens = defaultdict(int)