diff --git a/.gitignore b/.gitignore index ee40bb36..7907eb72 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .idea/ -*.pyc \ No newline at end of file +*.pyc +build/ +.vscode/ \ No newline at end of file diff --git a/scripts/Dockerfile b/scripts/Dockerfile index 00273207..b84d7a79 100644 --- a/scripts/Dockerfile +++ b/scripts/Dockerfile @@ -1,41 +1,69 @@ -FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 -MAINTAINER Shintaro Sakoda - -RUN set -x && \ - : "必要なものをインストール" && \ - apt-get update && \ - apt-get install sudo -y && \ - sudo apt-get install git -y && \ - sudo apt-get install vim -y && \ - sudo apt-get install cmake -y && \ - sudo apt-get install python3 -y && \ - sudo apt-get install python3-pip -y && \ - sudo apt-get install p7zip-full -y && \ - sudo apt-get install wget -y && \ - sudo apt-get install curl -y && \ - sudo apt-get install zip -y && \ - sudo apt-get install unzip -y && \ - pip3 install natsort && \ - : "日本語の導入" && \ - sudo apt-get install language-pack-ja-base language-pack-ja -y && \ - echo "export LANG='ja_JP.UTF-8'" >> ~/.bashrc && \ - : "Miacisの取得" && \ - cd ~ && \ - git clone https://github.com/SakodaShintaro/Miacis && \ - : "libtorchの取得" && \ - ./Miacis/scripts/download_libtorch.sh && \ - : "ビルド更新スクリプトの準備" && \ - mkdir Miacis/src/cmake-build-release && \ - cd Miacis/src/cmake-build-release && \ - echo "git fetch" > update.sh && \ +FROM nvcr.io/nvidia/pytorch:20.10-py3 + +RUN apt-get update && apt-get install -y curl gnupg && rm -rf /var/lib/apt/lists/* + +RUN curl https://bazel.build/bazel-release.pub.gpg | apt-key add - && \ +echo "deb [arch=amd64] https://storage.googleapis.com/bazel-apt stable jdk1.8" | tee /etc/apt/sources.list.d/bazel.list + +RUN apt-get update && apt-get install -y bazel-3.7.1 && rm -rf /var/lib/apt/lists/* +RUN ln -s /usr/bin/bazel-3.7.1 /usr/bin/bazel + +RUN pip install notebook + +# trtorchの導入 +WORKDIR /opt +RUN git clone https://github.com/NVIDIA/TRTorch trtorch +WORKDIR /opt/trtorch +RUN git checkout 721b071f7166e1826183f28305823f406eac4807 +RUN cp /opt/trtorch/docker/WORKSPACE.cu.docker /opt/trtorch/WORKSPACE + +# Workaround for bazel expecting both static and shared versions, we only use shared libraries inside container +RUN cp /usr/lib/x86_64-linux-gnu/libnvinfer.so /usr/lib/x86_64-linux-gnu/libnvinfer_static.a + +WORKDIR /opt/trtorch +RUN bazel build //:libtrtorch --compilation_mode opt + +WORKDIR /opt/trtorch/py + +RUN pip install ipywidgets +RUN jupyter nbextension enable --py widgetsnbextension + +# Locale is not set by default +RUN apt-get update && apt-get install -y locales ninja-build && rm -rf /var/lib/apt/lists/* && locale-gen en_US.UTF-8 +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 +RUN python3 setup.py install --use-cxx11-abi + +RUN conda init bash + +ENV LD_LIBRARY_PATH /opt/conda/lib/python3.6/site-packages/torch/lib:$LD_LIBRARY_PATH + + +# ここから自分の設定 +# 言語の設定 +RUN apt-get update && apt-get install -y language-pack-ja-base language-pack-ja && rm -rf /var/lib/apt/lists/* +ENV LANG='ja_JP.UTF-8' + +# 必要なもののインストール +RUN apt-get update && apt-get install -y p7zip-full zip && rm -rf /var/lib/apt/lists/* +RUN pip install natsort + +# trtorchを適切な場所へ展開 +WORKDIR /root +RUN tar xvf /opt/trtorch/bazel-bin/libtrtorch.tar.gz . + +# Miacisの導入 +RUN git clone https://github.com/SakodaShintaro/Miacis +RUN ./Miacis/scripts/download_libtorch.sh +WORKDIR /root/Miacis/src/cmake-build-release +RUN echo "git fetch" > update.sh && \ echo "git reset --hard origin/master" >> update.sh && \ echo "cmake -DCMAKE_BUILD_TYPE=Release .." >> update.sh && \ - echo "make -j$(nproc)" >> update.sh && \ + echo "make -j$(nproc) Miacis_shogi_categorical" >> update.sh && \ chmod +x update.sh && \ - ./update.sh && \ - : "dotfilesの取得" && \ - cd ~ && \ - git clone https://github.com/SakodaShintaro/dotfiles && \ - ./dotfiles/setup.sh + ./update.sh +# dotfileの導入 WORKDIR /root +RUN git clone https://github.com/SakodaShintaro/dotfiles && ./dotfiles/setup.sh \ No newline at end of file diff --git a/scripts/convert_old_model_file.py b/scripts/convert_old_model_file.py new file mode 100755 index 00000000..adb02536 --- /dev/null +++ b/scripts/convert_old_model_file.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +import glob +import os +import re +from natsort import natsorted +from generate_torch_script_model import * + + +# batch_normがある場合はちょっと特殊なので関数として切り出しておく +def load_conv_and_norm(dst, src): + dst.conv_.weight.data = src.conv_.weight.data + dst.norm_.weight.data = src.norm_.weight.data + dst.norm_.bias.data = src.norm_.bias.data + dst.norm_.running_mean = src.norm_.running_mean + dst.norm_.running_var = src.norm_.running_var + + +parser = argparse.ArgumentParser() +parser.add_argument("--source_dir", type=str, required=True) +parser.add_argument("--game", default="shogi", choices=["shogi", "othello"]) +args = parser.parse_args() + +if args.game == "shogi": + input_channel_num = 42 + board_size = 9 + policy_channel_num = 27 +elif args.game == "othello": + input_channel_num = 2 + board_size = 8 + policy_channel_num = 2 + +# ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う +source_model_names = natsorted(glob.glob(f"{args.source_dir}/*.model")) + +# 1番目のモデル名からブロック数,チャンネル数を読み取る.これらは1ディレクトリ内で共通だという前提 +basename_without_ext = os.path.splitext(os.path.basename(source_model_names[0]))[0] +parts = basename_without_ext.split("_") +block_num = None +channel_num = None +for p in parts: + if "bl" in p: + block_num = int(re.sub("\\D", "", p)) + elif "ch" in p: + channel_num = int(re.sub("\\D", "", p)) + +# インスタンス生成 +model = CategoricalNetwork(input_channel_num, block_num, channel_num, policy_channel_num, board_size) + +# 各モデルファイルのパラメータをコピーしてTorchScriptとして保存 +for source_model_name in source_model_names: + source = torch.jit.load(source_model_name).cpu() + + # first_conv + load_conv_and_norm(model.encoder_.first_conv_and_norm_, source.state_first_conv_and_norm_) + + # block + for i, v in enumerate(model.encoder_.__dict__["_modules"]["blocks"]): + source_m = source.__dict__["_modules"][f"state_blocks_{i}"] + load_conv_and_norm(v.conv_and_norm0_, source_m.conv_and_norm0_) + load_conv_and_norm(v.conv_and_norm1_, source_m.conv_and_norm1_) + v.linear0_.weight.data = source_m.linear0_.weight.data + v.linear1_.weight.data = source_m.linear1_.weight.data + + # policy_conv + model.policy_head_.policy_conv_.weight.data = source.policy_conv_.weight.data + model.policy_head_.policy_conv_.bias.data = source.policy_conv_.bias.data + + # value_conv_norm_ + load_conv_and_norm(model.value_head_.value_conv_and_norm_, source.value_conv_and_norm_) + + # value_linear + model.value_head_.value_linear0_.weight.data = source.value_linear0_.weight.data + model.value_head_.value_linear0_.bias.data = source.value_linear0_.bias.data + model.value_head_.value_linear1_.weight.data = source.value_linear1_.weight.data + model.value_head_.value_linear1_.bias.data = source.value_linear1_.bias.data + + input_data = torch.ones([1, input_channel_num, board_size, board_size]) + model.eval() + script_model = torch.jit.trace(model, input_data) + # script_model = torch.jit.script(model) + model_path = f"{args.game}_{os.path.basename(source_model_name)}" + script_model.save(model_path) + print(f"{model_path}にパラメータを保存") diff --git a/scripts/download_AobaZero_kifu.sh b/scripts/download_AobaZero_kifu.sh index e49c9668..80fe285f 100755 --- a/scripts/download_AobaZero_kifu.sh +++ b/scripts/download_AobaZero_kifu.sh @@ -27,7 +27,7 @@ while read row; do # THRESHOLDより大きいものだけをダウンロード # 2重にダウンロードしないように存在判定を入れる - if [ "${file_number}" -ge ${START_FILE_ID} ] && [ "${file_number}" -lt ${START_FILE_ID} ] && [ ! -f "${root_dir}/${file_name}" ]; then + if [ "${file_number}" -ge ${START_FILE_ID} ] && [ "${file_number}" -lt ${END_FILE_ID} ] && [ ! -f "${root_dir}/${file_name}" ]; then # ダウンロード curl -sc /tmp/cookie "https://drive.google.com/uc?export=download&id=${file_id}" >/dev/null CODE="$(awk '/_warning_/ {print $NF}' /tmp/cookie)" diff --git a/scripts/download_Suisho.sh b/scripts/download_Suisho.sh index 8ba588dc..cc6a2769 100755 --- a/scripts/download_Suisho.sh +++ b/scripts/download_Suisho.sh @@ -9,10 +9,7 @@ root_dir=../.. # GitHubからソースコードをダウンロード git clone https://github.com/yaneurao/YaneuraOu ${root_dir}/Suisho cd ${root_dir}/Suisho/source - -# Gitの特定コミットへ移動 -# (dlshogiが対局していたものに合わせる cf. https://tadaoyamaoka.hatenablog.com/entry/2020/08/10/220411) -git checkout 276faf80d51dd6cae053112db8021171d5dbf4e8 +git checkout b0a3a2a4f7565bbefb85999368df15e9c90c621f # デフォルトではclangを使うようになっているがg++を使いたいのでMakefileを書き換える sed -i -e "s/#COMPILER = g++/COMPILER = g++/g" Makefile diff --git a/scripts/download_YaneuraOu.sh b/scripts/download_YaneuraOu.sh index 251b3ebe..61d94a8e 100755 --- a/scripts/download_YaneuraOu.sh +++ b/scripts/download_YaneuraOu.sh @@ -9,6 +9,7 @@ root_dir=../.. # GitHubからソースコードをダウンロード git clone https://github.com/yaneurao/YaneuraOu ${root_dir}/YaneuraOu cd ${root_dir}/YaneuraOu/source +git checkout b0a3a2a4f7565bbefb85999368df15e9c90c621f # デフォルトではclangを使うようになっているがg++を使いたいのでMakefileを書き換える sed -i -e "s/#COMPILER = g++/COMPILER = g++/g" Makefile diff --git a/scripts/download_floodgate_kifu.sh b/scripts/download_floodgate_kifu.sh index 0caff412..69bdef50 100755 --- a/scripts/download_floodgate_kifu.sh +++ b/scripts/download_floodgate_kifu.sh @@ -1,3 +1,5 @@ +echo "\$1(1番目の引数): $1" + # どこに保存するかの基準位置($0 = ./の2つ上がMiacisと同階層なのでそこに置く) root_dir=$(dirname "$0")/../../data @@ -5,20 +7,20 @@ root_dir=$(dirname "$0")/../../data download_path=${root_dir}/floodgate_kifu mkdir -p "${download_path}" wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2015.7z" -wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2016.7z" -wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2017.7z" -wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2018.7z" -wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2019.7z" +# wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2016.7z" +# wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2017.7z" +# wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2018.7z" +# wget -P "${download_path}" "http://wdoor.c.u-tokyo.ac.jp/shogi/x/wdoor2019.7z" # 学習用データ(2016年以降) -train_path=${download_path}/train -mkdir -p "${train_path}" -7z e "${download_path}"/wdoor2016.7z -o"${train_path}" -7z e "${download_path}"/wdoor2017.7z -o"${train_path}" -7z e "${download_path}"/wdoor2018.7z -o"${train_path}" -7z e "${download_path}"/wdoor2019.7z -o"${train_path}" +# train_path=${download_path}/train +# mkdir -p "${train_path}" +# 7z e "${download_path}"/wdoor2016.7z -o"${train_path}" +# 7z e "${download_path}"/wdoor2017.7z -o"${train_path}" +# 7z e "${download_path}"/wdoor2018.7z -o"${train_path}" +# 7z e "${download_path}"/wdoor2019.7z -o"${train_path}" # 検証用データ(2015年) valid_path=${download_path}/valid mkdir -p "${valid_path}" -7z e "${download_path}"/wdoor2015.7z -o"${valid_path}" +7z e "${download_path}"/wdoor2015.7z -o"${valid_path}" \ No newline at end of file diff --git a/scripts/generate_torch_script_model.py b/scripts/generate_torch_script_model.py new file mode 100755 index 00000000..efe70e8e --- /dev/null +++ b/scripts/generate_torch_script_model.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python3 +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.jit +import argparse + + +class Conv2DwithBatchNorm(nn.Module): + def __init__(self, input_ch, output_ch, kernel_size): + super(Conv2DwithBatchNorm, self).__init__() + self.conv_ = nn.Conv2d(input_ch, output_ch, kernel_size, bias=False, padding=kernel_size // 2) + self.norm_ = nn.BatchNorm2d(output_ch) + + def forward(self, x): + t = self.conv_.forward(x) + t = self.norm_.forward(t) + return t + + +class ResidualBlock(nn.Module): + def __init__(self, channel_num, kernel_size, reduction): + super(ResidualBlock, self).__init__() + self.conv_and_norm0_ = Conv2DwithBatchNorm(channel_num, channel_num, kernel_size) + self.conv_and_norm1_ = Conv2DwithBatchNorm(channel_num, channel_num, kernel_size) + self.linear0_ = nn.Linear(channel_num, channel_num // reduction, bias=False) + self.linear1_ = nn.Linear(channel_num // reduction, channel_num, bias=False) + + def forward(self, x): + t = x + t = self.conv_and_norm0_.forward(t) + t = F.relu(t) + t = self.conv_and_norm1_.forward(t) + + y = F.avg_pool2d(t, [t.shape[2], t.shape[3]]) + y = y.view([-1, t.shape[1]]) + y = self.linear0_.forward(y) + y = F.relu(y) + y = self.linear1_.forward(y) + y = torch.sigmoid(y) + y = y.view([-1, t.shape[1], 1, 1]) + t = t * y + + t = F.relu(x + t) + return t + + +class Encoder(nn.Module): + def __init__(self, input_channel_num, block_num, channel_num, kernel_size=3, reduction=8): + super(Encoder, self).__init__() + self.first_conv_and_norm_ = Conv2DwithBatchNorm(input_channel_num, channel_num, 3) + self.blocks = nn.Sequential() + for i in range(block_num): + self.blocks.add_module(f"block{i}", ResidualBlock(channel_num, kernel_size, reduction)) + + def forward(self, x): + x = self.first_conv_and_norm_.forward(x) + x = F.relu(x) + x = self.blocks.forward(x) + return x + + +class PolicyHead(nn.Module): + def __init__(self, channel_num, policy_channel_num): + super(PolicyHead, self).__init__() + self.policy_conv_ = nn.Conv2d(channel_num, policy_channel_num, 1, bias=True, padding=0) + + def forward(self, x): + policy = self.policy_conv_.forward(x) + return policy + + +class ValueHead(nn.Module): + def __init__(self, channel_num, board_size, unit_num, hidden_size=256): + super(ValueHead, self).__init__() + self.value_conv_and_norm_ = Conv2DwithBatchNorm(channel_num, channel_num, 1) + self.hidden_size = channel_num * board_size * board_size + self.value_linear0_ = nn.Linear(self.hidden_size, hidden_size) + self.value_linear1_ = nn.Linear(hidden_size, unit_num) + + def forward(self, x): + value = self.value_conv_and_norm_.forward(x) + value = F.relu(value) + value = value.view([-1, self.hidden_size]) + value = self.value_linear0_.forward(value) + value = F.relu(value) + value = self.value_linear1_.forward(value) + return value + + +class ScalarNetwork(nn.Module): + def __init__(self, input_channel_num, block_num, channel_num, policy_channel_num, board_size): + super(ScalarNetwork, self).__init__() + self.encoder_ = Encoder(input_channel_num, block_num, channel_num) + self.policy_head_ = PolicyHead(channel_num, policy_channel_num) + self.value_head_ = ValueHead(channel_num, board_size, 1) + + def forward(self, x): + x = self.encoder_.forward(x) + policy = self.policy_head_.forward(x) + value = self.value_head_.forward(x) + value = torch.tanh(value) + return policy, value + + +class CategoricalNetwork(nn.Module): + def __init__(self, input_channel_num, block_num, channel_num, policy_channel_num, board_size): + super(CategoricalNetwork, self).__init__() + self.encoder_ = Encoder(input_channel_num, block_num, channel_num) + self.policy_head_ = PolicyHead(channel_num, policy_channel_num) + self.value_head_ = ValueHead(channel_num, board_size, 51) + + def forward(self, x): + x = self.encoder_.forward(x) + policy = self.policy_head_.forward(x) + value = self.value_head_.forward(x) + return policy, value + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("-game", default="shogi", choices=["shogi", "othello"]) + parser.add_argument("-value_type", default="cat", choices=["sca", "cat"]) + parser.add_argument("--block_num", type=int, default=10) + parser.add_argument("--channel_num", type=int, default=256) + args = parser.parse_args() + + if args.game == "shogi": + input_channel_num = 42 + board_size = 9 + policy_channel_num = 27 + elif args.game == "othello": + input_channel_num = 2 + board_size = 8 + policy_channel_num = 2 + else: + exit(1) + + model = None + if args.value_type == "sca": + model = ScalarNetwork(input_channel_num, args.block_num, args.channel_num, policy_channel_num, board_size) + elif args.value_type == "cat": + model = CategoricalNetwork(input_channel_num, args.block_num, args.channel_num, policy_channel_num, board_size) + input_data = torch.randn([8, input_channel_num, board_size, board_size]) + # script_model = torch.jit.trace(model, input_data) + script_model = torch.jit.script(model) + model_path = f"./{args.game}_{args.value_type}_bl{args.block_num}_ch{args.channel_num}.model" + script_model.save(model_path) + print(f"{model_path}にパラメータを保存") + + +if __name__ == "__main__": + main() diff --git a/scripts/plot_loss.py b/scripts/plot_loss.py new file mode 100755 index 00000000..5fb31568 --- /dev/null +++ b/scripts/plot_loss.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +import matplotlib.pyplot as plt +import numpy as np +import os +import japanize_matplotlib +import argparse + +# ディレクトリの名前をコマンドライン引数として受け取る +parser = argparse.ArgumentParser() +parser.add_argument("--dirs", type=(lambda x: x.split()), required=True) +parser.add_argument("--labels", type=(lambda x: x.split()), required=True) +parser.add_argument("--prefix", type=str, required=True) +args = parser.parse_args() +assert len(args.dirs) == len(args.labels) + + +def get_labels_and_data(file_name): + f = open(file_name) + labels = [_ for _ in f.readline().strip().split("\t")] + data = [list() for _ in range(len(labels))] + for line in f: + line = line.strip() + line = line.split("\t") + for i in range(len(line)): + try: + data[i].append(float(line[i])) + except: + e = line[i].split(":") + hour = float(e[0]) + float(e[1]) / 60 + float(e[2]) / 3600 + data[i].append(hour) + return labels, data + + +TIME = 0 +STEP = 1 +POLICY_LOSS = 2 +VALUE_LOSS = 3 +ELO_RATE = 4 + +train_labels = None +train_data = list() +valid_labels = None +valid_data = list() +battle_result = list() + +for dir_name in args.dirs: + if dir_name[-1] != "/": + dir_name += "/" + train_labels, t_data = get_labels_and_data(dir_name + f"{args.prefix}_train_log.txt") + # trainデータは1ステップごとに記録されていて多すぎるのでSKIP個になるようにまとめて平均を取る + SKIP = 200 + for i in range(len(t_data)): + t_data[i] = np.array(t_data[i]).reshape(SKIP, -1).mean(axis=1) + train_data.append(t_data) + valid_labels, v_data = get_labels_and_data(dir_name + f"{args.prefix}_valid_log.txt") + valid_data.append(v_data) + + # 対局結果を取得 + # 対局結果はresult.txtにある + result_file_name = dir_name + "/result.txt" + if not os.path.exists(result_file_name): + print("result.txt does not exist in ", dir_name) + continue + + steps = list() + rates = list() + for line in open(result_file_name): + # 空白区切りで"相対レート"という要素の次にレートが記録されていることを前提とする + elements = line.strip().split() + for e in elements: + if "ステップ" in e: + steps.append(int(e.replace("ステップ", ""))) + if "相対レート" in elements: + rates.append(float(elements[elements.index("相対レート") + 1])) + + c = zip(steps, rates) + c = sorted(c) + steps, rates = zip(*c) + + battle_result.append((steps, rates)) + +# policy, valueそれぞれプロット +for x in [STEP]: + for y in [POLICY_LOSS, VALUE_LOSS]: + # train + for name, data in zip(args.labels, train_data): + plt.plot(data[x], data[y], label=name) + plt.xlabel(train_labels[x]) + plt.ylabel(train_labels[y]) + if len(args.labels) > 1: + plt.legend() + plt.savefig("compare_train_" + train_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) + plt.clf() + + # valid + for name, data in zip(args.labels, valid_data): + plt.plot(data[x], data[y], label=name, marker=".") + plt.xlabel(valid_labels[x]) + plt.ylabel(valid_labels[y]) + if len(args.labels) > 1: + plt.legend() + plt.savefig("compare_valid_" + valid_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) + plt.clf() + + # train and valid + for name, data in zip(args.labels, train_data): + plt.plot(data[x], data[y], label="train_" + name, linestyle="dashed") + for name, data in zip(args.labels, valid_data): + plt.plot(data[x], data[y], label="valid_" + name, marker=".") + plt.xlabel(train_labels[x]) + plt.ylabel(train_labels[y]) + plt.legend() + plt.savefig("compare_train_and_valid_" + train_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) + plt.clf() + +# 対局結果をプロット +for name, data in zip(args.labels, battle_result): + plt.plot(data[0], data[1], label=name) +plt.legend() +plt.savefig("compare_battle_result.png", bbox_inches="tight", pad_inches=0.1) +plt.clf() diff --git a/scripts/plot_reinforcement_result.py b/scripts/plot_reinforcement_result.py index ba7abcc4..23e25ae9 100755 --- a/scripts/plot_reinforcement_result.py +++ b/scripts/plot_reinforcement_result.py @@ -9,20 +9,16 @@ TIME = 0 STEP = 1 -SUM_LOSS = 2 -POLICY_LOSS = 3 -VALUE_LOSS = 4 -ELO_RATE = 5 -ELEMENT_NUM = 6 +POLICY_LOSS = 2 +VALUE_LOSS = 3 +ELO_RATE = 4 +ELEMENT_NUM = 5 # ディレクトリの名前をコマンドライン引数として受け取る parser = argparse.ArgumentParser() -parser.add_argument("-dirs", type=(lambda x: x.split())) -parser.add_argument("--labels", type=(lambda x: x.split()), default=None) +parser.add_argument("--dirs", type=(lambda x: x.split()), required=True) +parser.add_argument("--labels", type=(lambda x: x.split()), required=True) args = parser.parse_args() -if args.labels is None: - args.labels = [""] - assert len(args.dirs) == len(args.labels) # 3次元 @@ -38,7 +34,7 @@ data = [list() for _ in range(ELEMENT_NUM)] # まず損失のデータを取得する - loss_file_name = dir_name + "/alphazero_valid_log.txt" + loss_file_name = dir_name + "/reinforcement_valid_log.txt" if not os.path.exists(loss_file_name): print("There is not a such file : ", loss_file_name) break @@ -64,17 +60,6 @@ hour = float(e[0]) + float(e[1]) / 60 + float(e[2]) / 3600 data[i].append(hour) - # 対局結果はresult.txtにある - result_file_name = dir_name + "/result.txt" - if os.path.exists(result_file_name): - for line in open(result_file_name): - # 空白区切りで"相対レート"という要素の次にレートが記録されていることを前提とする - elements = line.strip().split() - if "相対レート" in elements: - data[ELO_RATE].append(float(elements[elements.index("相対レート") + 1])) - else: - print("There is not a such file : ", result_file_name) - all_data.append(data) # timeという名前にしているが時間で換算した方がわかりやすいので名前を変える @@ -82,7 +67,7 @@ # グラフの描画 for i in [STEP, TIME]: # x軸 - for j in [POLICY_LOSS, VALUE_LOSS, ELO_RATE]: # y軸 + for j in [POLICY_LOSS, VALUE_LOSS]: # y軸 plt.xlabel(label[i]) plt.ylabel(label[j]) @@ -90,7 +75,8 @@ for k, data in enumerate(all_data): d = len(data[i]) // len(data[j]) plt.plot(data[i][d - 1::d], data[j], label=args.labels[k], marker=markers[k]) - texts.append(plt.text(data[i][-1] * 1.01, data[j][-1], args.labels[k], color=plt.get_cmap("tab10")(k))) + if len(all_data) > 1: + texts.append(plt.text(data[i][-1] * 1.01, data[j][-1], args.labels[k], color=plt.get_cmap("tab10")(k))) texts.sort(key=lambda text: text.get_position()[1]) pre_y = -float("inf") margin = (plt.ylim()[1] - plt.ylim()[0]) / 30 diff --git a/scripts/plot_reinforcement_result_othello.py b/scripts/plot_reinforcement_result_othello.py index 938092ff..4695418b 100755 --- a/scripts/plot_reinforcement_result_othello.py +++ b/scripts/plot_reinforcement_result_othello.py @@ -13,12 +13,9 @@ # ディレクトリの名前をコマンドライン引数として受け取る parser = argparse.ArgumentParser() -parser.add_argument("-dirs", type=(lambda x: x.split())) -parser.add_argument("--labels", type=(lambda x: x.split("%")), default=None) +parser.add_argument("--dirs", type=(lambda x: x.split()), required=True) +parser.add_argument("--labels", type=(lambda x: x.split()), required=True) args = parser.parse_args() -if args.labels is None: - args.labels = [""] - assert len(args.dirs) == len(args.labels) # 3次元 diff --git a/scripts/plot_supervised_result.py b/scripts/plot_supervised_result.py index 21b21763..5f3bed47 100755 --- a/scripts/plot_supervised_result.py +++ b/scripts/plot_supervised_result.py @@ -7,14 +7,13 @@ # ディレクトリの名前をコマンドライン引数として受け取る parser = argparse.ArgumentParser() -parser.add_argument("-dirs", type=(lambda x: x.split())) -parser.add_argument("--labels", type=(lambda x: x.split()), default=None) +parser.add_argument("--dirs", type=(lambda x: x.split()), required=True) +parser.add_argument("--labels", type=(lambda x: x.split()), required=True) args = parser.parse_args() -if args.labels is None: - args.labels = [""] - assert len(args.dirs) == len(args.labels) +prefix = "supervised" + def get_labels_and_data(file_name): f = open(file_name) @@ -34,11 +33,10 @@ def get_labels_and_data(file_name): TIME = 0 -EPOCH = 1 -STEP = 2 -POLICY_LOSS = 3 -VALUE_LOSS = 4 -ELO_RATE = 5 +STEP = 1 +POLICY_LOSS = 2 +VALUE_LOSS = 3 +ELO_RATE = 4 train_labels = None train_data = list() @@ -49,13 +47,13 @@ def get_labels_and_data(file_name): for dir_name in args.dirs: if dir_name[-1] != "/": dir_name += "/" - train_labels, t_data = get_labels_and_data(dir_name + "supervised_train_log.txt") + train_labels, t_data = get_labels_and_data(dir_name + f"{prefix}_train_log.txt") # trainデータは1ステップごとに記録されていて多すぎるのでSKIP個になるようにまとめて平均を取る SKIP = 200 for i in range(len(t_data)): t_data[i] = np.array(t_data[i]).reshape(SKIP, -1).mean(axis=1) train_data.append(t_data) - valid_labels, v_data = get_labels_and_data(dir_name + "supervised_valid_log.txt") + valid_labels, v_data = get_labels_and_data(dir_name + f"{prefix}_valid_log.txt") valid_data.append(v_data) # 対局結果を取得 @@ -90,7 +88,8 @@ def get_labels_and_data(file_name): plt.plot(data[x], data[y], label=name) plt.xlabel(train_labels[x]) plt.ylabel(train_labels[y]) - plt.legend() + if len(args.labels) > 1: + plt.legend() plt.savefig("compare_train_" + train_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) plt.clf() @@ -99,7 +98,8 @@ def get_labels_and_data(file_name): plt.plot(data[x], data[y], label=name) plt.xlabel(valid_labels[x]) plt.ylabel(valid_labels[y]) - plt.legend() + if len(args.labels) > 1: + plt.legend() plt.savefig("compare_valid_" + valid_labels[y] + ".png", bbox_inches="tight", pad_inches=0.1) plt.clf() diff --git a/scripts/reinforcement_learn.sh b/scripts/reinforcement_learn.sh index 8e3e99a4..03c4c6b1 100755 --- a/scripts/reinforcement_learn.sh +++ b/scripts/reinforcement_learn.sh @@ -1,6 +1,7 @@ git show -s >git_commit_id.txt git diff >>git_commit_id.txt -echo -e "initParams\nreinforcementLearn\nquit\n" | ./Miacis_* +~/Miacis/scripts/generate_torch_script_model.py +echo -e "reinforcementLearn\nquit\n" | ./Miacis_* zip -rq learn_kifu.zip learn_kifu rm -rf learn_kifu scp -r $(pwd) sakoda:~/learn_result/reinforcement/ diff --git a/scripts/supervised_learn.sh b/scripts/supervised_learn.sh index 959afde6..58a9512c 100755 --- a/scripts/supervised_learn.sh +++ b/scripts/supervised_learn.sh @@ -1,4 +1,5 @@ git show -s > git_commit_id.txt git diff >> git_commit_id.txt -echo -e "initParams\nsupervisedLearn\nquit\n" | ./Miacis_* +~/Miacis/scripts/generate_torch_script_model.py +echo -e "supervisedLearn\nquit\n" | ./Miacis_* scp -r `pwd` sakoda:~/learn_result/supervised/ \ No newline at end of file diff --git a/scripts/validation.py b/scripts/validation.py index 26f7acfc..e8f8378f 100755 --- a/scripts/validation.py +++ b/scripts/validation.py @@ -8,7 +8,8 @@ parser = argparse.ArgumentParser() parser.add_argument("--kifu_path", type=str, default="/root/data/floodgate_kifu/valid") -parser.add_argument("--batch_size", type=int, default=4096) +parser.add_argument("--batch_size", type=int, default=512) +parser.add_argument("--init_model_step", type=int, default=0) args = parser.parse_args() # カレントディレクトリ内にある{prefix}_{step}.modelを評価する @@ -18,7 +19,7 @@ curr_path += "/" # 結果を書き込むファイルを取得 -f = open(curr_path + "validation_loss.txt", "w") +f = open(curr_path + "validation_loss.txt", "a") # ディレクトリにある以下のprefixを持ったパラメータを用いて検証損失の計算を行う model_names = natsorted(glob.glob(curr_path + "*0.model")) @@ -35,6 +36,10 @@ # 最後に出てくるアンダーバーから.modelの直前までにステップ数が記録されているという前提 step = int(model_name[model_name.rfind("_") + 1:model_name.find(".model")]) + # args.init_model_stepより小さいものは調べない + if step < args.init_model_step: + continue + scalar_or_categorical = "scalar" if "sca" in model_name else "categorical" miacis_path = f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}" command = f"checkVal\n{args.kifu_path}\n{args.batch_size}\n{model_name}\nquit" diff --git a/scripts/vsYaneuraOu.py b/scripts/vsYaneuraOu.py index b64c0984..68fdb233 100755 --- a/scripts/vsYaneuraOu.py +++ b/scripts/vsYaneuraOu.py @@ -17,30 +17,33 @@ parser = argparse.ArgumentParser() parser.add_argument("--time1", type=int, default=1000) -parser.add_argument("--time2", type=int, default=400) -parser.add_argument("--Threads", type=int, default=4) +parser.add_argument("--time2", type=int, default=1000) parser.add_argument("--NodesLimit", type=int, default=0) -parser.add_argument("--game_num", type=int, default=500) +parser.add_argument("--game_num", type=int, default=1000) parser.add_argument("--init_model_step", type=int, default=0) parser.add_argument("--reverse", action="store_true") parser.add_argument("--option", type=str, default=None) parser.add_argument("--parameters", type=(lambda x: list(map(int, x.split())))) parser.add_argument("--Suisho", action="store_true") +parser.add_argument("--total_num", type=(lambda x: list(map(int, x.split()))), default=[0, 0, 0]) args = parser.parse_args() # 対局数(先後行うので偶数でなければならない) assert args.game_num % 2 == 0 +# ハッシュサイズ(共通) +hash_size = 2048 + # 勝ち,負け,引き分けの結果を示す定数 -WIN = 0 +WIN = 0 DRAW = 1 LOSE = 2 # Ayaneにおける結果をここでの結果に変換する辞書 -result_converter = { ayane.GameResult.BLACK_WIN: WIN, - ayane.GameResult.WHITE_WIN: LOSE, - ayane.GameResult.DRAW : DRAW, - ayane.GameResult.MAX_MOVES: DRAW } +result_converter = {ayane.GameResult.BLACK_WIN: WIN, + ayane.GameResult.WHITE_WIN: LOSE, + ayane.GameResult.DRAW: DRAW, + ayane.GameResult.MAX_MOVES: DRAW} # インスタンス生成 @@ -53,9 +56,8 @@ # YaneuraOuの設定 server.engines[1].set_engine_options({"USI_Ponder": "false", - "Threads": args.Threads, "NodesLimit": args.NodesLimit, - "USI_Hash": 1024, + "USI_Hash": hash_size, "BookMoves": 0, "NetworkDelay": 0, "NetworkDelay2": 0 @@ -73,10 +75,7 @@ # 結果を書き込むファイルを取得 f = open(curr_path + "result.txt", mode="a") -f.write("~/Miacis/scripts/vsYaneuraOu.py") -for w in sys.argv: - f.write(" " + w) -f.write(f"\nMiacis time = {args.time1}, YaneuraOu time = {args.time2}, YaneuraOu Threads = {args.Threads}\n") +f.write(f"\ntime1 = {args.time1}, time2 = {args.time2}, NodesLimit = {args.NodesLimit}\n") # ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う model_names = natsorted(glob.glob(curr_path + "*0.model")) @@ -98,19 +97,22 @@ # Miacisを準備 server.engines[0].set_engine_options({"random_turn": 30, "print_interval": 10000000, - "USI_Hash": 4096, + "USI_Hash": hash_size, "model_name": model_name}) scalar_or_categorical = "scalar" if "sca" in model_name else "categorical" server.engines[0].connect(f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}") # 戦績を初期化 - total_num = [0, 0, 0] + total_num = args.total_num + + # 引数で初期化するのは最初だけにしたいのでここで[0, 0, 0]を入れてしまう + args.total_num = [0, 0, 0] # 棋譜の集合を初期化 sfens = defaultdict(int) # iが偶数のときMiacis先手 - for i in range(args.game_num): + for i in range(sum(total_num), args.game_num): # 対局を実行 server.game_start() while not server.game_result.is_gameover(): @@ -148,14 +150,17 @@ # Miacisを準備 server.engines[0].set_engine_options({"random_turn": 30, "print_interval": 10000000, - "USI_Hash": 4096, + "USI_Hash": hash_size, args.option: parameter, "model_name": model_names[-1]}) scalar_or_categorical = "scalar" if "sca" in model_names[-1] else "categorical" server.engines[0].connect(f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}") # 戦績を初期化 - total_num = [0, 0, 0] + total_num = args.total_num + + # 引数で初期化するのは最初だけにしたいのでここで[0, 0, 0]を入れてしまう + args.total_num = [0, 0, 0] # 棋譜の集合を初期化 sfens = defaultdict(int) diff --git a/scripts/vsYaneuraOu_PO800.py b/scripts/vsYaneuraOu_PO800.py deleted file mode 100755 index 0371f071..00000000 --- a/scripts/vsYaneuraOu_PO800.py +++ /dev/null @@ -1,129 +0,0 @@ -#! /usr/bin/env python3 -import os -import sys - -# Ayaneをインポート -script_dir = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(script_dir + "/../../Ayane/source") -import shogi.Ayane as ayane - -# その他必要なものをインポート -import time -import glob -from natsort import natsorted -from collections import defaultdict -import argparse -from calc_elo_rate import calc_elo_rate - -parser = argparse.ArgumentParser() -parser.add_argument("--Threads", type=int, default=1) -parser.add_argument("--NodesLimit", type=int, default=100000) -parser.add_argument("--game_num", type=int, default=500) -args = parser.parse_args() - -# 対局数(先後行うので偶数でなければならない) -assert args.game_num % 2 == 0 - -# 勝ち,負け,引き分けの結果を示す定数 -WIN = 0 -DRAW = 1 -LOSE = 2 - -# Ayaneにおける結果をここでの結果に変換する辞書 -result_converter = { ayane.GameResult.BLACK_WIN: WIN, - ayane.GameResult.WHITE_WIN: LOSE, - ayane.GameResult.DRAW : DRAW, - ayane.GameResult.MAX_MOVES: DRAW } - - -# インスタンス生成 -server = ayane.AyaneruServer() - -# サーバの設定 -server.error_print = True -server.set_time_setting(f"byoyomi {10000000}") -server.moves_to_draw = 320 - -# YaneuraOuの設定 -server.engines[1].set_engine_options({"USI_Ponder": "false", - "Threads": args.Threads, - "NodesLimit": args.NodesLimit, - "USI_Hash": 1024, - "BookMoves": 0, - "NetworkDelay": 0, - "NetworkDelay2": 0 - }) -server.engines[1].connect(script_dir + "/../../YaneuraOu/bin/YaneuraOu-by-gcc") - -# カレントディレクトリ内にある{prefix}_{step}.modelを評価する -curr_path = os.getcwd() -# ディレクトリ名が"/"で終わっていることの確認 -if curr_path[-1] != "/": - curr_path += "/" - -# 結果を書き込むファイルを取得 -f = open(curr_path + "result_PO800.txt", mode="a") -f.write(f"YaneuraOu Threads = {args.Threads} NodesLimit = {args.NodesLimit}\n") - -# ディレクトリにある以下のprefixを持ったパラメータを用いて対局を行う -model_names = natsorted(glob.glob(curr_path + "*0.model")) -assert len(model_names) > 0 - -# パラメータを探索 -for temperature_x1000 in range(0, 10, 1): - # Miacisを準備 - server.engines[0].set_engine_options({"random_turn": 320, - "temperature_x1000": temperature_x1000, - "print_interval": 10000000, - "USI_Hash": 4096, - "search_limit": 800, - "gpu_num": 1, - "thread_num_per_gpu": 1, - "search_batch_size": 4, - "C_PUCT_x1000": 1500, - "model_name": model_names[-1]}) - scalar_or_categorical = "scalar" if "sca" in model_names[-1] else "categorical" - server.engines[0].connect(f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}") - - # 戦績を初期化 - total_num = [0, 0, 0] - - # 棋譜の集合を初期化 - sfens = defaultdict(int) - - # iが偶数のときMiacis先手 - for i in range(args.game_num): - # 対局を実行 - server.game_start() - while not server.game_result.is_gameover(): - time.sleep(1) - - # 重複を確認 - if sfens[server.sfen] > 0: - # 同じ棋譜が2回生成された場合は記録しない - print(f"\n重複:", server.sfen) - else: - # 結果を記録 - result = result_converter[server.game_result] - total_num[result if not server.flip_turn else LOSE - result] += 1 - - sfens[server.sfen] += 1 - - # ここまでの結果を文字列化 - winning_rate = (total_num[WIN] + 0.5 * total_num[DRAW]) / sum(total_num) - elo_rate = calc_elo_rate(winning_rate) - result_str = f"temperature_x1000={temperature_x1000:2d} {total_num[WIN]:3d}勝 {total_num[DRAW]:3d}引き分け {total_num[LOSE]:3d}敗 勝率 {100 * winning_rate:4.1f}% 相対レート {elo_rate:6.1f}" - - sys.stdout.write("\033[2K\033[G") - print(result_str, end="\n" if i == args.game_num - 1 else "") - sys.stdout.flush() - - # 手番反転 - server.flip_turn = not server.flip_turn - - # ファイルに書き込み - f.write(result_str + "\n") - f.flush() - - -server.terminate() diff --git a/scripts/vsYaneuraOu_with_Optuna.py b/scripts/vsYaneuraOu_with_Optuna.py deleted file mode 100755 index bf28e840..00000000 --- a/scripts/vsYaneuraOu_with_Optuna.py +++ /dev/null @@ -1,130 +0,0 @@ -#!/usr/bin/env python3 -import os -import sys - -# Ayaneをインポート -script_dir = os.path.dirname(os.path.abspath(__file__)) -sys.path.append(script_dir + "/../../Ayane/source") -import shogi.Ayane as ayane - -# その他必要なものをインポート -import time -import glob -import optuna -from natsort import natsorted -from collections import defaultdict -import argparse -from calc_elo_rate import calc_elo_rate - -parser = argparse.ArgumentParser() -parser.add_argument("--time1", type=int, default=1000) -parser.add_argument("--time2", type=int, default=200) -parser.add_argument("--Threads", type=int, default=4) -parser.add_argument("--game_num", type=int, default=100) -args = parser.parse_args() - -# 対局数(先後行うので偶数でなければならない) -assert args.game_num % 2 == 0 - -# 勝ち,負け,引き分けの結果を示す定数 -WIN = 0 -DRAW = 1 -LOSE = 2 - -# Ayaneにおける結果をここでの結果に変換する辞書 -result_converter = { ayane.GameResult.BLACK_WIN: WIN, - ayane.GameResult.WHITE_WIN: LOSE, - ayane.GameResult.DRAW : DRAW, - ayane.GameResult.MAX_MOVES: DRAW } - -# インスタンス生成 -server = ayane.AyaneruServer() -server.error_print = True -server.set_time_setting(f"byoyomi1p {args.time1} byoyomi2p {args.time2}") -server.moves_to_draw = 320 - -# YaneuraOuの設定 -server.engines[1].set_engine_options({"USI_Ponder": "false", - "Threads": args.Threads, - "USI_Hash": 1024, - "BookMoves": 0, - "NetworkDelay": 0, - "NetworkDelay2": 0 - }) -server.engines[1].connect(script_dir + "/../../YaneuraOu/bin/YaneuraOu-by-gcc") - -# カレントディレクトリ内にある最終ステップのパラメータを用いて対局を行う -model_name = natsorted(glob.glob(os.getcwd() + "/*0.model"))[-1] - - -def objective(trial): - # パラメータを準備 - if trial.id == 0: - # 現状わかっている最適パラメータで実行 - C_PUCT_x1000 = trial.suggest_int("C_PUCT_x1000", 2500, 2500) - Q_coeff_x1000 = trial.suggest_int("Q_coeff_x1000", 0, 0) - # P_coeff_x1000 = trial.suggest_int("P_coeff_x1000", 0, 10000) - search_batch_size = trial.suggest_int("search_batch_size", 64, 64) - else: - C_PUCT_x1000 = trial.suggest_int("C_PUCT_x1000", 1000, 5000) - Q_coeff_x1000 = trial.suggest_int("Q_coeff_x1000", 0, 10000) - # P_coeff_x1000 = trial.suggest_int("P_coeff_x1000", 0, 10000) - search_batch_size = trial.suggest_int("search_batch_size", 8, 512) - - # Miacisを準備 - server.engines[0].set_engine_options({"random_turn": 30, - "print_interval": 10000000, - "USI_Hash": 8192, - "C_PUCT_x1000": C_PUCT_x1000, - "Q_coeff_x1000": Q_coeff_x1000, - "search_batch_size": search_batch_size, - "model_name": model_name}) - scalar_or_categorical = "scalar" if "sca" in model_name else "categorical" - server.engines[0].connect(f"{script_dir}/../src/cmake-build-release/Miacis_shogi_{scalar_or_categorical}") - - # 戦績を初期化 - total_num = [0, 0, 0] - - # 棋譜の集合を初期化 - sfens = defaultdict(int) - - # iが偶数のときMiacis先手 - for i in range(args.game_num): - # 対局を実行 - server.game_start() - while not server.game_result.is_gameover(): - time.sleep(1) - - # 重複を確認 - if sfens[server.sfen] > 0: - # 同じ棋譜が2回生成された場合は記録しない - print(f"\n重複:", server.sfen) - else: - # 結果を記録 - result = result_converter[server.game_result] - total_num[result if not server.flip_turn else LOSE - result] += 1 - - sfens[server.sfen] += 1 - - # ここまでの結果を文字列化 - winning_rate = (total_num[WIN] + 0.5 * total_num[DRAW]) / sum(total_num) - elo_rate = calc_elo_rate(winning_rate) - result_str = f"{total_num[WIN]:3d}勝 {total_num[DRAW]:3d}引き分け {total_num[LOSE]:3d}敗 勝率 {100 * winning_rate:4.1f}% 相対レート {elo_rate:6.1f}" - - sys.stdout.write("\033[2K\033[G") - print(result_str, end="\n" if i == args.game_num - 1 else "") - sys.stdout.flush() - - # 手番反転 - server.flip_turn = not server.flip_turn - return elo_rate - - -study = optuna.create_study(direction="maximize", study_name="vsYaneuraOu", storage="sqlite:///optuna_result.db", load_if_exists=True) -study.optimize(objective, n_trials=100) - -server.terminate() - -print(study.best_params) -print(study.best_value) -print(len(study.trials)) diff --git a/setting/reinforcement_learn_settings.txt b/setting/reinforcement_learn_settings.txt index ff306b43..69eb0106 100644 --- a/setting/reinforcement_learn_settings.txt +++ b/setting/reinforcement_learn_settings.txt @@ -1,17 +1,17 @@ learn_rate 0.025 -min_learn_rate 0.0025 momentum 0.9 weight_decay 0.0 policy_loss_coeff 1.0 value_loss_coeff 1.0 lambda 0.75 per_alpha 2.0 +mixup_alpha 0.0 Q_dist_temperature 0.01 Q_dist_lambda 1.0 noise_epsilon 0.25 noise_alpha 0.15 C_PUCT 2.5 -use_fp16 1 +use_fp16 0 draw_turn 320 random_turn 320 batch_size 512 @@ -23,8 +23,8 @@ learn_rate_decay_step2 2000000 learn_rate_decay_step3 2000000 learn_rate_decay_step4 2000000 learn_rate_decay_period 100000 -update_interval 500 -batch_size_per_gen 2 +update_interval 1000 +batch_size_per_gen 1 worker_num_per_thread 64 max_stack_size 1048576 first_wait 1048576 @@ -37,8 +37,10 @@ save_interval 50000 validation_interval 50000 sleep_msec -1 noise_mode 0 +wait_sec_per_load 90 use_sam_optim 0 -clip_grad_norm_ 10 +clip_grad_norm 10 +calibration_kifu_path /root/data/floodgate_kifu/valid # Shogi init_buffer_by_kifu 0 diff --git a/setting/supervised_learn_settings.txt b/setting/supervised_learn_settings.txt index 815adf45..c0c6dea8 100644 --- a/setting/supervised_learn_settings.txt +++ b/setting/supervised_learn_settings.txt @@ -1,7 +1,7 @@ learn_rate 0.025 -min_learn_rate 0.0025 momentum 0.9 weight_decay 0.0001 +mixup_alpha 0.0 policy_loss_coeff 1.0 value_loss_coeff 1.0 data_augmentation 1 @@ -16,7 +16,7 @@ learn_rate_decay_step3 1800000 learn_rate_decay_step4 2400000 learn_rate_decay_period 100000 use_sam_optim 0 -clip_grad_norm_ 10 +clip_grad_norm 10 # Shogi(AobaZero) load_multi_dir 1 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 730a5945..2e24ac61 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,72 +1,50 @@ cmake_minimum_required(VERSION 3.10) project(Miacis) -#共通する設定 +# 共通する設定 set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_FLAGS "-march=native") set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG") +# LibTorch list(APPEND CMAKE_PREFIX_PATH ../../libtorch-1.7.0) find_package(Torch REQUIRED) -#各ゲームで共通する部分のソース -set(SRCS hash_table.cpp - replay_buffer.cpp - game_generator.cpp - neural_network.cpp - neural_network_modules.cpp - learn.cpp - supervised_learn.cpp - reinforcement_learn.cpp - searcher.cpp - searcher_for_play.cpp - searcher_for_mate.cpp - hyperparameter_loader.cpp - segment_tree.cpp - common.cpp - timer.cpp - ) +# TRTorch +set(TRTORCH_INCLUDE ../../trtorch/include) +file(GLOB TRTORCH_LIBRARIES ../../trtorch/lib/*.so) -#将棋用のバイナリをコンパイルするのに必要なソース -set(SHOGI_SRCS shogi/main.cpp - shogi/test.cpp - shogi/bitboard.cpp - shogi/move.cpp - shogi/piece.cpp - shogi/position.cpp - shogi/square.cpp - shogi/interface.cpp - shogi/game.cpp - shogi/book.cpp) +# 各ゲームで共通する部分のソース +file(GLOB SRCS ./*.cpp) -#オセロ用のバイナリをコンパイルするのに必要なソース -set(OTHELLO_SRCS othello/main.cpp - othello/position.cpp - othello/square.cpp - othello/piece.cpp - othello/interface.cpp - othello/game.cpp) +# 将棋用のバイナリをコンパイルするのに必要なソース +file(GLOB SHOGI_SRCS ./shogi/*.cpp) -add_executable(Miacis_shogi_scalar ${SRCS} ${SHOGI_SRCS}) +# オセロ用のバイナリをコンパイルするのに必要なソース +file(GLOB OTHELLO_SRCS ./othello/*.cpp) + +# 実行ファイルの定義 +add_executable(Miacis_shogi_scalar ${SRCS} ${SHOGI_SRCS}) add_executable(Miacis_shogi_categorical ${SRCS} ${SHOGI_SRCS}) -target_compile_definitions(Miacis_shogi_scalar PUBLIC SHOGI) -target_compile_definitions(Miacis_shogi_categorical PUBLIC SHOGI) -target_compile_definitions(Miacis_shogi_categorical PUBLIC USE_CATEGORICAL) +target_compile_definitions(Miacis_shogi_scalar PUBLIC SHOGI) +target_compile_definitions(Miacis_shogi_categorical PUBLIC SHOGI USE_CATEGORICAL) -add_executable(Miacis_othello_scalar ${SRCS} ${OTHELLO_SRCS}) +add_executable(Miacis_othello_scalar ${SRCS} ${OTHELLO_SRCS}) add_executable(Miacis_othello_categorical ${SRCS} ${OTHELLO_SRCS}) -target_compile_definitions(Miacis_othello_scalar PUBLIC OTHELLO) -target_compile_definitions(Miacis_othello_categorical PUBLIC OTHELLO) -target_compile_definitions(Miacis_othello_categorical PUBLIC USE_CATEGORICAL) +target_compile_definitions(Miacis_othello_scalar PUBLIC OTHELLO) +target_compile_definitions(Miacis_othello_categorical PUBLIC OTHELLO USE_CATEGORICAL) + +# foreachで回すためにリスト化 +set(BIN Miacis_shogi_scalar Miacis_shogi_categorical Miacis_othello_scalar Miacis_othello_categorical) if (MSVC) - target_link_libraries(Miacis_shogi_scalar "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_shogi_categorical "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_othello_scalar "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_othello_categorical "${TORCH_LIBRARIES}") -else() - target_link_libraries(Miacis_shogi_scalar pthread stdc++fs "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_shogi_categorical pthread stdc++fs "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_othello_scalar pthread stdc++fs "${TORCH_LIBRARIES}") - target_link_libraries(Miacis_othello_categorical pthread stdc++fs "${TORCH_LIBRARIES}") -endif() \ No newline at end of file + foreach (bin IN ITEMS ${BIN}) + target_link_libraries(${bin} ${TORCH_LIBRARIES} ${TRTORCH_LIBRARIES}) + target_include_directories(${bin} PRIVATE ${TRTORCH_INCLUDE}) + endforeach () +else () + foreach (bin IN ITEMS ${BIN}) + target_link_libraries(${bin} pthread stdc++fs ${TORCH_LIBRARIES} ${TRTORCH_LIBRARIES}) + target_include_directories(${bin} PRIVATE ${TRTORCH_INCLUDE}) + endforeach () +endif () \ No newline at end of file diff --git a/src/dataset.cpp b/src/dataset.cpp new file mode 100644 index 00000000..fa519c9e --- /dev/null +++ b/src/dataset.cpp @@ -0,0 +1,31 @@ +#include "dataset.hpp" +#include "include_switch.hpp" +#include "learn.hpp" +#include "neural_network.hpp" + +CalibrationDataset::CalibrationDataset(const std::string& root, int64_t data_num) { + std::vector data = loadData(root, false, 3200); + Position pos; + + for (const LearningData& datum : data) { + pos.fromStr(datum.position_str); + + //入力 + std::vector inputs = pos.makeFeature(); + data_.push_back(torch::tensor(inputs).view({ 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH })); + + //targetの方は使わないのでダミーの適当な値を入れる + targets_.push_back(torch::tensor({ 0 })); + + //全データだと多いので、先頭からいくつかのみを用いる + if (data_.size() >= data_num) { + break; + } + } +} + +torch::data::Example<> CalibrationDataset::get(size_t index) { + return { data_[index].clone().to(torch::kCUDA), targets_[index].clone().to(torch::kCUDA) }; +} + +c10::optional CalibrationDataset::size() const { return data_.size(); } \ No newline at end of file diff --git a/src/dataset.hpp b/src/dataset.hpp new file mode 100644 index 00000000..8e74a481 --- /dev/null +++ b/src/dataset.hpp @@ -0,0 +1,18 @@ +#ifndef MIACIS_DATASET_HPP +#define MIACIS_DATASET_HPP + +#include + +class CalibrationDataset : public torch::data::datasets::Dataset { +public: + explicit CalibrationDataset(const std::string& root, int64_t data_num); + + torch::data::Example<> get(size_t index) override; + + c10::optional size() const override; + +private: + std::vector data_, targets_; +}; + +#endif //MIACIS_DATASET_HPP \ No newline at end of file diff --git a/src/game_generator.cpp b/src/game_generator.cpp index c6a2e238..dd427406 100644 --- a/src/game_generator.cpp +++ b/src/game_generator.cpp @@ -1,7 +1,13 @@ #include "game_generator.hpp" #include +#include void GameGenerator::genGames() { + //まず最初のロード + neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, + search_options_.calibration_kifu_path, search_options_.use_fp16); + need_load = false; + //生成スレッドを生成 std::vector threads; for (int64_t i = 0; i < search_options_.thread_num_per_gpu; i++) { @@ -15,6 +21,9 @@ void GameGenerator::genGames() { } void GameGenerator::genSlave(int64_t thread_id) { + //スレッドごとにCUDAをセットしておかないとエラーが出る + trtorch::set_device(gpu_id_); + //Workerを準備 std::vector> workers(worker_num_); for (int32_t i = 0; i < worker_num_; i++) { @@ -48,6 +57,16 @@ void GameGenerator::genSlave(int64_t thread_id) { for (int32_t i = 0; i < worker_num_; i++) { workers[i]->backup(); } + + //パラメータをロードし直す必要があれば実行 + //全スレッドが読み込もうとする必要はないので代表してid=0のスレッドに任せる + if (need_load && thread_id == 0) { + gpu_mutex.lock(); + neural_network_.load(DEFAULT_MODEL_NAME, gpu_id_, worker_num_ * search_options_.search_batch_size, + search_options_.calibration_kifu_path, search_options_.use_fp16); + need_load = false; + gpu_mutex.unlock(); + } } } @@ -77,9 +96,10 @@ void GameGenerator::evalWithGPU(int64_t thread_id) { //順伝播計算 gpu_mutex.lock(); torch::NoGradGuard no_grad_guard; - std::pair, std::vector> result = - neural_network_->policyAndValueBatch(gpu_queues_[thread_id].inputs); + std::tuple output = neural_network_.infer(gpu_queues_[thread_id].inputs); gpu_mutex.unlock(); + std::pair, std::vector> result = neural_network_.decode(output); + const std::vector& policies = result.first; const std::vector& values = result.second; @@ -191,10 +211,9 @@ OneTurnElement GenerateWorker::resultForCurrPos() { //選択回数が0ならMIN_SCORE //選択回数が0ではないのに未展開なら詰み探索が詰みを発見したということなのでMAX_SCORE //その他は普通に計算 - Q_dist[i] = - (N[i] == 0 ? MIN_SCORE - : root_node.child_indices[i] == HashTable::NOT_EXPANDED ? MAX_SCORE - : hash_table_.expQfromNext(root_node, i)); + Q_dist[i] = (N[i] == 0 ? MIN_SCORE + : root_node.child_indices[i] == HashTable::NOT_EXPANDED ? MAX_SCORE + : hash_table_.expQfromNext(root_node, i)); } Q_dist = softmax(Q_dist, std::max(search_options_.temperature_x1000 / 1000.0f, 1e-4f)); diff --git a/src/game_generator.hpp b/src/game_generator.hpp index 9b900118..cfd96670 100644 --- a/src/game_generator.hpp +++ b/src/game_generator.hpp @@ -2,6 +2,7 @@ #define MIACIS_GAME_GENERATOR_HPP #include "game.hpp" +#include "infer_model.hpp" #include "replay_buffer.hpp" #include "search_options.hpp" #include "searcher.hpp" @@ -16,20 +17,22 @@ class GameGenerator { public: GameGenerator(const SearchOptions& search_options, int64_t worker_num, float Q_dist_lambda, int64_t noise_mode, - float noise_epsilon, float noise_alpha, ReplayBuffer& rb, NeuralNetwork nn) + float noise_epsilon, float noise_alpha, ReplayBuffer& rb, int64_t gpu_id) : stop_signal(false), search_options_(search_options), worker_num_(worker_num), Q_dist_lambda_(Q_dist_lambda), noise_mode_(noise_mode), noise_epsilon_(noise_epsilon), noise_alpha_(noise_alpha), replay_buffer_(rb), - neural_network_(std::move(nn)), gpu_queues_(search_options_.thread_num_per_gpu) { - neural_network_->eval(); + neural_network_(), gpu_id_(gpu_id), gpu_queues_(search_options_.thread_num_per_gpu) { assert(0 <= noise_mode_ && noise_mode_ < NOISE_MODE_SIZE); }; //生成してリプレイバッファに送り続ける関数 void genGames(); - //排他制御用のmutex。AlphaZeroTrainerから触れるようにpublicに置いている + //排他制御用のmutex。強化学習時に外部からアクセスできるようpublicに置いている std::mutex gpu_mutex; + //評価パラメータの読み込みが必要かどうかのシグナル + bool need_load; + //停止信号。止めたいときは外部からこれをtrueにする bool stop_signal; @@ -70,7 +73,10 @@ class GameGenerator { ReplayBuffer& replay_buffer_; //局面評価に用いるネットワーク - NeuralNetwork neural_network_; + InferModel neural_network_; + + //CUDAがスレッドごとに紐付くのでgpu_id_を明に保持する必要がある + int64_t gpu_id_; //評価要求を受け付けるQueue std::vector gpu_queues_; diff --git a/src/infer_model.cpp b/src/infer_model.cpp new file mode 100644 index 00000000..0005d369 --- /dev/null +++ b/src/infer_model.cpp @@ -0,0 +1,217 @@ +#include "infer_model.hpp" +#include "common.hpp" +#include "dataset.hpp" +#include "include_switch.hpp" +#include +#include +#include + +void InferModel::load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, + const std::string& calibration_kifu_path, bool use_fp16) { + //マルチGPU環境で同時にloadすると時々Segmentation Faultが発生するので排他制御を入れる + static std::mutex load_mutex; + std::lock_guard guard(load_mutex); + + torch::jit::Module module = torch::jit::load(model_path); + device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); + module.to(device_); + module.eval(); + + std::vector in_min = { 1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; + std::vector in_opt = { opt_batch_size, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; + std::vector in_max = { opt_batch_size * 2, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }; + + use_fp16_ = use_fp16; + if (use_fp16_) { + trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); + trtorch::CompileSpec info({ range }); + info.op_precision = torch::kHalf; + info.device.gpu_id = gpu_id; + module_ = trtorch::CompileGraph(module, info); + } else { + using namespace torch::data; + auto dataset = CalibrationDataset(calibration_kifu_path, opt_batch_size * 2).map(transforms::Stack<>()); + auto dataloader = make_data_loader(std::move(dataset), DataLoaderOptions().batch_size(opt_batch_size).workers(1)); + + const std::string name = "calibration_cache_file.txt"; + auto calibrator = trtorch::ptq::make_int8_calibrator(std::move(dataloader), name, false); + + trtorch::CompileSpec::InputRange range(in_min, in_opt, in_max); + trtorch::CompileSpec info({ range }); + info.op_precision = torch::kI8; + info.device.gpu_id = gpu_id; + info.ptq_calibrator = calibrator; + info.workspace_size = (1ull << 29); + info.max_batch_size = opt_batch_size * 2; + + module_ = trtorch::CompileGraph(module, info); + } +} + +std::pair, std::vector> InferModel::policyAndValueBatch(const std::vector& inputs) { + return decode(infer(inputs)); +} + +std::tuple InferModel::infer(const std::vector& inputs) { + torch::Tensor x = torch::tensor(inputs).to(device_); + x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); + if (use_fp16_) { + x = x.to(torch::kFloat16); + } + auto out = module_.forward({ x }); + auto tuple = out.toTuple(); + torch::Tensor policy = tuple->elements()[0].toTensor(); + torch::Tensor value = tuple->elements()[1].toTensor(); + + //CPUに持ってくる + policy = policy.cpu(); + + //valueはcategoricalのときだけはsoftmaxをかけてからcpuへ +#ifdef USE_CATEGORICAL + value = torch::softmax(value, 1).cpu(); +#else + value = value.cpu(); +#endif + + return std::make_tuple(policy, value); +} + +std::pair, std::vector> +InferModel::decode(const std::tuple& output) const { + const auto& [policy, value] = output; + uint64_t batch_size = policy.size(0); + + std::vector policies(batch_size); + std::vector values(batch_size); + + if (use_fp16_) { + torch::Half* p = policy.data_ptr(); + for (uint64_t i = 0; i < batch_size; i++) { + policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); + } + } else { + float* p = policy.data_ptr(); + for (uint64_t i = 0; i < batch_size; i++) { + policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); + } + } + +#ifdef USE_CATEGORICAL + //valueの方はfp16化してもなぜかHalfではなくFloatとして返ってくる + //ひょっとしたらTRTorchのバグかも + float* value_p = value.data_ptr(); + for (uint64_t i = 0; i < batch_size; i++) { + std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); + } +#else + std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); +#endif + return std::make_pair(policies, values); +} + +std::array InferModel::validLoss(const std::vector& data) { +#ifdef USE_CATEGORICAL + Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + if (data[i].value != 0 && data[i].value != BIN_SIZE - 1) { + std::cerr << "Categoricalの検証データは現状のところValueが-1 or 1でないといけない" << std::endl; + std::exit(1); + } + value_teachers.push_back(data[i].value == 0 ? MIN_SCORE : MAX_SCORE); + } + + torch::Tensor x = torch::tensor(inputs).to(device_); + x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); + auto out = module_.forward({ x }); + auto tuple = out.toTuple(); + torch::Tensor policy_logit = tuple->elements()[0].toTensor(); + torch::Tensor value_logit = tuple->elements()[1].toTensor(); + + torch::Tensor logits = policy_logit.view({ -1, POLICY_DIM }); + + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); + + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(logits, 1), 1, false); + + //Valueの分布を取得 + torch::Tensor value_cat = torch::softmax(value_logit, 1); + + //i番目の要素が示す値はMIN_SCORE + (i + 0.5) * VALUE_WIDTH + std::vector each_value; + for (int64_t i = 0; i < BIN_SIZE; i++) { + each_value.emplace_back(MIN_SCORE + (i + 0.5) * VALUE_WIDTH); + } + torch::Tensor each_value_tensor = torch::tensor(each_value).to(device_); + + //Categorical分布と内積を取ることで期待値を求める + torch::Tensor value = (each_value_tensor * value_cat).sum(1); + + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); + +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif + return { policy_loss, value_loss }; + +#else + static Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + value_teachers.push_back(data[i].value); + } + + torch::Tensor x = torch::tensor(inputs).to(device_); + x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); + auto out = module_.forward({ x }); + auto tuple = out.toTuple(); + torch::Tensor policy = tuple->elements()[0].toTensor(); + torch::Tensor value = tuple->elements()[1].toTensor(); + + torch::Tensor policy_logits = policy.view({ -1, POLICY_DIM }); + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(policy_logits, 1), 1, false); + + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); + value = value.view(-1); +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif + + return { policy_loss, value_loss }; +#endif +} \ No newline at end of file diff --git a/src/infer_model.hpp b/src/infer_model.hpp new file mode 100644 index 00000000..e488125d --- /dev/null +++ b/src/infer_model.hpp @@ -0,0 +1,24 @@ +#ifndef INFER_MODEL_HPP +#define INFER_MODEL_HPP + +#include "neural_network.hpp" +#include + +class InferModel { +public: + InferModel() : device_(torch::kCPU) {} + void load(const std::string& model_path, int64_t gpu_id, int64_t opt_batch_size, const std::string& calibration_kifu_path, + bool use_fp16); + std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); + std::tuple infer(const std::vector& inputs); + std::pair, std::vector> + decode(const std::tuple& output) const; + std::array validLoss(const std::vector& data); + +private: + torch::jit::Module module_; + torch::Device device_; + bool use_fp16_; +}; + +#endif \ No newline at end of file diff --git a/src/learn.cpp b/src/learn.cpp index 2f4deb6e..1ec134f8 100644 --- a/src/learn.cpp +++ b/src/learn.cpp @@ -6,7 +6,8 @@ #include #include -std::array validation(NeuralNetwork nn, const std::vector& valid_data, uint64_t batch_size) { +template +std::array validation(ModelType& model, const std::vector& valid_data, uint64_t batch_size) { torch::NoGradGuard no_grad_guard; std::array losses{}; for (uint64_t index = 0; index < valid_data.size();) { @@ -15,7 +16,7 @@ std::array validation(NeuralNetwork nn, const std::vector< curr_data.push_back(valid_data[index++]); } - std::array loss = nn->validLoss(curr_data); + std::array loss = model.validLoss(curr_data); for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { losses[i] += loss[i].sum().item(); } @@ -29,6 +30,9 @@ std::array validation(NeuralNetwork nn, const std::vector< return losses; } +template std::array validation(InferModel& model, const std::vector& valid_data, + uint64_t batch_size); + std::vector loadData(const std::string& file_path, bool data_augmentation, float rate_threshold) { //棋譜を読み込めるだけ読み込む std::vector games = loadGames(file_path, rate_threshold); @@ -59,12 +63,6 @@ std::vector loadData(const std::string& file_path, bool data_augme return data_buffer; } -void initParams() { - NeuralNetwork nn; - torch::save(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - std::cout << "初期化したパラメータを" << NeuralNetworkImpl::DEFAULT_MODEL_NAME << "に出力" << std::endl; -} - LearnManager::LearnManager(const std::string& learn_name) { assert(learn_name == "supervised" || learn_name == "reinforcement"); HyperparameterLoader settings(learn_name + "_learn_settings.txt"); @@ -77,13 +75,6 @@ LearnManager::LearnManager(const std::string& learn_name) { coefficients_[i] = settings.get(LOSS_TYPE_NAME[i] + "_loss_coeff"); } - //optimizerの準備 - learn_rate_ = settings.get("learn_rate"); - torch::optim::SGDOptions sgd_option(learn_rate_); - sgd_option.momentum(settings.get("momentum")); - sgd_option.weight_decay(settings.get("weight_decay")); - optimizer_ = std::make_unique(neural_network->parameters(), sgd_option); - //学習推移のログファイル train_log_.open(learn_name + "_train_log.txt"); valid_log_.open(learn_name + "_valid_log.txt"); @@ -94,11 +85,18 @@ LearnManager::LearnManager(const std::string& learn_name) { } //評価関数読み込み - torch::load(neural_network, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - neural_network->setGPU(0); + neural_network.load(DEFAULT_MODEL_NAME, 0); //学習前のパラメータを出力 - torch::save(neural_network, NeuralNetworkImpl::MODEL_PREFIX + "_before_learn.model"); + neural_network.save(MODEL_PREFIX + "_before_learn.model"); + + //optimizerの準備 + learn_rate_ = settings.get("learn_rate"); + torch::optim::SGDOptions sgd_option(learn_rate_); + sgd_option.momentum(settings.get("momentum")); + sgd_option.weight_decay(settings.get("weight_decay")); + std::vector parameters; + optimizer_ = std::make_unique(neural_network.parameters(), sgd_option); //パラメータの保存間隔 save_interval_ = settings.get("save_interval"); @@ -115,13 +113,15 @@ LearnManager::LearnManager(const std::string& learn_name) { learn_rate_decay_step3_ = settings.get("learn_rate_decay_step3"); learn_rate_decay_step4_ = settings.get("learn_rate_decay_step4"); learn_rate_decay_period_ = settings.get("learn_rate_decay_period"); - min_learn_rate_ = settings.get("min_learn_rate"); + + //mixupの混合比を決定する値 + mixup_alpha_ = settings.get("mixup_alpha"); //SAM use_sam_optim_ = settings.get("use_sam_optim"); //clip_grad_norm_の値 - clip_grad_norm_ = settings.get("clip_grad_norm_"); + clip_grad_norm_ = settings.get("clip_grad_norm"); //学習開始時間の設定 timer_.start(); @@ -133,7 +133,8 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d //学習 optimizer_->zero_grad(); - std::array loss = neural_network->loss(curr_data); + std::array loss = + (mixup_alpha_ == 0 ? neural_network.loss(curr_data) : neural_network.mixUpLoss(curr_data, mixup_alpha_)); torch::Tensor loss_sum = torch::zeros({ batch_size }); for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { loss_sum += coefficients_[i] * loss[i].cpu(); @@ -177,7 +178,7 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d optimizer_->zero_grad(); //再計算 - loss = neural_network->loss(curr_data); + loss = neural_network.loss(curr_data); loss_sum = torch::zeros({ batch_size }); for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { loss_sum += coefficients_[i] * loss[i].cpu(); @@ -200,7 +201,7 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d } //勾配をクリップ - torch::nn::utils::clip_grad_norm_(neural_network->parameters(), clip_grad_norm_); + torch::nn::utils::clip_grad_norm_(neural_network.parameters(), clip_grad_norm_); //パラメータを更新 optimizer_->step(); @@ -216,9 +217,9 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d if (stem_num % validation_interval_ == 0) { //validation_lossを計算 - neural_network->eval(); + neural_network.eval(); std::array valid_loss = validation(neural_network, valid_data_, batch_size); - neural_network->train(); + neural_network.train(); float sum_loss = 0; for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { sum_loss += coefficients_[i] * valid_loss[i]; @@ -234,7 +235,7 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d //パラメータをステップ付きで保存 if (stem_num % save_interval_ == 0) { - torch::save(neural_network, NeuralNetworkImpl::MODEL_PREFIX + "_" + std::to_string(stem_num) + ".model"); + neural_network.save(MODEL_PREFIX + "_" + std::to_string(stem_num) + ".model"); } //学習率の変化はoptimizer_->defaults();を使えそうな気がする @@ -248,7 +249,7 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d //Cosine annealing int64_t curr_step = stem_num % learn_rate_decay_period_; (dynamic_cast(optimizer_->param_groups().front().options())).lr() = - min_learn_rate_ + 0.5 * (learn_rate_ - min_learn_rate_) * (1 + cos(acos(-1) * curr_step / learn_rate_decay_period_)); + 0.5 * learn_rate_ * (1 + cos(acos(-1) * curr_step / learn_rate_decay_period_)); } else if (learn_rate_decay_mode_ == 3) { //指数的な減衰 if (stem_num % learn_rate_decay_period_ == 0) { @@ -256,5 +257,5 @@ torch::Tensor LearnManager::learnOneStep(const std::vector& curr_d } } - return loss_sum; + return loss_sum.detach(); } \ No newline at end of file diff --git a/src/learn.hpp b/src/learn.hpp index d3749c0b..178e9f73 100644 --- a/src/learn.hpp +++ b/src/learn.hpp @@ -1,6 +1,8 @@ #ifndef MIACIS_LEARN_HPP #define MIACIS_LEARN_HPP +#include "infer_model.hpp" +#include "learning_model.hpp" #include "neural_network.hpp" #include "timer.hpp" @@ -54,7 +56,7 @@ class LearnManager { torch::Tensor learnOneStep(const std::vector& curr_data, int64_t stem_num); //学習するモデル。強化学習時に定期的な同期を挟むためにpublicに置く - NeuralNetwork neural_network; + LearningModel neural_network; private: //Optimizer @@ -93,13 +95,13 @@ class LearnManager { //その他周期的なスケジューリングの周期 int64_t learn_rate_decay_period_; - //Cosine annealing時の最小値 - float min_learn_rate_; + //mixupを行う場合の混合比を決定する値 + float mixup_alpha_; //Sharpness-Aware Minimizationを行うかどうか bool use_sam_optim_; - //clip_grad_norm_ + //勾配クリッピングの値 float clip_grad_norm_; }; @@ -107,10 +109,8 @@ class LearnManager { std::vector loadData(const std::string& file_path, bool data_augmentation, float rate_threshold); //validationを行う関数 -std::array validation(NeuralNetwork nn, const std::vector& valid_data, uint64_t batch_size); - -//パラメータを初期化 -void initParams(); +template +std::array validation(ModelType& model, const std::vector& valid_data, uint64_t batch_size); //棋譜からの教師あり学習 void supervisedLearn(); diff --git a/src/learning_model.cpp b/src/learning_model.cpp new file mode 100644 index 00000000..df0347c6 --- /dev/null +++ b/src/learning_model.cpp @@ -0,0 +1,204 @@ +#include "learning_model.hpp" +#include "common.hpp" +#include "include_switch.hpp" +#include + +void LearningModel::load(const std::string& model_path, int64_t gpu_id) { + module_ = torch::jit::load(model_path); + device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); + module_.to(device_); +} + +void LearningModel::save(const std::string& model_path) { module_.save(model_path); } + +torch::Tensor LearningModel::encode(const std::vector& inputs) const { + torch::Tensor x = torch::tensor(inputs).to(device_); + x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); + return x; +} + +std::array LearningModel::loss(const std::vector& data) { + static Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + value_teachers.push_back(data[i].value); + } + + torch::Tensor input_tensor = encode(inputs); + auto out = module_.forward({ input_tensor }); + auto tuple = out.toTuple(); + torch::Tensor policy = tuple->elements()[0].toTensor(); + torch::Tensor value = tuple->elements()[1].toTensor(); + + torch::Tensor policy_logits = policy.view({ -1, POLICY_DIM }); + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(policy_logits, 1), 1, false); + +#ifdef USE_CATEGORICAL + torch::Tensor categorical_target = torch::tensor(value_teachers).to(device_); + torch::Tensor value_loss = torch::nll_loss(torch::log_softmax(value, 1), categorical_target); +#else + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); + value = value.view(-1); +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif +#endif + + return { policy_loss, value_loss }; +} + +std::array LearningModel::validLoss(const std::vector& data) { +#ifdef USE_CATEGORICAL + Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + if (data[i].value != 0 && data[i].value != BIN_SIZE - 1) { + std::cerr << "Categoricalの検証データは現状のところValueが-1 or 1でないといけない" << std::endl; + std::exit(1); + } + value_teachers.push_back(data[i].value == 0 ? MIN_SCORE : MAX_SCORE); + } + + torch::Tensor input_tensor = encode(inputs); + auto out = module_.forward({ input_tensor }); + auto tuple = out.toTuple(); + torch::Tensor policy_logit = tuple->elements()[0].toTensor(); + torch::Tensor value_logit = tuple->elements()[1].toTensor(); + + torch::Tensor logits = policy_logit.view({ -1, POLICY_DIM }); + + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); + + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(logits, 1), 1, false); + + //Valueの分布を取得 + torch::Tensor value_cat = torch::softmax(value_logit, 1); + + //i番目の要素が示す値はMIN_SCORE + (i + 0.5) * VALUE_WIDTH + std::vector each_value; + for (int64_t i = 0; i < BIN_SIZE; i++) { + each_value.emplace_back(MIN_SCORE + (i + 0.5) * VALUE_WIDTH); + } + torch::Tensor each_value_tensor = torch::tensor(each_value).to(device_); + + //Categorical分布と内積を取ることで期待値を求める + torch::Tensor value = (each_value_tensor * value_cat).sum(1); + + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); + +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif + + return { policy_loss, value_loss }; +#else + //Scalarモデルの場合はloss関数と同じ + return loss(data); +#endif +} + +std::array LearningModel::mixUpLoss(const std::vector& data, float alpha) { + std::gamma_distribution gamma_dist(alpha); + float gamma1 = gamma_dist(engine), gamma2 = gamma_dist(engine); + float beta = gamma1 / (gamma1 + gamma2); + + static Position pos; + std::vector inputs; + std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); + std::vector value_teachers; + + for (uint64_t i = 0; i < data.size(); i++) { + pos.fromStr(data[i].position_str); + + //入力 + const std::vector feature = pos.makeFeature(); + inputs.insert(inputs.end(), feature.begin(), feature.end()); + + //policyの教師信号 + for (const std::pair& e : data[i].policy) { + policy_teachers[i * POLICY_DIM + e.first] = e.second; + } + + //valueの教師信号 + value_teachers.push_back(data[i].value); + } + + torch::Tensor input_tensor = encode(inputs); + + //入力時のmixup + input_tensor = beta * input_tensor + (1 - beta) * input_tensor.roll(1, 0); + + auto out = module_.forward({ input_tensor }); + auto tuple = out.toTuple(); + torch::Tensor policy = tuple->elements()[0].toTensor(); + torch::Tensor value = tuple->elements()[1].toTensor(); + + torch::Tensor policy_logits = policy.view({ -1, POLICY_DIM }); + torch::Tensor policy_target = torch::tensor(policy_teachers).to(device_).view({ -1, POLICY_DIM }); + + //教師データのmixup + policy_target = beta * policy_target + (1 - beta) * policy_target.roll(1, 0); + + torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(policy_logits, 1), 1, false); + +#ifdef USE_CATEGORICAL + torch::Tensor categorical_target = torch::tensor(value_teachers).to(device_); + torch::Tensor value_loss1 = torch::nll_loss(torch::log_softmax(value, 1), categorical_target); + torch::Tensor value_loss2 = torch::nll_loss(torch::log_softmax(value, 1), categorical_target.roll(1, 0)); + torch::Tensor value_loss = beta * value_loss1 + (1 - beta) * value_loss2; +#else + torch::Tensor value_t = torch::tensor(value_teachers).to(device_); + value_t = beta * value_t + (1 - beta) * value_t.roll(1, 0); + value = value.view(-1); +#ifdef USE_SIGMOID + torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#else + torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +#endif +#endif + + return { policy_loss, value_loss }; +} + +std::vector LearningModel::parameters() { + std::vector parameters; + for (auto p : module_.parameters()) { + parameters.push_back(p); + } + return parameters; +} diff --git a/src/learning_model.hpp b/src/learning_model.hpp new file mode 100644 index 00000000..52e72937 --- /dev/null +++ b/src/learning_model.hpp @@ -0,0 +1,29 @@ +#ifndef LEARNING_MODEL_HPP +#define LEARNING_MODEL_HPP + +#include "neural_network.hpp" +#include + +class LearningModel { +public: + LearningModel() : device_(torch::kCPU) {} + void load(const std::string& model_path, int64_t gpu_id); + void save(const std::string& model_path); + torch::Tensor encode(const std::vector& inputs) const; + std::array loss(const std::vector& data); + std::array validLoss(const std::vector& data); + + //MixUpを行って損失を返す関数 + std::array mixUpLoss(const std::vector& data, float alpha); + + std::vector parameters(); + + void train() { module_.train(); } + void eval() { module_.eval(); } + +private: + torch::jit::Module module_; + torch::Device device_; +}; + +#endif \ No newline at end of file diff --git a/src/neural_network.cpp b/src/neural_network.cpp index 7e372e2e..53362640 100644 --- a/src/neural_network.cpp +++ b/src/neural_network.cpp @@ -5,246 +5,24 @@ //ネットワークの設定 #ifdef SHOGI static constexpr int32_t BLOCK_NUM = 10; -static constexpr int32_t CHANNEL_NUM = 128; +static constexpr int32_t CHANNEL_NUM = 256; #elif defined(OTHELLO) static constexpr int32_t BLOCK_NUM = 5; static constexpr int32_t CHANNEL_NUM = 64; #endif -static constexpr int32_t KERNEL_SIZE = 3; -static constexpr int32_t REDUCTION = 8; -static constexpr int32_t VALUE_HIDDEN_NUM = 256; #ifdef USE_CATEGORICAL -const std::string NeuralNetworkImpl::MODEL_PREFIX = "cat_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); +#ifdef SHOGI +const std::string MODEL_PREFIX = "shogi_cat_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); #else -const std::string NeuralNetworkImpl::MODEL_PREFIX = "sca_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); -#endif -//デフォルトで読み書きするファイル名 -const std::string NeuralNetworkImpl::DEFAULT_MODEL_NAME = NeuralNetworkImpl::MODEL_PREFIX + ".model"; - -NeuralNetworkImpl::NeuralNetworkImpl() : device_(torch::kCUDA), fp16_(false), state_blocks_(BLOCK_NUM, nullptr) { - state_first_conv_and_norm_ = - register_module("state_first_conv_and_norm_", Conv2DwithBatchNorm(INPUT_CHANNEL_NUM, CHANNEL_NUM, KERNEL_SIZE)); - for (int32_t i = 0; i < BLOCK_NUM; i++) { - state_blocks_[i] = - register_module("state_blocks_" + std::to_string(i), ResidualBlock(CHANNEL_NUM, KERNEL_SIZE, REDUCTION)); - } -#ifdef REPRESENTATION_DROPOUT - representation_dropout_ = register_module("representation_dropout_", torch::nn::Dropout2d()); -#endif - policy_conv_ = register_module( - "policy_conv_", torch::nn::Conv2d(torch::nn::Conv2dOptions(CHANNEL_NUM, POLICY_CHANNEL_NUM, 1).padding(0).bias(true))); - value_conv_and_norm_ = register_module("value_conv_and_norm_", Conv2DwithBatchNorm(CHANNEL_NUM, CHANNEL_NUM, 1)); - value_linear0_ = register_module("value_linear0_", torch::nn::Linear(SQUARE_NUM * CHANNEL_NUM, VALUE_HIDDEN_NUM)); - value_linear1_ = register_module("value_linear1_", torch::nn::Linear(VALUE_HIDDEN_NUM, BIN_SIZE)); -} - -torch::Tensor NeuralNetworkImpl::encode(const std::vector& inputs) { - torch::Tensor x = (fp16_ ? torch::tensor(inputs).to(device_, torch::kHalf) : torch::tensor(inputs).to(device_)); - x = x.view({ -1, INPUT_CHANNEL_NUM, BOARD_WIDTH, BOARD_WIDTH }); - x = state_first_conv_and_norm_->forward(x); - x = activation(x); - - for (ResidualBlock& block : state_blocks_) { - x = block->forward(x); - } - -#ifdef REPRESENTATION_DROPOUT - x = representation_dropout_->forward(x); +const std::string MODEL_PREFIX = "othello_cat_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); #endif - return x; -} - -std::pair NeuralNetworkImpl::decode(const torch::Tensor& representation) { - //policy - torch::Tensor policy = policy_conv_->forward(representation); - - //value - torch::Tensor value = value_conv_and_norm_->forward(representation); - value = activation(value); - value = value.view({ -1, SQUARE_NUM * CHANNEL_NUM }); - value = value_linear0_->forward(value); - value = activation(value); - value = value_linear1_->forward(value); - -#ifndef USE_CATEGORICAL -#ifdef USE_SIGMOID - value = torch::sigmoid(value); #else - value = torch::tanh(value); -#endif -#endif - - return { policy, value }; -} - -std::pair NeuralNetworkImpl::forward(const std::vector& inputs) { - return decode(encode(inputs)); -} - -std::pair, std::vector> -NeuralNetworkImpl::policyAndValueBatch(const std::vector& inputs) { - std::pair y = forward(inputs); - - uint64_t batch_size = inputs.size() / (SQUARE_NUM * INPUT_CHANNEL_NUM); - - std::vector policies(batch_size); - std::vector values(batch_size); - - //CPUに持ってくる - torch::Tensor policy = y.first.cpu(); - if (fp16_) { - torch::Half* p = policy.data_ptr(); - for (uint64_t i = 0; i < batch_size; i++) { - policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); - } - } else { - float* p = policy.data_ptr(); - for (uint64_t i = 0; i < batch_size; i++) { - policies[i].assign(p + i * POLICY_DIM, p + (i + 1) * POLICY_DIM); - } - } - -#ifdef USE_CATEGORICAL - torch::Tensor value = torch::softmax(y.second, 1).cpu(); - if (fp16_) { - torch::Half* value_p = value.data_ptr(); - for (uint64_t i = 0; i < batch_size; i++) { - std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); - } - } else { - float* value_p = value.data_ptr(); - for (uint64_t i = 0; i < batch_size; i++) { - std::copy(value_p + i * BIN_SIZE, value_p + (i + 1) * BIN_SIZE, values[i].begin()); - } - } -#else - //CPUに持ってくる - torch::Tensor value = y.second.cpu(); - if (fp16_) { - std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); - } else { - std::copy(value.data_ptr(), value.data_ptr() + batch_size, values.begin()); - } -#endif - return { policies, values }; -} - -std::array NeuralNetworkImpl::loss(const std::vector& data) { - static Position pos; - std::vector inputs; - std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); - std::vector value_teachers; - - for (uint64_t i = 0; i < data.size(); i++) { - pos.fromStr(data[i].position_str); - - //入力 - const std::vector feature = pos.makeFeature(); - inputs.insert(inputs.end(), feature.begin(), feature.end()); - - //policyの教師信号 - for (const std::pair& e : data[i].policy) { - policy_teachers[i * POLICY_DIM + e.first] = e.second; - } - - //valueの教師信号 - value_teachers.push_back(data[i].value); - } - - std::pair y = forward(inputs); - torch::Tensor logits = y.first.view({ -1, POLICY_DIM }); - - torch::Tensor policy_target = - (fp16_ ? torch::tensor(policy_teachers).to(device_, torch::kHalf) : torch::tensor(policy_teachers).to(device_)) - .view({ -1, POLICY_DIM }); - - torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(logits, 1), 1, false); - -#ifdef USE_CATEGORICAL - torch::Tensor categorical_target = torch::tensor(value_teachers).to(device_); - torch::Tensor value_loss = torch::nll_loss(torch::log_softmax(y.second, 1), categorical_target); -#else - torch::Tensor value_t = - (fp16_ ? torch::tensor(value_teachers).to(device_, torch::kHalf) : torch::tensor(value_teachers).to(device_)); - torch::Tensor value = y.second.view(-1); -#ifdef USE_SIGMOID - torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); -#else - torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); -#endif -#endif - - return { policy_loss, value_loss }; -} - -std::array NeuralNetworkImpl::validLoss(const std::vector& data) { -#ifdef USE_CATEGORICAL - Position pos; - std::vector inputs; - std::vector policy_teachers(data.size() * POLICY_DIM, 0.0); - std::vector value_teachers; - - for (uint64_t i = 0; i < data.size(); i++) { - pos.fromStr(data[i].position_str); - - //入力 - const std::vector feature = pos.makeFeature(); - inputs.insert(inputs.end(), feature.begin(), feature.end()); - - //policyの教師信号 - for (const std::pair& e : data[i].policy) { - policy_teachers[i * POLICY_DIM + e.first] = e.second; - } - - //valueの教師信号 - if (data[i].value != 0 && data[i].value != BIN_SIZE - 1) { - std::cerr << "Categoricalの検証データは現状のところValueが-1 or 1でないといけない" << std::endl; - std::exit(1); - } - value_teachers.push_back(data[i].value == 0 ? MIN_SCORE : MAX_SCORE); - } - - std::pair y = forward(inputs); - torch::Tensor logits = y.first.view({ -1, POLICY_DIM }); - - torch::Tensor policy_target = - (fp16_ ? torch::tensor(policy_teachers).to(device_, torch::kHalf) : torch::tensor(policy_teachers).to(device_)) - .view({ -1, POLICY_DIM }); - - torch::Tensor policy_loss = torch::sum(-policy_target * torch::log_softmax(logits, 1), 1, false); - - //Valueの分布を取得 - torch::Tensor value_cat = torch::softmax(y.second, 1); - - //i番目の要素が示す値はMIN_SCORE + (i + 0.5) * VALUE_WIDTH - std::vector each_value; - for (int64_t i = 0; i < BIN_SIZE; i++) { - each_value.emplace_back(MIN_SCORE + (i + 0.5) * VALUE_WIDTH); - } - torch::Tensor each_value_tensor = torch::tensor(each_value).to(device_); - - //Categorical分布と内積を取ることで期待値を求める - torch::Tensor value = (each_value_tensor * value_cat).sum(1); - - torch::Tensor value_t = - (fp16_ ? torch::tensor(value_teachers).to(device_, torch::kHalf) : torch::tensor(value_teachers).to(device_)); - -#ifdef USE_SIGMOID - torch::Tensor value_loss = torch::binary_cross_entropy(value, value_t, {}, torch::Reduction::None); +#ifdef SHOGI +const std::string MODEL_PREFIX = "shogi_sca_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); #else - torch::Tensor value_loss = torch::mse_loss(value, value_t, torch::Reduction::None); +const std::string MODEL_PREFIX = "othello_sca_bl" + std::to_string(BLOCK_NUM) + "_ch" + std::to_string(CHANNEL_NUM); #endif - - return { policy_loss, value_loss }; -#else - //Scalarモデルの場合はloss関数と同じ - return loss(data); #endif -} - -void NeuralNetworkImpl::setGPU(int16_t gpu_id, bool fp16) { - device_ = (torch::cuda::is_available() ? torch::Device(torch::kCUDA, gpu_id) : torch::Device(torch::kCPU)); - fp16_ = fp16; - (fp16_ ? to(device_, torch::kHalf) : to(device_, torch::kFloat)); -} \ No newline at end of file +//デフォルトで読み書きするファイル名 +const std::string DEFAULT_MODEL_NAME = MODEL_PREFIX + ".model"; \ No newline at end of file diff --git a/src/neural_network.hpp b/src/neural_network.hpp index e1e90e33..a02249e1 100644 --- a/src/neural_network.hpp +++ b/src/neural_network.hpp @@ -1,8 +1,8 @@ #ifndef MIACIS_NEURAL_NETWORK_HPP #define MIACIS_NEURAL_NETWORK_HPP -#include "neural_network_modules.hpp" #include "types.hpp" +#include //型のエイリアス using PolicyType = std::vector; @@ -25,61 +25,15 @@ struct LearningData { ValueTeacherType value; }; +extern const std::string MODEL_PREFIX; +extern const std::string DEFAULT_MODEL_NAME; + //損失の種類 enum LossType { POLICY_LOSS_INDEX, VALUE_LOSS_INDEX, LOSS_TYPE_NUM }; //各損失の名前を示す文字列 const std::array LOSS_TYPE_NAME{ "policy", "value" }; -//#define REPRESENTATION_DROPOUT - -//使用する全体のニューラルネットワーク -class NeuralNetworkImpl : public torch::nn::Module { -public: - NeuralNetworkImpl(); - - //入力として局面の特徴量を並べたvectorを受け取ってPolicyとValueに対応するTensorを返す関数 - std::pair forward(const std::vector& inputs); - - //複数局面の特徴量を1次元vectorにしたものを受け取ってそれぞれに対する評価を返す関数 - std::pair, std::vector> policyAndValueBatch(const std::vector& inputs); - - //学習データについて損失を返す関数 - std::array loss(const std::vector& data); - - //データに対して検証損失を返す関数 - std::array validLoss(const std::vector& data); - - //GPUにネットワークを送る関数 - void setGPU(int16_t gpu_id, bool fp16 = false); - - //評価パラメータを読み書きするファイルのprefix - static const std::string MODEL_PREFIX; - - //デフォルトで読み書きするファイル名 - static const std::string DEFAULT_MODEL_NAME; - -private: - torch::Tensor encode(const std::vector& inputs); - std::pair decode(const torch::Tensor& representation); - - torch::Device device_; - bool fp16_; - - Conv2DwithBatchNorm state_first_conv_and_norm_{ nullptr }; - std::vector state_blocks_; - -#ifdef REPRESENTATION_DROPOUT - torch::nn::Dropout2d representation_dropout_{ nullptr }; -#endif - - torch::nn::Conv2d policy_conv_{ nullptr }; - Conv2DwithBatchNorm value_conv_and_norm_{ nullptr }; - torch::nn::Linear value_linear0_{ nullptr }; - torch::nn::Linear value_linear1_{ nullptr }; -}; -TORCH_MODULE(NeuralNetwork); - //Categorical分布に対する操作 #ifdef USE_CATEGORICAL inline int32_t valueToIndex(float value) { return std::min((int32_t)((value - MIN_SCORE) / VALUE_WIDTH), BIN_SIZE - 1); } diff --git a/src/neural_network_modules.cpp b/src/neural_network_modules.cpp deleted file mode 100644 index e98f6c4d..00000000 --- a/src/neural_network_modules.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "neural_network_modules.hpp" - -#ifdef USE_SEPARABLE_CONV -SeparableConvImpl::SeparableConvImpl(int64_t input_ch, int64_t output_ch, int64_t kernel_size) { - depth_wise_conv_ = - register_module("depth_wise_conv_", torch::nn::Conv2d(torch::nn::Conv2dOptions(input_ch, input_ch, kernel_size) - .with_bias(false) - .padding(kernel_size / 2) - .groups(input_ch))); - point_wise_conv_ = register_module( - "point_wise_conv_", torch::nn::Conv2d(torch::nn::Conv2dOptions(input_ch, output_ch, 1).with_bias(false).padding(0))); -} - -torch::Tensor SeparableConvImpl::forward(const torch::Tensor& x) { return point_wise_conv_(depth_wise_conv_(x)); } -#endif - -Conv2DwithBatchNormImpl::Conv2DwithBatchNormImpl(int64_t input_ch, int64_t output_ch, int64_t kernel_size) { -#ifdef USE_SEPARABLE_CONV - conv_ = register_module("conv_", SeparableConv(input_ch, output_ch, kernel_size)); -#else - conv_ = register_module( - "conv_", - torch::nn::Conv2d(torch::nn::Conv2dOptions(input_ch, output_ch, kernel_size).bias(false).padding(kernel_size / 2))); -#endif - norm_ = register_module("norm_", torch::nn::BatchNorm2d(output_ch)); -} - -torch::Tensor Conv2DwithBatchNormImpl::forward(const torch::Tensor& x) { - torch::Tensor t = x; - t = conv_->forward(t); - t = norm_->forward(t); - return t; -} - -ResidualBlockImpl::ResidualBlockImpl(int64_t channel_num, int64_t kernel_size, int64_t reduction) { - conv_and_norm0_ = register_module("conv_and_norm0_", Conv2DwithBatchNorm(channel_num, channel_num, kernel_size)); - conv_and_norm1_ = register_module("conv_and_norm1_", Conv2DwithBatchNorm(channel_num, channel_num, kernel_size)); - linear0_ = register_module("linear0_", - torch::nn::Linear(torch::nn::LinearOptions(channel_num, channel_num / reduction).bias(false))); - linear1_ = register_module("linear1_", - torch::nn::Linear(torch::nn::LinearOptions(channel_num / reduction, channel_num).bias(false))); -} - -torch::Tensor ResidualBlockImpl::forward(const torch::Tensor& x) { - torch::Tensor t = x; - - t = conv_and_norm0_->forward(t); - t = activation(t); - t = conv_and_norm1_->forward(t); - - //SENet構造 - torch::Tensor y = torch::avg_pool2d(t, { t.size(2), t.size(3) }); - y = y.view({ -1, t.size(1) }); - y = linear0_->forward(y); - y = activation(y); - y = linear1_->forward(y); - y = torch::sigmoid(y); - y = y.view({ -1, t.size(1), 1, 1 }); - t = t * y; - - t = activation(x + t); - return t; -} - -torch::Tensor activation(const torch::Tensor& x) { - //ReLU - return torch::relu(x); - - //Mish - //return x * torch::tanh(torch::softplus(x)); - - //Swish - //return x * torch::sigmoid(x); -} \ No newline at end of file diff --git a/src/neural_network_modules.hpp b/src/neural_network_modules.hpp deleted file mode 100644 index cae15f42..00000000 --- a/src/neural_network_modules.hpp +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef MIACIS_NEURAL_NETWORK_MODULES_HPP -#define MIACIS_NEURAL_NETWORK_MODULES_HPP - -#include - -//#define USE_SEPARABLE_CONV - -#ifdef USE_SEPARABLE_CONV -//Separable Conv -//1回の3×3畳み込みをDepth-wise ConvとPoint-wise Convに分解することで効率化 -class SeparableConvImpl : public torch::nn::Module { -public: - SeparableConvImpl(int64_t input_ch, int64_t output_ch, int64_t kernel_size); - torch::Tensor forward(const torch::Tensor& x); - -private: - torch::nn::Conv2d depth_wise_conv_{ nullptr }; - torch::nn::Conv2d point_wise_conv_{ nullptr }; -}; -TORCH_MODULE(SeparableConv); -#endif - -//畳み込みとBatchNormalizationをまとめたユニット -class Conv2DwithBatchNormImpl : public torch::nn::Module { -public: - Conv2DwithBatchNormImpl(int64_t input_ch, int64_t output_ch, int64_t kernel_size); - torch::Tensor forward(const torch::Tensor& x); - -private: -#ifdef USE_SEPARABLE_CONV - SeparableConv conv_{ nullptr }; -#else - torch::nn::Conv2d conv_{ nullptr }; -#endif - torch::nn::BatchNorm2d norm_{ nullptr }; -}; -TORCH_MODULE(Conv2DwithBatchNorm); - -//残差ブロック:SENetの構造を利用 -class ResidualBlockImpl : public torch::nn::Module { -public: - ResidualBlockImpl(int64_t channel_num, int64_t kernel_size, int64_t reduction); - torch::Tensor forward(const torch::Tensor& x); - -private: - Conv2DwithBatchNorm conv_and_norm0_{ nullptr }; - Conv2DwithBatchNorm conv_and_norm1_{ nullptr }; - torch::nn::Linear linear0_{ nullptr }; - torch::nn::Linear linear1_{ nullptr }; -}; -TORCH_MODULE(ResidualBlock); - -torch::Tensor activation(const torch::Tensor& x); - -#endif //MIACIS_NEURAL_NETWORK_MODULES_HPP \ No newline at end of file diff --git a/src/othello/interface.cpp b/src/othello/interface.cpp index 2e448af3..a7d59a1d 100644 --- a/src/othello/interface.cpp +++ b/src/othello/interface.cpp @@ -22,7 +22,6 @@ Interface::Interface() : searcher_(nullptr) { command_["quit"] = [this] { quit(); }; //メンバ関数以外 - command_["initParams"] = initParams; command_["supervisedLearn"] = supervisedLearn; command_["reinforcementLearn"] = reinforcementLearn; // clang-format on @@ -107,10 +106,9 @@ void Interface::test() { search_options.thread_num_per_gpu = 1; search_options.search_batch_size = 1; search_options.output_log_file = true; - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - nn->setGPU(0); - nn->eval(); + InferModel nn; + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path, + search_options.use_fp16); SearcherForPlay searcher(search_options); Position pos; @@ -289,9 +287,9 @@ void Interface::quit() { void Interface::outputValue() { root_.init(); std::ofstream ofs("value_output.txt"); - NeuralNetwork nn; - torch::load(nn, options_.model_name); - nn->setGPU(0); + SearchOptions search_option; + InferModel nn; + nn.load(options_.model_name, 0, 1, search_option.calibration_kifu_path, search_option.use_fp16); std::uniform_real_distribution dist(0.0, 1.0); @@ -300,7 +298,7 @@ void Interface::outputValue() { std::vector feature = root_.makeFeature(); root_.print(); - std::pair, std::vector> y = nn->policyAndValueBatch(feature); + std::pair, std::vector> y = nn.policyAndValueBatch(feature); PolicyType policy; std::vector moves = root_.generateAllMoves(); for (const Move& move : moves) { diff --git a/src/othello/position.hpp b/src/othello/position.hpp index 6d8b0e5c..db16df9e 100644 --- a/src/othello/position.hpp +++ b/src/othello/position.hpp @@ -43,6 +43,9 @@ class Position { //終了判定 bool isFinish(float& score, bool check_repeat = true) const; + //ループ判定:将棋の方で使うのでオセロでも持っておく + static bool isRepeating(float& score) { return false; } + //ハッシュ static void initHashSeed(); diff --git a/src/reinforcement_learn.cpp b/src/reinforcement_learn.cpp index c22bb98c..d6ad9702 100644 --- a/src/reinforcement_learn.cpp +++ b/src/reinforcement_learn.cpp @@ -1,6 +1,8 @@ #include "game_generator.hpp" #include "hyperparameter_loader.hpp" +#include "infer_model.hpp" #include "learn.hpp" +#include void reinforcementLearn() { // clang-format off @@ -31,9 +33,11 @@ void reinforcementLearn() { int64_t sleep_msec = settings.get("sleep_msec"); int64_t init_buffer_by_kifu = settings.get("init_buffer_by_kifu"); int64_t noise_mode = settings.get("noise_mode"); + int64_t wait_sec_per_load = settings.get("wait_sec_per_load"); bool data_augmentation = settings.get("data_augmentation"); bool Q_search = settings.get("Q_search"); std::string train_kifu_path = settings.get("train_kifu_path"); + search_options.calibration_kifu_path = settings.get("calibration_kifu_path"); // clang-format on //学習クラスを生成 @@ -58,14 +62,11 @@ void reinforcementLearn() { //GPUの数だけネットワーク,自己対局生成器を生成 size_t gpu_num = torch::getNumGPUs(); - std::vector neural_networks(gpu_num); std::vector> generators(gpu_num); std::vector gen_threads; for (uint64_t i = 0; i < gpu_num; i++) { - torch::load(neural_networks[i], NeuralNetworkImpl::DEFAULT_MODEL_NAME); - neural_networks[i]->setGPU(static_cast(i), search_options.use_fp16); generators[i] = std::make_unique(search_options, worker_num_per_thread, Q_dist_lambda, noise_mode, - noise_epsilon, noise_alpha, replay_buffer, neural_networks[i]); + noise_epsilon, noise_alpha, replay_buffer, i); gen_threads.emplace_back([&generators, i]() { generators[i]->genGames(); }); } @@ -90,6 +91,9 @@ void reinforcementLearn() { //1ステップ学習し、損失を取得 torch::Tensor loss_sum = learn_manager.learnOneStep(curr_data, step_num); + //GPUを解放 + generators.front()->gpu_mutex.unlock(); + //replay_bufferのpriorityを更新 std::vector loss_vec(loss_sum.data_ptr(), loss_sum.data_ptr() + batch_size); replay_buffer.update(loss_vec); @@ -97,27 +101,21 @@ void reinforcementLearn() { //一定間隔でActorのパラメータをLearnerと同期 if (step_num % update_interval == 0) { //学習パラメータを保存 - torch::save(learn_manager.neural_network, NeuralNetworkImpl::DEFAULT_MODEL_NAME); + learn_manager.neural_network.save(DEFAULT_MODEL_NAME); //各ネットワークで保存されたパラメータを読み込み for (uint64_t i = 0; i < gpu_num; i++) { - if (i > 0) { - generators[i]->gpu_mutex.lock(); - } - - //ロードするときは一度fp32に直さないとエラーになる - //もっと良いやり方はありそうだがなぁ - neural_networks[i]->setGPU(i, false); - torch::load(neural_networks[i], NeuralNetworkImpl::DEFAULT_MODEL_NAME); - neural_networks[i]->setGPU(static_cast(i), search_options.use_fp16); - if (i > 0) { - generators[i]->gpu_mutex.unlock(); - } + generators[i]->gpu_mutex.lock(); + + //パラメータをロードするべきというシグナルを出す + generators[i]->need_load = true; + + generators[i]->gpu_mutex.unlock(); } - } - //GPUを解放 - generators.front()->gpu_mutex.unlock(); + //int8の場合は特にloadで時間がかかるのでその期間スリープ + std::this_thread::sleep_for(std::chrono::seconds(wait_sec_per_load)); + } //学習スレッドを眠らせることで擬似的にActorの数を増やす std::this_thread::sleep_for(std::chrono::milliseconds(sleep_msec)); diff --git a/src/search_options.hpp b/src/search_options.hpp index 5e65d2cf..c1497128 100644 --- a/src/search_options.hpp +++ b/src/search_options.hpp @@ -32,7 +32,7 @@ struct SearchOptions { // clang-format off check_options.emplace("USI_Ponder", CheckOption(USI_Ponder = false)); check_options.emplace("leave_root", CheckOption(leave_root = true)); - check_options.emplace("use_fp16", CheckOption(use_fp16 = true)); + check_options.emplace("use_fp16", CheckOption(use_fp16 = false)); check_options.emplace("use_book", CheckOption(use_book = false)); check_options.emplace("print_info", CheckOption(print_info = true)); check_options.emplace("output_log_file", CheckOption(output_log_file = false)); @@ -61,8 +61,9 @@ struct SearchOptions { spin_options.emplace("print_policy_num", SpinOption(print_policy_num = 0, 0, 593)); spin_options.emplace("remained_turn_divisor", SpinOption(remained_turn_divisor = 1, 1, MAX)); spin_options.emplace("hold_moves_num", SpinOption(hold_moves_num = 32, 1, 593)); - filename_options.emplace("model_name", FilenameOption(model_name = NeuralNetworkImpl::DEFAULT_MODEL_NAME)); + filename_options.emplace("model_name", FilenameOption(model_name = DEFAULT_MODEL_NAME)); filename_options.emplace("book_file_name", FilenameOption(book_file_name = "book.txt")); + filename_options.emplace("calibration_kifu_path", FilenameOption(calibration_kifu_path = "../../../data/floodgate_kifu/valid")); // clang-format on } bool USI_Ponder; @@ -93,6 +94,7 @@ struct SearchOptions { int64_t hold_moves_num; std::string model_name; std::string book_file_name; + std::string calibration_kifu_path; std::map check_options; std::map spin_options; diff --git a/src/searcher.cpp b/src/searcher.cpp index 7deaa785..1264591f 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -2,8 +2,7 @@ int32_t Searcher::selectMaxUcbChild(const HashEntry& node) const { #ifdef USE_CATEGORICAL - int32_t best_index = std::max_element(node.N.begin(), node.N.end()) - node.N.begin(); - float best_value = expOfValueDist(hash_table_.QfromNextValue(node, best_index)); + float best_value = expOfValueDist(node.value); int32_t best_value_index = std::min(valueToIndex(best_value) + 1, BIN_SIZE - 1); int32_t reversed_best_value_index = BIN_SIZE - best_value_index; #endif @@ -12,23 +11,24 @@ int32_t Searcher::selectMaxUcbChild(const HashEntry& node) const { float max_value = INT_MIN; const int32_t sum = node.sum_N + node.virtual_sum_N; + const float U_numerator = std::sqrt(sum + 1); for (uint64_t i = 0; i < node.moves.size(); i++) { - float U = std::sqrt(sum + 1) / (node.N[i] + node.virtual_N[i] + 1); + float U = U_numerator / (node.N[i] + node.virtual_N[i] + 1); assert(U >= 0.0); #ifdef USE_CATEGORICAL float P = 0.0; if (node.child_indices[i] == HashTable::NOT_EXPANDED) { - P = (node.N[i] == 0 ? search_options_.FPU_x1000 / 1000.0 : 1); + P = (node.N[i] == 0 ? fpu_ : 1); } else { std::unique_lock lock(hash_table_[node.child_indices[i]].mutex); for (int32_t j = 0; j < reversed_best_value_index; j++) { P += hash_table_[node.child_indices[i]].value[j]; } } - float ucb = search_options_.C_PUCT_x1000 / 1000.0 * node.nn_policy[i] * U + search_options_.P_coeff_x1000 / 1000.0 * P; + float ucb = c_puct_ * node.nn_policy[i] * U + p_coeff_ * P; if (search_options_.Q_coeff_x1000 > 0) { - ucb += search_options_.Q_coeff_x1000 / 1000.0 * hash_table_.expQfromNext(node, i); + ucb += q_coeff_ * hash_table_.expQfromNext(node, i); } #else float Q = (node.N[i] == 0 ? search_options_.FPU_x1000 / 1000.0 : hash_table_.QfromNextValue(node, i)); @@ -182,6 +182,19 @@ Index Searcher::expand(Position& pos, std::stack& indices, std::stack search_options_.draw_turn) { + float dummy_score; + if (!pos.isRepeating(dummy_score) || pos.turnNumber() > search_options_.draw_turn) { + //この局面にはどう到達しても絶対に終わりなので指し手情報などを消して良い + curr_node.moves.clear(); + curr_node.moves.shrink_to_fit(); + curr_node.child_indices.clear(); + curr_node.child_indices.shrink_to_fit(); + curr_node.N.clear(); + curr_node.N.shrink_to_fit(); + curr_node.virtual_N.clear(); + curr_node.virtual_N.shrink_to_fit(); + } + #ifdef USE_CATEGORICAL curr_node.value = onehotDist(finish_score); #else diff --git a/src/searcher.hpp b/src/searcher.hpp index 7ea7ed0d..00305fce 100644 --- a/src/searcher.hpp +++ b/src/searcher.hpp @@ -24,7 +24,9 @@ struct BackupQueue { class Searcher { public: explicit Searcher(const SearchOptions& search_options, HashTable& hash_table, GPUQueue& gpu_queue) - : hash_table_(hash_table), search_options_(search_options), gpu_queue_(gpu_queue) {} + : hash_table_(hash_table), search_options_(search_options), gpu_queue_(gpu_queue), + fpu_(search_options_.FPU_x1000 / 1000.0), c_puct_(search_options_.C_PUCT_x1000 / 1000.0), + p_coeff_(search_options_.P_coeff_x1000 / 1000.0), q_coeff_(search_options_.Q_coeff_x1000 / 1000.0) {} //再帰しない探索関数 void select(Position& pos); @@ -58,6 +60,12 @@ class Searcher { //バックアップ要求を貯めるキュー。これは各インスタンスが生成して保持する BackupQueue backup_queue_; + + //select時の定数 + const float fpu_; + const float c_puct_; + const float p_coeff_; + const float q_coeff_; }; #endif //MIACIS_SEARCHER_HPP \ No newline at end of file diff --git a/src/searcher_for_play.cpp b/src/searcher_for_play.cpp index 320842b5..48b765e8 100644 --- a/src/searcher_for_play.cpp +++ b/src/searcher_for_play.cpp @@ -11,14 +11,13 @@ struct MoveWithScore { SearcherForPlay::SearcherForPlay(const SearchOptions& search_options) : stop_signal(false), search_options_(search_options), - hash_table_(search_options.USI_Hash * 1024 * 1024 / (60 * search_options.hold_moves_num)), + hash_table_(search_options.USI_Hash * 1024 * 1024 / (120 * search_options.hold_moves_num)), mate_searcher_(hash_table_, search_options) { //GPUを準備 for (int64_t i = 0; i < search_options.gpu_num; i++) { neural_networks_.emplace_back(); - torch::load(neural_networks_[i], search_options_.model_name); - neural_networks_[i]->setGPU(i, search_options_.use_fp16); - neural_networks_[i]->eval(); + neural_networks_[i].load(search_options_.model_name, i, search_options.search_batch_size, + search_options.calibration_kifu_path, search_options.use_fp16); } //GPUに対するmutexを準備 @@ -98,7 +97,7 @@ Move SearcherForPlay::think(Position& root, int64_t time_limit) { } torch::NoGradGuard no_grad_guard; std::pair, std::vector> y = - neural_networks_[0]->policyAndValueBatch(gpu_queues_[0][0].inputs); + neural_networks_[0].policyAndValueBatch(gpu_queues_[0][0].inputs); //ルートノードへ書き込み curr_node.nn_policy.resize(curr_node.moves.size()); @@ -255,9 +254,9 @@ void SearcherForPlay::workerThreadFunc(Position root, int64_t gpu_id, int64_t th if (!gpu_queue.inputs.empty()) { torch::NoGradGuard no_grad_guard; gpu_mutexes_[gpu_id].lock(); - std::pair, std::vector> y = - neural_networks_[gpu_id]->policyAndValueBatch(gpu_queue.inputs); + std::tuple output = neural_networks_[gpu_id].infer(gpu_queue.inputs); gpu_mutexes_[gpu_id].unlock(); + std::pair, std::vector> y = neural_networks_[gpu_id].decode(output); //書き込み for (uint64_t i = 0; i < gpu_queue.indices.size(); i++) { diff --git a/src/searcher_for_play.hpp b/src/searcher_for_play.hpp index 27837629..6878b5c2 100644 --- a/src/searcher_for_play.hpp +++ b/src/searcher_for_play.hpp @@ -1,6 +1,7 @@ #ifndef MIACIS_SEARCHER_FOR_PLAY_HPP #define MIACIS_SEARCHER_FOR_PLAY_HPP +#include "infer_model.hpp" #include "neural_network.hpp" #include "searcher.hpp" #include "searcher_for_mate.hpp" @@ -46,7 +47,7 @@ class SearcherForPlay { HashTable hash_table_; //GPUは複数 - std::vector neural_networks_; + std::vector neural_networks_; std::vector gpu_mutexes_; //1つのGPUに対してgpu_queue,searcherを複数 diff --git a/src/shogi/game.cpp b/src/shogi/game.cpp index 0249fffa..cc1d3dfa 100644 --- a/src/shogi/game.cpp +++ b/src/shogi/game.cpp @@ -59,14 +59,12 @@ std::tuple loadCSAOneGame(std::ifstream& ifs, float rate_threshold) if (buf.substr(0, 6) == "%TORYO") { game.result = (pos.color() == BLACK ? MIN_SCORE : MAX_SCORE); ok = true; - } else if (buf.substr(0, 11) == "%SENNICHITE") { - game.result = (MAX_SCORE + MIN_SCORE) / 2; - ok = true; } else if (buf.substr(0, 6) == "%KACHI") { game.result = (pos.color() == BLACK ? MAX_SCORE : MIN_SCORE); ok = true; - } else if (buf.substr(0, 7) == "%CHUDAN" || buf.substr(0, 16) == "%+ILLEGAL_ACTION" || - buf.substr(0, 16) == "%-ILLEGAL_ACTION" || buf.substr(0, 8) == "%TIME_UP") { + } else if (buf.substr(0, 11) == "%SENNICHITE" || buf.substr(0, 7) == "%CHUDAN" || + buf.substr(0, 16) == "%+ILLEGAL_ACTION" || buf.substr(0, 16) == "%-ILLEGAL_ACTION" || + buf.substr(0, 8) == "%TIME_UP") { //ダメな対局であったというフラグを返す return std::make_tuple(game, false); } else { diff --git a/src/shogi/interface.cpp b/src/shogi/interface.cpp index 29ef24f0..f277376d 100644 --- a/src/shogi/interface.cpp +++ b/src/shogi/interface.cpp @@ -19,7 +19,6 @@ Interface::Interface() : searcher_(nullptr) { command_["gameover"] = [this] { gameover(); }; //メンバ関数以外 - command_["initParams"] = initParams; command_["cleanGames"] = cleanGames; command_["supervisedLearn"] = supervisedLearn; command_["reinforcementLearn"] = reinforcementLearn; @@ -30,12 +29,13 @@ Interface::Interface() : searcher_(nullptr) { command_["checkGenSpeed"] = checkGenSpeed; command_["checkPredictSpeed"] = checkPredictSpeed; command_["checkVal"] = checkVal; + command_["checkValInfer"] = checkValInfer; command_["checkDoAndUndo"] = checkDoAndUndo; command_["checkMirror"] = checkMirror; command_["checkBook"] = checkBook; command_["makeBook"] = makeBook; command_["searchWithLog"] = searchWithLog; - command_["convertModelToCPU"] = convertModelToCPU; + command_["testLoad"] = testLoad; // clang-format on } @@ -202,7 +202,7 @@ void Interface::go() { Move best_move = (root_.canWinDeclare() ? DECLARE_MOVE : searcher_->think(root_, time_limit - search_options_.byoyomi_margin)); std::cout << "bestmove " << best_move << std::endl; - if (search_options_.USI_Ponder && best_move != NULL_MOVE) { + if (search_options_.USI_Ponder && best_move != NULL_MOVE && best_move != DECLARE_MOVE) { root_.doMove(best_move); float score{}; if (!root_.isFinish(score) && root_.turnNumber() <= search_options_.draw_turn) { diff --git a/src/shogi/position.cpp b/src/shogi/position.cpp index 891ffa63..196bf92b 100644 --- a/src/shogi/position.cpp +++ b/src/shogi/position.cpp @@ -1085,11 +1085,13 @@ bool Position::isLastMoveDropPawn() const { return (lastMove().isDrop() && kind( bool Position::isFinish(float& score, bool check_repeat) { //詰みの確認 - std::vector moves = generateAllMoves(); - if (moves.empty()) { - //打ち歩詰めなら手番側(詰まされた側)が勝ち、そうでないなら手番側が負け - score = isLastMoveDropPawn() ? MAX_SCORE : MIN_SCORE; - return true; + if (is_checked_) { + std::vector moves = generateAllMoves(); + if (moves.empty()) { + //打ち歩詰めなら手番側(詰まされた側)が勝ち、そうでないなら手番側が負け + score = isLastMoveDropPawn() ? MAX_SCORE : MIN_SCORE; + return true; + } } //千日手の確認 diff --git a/src/shogi/test.cpp b/src/shogi/test.cpp index f7c0b3ee..ba324b32 100644 --- a/src/shogi/test.cpp +++ b/src/shogi/test.cpp @@ -1,5 +1,6 @@ #include "test.hpp" #include "../game_generator.hpp" +#include "../infer_model.hpp" #include "../searcher_for_play.hpp" #include "book.hpp" @@ -12,10 +13,9 @@ void test() { search_options.thread_num_per_gpu = 1; search_options.search_batch_size = 1; search_options.output_log_file = true; - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - nn->setGPU(0); - nn->eval(); + InferModel nn; + nn.load(DEFAULT_MODEL_NAME, 0, search_options.search_batch_size, search_options.calibration_kifu_path, + search_options.use_fp16); SearcherForPlay searcher(search_options); Position pos; @@ -82,9 +82,6 @@ void infiniteTest() { } void checkGenSpeed() { - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - constexpr int64_t buffer_size = 1048576; SearchOptions search_options; search_options.search_limit = 800; @@ -96,9 +93,6 @@ void checkGenSpeed() { constexpr float noise_epsilon = 0.25; constexpr float noise_alpha = 0.15; - nn->setGPU(0, search_options.use_fp16); - nn->eval(); - int64_t total_worker_num = 0; std::cout << "total_worker_num(デフォルトは128): "; std::cin >> total_worker_num; @@ -116,8 +110,7 @@ void checkGenSpeed() { for (search_options.search_batch_size = 2; search_options.search_batch_size <= 4; search_options.search_batch_size *= 2) { ReplayBuffer buffer(0, buffer_size, 1, 1.0, 1.0, false); auto start = std::chrono::steady_clock::now(); - GameGenerator generator(search_options, worker_num, Q_dist_lambda, noise_mode, noise_epsilon, noise_alpha, buffer, - nn); + GameGenerator generator(search_options, worker_num, Q_dist_lambda, noise_mode, noise_epsilon, noise_alpha, buffer, 0); std::thread t(&GameGenerator::genGames, &generator); for (int64_t i = 0; i < num; i++) { std::this_thread::sleep_for(std::chrono::seconds(sec)); @@ -263,10 +256,9 @@ void checkVal() { std::cout << "data.size() = " << data.size() << std::endl; //ネットワークの準備 - NeuralNetwork nn; - torch::load(nn, model_file); - nn->setGPU(0); - nn->eval(); + LearningModel nn; + nn.load(model_file, 0); + nn.eval(); std::array v = validation(nn, data, batch_size); std::cout << std::fixed << std::setprecision(4); @@ -275,17 +267,55 @@ void checkVal() { } } +void checkValInfer() { + //データを取得 + std::string path; + std::cout << "validation kifu path : "; + std::cin >> path; + int64_t batch_size; + std::cout << "batch_size : "; + std::cin >> batch_size; + std::string model_file; + std::cout << "model_file : "; + std::cin >> model_file; + std::string calibration_kifu_path; + std::cout << "calibration_kifu_path : "; + std::cin >> calibration_kifu_path; + bool use_fp16; + std::cout << "fp16 : "; + std::cin >> use_fp16; + + std::vector data = loadData(path, false, 3000); + std::cout << "data.size() = " << data.size() << std::endl; + + //ネットワークの準備 + InferModel nn; + + for (int64_t calibration_data_num = batch_size; calibration_data_num <= (batch_size << 5); calibration_data_num *= 2) { + nn.load(model_file, 0, batch_size, calibration_kifu_path, use_fp16); + + std::array v = validation(nn, data, batch_size); + std::cout << std::fixed << std::setprecision(4); + std::cout << std::setw(10) << calibration_data_num << " "; + for (int64_t i = 0; i < LOSS_TYPE_NUM; i++) { + std::cout << v[i] << " \n"[i == LOSS_TYPE_NUM - 1]; + } + } + std::cout << "finish checkValInfer" << std::endl; +} + void checkPredictSpeed() { Position pos; constexpr int64_t REPEAT_NUM = 1000; + constexpr int64_t BATCH_SIZE = 512; std::cout << std::fixed; - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - nn->setGPU(0); - nn->eval(); + SearchOptions search_options; + + InferModel nn; + nn.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path, search_options.use_fp16); - for (int64_t batch_size = 1; batch_size <= 4096; batch_size *= 2) { + for (int64_t batch_size = 1; batch_size <= BATCH_SIZE; batch_size *= 2) { //バッチサイズ分入力を取得 std::vector input; for (int64_t k = 0; k < batch_size; k++) { @@ -301,20 +331,17 @@ void checkPredictSpeed() { } } - std::cout << input.size() << std::endl; - float time = 0.0; for (int64_t i = 0; i < REPEAT_NUM; i++) { auto start = std::chrono::steady_clock::now(); torch::NoGradGuard no_grad_guard; - nn->policyAndValueBatch(pos.makeFeature()); + nn.policyAndValueBatch(input); auto end = std::chrono::steady_clock::now(); auto elapsed = std::chrono::duration_cast(end - start); time += elapsed.count(); } - std::cout << "batch_size = " << std::setw(5) << batch_size << ", " << time / REPEAT_NUM << " microsec / batch" - << std::endl; + std::cout << std::setw(5) << batch_size << "\t" << time / REPEAT_NUM << "\tmicrosec/batch" << std::endl; } } @@ -464,13 +491,51 @@ void searchWithLog() { } } -void convertModelToCPU() { - //ネットワークの準備 - NeuralNetwork nn; - torch::load(nn, NeuralNetworkImpl::DEFAULT_MODEL_NAME); - nn->to(torch::kCPU); - torch::save(nn, NeuralNetworkImpl::MODEL_PREFIX + "_cpu.model"); - std::cout << "finish convertModelToCPU" << std::endl; +void testLoad() { + constexpr int64_t LOOP_NUM = 20; + constexpr int64_t BATCH_SIZE = 256; + + SearchOptions search_options; + + //時間計測開始 + Timer timer; + timer.start(); + int64_t pre = 0; + //通常試行 + std::cout << "通常の試行" << std::endl; + for (int64_t num = 0; num < 0; num++) { + InferModel model; + model.load(DEFAULT_MODEL_NAME, 0, BATCH_SIZE, search_options.calibration_kifu_path, search_options.use_fp16); + int64_t ela = timer.elapsedSeconds(); + int64_t curr = ela - pre; + pre = ela; + std::cout << std::setw(3) << num + 1 << "回目終了, 今回" << curr << "秒, 平均" << ela / (num + 1.0) << "秒" << std::endl; + } + + //スレッドを作成しての試行 + timer.start(); + pre = 0; + std::cout << "スレッドを作成しての試行" << std::endl; + const int64_t gpu_num = torch::getNumGPUs(); + for (int64_t num = 0; num < LOOP_NUM; num++) { + std::vector threads; + for (int64_t i = 0; i < gpu_num; i++) { + threads.emplace_back([i, search_options]() { + InferModel model; + model.load(DEFAULT_MODEL_NAME, i, BATCH_SIZE, search_options.calibration_kifu_path, search_options.use_fp16); + }); + } + for (int64_t i = 0; i < gpu_num; i++) { + threads[i].join(); + } + int64_t ela = timer.elapsedSeconds(); + int64_t curr = ela - pre; + pre = ela; + std::cout << std::setw(3) << num + 1 << "回目終了, 今回" << curr << "秒, 平均" << ela / (num + 1.0) << "秒" << std::endl; + } + + std::cout << "finish testLoad" << std::endl; + std::exit(0); } } // namespace Shogi \ No newline at end of file diff --git a/src/shogi/test.hpp b/src/shogi/test.hpp index 36d161de..fff57d69 100644 --- a/src/shogi/test.hpp +++ b/src/shogi/test.hpp @@ -10,13 +10,14 @@ void checkSearchSpeed2(); void checkGenSpeed(); void checkPredictSpeed(); void checkVal(); +void checkValInfer(); void checkSegmentTree(); void checkDoAndUndo(); void checkMirror(); void checkBook(); void makeBook(); void searchWithLog(); -void convertModelToCPU(); +void testLoad(); } // namespace Shogi diff --git a/src/supervised_learn.cpp b/src/supervised_learn.cpp index b47b14d1..bc3d2b19 100644 --- a/src/supervised_learn.cpp +++ b/src/supervised_learn.cpp @@ -32,6 +32,11 @@ void supervisedLearn() { //データを取得 std::vector train_data = loadData(train_kifu_path, data_augmentation, train_rate_threshold); + //どのEpochでどのデータを使っているかを記録する + std::ofstream epoch_log("epoch_log.txt"); + epoch_log << "dir_path.size() = " << dir_paths.size() << std::endl; + epoch_log << "0 0 " << train_data.size() << std::endl; + //学習クラスを生成 LearnManager learn_manager("supervised"); @@ -53,6 +58,7 @@ void supervisedLearn() { if (load_multi_dir) { train_data = loadData(dir_paths[epoch % dir_paths.size()], data_augmentation, train_rate_threshold); + epoch_log << epoch << " " << global_step << " " << train_data.size() << std::endl; } }