Update readme, small simplifications

yburda · yburda · commit 0c3d179fd61e · 2018-10-11T13:44:16.000-07:00
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,108 @@
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+.idea/
diff --git a/README.md b/README.md
@@ -25,7 +25,11 @@ This is a TensorFlow based implementation for our [paper on large-scale study of
     }
 
 ### Installation and Usage
-Stay tuned! To be released soon.
+The following command should train a pure-exploration agent on Breakout with the default experiment parameters:
+```bash
+python run.py
+```
+To use more than one GPU or machine, launch the script with MPI. For example, on an 8-GPU machine, `mpiexec -n 8 python run.py` should collect experience from 1024 parallel environments instead of the default 128.
 
 ### Other helpful pointers
 - [Paper](https://pathak22.github.io/large-scale-curiosity/resources/largeScaleCuriosity2018.pdf)
diff --git a/run.py b/run.py
@@ -4,20 +4,21 @@
 except:
     print("no OpenGL.GLU")
 import functools
-import gym
 import os.path as osp
+from functools import partial
+
+import gym
 import tensorflow as tf
 from baselines import logger
 from baselines.bench import Monitor
 from baselines.common.atari_wrappers import NoopResetEnv, FrameStack
-from functools import partial
 from mpi4py import MPI
 
 from auxiliary_tasks import FeatureExtractor, InverseDynamics, VAE, JustPixels
 from cnn_policy import CnnPolicy
 from cppo_agent import PpoOptimizer
 from dynamics import Dynamics, UNet
-from utils import random_agent_ob_mean_std, save_exp_details
+from utils import random_agent_ob_mean_std
 from wrappers import MontezumaInfoWrapper, make_mario_env, make_robo_pong, make_robo_hockey, \
     make_multi_pong, AddRandomStateToInfo, MaxAndSkipEnv, ProcessFrame84, ExtraTimeLimit
 
@@ -32,7 +33,6 @@ def start_experiment(**args):
     with log, tf_sess:
         logdir = logger.get_dir()
         print("results will be saved to ", logdir)
-        save_exp_details(filename=__file__, savedir=logdir, args=args)
         trainer.train()
 
 
diff --git a/utils.py b/utils.py
@@ -1,13 +1,11 @@
 import multiprocessing
-import numpy as np
 import os
-import pickle
 import platform
-import subprocess
-import sys
+from functools import partial
+
+import numpy as np
 import tensorflow as tf
 from baselines.common.tf_util import normc_initializer
-from functools import partial
 from mpi4py import MPI
 
 
@@ -227,24 +225,3 @@ def row(i):
 
     return np.concatenate([row(i) for i in range(n_rows)], axis=0)
 
-
-def save_exp_details(filename, savedir, args):
-    source_dirname = os.path.dirname(os.path.abspath(filename))
-    git_hash = subprocess.run("git log --pretty=format:%H -n 1".split(' '), cwd=source_dirname,
-                              stdout=subprocess.PIPE).stdout.decode('utf-8')
-    git_diff = subprocess.run("git diff {} --full-index".format(git_hash).split(' '), cwd=source_dirname,
-                              stdout=subprocess.PIPE).stdout.decode('utf-8')
-    ordered_arg_names = sorted(list(args.keys()))
-    sorted_args = [(k, args[k]) for k in ordered_arg_names]
-
-    rank_zero_savedir = savedir if MPI.COMM_WORLD.Get_rank() == 0 else None
-    rank_zero_savedir = MPI.COMM_WORLD.bcast(rank_zero_savedir, root=0)
-
-    with open(os.path.join(savedir, "exp_details.pkl"), 'wb') as f:
-        obj = {'git_hash': git_hash,
-               'git_diff': git_diff,
-               'args': sorted_args,
-               'argv': ' '.join(sys.argv),
-               'name': args['exp_name'],
-               'rank_zero_savedir': rank_zero_savedir}
-        pickle.dump(obj, f, protocol=0)
diff --git a/wrappers.py b/wrappers.py
@@ -119,40 +119,13 @@ class MontezumaInfoWrapper(gym.Wrapper):
     ram_map = {
         "room": dict(
             index=3,
-            values=range(24),
-            value_type="range",
         ),
         "x": dict(
             index=42,
-            values=range(0, 152),
-            value_type="range",
         ),
         "y": dict(
             index=43,
-            values=range(148, 256),
-            value_type="range",
         ),
-        # "objects": dict(
-        #     index=67,
-        #     values=range(16, 32),
-        #     value_type="categorical",
-        # ),  # 1st level: doors, skeleton, key
-        # "skeleton_location": dict(
-        #     index=47,
-        #     values=range(20, 80),  # not exactly the min/max, but good enough
-        #     value_type="range",
-        # ),
-        # "beam_wall": dict(
-        #     index=27,
-        #     values=[253, 209],
-        #     value_type="categorical",
-        #     meanings=["off", "on"]
-        # ),
-        # "beam_countdown": dict(
-        #     index=83,
-        #     values=range(37),
-        #     value_type="range",
-        # ),
     }
 
     def __init__(self, env):