diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 33e9f4a..0000000 --- a/Dockerfile +++ /dev/null @@ -1,65 +0,0 @@ -# Base image -FROM nvidia/cudagl:10.1-devel-ubuntu16.04 - -# Setup basic packages -RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - git \ - curl \ - vim \ - ca-certificates \ - libjpeg-dev \ - libpng-dev \ - libglfw3-dev \ - libglm-dev \ - libx11-dev \ - libomp-dev \ - libegl1-mesa-dev \ - pkg-config \ - wget \ - zip \ - htop \ - tmux \ - unzip &&\ - rm -rf /var/lib/apt/lists/* - -# Install conda -RUN wget -O $HOME/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh &&\ - chmod +x ~/miniconda.sh &&\ - ~/miniconda.sh -b -p /custom/conda &&\ - rm ~/miniconda.sh &&\ - /custom/conda/bin/conda install numpy pyyaml scipy ipython mkl mkl-include &&\ - /custom/conda/bin/conda clean -ya -ENV PATH /custom/conda/bin:$PATH - -# Install cmake -RUN wget https://github.com/Kitware/CMake/releases/download/v3.14.0/cmake-3.14.0-Linux-x86_64.sh -RUN mkdir /opt/cmake -RUN sh /cmake-3.14.0-Linux-x86_64.sh --prefix=/opt/cmake --skip-license -RUN ln -s /opt/cmake/bin/cmake /usr/local/bin/cmake -RUN cmake --version - -# Setup habitat-sim -RUN git clone https://github.com/facebookresearch/habitat-sim.git -RUN /bin/bash -c "cd habitat-sim; git checkout tags/v0.1.5; pip install -r requirements.txt; python setup.py install --headless --with-cuda" - -# Install challenge specific habitat-api -RUN git clone https://github.com/facebookresearch/habitat-api.git -RUN /bin/bash -c "cd habitat-api; git checkout tags/v0.1.5; pip install -e ." -RUN /bin/bash -c "cd habitat-api; wget http://dl.fbaipublicfiles.com/habitat/habitat-test-scenes.zip; unzip habitat-test-scenes.zip" - -# Silence habitat-sim logs -ENV GLOG_minloglevel=2 -ENV MAGNUM_LOG="quiet" - -# Install project specific packages -RUN /bin/bash -c "apt-get update; apt-get install -y libsm6 libxext6 libxrender-dev; pip install opencv-python" -RUN /bin/bash -c "pip install --upgrade cython numpy" -RUN /bin/bash -c "pip install matplotlib seaborn==0.9.0 scikit-fmm==2019.1.30 scikit-image==0.15.0 imageio==2.6.0 scikit-learn==0.22.2.post1 ifcfg" - -# Install pytorch and torch_scatter -RUN conda install pytorch=1.6.0 torchvision=0.7.0 cudatoolkit=10.2 -c pytorch -RUN /bin/bash -c "pip install torch_scatter" - -# Install detectron2 -RUN /bin/bash -c "python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.6/index.html" diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 9f37f1a..0000000 --- a/LICENSE +++ /dev/null @@ -1,21 +0,0 @@ -MIT License - -Copyright (c) 2020 Devendra Chaplot - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/README.md b/README.md deleted file mode 100644 index e7a6f76..0000000 --- a/README.md +++ /dev/null @@ -1,155 +0,0 @@ -# Object Goal Navigation using Goal-Oriented Semantic Exploration -This is a PyTorch implementation of the NeurIPS-20 paper: - -[Object Goal Navigation using Goal-Oriented Semantic Exploration](https://arxiv.org/pdf/2007.00643.pdf)
-Devendra Singh Chaplot, Dhiraj Gandhi, Abhinav Gupta, Ruslan Salakhutdinov
-Carnegie Mellon University, Facebook AI Research - -Winner of the [CVPR 2020 Habitat ObjectNav Challenge](https://aihabitat.org/challenge/2020/). - -Project Website: https://devendrachaplot.github.io/projects/semantic-exploration - -![example](./docs/example.gif) - -### Overview: -The Goal-Oriented Semantic Exploration (SemExp) model consists of three modules: a Semantic Mapping Module, a Goal-Oriented Semantic Policy, and a deterministic Local Policy. -As shown below, the Semantic Mapping model builds a semantic map over time. The Goal-Oriented Semantic Policy selects a long-term goal based on the semantic -map to reach the given object goal efficiently. A deterministic local policy based on analytical planners is used to take low-level navigation actions to reach the long-term goal. - -![overview](./docs/overview.jpg) - -### This repository contains: -- Episode train and test datasets for [Object Goal Navigation](https://arxiv.org/pdf/2007.00643.pdf) task for the Gibson dataset in the Habitat Simulator. -- The code to train and evaluate the Semantic Exploration (SemExp) model on the Object Goal Navigation task. -- Pretrained SemExp model. - -## Installing Dependencies -- We use earlier versions of [habitat-sim](https://github.com/facebookresearch/habitat-sim) and [habitat-lab](https://github.com/facebookresearch/habitat-lab) as specified below: - -Installing habitat-sim: -``` -git clone https://github.com/facebookresearch/habitat-sim.git -cd habitat-sim; git checkout tags/v0.1.5; -pip install -r requirements.txt; -python setup.py install --headless -python setup.py install # (for Mac OS) -``` - -Installing habitat-lab: -``` -git clone https://github.com/facebookresearch/habitat-lab.git -cd habitat-lab; git checkout tags/v0.1.5; -pip install -e . -``` -Check habitat installation by running `python examples/benchmark.py` in the habitat-lab folder. - -- Install [pytorch](https://pytorch.org/) according to your system configuration. The code is tested on pytorch v1.6.0 and cudatoolkit v10.2. If you are using conda: -``` -conda install pytorch==1.6.0 torchvision==0.7.0 cudatoolkit=10.2 #(Linux with GPU) -conda install pytorch==1.6.0 torchvision==0.7.0 -c pytorch #(Mac OS) -``` - -- Install [detectron2](https://github.com/facebookresearch/detectron2/) according to your system configuration. If you are using conda: -``` -python -m pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.6/index.html #(Linux with GPU) -CC=clang CXX=clang++ ARCHFLAGS="-arch x86_64" python -m pip install 'git+https://github.com/facebookresearch/detectron2.git' #(Mac OS) -``` - -### Docker and Singularity images: -We provide experimental [docker](https://www.docker.com/) and [singularity](https://sylabs.io/) images with all the dependencies installed, see [Docker Instructions](./docs/DOCKER_INSTRUCTIONS.md). - - -## Setup -Clone the repository and install other requirements: -``` -git clone https://github.com/devendrachaplot/Object-Goal-Navigation/ -cd Object-Goal-Navigation/; -pip install -r requirements.txt -``` - -### Downloading scene dataset -- Download the Gibson dataset using the instructions here: https://github.com/facebookresearch/habitat-lab#scenes-datasets (download the 11GB file `gibson_habitat_trainval.zip`) -- Move the Gibson scene dataset or create a symlink at `data/scene_datasets/gibson_semantic`. 
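Before moving on, a quick check that the scenes are actually visible at the expected path can save debugging later. The snippet below is only an illustrative sketch (the check itself is not part of this repository); the directory name and the `.glb`/`.navmesh` pairing follow the layout listed under "Setting up datasets" below:
```
import glob
import os

scene_dir = "data/scene_datasets/gibson_semantic"
glb_files = sorted(glob.glob(os.path.join(scene_dir, "*.glb")))
navmesh_files = sorted(glob.glob(os.path.join(scene_dir, "*.navmesh")))

print("Found {} .glb and {} .navmesh files under {}".format(
    len(glb_files), len(navmesh_files), scene_dir))
assert glb_files and navmesh_files, \
    "Gibson scenes not found -- check the symlink at data/scene_datasets/gibson_semantic"
```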
- -### Downloading episode dataset -- Download the episode dataset: -``` -wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1tslnZAkH8m3V5nP8pbtBmaR2XEfr8Rau' -O objectnav_gibson_v1.1.zip -``` -- Unzip the dataset into `data/datasets/objectnav/gibson/v1.1/` - -### Setting up datasets -The code requires the datasets in a `data` folder in the following format (same as habitat-lab): -``` -Object-Goal-Navigation/ - data/ - scene_datasets/ - gibson_semantic/ - Adrian.glb - Adrian.navmesh - ... - datasets/ - objectnav/ - gibson/ - v1.1/ - train/ - val/ -``` - - -### Test setup -To verify that the data is setup correctly, run: -``` -python test.py --agent random -n1 --num_eval_episodes 1 --auto_gpu_config 0 -``` - -## Usage - -### Training: -For training the SemExp model on the Object Goal Navigation task: -``` -python main.py -``` - -### Downloading pre-trained models -``` -mkdir pretrained_models; -wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=171ZA7XNu5vi3XLpuKs8DuGGZrYyuSjL0' -O pretrained_models/sem_exp.pth -``` - -### For evaluation: -For evaluating the pre-trained model: -``` -python main.py --split val --eval 1 --load pretrained_models/sem_exp.pth -``` - -For visualizing the agent observations and predicted semantic map, add `-v 1` as an argument to the above command. - -The pre-trained model should get 0.657 Success, 0.339 SPL and 1.474 DTG. - -For more detailed instructions, see [INSTRUCTIONS](./docs/INSTRUCTIONS.md). - - -## Cite as ->Chaplot, D.S., Gandhi, D., Gupta, A. and Salakhutdinov, R., 2020. Object Goal Navigation using Goal-Oriented Semantic Exploration. In Neural Information Processing Systems (NeurIPS-20). ([PDF](https://arxiv.org/pdf/2007.00643.pdf)) - -### Bibtex: -``` -@inproceedings{chaplot2020object, - title={Object Goal Navigation using Goal-Oriented Semantic Exploration}, - author={Chaplot, Devendra Singh and Gandhi, Dhiraj and - Gupta, Abhinav and Salakhutdinov, Ruslan}, - booktitle={In Neural Information Processing Systems (NeurIPS)}, - year={2020} - } -``` - -## Related Projects -- This project builds on the [Active Neural SLAM](https://devendrachaplot.github.io/projects/Neural-SLAM) paper. The code and pretrained models for the Active Neural SLAM system are available at: -https://github.com/devendrachaplot/Neural-SLAM. -- The Semantic Mapping module is similar to the one used in [Semantic Curiosity](https://devendrachaplot.github.io/projects/SemanticCuriosity). - -## Acknowledgements -This repository uses [Habitat Lab](https://github.com/facebookresearch/habitat-lab) implementation for running the RL environment. -The implementation of PPO is borrowed from [ikostrikov/pytorch-a2c-ppo-acktr-gail](https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/). -The Mask-RCNN implementation is based on the [detectron2](https://github.com/facebookresearch/detectron2/) repository. We would also like to thank Shubham Tulsiani and Saurabh Gupta for their help in implementing some parts of the code. 
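Before the agent code that follows, a schematic sketch of the control loop described in the Overview above may help: the Semantic Mapping module updates the map every step, the Goal-Oriented Semantic Policy picks a new long-term goal every `num_local_steps` steps (25 by default), and the deterministic Local Policy plans low-level actions toward it. The `mapper`, `global_policy` and `local_policy` callables below are placeholders for those modules, not functions defined in this repository; the real loop lives in `main.py` and `agents/sem_exp.py`.
```
# Schematic only: mapper, global_policy and local_policy are placeholder
# callables standing in for the Semantic Mapping module, the Goal-Oriented
# Semantic Policy and the FMM-based Local Policy.
def run_semexp_episode(env, mapper, global_policy, local_policy,
                       num_local_steps=25, max_steps=500):
    obs, info = env.reset()
    sem_map = mapper(None, obs, info)            # build the initial semantic map
    long_term_goal = None
    for step in range(max_steps):
        if step % num_local_steps == 0:
            # pick a long-term goal cell on the map for the target category
            long_term_goal = global_policy(sem_map, info["goal_cat_id"])
        # analytical planner returns a discrete action toward the long-term goal
        action = local_policy(sem_map, long_term_goal)
        obs, reward, done, info = env.step({"action": action})
        sem_map = mapper(sem_map, obs, info)     # update the map with the new frame
        if done:
            break
    return info
```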
diff --git a/agents/sem_exp.py b/agents/sem_exp.py deleted file mode 100644 index 80283fc..0000000 --- a/agents/sem_exp.py +++ /dev/null @@ -1,416 +0,0 @@ -import math -import os -import cv2 -import numpy as np -import skimage.morphology -from PIL import Image -from torchvision import transforms - -from envs.utils.fmm_planner import FMMPlanner -from envs.habitat.objectgoal_env import ObjectGoal_Env -from agents.utils.semantic_prediction import SemanticPredMaskRCNN -from constants import color_palette -import envs.utils.pose as pu -import agents.utils.visualization as vu - - -class Sem_Exp_Env_Agent(ObjectGoal_Env): - """The Sem_Exp environment agent class. A seperate Sem_Exp_Env_Agent class - object is used for each environment thread. - - """ - - def __init__(self, args, rank, config_env, dataset): - - self.args = args - super().__init__(args, rank, config_env, dataset) - - # initialize transform for RGB observations - self.res = transforms.Compose( - [transforms.ToPILImage(), - transforms.Resize((args.frame_height, args.frame_width), - interpolation=Image.NEAREST)]) - - # initialize semantic segmentation prediction model - if args.sem_gpu_id == -1: - args.sem_gpu_id = config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID - - self.sem_pred = SemanticPredMaskRCNN(args) - - # initializations for planning: - self.selem = skimage.morphology.disk(3) - - self.obs = None - self.obs_shape = None - self.collision_map = None - self.visited = None - self.visited_vis = None - self.col_width = None - self.curr_loc = None - self.last_loc = None - self.last_action = None - self.count_forward_actions = None - - if args.visualize or args.print_images: - self.legend = cv2.imread('docs/legend.png') - self.vis_image = None - self.rgb_vis = None - - def reset(self): - args = self.args - - obs, info = super().reset() - obs = self._preprocess_obs(obs) - - self.obs_shape = obs.shape - - # Episode initializations - map_shape = (args.map_size_cm // args.map_resolution, - args.map_size_cm // args.map_resolution) - self.collision_map = np.zeros(map_shape) - self.visited = np.zeros(map_shape) - self.visited_vis = np.zeros(map_shape) - self.col_width = 1 - self.count_forward_actions = 0 - self.curr_loc = [args.map_size_cm / 100.0 / 2.0, - args.map_size_cm / 100.0 / 2.0, 0.] - self.last_action = None - - if args.visualize or args.print_images: - self.vis_image = vu.init_vis_image(self.goal_name, self.legend) - - return obs, info - - def plan_act_and_preprocess(self, planner_inputs): - """Function responsible for planning, taking the action and - preprocessing observations - - Args: - planner_inputs (dict): - dict with following keys: - 'map_pred' (ndarray): (M, M) map prediction - 'goal' (ndarray): (M, M) mat denoting goal locations - 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o) - and planning window (gx1, gx2, gy1, gy2) - 'found_goal' (bool): whether the goal object is found - - Returns: - obs (ndarray): preprocessed observations ((4+C) x H x W) - reward (float): amount of reward returned after previous action - done (bool): whether the episode has ended - info (dict): contains timestep, pose, goal category and - evaluation metric info - """ - - # plan - if planner_inputs["wait"]: - self.last_action = None - self.info["sensor_pose"] = [0., 0., 0.] 
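# When planner_inputs["wait"] is set, the agent skips planning for this step:
# the return below hands back a zero observation, zero reward and done=False
# without stepping the simulator, and sensor_pose = [0, 0, 0] tells the
# mapping code that the pose did not change.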
- return np.zeros(self.obs.shape), 0., False, self.info - - # Reset reward if new long-term goal - if planner_inputs["new_goal"]: - self.info["g_reward"] = 0 - - action = self._plan(planner_inputs) - - if self.args.visualize or self.args.print_images: - self._visualize(planner_inputs) - - if action >= 0: - - # act - action = {'action': action} - obs, rew, done, info = super().step(action) - - # preprocess obs - obs = self._preprocess_obs(obs) - self.last_action = action['action'] - self.obs = obs - self.info = info - - info['g_reward'] += rew - - return obs, rew, done, info - - else: - self.last_action = None - self.info["sensor_pose"] = [0., 0., 0.] - return np.zeros(self.obs_shape), 0., False, self.info - - def _plan(self, planner_inputs): - """Function responsible for planning - - Args: - planner_inputs (dict): - dict with following keys: - 'map_pred' (ndarray): (M, M) map prediction - 'goal' (ndarray): (M, M) goal locations - 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o) - and planning window (gx1, gx2, gy1, gy2) - 'found_goal' (bool): whether the goal object is found - - Returns: - action (int): action id - """ - args = self.args - - self.last_loc = self.curr_loc - - # Get Map prediction - map_pred = np.rint(planner_inputs['map_pred']) - goal = planner_inputs['goal'] - - # Get pose prediction and global policy planning window - start_x, start_y, start_o, gx1, gx2, gy1, gy2 = \ - planner_inputs['pose_pred'] - gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2) - planning_window = [gx1, gx2, gy1, gy2] - - # Get curr loc - self.curr_loc = [start_x, start_y, start_o] - r, c = start_y, start_x - start = [int(r * 100.0 / args.map_resolution - gx1), - int(c * 100.0 / args.map_resolution - gy1)] - start = pu.threshold_poses(start, map_pred.shape) - - self.visited[gx1:gx2, gy1:gy2][start[0] - 0:start[0] + 1, - start[1] - 0:start[1] + 1] = 1 - - if args.visualize or args.print_images: - # Get last loc - last_start_x, last_start_y = self.last_loc[0], self.last_loc[1] - r, c = last_start_y, last_start_x - last_start = [int(r * 100.0 / args.map_resolution - gx1), - int(c * 100.0 / args.map_resolution - gy1)] - last_start = pu.threshold_poses(last_start, map_pred.shape) - self.visited_vis[gx1:gx2, gy1:gy2] = \ - vu.draw_line(last_start, start, - self.visited_vis[gx1:gx2, gy1:gy2]) - - # Collision check - if self.last_action == 1: - x1, y1, t1 = self.last_loc - x2, y2, _ = self.curr_loc - buf = 4 - length = 2 - - if abs(x1 - x2) < 0.05 and abs(y1 - y2) < 0.05: - self.col_width += 2 - if self.col_width == 7: - length = 4 - buf = 3 - self.col_width = min(self.col_width, 5) - else: - self.col_width = 1 - - dist = pu.get_l2_distance(x1, x2, y1, y2) - if dist < args.collision_threshold: # Collision - width = self.col_width - for i in range(length): - for j in range(width): - wx = x1 + 0.05 * \ - ((i + buf) * np.cos(np.deg2rad(t1)) - + (j - width // 2) * np.sin(np.deg2rad(t1))) - wy = y1 + 0.05 * \ - ((i + buf) * np.sin(np.deg2rad(t1)) - - (j - width // 2) * np.cos(np.deg2rad(t1))) - r, c = wy, wx - r, c = int(r * 100 / args.map_resolution), \ - int(c * 100 / args.map_resolution) - [r, c] = pu.threshold_poses([r, c], - self.collision_map.shape) - self.collision_map[r, c] = 1 - - stg, stop = self._get_stg(map_pred, start, np.copy(goal), - planning_window) - - # Deterministic Local Policy - if stop and planner_inputs['found_goal'] == 1: - action = 0 # Stop - else: - (stg_x, stg_y) = stg - angle_st_goal = math.degrees(math.atan2(stg_x - start[0], - stg_y - start[1])) - angle_agent = 
(start_o) % 360.0 - if angle_agent > 180: - angle_agent -= 360 - - relative_angle = (angle_agent - angle_st_goal) % 360.0 - if relative_angle > 180: - relative_angle -= 360 - - if relative_angle > self.args.turn_angle / 2.: - action = 3 # Right - elif relative_angle < -self.args.turn_angle / 2.: - action = 2 # Left - else: - action = 1 # Forward - - return action - - def _get_stg(self, grid, start, goal, planning_window): - """Get short-term goal""" - - [gx1, gx2, gy1, gy2] = planning_window - - x1, y1, = 0, 0 - x2, y2 = grid.shape - - def add_boundary(mat, value=1): - h, w = mat.shape - new_mat = np.zeros((h + 2, w + 2)) + value - new_mat[1:h + 1, 1:w + 1] = mat - return new_mat - - traversible = skimage.morphology.binary_dilation( - grid[x1:x2, y1:y2], - self.selem) != True - traversible[self.collision_map[gx1:gx2, gy1:gy2] - [x1:x2, y1:y2] == 1] = 0 - traversible[self.visited[gx1:gx2, gy1:gy2][x1:x2, y1:y2] == 1] = 1 - - traversible[int(start[0] - x1) - 1:int(start[0] - x1) + 2, - int(start[1] - y1) - 1:int(start[1] - y1) + 2] = 1 - - traversible = add_boundary(traversible) - goal = add_boundary(goal, value=0) - - planner = FMMPlanner(traversible) - selem = skimage.morphology.disk(10) - goal = skimage.morphology.binary_dilation( - goal, selem) != True - goal = 1 - goal * 1. - planner.set_multi_goal(goal) - - state = [start[0] - x1 + 1, start[1] - y1 + 1] - stg_x, stg_y, _, stop = planner.get_short_term_goal(state) - - stg_x, stg_y = stg_x + x1 - 1, stg_y + y1 - 1 - - return (stg_x, stg_y), stop - - def _preprocess_obs(self, obs, use_seg=True): - args = self.args - obs = obs.transpose(1, 2, 0) - rgb = obs[:, :, :3] - depth = obs[:, :, 3:4] - - sem_seg_pred = self._get_sem_pred( - rgb.astype(np.uint8), use_seg=use_seg) - depth = self._preprocess_depth(depth, args.min_depth, args.max_depth) - - ds = args.env_frame_width // args.frame_width # Downscaling factor - if ds != 1: - rgb = np.asarray(self.res(rgb.astype(np.uint8))) - depth = depth[ds // 2::ds, ds // 2::ds] - sem_seg_pred = sem_seg_pred[ds // 2::ds, ds // 2::ds] - - depth = np.expand_dims(depth, axis=2) - state = np.concatenate((rgb, depth, sem_seg_pred), - axis=2).transpose(2, 0, 1) - - return state - - def _preprocess_depth(self, depth, min_d, max_d): - depth = depth[:, :, 0] * 1 - - for i in range(depth.shape[1]): - depth[:, i][depth[:, i] == 0.] = depth[:, i].max() - - mask2 = depth > 0.99 - depth[mask2] = 0. 
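# Up to this point depth holds the sensor's normalized reading in [0, 1]:
# zero (invalid) pixels were filled column-wise with the column maximum above,
# and near-saturated readings (> 0.99) have just been zeroed. The lines below
# push any remaining zeros far out of range (100.0) and rescale everything to
# centimeters via min_d * 100 + depth * max_d * 100.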
- - mask1 = depth == 0 - depth[mask1] = 100.0 - depth = min_d * 100.0 + depth * max_d * 100.0 - return depth - - def _get_sem_pred(self, rgb, use_seg=True): - if use_seg: - semantic_pred, self.rgb_vis = self.sem_pred.get_prediction(rgb) - semantic_pred = semantic_pred.astype(np.float32) - else: - semantic_pred = np.zeros((rgb.shape[0], rgb.shape[1], 16)) - self.rgb_vis = rgb[:, :, ::-1] - return semantic_pred - - def _visualize(self, inputs): - args = self.args - dump_dir = "{}/dump/{}/".format(args.dump_location, - args.exp_name) - ep_dir = '{}/episodes/thread_{}/eps_{}/'.format( - dump_dir, self.rank, self.episode_no) - if not os.path.exists(ep_dir): - os.makedirs(ep_dir) - - map_pred = inputs['map_pred'] - exp_pred = inputs['exp_pred'] - start_x, start_y, start_o, gx1, gx2, gy1, gy2 = inputs['pose_pred'] - - goal = inputs['goal'] - sem_map = inputs['sem_map_pred'] - - gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2) - - sem_map += 5 - - no_cat_mask = sem_map == 20 - map_mask = np.rint(map_pred) == 1 - exp_mask = np.rint(exp_pred) == 1 - vis_mask = self.visited_vis[gx1:gx2, gy1:gy2] == 1 - - sem_map[no_cat_mask] = 0 - m1 = np.logical_and(no_cat_mask, exp_mask) - sem_map[m1] = 2 - - m2 = np.logical_and(no_cat_mask, map_mask) - sem_map[m2] = 1 - - sem_map[vis_mask] = 3 - - selem = skimage.morphology.disk(4) - goal_mat = 1 - skimage.morphology.binary_dilation( - goal, selem) != True - - goal_mask = goal_mat == 1 - sem_map[goal_mask] = 4 - - color_pal = [int(x * 255.) for x in color_palette] - sem_map_vis = Image.new("P", (sem_map.shape[1], - sem_map.shape[0])) - sem_map_vis.putpalette(color_pal) - sem_map_vis.putdata(sem_map.flatten().astype(np.uint8)) - sem_map_vis = sem_map_vis.convert("RGB") - sem_map_vis = np.flipud(sem_map_vis) - - sem_map_vis = sem_map_vis[:, :, [2, 1, 0]] - sem_map_vis = cv2.resize(sem_map_vis, (480, 480), - interpolation=cv2.INTER_NEAREST) - self.vis_image[50:530, 15:655] = self.rgb_vis - self.vis_image[50:530, 670:1150] = sem_map_vis - - pos = ( - (start_x * 100. / args.map_resolution - gy1) - * 480 / map_pred.shape[0], - (map_pred.shape[1] - start_y * 100. 
/ args.map_resolution + gx1) - * 480 / map_pred.shape[1], - np.deg2rad(-start_o) - ) - - agent_arrow = vu.get_contour_points(pos, origin=(670, 50)) - color = (int(color_palette[11] * 255), - int(color_palette[10] * 255), - int(color_palette[9] * 255)) - cv2.drawContours(self.vis_image, [agent_arrow], 0, color, -1) - - if args.visualize: - # Displaying the image - cv2.imshow("Thread {}".format(self.rank), self.vis_image) - cv2.waitKey(1) - - if args.print_images: - fn = '{}/episodes/thread_{}/eps_{}/{}-{}-Vis-{}.png'.format( - dump_dir, self.rank, self.episode_no, - self.rank, self.episode_no, self.timestep) - cv2.imwrite(fn, self.vis_image) diff --git a/algo/__init__.py b/algo/__init__.py deleted file mode 100644 index 91ac6f5..0000000 --- a/algo/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .ppo import PPO diff --git a/algo/ppo.py b/algo/ppo.py deleted file mode 100644 index e2ea796..0000000 --- a/algo/ppo.py +++ /dev/null @@ -1,108 +0,0 @@ -# The following code is largely borrowed from: -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/algo/ppo.py - -import torch -import torch.nn as nn -import torch.optim as optim - - -class PPO(): - - def __init__( - self, - actor_critic, - clip_param, - ppo_epoch, - num_mini_batch, - value_loss_coef, - entropy_coef, - lr=None, - eps=None, - max_grad_norm=None, - use_clipped_value_loss=True): - - self.actor_critic = actor_critic - - self.clip_param = clip_param - self.ppo_epoch = ppo_epoch - self.num_mini_batch = num_mini_batch - - self.value_loss_coef = value_loss_coef - self.entropy_coef = entropy_coef - - self.max_grad_norm = max_grad_norm - self.use_clipped_value_loss = use_clipped_value_loss - - self.optimizer = optim.Adam(filter(lambda p: p.requires_grad, - actor_critic.parameters()), - lr=lr, eps=eps) - - def update(self, rollouts): - advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1] - advantages = (advantages - advantages.mean()) / ( - advantages.std() + 1e-5) - - value_loss_epoch = 0 - action_loss_epoch = 0 - dist_entropy_epoch = 0 - - for _ in range(self.ppo_epoch): - - if self.actor_critic.is_recurrent: - data_generator = rollouts.recurrent_generator( - advantages, self.num_mini_batch) - else: - data_generator = rollouts.feed_forward_generator( - advantages, self.num_mini_batch) - - for sample in data_generator: - - value_preds = sample['value_preds'] - returns = sample['returns'] - adv_targ = sample['adv_targ'] - - # Reshape to do in a single forward pass for all steps - values, action_log_probs, dist_entropy, _ = \ - self.actor_critic.evaluate_actions( - sample['obs'], sample['rec_states'], - sample['masks'], sample['actions'], - extras=sample['extras'] - ) - - ratio = torch.exp(action_log_probs - - sample['old_action_log_probs']) - surr1 = ratio * adv_targ - surr2 = torch.clamp(ratio, 1.0 - self.clip_param, - 1.0 + self.clip_param) * adv_targ - action_loss = -torch.min(surr1, surr2).mean() - - if self.use_clipped_value_loss: - value_pred_clipped = value_preds + \ - (values - value_preds).clamp( - -self.clip_param, self.clip_param) - value_losses = (values - returns).pow(2) - value_losses_clipped = (value_pred_clipped - - returns).pow(2) - value_loss = .5 * torch.max(value_losses, - value_losses_clipped).mean() - else: - value_loss = 0.5 * (returns - values).pow(2).mean() - - self.optimizer.zero_grad() - (value_loss * self.value_loss_coef + action_loss - - dist_entropy * self.entropy_coef).backward() - nn.utils.clip_grad_norm_(self.actor_critic.parameters(), - self.max_grad_norm) - 
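# The backward pass just above computed gradients of the combined PPO
# objective (value_loss * value_loss_coef + action_loss
# - dist_entropy * entropy_coef); clipping their global norm to max_grad_norm
# keeps a single noisy mini-batch from producing an oversized update when
# optimizer.step() runs next.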
self.optimizer.step() - - value_loss_epoch += value_loss.item() - action_loss_epoch += action_loss.item() - dist_entropy_epoch += dist_entropy.item() - - num_updates = self.ppo_epoch * self.num_mini_batch - - value_loss_epoch /= num_updates - action_loss_epoch /= num_updates - dist_entropy_epoch /= num_updates - - return value_loss_epoch, action_loss_epoch, dist_entropy_epoch diff --git a/arguments.py b/arguments.py deleted file mode 100644 index baafb20..0000000 --- a/arguments.py +++ /dev/null @@ -1,230 +0,0 @@ -import argparse -import torch - - -def get_args(): - parser = argparse.ArgumentParser( - description='Goal-Oriented-Semantic-Exploration') - - # General Arguments - parser.add_argument('--seed', type=int, default=1, - help='random seed (default: 1)') - parser.add_argument('--auto_gpu_config', type=int, default=1) - parser.add_argument('--total_num_scenes', type=str, default="auto") - parser.add_argument('-n', '--num_processes', type=int, default=5, - help="""how many training processes to use (default:5) - Overridden when auto_gpu_config=1 - and training on gpus""") - parser.add_argument('--num_processes_per_gpu', type=int, default=6) - parser.add_argument('--num_processes_on_first_gpu', type=int, default=1) - parser.add_argument('--eval', type=int, default=0, - help='0: Train, 1: Evaluate (default: 0)') - parser.add_argument('--num_training_frames', type=int, default=10000000, - help='total number of training frames') - parser.add_argument('--num_eval_episodes', type=int, default=200, - help="number of test episodes per scene") - parser.add_argument('--num_train_episodes', type=int, default=10000, - help="""number of train episodes per scene - before loading the next scene""") - parser.add_argument('--no_cuda', action='store_true', default=False, - help='disables CUDA training') - parser.add_argument("--sim_gpu_id", type=int, default=0, - help="gpu id on which scenes are loaded") - parser.add_argument("--sem_gpu_id", type=int, default=-1, - help="""gpu id for semantic model, - -1: same as sim gpu, -2: cpu""") - - # Logging, loading models, visualization - parser.add_argument('--log_interval', type=int, default=10, - help="""log interval, one log per n updates - (default: 10) """) - parser.add_argument('--save_interval', type=int, default=1, - help="""save interval""") - parser.add_argument('-d', '--dump_location', type=str, default="./tmp/", - help='path to dump models and log (default: ./tmp/)') - parser.add_argument('--exp_name', type=str, default="exp1", - help='experiment name (default: exp1)') - parser.add_argument('--save_periodic', type=int, default=500000, - help='Model save frequency in number of updates') - parser.add_argument('--load', type=str, default="0", - help="""model path to load, - 0 to not reload (default: 0)""") - parser.add_argument('-v', '--visualize', type=int, default=0, - help="""1: Render the observation and - the predicted semantic map, - 2: Render the observation with semantic - predictions and the predicted semantic map - (default: 0)""") - parser.add_argument('--print_images', type=int, default=0, - help='1: save visualization as images') - - # Environment, dataset and episode specifications - parser.add_argument('-efw', '--env_frame_width', type=int, default=640, - help='Frame width (default:640)') - parser.add_argument('-efh', '--env_frame_height', type=int, default=480, - help='Frame height (default:480)') - parser.add_argument('-fw', '--frame_width', type=int, default=160, - help='Frame width (default:160)') - parser.add_argument('-fh', 
'--frame_height', type=int, default=120, - help='Frame height (default:120)') - parser.add_argument('-el', '--max_episode_length', type=int, default=500, - help="""Maximum episode length""") - parser.add_argument("--task_config", type=str, - default="tasks/objectnav_gibson.yaml", - help="path to config yaml containing task information") - parser.add_argument("--split", type=str, default="train", - help="dataset split (train | val | val_mini) ") - parser.add_argument('--camera_height', type=float, default=0.88, - help="agent camera height in metres") - parser.add_argument('--hfov', type=float, default=79.0, - help="horizontal field of view in degrees") - parser.add_argument('--turn_angle', type=float, default=30, - help="Agent turn angle in degrees") - parser.add_argument('--min_depth', type=float, default=0.5, - help="Minimum depth for depth sensor in meters") - parser.add_argument('--max_depth', type=float, default=5.0, - help="Maximum depth for depth sensor in meters") - parser.add_argument('--success_dist', type=float, default=1.0, - help="success distance threshold in meters") - parser.add_argument('--floor_thr', type=int, default=50, - help="floor threshold in cm") - parser.add_argument('--min_d', type=float, default=1.5, - help="min distance to goal during training in meters") - parser.add_argument('--max_d', type=float, default=100.0, - help="max distance to goal during training in meters") - parser.add_argument('--version', type=str, default="v1.1", - help="dataset version") - - # Model Hyperparameters - parser.add_argument('--agent', type=str, default="sem_exp") - parser.add_argument('--lr', type=float, default=2.5e-5, - help='learning rate (default: 2.5e-5)') - parser.add_argument('--global_hidden_size', type=int, default=256, - help='global_hidden_size') - parser.add_argument('--eps', type=float, default=1e-5, - help='RL Optimizer epsilon (default: 1e-5)') - parser.add_argument('--alpha', type=float, default=0.99, - help='RL Optimizer alpha (default: 0.99)') - parser.add_argument('--gamma', type=float, default=0.99, - help='discount factor for rewards (default: 0.99)') - parser.add_argument('--use_gae', action='store_true', default=False, - help='use generalized advantage estimation') - parser.add_argument('--tau', type=float, default=0.95, - help='gae parameter (default: 0.95)') - parser.add_argument('--entropy_coef', type=float, default=0.001, - help='entropy term coefficient (default: 0.01)') - parser.add_argument('--value_loss_coef', type=float, default=0.5, - help='value loss coefficient (default: 0.5)') - parser.add_argument('--max_grad_norm', type=float, default=0.5, - help='max norm of gradients (default: 0.5)') - parser.add_argument('--num_global_steps', type=int, default=20, - help='number of forward steps in A2C (default: 5)') - parser.add_argument('--ppo_epoch', type=int, default=4, - help='number of ppo epochs (default: 4)') - parser.add_argument('--num_mini_batch', type=str, default="auto", - help='number of batches for ppo (default: 32)') - parser.add_argument('--clip_param', type=float, default=0.2, - help='ppo clip parameter (default: 0.2)') - parser.add_argument('--use_recurrent_global', type=int, default=0, - help='use a recurrent global policy') - parser.add_argument('--num_local_steps', type=int, default=25, - help="""Number of steps the local policy - between each global step""") - parser.add_argument('--reward_coeff', type=float, default=0.1, - help="Object goal reward coefficient") - parser.add_argument('--intrinsic_rew_coeff', type=float, default=0.02, - 
help="intrinsic exploration reward coefficient") - parser.add_argument('--num_sem_categories', type=float, default=16) - parser.add_argument('--sem_pred_prob_thr', type=float, default=0.9, - help="Semantic prediction confidence threshold") - - # Mapping - parser.add_argument('--global_downscaling', type=int, default=2) - parser.add_argument('--vision_range', type=int, default=100) - parser.add_argument('--map_resolution', type=int, default=5) - parser.add_argument('--du_scale', type=int, default=1) - parser.add_argument('--map_size_cm', type=int, default=2400) - parser.add_argument('--cat_pred_threshold', type=float, default=5.0) - parser.add_argument('--map_pred_threshold', type=float, default=1.0) - parser.add_argument('--exp_pred_threshold', type=float, default=1.0) - parser.add_argument('--collision_threshold', type=float, default=0.20) - - # parse arguments - args = parser.parse_args() - - args.cuda = not args.no_cuda and torch.cuda.is_available() - - if args.cuda: - if args.auto_gpu_config: - num_gpus = torch.cuda.device_count() - if args.total_num_scenes != "auto": - args.total_num_scenes = int(args.total_num_scenes) - elif "objectnav_gibson" in args.task_config and \ - "train" in args.split: - args.total_num_scenes = 25 - elif "objectnav_gibson" in args.task_config and \ - "val" in args.split: - args.total_num_scenes = 5 - else: - assert False, "Unknown task config, please specify" + \ - " total_num_scenes" - - # GPU Memory required for the SemExp model: - # 0.8 + 0.4 * args.total_num_scenes (GB) - # GPU Memory required per thread: 2.6 (GB) - min_memory_required = max(0.8 + 0.4 * args.total_num_scenes, 2.6) - # Automatically configure number of training threads based on - # number of GPUs available and GPU memory size - gpu_memory = 1000 - for i in range(num_gpus): - gpu_memory = min(gpu_memory, - torch.cuda.get_device_properties( - i).total_memory - / 1024 / 1024 / 1024) - assert gpu_memory > min_memory_required, \ - """Insufficient GPU memory for GPU {}, gpu memory ({}GB) - needs to be greater than {}GB""".format( - i, gpu_memory, min_memory_required) - - num_processes_per_gpu = int(gpu_memory / 2.6) - num_processes_on_first_gpu = \ - int((gpu_memory - min_memory_required) / 2.6) - - if args.eval: - max_threads = num_processes_per_gpu * (num_gpus - 1) \ - + num_processes_on_first_gpu - assert max_threads >= args.total_num_scenes, \ - """Insufficient GPU memory for evaluation""" - - if num_gpus == 1: - args.num_processes_on_first_gpu = num_processes_on_first_gpu - args.num_processes_per_gpu = 0 - args.num_processes = num_processes_on_first_gpu - assert args.num_processes > 0, "Insufficient GPU memory" - else: - num_threads = num_processes_per_gpu * (num_gpus - 1) \ - + num_processes_on_first_gpu - num_threads = min(num_threads, args.total_num_scenes) - args.num_processes_per_gpu = num_processes_per_gpu - args.num_processes_on_first_gpu = max( - 0, - num_threads - args.num_processes_per_gpu * (num_gpus - 1)) - args.num_processes = num_threads - - args.sim_gpu_id = 1 - - print("Auto GPU config:") - print("Number of processes: {}".format(args.num_processes)) - print("Number of processes on GPU 0: {}".format( - args.num_processes_on_first_gpu)) - print("Number of processes per GPU: {}".format( - args.num_processes_per_gpu)) - else: - args.sem_gpu_id = -2 - - if args.num_mini_batch == "auto": - args.num_mini_batch = max(args.num_processes // 2, 1) - else: - args.num_mini_batch = int(args.num_mini_batch) - - return args diff --git a/configs/Base-RCNN-FPN.yaml 
b/configs/Base-RCNN-FPN.yaml deleted file mode 100644 index 3e020f2..0000000 --- a/configs/Base-RCNN-FPN.yaml +++ /dev/null @@ -1,42 +0,0 @@ -MODEL: - META_ARCHITECTURE: "GeneralizedRCNN" - BACKBONE: - NAME: "build_resnet_fpn_backbone" - RESNETS: - OUT_FEATURES: ["res2", "res3", "res4", "res5"] - FPN: - IN_FEATURES: ["res2", "res3", "res4", "res5"] - ANCHOR_GENERATOR: - SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map - ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) - RPN: - IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] - PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level - PRE_NMS_TOPK_TEST: 1000 # Per FPN level - # Detectron1 uses 2000 proposals per-batch, - # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) - # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. - POST_NMS_TOPK_TRAIN: 1000 - POST_NMS_TOPK_TEST: 1000 - ROI_HEADS: - NAME: "StandardROIHeads" - IN_FEATURES: ["p2", "p3", "p4", "p5"] - ROI_BOX_HEAD: - NAME: "FastRCNNConvFCHead" - NUM_FC: 2 - POOLER_RESOLUTION: 7 - ROI_MASK_HEAD: - NAME: "MaskRCNNConvUpsampleHead" - NUM_CONV: 4 - POOLER_RESOLUTION: 14 -DATASETS: - TRAIN: ("coco_2017_train",) - TEST: ("coco_2017_val",) -SOLVER: - IMS_PER_BATCH: 16 - BASE_LR: 0.02 - STEPS: (60000, 80000) - MAX_ITER: 90000 -INPUT: - MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) -VERSION: 2 diff --git a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml b/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml deleted file mode 100644 index be7d06b..0000000 --- a/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml +++ /dev/null @@ -1,9 +0,0 @@ -_BASE_: "../Base-RCNN-FPN.yaml" -MODEL: - WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" - MASK_ON: True - RESNETS: - DEPTH: 50 -SOLVER: - STEPS: (210000, 250000) - MAX_ITER: 270000 diff --git a/constants.py b/constants.py deleted file mode 100644 index 1f0179e..0000000 --- a/constants.py +++ /dev/null @@ -1,94 +0,0 @@ -scenes = {} -scenes["train"] = [ - 'Allensville', - 'Beechwood', - 'Benevolence', - 'Coffeen', - 'Cosmos', - 'Forkland', - 'Hanson', - 'Hiteman', - 'Klickitat', - 'Lakeville', - 'Leonardo', - 'Lindenwood', - 'Marstons', - 'Merom', - 'Mifflinburg', - 'Newfields', - 'Onaga', - 'Pinesdale', - 'Pomaria', - 'Ranchester', - 'Shelbyville', - 'Stockman', - 'Tolstoy', - 'Wainscott', - 'Woodbine', -] - -scenes["val"] = [ - 'Collierville', - 'Corozal', - 'Darden', - 'Markleeville', - 'Wiconisco', -] - -coco_categories = { - "chair": 0, - "couch": 1, - "potted plant": 2, - "bed": 3, - "toilet": 4, - "tv": 5, - "dining-table": 6, - "oven": 7, - "sink": 8, - "refrigerator": 9, - "book": 10, - "clock": 11, - "vase": 12, - "cup": 13, - "bottle": 14 -} - -coco_categories_mapping = { - 56: 0, # chair - 57: 1, # couch - 58: 2, # potted plant - 59: 3, # bed - 61: 4, # toilet - 62: 5, # tv - 60: 6, # dining-table - 69: 7, # oven - 71: 8, # sink - 72: 9, # refrigerator - 73: 10, # book - 74: 11, # clock - 75: 12, # vase - 41: 13, # cup - 39: 14, # bottle -} - -color_palette = [ - 1.0, 1.0, 1.0, - 0.6, 0.6, 0.6, - 0.95, 0.95, 0.95, - 0.96, 0.36, 0.26, - 0.12156862745098039, 0.47058823529411764, 0.7058823529411765, - 0.9400000000000001, 0.7818, 0.66, - 0.9400000000000001, 0.8868, 0.66, - 0.8882000000000001, 0.9400000000000001, 0.66, - 0.7832000000000001, 0.9400000000000001, 0.66, - 0.6782000000000001, 0.9400000000000001, 0.66, - 0.66, 0.9400000000000001, 0.7468000000000001, - 0.66, 0.9400000000000001, 
0.8518000000000001, - 0.66, 0.9232, 0.9400000000000001, - 0.66, 0.8182, 0.9400000000000001, - 0.66, 0.7132, 0.9400000000000001, - 0.7117999999999999, 0.66, 0.9400000000000001, - 0.8168, 0.66, 0.9400000000000001, - 0.9218, 0.66, 0.9400000000000001, - 0.9400000000000001, 0.66, 0.8531999999999998, - 0.9400000000000001, 0.66, 0.748199999999999] diff --git a/envs/habitat/__init__.py b/envs/habitat/__init__.py deleted file mode 100644 index e04b9ed..0000000 --- a/envs/habitat/__init__.py +++ /dev/null @@ -1,150 +0,0 @@ -# Parts of the code in this file have been borrowed from: -# https://github.com/facebookresearch/habitat-api -import os -import numpy as np -import torch -from habitat.config.default import get_config as cfg_env -from habitat.datasets.pointnav.pointnav_dataset import PointNavDatasetV1 -from habitat import Config, Env, RLEnv, VectorEnv, make_dataset - -from agents.sem_exp import Sem_Exp_Env_Agent -from .objectgoal_env import ObjectGoal_Env - -from .utils.vector_env import VectorEnv - - -def make_env_fn(args, config_env, rank): - dataset = make_dataset(config_env.DATASET.TYPE, config=config_env.DATASET) - config_env.defrost() - config_env.SIMULATOR.SCENE = dataset.episodes[0].scene_id - config_env.freeze() - - if args.agent == "sem_exp": - env = Sem_Exp_Env_Agent(args=args, rank=rank, - config_env=config_env, - dataset=dataset - ) - else: - env = ObjectGoal_Env(args=args, rank=rank, - config_env=config_env, - dataset=dataset - ) - - env.seed(rank) - return env - - -def _get_scenes_from_folder(content_dir): - scene_dataset_ext = ".glb.json.gz" - scenes = [] - for filename in os.listdir(content_dir): - if filename.endswith(scene_dataset_ext): - scene = filename[: -len(scene_dataset_ext) + 4] - scenes.append(scene) - scenes.sort() - return scenes - - -def construct_envs(args): - env_configs = [] - args_list = [] - - basic_config = cfg_env(config_paths=["envs/habitat/configs/" - + args.task_config]) - basic_config.defrost() - basic_config.DATASET.SPLIT = args.split - basic_config.DATASET.DATA_PATH = \ - basic_config.DATASET.DATA_PATH.replace("v1", args.version) - basic_config.DATASET.EPISODES_DIR = \ - basic_config.DATASET.EPISODES_DIR.replace("v1", args.version) - basic_config.freeze() - - scenes = basic_config.DATASET.CONTENT_SCENES - if "*" in basic_config.DATASET.CONTENT_SCENES: - content_dir = os.path.join(basic_config.DATASET.EPISODES_DIR.format( - split=args.split), "content") - scenes = _get_scenes_from_folder(content_dir) - - if len(scenes) > 0: - assert len(scenes) >= args.num_processes, ( - "reduce the number of processes as there " - "aren't enough number of scenes" - ) - - scene_split_sizes = [int(np.floor(len(scenes) / args.num_processes)) - for _ in range(args.num_processes)] - for i in range(len(scenes) % args.num_processes): - scene_split_sizes[i] += 1 - - print("Scenes per thread:") - for i in range(args.num_processes): - config_env = cfg_env(config_paths=["envs/habitat/configs/" - + args.task_config]) - config_env.defrost() - - if len(scenes) > 0: - config_env.DATASET.CONTENT_SCENES = scenes[ - sum(scene_split_sizes[:i]): - sum(scene_split_sizes[:i + 1]) - ] - print("Thread {}: {}".format(i, config_env.DATASET.CONTENT_SCENES)) - - if i < args.num_processes_on_first_gpu: - gpu_id = 0 - else: - gpu_id = int((i - args.num_processes_on_first_gpu) - // args.num_processes_per_gpu) + args.sim_gpu_id - gpu_id = min(torch.cuda.device_count() - 1, gpu_id) - config_env.SIMULATOR.HABITAT_SIM_V0.GPU_DEVICE_ID = gpu_id - - agent_sensors = [] - agent_sensors.append("RGB_SENSOR") - 
agent_sensors.append("DEPTH_SENSOR") - # agent_sensors.append("SEMANTIC_SENSOR") - - config_env.SIMULATOR.AGENT_0.SENSORS = agent_sensors - - # Reseting episodes manually, setting high max episode length in sim - config_env.ENVIRONMENT.MAX_EPISODE_STEPS = 10000000 - config_env.ENVIRONMENT.ITERATOR_OPTIONS.SHUFFLE = False - - config_env.SIMULATOR.RGB_SENSOR.WIDTH = args.env_frame_width - config_env.SIMULATOR.RGB_SENSOR.HEIGHT = args.env_frame_height - config_env.SIMULATOR.RGB_SENSOR.HFOV = args.hfov - config_env.SIMULATOR.RGB_SENSOR.POSITION = [0, args.camera_height, 0] - - config_env.SIMULATOR.DEPTH_SENSOR.WIDTH = args.env_frame_width - config_env.SIMULATOR.DEPTH_SENSOR.HEIGHT = args.env_frame_height - config_env.SIMULATOR.DEPTH_SENSOR.HFOV = args.hfov - config_env.SIMULATOR.DEPTH_SENSOR.MIN_DEPTH = args.min_depth - config_env.SIMULATOR.DEPTH_SENSOR.MAX_DEPTH = args.max_depth - config_env.SIMULATOR.DEPTH_SENSOR.POSITION = [0, args.camera_height, 0] - - # config_env.SIMULATOR.SEMANTIC_SENSOR.WIDTH = args.env_frame_width - # config_env.SIMULATOR.SEMANTIC_SENSOR.HEIGHT = args.env_frame_height - # config_env.SIMULATOR.SEMANTIC_SENSOR.HFOV = args.hfov - # config_env.SIMULATOR.SEMANTIC_SENSOR.POSITION = \ - # [0, args.camera_height, 0] - - config_env.SIMULATOR.TURN_ANGLE = args.turn_angle - config_env.DATASET.SPLIT = args.split - config_env.DATASET.DATA_PATH = \ - config_env.DATASET.DATA_PATH.replace("v1", args.version) - config_env.DATASET.EPISODES_DIR = \ - config_env.DATASET.EPISODES_DIR.replace("v1", args.version) - - config_env.freeze() - env_configs.append(config_env) - - args_list.append(args) - - envs = VectorEnv( - make_env_fn=make_env_fn, - env_fn_args=tuple( - tuple( - zip(args_list, env_configs, range(args.num_processes)) - ) - ), - ) - - return envs diff --git a/envs/habitat/configs/tasks/objectnav_gibson.yaml b/envs/habitat/configs/tasks/objectnav_gibson.yaml deleted file mode 100644 index d0b7d92..0000000 --- a/envs/habitat/configs/tasks/objectnav_gibson.yaml +++ /dev/null @@ -1,44 +0,0 @@ -ENVIRONMENT: - MAX_EPISODE_STEPS: 500 -SIMULATOR: - TURN_ANGLE: 30 - TILT_ANGLE: 30 - ACTION_SPACE_CONFIG: "v1" - AGENT_0: - SENSORS: ['RGB_SENSOR', 'DEPTH_SENSOR', 'SEMANTIC_SENSOR'] - HEIGHT: 0.88 - RADIUS: 0.18 - HABITAT_SIM_V0: - GPU_DEVICE_ID: 0 - ALLOW_SLIDING: True - SEMANTIC_SENSOR: - WIDTH: 640 - HEIGHT: 480 - HFOV: 79 - POSITION: [0, 0.88, 0] - RGB_SENSOR: - WIDTH: 640 - HEIGHT: 480 - HFOV: 79 - POSITION: [0, 0.88, 0] - DEPTH_SENSOR: - WIDTH: 640 - HEIGHT: 480 - HFOV: 79 - MIN_DEPTH: 0.5 - MAX_DEPTH: 5.0 - POSITION: [0, 0.88, 0] -TASK: - TYPE: ObjectNav-v1 - POSSIBLE_ACTIONS: ["STOP", "MOVE_FORWARD", "TURN_LEFT", "TURN_RIGHT", "LOOK_UP", "LOOK_DOWN"] - SENSORS: ['GPS_SENSOR', 'COMPASS_SENSOR'] - MEASUREMENTS: ['DISTANCE_TO_GOAL', 'SUCCESS', 'SPL'] - SUCCESS: - SUCCESS_DISTANCE: 0.2 - -DATASET: - TYPE: PointNav-v1 - SPLIT: train - DATA_PATH: "data/datasets/objectnav/gibson/v1/{split}/{split}.json.gz" - EPISODES_DIR: "data/datasets/objectnav/gibson/v1/{split}/" - SCENES_DIR: "data/scene_datasets/" diff --git a/envs/habitat/objectgoal_env.py b/envs/habitat/objectgoal_env.py deleted file mode 100644 index a08dd55..0000000 --- a/envs/habitat/objectgoal_env.py +++ /dev/null @@ -1,465 +0,0 @@ -import json -import bz2 -import gzip -import _pickle as cPickle -import gym -import numpy as np -import quaternion -import skimage.morphology -import habitat - -from envs.utils.fmm_planner import FMMPlanner -from constants import coco_categories -import envs.utils.pose as pu - - -class 
ObjectGoal_Env(habitat.RLEnv): - """The Object Goal Navigation environment class. The class is responsible - for loading the dataset, generating episodes, and computing evaluation - metrics. - """ - - def __init__(self, args, rank, config_env, dataset): - self.args = args - self.rank = rank - - super().__init__(config_env, dataset) - - # Loading dataset info file - self.split = config_env.DATASET.SPLIT - self.episodes_dir = config_env.DATASET.EPISODES_DIR.format( - split=self.split) - - dataset_info_file = self.episodes_dir + \ - "{split}_info.pbz2".format(split=self.split) - with bz2.BZ2File(dataset_info_file, 'rb') as f: - self.dataset_info = cPickle.load(f) - - # Specifying action and observation space - self.action_space = gym.spaces.Discrete(3) - - self.observation_space = gym.spaces.Box(0, 255, - (3, args.frame_height, - args.frame_width), - dtype='uint8') - - # Initializations - self.episode_no = 0 - - # Scene info - self.last_scene_path = None - self.scene_path = None - self.scene_name = None - - # Episode Dataset info - self.eps_data = None - self.eps_data_idx = None - self.gt_planner = None - self.object_boundary = None - self.goal_idx = None - self.goal_name = None - self.map_obj_origin = None - self.starting_loc = None - self.starting_distance = None - - # Episode tracking info - self.curr_distance = None - self.prev_distance = None - self.timestep = None - self.stopped = None - self.path_length = None - self.last_sim_location = None - self.trajectory_states = [] - self.info = {} - self.info['distance_to_goal'] = None - self.info['spl'] = None - self.info['success'] = None - - def load_new_episode(self): - """The function loads a fixed episode from the episode dataset. This - function is used for evaluating a trained model on the val split. - """ - - args = self.args - self.scene_path = self.habitat_env.sim.config.SCENE - scene_name = self.scene_path.split("/")[-1].split(".")[0] - - if self.scene_path != self.last_scene_path: - episodes_file = self.episodes_dir + \ - "content/{}_episodes.json.gz".format(scene_name) - - print("Loading episodes from: {}".format(episodes_file)) - with gzip.open(episodes_file, 'r') as f: - self.eps_data = json.loads( - f.read().decode('utf-8'))["episodes"] - - self.eps_data_idx = 0 - self.last_scene_path = self.scene_path - - # Load episode info - episode = self.eps_data[self.eps_data_idx] - self.eps_data_idx += 1 - self.eps_data_idx = self.eps_data_idx % len(self.eps_data) - pos = episode["start_position"] - rot = quaternion.from_float_array(episode["start_rotation"]) - - goal_name = episode["object_category"] - goal_idx = episode["object_id"] - floor_idx = episode["floor_id"] - - # Load scene info - scene_info = self.dataset_info[scene_name] - sem_map = scene_info[floor_idx]['sem_map'] - map_obj_origin = scene_info[floor_idx]['origin'] - - # Setup ground truth planner - object_boundary = args.success_dist - map_resolution = args.map_resolution - selem = skimage.morphology.disk(2) - traversible = skimage.morphology.binary_dilation( - sem_map[0], selem) != True - traversible = 1 - traversible - planner = FMMPlanner(traversible) - selem = skimage.morphology.disk( - int(object_boundary * 100. / map_resolution)) - goal_map = skimage.morphology.binary_dilation( - sem_map[goal_idx + 1], selem) != True - goal_map = 1 - goal_map - planner.set_multi_goal(goal_map) - - # Get starting loc in GT map coordinates - x = -pos[2] - y = -pos[0] - min_x, min_y = map_obj_origin / 100.0 - map_loc = int((-y - min_y) * 20.), int((-x - min_x) * 20.) 
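# The hard-coded factor of 20. converts meters to ground-truth map cells:
# with the default map_resolution of 5 cm per cell, 1 m = 100 / 5 = 20 cells.
# The same constant is used wherever fmm_dist is divided by 20.0 to recover a
# distance in meters, and map_obj_origin is stored in centimeters (hence the
# / 100.0 above); Habitat's world frame is remapped with x = -pos[2], y = -pos[0].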
- - self.gt_planner = planner - self.starting_loc = map_loc - self.object_boundary = object_boundary - self.goal_idx = goal_idx - self.goal_name = goal_name - self.map_obj_origin = map_obj_origin - - self.starting_distance = self.gt_planner.fmm_dist[self.starting_loc]\ - / 20.0 + self.object_boundary - self.prev_distance = self.starting_distance - self._env.sim.set_agent_state(pos, rot) - - # The following two should match approximately - # print(starting_loc) - # print(self.sim_continuous_to_sim_map(self.get_sim_location())) - - obs = self._env.sim.get_observations_at(pos, rot) - - return obs - - def generate_new_episode(self): - """The function generates a random valid episode. This function is used - for training a model on the train split. - """ - - args = self.args - - self.scene_path = self.habitat_env.sim.config.SCENE - scene_name = self.scene_path.split("/")[-1].split(".")[0] - - scene_info = self.dataset_info[scene_name] - map_resolution = args.map_resolution - - floor_idx = np.random.randint(len(scene_info.keys())) - floor_height = scene_info[floor_idx]['floor_height'] - sem_map = scene_info[floor_idx]['sem_map'] - map_obj_origin = scene_info[floor_idx]['origin'] - - cat_counts = sem_map.sum(2).sum(1) - possible_cats = list(np.arange(6)) - - for i in range(6): - if cat_counts[i + 1] == 0: - possible_cats.remove(i) - - object_boundary = args.success_dist - - loc_found = False - while not loc_found: - if len(possible_cats) == 0: - print("No valid objects for {}".format(floor_height)) - eps = eps - 1 - continue - - goal_idx = np.random.choice(possible_cats) - - for key, value in coco_categories.items(): - if value == goal_idx: - goal_name = key - - selem = skimage.morphology.disk(2) - traversible = skimage.morphology.binary_dilation( - sem_map[0], selem) != True - traversible = 1 - traversible - - planner = FMMPlanner(traversible) - - selem = skimage.morphology.disk( - int(object_boundary * 100. / map_resolution)) - goal_map = skimage.morphology.binary_dilation( - sem_map[goal_idx + 1], selem) != True - goal_map = 1 - goal_map - - planner.set_multi_goal(goal_map) - - m1 = sem_map[0] > 0 - m2 = planner.fmm_dist > (args.min_d - object_boundary) * 20.0 - m3 = planner.fmm_dist < (args.max_d - object_boundary) * 20.0 - - possible_starting_locs = np.logical_and(m1, m2) - possible_starting_locs = np.logical_and( - possible_starting_locs, m3) * 1. - if possible_starting_locs.sum() != 0: - loc_found = True - else: - print("Invalid object: {} / {} / {}".format( - scene_name, floor_height, goal_name)) - possible_cats.remove(goal_idx) - scene_info[floor_idx]["sem_map"][goal_idx + 1, :, :] = 0. - self.dataset_info[scene_name][floor_idx][ - "sem_map"][goal_idx + 1, :, :] = 0. - - loc_found = False - while not loc_found: - pos = self._env.sim.sample_navigable_point() - x = -pos[2] - y = -pos[0] - min_x, min_y = map_obj_origin / 100.0 - map_loc = int((-y - min_y) * 20.), int((-x - min_x) * 20.) 
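# The sampled start position is accepted only if it lies on the chosen floor
# (within floor_thr centimeters of floor_height) and falls inside
# possible_starting_locs, i.e. on the mapped floor area at a geodesic distance
# between roughly min_d and max_d meters (minus the success boundary) from the
# goal, as computed above.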
- if abs(pos[1] - floor_height) < args.floor_thr / 100.0 and \ - possible_starting_locs[map_loc[0], map_loc[1]] == 1: - loc_found = True - - agent_state = self._env.sim.get_agent_state(0) - rotation = agent_state.rotation - rvec = quaternion.as_rotation_vector(rotation) - rvec[1] = np.random.rand() * 2 * np.pi - rot = quaternion.from_rotation_vector(rvec) - - self.gt_planner = planner - self.starting_loc = map_loc - self.object_boundary = object_boundary - self.goal_idx = goal_idx - self.goal_name = goal_name - self.map_obj_origin = map_obj_origin - - self.starting_distance = self.gt_planner.fmm_dist[self.starting_loc] \ - / 20.0 + self.object_boundary - self.prev_distance = self.starting_distance - - self._env.sim.set_agent_state(pos, rot) - - # The following two should match approximately - # print(starting_loc) - # print(self.sim_continuous_to_sim_map(self.get_sim_location())) - - obs = self._env.sim.get_observations_at(pos, rot) - - return obs - - def sim_map_to_sim_continuous(self, coords): - """Converts ground-truth 2D Map coordinates to absolute Habitat - simulator position and rotation. - """ - agent_state = self._env.sim.get_agent_state(0) - y, x = coords - min_x, min_y = self.map_obj_origin / 100.0 - - cont_x = x / 20. + min_x - cont_y = y / 20. + min_y - agent_state.position[0] = cont_y - agent_state.position[2] = cont_x - - rotation = agent_state.rotation - rvec = quaternion.as_rotation_vector(rotation) - - if self.args.train_single_eps: - rvec[1] = 0.0 - else: - rvec[1] = np.random.rand() * 2 * np.pi - rot = quaternion.from_rotation_vector(rvec) - - return agent_state.position, rot - - def sim_continuous_to_sim_map(self, sim_loc): - """Converts absolute Habitat simulator pose to ground-truth 2D Map - coordinates. - """ - x, y, o = sim_loc - min_x, min_y = self.map_obj_origin / 100.0 - x, y = int((-x - min_x) * 20.), int((-y - min_y) * 20.) - - o = np.rad2deg(o) + 180.0 - return y, x, o - - def reset(self): - """Resets the environment to a new episode. - - Returns: - obs (ndarray): RGBD observations (4 x H x W) - info (dict): contains timestep, pose, goal category and - evaluation metric info - """ - args = self.args - new_scene = self.episode_no % args.num_train_episodes == 0 - - self.episode_no += 1 - - # Initializations - self.timestep = 0 - self.stopped = False - self.path_length = 1e-5 - self.trajectory_states = [] - - if new_scene: - obs = super().reset() - self.scene_name = self.habitat_env.sim.config.SCENE - print("Changing scene: {}/{}".format(self.rank, self.scene_name)) - - self.scene_path = self.habitat_env.sim.config.SCENE - - if self.split == "val": - obs = self.load_new_episode() - else: - obs = self.generate_new_episode() - - rgb = obs['rgb'].astype(np.uint8) - depth = obs['depth'] - state = np.concatenate((rgb, depth), axis=2).transpose(2, 0, 1) - self.last_sim_location = self.get_sim_location() - - # Set info - self.info['time'] = self.timestep - self.info['sensor_pose'] = [0., 0., 0.] - self.info['goal_cat_id'] = self.goal_idx - self.info['goal_name'] = self.goal_name - - return state, self.info - - def step(self, action): - """Function to take an action in the environment. 
- - Args: - action (dict): - dict with following keys: - 'action' (int): 0: stop, 1: forward, 2: left, 3: right - - Returns: - obs (ndarray): RGBD observations (4 x H x W) - reward (float): amount of reward returned after previous action - done (bool): whether the episode has ended - info (dict): contains timestep, pose, goal category and - evaluation metric info - """ - action = action["action"] - if action == 0: - self.stopped = True - # Not sending stop to simulator, resetting manually - action = 3 - - obs, rew, done, _ = super().step(action) - - # Get pose change - dx, dy, do = self.get_pose_change() - self.info['sensor_pose'] = [dx, dy, do] - self.path_length += pu.get_l2_distance(0, dx, 0, dy) - - spl, success, dist = 0., 0., 0. - if done: - spl, success, dist = self.get_metrics() - self.info['distance_to_goal'] = dist - self.info['spl'] = spl - self.info['success'] = success - - rgb = obs['rgb'].astype(np.uint8) - depth = obs['depth'] - state = np.concatenate((rgb, depth), axis=2).transpose(2, 0, 1) - - self.timestep += 1 - self.info['time'] = self.timestep - - return state, rew, done, self.info - - def get_reward_range(self): - """This function is not used, Habitat-RLEnv requires this function""" - return (0., 1.0) - - def get_reward(self, observations): - curr_loc = self.sim_continuous_to_sim_map(self.get_sim_location()) - self.curr_distance = self.gt_planner.fmm_dist[curr_loc[0], - curr_loc[1]] / 20.0 - - reward = (self.prev_distance - self.curr_distance) * \ - self.args.reward_coeff - - self.prev_distance = self.curr_distance - return reward - - def get_metrics(self): - """This function computes evaluation metrics for the Object Goal task - - Returns: - spl (float): Success weighted by Path Length - (See https://arxiv.org/pdf/1807.06757.pdf) - success (int): 0: Failure, 1: Successful - dist (float): Distance to Success (DTS), distance of the agent - from the success threshold boundary in meters. 
- (See https://arxiv.org/pdf/2007.00643.pdf) - """ - curr_loc = self.sim_continuous_to_sim_map(self.get_sim_location()) - dist = self.gt_planner.fmm_dist[curr_loc[0], curr_loc[1]] / 20.0 - if dist == 0.0: - success = 1 - else: - success = 0 - spl = min(success * self.starting_distance / self.path_length, 1) - return spl, success, dist - - def get_done(self, observations): - if self.info['time'] >= self.args.max_episode_length - 1: - done = True - elif self.stopped: - done = True - else: - done = False - return done - - def get_info(self, observations): - """This function is not used, Habitat-RLEnv requires this function""" - info = {} - return info - - def get_spaces(self): - """Returns observation and action spaces for the ObjectGoal task.""" - return self.observation_space, self.action_space - - def get_sim_location(self): - """Returns x, y, o pose of the agent in the Habitat simulator.""" - - agent_state = super().habitat_env.sim.get_agent_state(0) - x = -agent_state.position[2] - y = -agent_state.position[0] - axis = quaternion.as_euler_angles(agent_state.rotation)[0] - if (axis % (2 * np.pi)) < 0.1 or (axis % - (2 * np.pi)) > 2 * np.pi - 0.1: - o = quaternion.as_euler_angles(agent_state.rotation)[1] - else: - o = 2 * np.pi - quaternion.as_euler_angles(agent_state.rotation)[1] - if o > np.pi: - o -= 2 * np.pi - return x, y, o - - def get_pose_change(self): - """Returns dx, dy, do pose change of the agent relative to the last - timestep.""" - curr_sim_pose = self.get_sim_location() - dx, dy, do = pu.get_rel_pose_change( - curr_sim_pose, self.last_sim_location) - self.last_sim_location = curr_sim_pose - return dx, dy, do diff --git a/envs/habitat/utils/vector_env.py b/envs/habitat/utils/vector_env.py deleted file mode 100644 index 389300a..0000000 --- a/envs/habitat/utils/vector_env.py +++ /dev/null @@ -1,586 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) Facebook, Inc. and its affiliates. -# This source code is licensed under the MIT license found in the -# LICENSE file in the root directory of this source tree. - -from multiprocessing.connection import Connection -from multiprocessing.context import BaseContext -from queue import Queue -from threading import Thread -from typing import ( - Any, - Callable, - Dict, - List, - Optional, - Sequence, - Set, - Tuple, - Union, -) - -import gym -import numpy as np -from gym.spaces.dict_space import Dict as SpaceDict - -import habitat -from habitat.config import Config -from habitat.core.env import Env, Observations, RLEnv -from habitat.core.logging import logger -from habitat.core.utils import tile_images - -try: - # Use torch.multiprocessing if we can. - # We have yet to find a reason to not use it and - # you are required to use it when sending a torch.Tensor - # between processes - import torch.multiprocessing as mp -except ImportError: - import multiprocessing as mp - -STEP_COMMAND = "step" -RESET_COMMAND = "reset" -RENDER_COMMAND = "render" -CLOSE_COMMAND = "close" -OBSERVATION_SPACE_COMMAND = "observation_space" -ACTION_SPACE_COMMAND = "action_space" -CALL_COMMAND = "call" -EPISODE_COMMAND = "current_episode" -PLAN_ACT_AND_PREPROCESS = "plan_act_and_preprocess" -COUNT_EPISODES_COMMAND = "count_episodes" -EPISODE_OVER = "episode_over" -GET_METRICS = "get_metrics" - - -def _make_env_fn( - config: Config, dataset: Optional[habitat.Dataset] = None, rank: int = 0 -) -> Env: - """Constructor for default habitat `env.Env`. - - :param config: configuration for environment. - :param dataset: dataset for environment. 
- :param rank: rank for setting seed of environment - :return: `env.Env` / `env.RLEnv` object - """ - habitat_env = Env(config=config, dataset=dataset) - habitat_env.seed(config.SEED + rank) - return habitat_env - - -class VectorEnv: - r"""Vectorized environment which creates multiple processes where each - process runs its own environment. Main class for parallelization of - training and evaluation. - - - All the environments are synchronized on step and reset methods. - """ - - observation_spaces: List[SpaceDict] - action_spaces: List[SpaceDict] - _workers: List[Union[mp.Process, Thread]] - _is_waiting: bool - _num_envs: int - _auto_reset_done: bool - _mp_ctx: BaseContext - _connection_read_fns: List[Callable[[], Any]] - _connection_write_fns: List[Callable[[Any], None]] - - def __init__( - self, - make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn, - env_fn_args: Sequence[Tuple] = None, - auto_reset_done: bool = True, - multiprocessing_start_method: str = "forkserver", - ) -> None: - """.. - - :param make_env_fn: function which creates a single environment. An - environment can be of type `env.Env` or `env.RLEnv` - :param env_fn_args: tuple of tuple of args to pass to the - `_make_env_fn`. - :param auto_reset_done: automatically reset the environment when - done. This functionality is provided for seamless training - of vectorized environments. - :param multiprocessing_start_method: the multiprocessing method used to - spawn worker processes. Valid methods are - :py:`{'spawn', 'forkserver', 'fork'}`; :py:`'forkserver'` is the - recommended method as it works well with CUDA. If :py:`'fork'` is - used, the subproccess must be started before any other GPU useage. - """ - self._is_waiting = False - self._is_closed = True - - assert ( - env_fn_args is not None and len(env_fn_args) > 0 - ), "number of environments to be created should be greater than 0" - - self._num_envs = len(env_fn_args) - - assert multiprocessing_start_method in self._valid_start_methods, ( - "multiprocessing_start_method must be one of {}. Got '{}'" - ).format(self._valid_start_methods, multiprocessing_start_method) - self._auto_reset_done = auto_reset_done - self._mp_ctx = mp.get_context(multiprocessing_start_method) - self._workers = [] - ( - self._connection_read_fns, - self._connection_write_fns, - ) = self._spawn_workers( # noqa - env_fn_args, make_env_fn - ) - - self._is_closed = False - - for write_fn in self._connection_write_fns: - write_fn((OBSERVATION_SPACE_COMMAND, None)) - self.observation_spaces = [ - read_fn() for read_fn in self._connection_read_fns - ] - for write_fn in self._connection_write_fns: - write_fn((ACTION_SPACE_COMMAND, None)) - self.action_spaces = [ - read_fn() for read_fn in self._connection_read_fns - ] - self.observation_space = self.observation_spaces[0] - self.action_space = self.action_spaces[0] - self._paused = [] - - @property - def num_envs(self): - r"""number of individual environments. - """ - return self._num_envs - len(self._paused) - - @staticmethod - def _worker_env( - connection_read_fn: Callable, - connection_write_fn: Callable, - env_fn: Callable, - env_fn_args: Tuple[Any], - auto_reset_done: bool, - child_pipe: Optional[Connection] = None, - parent_pipe: Optional[Connection] = None, - ) -> None: - r"""process worker for creating and interacting with the environment. 
- """ - env = env_fn(*env_fn_args) - if parent_pipe is not None: - parent_pipe.close() - try: - command, data = connection_read_fn() - while command != CLOSE_COMMAND: - if command == STEP_COMMAND: - # different step methods for habitat.RLEnv and habitat.Env - if isinstance(env, habitat.RLEnv) or isinstance( - env, gym.Env - ): - # habitat.RLEnv - observations, reward, done, info = env.step(**data) - if auto_reset_done and done: - observations, info = env.reset() - connection_write_fn((observations, reward, done, info)) - elif isinstance(env, habitat.Env): - # habitat.Env - observations = env.step(**data) - if auto_reset_done and env.episode_over: - observations = env.reset() - connection_write_fn(observations) - else: - raise NotImplementedError - - elif command == RESET_COMMAND: - observations = env.reset() - connection_write_fn(observations) - - elif command == RENDER_COMMAND: - connection_write_fn(env.render(*data[0], **data[1])) - - elif ( - command == OBSERVATION_SPACE_COMMAND - or command == ACTION_SPACE_COMMAND - ): - if isinstance(command, str): - connection_write_fn(getattr(env, command)) - - elif command == CALL_COMMAND: - function_name, function_args = data - if function_args is None or len(function_args) == 0: - result = getattr(env, function_name)() - else: - result = getattr(env, function_name)(**function_args) - connection_write_fn(result) - - # TODO: update CALL_COMMAND for getting attribute like this - elif command == EPISODE_COMMAND: - connection_write_fn(env.current_episode) - - elif command == PLAN_ACT_AND_PREPROCESS: - observations, reward, done, info = \ - env.plan_act_and_preprocess(data) - if auto_reset_done and done: - observations, info = env.reset() - connection_write_fn((observations, reward, done, info)) - - elif command == COUNT_EPISODES_COMMAND: - connection_write_fn(len(env.episodes)) - - elif command == EPISODE_OVER: - connection_write_fn(env.episode_over) - - elif command == GET_METRICS: - result = env.get_metrics() - connection_write_fn(result) - - else: - raise NotImplementedError - - command, data = connection_read_fn() - - if child_pipe is not None: - child_pipe.close() - except KeyboardInterrupt: - logger.info("Worker KeyboardInterrupt") - finally: - env.close() - - def _spawn_workers( - self, - env_fn_args: Sequence[Tuple], - make_env_fn: Callable[..., Union[Env, RLEnv]] = _make_env_fn, - ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]: - parent_connections, worker_connections = zip( - *[self._mp_ctx.Pipe(duplex=True) for _ in range(self._num_envs)] - ) - self._workers = [] - for worker_conn, parent_conn, env_args in zip( - worker_connections, parent_connections, env_fn_args - ): - ps = self._mp_ctx.Process( - target=self._worker_env, - args=( - worker_conn.recv, - worker_conn.send, - make_env_fn, - env_args, - self._auto_reset_done, - worker_conn, - parent_conn, - ), - ) - self._workers.append(ps) - ps.daemon = True - ps.start() - worker_conn.close() - return ( - [p.recv for p in parent_connections], - [p.send for p in parent_connections], - ) - - def current_episodes(self): - self._is_waiting = True - for write_fn in self._connection_write_fns: - write_fn((EPISODE_COMMAND, None)) - results = [] - for read_fn in self._connection_read_fns: - results.append(read_fn()) - self._is_waiting = False - return results - - def count_episodes(self): - self._is_waiting = True - for write_fn in self._connection_write_fns: - write_fn((COUNT_EPISODES_COMMAND, None)) - results = [] - for read_fn in self._connection_read_fns: - 
results.append(read_fn()) - self._is_waiting = False - return results - - def episode_over(self): - self._is_waiting = True - for write_fn in self._connection_write_fns: - write_fn((EPISODE_OVER, None)) - results = [] - for read_fn in self._connection_read_fns: - results.append(read_fn()) - self._is_waiting = False - return results - - def get_metrics(self): - self._is_waiting = True - for write_fn in self._connection_write_fns: - write_fn((GET_METRICS, None)) - results = [] - for read_fn in self._connection_read_fns: - results.append(read_fn()) - self._is_waiting = False - return results - - def reset(self): - r"""Reset all the vectorized environments - - :return: list of outputs from the reset method of envs. - """ - self._is_waiting = True - for write_fn in self._connection_write_fns: - write_fn((RESET_COMMAND, None)) - results = [] - for read_fn in self._connection_read_fns: - results.append(read_fn()) - obs, infos = zip(*results) - - self._is_waiting = False - return np.stack(obs), infos - - def reset_at(self, index_env: int): - r"""Reset in the index_env environment in the vector. - - :param index_env: index of the environment to be reset - :return: list containing the output of reset method of indexed env. - """ - self._is_waiting = True - self._connection_write_fns[index_env]((RESET_COMMAND, None)) - results = [self._connection_read_fns[index_env]()] - self._is_waiting = False - return results - - def step_at(self, index_env: int, action: Dict[str, Any]): - r"""Step in the index_env environment in the vector. - - :param index_env: index of the environment to be stepped into - :param action: action to be taken - :return: list containing the output of step method of indexed env. - """ - self._is_waiting = True - self._connection_write_fns[index_env]((STEP_COMMAND, action)) - results = [self._connection_read_fns[index_env]()] - self._is_waiting = False - return results - - def step_async(self, data: List[Union[int, str, Dict[str, Any]]]) -> None: - r"""Asynchronously step in the environments. - - :param data: list of size _num_envs containing keyword arguments to - pass to `step` method for each Environment. For example, - :py:`[{"action": "TURN_LEFT", "action_args": {...}}, ...]`. - """ - # Backward compatibility - if isinstance(data[0], (int, np.integer, str)): - data = [{"action": {"action": action}} for action in data] - - self._is_waiting = True - for write_fn, args in zip(self._connection_write_fns, data): - write_fn((STEP_COMMAND, args)) - - def step_wait(self) -> List[Observations]: - r"""Wait until all the asynchronized environments have synchronized. - """ - results = [] - for read_fn in self._connection_read_fns: - results.append(read_fn()) - self._is_waiting = False - obs, rews, dones, infos = zip(*results) - return np.stack(obs), np.stack(rews), np.stack(dones), infos - - def step(self, data: List[Union[int, str, Dict[str, Any]]]) -> List[Any]: - r"""Perform actions in the vectorized environments. - - :param data: list of size _num_envs containing keyword arguments to - pass to `step` method for each Environment. For example, - :py:`[{"action": "TURN_LEFT", "action_args": {...}}, ...]`. - :return: list of outputs from the step method of envs. 
- """ - self.step_async(data) - return self.step_wait() - - def close(self) -> None: - if self._is_closed: - return - - if self._is_waiting: - for read_fn in self._connection_read_fns: - read_fn() - - for write_fn in self._connection_write_fns: - write_fn((CLOSE_COMMAND, None)) - - for _, _, write_fn, _ in self._paused: - write_fn((CLOSE_COMMAND, None)) - - for process in self._workers: - process.join() - - for _, _, _, process in self._paused: - process.join() - - self._is_closed = True - - def pause_at(self, index: int) -> None: - r"""Pauses computation on this env without destroying the env. - - :param index: which env to pause. All indexes after this one will be - shifted down by one. - - This is useful for not needing to call steps on all environments when - only some are active (for example during the last episodes of running - eval episodes). - """ - if self._is_waiting: - for read_fn in self._connection_read_fns: - read_fn() - read_fn = self._connection_read_fns.pop(index) - write_fn = self._connection_write_fns.pop(index) - worker = self._workers.pop(index) - self._paused.append((index, read_fn, write_fn, worker)) - - def resume_all(self) -> None: - r"""Resumes any paused envs. - """ - for index, read_fn, write_fn, worker in reversed(self._paused): - self._connection_read_fns.insert(index, read_fn) - self._connection_write_fns.insert(index, write_fn) - self._workers.insert(index, worker) - self._paused = [] - - def call_at( - self, - index: int, - function_name: str, - function_args: Optional[Dict[str, Any]] = None, - ) -> Any: - r"""Calls a function (which is passed by name) on the selected env and - returns the result. - - :param index: which env to call the function on. - :param function_name: the name of the function to call on the env. - :param function_args: optional function args. - :return: result of calling the function. - """ - self._is_waiting = True - self._connection_write_fns[index]( - (CALL_COMMAND, (function_name, function_args)) - ) - result = self._connection_read_fns[index]() - self._is_waiting = False - return result - - def call( - self, - function_names: List[str], - function_args_list: Optional[List[Any]] = None, - ) -> List[Any]: - r"""Calls a list of functions (which are passed by name) on the - corresponding env (by index). - - :param function_names: the name of the functions to call on the envs. - :param function_args_list: list of function args for each function. If - provided, :py:`len(function_args_list)` should be as long as - :py:`len(function_names)`. - :return: result of calling the function. - """ - self._is_waiting = True - if function_args_list is None: - function_args_list = [None] * len(function_names) - assert len(function_names) == len(function_args_list) - func_args = zip(function_names, function_args_list) - for write_fn, func_args_on in zip( - self._connection_write_fns, func_args - ): - write_fn((CALL_COMMAND, func_args_on)) - results = [] - for read_fn in self._connection_read_fns: - results.append(read_fn()) - self._is_waiting = False - return results - - def render( - self, mode: str = "human", *args, **kwargs - ) -> Union[np.ndarray, None]: - r"""Render observations from all environments in a tiled image. 
- """ - for write_fn in self._connection_write_fns: - write_fn((RENDER_COMMAND, (args, {"mode": "rgb", **kwargs}))) - images = [read_fn() for read_fn in self._connection_read_fns] - tile = tile_images(images) - if mode == "human": - from habitat.core.utils import try_cv2_import - - cv2 = try_cv2_import() - - cv2.imshow("vecenv", tile[:, :, ::-1]) - cv2.waitKey(1) - return None - elif mode == "rgb_array": - return tile - else: - raise NotImplementedError - - def plan_act_and_preprocess(self, inputs): - self._assert_not_closed() - self._is_waiting = True - for e, write_fn in enumerate(self._connection_write_fns): - write_fn((PLAN_ACT_AND_PREPROCESS, inputs[e])) - results = [] - for read_fn in self._connection_read_fns: - results.append(read_fn()) - obs, rews, dones, infos = zip(*results) - self._is_waiting = False - return np.stack(obs), np.stack(rews), np.stack(dones), infos - - def _assert_not_closed(self): - assert not self._is_closed, "Trying to operate on a SubprocVecEnv after calling close()" - - @property - def _valid_start_methods(self) -> Set[str]: - return {"forkserver", "spawn", "fork"} - - def __del__(self): - self.close() - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - - -class ThreadedVectorEnv(VectorEnv): - r"""Provides same functionality as `VectorEnv`, the only difference is it - runs in a multi-thread setup inside a single process. - - `VectorEnv` runs in a multi-proc setup. This makes it much easier to debug - when using `VectorEnv` because you can actually put break points in the - environment methods. It should not be used for best performance. - """ - - def _spawn_workers( - self, - env_fn_args: Sequence[Tuple], - make_env_fn: Callable[..., Env] = _make_env_fn, - ) -> Tuple[List[Callable[[], Any]], List[Callable[[Any], None]]]: - parent_read_queues, parent_write_queues = zip( - *[(Queue(), Queue()) for _ in range(self._num_envs)] - ) - self._workers = [] - for parent_read_queue, parent_write_queue, env_args in zip( - parent_read_queues, parent_write_queues, env_fn_args - ): - thread = Thread( - target=self._worker_env, - args=( - parent_write_queue.get, - parent_read_queue.put, - make_env_fn, - env_args, - self._auto_reset_done, - ), - ) - self._workers.append(thread) - thread.daemon = True - thread.start() - return ( - [q.get for q in parent_read_queues], - [q.put for q in parent_write_queues], - ) diff --git a/main.py b/main.py deleted file mode 100755 index 437c8ad..0000000 --- a/main.py +++ /dev/null @@ -1,695 +0,0 @@ -from collections import deque, defaultdict -import os -import logging -import time -import json -import gym -import torch.nn as nn -import torch -import numpy as np - -from model import RL_Policy, Semantic_Mapping -from utils.storage import GlobalRolloutStorage -from envs import make_vec_envs -from arguments import get_args -import algo - -os.environ["OMP_NUM_THREADS"] = "1" - - -def main(): - args = get_args() - - np.random.seed(args.seed) - torch.manual_seed(args.seed) - - if args.cuda: - torch.cuda.manual_seed(args.seed) - - # Setup Logging - log_dir = "{}/models/{}/".format(args.dump_location, args.exp_name) - dump_dir = "{}/dump/{}/".format(args.dump_location, args.exp_name) - - if not os.path.exists(log_dir): - os.makedirs(log_dir) - if not os.path.exists(dump_dir): - os.makedirs(dump_dir) - - logging.basicConfig( - filename=log_dir + 'train.log', - level=logging.INFO) - print("Dumping at {}".format(log_dir)) - print(args) - logging.info(args) - - # Logging and loss variables - 
num_scenes = args.num_processes - num_episodes = int(args.num_eval_episodes) - device = args.device = torch.device("cuda:0" if args.cuda else "cpu") - - g_masks = torch.ones(num_scenes).float().to(device) - - best_g_reward = -np.inf - - if args.eval: - episode_success = [] - episode_spl = [] - episode_dist = [] - for _ in range(args.num_processes): - episode_success.append(deque(maxlen=num_episodes)) - episode_spl.append(deque(maxlen=num_episodes)) - episode_dist.append(deque(maxlen=num_episodes)) - - else: - episode_success = deque(maxlen=1000) - episode_spl = deque(maxlen=1000) - episode_dist = deque(maxlen=1000) - - finished = np.zeros((args.num_processes)) - wait_env = np.zeros((args.num_processes)) - - g_episode_rewards = deque(maxlen=1000) - - g_value_losses = deque(maxlen=1000) - g_action_losses = deque(maxlen=1000) - g_dist_entropies = deque(maxlen=1000) - - per_step_g_rewards = deque(maxlen=1000) - - g_process_rewards = np.zeros((num_scenes)) - - # Starting environments - torch.set_num_threads(1) - envs = make_vec_envs(args) - obs, infos = envs.reset() - - torch.set_grad_enabled(False) - - # Initialize map variables: - # Full map consists of multiple channels containing the following: - # 1. Obstacle Map - # 2. Exploread Area - # 3. Current Agent Location - # 4. Past Agent Locations - # 5,6,7,.. : Semantic Categories - nc = args.num_sem_categories + 4 # num channels - - # Calculating full and local map sizes - map_size = args.map_size_cm // args.map_resolution - full_w, full_h = map_size, map_size - local_w = int(full_w / args.global_downscaling) - local_h = int(full_h / args.global_downscaling) - - # Initializing full and local map - full_map = torch.zeros(num_scenes, nc, full_w, full_h).float().to(device) - local_map = torch.zeros(num_scenes, nc, local_w, - local_h).float().to(device) - - # Initial full and local pose - full_pose = torch.zeros(num_scenes, 3).float().to(device) - local_pose = torch.zeros(num_scenes, 3).float().to(device) - - # Origin of local map - origins = np.zeros((num_scenes, 3)) - - # Local Map Boundaries - lmb = np.zeros((num_scenes, 4)).astype(int) - - # Planner pose inputs has 7 dimensions - # 1-3 store continuous global agent location - # 4-7 store local map boundaries - planner_pose_inputs = np.zeros((num_scenes, 7)) - - def get_local_map_boundaries(agent_loc, local_sizes, full_sizes): - loc_r, loc_c = agent_loc - local_w, local_h = local_sizes - full_w, full_h = full_sizes - - if args.global_downscaling > 1: - gx1, gy1 = loc_r - local_w // 2, loc_c - local_h // 2 - gx2, gy2 = gx1 + local_w, gy1 + local_h - if gx1 < 0: - gx1, gx2 = 0, local_w - if gx2 > full_w: - gx1, gx2 = full_w - local_w, full_w - - if gy1 < 0: - gy1, gy2 = 0, local_h - if gy2 > full_h: - gy1, gy2 = full_h - local_h, full_h - else: - gx1, gx2, gy1, gy2 = 0, full_w, 0, full_h - - return [gx1, gx2, gy1, gy2] - - def init_map_and_pose(): - full_map.fill_(0.) - full_pose.fill_(0.) - full_pose[:, :2] = args.map_size_cm / 100.0 / 2.0 - - locs = full_pose.cpu().numpy() - planner_pose_inputs[:, :3] = locs - for e in range(num_scenes): - r, c = locs[e, 1], locs[e, 0] - loc_r, loc_c = [int(r * 100.0 / args.map_resolution), - int(c * 100.0 / args.map_resolution)] - - full_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0 - - lmb[e] = get_local_map_boundaries((loc_r, loc_c), - (local_w, local_h), - (full_w, full_h)) - - planner_pose_inputs[e, 3:] = lmb[e] - origins[e] = [lmb[e][2] * args.map_resolution / 100.0, - lmb[e][0] * args.map_resolution / 100.0, 0.] 
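# An illustrative sketch of the window bookkeeping above, assuming the usual
# defaults map_size_cm = 2400, map_resolution = 5 (cm per cell) and
# global_downscaling = 2 (assumed values, only for illustration):
#   full map  : 2400 / 5 = 480 x 480 cells
#   local map : 480 / 2  = 240 x 240 cells
#   an agent starting at the map centre (cell 240, 240) gets
#   lmb[e] = [120, 360, 120, 360]  and
#   origins[e] = [120 * 5 / 100.0, 120 * 5 / 100.0, 0.] = [6.0, 6.0, 0.],
#   so local poses are simply global poses shifted by the window's lower corner.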
- - for e in range(num_scenes): - local_map[e] = full_map[e, :, - lmb[e, 0]:lmb[e, 1], - lmb[e, 2]:lmb[e, 3]] - local_pose[e] = full_pose[e] - \ - torch.from_numpy(origins[e]).to(device).float() - - def init_map_and_pose_for_env(e): - full_map[e].fill_(0.) - full_pose[e].fill_(0.) - full_pose[e, :2] = args.map_size_cm / 100.0 / 2.0 - - locs = full_pose[e].cpu().numpy() - planner_pose_inputs[e, :3] = locs - r, c = locs[1], locs[0] - loc_r, loc_c = [int(r * 100.0 / args.map_resolution), - int(c * 100.0 / args.map_resolution)] - - full_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 2] = 1.0 - - lmb[e] = get_local_map_boundaries((loc_r, loc_c), - (local_w, local_h), - (full_w, full_h)) - - planner_pose_inputs[e, 3:] = lmb[e] - origins[e] = [lmb[e][2] * args.map_resolution / 100.0, - lmb[e][0] * args.map_resolution / 100.0, 0.] - - local_map[e] = full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] - local_pose[e] = full_pose[e] - \ - torch.from_numpy(origins[e]).to(device).float() - - def update_intrinsic_rew(e): - prev_explored_area = full_map[e, 1].sum(1).sum(0) - full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \ - local_map[e] - curr_explored_area = full_map[e, 1].sum(1).sum(0) - intrinsic_rews[e] = curr_explored_area - prev_explored_area - intrinsic_rews[e] *= (args.map_resolution / 100.)**2 # to m^2 - - init_map_and_pose() - - # Global policy observation space - ngc = 8 + args.num_sem_categories - es = 2 - g_observation_space = gym.spaces.Box(0, 1, - (ngc, - local_w, - local_h), dtype='uint8') - - # Global policy action space - g_action_space = gym.spaces.Box(low=0.0, high=0.99, - shape=(2,), dtype=np.float32) - - # Global policy recurrent layer size - g_hidden_size = args.global_hidden_size - - # Semantic Mapping - sem_map_module = Semantic_Mapping(args).to(device) - sem_map_module.eval() - - # Global policy - g_policy = RL_Policy(g_observation_space.shape, g_action_space, - model_type=1, - base_kwargs={'recurrent': args.use_recurrent_global, - 'hidden_size': g_hidden_size, - 'num_sem_categories': ngc - 8 - }).to(device) - g_agent = algo.PPO(g_policy, args.clip_param, args.ppo_epoch, - args.num_mini_batch, args.value_loss_coef, - args.entropy_coef, lr=args.lr, eps=args.eps, - max_grad_norm=args.max_grad_norm) - - global_input = torch.zeros(num_scenes, ngc, local_w, local_h) - global_orientation = torch.zeros(num_scenes, 1).long() - intrinsic_rews = torch.zeros(num_scenes).to(device) - extras = torch.zeros(num_scenes, 2) - - # Storage - g_rollouts = GlobalRolloutStorage(args.num_global_steps, - num_scenes, g_observation_space.shape, - g_action_space, g_policy.rec_state_size, - es).to(device) - - if args.load != "0": - print("Loading model {}".format(args.load)) - state_dict = torch.load(args.load, - map_location=lambda storage, loc: storage) - g_policy.load_state_dict(state_dict) - - if args.eval: - g_policy.eval() - - # Predict semantic map from frame 1 - poses = torch.from_numpy(np.asarray( - [infos[env_idx]['sensor_pose'] for env_idx in range(num_scenes)]) - ).float().to(device) - - _, local_map, _, local_pose = \ - sem_map_module(obs, poses, local_map, local_pose) - - # Compute Global policy input - locs = local_pose.cpu().numpy() - global_input = torch.zeros(num_scenes, ngc, local_w, local_h) - global_orientation = torch.zeros(num_scenes, 1).long() - - for e in range(num_scenes): - r, c = locs[e, 1], locs[e, 0] - loc_r, loc_c = [int(r * 100.0 / args.map_resolution), - int(c * 100.0 / args.map_resolution)] - - local_map[e, 2:4, loc_r - 1:loc_r + 2, loc_c - 1:loc_c + 
2] = 1. - global_orientation[e] = int((locs[e, 2] + 180.0) / 5.) - - global_input[:, 0:4, :, :] = local_map[:, 0:4, :, :].detach() - global_input[:, 4:8, :, :] = nn.MaxPool2d(args.global_downscaling)( - full_map[:, 0:4, :, :]) - global_input[:, 8:, :, :] = local_map[:, 4:, :, :].detach() - goal_cat_id = torch.from_numpy(np.asarray( - [infos[env_idx]['goal_cat_id'] for env_idx - in range(num_scenes)])) - - extras = torch.zeros(num_scenes, 2) - extras[:, 0] = global_orientation[:, 0] - extras[:, 1] = goal_cat_id - - g_rollouts.obs[0].copy_(global_input) - g_rollouts.extras[0].copy_(extras) - - # Run Global Policy (global_goals = Long-Term Goal) - g_value, g_action, g_action_log_prob, g_rec_states = \ - g_policy.act( - g_rollouts.obs[0], - g_rollouts.rec_states[0], - g_rollouts.masks[0], - extras=g_rollouts.extras[0], - deterministic=False - ) - - cpu_actions = nn.Sigmoid()(g_action).cpu().numpy() - global_goals = [[int(action[0] * local_w), int(action[1] * local_h)] - for action in cpu_actions] - global_goals = [[min(x, int(local_w - 1)), min(y, int(local_h - 1))] - for x, y in global_goals] - - goal_maps = [np.zeros((local_w, local_h)) for _ in range(num_scenes)] - - for e in range(num_scenes): - goal_maps[e][global_goals[e][0], global_goals[e][1]] = 1 - - planner_inputs = [{} for e in range(num_scenes)] - for e, p_input in enumerate(planner_inputs): - p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy() - p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy() - p_input['pose_pred'] = planner_pose_inputs[e] - p_input['goal'] = goal_maps[e] # global_goals[e] - p_input['new_goal'] = 1 - p_input['found_goal'] = 0 - p_input['wait'] = wait_env[e] or finished[e] - if args.visualize or args.print_images: - local_map[e, -1, :, :] = 1e-5 - p_input['sem_map_pred'] = local_map[e, 4:, :, : - ].argmax(0).cpu().numpy() - - obs, _, done, infos = envs.plan_act_and_preprocess(planner_inputs) - - start = time.time() - g_reward = 0 - - torch.set_grad_enabled(False) - spl_per_category = defaultdict(list) - success_per_category = defaultdict(list) - - for step in range(args.num_training_frames // args.num_processes + 1): - if finished.sum() == args.num_processes: - break - - g_step = (step // args.num_local_steps) % args.num_global_steps - l_step = step % args.num_local_steps - - # ------------------------------------------------------------------ - # Reinitialize variables when episode ends - l_masks = torch.FloatTensor([0 if x else 1 - for x in done]).to(device) - g_masks *= l_masks - - for e, x in enumerate(done): - if x: - spl = infos[e]['spl'] - success = infos[e]['success'] - dist = infos[e]['distance_to_goal'] - spl_per_category[infos[e]['goal_name']].append(spl) - success_per_category[infos[e]['goal_name']].append(success) - if args.eval: - episode_success[e].append(success) - episode_spl[e].append(spl) - episode_dist[e].append(dist) - if len(episode_success[e]) == num_episodes: - finished[e] = 1 - else: - episode_success.append(success) - episode_spl.append(spl) - episode_dist.append(dist) - wait_env[e] = 1. 
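# update_intrinsic_rew(e), called next, credits how much the explored-area
# channel (channel 1) of the full map grew, converted to m^2.  A rough sketch,
# assuming the 5 cm map_resolution default:
#   one cell covers (5 / 100.) ** 2 = 0.0025 m^2,
#   so 400 newly explored cells give intrinsic_rews[e] = 1.0 m^2,
#   which is later added to g_reward after scaling by args.intrinsic_rew_coeff.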
- update_intrinsic_rew(e) - init_map_and_pose_for_env(e) - # ------------------------------------------------------------------ - - # ------------------------------------------------------------------ - # Semantic Mapping Module - poses = torch.from_numpy(np.asarray( - [infos[env_idx]['sensor_pose'] for env_idx - in range(num_scenes)]) - ).float().to(device) - - _, local_map, _, local_pose = \ - sem_map_module(obs, poses, local_map, local_pose) - - locs = local_pose.cpu().numpy() - planner_pose_inputs[:, :3] = locs + origins - local_map[:, 2, :, :].fill_(0.) # Resetting current location channel - for e in range(num_scenes): - r, c = locs[e, 1], locs[e, 0] - loc_r, loc_c = [int(r * 100.0 / args.map_resolution), - int(c * 100.0 / args.map_resolution)] - local_map[e, 2:4, loc_r - 2:loc_r + 3, loc_c - 2:loc_c + 3] = 1. - - # ------------------------------------------------------------------ - - # ------------------------------------------------------------------ - # Global Policy - if l_step == args.num_local_steps - 1: - # For every global step, update the full and local maps - for e in range(num_scenes): - if wait_env[e] == 1: # New episode - wait_env[e] = 0. - else: - update_intrinsic_rew(e) - - full_map[e, :, lmb[e, 0]:lmb[e, 1], lmb[e, 2]:lmb[e, 3]] = \ - local_map[e] - full_pose[e] = local_pose[e] + \ - torch.from_numpy(origins[e]).to(device).float() - - locs = full_pose[e].cpu().numpy() - r, c = locs[1], locs[0] - loc_r, loc_c = [int(r * 100.0 / args.map_resolution), - int(c * 100.0 / args.map_resolution)] - - lmb[e] = get_local_map_boundaries((loc_r, loc_c), - (local_w, local_h), - (full_w, full_h)) - - planner_pose_inputs[e, 3:] = lmb[e] - origins[e] = [lmb[e][2] * args.map_resolution / 100.0, - lmb[e][0] * args.map_resolution / 100.0, 0.] - - local_map[e] = full_map[e, :, - lmb[e, 0]:lmb[e, 1], - lmb[e, 2]:lmb[e, 3]] - local_pose[e] = full_pose[e] - \ - torch.from_numpy(origins[e]).to(device).float() - - locs = local_pose.cpu().numpy() - for e in range(num_scenes): - global_orientation[e] = int((locs[e, 2] + 180.0) / 5.) 
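# The heading locs[e, 2] is in degrees, kept in roughly [-180, 180) by the
# pose update; shifting by 180 and bucketing into 5-degree bins yields one of
# 72 classes, matching the nn.Embedding(72, 8) orientation embedding of the
# global policy.  For example: int((-90.0 + 180.0) / 5.) = 18.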
- global_input[:, 0:4, :, :] = local_map[:, 0:4, :, :] - global_input[:, 4:8, :, :] = \ - nn.MaxPool2d(args.global_downscaling)( - full_map[:, 0:4, :, :]) - global_input[:, 8:, :, :] = local_map[:, 4:, :, :].detach() - goal_cat_id = torch.from_numpy(np.asarray( - [infos[env_idx]['goal_cat_id'] for env_idx - in range(num_scenes)])) - extras[:, 0] = global_orientation[:, 0] - extras[:, 1] = goal_cat_id - - # Get exploration reward and metrics - g_reward = torch.from_numpy(np.asarray( - [infos[env_idx]['g_reward'] for env_idx in range(num_scenes)]) - ).float().to(device) - g_reward += args.intrinsic_rew_coeff * intrinsic_rews.detach() - - g_process_rewards += g_reward.cpu().numpy() - g_total_rewards = g_process_rewards * \ - (1 - g_masks.cpu().numpy()) - g_process_rewards *= g_masks.cpu().numpy() - per_step_g_rewards.append(np.mean(g_reward.cpu().numpy())) - - if np.sum(g_total_rewards) != 0: - for total_rew in g_total_rewards: - if total_rew != 0: - g_episode_rewards.append(total_rew) - - # Add samples to global policy storage - if step == 0: - g_rollouts.obs[0].copy_(global_input) - g_rollouts.extras[0].copy_(extras) - else: - g_rollouts.insert( - global_input, g_rec_states, - g_action, g_action_log_prob, g_value, - g_reward, g_masks, extras - ) - - # Sample long-term goal from global policy - g_value, g_action, g_action_log_prob, g_rec_states = \ - g_policy.act( - g_rollouts.obs[g_step + 1], - g_rollouts.rec_states[g_step + 1], - g_rollouts.masks[g_step + 1], - extras=g_rollouts.extras[g_step + 1], - deterministic=False - ) - cpu_actions = nn.Sigmoid()(g_action).cpu().numpy() - global_goals = [[int(action[0] * local_w), - int(action[1] * local_h)] - for action in cpu_actions] - global_goals = [[min(x, int(local_w - 1)), - min(y, int(local_h - 1))] - for x, y in global_goals] - - g_reward = 0 - g_masks = torch.ones(num_scenes).float().to(device) - - # ------------------------------------------------------------------ - - # ------------------------------------------------------------------ - # Update long-term goal if target object is found - found_goal = [0 for _ in range(num_scenes)] - goal_maps = [np.zeros((local_w, local_h)) for _ in range(num_scenes)] - - for e in range(num_scenes): - goal_maps[e][global_goals[e][0], global_goals[e][1]] = 1 - - for e in range(num_scenes): - cn = infos[e]['goal_cat_id'] + 4 - if local_map[e, cn, :, :].sum() != 0.: - cat_semantic_map = local_map[e, cn, :, :].cpu().numpy() - cat_semantic_scores = cat_semantic_map - cat_semantic_scores[cat_semantic_scores > 0] = 1. 
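# cn = goal_cat_id + 4 because map channels 0-3 hold the obstacle, explored,
# current-location and past-location layers; semantic categories start at
# channel 4.  Once any cell of the goal category is non-zero, the thresholded
# mask replaces the sampled long-term goal and found_goal[e] is set below.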
- goal_maps[e] = cat_semantic_scores - found_goal[e] = 1 - # ------------------------------------------------------------------ - - # ------------------------------------------------------------------ - # Take action and get next observation - planner_inputs = [{} for e in range(num_scenes)] - for e, p_input in enumerate(planner_inputs): - p_input['map_pred'] = local_map[e, 0, :, :].cpu().numpy() - p_input['exp_pred'] = local_map[e, 1, :, :].cpu().numpy() - p_input['pose_pred'] = planner_pose_inputs[e] - p_input['goal'] = goal_maps[e] # global_goals[e] - p_input['new_goal'] = l_step == args.num_local_steps - 1 - p_input['found_goal'] = found_goal[e] - p_input['wait'] = wait_env[e] or finished[e] - if args.visualize or args.print_images: - local_map[e, -1, :, :] = 1e-5 - p_input['sem_map_pred'] = local_map[e, 4:, :, - :].argmax(0).cpu().numpy() - - obs, _, done, infos = envs.plan_act_and_preprocess(planner_inputs) - # ------------------------------------------------------------------ - - # ------------------------------------------------------------------ - # Training - torch.set_grad_enabled(True) - if g_step % args.num_global_steps == args.num_global_steps - 1 \ - and l_step == args.num_local_steps - 1: - if not args.eval: - g_next_value = g_policy.get_value( - g_rollouts.obs[-1], - g_rollouts.rec_states[-1], - g_rollouts.masks[-1], - extras=g_rollouts.extras[-1] - ).detach() - - g_rollouts.compute_returns(g_next_value, args.use_gae, - args.gamma, args.tau) - g_value_loss, g_action_loss, g_dist_entropy = \ - g_agent.update(g_rollouts) - g_value_losses.append(g_value_loss) - g_action_losses.append(g_action_loss) - g_dist_entropies.append(g_dist_entropy) - g_rollouts.after_update() - - torch.set_grad_enabled(False) - # ------------------------------------------------------------------ - - # ------------------------------------------------------------------ - # Logging - if step % args.log_interval == 0: - end = time.time() - time_elapsed = time.gmtime(end - start) - log = " ".join([ - "Time: {0:0=2d}d".format(time_elapsed.tm_mday - 1), - "{},".format(time.strftime("%Hh %Mm %Ss", time_elapsed)), - "num timesteps {},".format(step * num_scenes), - "FPS {},".format(int(step * num_scenes / (end - start))) - ]) - - log += "\n\tRewards:" - - if len(g_episode_rewards) > 0: - log += " ".join([ - " Global step mean/med rew:", - "{:.4f}/{:.4f},".format( - np.mean(per_step_g_rewards), - np.median(per_step_g_rewards)), - " Global eps mean/med/min/max eps rew:", - "{:.3f}/{:.3f}/{:.3f}/{:.3f},".format( - np.mean(g_episode_rewards), - np.median(g_episode_rewards), - np.min(g_episode_rewards), - np.max(g_episode_rewards)) - ]) - - if args.eval: - total_success = [] - total_spl = [] - total_dist = [] - for e in range(args.num_processes): - for acc in episode_success[e]: - total_success.append(acc) - for dist in episode_dist[e]: - total_dist.append(dist) - for spl in episode_spl[e]: - total_spl.append(spl) - - if len(total_spl) > 0: - log += " ObjectNav succ/spl/dtg:" - log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format( - np.mean(total_success), - np.mean(total_spl), - np.mean(total_dist), - len(total_spl)) - else: - if len(episode_success) > 100: - log += " ObjectNav succ/spl/dtg:" - log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format( - np.mean(episode_success), - np.mean(episode_spl), - np.mean(episode_dist), - len(episode_spl)) - - log += "\n\tLosses:" - if len(g_value_losses) > 0 and not args.eval: - log += " ".join([ - " Policy Loss value/action/dist:", - "{:.3f}/{:.3f}/{:.3f},".format( - 
np.mean(g_value_losses), - np.mean(g_action_losses), - np.mean(g_dist_entropies)) - ]) - - print(log) - logging.info(log) - # ------------------------------------------------------------------ - - # ------------------------------------------------------------------ - # Save best models - if (step * num_scenes) % args.save_interval < \ - num_scenes: - if len(g_episode_rewards) >= 1000 and \ - (np.mean(g_episode_rewards) >= best_g_reward) \ - and not args.eval: - torch.save(g_policy.state_dict(), - os.path.join(log_dir, "model_best.pth")) - best_g_reward = np.mean(g_episode_rewards) - - # Save periodic models - if (step * num_scenes) % args.save_periodic < \ - num_scenes: - total_steps = step * num_scenes - if not args.eval: - torch.save(g_policy.state_dict(), - os.path.join(dump_dir, - "periodic_{}.pth".format(total_steps))) - # ------------------------------------------------------------------ - - # Print and save model performance numbers during evaluation - if args.eval: - print("Dumping eval details...") - - total_success = [] - total_spl = [] - total_dist = [] - for e in range(args.num_processes): - for acc in episode_success[e]: - total_success.append(acc) - for dist in episode_dist[e]: - total_dist.append(dist) - for spl in episode_spl[e]: - total_spl.append(spl) - - if len(total_spl) > 0: - log = "Final ObjectNav succ/spl/dtg:" - log += " {:.3f}/{:.3f}/{:.3f}({:.0f}),".format( - np.mean(total_success), - np.mean(total_spl), - np.mean(total_dist), - len(total_spl)) - - print(log) - logging.info(log) - - # Save the spl per category - log = "Success | SPL per category\n" - for key in success_per_category: - log += "{}: {} | {}\n".format(key, - sum(success_per_category[key]) / - len(success_per_category[key]), - sum(spl_per_category[key]) / - len(spl_per_category[key])) - - print(log) - logging.info(log) - - with open('{}/{}_spl_per_cat_pred_thr.json'.format( - dump_dir, args.split), 'w') as f: - json.dump(spl_per_category, f) - - with open('{}/{}_success_per_cat_pred_thr.json'.format( - dump_dir, args.split), 'w') as f: - json.dump(success_per_category, f) - - -if __name__ == "__main__": - main() diff --git a/model.py b/model.py deleted file mode 100755 index c912ce0..0000000 --- a/model.py +++ /dev/null @@ -1,283 +0,0 @@ -import torch -import torch.nn as nn -from torch.nn import functional as F -import numpy as np - -from utils.distributions import Categorical, DiagGaussian -from utils.model import get_grid, ChannelPool, Flatten, NNBase -import envs.utils.depth_utils as du - - -class Goal_Oriented_Semantic_Policy(NNBase): - - def __init__(self, input_shape, recurrent=False, hidden_size=512, - num_sem_categories=16): - super(Goal_Oriented_Semantic_Policy, self).__init__( - recurrent, hidden_size, hidden_size) - - out_size = int(input_shape[1] / 16.) * int(input_shape[2] / 16.) 
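# The factor of 16 matches the four nn.MaxPool2d(2) layers in self.main below
# (2 ** 4 = 16).  A sketch assuming a 240 x 240 local map input:
#   out_size = int(240 / 16.) ** 2 = 225,
#   the flattened conv output has out_size * 32 = 7200 features, and
#   linear1 therefore takes out_size * 32 + 8 * 2 inputs (the extra 16 are the
#   8-dim orientation and goal embeddings concatenated in forward()).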
- - self.main = nn.Sequential( - nn.MaxPool2d(2), - nn.Conv2d(num_sem_categories + 8, 32, 3, stride=1, padding=1), - nn.ReLU(), - nn.MaxPool2d(2), - nn.Conv2d(32, 64, 3, stride=1, padding=1), - nn.ReLU(), - nn.MaxPool2d(2), - nn.Conv2d(64, 128, 3, stride=1, padding=1), - nn.ReLU(), - nn.MaxPool2d(2), - nn.Conv2d(128, 64, 3, stride=1, padding=1), - nn.ReLU(), - nn.Conv2d(64, 32, 3, stride=1, padding=1), - nn.ReLU(), - Flatten() - ) - - self.linear1 = nn.Linear(out_size * 32 + 8 * 2, hidden_size) - self.linear2 = nn.Linear(hidden_size, 256) - self.critic_linear = nn.Linear(256, 1) - self.orientation_emb = nn.Embedding(72, 8) - self.goal_emb = nn.Embedding(num_sem_categories, 8) - self.train() - - def forward(self, inputs, rnn_hxs, masks, extras): - x = self.main(inputs) - orientation_emb = self.orientation_emb(extras[:, 0]) - goal_emb = self.goal_emb(extras[:, 1]) - - x = torch.cat((x, orientation_emb, goal_emb), 1) - - x = nn.ReLU()(self.linear1(x)) - if self.is_recurrent: - x, rnn_hxs = self._forward_gru(x, rnn_hxs, masks) - - x = nn.ReLU()(self.linear2(x)) - - return self.critic_linear(x).squeeze(-1), x, rnn_hxs - - -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L15 -class RL_Policy(nn.Module): - - def __init__(self, obs_shape, action_space, model_type=0, - base_kwargs=None): - - super(RL_Policy, self).__init__() - if base_kwargs is None: - base_kwargs = {} - - if model_type == 1: - self.network = Goal_Oriented_Semantic_Policy( - obs_shape, **base_kwargs) - else: - raise NotImplementedError - - if action_space.__class__.__name__ == "Discrete": - num_outputs = action_space.n - self.dist = Categorical(self.network.output_size, num_outputs) - elif action_space.__class__.__name__ == "Box": - num_outputs = action_space.shape[0] - self.dist = DiagGaussian(self.network.output_size, num_outputs) - else: - raise NotImplementedError - - self.model_type = model_type - - @property - def is_recurrent(self): - return self.network.is_recurrent - - @property - def rec_state_size(self): - """Size of rnn_hx.""" - return self.network.rec_state_size - - def forward(self, inputs, rnn_hxs, masks, extras): - if extras is None: - return self.network(inputs, rnn_hxs, masks) - else: - return self.network(inputs, rnn_hxs, masks, extras) - - def act(self, inputs, rnn_hxs, masks, extras=None, deterministic=False): - - value, actor_features, rnn_hxs = self(inputs, rnn_hxs, masks, extras) - dist = self.dist(actor_features) - - if deterministic: - action = dist.mode() - else: - action = dist.sample() - - action_log_probs = dist.log_probs(action) - - return value, action, action_log_probs, rnn_hxs - - def get_value(self, inputs, rnn_hxs, masks, extras=None): - value, _, _ = self(inputs, rnn_hxs, masks, extras) - return value - - def evaluate_actions(self, inputs, rnn_hxs, masks, action, extras=None): - - value, actor_features, rnn_hxs = self(inputs, rnn_hxs, masks, extras) - dist = self.dist(actor_features) - - action_log_probs = dist.log_probs(action) - dist_entropy = dist.entropy().mean() - - return value, action_log_probs, dist_entropy, rnn_hxs - - -class Semantic_Mapping(nn.Module): - - """ - Semantic_Mapping - """ - - def __init__(self, args): - super(Semantic_Mapping, self).__init__() - - self.device = args.device - self.screen_h = args.frame_height - self.screen_w = args.frame_width - self.resolution = args.map_resolution - self.z_resolution = args.map_resolution - self.map_size_cm = args.map_size_cm // args.global_downscaling - self.n_channels = 3 - 
self.vision_range = args.vision_range - self.dropout = 0.5 - self.fov = args.hfov - self.du_scale = args.du_scale - self.cat_pred_threshold = args.cat_pred_threshold - self.exp_pred_threshold = args.exp_pred_threshold - self.map_pred_threshold = args.map_pred_threshold - self.num_sem_categories = args.num_sem_categories - - self.max_height = int(360 / self.z_resolution) - self.min_height = int(-40 / self.z_resolution) - self.agent_height = args.camera_height * 100. - self.shift_loc = [self.vision_range * - self.resolution // 2, 0, np.pi / 2.0] - self.camera_matrix = du.get_camera_matrix( - self.screen_w, self.screen_h, self.fov) - - self.pool = ChannelPool(1) - - vr = self.vision_range - - self.init_grid = torch.zeros( - args.num_processes, 1 + self.num_sem_categories, vr, vr, - self.max_height - self.min_height - ).float().to(self.device) - self.feat = torch.ones( - args.num_processes, 1 + self.num_sem_categories, - self.screen_h // self.du_scale * self.screen_w // self.du_scale - ).float().to(self.device) - - def forward(self, obs, pose_obs, maps_last, poses_last): - bs, c, h, w = obs.size() - depth = obs[:, 3, :, :] - - point_cloud_t = du.get_point_cloud_from_z_t( - depth, self.camera_matrix, self.device, scale=self.du_scale) - - agent_view_t = du.transform_camera_view_t( - point_cloud_t, self.agent_height, 0, self.device) - - agent_view_centered_t = du.transform_pose_t( - agent_view_t, self.shift_loc, self.device) - - max_h = self.max_height - min_h = self.min_height - xy_resolution = self.resolution - z_resolution = self.z_resolution - vision_range = self.vision_range - XYZ_cm_std = agent_view_centered_t.float() - XYZ_cm_std[..., :2] = (XYZ_cm_std[..., :2] / xy_resolution) - XYZ_cm_std[..., :2] = (XYZ_cm_std[..., :2] - - vision_range // 2.) / vision_range * 2. - XYZ_cm_std[..., 2] = XYZ_cm_std[..., 2] / z_resolution - XYZ_cm_std[..., 2] = (XYZ_cm_std[..., 2] - - (max_h + min_h) // 2.) / (max_h - min_h) * 2. 
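# The two rescalings above put the point cloud into grid-normalised
# coordinates (roughly [-1, 1] per axis): x/y in map cells centred on the
# vision_range window, z in height cells centred on (max_h + min_h) // 2,
# before du.splat_feat_nd below scatters self.feat into the voxel grid.
# A sketch, assuming vision_range = 100 cells:
#   a point at cell 75 maps to (75 - 100 // 2.) / 100 * 2. = 0.5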
- self.feat[:, 1:, :] = nn.AvgPool2d(self.du_scale)( - obs[:, 4:, :, :] - ).view(bs, c - 4, h // self.du_scale * w // self.du_scale) - - XYZ_cm_std = XYZ_cm_std.permute(0, 3, 1, 2) - XYZ_cm_std = XYZ_cm_std.view(XYZ_cm_std.shape[0], - XYZ_cm_std.shape[1], - XYZ_cm_std.shape[2] * XYZ_cm_std.shape[3]) - - voxels = du.splat_feat_nd( - self.init_grid * 0., self.feat, XYZ_cm_std).transpose(2, 3) - - min_z = int(25 / z_resolution - min_h) - max_z = int((self.agent_height + 1) / z_resolution - min_h) - - agent_height_proj = voxels[..., min_z:max_z].sum(4) - all_height_proj = voxels.sum(4) - - fp_map_pred = agent_height_proj[:, 0:1, :, :] - fp_exp_pred = all_height_proj[:, 0:1, :, :] - fp_map_pred = fp_map_pred / self.map_pred_threshold - fp_exp_pred = fp_exp_pred / self.exp_pred_threshold - fp_map_pred = torch.clamp(fp_map_pred, min=0.0, max=1.0) - fp_exp_pred = torch.clamp(fp_exp_pred, min=0.0, max=1.0) - - pose_pred = poses_last - - agent_view = torch.zeros(bs, c, - self.map_size_cm // self.resolution, - self.map_size_cm // self.resolution - ).to(self.device) - - x1 = self.map_size_cm // (self.resolution * 2) - self.vision_range // 2 - x2 = x1 + self.vision_range - y1 = self.map_size_cm // (self.resolution * 2) - y2 = y1 + self.vision_range - agent_view[:, 0:1, y1:y2, x1:x2] = fp_map_pred - agent_view[:, 1:2, y1:y2, x1:x2] = fp_exp_pred - agent_view[:, 4:, y1:y2, x1:x2] = torch.clamp( - agent_height_proj[:, 1:, :, :] / self.cat_pred_threshold, - min=0.0, max=1.0) - - corrected_pose = pose_obs - - def get_new_pose_batch(pose, rel_pose_change): - - pose[:, 1] += rel_pose_change[:, 0] * \ - torch.sin(pose[:, 2] / 57.29577951308232) \ - + rel_pose_change[:, 1] * \ - torch.cos(pose[:, 2] / 57.29577951308232) - pose[:, 0] += rel_pose_change[:, 0] * \ - torch.cos(pose[:, 2] / 57.29577951308232) \ - - rel_pose_change[:, 1] * \ - torch.sin(pose[:, 2] / 57.29577951308232) - pose[:, 2] += rel_pose_change[:, 2] * 57.29577951308232 - - pose[:, 2] = torch.fmod(pose[:, 2] - 180.0, 360.0) + 180.0 - pose[:, 2] = torch.fmod(pose[:, 2] + 180.0, 360.0) - 180.0 - - return pose - - current_poses = get_new_pose_batch(poses_last, corrected_pose) - st_pose = current_poses.clone().detach() - - st_pose[:, :2] = - (st_pose[:, :2] - * 100.0 / self.resolution - - self.map_size_cm // (self.resolution * 2)) /\ - (self.map_size_cm // (self.resolution * 2)) - st_pose[:, 2] = 90. 
- (st_pose[:, 2]) - - rot_mat, trans_mat = get_grid(st_pose, agent_view.size(), - self.device) - - rotated = F.grid_sample(agent_view, rot_mat, align_corners=True) - translated = F.grid_sample(rotated, trans_mat, align_corners=True) - - maps2 = torch.cat((maps_last.unsqueeze(1), translated.unsqueeze(1)), 1) - - map_pred, _ = torch.max(maps2, 1) - - return fp_map_pred, map_pred, pose_pred, current_poses diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 8c8800c..0000000 --- a/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -scikit-fmm==2019.1.30 -scikit-learn==0.22.2.post1 -scikit-image==0.15.0 -numpy>=1.20.2 -ifcfg diff --git a/semantic_exploration/README.md b/semantic_exploration/README.md new file mode 100644 index 0000000..f0c4e4d --- /dev/null +++ b/semantic_exploration/README.md @@ -0,0 +1 @@ +# semantic_exploration diff --git a/semantic_exploration/agents/sem_exp.py b/semantic_exploration/agents/sem_exp.py new file mode 100644 index 0000000..5fb1ec3 --- /dev/null +++ b/semantic_exploration/agents/sem_exp.py @@ -0,0 +1,577 @@ +# -*- coding: utf-8 -*- +import math +import os + +import third_party.semantic_exploration.agents.utils.visualization as vu +import cv2 +import third_party.semantic_exploration.envs.utils.pose as pu +from third_party.semantic_exploration.envs.utils.fmm_planner import FMMPlanner +import numpy as np +import skimage.morphology +from third_party.semantic_exploration.agents.utils.detic_semantic_prediction import SemanticPredDetic +from third_party.semantic_exploration.agents.utils.owlvit_semantic_prediction import SemanticPredOwlvit +from third_party.semantic_exploration.agents.utils.semantic_prediction import SemanticPredMaskRCNN +from third_party.semantic_exploration.constants import color_palette +from PIL import Image +from torchvision import transforms + + +class Sem_Exp_Env_Agent: + """The Sem_Exp environment agent class. A seperate Sem_Exp_Env_Agent class + object is used for each environment thread. 
+ + """ + + def __init__(self, config, rank=1): + self.config = config + # initialize transform for RGB observations + self.res = transforms.Compose( + [ + transforms.ToPILImage(), + transforms.Resize( + (self.config.FRAME_HEIGHT, self.config.FRAME_WIDTH), + interpolation=Image.NEAREST, + ), + ] + ) + + if self.config.DETECTION_MODEL == "detectron2": + self.sem_pred = SemanticPredMaskRCNN(self.config) + elif self.config.DETECTION_MODEL == "detic": + self.sem_pred = SemanticPredDetic(self.config) + elif self.config.DETECTION_MODEL == "owlvit": + self.sem_pred = SemanticPredOwlvit(self.config) + else: + raise NotImplementedError + + # initializations for planning: + self.selem = skimage.morphology.disk(self.config.OBS_DILATION_SELEM_RADIUS) + self.obs = None + self.info = None + self.obs_shape = None + self.collision_map = None + self.visited = None + self.visited_vis = None + self.col_width = None + self.curr_loc = None + self.last_loc = None + self.last_action = None + self.count_forward_actions = None + + if self.config.PLANNER == "frontier": + self.start_obs_dilation_selem_radius = self.config.OBS_DILATION_SELEM_RADIUS + self.goal_dilation_selem_radius = self.config.GOAL_DILATION_SELEM_RADIUS + self.min_obs_dilation_selem_radius = ( + self.config.MIN_OBS_DILATION_SELEM_RADIUS + ) + self.agent_cell_radius = self.config.AGENT_CELL_RADIUS + self.goal_tolerance = self.config.GOAL_TOLERANCE + self.continuous_angle_tolerance = self.config.CONTINUOUS_ANGLE_TOLERANCE + self.curr_obs_dilation_selem_radius = None + self.obs_dilation_selem = None + + if self.config.VISUALIZE: + this_dir = os.path.dirname(os.path.abspath(__file__)) + semantic_exploration_dir = os.path.join(os.path.dirname(this_dir)) + self.legend = cv2.imread(semantic_exploration_dir+"/docs/legend.png") + self.vis_image = None + self.rgb_vis = None + self.depth_vis = None + self.goal_name = None + self.timestep = 0 + self.rank = rank + self.episode_no = 0 + self.cur_stg = None + + def reset(self, obs_size, goal_name): + self.info = None + self.obs_shape = obs_size + self.goal_name = goal_name + + # Episode initializations + map_shape = ( + self.config.MAP_SIZE_CM // self.config.MAP_RESOLUTION, + self.config.MAP_SIZE_CM // self.config.MAP_RESOLUTION, + ) + self.collision_map = np.zeros(map_shape) + self.visited = np.zeros(map_shape) + self.visited_vis = np.zeros(map_shape) + self.col_width = 1 + self.count_forward_actions = 0 + self.curr_loc = [ + self.config.MAP_SIZE_CM / 100.0 / 2.0, + self.config.MAP_SIZE_CM / 100.0 / 2.0, + 0.0, + ] + self.last_action = None + + if self.config.PLANNER == "frontier": + self.curr_obs_dilation_selem_radius = self.start_obs_dilation_selem_radius + self.obs_dilation_selem = skimage.morphology.disk( + self.curr_obs_dilation_selem_radius + ) + + if self.config.VISUALIZE: + self.vis_image = vu.init_vis_image(self.goal_name, self.legend) + self.timestep = 0 + + def update_vis_image_goal(self, goal_name): + self.goal_name = goal_name + if self.config.VISUALIZE: + self.vis_image = vu.init_vis_image(self.goal_name, self.legend) + + def plan_act_and_preprocess(self, planner_inputs, info): + """Function responsible for planning, taking the action and + preprocessing observations + + Args: + planner_inputs (dict): + dict with following keys: + 'map_pred' (ndarray): (M, M) map prediction + 'goal' (ndarray): (M, M) mat denoting goal locations + 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o) + and planning window (gx1, gx2, gy1, gy2) + 'found_goal' (bool): whether the goal object is found + + Returns: + 
obs (ndarray): preprocessed observations ((4+C) x H x W) + reward (float): amount of reward returned after previous action + done (bool): whether the episode has ended + info (dict): contains timestep, pose, goal category and + evaluation metric info + """ + + self.info = info + # plan + if planner_inputs["wait"]: + self.last_action = None + self.info["sensor_pose"] = [0.0, 0.0, 0.0] + return np.zeros(self.obs.shape), 0.0, False, self.info + + action = self._plan(planner_inputs) + + if self.config.VISUALIZE: + self._visualize(planner_inputs) + + self.timestep += 1 + + if action >= 0: + # act + action = {"action": action} + obs = self.info["state"] + self.last_action = action["action"] + self.obs = obs + self.info = info + self.info["action"] = action + + return obs, 0.0, False, info + + else: + self.last_action = None + self.info["sensor_pose"] = [0.0, 0.0, 0.0] + self.info["action"] = -1 + return np.zeros(self.obs_shape), 0.0, False, self.info + + def _reach_goal_if_in_map(self, goal_map, found_goal): + height = goal_map.shape[0] + width = goal_map.shape[1] + init_goal_map = np.zeros((height, width)) + if found_goal: + init_goal_map = goal_map + return init_goal_map + + def _explore_otherwise(self, exp_pred, goal_map, found_goal): + """Explore closest unexplored region otherwise.""" + # Select unexplored area + frontier_map = exp_pred == 0 + self.dilate_explored_kernel = skimage.morphology.disk(10) + # Dilate explored area + frontier_map = 1 - skimage.morphology.binary_dilation( + 1 - frontier_map, self.dilate_explored_kernel + ) + + self.select_border_kernel = skimage.morphology.disk(1) + # Select the frontier + frontier_map = ( + skimage.morphology.binary_dilation(frontier_map, self.select_border_kernel) + - frontier_map + ) + + if not found_goal: + goal_map = frontier_map + + return goal_map + + def _plan(self, planner_inputs): + """Function responsible for planning + + Args: + planner_inputs (dict): + dict with following keys: + 'map_pred' (ndarray): (M, M) map prediction + 'goal' (ndarray): (M, M) goal locations + 'pose_pred' (ndarray): (7,) array denoting pose (x,y,o) + and planning window (gx1, gx2, gy1, gy2) + 'found_goal' (bool): whether the goal object is found + + Returns: + action (int): action id + """ + + self.last_loc = self.curr_loc + + # Get Map prediction (obstacle) + map_pred = np.rint(planner_inputs["map_pred"]) + if self.config.PLANNER == "frontier": + goal = self._reach_goal_if_in_map( + planner_inputs["goal"], planner_inputs["found_goal"] + ) + goal = self._explore_otherwise( + planner_inputs["exp_pred"], goal, planner_inputs["found_goal"] + ) + else: + goal = planner_inputs["goal"] + + # Get pose prediction and global policy planning window + start_x, start_y, start_o, gx1, gx2, gy1, gy2 = planner_inputs["pose_pred"] + gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2) + planning_window = [gx1, gx2, gy1, gy2] + + # Get curr loc + self.curr_loc = [start_x, start_y, start_o] + r, c = start_y, start_x + start = [ + int(r * 100.0 / self.config.MAP_RESOLUTION - gx1), + int(c * 100.0 / self.config.MAP_RESOLUTION - gy1), + ] + start = pu.threshold_poses(start, map_pred.shape) + + self.visited[gx1:gx2, gy1:gy2][ + start[0] - 0 : start[0] + 1, start[1] - 0 : start[1] + 1 + ] = 1 + + if self.config.VISUALIZE: + # Get last loc + last_start_x, last_start_y = self.last_loc[0], self.last_loc[1] + r, c = last_start_y, last_start_x + last_start = [ + int(r * 100.0 / self.config.MAP_RESOLUTION - gx1), + int(c * 100.0 / self.config.MAP_RESOLUTION - gy1), + ] + last_start = 
pu.threshold_poses(last_start, map_pred.shape) + self.visited_vis[gx1:gx2, gy1:gy2] = vu.draw_line( + last_start, start, self.visited_vis[gx1:gx2, gy1:gy2] + ) + + # Collision check + if self.last_action == 1: + x1, y1, t1 = self.last_loc + x2, y2, _ = self.curr_loc + buf = 4 + length = 2 + + if abs(x1 - x2) < 0.05 and abs(y1 - y2) < 0.05: + self.col_width += 2 + if self.col_width == 7: + length = 4 + buf = 3 + self.col_width = min(self.col_width, 5) + else: + self.col_width = 1 + + dist = pu.get_l2_distance(x1, x2, y1, y2) + if dist < self.config.COLLISION_THRESHOLD: # Collision + width = self.col_width + for i in range(length): + for j in range(width): + wx = x1 + 0.05 * ( + (i + buf) * np.cos(np.deg2rad(t1)) + + (j - width // 2) * np.sin(np.deg2rad(t1)) + ) + wy = y1 + 0.05 * ( + (i + buf) * np.sin(np.deg2rad(t1)) + - (j - width // 2) * np.cos(np.deg2rad(t1)) + ) + r, c = wy, wx + r, c = int(r * 100 / self.config.MAP_RESOLUTION), int( + c * 100 / self.config.MAP_RESOLUTION + ) + [r, c] = pu.threshold_poses([r, c], self.collision_map.shape) + self.collision_map[r, c] = 1 + + stg, replan, stop = self._get_stg( + map_pred, start, np.copy(goal), planning_window + ) + + # We were not able to find a path to the high-level goal + if replan and self.config.PLANNER == "frontier": + # Clean collision map + self.collision_map *= 0 + + # Reduce obstacle dilation + if self.curr_obs_dilation_selem_radius > 1: + self.curr_obs_dilation_selem_radius -= 1 + self.obs_dilation_selem = skimage.morphology.disk( + self.curr_obs_dilation_selem_radius + ) + + # Deterministic Local Policy + if stop and planner_inputs["found_goal"] == 1: + if self._get_distance_to_obstacle() <= 0.2: + action = 0 + else: + action = 1 + else: + (stg_x, stg_y) = stg + angle_st_goal = math.degrees(math.atan2(stg_x - start[0], stg_y - start[1])) + angle_agent = (start_o) % 360.0 + if angle_agent > 180: + angle_agent -= 360 + + relative_angle = (angle_agent - angle_st_goal) % 360.0 + if relative_angle > 180: + relative_angle -= 360 + + if relative_angle > self.config.TURN_ANGLE / 2.0: + # Right + action = 3 + elif relative_angle < -self.config.TURN_ANGLE / 2.0: + # Left + action = 2 + else: + # Forward + action = 1 + + self.cur_stg = stg + + return action + + def _get_stg(self, grid, start, goal, planning_window): + """Get short-term goal""" + + [gx1, gx2, gy1, gy2] = planning_window + + x1, y1, = ( + 0, + 0, + ) + x2, y2 = grid.shape + + def add_boundary(mat, value=1): + h, w = mat.shape + new_mat = np.zeros((h + 2, w + 2)) + value + new_mat[1 : h + 1, 1 : w + 1] = mat + return new_mat + + if self.config.PLANNER == "frontier": + obstacles = grid[x1:x2, y1:y2] + # Dilate obstacles + dilated_obstacles = cv2.dilate( + obstacles, self.obs_dilation_selem, iterations=1 + ) + traversible = 1 - dilated_obstacles + else: + traversible = ( + skimage.morphology.binary_dilation(grid[x1:x2, y1:y2], self.selem) + != True # noqa + ) + traversible[self.collision_map[gx1:gx2, gy1:gy2][x1:x2, y1:y2] == 1] = 0 + traversible[self.visited[gx1:gx2, gy1:gy2][x1:x2, y1:y2] == 1] = 1 + + traversible[ + int(start[0] - x1) - 1 : int(start[0] - x1) + 2, + int(start[1] - y1) - 1 : int(start[1] - y1) + 2, + ] = 1 + + traversible = add_boundary(traversible) + goal = add_boundary(goal, value=0) + + planner = FMMPlanner(traversible, step_size=self.config.PLANNER_STEP_SIZE) + # Set the goal size + selem = skimage.morphology.disk(self.config.GOAL_DILATION_SELEM_RADIUS) + goal = skimage.morphology.binary_dilation(goal, selem) != True # noqa + goal = 1 - goal * 1.0 + 
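# The goal map now holds 1 at (dilated) goal cells and 0 elsewhere.
+ # set_multi_goal runs the fast marching method (skfmm.distance) outward from
+ # every goal cell over the traversible area; get_short_term_goal below then
+ # descends the resulting distance field to pick a waypoint within step_size.
+ 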
planner.set_multi_goal(goal) + + + if self.config.VISUALIZE: + dump_dir = "{}/dump/{}/".format(self.config.DUMP_LOCATION, self.config.EXP_NAME) + ep_dir = "{}/episodes/thread_{}/eps_{}/".format( + dump_dir, self.rank, self.episode_no + ) + if not os.path.exists(ep_dir): + os.makedirs(ep_dir) + r, c = traversible.shape + dist_vis = np.zeros((r, c * 3)) + dist_vis[:, :c] = np.flipud(traversible) + dist_vis[:, c : 2 * c] = np.flipud(goal) + dist_vis[:, 2 * c :] = np.flipud(planner.fmm_dist / planner.fmm_dist.max()) + + fn = "{}/episodes/thread_{}/eps_{}/frontier-{}-{}-Vis-{}.png".format( + dump_dir, + self.rank, + self.episode_no, + self.rank, + self.episode_no, + self.timestep, + ) + + font = cv2.FONT_HERSHEY_SIMPLEX + fontScale = 0.3 + color = (0, 0, 255) # BGR + thickness = 1 + dist_vis = cv2.cvtColor((255.0 * dist_vis).astype(np.uint8), cv2.COLOR_GRAY2BGR) + dist_vis = cv2.putText(dist_vis, "trav. (w: trav.; b: can't tarv.)", (2, 25), font, fontScale, color, thickness, cv2.LINE_AA) + dist_vis = cv2.putText(dist_vis, "goal (w: goal; b: non-goal)", (c+2,25), font, fontScale, color, thickness, cv2.LINE_AA) + dist_vis = cv2.putText(dist_vis, "trav.+goal (w: non-goal target; b: goal target)", (2*c+2,25), font, fontScale, color, thickness, cv2.LINE_AA) + cv2.imwrite(fn, dist_vis.astype(np.uint8)) + cv2.waitKey(1) + + state = [start[0] - x1 + 1, start[1] - y1 + 1] + # Add the replan flag + stg_x, stg_y, replan, stop = planner.get_short_term_goal(state) + + stg_x, stg_y = stg_x + x1 - 1, stg_y + y1 - 1 + + return (stg_x, stg_y), replan, stop + + def _preprocess_obs(self, obs, use_seg=True): + obs = obs.transpose(1, 2, 0) + rgb = obs[:, :, :3] + depth = obs[:, :, 3:4] + + sem_seg_pred = self._get_sem_pred(rgb.astype(np.uint8), use_seg=use_seg) + self.depth_vis = depth + depth = self._preprocess_depth( + depth, self.config.MIN_DEPTH, self.config.MAX_DEPTH + ) + + ds = ( + self.config.ENV_FRAME_WIDTH // self.config.FRAME_WIDTH + ) # Downscaling factor + if ds != 1: + rgb = np.asarray(self.res(rgb.astype(np.uint8))) + depth = depth[ds // 2 :: ds, ds // 2 :: ds] + sem_seg_pred = sem_seg_pred[ds // 2 :: ds, ds // 2 :: ds] + + depth = np.expand_dims(depth, axis=2) + state = np.concatenate((rgb, depth, sem_seg_pred), axis=2).transpose(2, 0, 1) + return state + + def _preprocess_depth(self, depth, min_d, max_d): + depth = depth[:, :, 0] * 1 + + for i in range(depth.shape[1]): + depth[:, i][depth[:, i] == 0.0] = depth[:, i].max() + + mask2 = depth > 0.99 + depth[mask2] = 0.0 + + mask1 = depth == 0 + depth[mask1] = 100.0 + depth = min_d * 100.0 + depth * max_d * 100.0 + return depth + + def _get_sem_pred(self, rgb, use_seg=True): + if use_seg: + semantic_pred, self.rgb_vis = self.sem_pred.get_prediction(rgb) + semantic_pred = semantic_pred.astype(np.float32) + else: + semantic_pred = np.zeros((rgb.shape[0], rgb.shape[1], 16)) + self.rgb_vis = rgb[:, :, ::-1] + return semantic_pred + + def _get_distance_to_obstacle(self): + """"Return the distance between the obstacle and the robot.""" + x1, y1, t1 = self.last_loc + x2, y2, _ = self.curr_loc + dist = pu.get_l2_distance(x1, x2, y1, y2) + return dist + + + def _visualize(self, inputs): + dump_dir = "{}/dump/{}/".format(self.config.DUMP_LOCATION, self.config.EXP_NAME) + ep_dir = "{}/episodes/thread_{}/eps_{}/".format( + dump_dir, self.rank, self.episode_no + ) + if not os.path.exists(ep_dir): + os.makedirs(ep_dir) + + map_pred = inputs["map_pred"] + exp_pred = inputs["exp_pred"] + start_x, start_y, start_o, gx1, gx2, gy1, gy2 = inputs["pose_pred"] + + goal = 
inputs["goal"] + goal[int(self.cur_stg[0]), int(self.cur_stg[1])] = 1 + sem_map = inputs["sem_map_pred"] + + gx1, gx2, gy1, gy2 = int(gx1), int(gx2), int(gy1), int(gy2) + + sem_map += 5 + + no_cat_mask = sem_map == self.config.NUM_SEM_CATEGORIES + 4 # 20 + map_mask = np.rint(map_pred) == 1 + exp_mask = np.rint(exp_pred) == 1 + vis_mask = self.visited_vis[gx1:gx2, gy1:gy2] == 1 + + sem_map[no_cat_mask] = 0 + m1 = np.logical_and(no_cat_mask, exp_mask) + sem_map[m1] = 2 + + m2 = np.logical_and(no_cat_mask, map_mask) + sem_map[m2] = 1 + + sem_map[vis_mask] = 3 + + selem = skimage.morphology.disk(self.goal_dilation_selem_radius) + goal_mat = 1 - skimage.morphology.binary_dilation(goal, selem) != True # noqa + + goal_mask = goal_mat == 1 + sem_map[goal_mask] = 4 + + color_pal = [int(x * 255.0) for x in color_palette] + sem_map_vis = Image.new("P", (sem_map.shape[1], sem_map.shape[0])) + sem_map_vis.putpalette(color_pal) + sem_map_vis.putdata(sem_map.flatten().astype(np.uint8)) + sem_map_vis = sem_map_vis.convert("RGB") + sem_map_vis = np.flipud(sem_map_vis) + + sem_map_vis = sem_map_vis[:, :, [2, 1, 0]] + sem_map_vis = cv2.resize( + sem_map_vis, (480, 480), interpolation=cv2.INTER_NEAREST + ) + self.depth_vis = cv2.cvtColor((255.0 * self.depth_vis).astype(np.uint8), cv2.COLOR_GRAY2BGR) + self.vis_image[ + 50 : 50 + self.config.ENV_FRAME_HEIGHT, + 15 : 15 + self.config.ENV_FRAME_WIDTH, + ] = self.rgb_vis # depth_vis or rgb_vis + self.vis_image[50:530, 670:1150] = sem_map_vis + + pos = ( + (start_x * 100.0 / self.config.MAP_RESOLUTION - gy1) + * 480 + / map_pred.shape[0], + (map_pred.shape[1] - start_y * 100.0 / self.config.MAP_RESOLUTION + gx1) + * 480 + / map_pred.shape[1], + np.deg2rad(-start_o), + ) + + agent_arrow = vu.get_contour_points(pos, origin=(670, 50)) + color = ( + int(color_palette[11] * 255), + int(color_palette[10] * 255), + int(color_palette[9] * 255), + ) + cv2.drawContours(self.vis_image, [agent_arrow], 0, color, -1) + + if self.config.VISUALIZE: + fn = "{}/episodes/thread_{}/eps_{}/{}-{}-Vis-{}.png".format( + dump_dir, + self.rank, + self.episode_no, + self.rank, + self.episode_no, + self.timestep, + ) + cv2.imwrite(fn, self.vis_image) diff --git a/semantic_exploration/agents/utils/detic_semantic_prediction.py b/semantic_exploration/agents/utils/detic_semantic_prediction.py new file mode 100644 index 0000000..e14d044 --- /dev/null +++ b/semantic_exploration/agents/utils/detic_semantic_prediction.py @@ -0,0 +1,338 @@ +# The following code is largely borrowed from +# https://github.com/facebookresearch/detectron2/blob/master/demo/demo.py and +# https://github.com/facebookresearch/detectron2/blob/master/demo/predictor.py + +import argparse +import pathlib +import sys +import time +from pathlib import Path + +import detectron2.data.transforms as T +import numpy as np +import torch + +ROOT_DETIC = str(Path(__file__).resolve().parent).split("third_party")[0]+"third_party/" +sys.path.insert(0, ROOT_DETIC + "Detic/third_party/CenterNet2") +sys.path.insert(0, ROOT_DETIC + "Detic") +from centernet.config import add_centernet_config # noqa: E402 +from third_party.semantic_exploration.constants import coco_categories_mapping # noqa: E402 +from detectron2.checkpoint import DetectionCheckpointer # noqa: E402 +from detectron2.config import get_cfg # noqa: E402 +from detectron2.data.catalog import MetadataCatalog # noqa: E402 +from detectron2.engine.defaults import DefaultPredictor # noqa: E402 +from detectron2.modeling import build_model # noqa: E402 +from detectron2.utils.logger import 
setup_logger # noqa: E402 +from detectron2.utils.visualizer import ColorMode, Visualizer # noqa: E402 +from detic.config import add_detic_config # noqa: E402 +from detic.modeling.text.text_encoder import build_text_encoder # noqa: E402 +from detic.modeling.utils import reset_cls_test # noqa: E402 + +BUILDIN_CLASSIFIER = { + "lvis": ROOT_DETIC + "Detic/datasets/metadata/lvis_v1_clip_a+cname.npy", + "objects365": ROOT_DETIC + "Detic/datasets/metadata/o365_clip_a+cnamefix.npy", + "openimages": ROOT_DETIC + "Detic/datasets/metadata/oid_clip_a+cname.npy", + "coco": ROOT_DETIC + "Detic/datasets/metadata/coco_clip_a+cname.npy", +} + +BUILDIN_METADATA_PATH = { + "lvis": "lvis_v1_val", + "objects365": "objects365_v2_val", + "openimages": "oid_val_expanded", + "coco": "coco_2017_val", +} + + +class SemanticPredDetic: + def __init__(self, args): + self.segmentation_model = ImageSegmentation(args) + self.args = args + + def get_prediction(self, img): + args = self.args + image_list = [] + img = img[:, :, ::-1] + image_list.append(img) + seg_predictions, vis_output = self.segmentation_model.get_predictions( + image_list, visualize=args.visualize == 2 + ) + + if args.visualize == 2: + img = vis_output.get_image() + + semantic_input = np.zeros( + (img.shape[0], img.shape[1], 16 + 1) + ) # self.args.num_sem_categories )) #15 + 1)) + + for j, class_idx in enumerate( + seg_predictions[0]["instances"].pred_classes.cpu().numpy() + ): + if class_idx in list(coco_categories_mapping.keys()): + idx = coco_categories_mapping[class_idx] + obj_mask = seg_predictions[0]["instances"].pred_masks[j] * 1.0 + semantic_input[:, :, idx] += obj_mask.cpu().numpy() + # The shape of the semantic input is (480, 640, 17) + return semantic_input, img + + +def compress_sem_map(sem_map): + c_map = np.zeros((sem_map.shape[1], sem_map.shape[2])) + for i in range(sem_map.shape[0]): + c_map[sem_map[i] > 0.0] = i + 1 + return c_map + + +class ImageSegmentation: + def __init__(self, args): + string_args = """ + --config-file {} + --input input1.jpeg + --vocabulary coco + --confidence-threshold {} + --opts MODEL.WEIGHTS {} + """.format( + ROOT_DETIC + "/Detic/configs/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.yaml", + args.sem_pred_prob_thr, + ROOT_DETIC + "/Detic/configs/Detic_LCOCOI21k_CLIP_SwinB_896b32_4x_ft4x_max-size.pth" + ) + + if args.sem_gpu_id == -2: + string_args += """ MODEL.DEVICE cpu""" + else: + string_args += """ MODEL.DEVICE cuda:{}""".format(args.sem_gpu_id) + + string_args = string_args.split() + + args = get_seg_parser().parse_args(string_args) + logger = setup_logger() + logger.info("Arguments: " + str(args)) + + cfg = setup_cfg(args) + + assert args.vocabulary in ["coco", "custom"] + if args.vocabulary == "custom": + raise NotImplementedError + elif args.vocabulary == "coco": + self.metadata = MetadataCatalog.get(BUILDIN_METADATA_PATH[args.vocabulary]) + classifier = BUILDIN_CLASSIFIER[args.vocabulary] + self.categories_mapping = { + 56: 0, # chair + 57: 1, # couch + 58: 2, # plant + 59: 3, # bed + 61: 4, # toilet + 62: 5, # tv + 60: 6, # table + 69: 7, # oven + 71: 8, # sink + 72: 9, # refrigerator + 73: 10, # book + 74: 11, # clock + 75: 12, # vase + 41: 13, # cup + 39: 14, # bottle + } + + self.num_sem_categories = len(self.categories_mapping) + num_classes = len(self.metadata.thing_classes) + self.instance_mode = ColorMode.IMAGE + self.demo = VisualizationDemo(cfg, classifier, num_classes) + + def get_predictions(self, img, visualize=0): + return self.demo.run_on_image(img, visualize=visualize) + + +def 
setup_cfg(args): + cfg = get_cfg() + # We forcefully use cpu here + cfg.MODEL.DEVICE = "cpu" + add_centernet_config(cfg) + add_detic_config(cfg) + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + # Set score_threshold for builtin models + cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold + cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = ( + args.confidence_threshold + ) + cfg.MODEL.ROI_BOX_HEAD.ZEROSHOT_WEIGHT_PATH = "rand" # load later + cfg.MODEL.ROI_HEADS.ONE_CLASS_PER_PROPOSAL = True + cfg.MODEL.ROI_BOX_HEAD.CAT_FREQ_PATH = ( + ROOT_DETIC + "Detic/" + cfg.MODEL.ROI_BOX_HEAD.CAT_FREQ_PATH + ) + # Fix cfg paths given we're not running from the Detic folder + cfg.MODEL.TEST_CLASSIFIERS[0] = ( + ROOT_DETIC + "Detic/" + cfg.MODEL.TEST_CLASSIFIERS[0] + ) + cfg.MODEL.TEST_CLASSIFIERS[1] = ( + ROOT_DETIC + "Detic/" + cfg.MODEL.TEST_CLASSIFIERS[1] + ) + cfg.freeze() + return cfg + + +class VisualizationDemo(object): + def __init__(self, cfg, classifier, num_classes, instance_mode=ColorMode.IMAGE): + """ + Args: + cfg (CfgNode): + instance_mode (ColorMode): + """ + self.metadata = MetadataCatalog.get( + cfg.DATASETS.TEST[0] if len(cfg.DATASETS.TEST) else "__unused" + ) + self.cpu_device = torch.device("cpu") + self.instance_mode = instance_mode + self.predictor = BatchPredictor(cfg) + + if type(classifier) == pathlib.PosixPath: + classifier = str(classifier) + reset_cls_test(self.predictor.model, classifier, num_classes) + + def run_on_image(self, image_list, visualize=0): + """ + Args: + image (np.ndarray): an image of shape (H, W, C) (in BGR order). + This is the format used by OpenCV. + + Returns: + predictions (dict): the output of the model. + vis_output (VisImage): the visualized image output. + """ + vis_output = None + all_predictions = self.predictor(image_list) + + # Convert image from OpenCV BGR format to Matplotlib RGB format. + if visualize: + predictions = all_predictions[0] + image = image_list[0] + visualizer = Visualizer( + image, self.metadata, instance_mode=self.instance_mode + ) + if "panoptic_seg" in predictions: + panoptic_seg, segments_info = predictions["panoptic_seg"] + vis_output = visualizer.draw_panoptic_seg_predictions( + panoptic_seg.to(self.cpu_device), segments_info + ) + else: + if "sem_seg" in predictions: + vis_output = visualizer.draw_sem_seg( + predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) + ) + if "instances" in predictions: + instances = predictions["instances"].to(self.cpu_device) + vis_output = visualizer.draw_instance_predictions( + predictions=instances + ) + + return all_predictions, vis_output + + +def get_seg_parser(): + parser = argparse.ArgumentParser(description="Detectron2 demo for builtin models") + parser.add_argument( + "--config-file", + default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", + metavar="FILE", + help="path to config file", + ) + parser.add_argument( + "--webcam", action="store_true", help="Take inputs from webcam." + ) + parser.add_argument("--video-input", help="Path to video file.") + parser.add_argument( + "--input", nargs="+", help="A list of space separated input images" + ) + parser.add_argument( + "--output", + help="A file or directory to save output visualizations. 
" + "If not given, will show output in an OpenCV window.", + ) + parser.add_argument( + "--vocabulary", + default="lvis", + choices=["lvis", "openimages", "objects365", "coco", "custom"], + help="", + ) + parser.add_argument( + "--custom_vocabulary", + default="", + help="", + ) + parser.add_argument( + "--confidence-threshold", + type=float, + default=0.1, + help="Minimum score for instance predictions to be shown", + ) + parser.add_argument( + "--opts", + help="Modify config options using the command-line 'KEY VALUE' pairs", + default=[], + nargs=argparse.REMAINDER, + ) + return parser + + +class BatchPredictor: + """ + Create a simple end-to-end predictor with the given config that runs on + single device for a list of input images. + + Compared to using the model directly, this class does the following + additions: + + 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. + 2. Always take BGR image as the input and apply conversion defined by + `cfg.INPUT.FORMAT`. + 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. + 4. Take a list of input images + + Attributes: + metadata (Metadata): the metadata of the underlying dataset, obtained + from cfg.DATASETS.TEST. + + """ + + def __init__(self, cfg): + self.cfg = cfg.clone() # cfg can be modified by model + self.model = build_model(self.cfg) + self.model.eval() + self.metadata = MetadataCatalog.get(cfg.DATASETS.TEST[0]) + + checkpointer = DetectionCheckpointer(self.model) + checkpointer.load(cfg.MODEL.WEIGHTS) + + self.input_format = cfg.INPUT.FORMAT + assert self.input_format in ["RGB", "BGR"], self.input_format + + def __call__(self, image_list): + """ + Args: + image_list (list of np.ndarray): a list of images of + shape (H, W, C) (in BGR order). + + Returns: + predictions (dict): + the output of the model for all images. + See :doc:`/tutorials/models` for details about the format. + """ + inputs = [] + for original_image in image_list: + # https://github.com/sphinx-doc/sphinx/issues/4258 + # Apply pre-processing to image. 
+ if self.input_format == "RGB": + # whether the model expects BGR inputs or RGB + original_image = original_image[:, :, ::-1] + height, width = original_image.shape[:2] + image = original_image + image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) + + instance = {"image": image, "height": height, "width": width} + + inputs.append(instance) + + with torch.no_grad(): + predictions = self.model(inputs) + return predictions diff --git a/semantic_exploration/agents/utils/fmm_planner.py b/semantic_exploration/agents/utils/fmm_planner.py new file mode 100644 index 0000000..5256498 --- /dev/null +++ b/semantic_exploration/agents/utils/fmm_planner.py @@ -0,0 +1,153 @@ +import cv2 +import numpy as np +import skfmm +import skimage +from numpy import ma + + +def get_mask(sx, sy, scale, step_size): + size = int(step_size // scale) * 2 + 1 + mask = np.zeros((size, size)) + for i in range(size): + for j in range(size): + if ((i + 0.5) - (size // 2 + sx)) ** 2 + ( + (j + 0.5) - (size // 2 + sy) + ) ** 2 <= step_size**2 and ((i + 0.5) - (size // 2 + sx)) ** 2 + ( + (j + 0.5) - (size // 2 + sy) + ) ** 2 > ( + step_size - 1 + ) ** 2: + mask[i, j] = 1 + + mask[size // 2, size // 2] = 1 + return mask + + +def get_dist(sx, sy, scale, step_size): + size = int(step_size // scale) * 2 + 1 + mask = np.zeros((size, size)) + 1e-10 + for i in range(size): + for j in range(size): + if ((i + 0.5) - (size // 2 + sx)) ** 2 + ( + (j + 0.5) - (size // 2 + sy) + ) ** 2 <= step_size**2: + mask[i, j] = max( + 5, + ( + ((i + 0.5) - (size // 2 + sx)) ** 2 + + ((j + 0.5) - (size // 2 + sy)) ** 2 + ) + ** 0.5, + ) + return mask + + +class FMMPlanner: + def __init__(self, traversible, scale=1, step_size=5): + self.scale = scale + self.step_size = step_size + if scale != 1.0: + self.traversible = cv2.resize( + traversible, + (traversible.shape[1] // scale, traversible.shape[0] // scale), + interpolation=cv2.INTER_NEAREST, + ) + self.traversible = np.rint(self.traversible) + else: + self.traversible = traversible + + self.du = int(self.step_size / (self.scale * 1.0)) + self.fmm_dist = None + + def set_goal(self, goal, auto_improve=False): + traversible_ma = ma.masked_values(self.traversible * 1, 0) + goal_x, goal_y = int(goal[0] / (self.scale * 1.0)), int( + goal[1] / (self.scale * 1.0) + ) + + if self.traversible[goal_x, goal_y] == 0.0 and auto_improve: + goal_x, goal_y = self._find_nearest_goal([goal_x, goal_y]) + + traversible_ma[goal_x, goal_y] = 0 + dd = skfmm.distance(traversible_ma, dx=1) + dd = ma.filled(dd, np.max(dd) + 1) + self.fmm_dist = dd + return + + def set_multi_goal(self, goal_map): + traversible_ma = ma.masked_values(self.traversible * 1, 0) + traversible_ma[goal_map == 1] = 0 + dd = skfmm.distance(traversible_ma, dx=1) + dd = ma.filled(dd, np.max(dd) + 1) + self.fmm_dist = dd + return + + def get_short_term_goal(self, state): + scale = self.scale * 1.0 + state = [x / scale for x in state] + dx, dy = state[0] - int(state[0]), state[1] - int(state[1]) + mask = get_mask(dx, dy, scale, self.step_size) + dist_mask = get_dist(dx, dy, scale, self.step_size) + + state = [int(x) for x in state] + + dist = np.pad( + self.fmm_dist, + self.du, + "constant", + constant_values=self.fmm_dist.shape[0] ** 2, + ) + subset = dist[ + state[0] : state[0] + 2 * self.du + 1, state[1] : state[1] + 2 * self.du + 1 + ] + + assert ( + subset.shape[0] == 2 * self.du + 1 and subset.shape[1] == 2 * self.du + 1 + ), "Planning error: unexpected subset shape {}".format(subset.shape) + + subset *= mask + subset += (1 - mask) * 
self.fmm_dist.shape[0] ** 2 + + if subset[self.du, self.du] < self.step_size: # < 0.25 * 100 / 5.: # 25cm + stop = True + else: + stop = False + + subset -= subset[self.du, self.du] + ratio1 = subset / dist_mask + subset[ratio1 < -1.5] = 1 + + # Find the smallest number index + (stg_x, stg_y) = np.unravel_index(np.argmin(subset), subset.shape) + + if subset[stg_x, stg_y] > -0.0001: + replan = True + else: + replan = False + + return ( + (stg_x + state[0] - self.du) * scale, + (stg_y + state[1] - self.du) * scale, + replan, + stop, + ) + + def _find_nearest_goal(self, goal): + traversible = ( + skimage.morphology.binary_dilation( + np.zeros(self.traversible.shape), skimage.morphology.disk(2) + ) + != True # noqa + ) + traversible = traversible * 1.0 + planner = FMMPlanner(traversible) + planner.set_goal(goal) + + mask = self.traversible + + dist_map = planner.fmm_dist * mask + dist_map[dist_map == 0] = dist_map.max() + + goal = np.unravel_index(dist_map.argmin(), dist_map.shape) + + return goal diff --git a/semantic_exploration/agents/utils/owlvit_semantic_prediction.py b/semantic_exploration/agents/utils/owlvit_semantic_prediction.py new file mode 100644 index 0000000..f40614b --- /dev/null +++ b/semantic_exploration/agents/utils/owlvit_semantic_prediction.py @@ -0,0 +1,107 @@ +# The following code is largely borrowed from +# https://github.com/facebookresearch/detectron2/blob/master/demo/demo.py and +# https://github.com/facebookresearch/detectron2/blob/master/demo/predictor.py + +import sys +from pathlib import Path +ROOT_DETIC = str(Path(__file__).resolve().parent).split("third_party")[0]+"third_party/" +sys.path.insert(0, ROOT_DETIC + "Detic/third_party/CenterNet2") +sys.path.insert(0, ROOT_DETIC + "Detic") + +import argparse # noqa: E402 +import pathlib # noqa: E402 +import time # noqa: E402 +from pathlib import Path # noqa: E402 + +import cv2 # noqa: E402 +import numpy as np # noqa: E402 +import torch # noqa: E402 +from third_party.semantic_exploration.constants import coco_categories, coco_categories_mapping # noqa: E402 +from PIL import Image # noqa: E402 +from transformers import OwlViTForObjectDetection, OwlViTProcessor # noqa: E402 + + +class SemanticPredOwlvit: + def __init__(self, config): + self.config = config + # Get the device + self.device = ( + torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + ) + # Get the owlvit model + self.model = OwlViTForObjectDetection.from_pretrained( + "google/owlvit-base-patch32" + ) + self.model.eval() + self.model.to(self.device) + # Define the prefix + self.prefix="an image of " + # Get the pretrained model + self.processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32") + # Get the meta info + labels = [] + for _key in coco_categories: + labels.append(self.prefix+_key) + self.labels = [labels] + self.score_threshold = 0.15 + + def get_prediction(self, img): + img = img[:, :, ::-1] + # Process inputs + inputs = self.processor(text=self.labels, images=img, return_tensors="pt") + target_sizes = torch.Tensor([img.shape[:2]]) + + # Inference + with torch.no_grad(): + outputs = self.model(**inputs) + + # Convert outputs (bounding boxes and class logits) to COCO API + results = self.processor.post_process( + outputs=outputs, target_sizes=target_sizes + ) + + # Process the image + img_i = 0 + boxes, scores, labels = ( + results[img_i]["boxes"], + results[img_i]["scores"], + results[img_i]["labels"], + ) + semantic_input = np.zeros((img.shape[0], img.shape[1], 16 + 1)) + for box, score, label in zip(boxes, 
scores, labels): + # Get the location of the bounding box + if score >= self.score_threshold: + top_left_x, top_left_y, bottom_right_x, bottom_right_y = [ + int(round(i, 0)) for i in box.tolist() + ] + semantic_input[ + top_left_x:bottom_right_x, top_left_y:bottom_right_y, int(label) + ] = 1 + if self.config.VISUALIZE is True and score >= self.score_threshold: + # Use this line code to add bounding box to the image + img = np.ascontiguousarray(img, dtype=np.uint8) + cv2.rectangle( + img, + (top_left_x, top_left_y), + (bottom_right_x, bottom_right_y), + (0, 0, 255), + 2, + ) + cv2.putText( + img, + self.labels[0][int(label)], + (top_left_x, top_left_y - 10), + cv2.FONT_HERSHEY_SIMPLEX, + 0.9, + (0, 0, 255), + 2, + ) + + return semantic_input, img + + +def compress_sem_map(sem_map): + c_map = np.zeros((sem_map.shape[1], sem_map.shape[2])) + for i in range(sem_map.shape[0]): + c_map[sem_map[i] > 0.0] = i + 1 + return c_map diff --git a/agents/utils/semantic_prediction.py b/semantic_exploration/agents/utils/semantic_prediction.py similarity index 84% rename from agents/utils/semantic_prediction.py rename to semantic_exploration/agents/utils/semantic_prediction.py index 3ce9675..3b70ee0 100644 --- a/agents/utils/semantic_prediction.py +++ b/semantic_exploration/agents/utils/semantic_prediction.py @@ -4,23 +4,20 @@ import argparse import time - -import torch +from pathlib import Path +import detectron2.data.transforms as T import numpy as np - +import torch +from third_party.semantic_exploration.constants import coco_categories_mapping +from detectron2.checkpoint import DetectionCheckpointer from detectron2.config import get_cfg -from detectron2.utils.logger import setup_logger from detectron2.data.catalog import MetadataCatalog from detectron2.modeling import build_model -from detectron2.checkpoint import DetectionCheckpointer +from detectron2.utils.logger import setup_logger from detectron2.utils.visualizer import ColorMode, Visualizer -import detectron2.data.transforms as T -from constants import coco_categories_mapping - - -class SemanticPredMaskRCNN(): +class SemanticPredMaskRCNN: def __init__(self, args): self.segmentation_model = ImageSegmentation(args) self.args = args @@ -31,39 +28,48 @@ def get_prediction(self, img): img = img[:, :, ::-1] image_list.append(img) seg_predictions, vis_output = self.segmentation_model.get_predictions( - image_list, visualize=args.visualize == 2) + image_list, visualize=args.visualize == 2 + ) if args.visualize == 2: img = vis_output.get_image() - semantic_input = np.zeros((img.shape[0], img.shape[1], 15 + 1)) + semantic_input = np.zeros( + (img.shape[0], img.shape[1], 16 + 1) + ) # self.args.num_sem_categories )) #15 + 1)) for j, class_idx in enumerate( - seg_predictions[0]['instances'].pred_classes.cpu().numpy()): + seg_predictions[0]["instances"].pred_classes.cpu().numpy() + ): if class_idx in list(coco_categories_mapping.keys()): idx = coco_categories_mapping[class_idx] - obj_mask = seg_predictions[0]['instances'].pred_masks[j] * 1. + obj_mask = seg_predictions[0]["instances"].pred_masks[j] * 1.0 semantic_input[:, :, idx] += obj_mask.cpu().numpy() - + # The shape of the semantic input is (480, 640, 17) return semantic_input, img def compress_sem_map(sem_map): c_map = np.zeros((sem_map.shape[1], sem_map.shape[2])) for i in range(sem_map.shape[0]): - c_map[sem_map[i] > 0.] 
= i + 1 + c_map[sem_map[i] > 0.0] = i + 1 return c_map -class ImageSegmentation(): +class ImageSegmentation: def __init__(self, args): + ROOT = str(Path(__file__).resolve().parent).split("third_party")[0]+"third_party/" + model_path = ROOT + "detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" string_args = """ - --config-file configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml + --config-file {} --input input1.jpeg --confidence-threshold {} --opts MODEL.WEIGHTS detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl - """.format(args.sem_pred_prob_thr) + """.format( + model_path, + args.sem_pred_prob_thr + ) if args.sem_gpu_id == -2: string_args += """ MODEL.DEVICE cpu""" @@ -71,7 +77,6 @@ def __init__(self, args): string_args += """ MODEL.DEVICE cuda:{}""".format(args.sem_gpu_id) string_args = string_args.split() - args = get_seg_parser().parse_args(string_args) logger = setup_logger() logger.info("Arguments: " + str(args)) @@ -91,15 +96,15 @@ def setup_cfg(args): # Set score_threshold for builtin models cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold - cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = \ + cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = ( args.confidence_threshold + ) cfg.freeze() return cfg def get_seg_parser(): - parser = argparse.ArgumentParser( - description="Detectron2 demo for builtin models") + parser = argparse.ArgumentParser(description="Detectron2 demo for builtin models") parser.add_argument( "--config-file", default="configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml", @@ -107,14 +112,12 @@ def get_seg_parser(): help="path to config file", ) parser.add_argument( - "--webcam", - action="store_true", - help="Take inputs from webcam.") + "--webcam", action="store_true", help="Take inputs from webcam." + ) parser.add_argument("--video-input", help="Path to video file.") parser.add_argument( - "--input", - nargs="+", - help="A list of space separated input images") + "--input", nargs="+", help="A list of space separated input images" + ) parser.add_argument( "--output", help="A file or directory to save output visualizations. 
" @@ -124,7 +127,7 @@ def get_seg_parser(): parser.add_argument( "--confidence-threshold", type=float, - default=0.5, + default=0.1, help="Minimum score for instance predictions to be shown", ) parser.add_argument( @@ -169,7 +172,8 @@ def run_on_image(self, image_list, visualize=0): predictions = all_predictions[0] image = image_list[0] visualizer = Visualizer( - image, self.metadata, instance_mode=self.instance_mode) + image, self.metadata, instance_mode=self.instance_mode + ) if "panoptic_seg" in predictions: panoptic_seg, segments_info = predictions["panoptic_seg"] vis_output = visualizer.draw_panoptic_seg_predictions( @@ -178,13 +182,13 @@ def run_on_image(self, image_list, visualize=0): else: if "sem_seg" in predictions: vis_output = visualizer.draw_sem_seg( - predictions["sem_seg"].argmax( - dim=0).to(self.cpu_device) + predictions["sem_seg"].argmax(dim=0).to(self.cpu_device) ) if "instances" in predictions: instances = predictions["instances"].to(self.cpu_device) vis_output = visualizer.draw_instance_predictions( - predictions=instances) + predictions=instances + ) return all_predictions, vis_output diff --git a/agents/utils/visualization.py b/semantic_exploration/agents/utils/visualization.py similarity index 61% rename from agents/utils/visualization.py rename to semantic_exploration/agents/utils/visualization.py index 16b3d40..a269bc5 100644 --- a/agents/utils/visualization.py +++ b/semantic_exploration/agents/utils/visualization.py @@ -4,14 +4,16 @@ def get_contour_points(pos, origin, size=20): x, y, o = pos - pt1 = (int(x) + origin[0], - int(y) + origin[1]) - pt2 = (int(x + size / 1.5 * np.cos(o + np.pi * 4 / 3)) + origin[0], - int(y + size / 1.5 * np.sin(o + np.pi * 4 / 3)) + origin[1]) - pt3 = (int(x + size * np.cos(o)) + origin[0], - int(y + size * np.sin(o)) + origin[1]) - pt4 = (int(x + size / 1.5 * np.cos(o - np.pi * 4 / 3)) + origin[0], - int(y + size / 1.5 * np.sin(o - np.pi * 4 / 3)) + origin[1]) + pt1 = (int(x) + origin[0], int(y) + origin[1]) + pt2 = ( + int(x + size / 1.5 * np.cos(o + np.pi * 4 / 3)) + origin[0], + int(y + size / 1.5 * np.sin(o + np.pi * 4 / 3)) + origin[1], + ) + pt3 = (int(x + size * np.cos(o)) + origin[0], int(y + size * np.sin(o)) + origin[1]) + pt4 = ( + int(x + size / 1.5 * np.cos(o - np.pi * 4 / 3)) + origin[0], + int(y + size / 1.5 * np.sin(o - np.pi * 4 / 3)) + origin[1], + ) return np.array([pt1, pt2, pt3, pt4]) @@ -20,7 +22,7 @@ def draw_line(start, end, mat, steps=25, w=1): for i in range(steps + 1): x = int(np.rint(start[0] + (end[0] - start[0]) * i / steps)) y = int(np.rint(start[1] + (end[1] - start[1]) * i / steps)) - mat[x - w:x + w, y - w:y + w] = 1 + mat[x - w : x + w, y - w : y + w] = 1 return mat @@ -35,17 +37,17 @@ def init_vis_image(goal_name, legend): textsize = cv2.getTextSize(text, font, fontScale, thickness)[0] textX = (640 - textsize[0]) // 2 + 15 textY = (50 + textsize[1]) // 2 - vis_image = cv2.putText(vis_image, text, (textX, textY), - font, fontScale, color, thickness, - cv2.LINE_AA) + vis_image = cv2.putText( + vis_image, text, (textX, textY), font, fontScale, color, thickness, cv2.LINE_AA + ) text = "Predicted Semantic Map" textsize = cv2.getTextSize(text, font, fontScale, thickness)[0] textX = 640 + (480 - textsize[0]) // 2 + 30 textY = (50 + textsize[1]) // 2 - vis_image = cv2.putText(vis_image, text, (textX, textY), - font, fontScale, color, thickness, - cv2.LINE_AA) + vis_image = cv2.putText( + vis_image, text, (textX, textY), font, fontScale, color, thickness, cv2.LINE_AA + ) # draw outlines color = [100, 100, 
100] @@ -60,6 +62,6 @@ def init_vis_image(goal_name, legend): # draw legend lx, ly, _ = legend.shape - vis_image[537:537 + lx, 155:155 + ly, :] = legend + vis_image[537 : 537 + lx, 155 : 155 + ly, :] = legend return vis_image diff --git a/semantic_exploration/constants.py b/semantic_exploration/constants.py new file mode 100644 index 0000000..ac92b95 --- /dev/null +++ b/semantic_exploration/constants.py @@ -0,0 +1,155 @@ +scenes = {} +scenes["train"] = [ + "Allensville", + "Beechwood", + "Benevolence", + "Coffeen", + "Cosmos", + "Forkland", + "Hanson", + "Hiteman", + "Klickitat", + "Lakeville", + "Leonardo", + "Lindenwood", + "Marstons", + "Merom", + "Mifflinburg", + "Newfields", + "Onaga", + "Pinesdale", + "Pomaria", + "Ranchester", + "Shelbyville", + "Stockman", + "Tolstoy", + "Wainscott", + "Woodbine", +] + +scenes["val"] = [ + "Collierville", + "Corozal", + "Darden", + "Markleeville", + "Wiconisco", +] + +coco_categories = { + "chair": 0, + "couch": 1, + "potted plant": 2, + "bed": 3, + "toilet": 4, + "tv": 5, + "dining-table": 6, + "oven": 7, + "sink": 8, + "refrigerator": 9, + "book": 10, + "clock": 11, + "vase": 12, + "cup": 13, + "bottle": 14, +} + +coco_categories_replica = { + "chair": 0, + "sofa": 1, + "plant": 2, + "bed": 3, + "toilet": 4, + "tv": 5, + "table": 6, + "oven": 7, + "sink": 8, + "fridge": 9, + "book": 10, + "clock": 11, + "vase": 12, + "cup": 13, + "bottle": 14, + "person": 15, +} + +coco_categories_mapping = { + 56: 0, # chair + 57: 1, # couch + 58: 2, # potted plant + 59: 3, # bed + 61: 4, # toilet + 62: 5, # tv + 60: 6, # dining-table + 69: 7, # oven + 71: 8, # sink + 72: 9, # refrigerator + 73: 10, # book + 74: 11, # clock + 75: 12, # vase + 41: 13, # cup + 39: 14, # bottle + 0: 15, # person +} + +color_palette = [ + 1.0, + 1.0, + 1.0, + 0.6, + 0.6, + 0.6, + 0.95, + 0.95, + 0.95, + 0.96, + 0.36, + 0.26, + 0.12156862745098039, + 0.47058823529411764, + 0.7058823529411765, + 0.9400000000000001, + 0.7818, + 0.66, + 0.9400000000000001, + 0.8868, + 0.66, + 0.8882000000000001, + 0.9400000000000001, + 0.66, + 0.7832000000000001, + 0.9400000000000001, + 0.66, + 0.6782000000000001, + 0.9400000000000001, + 0.66, + 0.66, + 0.9400000000000001, + 0.7468000000000001, + 0.66, + 0.9400000000000001, + 0.8518000000000001, + 0.66, + 0.9232, + 0.9400000000000001, + 0.66, + 0.8182, + 0.9400000000000001, + 0.66, + 0.7132, + 0.9400000000000001, + 0.7117999999999999, + 0.66, + 0.9400000000000001, + 0.8168, + 0.66, + 0.9400000000000001, + 0.9218, + 0.66, + 0.9400000000000001, + 0.9400000000000001, + 0.66, + 0.8531999999999998, + 0.9400000000000001, + 0.66, + 0.748199999999999, +] diff --git a/docs/DOCKER_INSTRUCTIONS.md b/semantic_exploration/docs/DOCKER_INSTRUCTIONS.md similarity index 100% rename from docs/DOCKER_INSTRUCTIONS.md rename to semantic_exploration/docs/DOCKER_INSTRUCTIONS.md diff --git a/docs/INSTRUCTIONS.md b/semantic_exploration/docs/INSTRUCTIONS.md similarity index 100% rename from docs/INSTRUCTIONS.md rename to semantic_exploration/docs/INSTRUCTIONS.md diff --git a/docs/example.gif b/semantic_exploration/docs/example.gif similarity index 100% rename from docs/example.gif rename to semantic_exploration/docs/example.gif diff --git a/docs/legend.png b/semantic_exploration/docs/legend.png similarity index 100% rename from docs/legend.png rename to semantic_exploration/docs/legend.png diff --git a/docs/overview.jpg b/semantic_exploration/docs/overview.jpg similarity index 100% rename from docs/overview.jpg rename to semantic_exploration/docs/overview.jpg diff --git 
a/envs/__init__.py b/semantic_exploration/envs/__init__.py similarity index 80% rename from envs/__init__.py rename to semantic_exploration/envs/__init__.py index 2098b62..9ea7d02 100755 --- a/envs/__init__.py +++ b/semantic_exploration/envs/__init__.py @@ -1,11 +1,9 @@ import torch -from .habitat import construct_envs - -def make_vec_envs(args): - envs = construct_envs(args) - envs = VecPyTorch(envs, args.device) +def make_vec_envs(args, is_slurm=False, is_eval=False): + envs, num_envs = construct_envs(args, is_slurm, is_eval) + envs = VecPyTorch(envs, num_envs, args.device) return envs @@ -13,11 +11,11 @@ def make_vec_envs(args): # https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/envs.py#L159 class VecPyTorch(): - def __init__(self, venv, device): + def __init__(self, venv, num_envs, device): self.venv = venv - self.num_envs = venv.num_envs - self.observation_space = venv.observation_space - self.action_space = venv.action_space + self.num_envs = num_envs + # self.observation_space = venv.observation_space + # self.action_space = venv.action_space self.device = device def reset(self): diff --git a/envs/utils/depth_utils.py b/semantic_exploration/envs/utils/depth_utils.py similarity index 99% rename from envs/utils/depth_utils.py rename to semantic_exploration/envs/utils/depth_utils.py index afe98e2..a6c430a 100644 --- a/envs/utils/depth_utils.py +++ b/semantic_exploration/envs/utils/depth_utils.py @@ -21,7 +21,7 @@ import numpy as np import torch -import envs.utils.rotation_utils as ru +import third_party.semantic_exploration.envs.utils.rotation_utils as ru def get_camera_matrix(width, height, fov): diff --git a/envs/utils/fmm_planner.py b/semantic_exploration/envs/utils/fmm_planner.py similarity index 90% rename from envs/utils/fmm_planner.py rename to semantic_exploration/envs/utils/fmm_planner.py index c2fd0bd..82bce12 100644 --- a/envs/utils/fmm_planner.py +++ b/semantic_exploration/envs/utils/fmm_planner.py @@ -37,7 +37,15 @@ def get_dist(sx, sy, scale, step_size): class FMMPlanner(): - def __init__(self, traversible, scale=1, step_size=5): + def __init__(self, traversible, scale=1, step_size=25): + """ + Arguments: + traversible: (M + 1, M + 1) binary map encoding traversible regions + scale: map scale + step_size: maximum distance of the short-term goal selected by the + planner + vis_dir: folder where to dump visualization + """ self.scale = scale self.step_size = step_size if scale != 1.: @@ -95,7 +103,7 @@ def get_short_term_goal(self, state): subset *= mask subset += (1 - mask) * self.fmm_dist.shape[0] ** 2 - if subset[self.du, self.du] < 0.25 * 100 / 5.: # 25cm + if subset[self.du, self.du] < self.step_size: #< 0.25 * 100 / 5.: # 25cm stop = True else: stop = False @@ -104,6 +112,7 @@ def get_short_term_goal(self, state): ratio1 = subset / dist_mask subset[ratio1 < -1.5] = 1 + # Find the smallest number index (stg_x, stg_y) = np.unravel_index(np.argmin(subset), subset.shape) if subset[stg_x, stg_y] > -0.0001: diff --git a/envs/utils/map_builder.py b/semantic_exploration/envs/utils/map_builder.py similarity index 100% rename from envs/utils/map_builder.py rename to semantic_exploration/envs/utils/map_builder.py diff --git a/envs/utils/pose.py b/semantic_exploration/envs/utils/pose.py similarity index 100% rename from envs/utils/pose.py rename to semantic_exploration/envs/utils/pose.py diff --git a/envs/utils/rotation_utils.py b/semantic_exploration/envs/utils/rotation_utils.py similarity index 100% rename from envs/utils/rotation_utils.py 
rename to semantic_exploration/envs/utils/rotation_utils.py diff --git a/semantic_exploration/models/__init__.py b/semantic_exploration/models/__init__.py new file mode 100644 index 0000000..e50ca4e --- /dev/null +++ b/semantic_exploration/models/__init__.py @@ -0,0 +1,3 @@ +from third_party.semantic_exploration.models.owlvit import OwlVit +from third_party.semantic_exploration.models.semantic_map import Semantic_Mapping +from third_party.semantic_exploration.models.sentence_similarity import SentenceSimilarity diff --git a/semantic_exploration/models/owlvit.py b/semantic_exploration/models/owlvit.py new file mode 100644 index 0000000..7b4e939 --- /dev/null +++ b/semantic_exploration/models/owlvit.py @@ -0,0 +1,241 @@ +# mypy: ignore-errors +import argparse +import time + +import cv2 +import torch +from PIL import Image +from transformers import OwlViTForObjectDetection, OwlViTProcessor + + +class OwlVit: + def __init__(self, labels, score_threshold, show_img): + # self.device = torch.device('cpu') + self.device = ( + torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") + ) + + self.model = OwlViTForObjectDetection.from_pretrained( + "google/owlvit-base-patch32" + ) + self.model.eval() + self.model.to(self.device) + + self.processor = OwlViTProcessor.from_pretrained("google/owlvit-base-patch32") + + self.labels = labels + self.score_threshold = score_threshold + self.show_img = show_img + + def run_inference(self, img): + """ + img: an open cv image in (H, W, C) format + """ + # Process inputs + # img = img.to(self.device) + inputs = self.processor(text=self.labels, images=img, return_tensors="pt") + + # Target image sizes (height, width) to rescale box predictions [batch_size, 2] + # target_sizes = torch.Tensor([img.size[::-1]]) this is for PIL images + target_sizes = torch.Tensor([img.shape[:2]]).to(self.device) + inputs = inputs.to(self.device) + + # Inference + with torch.no_grad(): + outputs = self.model(**inputs) + + # Convert outputs (bounding boxes and class logits) to COCO API + results = self.processor.post_process( + outputs=outputs, target_sizes=target_sizes + ) + # img = img.to('cpu') + + if self.show_img: + self.show_img_with_overlaid_bounding_boxes(img, results) + + return self.get_most_confident_bounding_box_per_label(results) + + def run_inference_and_return_img(self, img): + """ + img: an open cv image in (H, W, C) format + """ + # img = img.to(self.device) + + inputs = self.processor(text=self.labels, images=img, return_tensors="pt") + target_sizes = torch.Tensor([img.shape[:2]]).to(self.device) + inputs = inputs.to(self.device) + # Inference + with torch.no_grad(): + outputs = self.model(**inputs) + + # Convert outputs (bounding boxes and class logits) to COCO API + results = self.processor.post_process( + outputs=outputs, target_sizes=target_sizes + ) + # img = img.to('cpu') + # if self.show_img: + # self.show_img_with_overlaid_bounding_boxes(img, results) + + return self.get_most_confident_bounding_box_per_label( + results + ), self.create_img_with_bounding_box(img, results) + + def show_img_with_overlaid_bounding_boxes(self, img, results): + img = self.create_img_with_bounding_box(img, results) + cv2.imshow("img", img) + cv2.waitKey(1) + + def get_bounding_boxes(self, results): + """ + Returns all bounding boxes with a score above the threshold + """ + boxes, scores, labels = ( + results[0]["boxes"], + results[0]["scores"], + results[0]["labels"], + ) + boxes = boxes.to("cpu") + labels = labels.to("cpu") + scores = scores.to("cpu") + + 
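# Keep every detection whose score is at or above self.score_threshold;
+ # each entry is [label string, score, [x1, y1, x2, y2]] with box corners
+ # in pixel coordinates (post_process already rescaled them to image size).
+ 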
target_boxes = [] + for box, score, label in zip(boxes, scores, labels): + box = [round(i, 2) for i in box.tolist()] + if score >= self.score_threshold: + target_boxes.append([self.labels[0][label.item()], score.item(), box]) + + return target_boxes + + def get_most_confident_bounding_box(self, results): + """ + Returns the most confident bounding box + """ + boxes, scores, labels = ( + results[0]["boxes"], + results[0]["scores"], + results[0]["labels"], + ) + boxes = boxes.to("cpu") + labels = labels.to("cpu") + scores = scores.to("cpu") + + target_box = [] + target_score = -float("inf") + + for box, score, label in zip(boxes, scores, labels): + box = [round(i, 2) for i in box.tolist()] + if score >= self.score_threshold: + if score > target_score: + target_score = score + target_box = box + + if target_score == -float("inf"): + return None + else: + x1 = int(target_box[0]) + y1 = int(target_box[1]) + x2 = int(target_box[2]) + y2 = int(target_box[3]) + + print("location:", x1, y1, x2, y2) + return x1, y1, x2, y2 + + def get_most_confident_bounding_box_per_label(self, results): + """ + Returns the most confident bounding box for each label above the threshold + """ + boxes, scores, labels = ( + results[0]["boxes"], + results[0]["scores"], + results[0]["labels"], + ) + boxes = boxes.to("cpu") + labels = labels.to("cpu") + scores = scores.to("cpu") + + # Initialize dictionaries to store most confident bounding boxes and scores per label + target_boxes = {} + target_scores = {} + + for box, score, label in zip(boxes, scores, labels): + box = [round(i, 2) for i in box.tolist()] + if score >= self.score_threshold: + # If the current score is higher than the stored score for this label, update the target box and score + if ( + label.item() not in target_scores + or score > target_scores[label.item()] + ): + target_scores[label.item()] = score.item() + target_boxes[label.item()] = box + + # Format the output + result = [] + for label, box in target_boxes.items(): + x1 = int(box[0]) + y1 = int(box[1]) + x2 = int(box[2]) + y2 = int(box[3]) + + result.append( + [self.labels[0][label], target_scores[label], [x1, y1, x2, y2]] + ) + + return result + + def create_img_with_bounding_box(self, img, results): + """ + Returns an image with all bounding boxes avove the threshold overlaid + """ + + results = self.get_most_confident_bounding_box_per_label(results) + font = cv2.FONT_HERSHEY_SIMPLEX + + for label, score, box in results: + img = cv2.rectangle(img, box[:2], box[2:], (255, 0, 0), 5) + if box[3] + 25 > 768: + y = box[3] - 10 + else: + y = box[3] + 25 + img = cv2.putText( + img, label, (box[0], y), font, 1, (255, 0, 0), 2, cv2.LINE_AA + ) + + return img + + def update_label(self, labels): + self.labels = labels + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--file", + type=str, + default="/home/akshara/spot/spot_rl_experiments/spot_rl/grasp_visualizations/1650841878.2699108.png", + ) + parser.add_argument("--score_threshold", type=float, default=0.1) + parser.add_argument("--show_img", type=bool, default=True) + parser.add_argument( + "--labels", + type=list, + default=[ + [ + "lion plush", + "penguin plush", + "teddy bear", + "bear plush", + "caterpilar plush", + "ball plush", + "rubiks cube", + ] + ], + ) + args = parser.parse_args() + + file = args.file + img = cv2.imread(file) + + V = OwlVit(args.labels, args.score_threshold, args.show_img) + results = V.run_inference(img) + # Keep the window open for 10 seconds + time.sleep(10) diff --git 
a/semantic_exploration/models/semantic_map.py b/semantic_exploration/models/semantic_map.py new file mode 100644 index 0000000..1b866d0 --- /dev/null +++ b/semantic_exploration/models/semantic_map.py @@ -0,0 +1,240 @@ +import numpy as np +import third_party.semantic_exploration.envs.utils.depth_utils as du +import torch +import torch.nn as nn +from torch.nn import functional as F + + +class ChannelPool(nn.MaxPool1d): + def forward(self, x): + n, c, w, h = x.size() + x = x.view(n, c, w * h).permute(0, 2, 1) + x = x.contiguous() + pooled = F.max_pool1d(x, c, 1) + _, _, c = pooled.size() + pooled = pooled.permute(0, 2, 1) + return pooled.view(n, c, w, h) + + +class Semantic_Mapping(nn.Module): + + """ + Semantic_Mapping + """ + + def __init__(self, config): + super(Semantic_Mapping, self).__init__() + + self.device = config.DEVICE + self.screen_h = config.FRAME_HEIGHT + self.screen_w = config.FRAME_WIDTH + self.resolution = config.MAP_RESOLUTION + self.z_resolution = config.MAP_RESOLUTION + self.map_size_cm = config.MAP_SIZE_CM // config.GLOBAL_DOWNSCALING + self.n_channels = 3 + self.vision_range = config.VISION_RANGE + self.dropout = 0.5 + self.fov = config.HFOV + self.du_scale = config.DU_SCALE + self.cat_pred_threshold = config.CAT_PRED_THRESHOLD + self.exp_pred_threshold = config.EXP_PRED_THRESHOLD + self.map_pred_threshold = config.MAP_PRED_THRESHOLD + self.num_sem_categories = config.NUM_SEM_CATEGORIES + + self.max_height = int(180 / self.z_resolution) + self.min_height = int(-40 / self.z_resolution) + self.agent_height = config.CAMERA_HEIGHT * 100.0 + self.shift_loc = [self.vision_range * self.resolution // 2, 0, np.pi / 2.0] + self.camera_matrix = du.get_camera_matrix( + self.screen_w, self.screen_h, self.fov + ) + + self.pool = ChannelPool(1) + + vr = self.vision_range + + self.init_grid = ( + torch.zeros( + config.NUM_PROCESSES, + 1 + self.num_sem_categories, + vr, + vr, + self.max_height - self.min_height, + ) + .float() + .to(self.device) + ) + self.feat = ( + torch.ones( + config.NUM_PROCESSES, + 1 + self.num_sem_categories, + self.screen_h // self.du_scale * self.screen_w // self.du_scale, + ) + .float() + .to(self.device) + ) + + def forward(self, obs, pose_obs, maps_last, poses_last): + bs, c, h, w = obs.size() + depth = obs[:, 3, :, :] + point_cloud_t = du.get_point_cloud_from_z_t( + depth, self.camera_matrix, self.device, scale=self.du_scale + ) + + agent_view_t = du.transform_camera_view_t( + point_cloud_t, self.agent_height, 0, self.device + ) + + agent_view_centered_t = du.transform_pose_t( + agent_view_t, self.shift_loc, self.device + ) + + max_h = self.max_height + min_h = self.min_height + xy_resolution = self.resolution + z_resolution = self.z_resolution + vision_range = self.vision_range + XYZ_cm_std = agent_view_centered_t.float() + XYZ_cm_std[..., :2] = XYZ_cm_std[..., :2] / xy_resolution + XYZ_cm_std[..., :2] = ( + (XYZ_cm_std[..., :2] - vision_range // 2.0) / vision_range * 2.0 + ) + XYZ_cm_std[..., 2] = XYZ_cm_std[..., 2] / z_resolution + XYZ_cm_std[..., 2] = ( + (XYZ_cm_std[..., 2] - (max_h + min_h) // 2.0) / (max_h - min_h) * 2.0 + ) + self.feat[:, 1:, :] = nn.AvgPool2d(self.du_scale)(obs[:, 4:, :, :]).view( + bs, c - 4, h // self.du_scale * w // self.du_scale + ) + + XYZ_cm_std = XYZ_cm_std.permute(0, 3, 1, 2) + XYZ_cm_std = XYZ_cm_std.view( + XYZ_cm_std.shape[0], + XYZ_cm_std.shape[1], + XYZ_cm_std.shape[2] * XYZ_cm_std.shape[3], + ) + + voxels = du.splat_feat_nd( + self.init_grid * 0.0, self.feat, XYZ_cm_std + ).transpose(2, 3) + + min_z = int(25 / 
z_resolution - min_h) + max_z = int((self.agent_height + 1) / z_resolution - min_h) + + agent_height_proj = voxels[..., min_z:max_z].sum(4) + all_height_proj = voxels.sum(4) + + fp_map_pred = agent_height_proj[:, 0:1, :, :] + fp_exp_pred = all_height_proj[:, 0:1, :, :] + fp_map_pred = fp_map_pred / self.map_pred_threshold + fp_exp_pred = fp_exp_pred / self.exp_pred_threshold + fp_map_pred = torch.clamp(fp_map_pred, min=0.0, max=1.0) + fp_exp_pred = torch.clamp(fp_exp_pred, min=0.0, max=1.0) + + pose_pred = poses_last + + agent_view = torch.zeros( + bs, + c, + self.map_size_cm // self.resolution, + self.map_size_cm // self.resolution, + ).to(self.device) + + x1 = self.map_size_cm // (self.resolution * 2) - self.vision_range // 2 + x2 = x1 + self.vision_range + y1 = self.map_size_cm // (self.resolution * 2) + y2 = y1 + self.vision_range + + agent_view[:, 0:1, y1:y2, x1:x2] = fp_map_pred + agent_view[:, 1:2, y1:y2, x1:x2] = fp_exp_pred + agent_view[:, 4:, y1:y2, x1:x2] = torch.clamp( + agent_height_proj[:, 1:, :, :] / self.cat_pred_threshold, min=0.0, max=1.0 + ) + + corrected_pose = pose_obs + + def get_new_pose_batch(pose, rel_pose_change): + + pose[:, 1] += rel_pose_change[:, 0] * torch.sin( + pose[:, 2] / 57.29577951308232 + ) + rel_pose_change[:, 1] * torch.cos(pose[:, 2] / 57.29577951308232) + pose[:, 0] += rel_pose_change[:, 0] * torch.cos( + pose[:, 2] / 57.29577951308232 + ) - rel_pose_change[:, 1] * torch.sin(pose[:, 2] / 57.29577951308232) + pose[:, 2] += rel_pose_change[:, 2] * 57.29577951308232 + + pose[:, 2] = torch.fmod(pose[:, 2] - 180.0, 360.0) + 180.0 + pose[:, 2] = torch.fmod(pose[:, 2] + 180.0, 360.0) - 180.0 + + return pose + + current_poses = get_new_pose_batch(poses_last, corrected_pose) + st_pose = current_poses.clone().detach() + + st_pose[:, :2] = -( + st_pose[:, :2] * 100.0 / self.resolution + - self.map_size_cm // (self.resolution * 2) + ) / (self.map_size_cm // (self.resolution * 2)) + st_pose[:, 2] = 90.0 - (st_pose[:, 2]) + + rot_mat, trans_mat = self.get_grid(st_pose, agent_view.size(), self.device) + + rotated = F.grid_sample(agent_view, rot_mat, align_corners=True) + translated = F.grid_sample(rotated, trans_mat, align_corners=True) + + # Remove people in the last map if found new people + if translated[:, 19, :, :].sum() > 0.99: + maps_last[:, 19, :, :] = 0 + + maps2 = torch.cat((maps_last.unsqueeze(1), translated.unsqueeze(1)), 1) + + map_pred, _ = torch.max(maps2, 1) + + if np.sum(np.array(map_pred)[0, 1, :, :]) == 0: + import pdb + + pdb.set_trace() + + return fp_map_pred, map_pred, pose_pred, current_poses + + @staticmethod + def get_grid(pose, grid_size, device): + """ + Input: + `pose` FloatTensor(bs, 3) + `grid_size` 4-tuple (bs, _, grid_h, grid_w) + `device` torch.device (cpu or gpu) + Output: + `rot_grid` FloatTensor(bs, grid_h, grid_w, 2) + `trans_grid` FloatTensor(bs, grid_h, grid_w, 2) + + """ + pose = pose.float() + x = pose[:, 0] + y = pose[:, 1] + t = pose[:, 2] + + t = t * np.pi / 180.0 + cos_t = t.cos() + sin_t = t.sin() + + theta11 = torch.stack( + [cos_t, -sin_t, torch.zeros(cos_t.shape).float().to(device)], 1 + ) + theta12 = torch.stack( + [sin_t, cos_t, torch.zeros(cos_t.shape).float().to(device)], 1 + ) + theta1 = torch.stack([theta11, theta12], 1) + + theta21 = torch.stack( + [torch.ones(x.shape).to(device), -torch.zeros(x.shape).to(device), x], 1 + ) + theta22 = torch.stack( + [torch.zeros(x.shape).to(device), torch.ones(x.shape).to(device), y], 1 + ) + theta2 = torch.stack([theta21, theta22], 1) + + rot_grid = F.affine_grid(theta1, 
torch.Size(grid_size)) + trans_grid = F.affine_grid(theta2, torch.Size(grid_size)) + + return rot_grid, trans_grid diff --git a/semantic_exploration/models/sentence_similarity.py b/semantic_exploration/models/sentence_similarity.py new file mode 100644 index 0000000..f574a63 --- /dev/null +++ b/semantic_exploration/models/sentence_similarity.py @@ -0,0 +1,73 @@ +import torch +import torch.nn.functional as F +from transformers import AutoModel, AutoTokenizer + + +class SentenceSimilarity: + def __init__(self): + # Load model from HuggingFace Hub + self.tokenizer = AutoTokenizer.from_pretrained( + "sentence-transformers/all-MiniLM-L6-v2" + ) + self.model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") + + def mean_pooling(self, model_output, attention_mask): + # Mean Pooling - Take attention mask into account for correct averaging + + token_embeddings = model_output[ + 0 + ] # First element of model_output contains all token embeddings + input_mask_expanded = ( + attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() + ) + return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp( + input_mask_expanded.sum(1), min=1e-9 + ) + + def get_similarity_two_sentences(self, a, b): + sentences = [a, b] + + # Tokenize sentences + encoded_input = self.tokenizer( + sentences, padding=True, truncation=True, return_tensors="pt" + ) + + # Compute token embeddings + with torch.no_grad(): + model_output = self.model(**encoded_input) + + # Perform pooling + sentence_embeddings = self.mean_pooling( + model_output, encoded_input["attention_mask"] + ) + + # Normalize embeddings + sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) + + # compute cosine similarity between embeddings + cosine_scores = sentence_embeddings[0] @ sentence_embeddings[1].T + return cosine_scores + + def get_most_similar_in_list(self, query_word, list): + sentences = [query_word] + [word.replace("_", " ") for word in list] + encoded_input = self.tokenizer( + sentences, padding=True, truncation=True, return_tensors="pt" + ) + with torch.no_grad(): + model_output = self.model(**encoded_input) + + # Perform pooling + sentence_embeddings = self.mean_pooling( + model_output, encoded_input["attention_mask"] + ) + + # Normalize embeddings + sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1) + + # compute cosine similarity between embeddings + cosine_scores = sentence_embeddings[0] @ sentence_embeddings[1:].T + print( + f"word queried : {query_word} | word list : {list} | cosine scores : {cosine_scores}" + ) + + return list[torch.argmax(cosine_scores).item()] diff --git a/test.py b/test.py deleted file mode 100644 index 7a81a77..0000000 --- a/test.py +++ /dev/null @@ -1,39 +0,0 @@ -import os -import torch -import numpy as np - -from envs import make_vec_envs -from arguments import get_args - -os.environ["OMP_NUM_THREADS"] = "1" - -args = get_args() - -np.random.seed(args.seed) -torch.manual_seed(args.seed) - -if args.cuda: - torch.cuda.manual_seed(args.seed) - - -def main(): - num_episodes = int(args.num_eval_episodes) - args.device = torch.device("cuda:0" if args.cuda else "cpu") - - torch.set_num_threads(1) - envs = make_vec_envs(args) - obs, infos = envs.reset() - - for ep_num in range(num_episodes): - for step in range(args.max_episode_length): - action = torch.randint(0, 3, (args.num_processes,)) - obs, rew, done, infos = envs.step(action) - - if done: - break - - print("Test successfully completed") - - -if __name__ == "__main__": - main() diff --git 
a/utils/distributions.py b/utils/distributions.py deleted file mode 100644 index cd025eb..0000000 --- a/utils/distributions.py +++ /dev/null @@ -1,61 +0,0 @@ -# The following code is largely borrowed from: -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/distributions.py - -import torch -import torch.nn as nn - -from utils.model import AddBias - -""" -Modify standard PyTorch distributions so they are compatible with this code. -""" - -FixedCategorical = torch.distributions.Categorical - -old_sample = FixedCategorical.sample -FixedCategorical.sample = lambda self: old_sample(self) - -log_prob_cat = FixedCategorical.log_prob -FixedCategorical.log_probs = lambda self, actions: \ - log_prob_cat(self, actions.squeeze(-1)) -FixedCategorical.mode = lambda self: self.probs.argmax(dim=1, keepdim=True) - -FixedNormal = torch.distributions.Normal -log_prob_normal = FixedNormal.log_prob -FixedNormal.log_probs = lambda self, actions: \ - log_prob_normal(self, actions).sum(-1, keepdim=False) - -entropy = FixedNormal.entropy -FixedNormal.entropy = lambda self: entropy(self).sum(-1) - -FixedNormal.mode = lambda self: self.mean - - -class Categorical(nn.Module): - - def __init__(self, num_inputs, num_outputs): - super(Categorical, self).__init__() - self.linear = nn.Linear(num_inputs, num_outputs) - - def forward(self, x): - x = self.linear(x) - return FixedCategorical(logits=x) - - -class DiagGaussian(nn.Module): - - def __init__(self, num_inputs, num_outputs): - super(DiagGaussian, self).__init__() - - self.fc_mean = nn.Linear(num_inputs, num_outputs) - self.logstd = AddBias(torch.zeros(num_outputs)) - - def forward(self, x): - action_mean = self.fc_mean(x) - - zeros = torch.zeros(action_mean.size()) - if x.is_cuda: - zeros = zeros.cuda() - - action_logstd = self.logstd(zeros) - return FixedNormal(action_mean, action_logstd.exp()) diff --git a/utils/model.py b/utils/model.py deleted file mode 100644 index e55b045..0000000 --- a/utils/model.py +++ /dev/null @@ -1,132 +0,0 @@ -import numpy as np -import torch -from torch import nn -from torch.nn import functional as F - - -def get_grid(pose, grid_size, device): - """ - Input: - `pose` FloatTensor(bs, 3) - `grid_size` 4-tuple (bs, _, grid_h, grid_w) - `device` torch.device (cpu or gpu) - Output: - `rot_grid` FloatTensor(bs, grid_h, grid_w, 2) - `trans_grid` FloatTensor(bs, grid_h, grid_w, 2) - - """ - pose = pose.float() - x = pose[:, 0] - y = pose[:, 1] - t = pose[:, 2] - - bs = x.size(0) - t = t * np.pi / 180. 
- cos_t = t.cos() - sin_t = t.sin() - - theta11 = torch.stack([cos_t, -sin_t, - torch.zeros(cos_t.shape).float().to(device)], 1) - theta12 = torch.stack([sin_t, cos_t, - torch.zeros(cos_t.shape).float().to(device)], 1) - theta1 = torch.stack([theta11, theta12], 1) - - theta21 = torch.stack([torch.ones(x.shape).to(device), - -torch.zeros(x.shape).to(device), x], 1) - theta22 = torch.stack([torch.zeros(x.shape).to(device), - torch.ones(x.shape).to(device), y], 1) - theta2 = torch.stack([theta21, theta22], 1) - - rot_grid = F.affine_grid(theta1, torch.Size(grid_size)) - trans_grid = F.affine_grid(theta2, torch.Size(grid_size)) - - return rot_grid, trans_grid - - -class ChannelPool(nn.MaxPool1d): - def forward(self, x): - n, c, w, h = x.size() - x = x.view(n, c, w * h).permute(0, 2, 1) - x = x.contiguous() - pooled = F.max_pool1d(x, c, 1) - _, _, c = pooled.size() - pooled = pooled.permute(0, 2, 1) - return pooled.view(n, c, w, h) - - -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/utils.py#L32 -class AddBias(nn.Module): - def __init__(self, bias): - super(AddBias, self).__init__() - self._bias = nn.Parameter(bias.unsqueeze(1)) - - def forward(self, x): - if x.dim() == 2: - bias = self._bias.t().view(1, -1) - else: - bias = self._bias.t().view(1, -1, 1, 1) - - return x + bias - - -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L10 -class Flatten(nn.Module): - def forward(self, x): - return x.view(x.size(0), -1) - - -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/model.py#L82 -class NNBase(nn.Module): - - def __init__(self, recurrent, recurrent_input_size, hidden_size): - - super(NNBase, self).__init__() - self._hidden_size = hidden_size - self._recurrent = recurrent - - if recurrent: - self.gru = nn.GRUCell(recurrent_input_size, hidden_size) - nn.init.orthogonal_(self.gru.weight_ih.data) - nn.init.orthogonal_(self.gru.weight_hh.data) - self.gru.bias_ih.data.fill_(0) - self.gru.bias_hh.data.fill_(0) - - @property - def is_recurrent(self): - return self._recurrent - - @property - def rec_state_size(self): - if self._recurrent: - return self._hidden_size - return 1 - - @property - def output_size(self): - return self._hidden_size - - def _forward_gru(self, x, hxs, masks): - if x.size(0) == hxs.size(0): - x = hxs = self.gru(x, hxs * masks[:, None]) - else: - # x is a (T, N, -1) tensor that has been flatten to (T * N, -1) - N = hxs.size(0) - T = int(x.size(0) / N) - - # unflatten - x = x.view(T, N, x.size(1)) - - # Same deal with masks - masks = masks.view(T, N, 1) - - outputs = [] - for i in range(T): - hx = hxs = self.gru(x[i], hxs * masks[i]) - outputs.append(hx) - - # x is a (T, N, -1) tensor - x = torch.stack(outputs, dim=0) - # flatten - x = x.view(T * N, -1) - - return x, hxs diff --git a/utils/optimization.py b/utils/optimization.py deleted file mode 100644 index 7f4050b..0000000 --- a/utils/optimization.py +++ /dev/null @@ -1,59 +0,0 @@ -import inspect -import re - -from torch import optim - - -def get_optimizer(parameters, s): - """ - Parse optimizer parameters. 
- Input should be of the form: - - "sgd,lr=0.01" - - "adagrad,lr=0.1,lr_decay=0.05" - """ - if "," in s: - method = s[:s.find(',')] - optim_params = {} - for x in s[s.find(',') + 1:].split(','): - split = x.split('=') - assert len(split) == 2 - assert re.match( - r"^[+-]?(\d+(\.\d*)?|\.\d+)$", - split[1]) is not None - optim_params[split[0]] = float(split[1]) - else: - method = s - optim_params = {} - - if method == 'adadelta': - optim_fn = optim.Adadelta - elif method == 'adagrad': - optim_fn = optim.Adagrad - elif method == 'adam': - optim_fn = optim.Adam - optim_params['betas'] = (optim_params.get('beta1', 0.5), - optim_params.get('beta2', 0.999)) - optim_params.pop('beta1', None) - optim_params.pop('beta2', None) - elif method == 'adamax': - optim_fn = optim.Adamax - elif method == 'asgd': - optim_fn = optim.ASGD - elif method == 'rmsprop': - optim_fn = optim.RMSprop - elif method == 'rprop': - optim_fn = optim.Rprop - elif method == 'sgd': - optim_fn = optim.SGD - assert 'lr' in optim_params - else: - raise Exception('Unknown optimization method: "%s"' % method) - - # check that we give good parameters to the optimizer - expected_args = inspect.getargspec(optim_fn.__init__)[0] - assert expected_args[:2] == ['self', 'params'] - if not all(k in expected_args[2:] for k in optim_params.keys()): - raise Exception('Unexpected parameters: expected "%s", got "%s"' % ( - str(expected_args[2:]), str(optim_params.keys()))) - - return optim_fn(parameters, **optim_params) diff --git a/utils/storage.py b/utils/storage.py deleted file mode 100644 index e71cac3..0000000 --- a/utils/storage.py +++ /dev/null @@ -1,203 +0,0 @@ -# The following code is largely borrowed from: -# https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail/blob/master/a2c_ppo_acktr/storage.py - -from collections import namedtuple - -import numpy as np -import torch -from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler - - -def _flatten_helper(T, N, _tensor): - return _tensor.view(T * N, *_tensor.size()[2:]) - - -class RolloutStorage(object): - - def __init__(self, num_steps, num_processes, obs_shape, action_space, - rec_state_size): - - if action_space.__class__.__name__ == 'Discrete': - self.n_actions = 1 - action_type = torch.long - else: - self.n_actions = action_space.shape[0] - action_type = torch.float32 - - self.obs = torch.zeros(num_steps + 1, num_processes, *obs_shape) - self.rec_states = torch.zeros(num_steps + 1, num_processes, - rec_state_size) - self.rewards = torch.zeros(num_steps, num_processes) - self.value_preds = torch.zeros(num_steps + 1, num_processes) - self.returns = torch.zeros(num_steps + 1, num_processes) - self.action_log_probs = torch.zeros(num_steps, num_processes) - self.actions = torch.zeros((num_steps, num_processes, self.n_actions), - dtype=action_type) - self.masks = torch.ones(num_steps + 1, num_processes) - - self.num_steps = num_steps - self.step = 0 - self.has_extras = False - self.extras_size = None - - def to(self, device): - self.obs = self.obs.to(device) - self.rec_states = self.rec_states.to(device) - self.rewards = self.rewards.to(device) - self.value_preds = self.value_preds.to(device) - self.returns = self.returns.to(device) - self.action_log_probs = self.action_log_probs.to(device) - self.actions = self.actions.to(device) - self.masks = self.masks.to(device) - if self.has_extras: - self.extras = self.extras.to(device) - return self - - def insert(self, obs, rec_states, actions, action_log_probs, value_preds, - rewards, masks): - self.obs[self.step + 1].copy_(obs) 
- self.rec_states[self.step + 1].copy_(rec_states) - self.actions[self.step].copy_(actions.view(-1, self.n_actions)) - self.action_log_probs[self.step].copy_(action_log_probs) - self.value_preds[self.step].copy_(value_preds) - self.rewards[self.step].copy_(rewards) - self.masks[self.step + 1].copy_(masks) - - self.step = (self.step + 1) % self.num_steps - - def after_update(self): - self.obs[0].copy_(self.obs[-1]) - self.rec_states[0].copy_(self.rec_states[-1]) - self.masks[0].copy_(self.masks[-1]) - if self.has_extras: - self.extras[0].copy_(self.extras[-1]) - - def compute_returns(self, next_value, use_gae, gamma, tau): - if use_gae: - self.value_preds[-1] = next_value - gae = 0 - for step in reversed(range(self.rewards.size(0))): - delta = self.rewards[step] + gamma \ - * self.value_preds[step + 1] * self.masks[step + 1] \ - - self.value_preds[step] - gae = delta + gamma * tau * self.masks[step + 1] * gae - self.returns[step] = gae + self.value_preds[step] - else: - self.returns[-1] = next_value - for step in reversed(range(self.rewards.size(0))): - self.returns[step] = self.returns[step + 1] * gamma \ - * self.masks[step + 1] + self.rewards[step] - - def feed_forward_generator(self, advantages, num_mini_batch): - - num_steps, num_processes = self.rewards.size()[0:2] - batch_size = num_processes * num_steps - mini_batch_size = batch_size // num_mini_batch - assert batch_size >= num_mini_batch, ( - "PPO requires the number of processes ({}) " - "* number of steps ({}) = {} " - "to be greater than or equal to " - "the number of PPO mini batches ({})." - "".format(num_processes, num_steps, num_processes * num_steps, - num_mini_batch)) - - sampler = BatchSampler(SubsetRandomSampler(range(batch_size)), - mini_batch_size, drop_last=False) - - for indices in sampler: - yield { - 'obs': self.obs[:-1].view(-1, *self.obs.size()[2:])[indices], - 'rec_states': self.rec_states[:-1].view( - -1, self.rec_states.size(-1))[indices], - 'actions': self.actions.view(-1, self.n_actions)[indices], - 'value_preds': self.value_preds[:-1].view(-1)[indices], - 'returns': self.returns[:-1].view(-1)[indices], - 'masks': self.masks[:-1].view(-1)[indices], - 'old_action_log_probs': self.action_log_probs.view(-1)[indices], - 'adv_targ': advantages.view(-1)[indices], - 'extras': self.extras[:-1].view( - -1, self.extras_size)[indices] - if self.has_extras else None, - } - - def recurrent_generator(self, advantages, num_mini_batch): - - num_processes = self.rewards.size(1) - assert num_processes >= num_mini_batch, ( - "PPO requires the number of processes ({}) " - "to be greater than or equal to the number of " - "PPO mini batches ({}).".format(num_processes, num_mini_batch)) - num_envs_per_batch = num_processes // num_mini_batch - perm = torch.randperm(num_processes) - T, N = self.num_steps, num_envs_per_batch - - for start_ind in range(0, num_processes, num_envs_per_batch): - - obs = [] - rec_states = [] - actions = [] - value_preds = [] - returns = [] - masks = [] - old_action_log_probs = [] - adv_targ = [] - if self.has_extras: - extras = [] - - for offset in range(num_envs_per_batch): - - ind = perm[start_ind + offset] - obs.append(self.obs[:-1, ind]) - rec_states.append(self.rec_states[0:1, ind]) - actions.append(self.actions[:, ind]) - value_preds.append(self.value_preds[:-1, ind]) - returns.append(self.returns[:-1, ind]) - masks.append(self.masks[:-1, ind]) - old_action_log_probs.append(self.action_log_probs[:, ind]) - adv_targ.append(advantages[:, ind]) - if self.has_extras: - extras.append(self.extras[:-1, 
ind]) - - # These are all tensors of size (T, N, ...) - obs = torch.stack(obs, 1) - actions = torch.stack(actions, 1) - value_preds = torch.stack(value_preds, 1) - returns = torch.stack(returns, 1) - masks = torch.stack(masks, 1) - old_action_log_probs = torch.stack(old_action_log_probs, 1) - adv_targ = torch.stack(adv_targ, 1) - if self.has_extras: - extras = torch.stack(extras, 1) - - yield { - 'obs': _flatten_helper(T, N, obs), - 'actions': _flatten_helper(T, N, actions), - 'value_preds': _flatten_helper(T, N, value_preds), - 'returns': _flatten_helper(T, N, returns), - 'masks': _flatten_helper(T, N, masks), - 'old_action_log_probs': _flatten_helper( - T, N, old_action_log_probs), - 'adv_targ': _flatten_helper(T, N, adv_targ), - 'extras': _flatten_helper( - T, N, extras) if self.has_extras else None, - 'rec_states': torch.stack(rec_states, 1).view(N, -1), - } - - -class GlobalRolloutStorage(RolloutStorage): - - def __init__(self, num_steps, num_processes, obs_shape, action_space, - rec_state_size, extras_size): - super(GlobalRolloutStorage, self).__init__( - num_steps, num_processes, obs_shape, action_space, rec_state_size) - self.extras = torch.zeros((num_steps + 1, num_processes, extras_size), - dtype=torch.long) - self.has_extras = True - self.extras_size = extras_size - - def insert(self, obs, rec_states, actions, action_log_probs, value_preds, - rewards, masks, extras): - self.extras[self.step + 1].copy_(extras) - super(GlobalRolloutStorage, self).insert( - obs, rec_states, actions, - action_log_probs, value_preds, rewards, masks)
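
Minimal usage sketch for the newly added SentenceSimilarity helper (not part of the diff; the import path and example category names are illustrative assumptions, and the first call downloads the all-MiniLM-L6-v2 weights from the HuggingFace Hub):

from semantic_exploration.models.sentence_similarity import SentenceSimilarity

matcher = SentenceSimilarity()

# Cosine similarity between two free-form phrases, returned as a 0-dim tensor.
score = matcher.get_similarity_two_sentences("couch", "sofa")
print(float(score))

# Pick the closest candidate label for a query word; underscores in the
# candidate labels are replaced with spaces before encoding, and the method
# prints the cosine scores it computed.
categories = ["potted_plant", "sofa", "tv_monitor"]
print(matcher.get_most_similar_in_list("couch", categories))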