Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

references added #2

Open
wants to merge 1 commit into
base: doccumentation
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 56 additions & 14 deletions nemo_env_pd.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,27 @@
import jax
from brax.envs import PipelineEnv, State
"""PipelineEnv is a base class for building custom environments, while State represents the environment's state, including observations, rewards, and other metrics.
"""
from jax import numpy as jnp
"""jax.numpy is a NumPy-compatible API for numerical operations that supports automatic differentiation."""
from brax.io import mjcf
"""MJCF (MuJoCo XML format) is used to define the physical properties and structure of the robot. This module helps load the model from the XML file"""
from brax import math
"""Contains mathematical functions for vector operations, rotations, and other numerical calculations."""
import mujoco

"""
MuJoCo (Multi-Joint dynamics with Contact) is a physics engine used for accurate simulation of physical systems, especially for robotics and biomechanics.
It provides realistic contact dynamics and joint control
"""
import rewards


DS_PROP = 0.1
BU_PROP = 0.5

"""
DS_PROP (Double Support Proportion): The fraction of the gait cycle where both feet are in contact with the ground.
BU_PROP (Body Upright Proportion): Likely used to ensure the robot remains upright during a specific portion of the gait.
"""

metrics_dict = {
'reward': 0.0,
Expand All @@ -30,32 +41,64 @@
'angvel_z': 0.0,
'feet_orien': 0.0,
'feet_slip_ang': 0.0}

"""
reward: Total reward accumulated in a step.
flatfoot: Rewards stable foot contact with the ground.
periodic: Encourages periodic gait cycles.
upright: Rewards maintaining an upright posture.
limit: Penalizes exceeding joint limits.
feet_z and feet_zd: Track the z-coordinate and vertical velocity of the feet.
termination: Flags termination events (e.g., falls).
velocity, vel_z: Track horizontal and vertical velocities.
energy: Measures energy consumption.
angvel_xy, angvel_z: Track angular velocities.
action_rate: Penalizes abrupt changes in actions.
feet_slip, feet_slip_ang: Penalize slipping or rotational instability of the feet.
feet_orien: Rewards maintaining the correct foot orientation.
"""
class NemoEnv(PipelineEnv):
def __init__(self):
model = mujoco.MjModel.from_xml_path("nemo2/scene.xml")

"""
loads the MuJoCo XML file describing the robot's physical structure and properties.
"""
model.opt.solver = mujoco.mjtSolver.mjSOL_CG
"""
Conjugate Gradient solver for faster convergence
"""
model.opt.iterations = 6
model.opt.ls_iterations = 6

"""
Control the number of solver iterations, balancing accuracy and computational cost.
"""
self.model = model

system = mjcf.load_model(model)

"""
mjcf.load_model: Converts the MuJoCo model into a Brax system
"""
n_frames = 10

"""
n_frames = 10: Sets the number of frames for rendering smooth animations
"""
super().__init__(sys = system,
backend='mjx',
n_frames = n_frames
)

self.initial_state = jnp.array(system.mj_model.keyframe('stand').qpos)
"""
initial_state: Loads the robot's initial pose from the keyframe labeled "stand" in the XML
"""
self.nv = system.nv
self.nu = system.nu
self.control_range = system.actuator_ctrlrange
self.joint_limit = jnp.array(model.jnt_range)

"""
nv and nu: Represent the number of generalized velocities and controls (actuators) in the system.
control_range: Stores the control range for each actuator.
joint_limit: Stores the joint angle limits.
"""
self.pelvis_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, 'pelvis')
self.pelvis_b_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, 'pelvis_back')
self.pelvis_f_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_SITE, 'pelvis_front')
Expand All @@ -78,6 +121,9 @@ def __init__(self):


def _get_obs(self, data0, data1, prev_action: jnp.ndarray, state = None):
"""
This method calculates the observation vector for the agent, which includes information about the robot's position, velocity, rotation, and other relevant state variables.
"""
inv_pelvis_rot = math.quat_inv(data1.x.rot[self.pelvis_id - 1])
vel = data1.xd.vel[self.pelvis_id - 1]
angvel = data1.xd.ang[self.pelvis_id - 1]
Expand Down Expand Up @@ -329,21 +375,17 @@ def step(self, state: State, action: jnp.ndarray):
return state.replace(
pipeline_state = data1, obs=obs, reward=reward, done=done
)
def feetColliding(self, data1):
return rewards.geoms_colliding(data1, self.left_geom_id, self.right_geom_id)

def rewards(self, state, data, action, contact):
reward_dict = {}
data0 = state.pipeline_state
min_z, max_z = (0.4, 0.7)
is_healthy = jnp.where(data.q[2] < min_z, 0.0, 1.0)
is_healthy = jnp.where(data.q[2] > max_z, 0.0, is_healthy)

is_healthy = is_healthy*(1 - self.feetColliding(data))
#healthy_reward = 1.2 * is_healthy
#reward_dict["healthy"] = healthy_reward
reward_dict["termination"] = -500 * (1 - is_healthy)


vel_reward = self.velocityReward(state, data0, data)
reward_dict["velocity"] = vel_reward * 2.0

Expand Down Expand Up @@ -517,7 +559,7 @@ def sites2Rew(p1, p2, p3):
dot = jnp.cross(v1, v2)
normal_vec = dot / jnp.linalg.norm(dot)
ca = jnp.abs(normal_vec[2])
reward = jnp.exp(-1 * (ca -1) ** 2 / 0.001) - 1
reward = jnp.exp(-1 * (ca -1) ** 2 / 0.001)
return reward

lp1 = data.site_xpos[self.left_foot_s1]
Expand Down