Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion assets/mujoco_envs/ant.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<map znear=".001"/>
<quality shadowsize="16384"/>
</visual>
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<compiler angle="degree" inertiafromgeom="true"/>
<option integrator="RK4" timestep="0.01"/>
<default>
<joint armature="1" damping="1" limited="true"/>
Expand Down
2 changes: 1 addition & 1 deletion assets/mujoco_envs/gap.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<map znear=".001"/>
<quality shadowsize="16384"/>
</visual>
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="0" condim="1" margin="0.001" solimp=".8 .8 .01" solref=".02 1" rgba=".7 .5 .3 1"/>
Expand Down
2 changes: 1 addition & 1 deletion assets/mujoco_envs/hopper.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<map znear=".001"/>
<quality shadowsize="16384"/>
</visual>
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="0" condim="1" margin="0.001" solimp=".8 .8 .01" solref=".02 1" rgba=".7 .5 .3 1"/>
Expand Down
2 changes: 1 addition & 1 deletion assets/mujoco_envs/swimmer.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<map znear=".001"/>
<quality shadowsize="16384"/>
</visual>
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<compiler angle="degree" inertiafromgeom="true"/>
<option density="4000" integrator="RK4" timestep="0.01" viscosity="0.1"/>
<default>
<geom conaffinity="0" condim="1" margin="0.001" rgba=".7 .5 .3 1"/>
Expand Down
2 changes: 1 addition & 1 deletion assets/mujoco_envs/walker.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
<map znear=".001"/>
<quality shadowsize="16384"/>
</visual>
<compiler angle="degree" coordinate="global" inertiafromgeom="true"/>
<compiler angle="degree" inertiafromgeom="true"/>
<default>
<joint armature="1" damping="1" limited="true"/>
<geom conaffinity="0" condim="1" margin="0.001" solimp=".8 .8 .01" solref=".02 1" rgba=".7 .5 .3 1"/>
Expand Down
30 changes: 27 additions & 3 deletions design_opt/agents/genesis_agent.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
"""
BodyGenAgent: Training agent for robot body generation using PPO.

This agent implements the proximal policy optimization (PPO) algorithm
for co-designing robot morphology and controller. It supports:
- Multi-threaded trajectory sampling
- Separated value estimation for design and control stages
- Checkpoint loading/saving for training resumption
- Visualization of trained policies
"""

import math
import pickle
import time
Expand All @@ -16,14 +27,25 @@

import wandb


def tensorfy(np_list, device=torch.device('cpu')):
"""Convert numpy arrays to tensors, handling nested structures.

Args:
np_list: List of numpy arrays or nested lists
device: Target torch device

Returns:
Nested list of torch tensors on specified device
"""
if isinstance(np_list[0], list):
return [[torch.tensor(x).to(device) if i <= 1 or i == 4 or i >= 7 else x for i, x in enumerate(y)] for y in np_list]
else:
return [torch.tensor(y).to(device) for y in np_list]


class BodyGenAgent(AgentPPO):
"""PPO agent for robot body generation training."""

def __init__(self, cfg, dtype, device, seed, num_threads, training=True, checkpoint=0):
self.cfg = cfg
Expand Down Expand Up @@ -146,13 +168,15 @@ def sample_worker(self, pid, queue, min_batch_size, mean_action, render):
if pid > 0:
torch.manual_seed(torch.randint(0, 5000, (1,)) * pid)
if hasattr(self.env, 'np_random'):
self.env.np_random.seed(self.env.np_random.randint(5000) * pid)
# NumPy 1.17+ uses Generator.choose() instead of seed()
seed = int(self.env.np_random.integers(0, 5000) * pid)
self.env.np_random = np.random.default_rng(seed)

memory = Memory()
logger = self.logger_cls(**self.logger_kwargs)

while logger.num_steps < min_batch_size:
state = self.env.reset()
state, _ = self.env.reset()
logger.start_episode(self.env)

while True:
Expand Down Expand Up @@ -527,7 +551,7 @@ def visualize_agent(self, num_episode=1, mean_action=True, save_video=False, pau
self.obs_norm.to('cpu')

for _ in range(num_episode):
state = env.reset()
state, _ = env.reset()

env._get_viewer('human')._paused = paused
env.render()
Expand Down
23 changes: 12 additions & 11 deletions design_opt/envs/ant.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import numpy as np
from gym import utils
from gymnasium import utils
from khrylib.rl.envs.common.mujoco_env_gym import MujocoEnv
from khrylib.robot.xml_robot import Robot
from khrylib.utils import get_single_body_qposaddr, get_graph_fc_edges
from khrylib.utils.transformation import quaternion_matrix
from copy import deepcopy
import mujoco_py
import mujoco
import time
import os

Expand Down Expand Up @@ -104,9 +104,11 @@ def action_to_control(self, a):
assert a.shape[0] == len(self.robot.bodies)
for body, body_a in zip(self.robot.bodies[1:], a[1:]):
aname = body.get_actuator_name()
if aname in self.model.actuator_names:
aind = self.model.actuator_names.index(aname)
try:
aind = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_ACTUATOR, aname)
ctrl[aind] = body_a
except ValueError:
pass # actuator not found
return ctrl

def step(self, a):
Expand Down Expand Up @@ -205,8 +207,9 @@ def if_use_transform_action(self):
def get_sim_obs(self):
obs = []
if 'root_offset' in self.sim_specs:
root_pos = self.data.body_xpos[self.model._body_name2id[self.robot.bodies[0].name]]

root_body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, self.robot.bodies[0].name)
root_pos = self.data.body(root_body_id).xpos

for i, body in enumerate(self.robot.bodies):
qvel = self.data.qvel.copy()
if self.clip_qvel:
Expand All @@ -218,11 +221,11 @@ def get_sim_obs(self):
if qe - qs >= 1:
assert qe - qs == 1
obs_i = [np.zeros(11), self.data.qpos[qs:qe], qvel[qs-1:qe-1]]
# print(qs)
else:
obs_i = [np.zeros(13)]
if 'root_offset' in self.sim_specs:
offset = self.data.body_xpos[self.model._body_name2id[body.name]][[0, 2]] - root_pos[[0, 2]]
body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, body.name)
offset = self.data.body(body_id).xpos[[0, 2]] - root_pos[[0, 2]]
obs_i.append(offset)
obs_i = np.concatenate(obs_i)
obs.append(obs_i)
Expand Down Expand Up @@ -342,9 +345,7 @@ def reset_model(self):
return self._get_obs()

def viewer_setup(self):
# self.viewer.cam.trackbodyid = 2
self.viewer.cam.distance = 10
# self.viewer.cam.lookat[2] = 1.15
self.viewer.cam.lookat[:2] = self.data.qpos[:2]
self.viewer.cam.lookat[:2] = self.data.qpos[:2]
self.viewer.cam.elevation = -10
self.viewer.cam.azimuth = 110
28 changes: 15 additions & 13 deletions design_opt/envs/gap.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import numpy as np
import os
import os.path as osp
from gym import utils
from gymnasium import utils
from khrylib.rl.envs.common.mujoco_env_gym import MujocoEnv
from khrylib.robot.xml_robot import Robot
from khrylib.utils import get_single_body_qposaddr, get_graph_fc_edges
import mujoco
import shutil


Expand Down Expand Up @@ -95,7 +96,6 @@ def set_design_params(self, in_design_params):
design_params = in_design_params
for params, body in zip(design_params, self.robot.bodies):
body.set_params(params, pad_zeros=True, map_params=True)
# new_params = body.get_params([], pad_zeros=True, demap_params=True)
body.sync_node()

xml_str = self.robot.export_xml_string()
Expand All @@ -116,8 +116,11 @@ def action_to_control(self, a):
assert a.shape[0] == len(self.robot.bodies)
for body, body_a in zip(self.robot.bodies[1:], a[1:]):
aname = body.get_actuator_name()
aind = self.model.actuator_names.index(aname)
ctrl[aind] = body_a
try:
aind = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_ACTUATOR, aname)
ctrl[aind] = body_a
except ValueError:
pass
return ctrl

def step(self, a):
Expand Down Expand Up @@ -165,19 +168,18 @@ def step(self, a):
assert np.all(a[:, self.control_action_dim:] == 0)
control_a = a[:, :self.control_action_dim]
ctrl = self.action_to_control(control_a)
posbefore = self.sim.data.qpos[0]
posbefore = self.data.qpos[0]

try:
self.do_simulation(ctrl, self.frame_skip)
except:
print(self.cur_xml_str)
return self._get_obs(), 0, True, False, {'use_transform_action': False, 'stage': 'execution'}

posafter, height, ang = self.sim.data.qpos[0:3]
posafter, height, ang = self.data.qpos[0:3]
alive_bonus = self.cfg.reward_specs.get('alive_bonus', 0.0)
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
# reward -= 1e-3 * np.square(a).sum()
scale = self.cfg.reward_specs.get('exec_reward_scale', 1.0)
reward *= scale

Expand Down Expand Up @@ -213,9 +215,10 @@ def if_use_transform_action(self):
def get_sim_obs(self):
obs = []
if 'root_offset' in self.sim_specs:
root_pos = self.data.body_xpos[self.model._body_name2id[self.robot.bodies[0].name]]

phase_ob = [(self.sim.data.qpos[0]) / 3.2 - int((self.sim.data.qpos[0]) / 3.2)]
root_body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, self.robot.bodies[0].name)
root_pos = self.data.body(root_body_id).xpos

phase_ob = [(self.data.qpos[0]) / 3.2 - int((self.data.qpos[0]) / 3.2)]
for i, body in enumerate(self.robot.bodies):
qvel = self.data.qvel.copy()
if self.clip_qvel:
Expand All @@ -226,9 +229,9 @@ def get_sim_obs(self):
qs, qe = get_single_body_qposaddr(self.model, body.name)
assert qe - qs == 1
obs_i = [phase_ob, self.data.qpos[qs:qe], np.zeros(1), qvel[qs:qe], np.zeros(2)]
# print(qs)
if 'root_offset' in self.sim_specs:
offset = self.data.body_xpos[self.model._body_name2id[body.name]][[0, 2]] - root_pos[[0, 2]]
body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, body.name)
offset = self.data.body(body_id).xpos[[0, 2]] - root_pos[[0, 2]]
obs_i.append(offset)
obs_i = np.concatenate(obs_i)
obs.append(obs_i)
Expand Down Expand Up @@ -334,7 +337,6 @@ def reset_state(self, add_noise):
has_contact = False
for contact in self.data.contact[:self.data.ncon]:
g1, g2 = contact.geom1, contact.geom2
# print(f'g1: {g1} g2: {g2}')
if g1 in self.ground_geoms or g2 in self.ground_geoms:
has_contact = True
break
Expand Down
25 changes: 14 additions & 11 deletions design_opt/envs/hopper.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import numpy as np
from gym import utils
from gymnasium import utils
from khrylib.rl.envs.common.mujoco_env_gym import MujocoEnv
from khrylib.robot.xml_robot import Robot
from khrylib.utils import get_single_body_qposaddr, get_graph_fc_edges
from copy import deepcopy
import mujoco_py
import mujoco
import time
import os

Expand Down Expand Up @@ -103,8 +103,11 @@ def action_to_control(self, a):
assert a.shape[0] == len(self.robot.bodies)
for body, body_a in zip(self.robot.bodies[1:], a[1:]):
aname = body.get_actuator_name()
aind = self.model.actuator_names.index(aname)
ctrl[aind] = body_a
try:
aind = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_ACTUATOR, aname)
ctrl[aind] = body_a
except ValueError:
pass
return ctrl

def step(self, a):
Expand Down Expand Up @@ -152,15 +155,15 @@ def step(self, a):
assert np.all(a[:, self.control_action_dim:] == 0)
control_a = a[:, :self.control_action_dim]
ctrl = self.action_to_control(control_a)
posbefore = self.sim.data.qpos[0]
posbefore = self.data.qpos[0]

try:
self.do_simulation(ctrl, self.frame_skip)
except:
print(self.cur_xml_str)
return self._get_obs(), 0, True, False, {'use_transform_action': False, 'stage': 'execution'}

posafter, height, ang = self.sim.data.qpos[0:3]
posafter, height, ang = self.data.qpos[0:3]
alive_bonus = self.cfg.reward_specs.get('alive_bonus', 0.0)
reward = (posafter - posbefore) / self.dt
reward += alive_bonus
Expand Down Expand Up @@ -199,8 +202,9 @@ def if_use_transform_action(self):
def get_sim_obs(self):
obs = []
if 'root_offset' in self.sim_specs:
root_pos = self.data.body_xpos[self.model._body_name2id[self.robot.bodies[0].name]]

root_body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, self.robot.bodies[0].name)
root_pos = self.data.body(root_body_id).xpos

for i, body in enumerate(self.robot.bodies):
qvel = self.data.qvel.copy()
if self.clip_qvel:
Expand All @@ -211,9 +215,9 @@ def get_sim_obs(self):
qs, qe = get_single_body_qposaddr(self.model, body.name)
assert qe - qs == 1
obs_i = [self.data.qpos[qs:qe], np.zeros(1), qvel[qs:qe], np.zeros(2)]
# print(qs)
if 'root_offset' in self.sim_specs:
offset = self.data.body_xpos[self.model._body_name2id[body.name]][[0, 2]] - root_pos[[0, 2]]
body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, body.name)
offset = self.data.body(body_id).xpos[[0, 2]] - root_pos[[0, 2]]
obs_i.append(offset)
obs_i = np.concatenate(obs_i)
obs.append(obs_i)
Expand Down Expand Up @@ -322,7 +326,6 @@ def reset_state(self, add_noise):
has_contact = False
for contact in self.data.contact[:self.data.ncon]:
g1, g2 = contact.geom1, contact.geom2
# print(f'g1: {g1} g2: {g2}')
if g1 in self.ground_geoms or g2 in self.ground_geoms:
has_contact = True
break
Expand Down
Loading