Skip to content

Question about CALVIN benchmark #79

@19tzjia

Description

@19tzjia

Thank you for sharing this great work! I've been trying to reproduce X-VLA on the CALVIN benchmark, but I'm encountering a significant performance gap.
Results Comparison:

Result of the model I trained myself: 1.97
Result of the released checkpoint 2toINF/X-VLA-Calvin-ABC_D evaluated on my GPU: 4.32

My Setup:
I wrote my own script to convert the original CALVIN ABC-D dataset to HDF5 format, and followed your guidance to convert the CALVIN proprioception (robot_obs[...,:6]) into absolute end-effector actions. I also noticed that the training loss remains around 0.05 throughout training.

Questions:

  1. Could you help verify if my processing code is correct?
  2. Would it be possible to release your CALVIN dataset processing scripts or the processed datasets?
  3. Are there any specific details about data preprocessing that might be critical for achieving the reported performance?

Here is my conversion script:

import os

import cv2
import h5py
import numpy as np
from tqdm import tqdm

# ---------------------------
# Config
# ---------------------------
# Input: raw CALVIN ABC-D "training" split (one episode_*.npz file per frame).
root = "datasets/56/task_ABC_D/training"
# Output: one HDF5 file per language-annotated trajectory.
save_root = "datasets/processed/task_ABC_D/training"

os.makedirs(save_root, exist_ok=True)

# JPEG quality (0-100) passed to cv2.imencode for both camera views.
JPEG_QUALITY = 95


def encode_jpg_rgb(img_rgb: np.ndarray) -> np.ndarray:
    """Encode an RGB uint8 image as a JPEG byte buffer.

    Args:
        img_rgb: HxWx3 uint8 image in RGB channel order.

    Returns:
        1-D uint8 array holding the JPEG-encoded bytes.

    Raises:
        TypeError: if the input array is not uint8.
        RuntimeError: if OpenCV fails to encode the image.
    """
    # Validate with an explicit raise, not `assert` (asserts are stripped
    # under `python -O`, silently disabling the check).
    if img_rgb.dtype != np.uint8:
        raise TypeError(f"expected a uint8 image, got dtype {img_rgb.dtype}")
    # OpenCV expects BGR. `[..., ::-1]` yields a negative-stride view which
    # some OpenCV versions refuse; copy to a contiguous buffer first.
    img_bgr = np.ascontiguousarray(img_rgb[..., ::-1])
    ok, enc = cv2.imencode(
        ".jpg",
        img_bgr,
        [int(cv2.IMWRITE_JPEG_QUALITY), JPEG_QUALITY],
    )
    if not ok:
        raise RuntimeError("cv2.imencode failed")
    return enc


def load_episode_data(root, idx):
    """Load one CALVIN frame (two camera views + robot state) from disk.

    Args:
        root: directory containing the raw per-frame ``episode_*.npz`` files.
        idx: integer frame index; maps to ``episode_{idx:07d}.npz``.

    Returns:
        dict with keys ``third_view`` (static RGB image), ``wrist_view``
        (gripper RGB image) and ``robot_state`` (proprioception vector),
        all copied out of the archive as numpy arrays.

    Raises:
        FileNotFoundError: if the episode file does not exist.
    """
    path = os.path.join(root, f"episode_{idx:07d}.npz")
    # Keep the try body minimal: only np.load can raise FileNotFoundError here.
    try:
        data = np.load(path)
    except FileNotFoundError:
        raise FileNotFoundError(f"Episode file not found: {path}") from None
    # Context manager guarantees the NpzFile is closed even if a key is
    # missing (the original leaked the handle on any error after np.load).
    with data:
        return {
            "third_view": data["rgb_static"],        # (200,200,3) uint8 — per issue text
            "wrist_view": data["rgb_gripper"],       # (84,84,3) uint8 — per issue text
            "robot_state": data["robot_obs"],        # (15,) proprioception
        }


# ---------------------------
# Load language segments
# ---------------------------
# auto_lang_ann.npy stores a pickled dict mapping frame ranges to
# natural-language instructions for this split.
anno = np.load(
    os.path.join(root, "lang_annotations/auto_lang_ann.npy"),
    allow_pickle=True,
).item()

instructions = anno["language"]["ann"]
indices = anno["info"]["indx"]

# One (start_frame, end_frame, instruction) triple per annotated trajectory,
# ordered by start frame.
segments = []
for (start_idx, end_idx), instruction in zip(indices, instructions):
    segments.append((start_idx.item(), end_idx.item(), instruction))
segments.sort(key=lambda seg: seg[0])

print("Total trajectories:", len(segments))


# ---------------------------
# Convert
# ---------------------------
# One HDF5 file per language-annotated trajectory: JPEG-compressed camera
# frames plus a (T, 7) proprioception array (xyz+euler pose and gripper).
for traj_id, (start, end, lang) in enumerate(tqdm(segments)):
    save_path = os.path.join(save_root, f"traj_{traj_id:06d}.hdf5")

    third_imgs, wrist_imgs, proprios = [], [], []

    # Frame ranges from the annotations are inclusive on both ends.
    for frame_idx in range(start, end + 1):
        frame = load_episode_data(root, frame_idx)

        third_imgs.append(encode_jpg_rgb(frame["third_view"]))
        wrist_imgs.append(encode_jpg_rgb(frame["wrist_view"]))

        # proprio = first six pose dims + last dim (gripper) of robot_obs.
        state = frame["robot_state"]
        proprios.append(np.concatenate([state[:6], state[-1:]], axis=0))

    num_frames = len(proprios)

    # ---------------------------
    # Write HDF5
    # ---------------------------
    with h5py.File(save_path, "w") as f:
        f.create_dataset(
            "language_instruction",
            data=lang,
            dtype=h5py.string_dtype("utf-8"),
        )

        # Variable-length uint8 datasets: each entry is one JPEG byte buffer.
        obs_group = f.create_group("observation")
        vlen_u8 = h5py.vlen_dtype(np.uint8)
        d_third = obs_group.create_dataset("third_image", (num_frames,), dtype=vlen_u8)
        d_wrist = obs_group.create_dataset("wrist_image", (num_frames,), dtype=vlen_u8)

        for i, (third_buf, wrist_buf) in enumerate(zip(third_imgs, wrist_imgs)):
            d_third[i] = third_buf
            d_wrist[i] = wrist_buf

        f.create_dataset("proprio", data=np.stack(proprios).astype(np.float64))

print("\n✓ Processing complete!")

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions