# Implement flock-based bitmap GPU allocator for CI parallelization #749

# Workflow header: display name, triggers, concurrency policy and shared
# environment for the Iris external validation jobs.
name: Iris External Validation Test

# Runs on pushes to main, on pull requests targeting main, and on manual
# dispatch from the Actions UI.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# Runs are grouped per workflow and per PR head branch (or per ref when there
# is no head_ref). An in-progress run in the same group is cancelled by a newer
# one unless the ref is refs/heads/main, where the expression yields false.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# DOCKER_IMAGE_NAME comes from the configuration variable of the same name and
# falls back to the literal tag below when that variable is unset or empty.
# NOTE(review): presumably consumed by the scripts under .github/scripts;
# confirm there.
env:
  DOCKER_IMAGE_NAME: ${{ vars.DOCKER_IMAGE_NAME || 'iris-dev-triton-aafec41' }}
# Three jobs, all on self-hosted runners labelled mi3xx:
#   1. build-container-image          - runs the container build script.
#   2. external-validation-test       - installs Iris from this commit inside
#                                       the container and runs a downloaded
#                                       distributed test script.
#   3. external-gluon-validation-test - same flow for the Gluon test script,
#                                       passing --gpus "0,1" to the exec script.
# Jobs 2 and 3 both wait for job 1 (needs: build-container-image).
#
# Dependency installation inside the container deliberately avoids piping
# git/pip into tail. A pipeline reports only the exit status of its last
# command, so with plain `set -e` a failed checkout of the pinned tritonBLAS
# commit (or a failed pip install) would go unnoticed and the test would run
# against whatever happened to be on disk. The --quiet flags keep the log short
# instead, while errors still reach the log and still stop the script.
jobs:
  build-container-image:
    runs-on:
      - self-hosted
      - mi3xx
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Installs Apptainer only when neither apptainer nor docker is on PATH.
      - name: Setup Apptainer (if not available)
        run: |
          if ! command -v apptainer &> /dev/null && ! command -v docker &> /dev/null; then
            echo "Neither Apptainer nor Docker found, installing Apptainer..."
            apt-get update && apt-get install -y software-properties-common
            add-apt-repository -y ppa:apptainer/ppa
            apt-get update && apt-get install -y apptainer
          else
            echo "Container runtime already available"
          fi

      - name: Build Iris container
        run: |
          # Use the universal container build script
          bash .github/scripts/container_build.sh

  external-validation-test:
    name: External Validation Test
    needs: build-container-image
    runs-on:
      - self-hosted
      - mi3xx
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # NOTE(review): presumably frees ports left bound by earlier runs on the
      # same runner; the script itself is not visible here.
      - name: Cleanup lingering ports before tests
        run: |
          bash .github/scripts/cleanup_ports.sh

      # The quoted argument is a script executed by container_exec.sh; inner
      # double quotes are escaped so the outer shell passes them through.
      - name: Run External Validation Test
        run: |
          set -e
          echo "::group::Running external validation test"
          bash .github/scripts/container_exec.sh "
            set -e
            # Install tritonBLAS (required dependency)
            echo \"Installing tritonBLAS...\"
            if [ ! -d \"/tmp/tritonBLAS\" ]; then
              # Best effort: a failed clone falls through to the pip fallback below
              cd /tmp && git clone --quiet https://github.com/ROCm/tritonBLAS.git || true
            fi
            if [ -d \"/tmp/tritonBLAS\" ]; then
              cd /tmp/tritonBLAS
              # Not piped, so set -e stops the run if the pinned commit cannot be used
              git checkout --quiet 47768c93acb7f89511d797964b84544c30ab81ad
              pip install --quiet -e .
            else
              echo \"Warning: Could not clone tritonBLAS, trying pip install from git...\"
              pip install --quiet git+https://github.com/ROCm/tritonBLAS.git@47768c93acb7f89511d797964b84544c30ab81ad
            fi
            cd /iris_workspace
            pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
            wget -O test_iris_distributed.py https://gist.githubusercontent.com/mawad-amd/6375dc078e39e256828f379e03310ec7/raw/a527c3192bee4615292769e340b1c73676f6945a/test_iris_distributed.py
            python test_iris_distributed.py
          "
          echo "::endgroup::"
          echo "✅ External validation test passed!"

  external-gluon-validation-test:
    name: External Gluon Validation Test
    needs: build-container-image
    runs-on:
      - self-hosted
      - mi3xx
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # NOTE(review): presumably frees ports left bound by earlier runs on the
      # same runner; the script itself is not visible here.
      - name: Cleanup lingering ports before tests
        run: |
          bash .github/scripts/cleanup_ports.sh

      # Same flow as the external validation job, with --gpus "0,1" forwarded
      # to container_exec.sh.
      # NOTE(review): presumably selects GPUs 0 and 1 for the container;
      # confirm against the script.
      - name: Run External Gluon Validation Test
        run: |
          set -e
          echo "::group::Running external gluon validation test"
          bash .github/scripts/container_exec.sh --gpus "0,1" "
            set -e
            # Install tritonBLAS (required dependency)
            echo \"Installing tritonBLAS...\"
            if [ ! -d \"/tmp/tritonBLAS\" ]; then
              # Best effort: a failed clone falls through to the pip fallback below
              cd /tmp && git clone --quiet https://github.com/ROCm/tritonBLAS.git || true
            fi
            if [ -d \"/tmp/tritonBLAS\" ]; then
              cd /tmp/tritonBLAS
              # Not piped, so set -e stops the run if the pinned commit cannot be used
              git checkout --quiet 47768c93acb7f89511d797964b84544c30ab81ad
              pip install --quiet -e .
            else
              echo \"Warning: Could not clone tritonBLAS, trying pip install from git...\"
              pip install --quiet git+https://github.com/ROCm/tritonBLAS.git@47768c93acb7f89511d797964b84544c30ab81ad
            fi
            cd /iris_workspace
            pip install git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
            wget -O test_iris_gluon_distributed.py https://gist.githubusercontent.com/mawad-amd/2666dde8ebe2755eb0c4f2108709fcd5/raw/aa567ef3185c37a80d25bc9724ae9589548261b4/test_iris_gluon_distributed.py
            python test_iris_gluon_distributed.py
          "
          echo "::endgroup::"
          echo "✅ External gluon validation test passed!"