diff --git a/.github/workflows/integration-test-backup.yml b/.github/workflows/integration-test-backup.yml
new file mode 100644
index 000000000..ffc5fe3af
--- /dev/null
+++ b/.github/workflows/integration-test-backup.yml
@@ -0,0 +1,79 @@
+# Manually triggered workflow: builds the project inside each CUDA base image, runs the
+# mscclpp-test perf binaries with 8 MPI processes, and checks output.jsonl against a baseline.
+name: IntegrationTest
+
+on: workflow_dispatch
+
+jobs:
+  IntegrationTest:
+    runs-on: self-hosted
+    strategy:
+      matrix:
+        container-image: [ghcr.io/microsoft/mscclpp/mscclpp:base-cuda11.8, ghcr.io/microsoft/mscclpp/mscclpp:base-cuda12.1]
+
+    container:
+      image: ${{ matrix.container-image }}
+      options: --privileged --ipc=host --gpus=all --ulimit memlock=-1:-1
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+
+    - name: Install CMake
+      run: |
+        curl -L https://github.com/Kitware/CMake/releases/download/v3.26.4/cmake-3.26.4-linux-x86_64.tar.gz -o /tmp/cmake-3.26.4-linux-x86_64.tar.gz
+        tar xzf /tmp/cmake-3.26.4-linux-x86_64.tar.gz -C /tmp
+
+    - name: Build
+      run: |
+        mkdir build && cd build
+        MPI_HOME=/usr/local/mpi /tmp/cmake-3.26.4-linux-x86_64/bin/cmake -DCMAKE_BUILD_TYPE=Release ..
+        make -j
+
+    # Enable persistence mode, then set every GPU's application clocks to its reported
+    # maximum memory/SM clocks (sed drops the first space of the "mem, sm" query output).
+    - name: Lock GPU clock frequency
+      run: |
+        sudo nvidia-smi -pm 1
+        for i in $(seq 0 $(( $(nvidia-smi -L | wc -l) - 1 ))); do
+          sudo nvidia-smi -ac $(nvidia-smi --query-gpu=clocks.max.memory,clocks.max.sm --format=csv,noheader,nounits -i $i | sed 's/\ //') -i $i
+        done
+
+    - name: Run mscclpp AllGather test
+      run: |
+        set -e
+        export PATH=/usr/local/mpi/bin:$PATH
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allgather_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl
+
+    - name: Run mscclpp SendRecv test
+      run: |
+        set -e
+        export PATH=/usr/local/mpi/bin:$PATH
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/sendrecv_test_perf -b 1K -e 1G -f 2 -o output.jsonl
+
+    - name: Run mscclpp AllReduce test
+      run: |
+        set -e
+        export PATH=/usr/local/mpi/bin:$PATH
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 2 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 3 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allreduce_test_perf -b 1K -e 1G -f 2 -k 4 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allreduce_test_perf -b 12M -e 48M -i 3145728 -k 5 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/allreduce_test_perf -b 24K -e 768K -i 24576 -k 6 -w 100 -n 100 -o output.jsonl
+
+    - name: Run mscclpp AllToAll test
+      run: |
+        set -e
+        export PATH=/usr/local/mpi/bin:$PATH
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -o output.jsonl
+        mpirun -np 8 --bind-to numa -x MSCCLPP_DEBUG=WARN ./build/test/mscclpp-test/alltoall_test_perf -b 1K -e 1G -f 2 -k 1 -o output.jsonl
+
+    - name: Check collective primitives performance
+      run: |
+        set -e
+        python3 test/mscclpp-test/check_perf_result.py --perf-file output.jsonl --baseline-file test/deploy/perf_ndmv4.jsonl
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index f06e1272d..9cdcf443d 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -38,12 +38,8 @@ jobs:
       - name: Install Python dependencies
         run: python3.8 -m pip install black
 
-      - name: Run linters
-        uses: wearerequired/lint-action@v2
-        with:
-          black: true
-          black_auto_fix: false
-          black_args: "--config pyproject.toml --check"
+      - name: Run black
+        run: python3.8 -m black --check --config pyproject.toml .
 
   spelling:
     runs-on: ubuntu-20.04
diff --git a/.github/workflows/ut-backup.yml b/.github/workflows/ut-backup.yml
new file mode 100644
index 000000000..736c800e7
--- /dev/null
+++ b/.github/workflows/ut-backup.yml
@@ -0,0 +1,68 @@
+# Manually triggered workflow: builds the project inside each CUDA base image, then runs
+# unit_tests, mp_unit_tests under mpirun (2, 4 and 8 processes), and the pytest suite.
+name: UnitTest
+
+on: workflow_dispatch
+
+jobs:
+  UnitTest:
+    runs-on: self-hosted
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        container-image: [ghcr.io/microsoft/mscclpp/mscclpp:base-cuda11.8, ghcr.io/microsoft/mscclpp/mscclpp:base-cuda12.1]
+
+    container:
+      image: ${{ matrix.container-image }}
+      options: --privileged --ipc=host --gpus=all --ulimit memlock=-1:-1
+
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+
+    - name: Build
+      run: |
+        curl -L -C- https://github.com/Kitware/CMake/releases/download/v3.26.4/cmake-3.26.4-linux-x86_64.tar.gz -o /tmp/cmake-3.26.4-linux-x86_64.tar.gz
+        tar xzf /tmp/cmake-3.26.4-linux-x86_64.tar.gz -C /tmp
+        mkdir build && cd build
+        MPI_HOME=/usr/local/mpi /tmp/cmake-3.26.4-linux-x86_64/bin/cmake -DCMAKE_BUILD_TYPE=Release ..
+        make -j
+      working-directory: ${{ github.workspace }}
+
+    # Enable persistence mode, then set every GPU's application clocks to its reported
+    # maximum memory/SM clocks (sed drops the first space of the "mem, sm" query output).
+    - name: LockGPUClock
+      run: |
+        sudo nvidia-smi -pm 1
+        for i in $(seq 0 $(( $(nvidia-smi -L | wc -l) - 1 ))); do
+          sudo nvidia-smi -ac $(nvidia-smi --query-gpu=clocks.max.memory,clocks.max.sm --format=csv,noheader,nounits -i $i | sed 's/\ //') -i $i
+        done
+      working-directory: ${{ github.workspace }}
+
+    - name: UnitTests
+      run: |
+        ./build/test/unit_tests
+      working-directory: ${{ github.workspace }}
+
+    - name: MpUnitTests
+      run: |
+        set -e
+        export PATH=/usr/local/mpi/bin:$PATH
+        mpirun -tag-output -np 2 ./build/test/mp_unit_tests
+        mpirun -tag-output -np 4 ./build/test/mp_unit_tests
+        mpirun -tag-output -np 8 ./build/test/mp_unit_tests
+      working-directory: ${{ github.workspace }}
+
+    # The requirements file is chosen by whether the matrix image name contains "cuda11".
+    - name: PyTests
+      run: |
+        set -e
+        export PATH=/usr/local/mpi/bin:$PATH
+        cd build && make pylib-copy
+        if [[ '${{ matrix.container-image }}' == *'cuda11'* ]]; then
+          pip3 install -r ../python/test/requirements_cu11.txt
+        else
+          pip3 install -r ../python/test/requirements_cu12.txt
+        fi
+        mpirun -tag-output -np 8 ~/.local/bin/pytest ../python/test/test_mscclpp.py -x
+      working-directory: ${{ github.workspace }}