Skip to content

Commit 4a0bcc0

Browse files
authored
Merge pull request #926 from JuliaParallel/eschnett/CI
Update CI
2 parents b06cb8c + 9fd579e commit 4a0bcc0

6 files changed

Lines changed: 124 additions & 89 deletions

File tree

.buildkite/pipeline.yml

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,24 @@
77
queue: "juliagpu"
88
cuda: "*"
99
env:
10+
# This is broken for OpenMPI 5 and Julia 1.12, so we stick with OpenMPI 4
1011
OPENMPI_VER: "4.1"
11-
OPENMPI_VER_FULL: "4.1.4"
12-
UCX_VER: "1.12.1"
12+
OPENMPI_VER_FULL: "4.1.8"
13+
# OPENMPI_VER: "5.0"
14+
# OPENMPI_VER_FULL: "5.0.9"
15+
UCX_VER: "1.19.1"
1316
CCACHE_DIR: "/root/ccache"
1417
commands: |
1518
echo "--- Install packages"
1619
apt-get install --yes --no-install-recommends curl ccache
17-
export PATH="/usr/lib/ccache/:$$PATH"
20+
export PATH="/usr/lib/ccache:$$PATH"
1821
1922
echo "--- Build UCX"
2023
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
2124
tar -zxf ucx.tar.gz
2225
pushd ucx-*
2326
./configure --with-cuda=/usr/local/cuda --enable-mt --prefix=$$(realpath ../mpi-prefix)
24-
make -j
27+
make -j $(nproc)
2528
make install
2629
popd
2730
@@ -30,7 +33,7 @@
3033
tar -zxf openmpi.tar.gz
3134
pushd openmpi-$${OPENMPI_VER_FULL}
3235
./configure --with-ucx=$$(realpath ../mpi-prefix) --with-cuda=/usr/local/cuda --prefix=$$(realpath ../mpi-prefix)
33-
make -j
36+
make -j $(nproc)
3437
make install
3538
popd
3639
@@ -48,12 +51,8 @@
4851
matrix:
4952
setup:
5053
version:
51-
- "1.6"
52-
- "1.7"
53-
- "1.8"
54-
- "1.9"
5554
- "1.10"
56-
- "1.11"
55+
- "1.12"
5756
concurrency: 1
5857
concurrency_group: mpi_cuda
5958
plugins:
@@ -109,21 +108,25 @@
109108
queue: "juliagpu"
110109
rocm: "*"
111110
env:
111+
# This is broken for OpenMPI 5 and Julia 1.12.
112+
# It is also broken with OpenMPI 4 for all versions of Julia, so we use OpenMPI 5 and skip Julia 1.12.
112113
OPENMPI_VER: "5.0"
113-
OPENMPI_VER_FULL: "5.0.3"
114-
UCX_VER: "1.17.0"
114+
OPENMPI_VER_FULL: "5.0.9"
115+
# OPENMPI_VER: "4.1"
116+
# OPENMPI_VER_FULL: "4.1.8"
117+
UCX_VER: "1.19.1"
115118
CCACHE_DIR: "/root/ccache"
116119
commands: |
117120
echo "--- Install packages"
118121
apt-get install --yes --no-install-recommends curl ccache
119-
export PATH="/usr/lib/ccache/:$$PATH"
122+
export PATH="/usr/lib/ccache:$$PATH"
120123
121124
echo "--- Build UCX"
122125
curl -L https://github.com/openucx/ucx/releases/download/v$${UCX_VER}/ucx-$${UCX_VER}.tar.gz --output ucx.tar.gz
123126
tar -zxf ucx.tar.gz
124127
pushd ucx-*
125128
./configure --with-rocm --enable-mt --prefix=$$(realpath ../mpi-prefix)
126-
make -j
129+
make -j $(nproc)
127130
make install
128131
popd
129132
@@ -132,7 +135,7 @@
132135
tar -zxf openmpi.tar.gz
133136
pushd openmpi-$${OPENMPI_VER_FULL}
134137
./configure --with-ucx=$$(realpath ../mpi-prefix) --with-rocm --prefix=$$(realpath ../mpi-prefix)
135-
make -j
138+
make -j $(nproc)
136139
make install
137140
popd
138141
@@ -152,6 +155,14 @@
152155
version:
153156
- "1.10"
154157
- "1.11"
158+
#
159+
# Skip the test with Julia 1.12 because it segfaults while installing packages:
160+
# [amdgpu1:516 :0:516] Caught signal 11 (Segmentation fault: invalid permissions for mapped object at address 0x7ee1a9ec5000)
161+
# ==== backtrace (tid: 516) ====
162+
# 0 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(ucs_handle_error+0x2e4) [0x7ee18c9bc4d4]
163+
# 1 /var/lib/buildkite-agent/builds/gpuci-9/julialang/mpi-dot-jl/openmpi/lib/libucs.so.0(+0x3b6ca) [0x7ee18c9bc6ca]
164+
#
165+
# - "1.12"
155166
concurrency: 1
156167
concurrency_group: mpi_rocm
157168
plugins:

.github/workflows/UnitTests.yml

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,19 @@ jobs:
3333
strategy:
3434
matrix:
3535
os:
36-
- macos-13
36+
- macos-15-intel
3737
- ubuntu-latest
3838
- windows-latest
3939
julia_version:
40-
- "1.6"
40+
- "min"
4141
- "1"
4242
- "nightly"
4343
julia_arch: [x64, x86]
4444
exclude:
45-
- os: macos-13
45+
- os: macos-15-intel
4646
julia_arch: x86
4747
include:
48-
- os: macos-14
48+
- os: macos-15
4949
julia_arch: "aarch64"
5050
julia_version: "1"
5151

@@ -88,10 +88,10 @@ jobs:
8888
strategy:
8989
matrix:
9090
os:
91-
- macos-13
91+
- macos-15-intel
9292
- ubuntu-latest
9393
julia_version:
94-
- "1.6"
94+
- "min"
9595
- "1"
9696
- "nightly"
9797
julia_arch: [x64]
@@ -139,8 +139,8 @@ jobs:
139139
strategy:
140140
matrix:
141141
os:
142-
- macos-13
143-
- macos-14
142+
- macos-15
143+
- macos-15-intel
144144
mpi:
145145
- mpich
146146
- openmpi
@@ -150,9 +150,9 @@ jobs:
150150
- "x64"
151151
- "aarch64"
152152
exclude:
153-
- os: macos-13
153+
- os: macos-15-intel
154154
julia_arch: "aarch64"
155-
- os: macos-14
155+
- os: macos-15
156156
julia_arch: "x64"
157157

158158
fail-fast: false
@@ -344,18 +344,18 @@ jobs:
344344
strategy:
345345
matrix:
346346
os:
347-
- macos-13
347+
- macos-15-intel
348348
- ubuntu-latest
349349
mpi: [mpitrampoline]
350350
julia_version:
351-
- "1.6"
351+
- "min"
352352
- "1"
353353
- "nightly"
354354
julia_arch:
355355
- x64
356356
- x86
357357
exclude:
358-
- os: macos-13
358+
- os: macos-15-intel
359359
julia_arch: x86
360360

361361
fail-fast: false
@@ -556,6 +556,8 @@ jobs:
556556
MV2_SMP_USE_CMA: 0
557557
# Work around issue with affinity not set. Ref:
558558
# https://github.com/JuliaParallel/MPI.jl/pull/810#issuecomment-1920255386
559+
# MVAPICH 2 and 3 use different environment variables; set both.
560+
MV2_ENABLE_AFFINITY: 0
559561
MVP_ENABLE_AFFINITY: 0
560562

561563
steps:

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ PrecompileTools = "1.0.1"
3333
Requires = "~0.5, 1.0"
3434
Serialization = "1"
3535
Sockets = "1"
36-
julia = "1.6"
36+
julia = "1.10"
3737

3838
[extensions]
3939
AMDGPUExt = "AMDGPU"

test/runtests.jl

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,18 @@ testfiles = sort(filter(istest, readdir(testdir)))
112112
""" exception=(e, catch_backtrace())
113113
@test_broken false
114114
end
115+
elseif f == "test_cooperative_wait.jl" && Sys.iswindows()
116+
# This test is broken on Windows. We don't know why.
117+
try
118+
run(cmd())
119+
catch e
120+
@error """
121+
$(f) tests failed. This may be because the Windows MPI implementation is quite old;
122+
it appears unsupported and has not seen bug fixes for a long time.
123+
See the full error message for more details. Some messages may have been written above.
124+
""" exception=(e, catch_backtrace())
125+
@test_broken false
126+
end
115127
else
116128
# MPI_Reduce with MPICH 3.4.2 on macOS when root != 0 and
117129
# when recvbuf == C_NULL segfaults

test/test_cooperative_wait.jl

Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,42 @@
11
# tests for the various kinds of waits
22
include("common.jl")
33

4-
MPI.Init(threadlevel=:multiple)
4+
provided = MPI.Init(threadlevel=:multiple)
55

6-
myrank = MPI.Comm_rank(MPI.COMM_WORLD)
7-
commsize = MPI.Comm_rank(MPI.COMM_WORLD)
6+
if provided >= MPI.ThreadLevel(:multiple)
87

9-
nsends = 2
10-
send_arr = [ArrayType{Int}([i]) for i = 1:nsends]
11-
recv_arr = [ArrayType{Int}(undef,1) for i = 1:nsends]
12-
synchronize()
8+
myrank = MPI.Comm_rank(MPI.COMM_WORLD)
9+
commsize = MPI.Comm_size(MPI.COMM_WORLD)
1310

14-
send_check = zeros(Int, nsends)
15-
recv_check = zeros(Int, nsends)
11+
nsends = 2
12+
send_arr = [ArrayType{Int}([i]) for i = 1:nsends]
13+
recv_arr = [ArrayType{Int}(undef,1) for i = 1:nsends]
14+
synchronize()
1615

17-
@sync for i = 1:nsends
18-
Threads.@spawn begin
19-
recv_req = MPI.Irecv!(recv_arr[i], MPI.COMM_WORLD; source=myrank, tag=i)
20-
wait(recv_req)
21-
@test MPI.isnull(recv_req)
22-
recv_check[i] += 1
23-
end
24-
Threads.@spawn begin
25-
send_req = MPI.Isend(send_arr[i], MPI.COMM_WORLD; dest=myrank, tag=i)
26-
wait(send_req)
27-
@test MPI.isnull(send_req)
28-
send_check[i] += 1
16+
send_check = zeros(Int, nsends)
17+
recv_check = zeros(Int, nsends)
18+
19+
@sync for i = 1:nsends
20+
Threads.@spawn begin
21+
recv_req = MPI.Irecv!(recv_arr[i], MPI.COMM_WORLD; source=myrank, tag=i)
22+
wait(recv_req)
23+
@test MPI.isnull(recv_req)
24+
recv_check[i] += 1
25+
end
26+
Threads.@spawn begin
27+
send_req = MPI.Isend(send_arr[i], MPI.COMM_WORLD; dest=myrank, tag=i)
28+
wait(send_req)
29+
@test MPI.isnull(send_req)
30+
send_check[i] += 1
31+
end
2932
end
30-
end
3133

32-
@test recv_check == ones(Int, nsends)
33-
@test send_check == ones(Int, nsends)
34+
@test recv_check == ones(Int, nsends)
35+
@test send_check == ones(Int, nsends)
36+
@test all(Array(send_arr[i]) == [i] for i = 1:nsends)
37+
@test all(Array(recv_arr[i]) == [i] for i = 1:nsends)
38+
39+
end
3440

3541
MPI.Barrier(MPI.COMM_WORLD)
3642
MPI.Finalize()

0 commit comments

Comments
 (0)