@@ -37,6 +37,25 @@ resource "ibm_is_instance_template" "node_template" {
3737 subnet = local. subnet_id
3838 security_groups = [local . security_group_id ]
3939 }
40+
41+ user_data = <<- EOT
42+ #cloud-config
43+ users:
44+ - default
45+ - name: k8s-admin
46+ shell: /bin/bash
47+ sudo: ALL=(ALL) NOPASSWD:ALL
48+ groups: [sudo]
49+ ssh_authorized_keys:
50+ - ${ data . ibm_is_ssh_key . ssh_key . public_key }
51+ runcmd:
52+ - |
53+ # Ensure k8s-admin SSH dir has correct permissions
54+ mkdir -p /home/k8s-admin/.ssh
55+ chown -R k8s-admin:k8s-admin /home/k8s-admin/.ssh
56+ chmod 700 /home/k8s-admin/.ssh
57+ chmod 600 /home/k8s-admin/.ssh/authorized_keys
58+ EOT
4059}
4160
4261module "master" {
@@ -59,32 +78,104 @@ module "workers" {
5978}
6079
6180resource "null_resource" "wait-for-master-completes" {
81+ depends_on = [module . master ]
82+
83+ # Wait for cloud-init on the master: poll over SSH as k8s-admin first, then as root; at most 60 attempts, 10s sleep after each failed attempt, exit 1 on timeout
84+ provisioner "local-exec" {
85+ command = <<- EOT
86+ max_attempts=60
87+ attempt=0
88+ while [ $attempt -lt $max_attempts ]; do
89+ # Try k8s-admin first (root SSH is disabled on new IBM Cloud VPC-VSIs)
90+ if ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 \
91+ -i ${ var . ssh_private_key } k8s-admin@${ module . master . public_ip } \
92+ "sudo cloud-init status --wait" 2>/dev/null; then
93+ echo "Cloud-init completed on master (via k8s-admin)"
94+ break
95+ fi
96+ # Fallback to root for older images that still have root SSH enabled
97+ if ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 \
98+ -i ${ var . ssh_private_key } root@${ module . master . public_ip } \
99+ "cloud-init status --wait" 2>/dev/null; then
100+ echo "Cloud-init completed on master (via root)"
101+ break
102+ fi
103+ attempt=$((attempt + 1))
104+ echo "Waiting for cloud-init on master (attempt $attempt/$max_attempts)..."
105+ sleep 10
106+ done
107+ if [ $attempt -eq $max_attempts ]; then
108+ echo "ERROR: Timed out waiting for cloud-init on master"
109+ exit 1
110+ fi
111+ EOT
112+ }
113+
114+ # Then confirm Terraform can SSH in as k8s-admin (the remote-exec below only echoes a message)
62115 connection {
63116 type = " ssh"
64- user = " root "
117+ user = " k8s-admin "
65118 host = module. master . public_ip
66119 private_key = file (var. ssh_private_key )
67- timeout = " 20m "
120+ timeout = " 5m "
68121 }
69122 provisioner "remote-exec" {
70123 inline = [
71- " cloud-init status -w "
124+ " echo 'k8s-admin user is ready on master' "
72125 ]
73126 }
74127}
75128
76129resource "null_resource" "wait-for-workers-completes" {
77- count = var. workers_count
130+ count = var. workers_count
131+ depends_on = [module . workers ]
132+
133+ # Wait for cloud-init on this worker: poll over SSH as k8s-admin first, then as root; at most 60 attempts, 10s sleep after each failed attempt, exit 1 on timeout
134+ provisioner "local-exec" {
135+ command = <<- EOT
136+ max_attempts=60
137+ attempt=0
138+ worker_ip="${ module . workers [count . index ]. public_ip } "
139+ worker_index="${ count . index } "
140+ ssh_key="${ var . ssh_private_key } "
141+
142+ while [ $attempt -lt $max_attempts ]; do
143+ # Try k8s-admin first (root SSH is disabled on new IBM Cloud VPC-VSIs)
144+ if ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 \
145+ -i "$ssh_key" k8s-admin@"$worker_ip" \
146+ "sudo cloud-init status --wait" 2>/dev/null; then
147+ echo "Cloud-init completed on worker $worker_index (via k8s-admin)"
148+ break
149+ fi
150+ # Fallback to root for older images that still have root SSH enabled
151+ if ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 \
152+ -i "$ssh_key" root@"$worker_ip" \
153+ "cloud-init status --wait" 2>/dev/null; then
154+ echo "Cloud-init completed on worker $worker_index (via root)"
155+ break
156+ fi
157+ attempt=$((attempt + 1))
158+ echo "Waiting for cloud-init on worker $worker_index (attempt $attempt/$max_attempts)..."
159+ sleep 10
160+ done
161+ if [ $attempt -eq $max_attempts ]; then
162+ echo "ERROR: Timed out waiting for cloud-init on worker $worker_index"
163+ exit 1
164+ fi
165+ EOT
166+ }
167+
168+ # Then confirm Terraform can SSH in as k8s-admin (the remote-exec below only echoes a message)
78169 connection {
79170 type = " ssh"
80- user = " root "
171+ user = " k8s-admin "
81172 host = module. workers [count . index ]. public_ip
82173 private_key = file (var. ssh_private_key )
83- timeout = " 15m "
174+ timeout = " 5m "
84175 }
85176 provisioner "remote-exec" {
86177 inline = [
87- " cloud-init status -w "
178+ " echo 'k8s-admin user is ready on worker ${ count . index } ' "
88179 ]
89180 }
90181}
0 commit comments