Skip to content

Commit 92433af

Browse files
committed
Add k8s-admin user
Create a non-root k8s-admin account on instances and update Terraform/Ansible to use it.

Signed-off-by: Sudharshan Muralidharan <sudharshan.muralidharan1@ibm.com>
1 parent 92b4aee commit 92433af

File tree

2 files changed

+124
-8
lines changed

2 files changed

+124
-8
lines changed

kubetest2-tf/data/vpc/main.tf

Lines changed: 98 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,25 @@ resource "ibm_is_instance_template" "node_template" {
3737
subnet = local.subnet_id
3838
security_groups = [local.security_group_id]
3939
}
40+
41+
user_data = <<-EOT
42+
#cloud-config
43+
users:
44+
- default
45+
- name: k8s-admin
46+
shell: /bin/bash
47+
sudo: ALL=(ALL) NOPASSWD:ALL
48+
groups: [sudo]
49+
ssh_authorized_keys:
50+
- ${data.ibm_is_ssh_key.ssh_key.public_key}
51+
runcmd:
52+
- |
53+
# Ensure k8s-admin SSH dir has correct permissions
54+
mkdir -p /home/k8s-admin/.ssh
55+
chown -R k8s-admin:k8s-admin /home/k8s-admin/.ssh
56+
chmod 700 /home/k8s-admin/.ssh
57+
chmod 600 /home/k8s-admin/.ssh/authorized_keys
58+
EOT
4059
}
4160

4261
module "master" {
@@ -59,32 +78,104 @@ module "workers" {
5978
}
6079

6180
resource "null_resource" "wait-for-master-completes" {
81+
depends_on = [module.master]
82+
83+
# First wait for cloud-init to complete, connecting as k8s-admin and falling back to root (older images still allow root SSH)
84+
provisioner "local-exec" {
85+
command = <<-EOT
86+
max_attempts=60
87+
attempt=0
88+
while [ $attempt -lt $max_attempts ]; do
89+
# Try k8s-admin first (root SSH is disabled on new IBM Cloud VPC-VSIs)
90+
if ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 \
91+
-i ${var.ssh_private_key} k8s-admin@${module.master.public_ip} \
92+
"sudo cloud-init status --wait" 2>/dev/null; then
93+
echo "Cloud-init completed on master (via k8s-admin)"
94+
break
95+
fi
96+
# Fallback to root for older images that still have root SSH enabled
97+
if ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 \
98+
-i ${var.ssh_private_key} root@${module.master.public_ip} \
99+
"cloud-init status --wait" 2>/dev/null; then
100+
echo "Cloud-init completed on master (via root)"
101+
break
102+
fi
103+
attempt=$((attempt + 1))
104+
echo "Waiting for cloud-init on master (attempt $attempt/$max_attempts)..."
105+
sleep 10
106+
done
107+
if [ $attempt -eq $max_attempts ]; then
108+
echo "ERROR: Timed out waiting for cloud-init on master"
109+
exit 1
110+
fi
111+
EOT
112+
}
113+
114+
# Then verify k8s-admin user is accessible
62115
connection {
63116
type = "ssh"
64-
user = "root"
117+
user = "k8s-admin"
65118
host = module.master.public_ip
66119
private_key = file(var.ssh_private_key)
67-
timeout = "20m"
120+
timeout = "5m"
68121
}
69122
provisioner "remote-exec" {
70123
inline = [
71-
"cloud-init status -w"
124+
"echo 'k8s-admin user is ready on master'"
72125
]
73126
}
74127
}
75128

76129
resource "null_resource" "wait-for-workers-completes" {
77-
count = var.workers_count
130+
count = var.workers_count
131+
depends_on = [module.workers]
132+
133+
# First wait for cloud-init to complete, connecting as k8s-admin and falling back to root (older images still allow root SSH)
134+
provisioner "local-exec" {
135+
command = <<-EOT
136+
max_attempts=60
137+
attempt=0
138+
worker_ip="${module.workers[count.index].public_ip}"
139+
worker_index="${count.index}"
140+
ssh_key="${var.ssh_private_key}"
141+
142+
while [ $attempt -lt $max_attempts ]; do
143+
# Try k8s-admin first (root SSH is disabled on new IBM Cloud VPC-VSIs)
144+
if ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 \
145+
-i "$ssh_key" k8s-admin@"$worker_ip" \
146+
"sudo cloud-init status --wait" 2>/dev/null; then
147+
echo "Cloud-init completed on worker $worker_index (via k8s-admin)"
148+
break
149+
fi
150+
# Fallback to root for older images that still have root SSH enabled
151+
if ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=5 \
152+
-i "$ssh_key" root@"$worker_ip" \
153+
"cloud-init status --wait" 2>/dev/null; then
154+
echo "Cloud-init completed on worker $worker_index (via root)"
155+
break
156+
fi
157+
attempt=$((attempt + 1))
158+
echo "Waiting for cloud-init on worker $worker_index (attempt $attempt/$max_attempts)..."
159+
sleep 10
160+
done
161+
if [ $attempt -eq $max_attempts ]; then
162+
echo "ERROR: Timed out waiting for cloud-init on worker $worker_index"
163+
exit 1
164+
fi
165+
EOT
166+
}
167+
168+
# Then verify k8s-admin user is accessible
78169
connection {
79170
type = "ssh"
80-
user = "root"
171+
user = "k8s-admin"
81172
host = module.workers[count.index].public_ip
82173
private_key = file(var.ssh_private_key)
83-
timeout = "15m"
174+
timeout = "5m"
84175
}
85176
provisioner "remote-exec" {
86177
inline = [
87-
"cloud-init status -w"
178+
"echo 'k8s-admin user is ready on worker ${count.index}'"
88179
]
89180
}
90181
}

kubetest2-tf/deployer/deployer.go

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ const (
5050
[workers]
5151
{{range .Workers}}{{.}}
5252
{{end}}
53+
{{if .IsVPC}}
54+
[masters:vars]
55+
ansible_user=k8s-admin
56+
ansible_become=true
57+
ansible_become_method=sudo
58+
59+
[workers:vars]
60+
ansible_user=k8s-admin
61+
ansible_become=true
62+
ansible_become_method=sudo
63+
{{end}}
5364
`
5465
)
5566

@@ -58,6 +69,7 @@ var GitTag string
5869
type AnsibleInventory struct {
5970
Masters []string
6071
Workers []string
72+
IsVPC bool
6173
}
6274

6375
// Add additional Linux package dependencies here, used by checkDependencies()
@@ -269,7 +281,9 @@ func (d *deployer) Up() error {
269281
break
270282
}
271283
}
272-
inventory := AnsibleInventory{}
284+
inventory := AnsibleInventory{
285+
IsVPC: d.TargetProvider == "vpc",
286+
}
273287
tfMetaOutput, err := terraform.Output(d.tmpDir, d.TargetProvider)
274288
if err != nil {
275289
return err
@@ -343,6 +357,17 @@ func (d *deployer) Up() error {
343357

344358
// Add-in the extra-vars set to the final set.
345359
maps.Insert(combinedAnsibleVars, maps.All(d.ExtraVars))
360+
361+
// For VPC deployments, override ansible_user to k8s-admin since IBM Cloud
362+
// has disabled root SSH access for new VPC-VSI instances.
363+
// Extra-vars have the highest precedence in Ansible, so this overrides
364+
// the ansible_user: root in group_vars/all (shared with PowerVS).
365+
if d.TargetProvider == "vpc" {
366+
combinedAnsibleVars["ansible_user"] = "k8s-admin"
367+
combinedAnsibleVars["ansible_become"] = "true"
368+
combinedAnsibleVars["ansible_become_method"] = "sudo"
369+
}
370+
346371
klog.Infof("Updated ansible variables with extra vars: %+v", combinedAnsibleVars)
347372
if err = ansible.Playbook(d.tmpDir, filepath.Join(d.tmpDir, "hosts"), d.Playbook, combinedAnsibleVars); err != nil {
348373
return fmt.Errorf("failed to run ansible playbook: %v", err)

0 commit comments

Comments
 (0)