From e637029091c0f453b160256f84c0b9fb28787f2c Mon Sep 17 00:00:00 2001
From: Jan Gutter <jan.gutter@workday.com>
Date: Thu, 8 Aug 2024 21:04:06 +0100
Subject: [PATCH] Fix k8s-crio buildset registry test

* It looks like zuul-jobs-test-registry-buildset-registry-k8s-crio
  is busted with Ubuntu Jammy + cri-o installed from kubic, with
  errors like https://github.com/cri-o/ocicni/issues/77
  (also, kubic has been wound down and cri-o has been spun off)
* cri-o in Noble uninstalls docker-ce; in a follow-up we should
  clean that up and switch to a pure podman profile
* This minikube configuration is not supported, but it seems that
  upstream cri-o might have made some fixes that make it work

* Update the job to use Ubuntu Noble instead of Jammy
* Update ensure-podman for Ubuntu Noble
  (podman is now part of the Ubuntu distro)
* Update the cri-o install in ensure-minikube for Ubuntu Noble and later
  (cri-o is now part of k8s)

Other miscellaneous fixes and workarounds:

* k8s.gcr.io is being sunset, so the test image was updated:
  https://kubernetes.io/blog/2023/03/10/image-registry-redirect/
* Relaxed the security to run minikube from /tmp (in future,
  we should set the default to /usr/local/bin)
* Updated the microk8s check-distro task for Noble

Change-Id: I3b0cbac5c72c31577797ba294de8b8c025f8c2c3
---
 .../tasks/crio-Ubuntu-20.04.yaml              | 28 ++++++++++
 .../ensure-kubernetes/tasks/crio-default.yaml | 54 +++++++++++++++----
 roles/ensure-kubernetes/tasks/microk8s.yaml   |  4 +-
 roles/ensure-kubernetes/tasks/minikube.yaml   | 31 +++++++++--
 roles/ensure-podman/tasks/Ubuntu.yaml         | 20 ++-----
 .../use-buildset-registry/defaults/main.yaml  |  1 +
 roles/use-buildset-registry/tasks/main.yaml   | 39 +++++++++++---
 test-playbooks/ensure-kubernetes/post.yaml    |  2 +-
 zuul-tests.d/container-roles-jobs.yaml        | 15 ++----
 9 files changed, 144 insertions(+), 50 deletions(-)
 create mode 100644 roles/ensure-kubernetes/tasks/crio-Ubuntu-20.04.yaml

diff --git a/roles/ensure-kubernetes/tasks/crio-Ubuntu-20.04.yaml b/roles/ensure-kubernetes/tasks/crio-Ubuntu-20.04.yaml
new file mode 100644
index 000000000..bc8c2e0b6
--- /dev/null
+++ b/roles/ensure-kubernetes/tasks/crio-Ubuntu-20.04.yaml
@@ -0,0 +1,28 @@
+- name: Add all repositories
+  include_role:
+    name: ensure-package-repositories
+  vars:
+    repositories_keys:
+      - url: "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_{{ ansible_distribution_version }}/Release.key"
+      - url: "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/1.24/xUbuntu_{{ ansible_distribution_version }}/Release.key"
+    repositories_list:
+      - repo: "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_{{ ansible_distribution_version }}/ /"
+      - repo: "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/1.24/xUbuntu_{{ ansible_distribution_version }}/ /"
+- name: Install packages
+  package:
+    name:
+      - cri-o
+      - cri-o-runc
+      - containernetworking-plugins
+      - podman
+      - cri-tools
+    state: present
+  become: true
+- name: Set crio cgroup driver
+  ini_file:
+    path: /etc/crio/crio.conf
+    section: crio.runtime
+    option: cgroup_manager
+    value: '"cgroupfs"'
+    mode: 0644
+  become: true
diff --git a/roles/ensure-kubernetes/tasks/crio-default.yaml b/roles/ensure-kubernetes/tasks/crio-default.yaml
index bc8c2e0b6..46760257b 100644
--- a/roles/ensure-kubernetes/tasks/crio-default.yaml
+++ b/roles/ensure-kubernetes/tasks/crio-default.yaml
@@ -1,28 +1,62 @@
 - name: Add all repositories
+  # Instructions from here: https://github.com/cri-o/packaging making
+  # the assumption that CRIO_VERSION == KUBERNETES_VERSION
   include_role:
     name: ensure-package-repositories
   vars:
     repositories_keys:
-      - url: "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_{{ ansible_distribution_version }}/Release.key"
-      - url: "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/1.24/xUbuntu_{{ ansible_distribution_version }}/Release.key"
+      - url: "https://pkgs.k8s.io/core:/stable:/{{ ensure_kubernetes_kubectl_version }}/deb/Release.key"
+      - url: "https://pkgs.k8s.io/addons:/cri-o:/stable:/{{ ensure_kubernetes_kubectl_version }}/deb/Release.key"
     repositories_list:
-      - repo: "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_{{ ansible_distribution_version }}/ /"
-      - repo: "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable:/cri-o:/1.24/xUbuntu_{{ ansible_distribution_version }}/ /"
+      - repo: "deb https://pkgs.k8s.io/core:/stable:/{{ ensure_kubernetes_kubectl_version }}/deb/ /"
+      - repo: "deb https://pkgs.k8s.io/addons:/cri-o:/stable:/{{ ensure_kubernetes_kubectl_version }}/deb/ /"
+
 - name: Install packages
   package:
     name:
       - cri-o
-      - cri-o-runc
+      - runc
       - containernetworking-plugins
-      - podman
       - cri-tools
+      - podman
+      - kubernetes-cni
     state: present
   become: true
-- name: Set crio cgroup driver
+
+# The following two options are recommended by the cri-o install notes
+- name: Enable ipv4 forwarding
+  sysctl:
+    name: net.ipv4.ip_forward
+    value: '1'
+    sysctl_set: true
+    state: present
+    reload: true
+  become: true
+
+- name: Load br_netfilter
+  modprobe:
+    name: br_netfilter
+    state: present
+    persistent: present
+  become: true
+
+- name: Find networking plugins
   ini_file:
     path: /etc/crio/crio.conf
-    section: crio.runtime
-    option: cgroup_manager
-    value: '"cgroupfs"'
+    section: crio.network
+    option: plugin_dirs
+    value:
+      - '/opt/cni/bin/'
+      - '/usr/lib/cni'
     mode: 0644
   become: true
+  register: _crio_conf_updated
+
+# NOTE: restart immediately rather than notifying a handler, so that
+# later tasks never run against a stale crio configuration.
+- name: Restart crio to pickup changes  # noqa no-handler
+  service:
+    name: crio
+    state: restarted
+  become: true
+  when: _crio_conf_updated.changed
diff --git a/roles/ensure-kubernetes/tasks/microk8s.yaml b/roles/ensure-kubernetes/tasks/microk8s.yaml
index 3b747224e..45d6f218b 100644
--- a/roles/ensure-kubernetes/tasks/microk8s.yaml
+++ b/roles/ensure-kubernetes/tasks/microk8s.yaml
@@ -1,7 +1,7 @@
 - name: Check distro
   assert:
-    that: ansible_distribution_release in ['jammy', 'bookworm']
-    msg: 'This role only supported on Jammy or Bookworm'
+    that: ansible_distribution_release in ['jammy', 'bookworm', 'noble']
+    msg: 'This role is only supported on Jammy or Bookworm or Noble'
 
 - name: Install snapd
   become: yes
diff --git a/roles/ensure-kubernetes/tasks/minikube.yaml b/roles/ensure-kubernetes/tasks/minikube.yaml
index 063358e24..4f18ea713 100644
--- a/roles/ensure-kubernetes/tasks/minikube.yaml
+++ b/roles/ensure-kubernetes/tasks/minikube.yaml
@@ -3,6 +3,16 @@
     path: /tmp/minikube
   register: stat_result
 
+# This is needed because minikube is installed in /tmp
+- name: Disable protections for races in /tmp
+  sysctl:
+    name: fs.protected_regular
+    value: '0'
+    sysctl_set: true
+    state: present
+    reload: true
+  become: true
+
 - name: Download Minikube
   get_url:
     url: https://storage.googleapis.com/minikube/releases/{{ minikube_version }}/minikube-linux-amd64
@@ -17,13 +27,28 @@
     dest: /usr/local/bin/kubectl
     state: link
 
+- name: Get the kubernetes version
+  command: >-
+    /tmp/minikube kubectl --
+    version --client=true --output=json
+  changed_when: False
+  register: ensure_kubernetes_kubectl_version_result
+
+- name: Set the kubernetes version
+  vars:
+    kubectl_version: >-
+      {{ ensure_kubernetes_kubectl_version_result.stdout | from_json }}
+  set_fact:
+    ensure_kubernetes_kubectl_version: >-
+      v{{ kubectl_version['clientVersion']['major'] }}.{{ kubectl_version['clientVersion']['minor'] }}
+
 - name: Run ensure-docker role
   include_role:
     name: ensure-docker
 
-# Ubuntu focal doesn't have cri-o-1.15 packages, per distro tasks is
-# required to install crio
-- name: Install crio
+# Ubuntu doesn't have cri-o packages, so per-distro tasks are
+# required to install cri-o
+- name: Install cri-o
   # Note this is required even for the docker runtime, as minikube only
   # supports cri now.  See below for the docker wrapper
   include_tasks: "{{ zj_distro_os }}"
diff --git a/roles/ensure-podman/tasks/Ubuntu.yaml b/roles/ensure-podman/tasks/Ubuntu.yaml
index 556ef96ef..de353a593 100644
--- a/roles/ensure-podman/tasks/Ubuntu.yaml
+++ b/roles/ensure-podman/tasks/Ubuntu.yaml
@@ -1,12 +1,3 @@
-- name: Add kubic project repository
-  include_role:
-    name: ensure-package-repositories
-  vars:
-    repositories_keys:
-      - url: "https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_{{ ansible_distribution_version }}/Release.key"
-    repositories_list:
-      - repo: "deb https://download.opensuse.org/repositories/devel:/kubic:/libcontainers:/stable/xUbuntu_{{ ansible_distribution_version }}/ /"
-
 - name: Install podman
   package:
     name:
@@ -15,21 +6,16 @@
       - slirp4netns
       - fuse-overlayfs
       - containernetworking-plugins
+      # This enables container network dns resolution:
+      - golang-github-containernetworking-plugin-dnsname
     state: present
   become: yes
 
-# NOTE(pabelanger): Remove default registries.conf file, so we can manage it
-# ourself. It could have v1 syntax, which doesn't work with v2.
-- name: Remove /etc/containers/registries.conf
-  become: true
-  file:
-    state: absent
-    path: /etc/containers/registries.conf
-
 - name: Create containers config dir
   file:
     path: '{{ ansible_user_dir }}/.config/containers'
     state: directory
+
 - name: Force cgroup manager to cgroupfs for Ubuntu
   copy:
     content: |
diff --git a/roles/use-buildset-registry/defaults/main.yaml b/roles/use-buildset-registry/defaults/main.yaml
index fb0971126..e55374faa 100644
--- a/roles/use-buildset-registry/defaults/main.yaml
+++ b/roles/use-buildset-registry/defaults/main.yaml
@@ -2,3 +2,4 @@ buildset_registry_namespaces:
   - ['docker.io', 'https://registry-1.docker.io']
   - ['quay.io', 'https://quay.io']
   - ['gcr.io', 'https://gcr.io']
+  - ['registry.k8s.io', 'https://registry.k8s.io']
diff --git a/roles/use-buildset-registry/tasks/main.yaml b/roles/use-buildset-registry/tasks/main.yaml
index a2fed6249..e6f17d7e8 100644
--- a/roles/use-buildset-registry/tasks/main.yaml
+++ b/roles/use-buildset-registry/tasks/main.yaml
@@ -79,13 +79,38 @@
     mode: 0644
   become: true
 
-- name: Restart docker daemon
-  service:
-    name: docker
-    state: restarted
-  become: true
-  register: docker_restart
-  failed_when: docker_restart is failed and not 'Could not find the requested service' in docker_restart.msg
+- name: Populate service facts
+  service_facts:
+
+# Like the ensure-docker handlers, but force a restart to load new config
+- name: Restart docker if it exists
+  block:
+    - name: Stop docker.socket to avoid any conflict
+      become: true
+      service:
+        name: docker.socket
+        enabled: yes
+        state: stopped
+      failed_when: false
+
+    - name: Restart docker service to pick up configuration changes
+      become: true
+      service:
+        name: docker
+        enabled: yes
+        state: restarted
+
+    - name: Assure docker.socket service is running
+      become: true
+      service:
+        name: docker.socket
+        enabled: yes
+        state: started
+      failed_when: false
+  when:
+    # docker-ce may have been uninstalled by cri-o, so skip if it is absent
+    - "'docker.service' in ansible_facts.services"
+    - ansible_facts.services['docker.service']['status'] != 'not-found'
 
 - name: Ensure containers directory exists
   become: yes
diff --git a/test-playbooks/ensure-kubernetes/post.yaml b/test-playbooks/ensure-kubernetes/post.yaml
index 76c5d0bda..cf4cc30dc 100644
--- a/test-playbooks/ensure-kubernetes/post.yaml
+++ b/test-playbooks/ensure-kubernetes/post.yaml
@@ -24,7 +24,7 @@
                 restartPolicy: Never
                 containers:
                   - name: test
-                    image: k8s.gcr.io/pause:3.1
+                    image: registry.k8s.io/pause:3.1
 
         - name: Start pod
           command: kubectl apply -f test-pod.yaml
diff --git a/zuul-tests.d/container-roles-jobs.yaml b/zuul-tests.d/container-roles-jobs.yaml
index 55bc0cf00..85d0053ed 100644
--- a/zuul-tests.d/container-roles-jobs.yaml
+++ b/zuul-tests.d/container-roles-jobs.yaml
@@ -368,6 +368,11 @@
       - test-playbooks/registry/test-registry-post.yaml
     vars:
       container_command: podman
+    # There seems to be flakiness on pre-Noble nodes, so run on Noble
+    nodeset:
+      nodes:
+        - name: ubuntu-noble
+          label: ubuntu-noble
 
 - job:
     name: zuul-jobs-test-ensure-kubernetes-crio
@@ -396,15 +401,6 @@
         - name: ubuntu-focal
           label: ubuntu-focal
 
-- job:
-    name: zuul-jobs-test-ensure-kubernetes-crio-ubuntu-jammy
-    description: Test the ensure-kubernetes role with crio-o on ubuntu-jammy
-    parent: zuul-jobs-test-ensure-kubernetes-crio
-    nodeset:
-      nodes:
-        - name: ubuntu-jammy
-          label: ubuntu-jammy
-
 - job:
     name: zuul-jobs-test-ensure-kubernetes-microk8s
     description: |
@@ -564,7 +560,6 @@
         - zuul-jobs-test-registry-buildset-registry-k8s-microk8s
         - zuul-jobs-test-registry-buildset-registry-k8s-crio
         - zuul-jobs-test-ensure-kubernetes-crio-ubuntu-focal
-        - zuul-jobs-test-ensure-kubernetes-crio-ubuntu-jammy
         - zuul-jobs-test-ensure-kubernetes-microk8s-ubuntu-jammy
         - zuul-jobs-test-ensure-kubernetes-microk8s-debian-bookworm
         - zuul-jobs-test-ensure-skopeo-debian-bookworm