Tag Archives: k8s

Readiness probe failed: calico/node is not ready: BIRD is not ready: Error querying BIRD: unable to

Calico was installed using the tigera-operator method, and after startup all Calico-related pods showed CrashLoopBackOff.

Running kubectl -n calico-system describe pod calico-node-2t8w6 showed the following error:

Readiness probe failed: calico/node is not ready: BIRD is not ready: Error querying BIRD: unable to connect to BIRDv4 socket: dial unix /var/run/calico/bird.ctl: connect: no such file or directory.

Cause of the problem:

This occurred during a Kubernetes cluster deployment. By default Calico autodetects each node's IP address with the first-found method, and here it picked the wrong address, so the detection method has to be specified manually.
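Before changing anything, it helps to confirm which address each node registered and which interfaces actually exist on the nodes, so a sensible autodetection method can be chosen; a quick check (interface names will differ per environment):

# Which IP did each node register with?
kubectl get nodes -o wide
# Which interfaces and addresses exist on a node (run on the node itself)?
ip -4 addr show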

1. Remove all the Calico resources

kubectl -n tigera-operator get deployments.apps -o yaml > a.yaml
kubectl -n calico-system get daemonsets.apps calico-node -o yaml > b.yaml
kubectl -n calico-system get deployments.apps calico-kube-controllers -o yaml > c.yaml
kubectl -n calico-system get deployments.apps calico-typha -o yaml > d.yaml
kubectl -n calico-apiserver get deployments.apps calico-apiserver -o yaml > e.yaml
kubectl delete -f a.yaml
kubectl delete -f b.yaml
kubectl delete -f c.yaml
kubectl delete -f d.yaml
kubectl delete -f e.yaml
2. Remove the tigera-operator and custom-resources manifests
kubectl delete -f tigera-operator.yaml
kubectl delete -f custom-resources.yaml

3. Remove vxlan.calico
ip link delete vxlan.calico

4. Modify the custom-resources.yaml file and add nodeAddressAutodetectionV4:
# This section includes base Calico installation configuration.
# For more information, see: https://projectcalico.docs.tigera.io/v3.23/reference/installation/api#operator.tigera.io/v1.Installation
apiVersion: operator.tigera.io/v1
kind: Installation
metadata:
  name: default
spec:
  # Configures Calico networking.
  calicoNetwork:
    # Note: The ipPools section cannot be modified post-install.
    #bgp: Enabled
    #hostPorts: Enabled
    ipPools:
    - blockSize: 26
      cidr: 10.244.0.0/16
      encapsulation: VXLANCrossSubnet
      natOutgoing: Enabled
      nodeSelector: all()
    #linuxDataplane: Iptables
    #multiInterfaceMode: None
    nodeAddressAutodetectionV4:
      interface: ens.*

---

# This section configures the Calico API server.
# For more information, see: https://projectcalico.docs.tigera.io/v3.23/reference/installation/api#operator.tigera.io/v1.APIServer
apiVersion: operator.tigera.io/v1
kind: APIServer
metadata:
  name: default
spec: {}
5. Re-create
kubectl create -f tigera-operator.yaml
kubectl create -f custom-resources.yaml
Check:
kubectl -n calico-system get daemonsets.apps calico-node -o yaml | grep -A2 IP_AUTODETECTION_METHOD
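Once the operator finishes reconciling the new Installation, the calico-node pods should stop crash-looping; a quick check:

# All calico-system pods should end up Running and Ready, and the nodes Ready
kubectl -n calico-system get pods
kubectl get nodes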

[ERROR Swap]: running with swap on is not supported. Please disable swap

Initializing the cluster with kubeadm failed with the following error:

[root@k8s1 yum.repos.d]# kubeadm init --apiserver-advertise-address=192.168.12.10 --image-repository registry.aliyuncs.com/google_containers --kubernetes-version v1.18.0 --service-cidr=10.96.0.0/12 --pod-network-cidr=10.244.0.0/16
W0928 15:17:23.161858    1999 configset.go:202] WARNING: kubeadm cannot validate component configs for API groups [kubelet.config.k8s.io kubeproxy.config.k8s.io]
[init] Using Kubernetes version: v1.18.0
[preflight] Running pre-flight checks
[WARNING IsDockerSystemdCheck]: detected "cgroupfs" as the Docker cgroup driver. The recommended driver is "systemd". Please follow the guide at https://kubernetes.io/docs/setup/cri/
error execution phase preflight: [preflight] Some fatal errors occurred:
[ERROR Swap]: running with swap on is not supported. Please disable swap
[preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`
To see the stack trace of this error execute with --v=5 or higher

How to Solve:

Swap needs to be turned off on Linux:

# Turn off swap; run both commands to solve the problem
swapoff -a                            # temporary
sed -ri 's/.*swap.*/#&/' /etc/fstab   # permanent
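To confirm swap is really off before re-running kubeadm init:

# Neither command should report any active swap
swapon --show
free -h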

 

[Solved] kubectl top pod error: error: Metrics API not available

k8s version: v1.24.4

kubectl top pod reports: error: Metrics API not available
The metrics-server pod's readiness probe also fails: Readiness probe failed: HTTP probe failed with statuscode: 500
Redeploy metrics-server with the manifest below (note the --kubelet-insecure-tls flag and the mirrored image):

vim custom-resources.yaml

apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-view: "true"
  name: system:aggregated-metrics-reader
rules:
- apiGroups:
  - metrics.k8s.io
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
rules:
- apiGroups:
  - ""
  resources:
  - nodes/metrics
  verbs:
  - get
- apiGroups:
  - ""
  resources:
  - pods
  - nodes
  verbs:
  - get
  - list
  - watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server-auth-reader
  namespace: kube-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: extension-apiserver-authentication-reader
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server:system:auth-delegator
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:auth-delegator
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    k8s-app: metrics-server
  name: system:metrics-server
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: system:metrics-server
subjects:
- kind: ServiceAccount
  name: metrics-server
  namespace: kube-system
---
apiVersion: v1
kind: Service
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  ports:
  - name: https
    port: 443
    protocol: TCP
    targetPort: https
  selector:
    k8s-app: metrics-server
---
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    k8s-app: metrics-server
  name: metrics-server
  namespace: kube-system
spec:
  selector:
    matchLabels:
      k8s-app: metrics-server
  strategy:
    rollingUpdate:
      maxUnavailable: 0
  template:
    metadata:
      labels:
        k8s-app: metrics-server
    spec:
      containers:
      - args:
        - --cert-dir=/tmp
        - --secure-port=4443
        - --kubelet-preferred-address-types=InternalIP,Hostname,InternalDNS,ExternalDNS,ExternalIP
        - --kubelet-use-node-status-port
        - --metric-resolution=15s
        - --kubelet-insecure-tls
        image: registry.cn-hangzhou.aliyuncs.com/google_containers/metrics-server:v0.6.1
        #image: k8s.gcr.io/metrics-server/metrics-server:v0.6.1
        imagePullPolicy: IfNotPresent
        livenessProbe:
          failureThreshold: 3
          httpGet:
            path: /livez
            port: https
            scheme: HTTPS
          periodSeconds: 10
        name: metrics-server
        ports:
        - containerPort: 4443
          name: https
          protocol: TCP
        readinessProbe:
          failureThreshold: 3
          httpGet:
            path: /readyz
            port: https
            scheme: HTTPS
          initialDelaySeconds: 20
          periodSeconds: 10
        resources:
          requests:
            cpu: 100m
            memory: 200Mi
        securityContext:
          allowPrivilegeEscalation: false
          readOnlyRootFilesystem: true
          runAsNonRoot: true
          runAsUser: 1000
        volumeMounts:
        - mountPath: /tmp
          name: tmp-dir
      nodeSelector:
        kubernetes.io/os: linux
      priorityClassName: system-cluster-critical
      serviceAccountName: metrics-server
      volumes:
      - emptyDir: {}
        name: tmp-dir
---
apiVersion: apiregistration.k8s.io/v1
kind: APIService
metadata:
  labels:
    k8s-app: metrics-server
  name: v1beta1.metrics.k8s.io
spec:
  group: metrics.k8s.io
  groupPriorityMinimum: 100
  insecureSkipTLSVerify: true
  service:
    name: metrics-server
    namespace: kube-system
  version: v1beta1
  versionPriority: 100

# Apply the manifest
kubectl apply -f custom-resources.yaml
# Check the metrics-server pod
kubectl get pod -A | grep metrics-server
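Once the metrics-server pod is Running and Ready, the Metrics API should respond, for example:

kubectl top nodes
kubectl top pod -A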

[Solved] Canal Error: Could not find first log file name in binary log index file

Check /home/admin/canal-server/logs/example/example.log and find the following error:

2022-07-20 00:00:08.473 [destination = example , address = mall-mysql/192.168.38.131:3306 , EventParser] ERROR com.alibaba.otter.canal.common.alarm.LogAlarmHandler - destination:e
xample[java.io.IOException: Received error packet: errno = 1236, sqlstate = HY000 errmsg = Could not find first log file name in binary log index file                             
        at com.alibaba.otter.canal.parse.inbound.mysql.dbsync.DirectLogFetcher.fetch(DirectLogFetcher.java:102)                                                                    
        at com.alibaba.otter.canal.parse.inbound.mysql.MysqlConnection.dump(MysqlConnection.java:238)                                                                              
        at com.alibaba.otter.canal.parse.inbound.AbstractEventParser$1.run(AbstractEventParser.java:262)                                                                           
        at java.lang.Thread.run(Thread.java:748) 

Reason:

The binlog file named in the Canal instance configuration was not found on the MySQL server.

Solution:

Because instance.properties is packaged into the Docker image, it can only be modified inside the running pod.

First check the current binlog file name and position in the database.

Query it in this example's mall-mysql database:

mysql> show master status;

Output: File: mysql-binlog.000233, Position: 652645

Enter the pod:

kubectl exec -ti mall-canal-84f6f7d7cc-xbghn bash -n nsName
xxx> vi /home/admin/canal-server/conf/example/instance.properties

Modify the position Info section:

canal.instance.master.address=mall-mysql:3306
canal.instance.master.journal.name=mysql-binlog.000233
canal.instance.master.position=652645
canal.instance.master.timestamp=
canal.instance.master.gtid=

Restart service:

xxx> cd /home/admin/canal-server
xxx> ./restart.sh

Check the log again after the restart; the error should no longer appear.
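For reference, one way to check that log without opening a shell in the pod (pod name and namespace as in the example above):

kubectl -n nsName exec mall-canal-84f6f7d7cc-xbghn -- tail -n 50 /home/admin/canal-server/logs/example/example.log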

[Solved] k8s kubeadm init Error: 'http://localhost:10248/healthz' failed

Error Messages:

[wait-control-plane] Waiting for the kubelet to boot up the control plane as static Pods from directory "/etc/kubernetes/manifests". This can take up to 4m0s
[kubelet-check] Initial timeout of 40s passed.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10248/healthz' failed with error: Get "http://localhost:10248/healthz": dial tcp [::1]:10248: connect: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10248/healthz' failed with error: Get "http://localhost:10248/healthz": dial tcp [::1]:10248: connect: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10248/healthz' failed with error: Get "http://localhost:10248/healthz": dial tcp [::1]:10248: connect: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10248/healthz' failed with error: Get "http://localhost:10248/healthz": dial tcp [::1]:10248: connect: connection refused.
[kubelet-check] It seems like the kubelet isn't running or healthy.
[kubelet-check] The HTTP call equal to 'curl -sSL http://localhost:10248/healthz' failed with error: Get "http://localhost:10248/healthz": dial tcp [::1]:10248: connect: connection refused.
        Unfortunately, an error has occurred:
                timed out waiting for the condition
        This error is likely caused by:
                - The kubelet is not running
                - The kubelet is unhealthy due to a misconfiguration of the node in some way (required cgroups disabled)
        If you are on a systemd-powered system, you can try to troubleshoot the error with the following commands:
                - 'systemctl status kubelet'
                - 'journalctl -xeu kubelet'
        Additionally, a control plane component may have crashed or exited when started by the container runtime.
        To troubleshoot, list all containers using your preferred container runtimes CLI.
        Here is one example how you may list all Kubernetes containers running in docker:
                - 'docker ps -a | grep kube | grep -v pause'
                Once you have found the failing container, you can inspect its logs with:
                - 'docker logs CONTAINERID'
error execution phase wait-control-plane: couldn't initialize a Kubernetes cluster

 

Use the following command to find out why the kubelet fails to start:

systemctl status kubelet -l
kubelet.service - kubelet: The Kubernetes Node Agent
   Loaded: loaded (/usr/lib/systemd/system/kubelet.service; enabled; vendor preset: disabled)
  Drop-In: /usr/lib/systemd/system/kubelet.service.d
           └─10-kubeadm.conf
   Active: activating (auto-restart) (Result: exit-code) since Thu 2022-04-14 19:12:05 CST; 7s ago
     Docs: https://kubernetes.io/docs/
  Process: 4796 ExecStart=/usr/bin/kubelet $KUBELET_KUBECONFIG_ARGS $KUBELET_CONFIG_ARGS $KUBELET_KUBEADM_ARGS $KUBELET_EXTRA_ARGS (code=exited, status=1/FAILURE)
 Main PID: 4796 (code=exited, status=1/FAILURE)
Apr 14 19:12:05 K8SMASTER01 systemd[1]: kubelet.service: main process exited, code=exited, status=1/FAILURE
Apr 14 19:12:05 K8SMASTER01 kubelet[4796]: E0414 19:12:05.862353    4796 server.go:294] "Failed to run kubelet" err="failed to run Kubelet: misconfiguration: kubelet cgroup driver: \"systemd\" is different from docker cgroup driver: \"cgroupfs\""
Apr 14 19:12:05 K8SMASTER01 systemd[1]: Unit kubelet.service entered failed state.
Apr 14 19:12:05 K8SMASTER01 systemd[1]: kubelet.service failed.

 

Solution:

[root@K8SMASTER01 ~]# cat > /etc/docker/daemon.json <<EOF
> {"exec-opts": ["native.cgroupdriver=systemd"]}
> EOF
[root@K8SMASTER01 ~]# systemctl restart docker
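After restarting Docker, it is worth confirming that the cgroup driver actually changed and then retrying the init; a rough sketch (kubeadm reset cleans up the partially initialized control plane left by the failed run):

docker info | grep -i 'cgroup driver'   # should now show systemd
systemctl restart kubelet
kubeadm reset -f
# then re-run the original kubeadm init command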

[Solved] Error from server (InternalError): error when creating "ingress.yaml": Internal error occurred: fail

When exposing a service through an Ingress, kubectl apply -f ingress.yaml reports the following error.
Reported error:

Error from server (InternalError): error when creating "ingress.yaml": Internal error occurred: failed calling webhook "validate.nginx.ingress.kubernetes.io": failed to call webhook: Post "https://ingress-nginx-controller-admission.ingress-nginx.svc:443/networking/v1/ingresses?timeout=10s": x509: certificate has expired or is not yet valid: current time 2022-03-26T14:45:34Z is before 2022-03-26T20:16:32Z

 

Solution:
List the validating webhook configurations:

kubectl get validatingwebhookconfigurations

Delete ingress-nginx-admission:

kubectl delete -A ValidatingWebhookConfiguration ingress-nginx-admission

Then execute

kubectl apply -f ingress.yaml 
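To confirm the webhook configuration is gone and the Ingress was actually created:

kubectl get validatingwebhookconfigurations
kubectl get ingress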

[Solved] k8s error retrieving resource lock default/fuseim.pri-ifs: Unauthorized

While installing Prometheus with Helm, the nfs-client-provisioner ServiceAccount had been deployed in the default namespace, and the error in the title occurred.

[hadoop@hadoop03 NFS]$ vim nfs-rbac.yaml

apiVersion: v1
kind: ServiceAccount
metadata:
  name: nfs-client-provisioner
  #namespace: nfs-client

---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["create", "update", "patch"]   ## Deploy to the default namespace to report an error title error
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: run-nfs-client-provisioner
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    namespace: default
roleRef:
  kind: ClusterRole
  name: nfs-client-provisioner-runner
  apiGroup: rbac.authorization.k8s.io

---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  namespace: default
rules:
  - apiGroups: [""]
    resources: ["endpoints"]
    verbs: ["get", "list", "watch", "create", "update", "patch"]

---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: leader-locking-nfs-client-provisioner
  namespace: default
subjects:
  - kind: ServiceAccount
    name: nfs-client-provisioner
    namespace: default
roleRef:
  kind: Role
  name: leader-locking-nfs-client-provisioner
  apiGroup: rbac.authorization.k8s.io


Check the logs of the NFS provisioner pod:

kubectl logs nfs-client-provisioner-764f44f754-wdtqp

E1206 08:52:27.293890       1 leaderelection.go:234] error retrieving resource lock default/fuseim.pri-ifs: endpoints "fuseim.pri-ifs" is forbidden: User "system:serviceaccount:default:nfs-client-provisioner" cannot get resource "endpoints" in API group "" in the namespace "default"

Modify the ClusterRole permissions:

kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: nfs-client-provisioner-runner
rules:
  - apiGroups: [""]
    resources: ["persistentvolumes"]
    verbs: ["get", "list", "watch", "create", "delete"]
  - apiGroups: [""]
    resources: ["persistentvolumeclaims"]
    verbs: ["get", "list", "watch", "update"]
  - apiGroups: ["storage.k8s.io"]
    resources: ["storageclasses"]
    verbs: ["get", "list", "watch"]
  - apiGroups: [""]
    resources: ["events"]
    verbs: ["get", "list", "watch", "create", "update", "patch"] ### 把权限修改为这个(default namespace)

[Solved] Docker failed to start daemon: error initializing graphdriver: driver not supported

When the node tries to join the cluster, kubeadm's preflight check reports that the vfs graph driver is not supported:

[ERROR SystemVerification]: unsupported graph driver: vfs

So /etc/docker/daemon.json was changed to use the devicemapper storage driver:

{
        "registry-mirrors":["https://registry.docker-cn.com"],
        "bridge":"nufront-br",
        "storage-driver":"devicemapper",   ####
        "exec-opts": ["native.cgroupdriver=systemd"],
        "insecure-registries": ["hadoop03:5000"]
}

# Reload and try to start Docker; it fails again
systemctl daemon-reload
service docker start   # Error: error initializing graphdriver: driver not supported

Reference: https://github.com/moby/moby/issues/15651. It turned out that Docker CE on this node had been installed by unpacking the binary tarball and configuring the service by hand rather than through yum (offline environment), which is why the devicemapper graph driver is not supported.
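A quick way to confirm how dockerd was installed on a node, assuming it is on the PATH: a statically built tarball binary is not owned by any RPM package, and ldd reports it is not a dynamic executable.

which dockerd
rpm -qf "$(which dockerd)"   # tarball install: "... is not owned by any package"
ldd "$(which dockerd)"       # static binary: "not a dynamic executable"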

#### 

[root@nufront-worker-02 bin]# cd /opt/module/docker/
[root@nufront-worker-02 docker]# ll

-rwxr-xr-x 1 root root 39593864 Nov 23 11:12 containerd
-rwxr-xr-x 1 root root 21508168 Nov 23 11:12 ctr
-rwxr-xr-x 1 root root 60073904 Nov 23 11:12 docker
-rwxr-xr-x 1 root root 78951368 Nov 23 11:12 dockerd
-rwxr-xr-x 1 root root   708616 Nov 23 11:12 docker-init
-rwxr-xr-x 1 root root  2933646 Nov 23 11:12 docker-proxy


Try RPM installation

#######
[root@nufront-worker-02 docker]# ll
total 350072
-rw-r--r-- 1 root root   104408 Nov 23 11:12 audit-libs-2.8.5-4.el7.x86_64.rpm
-rw-r--r-- 1 root root    78256 Nov 23 11:12 audit-libs-python-2.8.5-4.el7.x86_64.rpm
-rwxr-xr-x 1 root root 39593864 Nov 23 11:12 containerd
-rw-r--r-- 1 root root 35130608 Nov 23 11:12 containerd.io-1.4.6-3.1.el7.x86_64.rpm
-rwxr-xr-x 1 root root  7270400 Nov 23 11:12 containerd-shim
-rwxr-xr-x 1 root root  9953280 Nov 23 11:12 containerd-shim-runc-v2
-rw-r--r-- 1 root root    40816 Nov 23 11:12 container-selinux-2.119.2-1.911c772.el7_8.noarch.rpm
-rwxr-xr-x 1 root root 21508168 Nov 23 11:12 ctr
-rwxr-xr-x 1 root root 60073904 Nov 23 11:12 docker
-rw-r--r-- 1 root root 27902344 Nov 23 11:12 docker-ce-20.10.7-3.el7.x86_64 (1).rpm
-rw-r--r-- 1 root root 34717572 Nov 23 11:12 docker-ce-cli-20.10.7-3.el7.x86_64.rpm
-rw-r--r-- 1 root root  9659320 Nov 23 11:12 docker-ce-rootless-extras-20.10.7-3.el7.x86_64.rpm
-rwxr-xr-x 1 root root 78951368 Nov 23 11:12 dockerd
-rwxr-xr-x 1 root root   708616 Nov 23 11:12 docker-init
-rwxr-xr-x 1 root root  2933646 Nov 23 11:12 docker-proxy
-rw-r--r-- 1 root root  4373740 Nov 23 11:12 docker-scan-plugin-0.8.0-3.el7.x86_64.rpm
-rwxr-xr-x 1 root root     1200 Nov 23 11:12 docker.service
-rw-r--r-- 1 root root    83764 Nov 23 11:12 fuse3-libs-3.6.1-4.el7.x86_64.rpm
-rw-r--r-- 1 root root    95424 Nov 23 11:12 fuse-libs-2.9.2-11.el7.x86_64.rpm
-rw-r--r-- 1 root root    55796 Nov 23 11:12 fuse-overlayfs-0.7.2-6.el7_8.x86_64.rpm
-rw-r--r-- 1 root root    67720 Nov 23 11:12 libcgroup-0.41-21.el7.x86_64.rpm
-rw-r--r-- 1 root root   101800 Nov 23 11:12 libcgroup-tools-0.41-21.el7.x86_64.rpm
-rw-r--r-- 1 root root    56824 Nov 23 11:12 libnetfilter_conntrack-1.0.6-1.el7_3.x86_64.rpm
-rw-r--r-- 1 root root    57460 Nov 23 11:12 libseccomp-2.3.1-4.el7.x86_64.rpm
-rw-r--r-- 1 root root   166012 Nov 23 11:12 libselinux-2.5-15.el7.x86_64.rpm
-rw-r--r-- 1 root root   154876 Nov 23 11:12 libselinux-utils-2.5-15.el7.x86_64.rpm
-rw-r--r-- 1 root root   154244 Nov 23 11:12 libsemanage-2.5-14.el7.x86_64.rpm
-rw-r--r-- 1 root root   115284 Nov 23 11:12 libsemanage-python-2.5-14.el7.x86_64.rpm
-rw-r--r-- 1 root root   304196 Nov 23 11:12 libsepol-2.5-10.el7.x86_64.rpm
-rw-r--r-- 1 root root    78740 Nov 23 11:12 libsepol-devel-2.5-10.el7.x86_64 (1).rpm
-rw-r--r-- 1 root root    78740 Nov 23 11:12 libsepol-devel-2.5-10.el7.x86_64.rpm
-rw-r--r-- 1 root root   938736 Nov 23 11:12 policycoreutils-2.5-34.el7.x86_64.rpm
-rw-r--r-- 1 root root   468316 Nov 23 11:12 policycoreutils-python-2.5-34.el7.x86_64.rpm
-rwxr-xr-x 1 root root 14485560 Nov 23 11:12 runc
-rw-r--r-- 1 root root   509568 Nov 23 11:12 selinux-policy-3.13.1-268.el7_9.2.noarch.rpm
-rw-r--r-- 1 root root  7335504 Nov 23 11:12 selinux-policy-targeted-3.13.1-268.el7_9.2.noarch.rpm
-rw-r--r-- 1 root root    83452 Nov 23 11:12 slirp4netns-0.4.3-4.el7_8.x86_64.rpm

[root@nufront-worker-02 docker]# rpm -ivh *.rpm  --nodeps --force 


[root@nufront-worker-02 docker]# yum list installed | grep docker
docker-ce.x86_64                        3:20.10.7-3.el7                installed
docker-ce-cli.x86_64                    1:20.10.7-3.el7                installed
docker-ce-rootless-extras.x86_64        20.10.7-3.el7                  installed
docker-scan-plugin.x86_64               0.8.0-3.el7                    installed

After installing the RPM packages, Docker starts successfully:

[root@nufront-worker-02 docker]# docker info
Client:
 Context:    default
 Debug Mode: false
 Plugins:
  app: Docker App (Docker Inc., v0.9.1-beta3)
  buildx: Build with BuildKit (Docker Inc., v0.5.1-docker)
  scan: Docker Scan (Docker Inc., v0.8.0)

Server:
 Containers: 0
  Running: 0
  Paused: 0
  Stopped: 0
 Images: 0
 Server Version: 20.10.7
 Storage Driver: devicemapper ###
  Pool Name: docker-253:0-812466384-pool
  Pool Blocksize: 65.54kB
  Base Device Size: 10.74GB
  Backing Filesystem: xfs
  Udev Sync Supported: true
  Data file: /dev/loop0
  Metadata file: /dev/loop1
  Data loop file: /var/lib/docker/devicemapper/devicemapper/data
  Metadata loop file: /var/lib/docker/devicemapper/devicemapper/metadata
  Data Space Used: 11.8MB
  Data Space Total: 107.4GB
  Data Space Available: 107.4GB
  Metadata Space Used: 581.6kB
  Metadata Space Total: 2.147GB
  Metadata Space Available: 2.147GB
  Thin Pool Minimum Free Space: 10.74GB
  Deferred Removal Enabled: true
  Deferred Deletion Enabled: true
  Deferred Deleted Device Count: 0
  Library Version: 1.02.107-RHEL7 (2015-10-14)
 Logging Driver: json-file
 Cgroup Driver: systemd
 Cgroup Version: 1
 Plugins:
...

error: error validating "ingress-tomcat6.yaml": error validating data: [ValidationError(Ingress.spec

kubectl apply -f ingress-tomcat6.yaml
error: error validating "ingress-tomcat6.yaml": error validating data: [ValidationError(Ingress.spec.rules[0].http.paths[0]): unknown field "serviceName" in io.k8s.api.extensions.v1beta1.HTTPIngressPath, ValidationError(Ingress.spec.rules[0].http.paths[0]): unknown field "servicePort" in io.k8s.api.extensions.v1beta1.HTTPIngressPath, ValidationError(Ingress.spec.rules[0].http.paths[0]): missing required field "backend" in io.k8s.api.extensions.v1beta1.HTTPIngressPath]; if you choose to ignore these errors, turn validation off with --validate=false

The Ingress YAML was mis-indented: the lines under backend (serviceName and servicePort) were missing an indentation level, so they were parsed as fields of the path entry instead of children of backend. Adding the missing spaces under backend fixes the validation error (the original post showed before/after screenshots of the file); see the sketch below.
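For illustration, a minimal, correctly indented extensions/v1beta1 Ingress written the same way as the other manifests in this post; the host and service names here are made up, and the only point is that serviceName and servicePort sit one level deeper than backend:

cat <<'EOF' > ingress-tomcat6.yaml
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: tomcat6
spec:
  rules:
  - host: tomcat6.example.com
    http:
      paths:
      - path: /
        backend:
          serviceName: tomcat6
          servicePort: 80
EOF
kubectl apply -f ingress-tomcat6.yaml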

Etcd Error: mvcc: database space exceeded

etcd reports the error: mvcc: database space exceeded

Main analysis: automatic compaction.
etcd does not compact old revisions automatically by default; compaction has to be enabled with a startup flag or run manually with etcdctl. etcd v3 enforces a backend quota of 2 GB by default, and if compaction (and defragmentation) is never performed and the boltdb file grows past that quota, etcd raises "Error: etcdserver: mvcc: database space exceeded" and stops accepting writes.
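For reference, automatic compaction can be enabled (and the backend quota raised) with etcd startup flags; the flags below go into each member's systemd unit or static pod manifest, and the values are only illustrative:

--auto-compaction-mode=periodic
--auto-compaction-retention=1h
--quota-backend-bytes=8589934592   # 8 GiB instead of the default 2 GiB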

Processing steps:
The etcd members here are 192.168.10.203, 192.168.10.204 and 192.168.10.205.

1 View alarms
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.203:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem alarm list
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.204:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem alarm list
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.205:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem alarm list

2 Get the current revision (the command follows the standard etcd maintenance procedure)
[root@~]# rev=$(/opt/k8s/bin/etcdctl --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem --endpoints="https://127.0.0.1:2379" endpoint status --write-out="json" | egrep -o '"revision":[0-9]*' | egrep -o '[0-9].*')
[root@~]# echo $rev
846418475

3 Compact the old revisions
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.203:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem compact $rev
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.204:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem compact $rev
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.205:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem compact $rev

4 Defragment each member
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.203:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem defrag
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.204:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem defrag
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.205:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem defrag

5 Disarm the alarm
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.203:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem alarm disarm
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.204:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem alarm disarm
[root@~]# /opt/k8s/bin/etcdctl --endpoints=https://192.168.10.205:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem alarm disarm

6 Back up the data and check the snapshot
ETCDCTL_API=3 etcdctl snapshot save backup.db
ETCDCTL_API=3 etcdctl snapshot status backup.db
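Afterwards the NOSPACE alarm should be gone and the DB SIZE should have dropped; one way to verify (endpoints and certificate paths as above):

/opt/k8s/bin/etcdctl --endpoints=https://192.168.10.203:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem alarm list
/opt/k8s/bin/etcdctl --endpoints=https://192.168.10.203:2379 --cacert=/etc/kubernetes/ssl/ca.pem --cert=/etc/etcd/ssl/etcd.pem --key=/etc/etcd/ssl/etcd-key.pem endpoint status --write-out=table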