Installing a k8s Cluster with kubeadm

Installing the cluster with kubeadm

Preparation

Role        IP            Components
k8s-master  192.168.1.20  kube-apiserver, kube-controller-manager, kube-scheduler, docker, etcd
k8s-node01  192.168.1.18  kubelet, kube-proxy, docker, etcd
k8s-node02  192.168.1.19  kubelet, kube-proxy, docker, etcd
  • docker version: docker-ce 20.10.9
  • kubernetes version: v1.21.4
1. Disable the firewall
- systemctl stop firewalld
- systemctl disable firewalld

2. Disable SELinux
- setenforce 0
- sed -i 's/enforcing/disabled/g' /etc/selinux/config
3. Disable swap
- sudo sed -i '/swap/s/^/#/' /etc/fstab #comment out lines containing swap; /swap/ matches those lines, s/^/#/ inserts # at the start of each (^ = beginning of line)
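The sed above only keeps swap disabled across reboots; to turn it off for the current session as well (not in the original steps), you can additionally run:
- swapoff -a
- free -m #verify that the Swap line now shows 0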

4. Set the hostnames
- hostnamectl set-hostname k8s-master
- hostnamectl set-hostname k8s-node01
- hostnamectl set-hostname k8s-node02

5. Time synchronization
yum install ntpdate -y
ntpdate ntp1.aliyun.com

6. Cron job
crontab -e
*/30 * * * * /usr/sbin/ntpdate -u ntp1.aliyun.com >> /var/log/ntpdate.log 2>&1

7. Kernel parameter tuning
modprobe br_netfilter

cat > /etc/sysctl.d/kubernetes.conf <<EOF
net.bridge.bridge-nf-call-iptables=1
net.bridge.bridge-nf-call-ip6tables=1
net.ipv4.ip_forward=1
# fast recycling of TIME-WAIT sockets; 0 is the default (disabled). The key no longer exists on kernels >= 4.12 and can be dropped there
net.ipv4.tcp_tw_recycle=0
# avoid using swap; only fall back to it when the system is out of memory
vm.swappiness=0
# do not check whether enough physical memory is available before allocating
vm.overcommit_memory=1
# do not panic on OOM; let the kernel OOM-killer handle it
vm.panic_on_oom=0
# raise inotify limits (kubelet and log tooling need many watches)
fs.inotify.max_user_instances=8192
fs.inotify.max_user_watches=1048576
fs.file-max=52706963
fs.nr_open=52706963
net.ipv6.conf.all.disable_ipv6=1
EOF

sysctl -p /etc/sysctl.d/kubernetes.conf
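
# A quick check, not in the original steps: confirm the key bridge/forwarding settings took effect
sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward #both should report 1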

8. Enable ipvs
lsmod|grep ip_vs

for i in $(ls /lib/modules/$(uname -r)/kernel/net/netfilter/ipvs|grep -o "^[^.]*"); do echo $i; /sbin/modinfo -F filename $i >/dev/null 2>&1 && /sbin/modprobe $i; done

ls /lib/modules/$(uname -r)/kernel/net/netfilter/ipvs|grep -o "^[^.]*" >> /etc/modules
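
# Assumption, not in the original steps: /etc/modules is the Debian/Ubuntu convention; on CentOS the
# ipvs modules are normally persisted through systemd-modules-load instead, roughly like this:
cat > /etc/modules-load.d/ipvs.conf <<EOF
ip_vs
ip_vs_rr
ip_vs_wrr
ip_vs_sh
nf_conntrack
EOF
# (on kernels older than 4.19 the conntrack module is named nf_conntrack_ipv4)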

9. Install ipvsadm
yum install ipvsadm ipset -y

Installing docker

centos
# 1. Download Aliyun's docker-ce repo file:
wget https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo -O /etc/yum.repos.d/docker-ce.repo #download the repo file to the given location

# 2. Refresh the yum metadata cache:
yum makecache fast

# 3. List the available docker-ce versions:
yum list docker-ce --showduplicates

# 4. Create the docker config file daemon.json (sets the systemd cgroup driver and an Aliyun registry mirror/accelerator):
mkdir /etc/docker
tee /etc/docker/daemon.json <<-'EOF'
{
"exec-opts": ["native.cgroupdriver=systemd"],
"registry-mirrors":["https://gwsg6nw9.mirror.aliyuncs.com"]
}
EOF

# 5. Install the pinned docker version:
yum install -y --setopt=obsoletes=0 docker-ce-20.10.9-3.el7 #--setopt=obsoletes=0 keeps yum from pulling the latest package so the pinned version is installed

# 6. Start docker:
systemctl start docker.service

# 7. Switch the cgroup driver to systemd to match the kubernetes default
# (note: JSON does not allow comments; the mirror URL below is a placeholder, get your own accelerator address from the Aliyun console)
cat > /etc/docker/daemon.json <<EOF
{
"exec-opts": ["native.cgroupdriver=systemd"],
"registry-mirrors":["https://xxxxxxx.aliyuncs.com"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
},
"storage-driver": "overlay2",
"storage-opts": [
"overlay2.override_kernel_check=true"
]
}
EOF
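
# Not in the original steps: restart docker so the rewritten daemon.json (systemd cgroup driver)
# takes effect, and enable it to start on boot:
systemctl restart docker
systemctl enable docker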

# 8. Check the docker version:
docker version

ubuntu
# 1. Add the GPG key
curl -fsSL https://mirrors.aliyun.com/docker-ce/linux/ubuntu/gpg | sudo apt-key add -

# 2. Add the apt repository
add-apt-repository "deb [arch=amd64] https://mirrors.aliyun.com/docker-ce/linux/ubuntu $(lsb_release -cs) stable"

# 3. Update the package index
apt update

# 4. Check the available versions
apt-cache madison docker-ce
#docker-ce | 5:20.10.9~3-0~ubuntu-focal | https://mirrors.aliyun.com/docker-ce/linux/ubuntu focal/stable amd64 Packages

# 5. Install
apt install docker-ce=5:20.10.9~3-0~ubuntu-focal -y

# 6. Verify
docker version

# 7. Switch the cgroup driver to systemd to match the kubernetes default
cat > /etc/docker/daemon.json <<EOF
{
"exec-opts": ["native.cgroupdriver=systemd"],
"log-driver": "json-file",
"log-opts": {
"max-size": "100m"
},
"storage-driver": "overlay2",
"storage-opts": [
"overlay2.override_kernel_check=true"
]
}
EOF

systemctl restart docker
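
# Not in the original steps: confirm the cgroup driver change took effect and enable docker on boot
docker info | grep -i 'cgroup driver' #expected: Cgroup Driver: systemd
systemctl enable docker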

kubeadm

Overview
Purpose: kubeadm containerizes most of the Kubernetes components and runs them as static Pods, and it automates most of the cluster configuration and certificate/authentication work, so a usable k8s cluster can be brought up in just a few steps.

  • kubeadm init: creates the master node

    • Checks whether the machine meets the requirements (preflight checks)
    • Automatically generates the certificates and configuration the cluster needs, and stores the master node information in a ConfigMap named cluster-info
    • Runs the API server, controller manager, scheduler, and etcd as static Pods
    • Generates a token so that other nodes can join the cluster
  • kubeadm join: a node joins the cluster

  • The node uses the token to contact kube-apiserver and fetch the cluster-info data, mainly the apiserver's CA/authorization information (the node trusts the cluster).

  • With that information, the kubelet performs TLS bootstrapping and establishes genuine mutual trust with the apiserver (the cluster trusts the node).

Installing the components (kubelet kubeadm kubectl)

centos
# 1. Create the repo file:
cat <<EOF > /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://mirrors.aliyun.com/kubernetes/yum/repos/kubernetes-el7-x86_64/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://mirrors.aliyun.com/kubernetes/yum/doc/yum-key.gpg https://mirrors.aliyun.com/kubernetes/yum/doc/rpm-package-key.gpg
EOF

# 2. Install the pinned versions:
### master ###
yum install -y --setopt=obsoletes=0 kubelet-1.21.4-0 kubeadm-1.21.4-0 kubectl-1.21.4-0

### node ###
yum install -y --setopt=obsoletes=0 kubelet-1.21.4-0 kubeadm-1.21.4-0
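
# Not in the original steps: the packages do not enable the kubelet service automatically,
# and kubeadm expects it to be enabled so it can manage it during init/join:
systemctl enable kubelet
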
ubuntu
# 1. Add the GPG key
curl -fsSL https://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg | apt-key add -

# 2. Add the apt repository
cat > /etc/apt/sources.list.d/kubernetes.list <<EOF
deb https://mirrors.aliyun.com/kubernetes/apt/ kubernetes-xenial main
EOF

# 3. Update the package index
apt update

# 4. Check the available versions
apt-cache madison kubeadm
#kubeadm | 1.21.4-00 | https://mirrors.aliyun.com/kubernetes/apt kubernetes-xenial/main amd64 Packages

# 5. Install
#### master ####
apt-get install -y kubeadm=1.21.4-00 kubelet=1.21.4-00 kubectl=1.21.4-00

#### node ####
apt-get install -y kubeadm=1.21.4-00 kubelet=1.21.4-00
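
# Optional, not in the original steps: hold the packages so a later apt upgrade does not move the
# cluster off v1.21.4, and make sure kubelet is enabled on boot:
apt-mark hold kubelet kubeadm kubectl #on nodes without kubectl, hold only kubelet and kubeadm
systemctl enable kubelet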

Installing the cluster

Master node

  • Initialization

    kubeadm init \
    --apiserver-advertise-address=192.168.1.20 \
    --image-repository registry.aliyuncs.com/google_containers \
    --kubernetes-version v1.21.4 \
    --service-cidr=10.96.0.0/12 \
    --pod-network-cidr=10.244.0.0/16 \
    --ignore-preflight-errors=all

    # Parameter meanings, in order:
    # IP the apiserver advertises (the master's IP)
    # image repository (Aliyun mirror of the official images)
    # kubernetes version
    # service CIDR (default)
    # pod CIDR (default)
    # ignore preflight errors
  • Output:

    [kubelet-finalize] Updating "/etc/kubernetes/kubelet.conf" to point to a rotatable kubelet client certificate and key
    [addons] Applied essential addon: CoreDNS
    [addons] Applied essential addon: kube-proxy

    Your Kubernetes control-plane has initialized successfully!

    To start using your cluster, you need to run the following as a regular user:

    mkdir -p $HOME/.kube
    sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
    sudo chown $(id -u):$(id -g) $HOME/.kube/config

    Alternatively, if you are the root user, you can run:

    export KUBECONFIG=/etc/kubernetes/admin.conf

    You should now deploy a pod network to the cluster.
    Run "kubectl apply -f [podnetwork].yaml" with one of the options listed at:
    https://kubernetes.io/docs/concepts/cluster-administration/addons/

    Then you can join any number of worker nodes by running the following on each as root:

    kubeadm join 192.168.1.20:6443 --token mutg2q.34gp9f77hfm3mzqt \
    --discovery-token-ca-cert-hash sha256:5aafdb66ba0d96855c0ec8a59577d4e7b3ba81e2f58069b9b407a0059998372e
  • Following the prompt, create the kubectl credentials file:

    # even as root, use the default kubeconfig file approach
    mkdir -p $HOME/.kube
    sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
    sudo chown $(id -u):$(id -g) $HOME/.kube/config
  • kubectl command completion:

    echo "source <(kubectl completion bash)" >> ~/.bashrc

    # take effect in the current shell
    source <(kubectl completion bash)

Worker nodes

  • Joining the cluster:
    • Run on node01 and node02
    • Execute the join command from the kubeadm init output above
      # run on the node (the IP stays the same)
      kubeadm join 192.168.1.20:6443 --token mutg2q.34gp9f77hfm3mzqt \
      --discovery-token-ca-cert-hash sha256:5aafdb66ba0d96855c0ec8a59577d4e7b3ba81e2f58069b9b407a0059998372e


      # Note: if a node reports an error like the following (the kubelet fails to start):
      Error 1)
      Failed to run kubelet" err="failed to run Kubelet: misconfiguration: kubelet cgroup driver: \"systemd\" is different from docker cgroup driver: \"cgroupfs\"

      # check on the failing node:
      docker info | grep Cgroup
      Cgroup Driver: cgroupfs #not systemd; it has to be changed to systemd to match the kubelet
      Cgroup Version: 1

      # Change the Cgroup Driver:
      # add the following to /etc/docker/daemon.json (create the file if it does not exist) - see step 7 of the docker installation above:
      {
      "exec-opts":["native.cgroupdriver=systemd"]
      }


      Why switch to systemd?
      Kubernetes recommends systemd instead of cgroupfs:
      systemd is already the system's cgroup manager and allocates cgroups for every process,
      while docker's default cgroup driver is cgroupfs, so two cgroup managers end up running side by side,
      which can make the node unstable when resources come under pressure.
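
      # Not in the original text: after fixing daemon.json on the failing node, restart docker and
      # clean up the half-finished join before running kubeadm join again:
      systemctl restart docker
      kubeadm reset -f #wipe the failed join attempt
      # then re-run the kubeadm join command above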

      Error 2):
      error execution phase preflight: [preflight] Some fatal errors occurred:
      [ERROR FileAvailable--etc-kubernetes-kubelet.conf]: /etc/kubernetes/kubelet.conf already exists
      [ERROR Port-10250]: Port 10250 is in use
      [ERROR FileAvailable--etc-kubernetes-pki-ca.crt]: /etc/kubernetes/pki/ca.crt already exists
      [preflight] If you know what you are doing, you can make a check non-fatal with `--ignore-preflight-errors=...`

      - Fix (note: run this on the node):
      If you are re-initializing or re-joining a Kubernetes node, kubeadm reset cleans up the previous setup.
      sudo kubeadm reset

The following applies when the token has expired, or when new nodes are added later; in both cases a new token has to be generated.

  • Token expiry: kubeadm join needs two parameters, --token and --discovery-token-ca-cert-hash. A token is normally valid for 24 hours; to add a node after that, a new token has to be generated.

    # list tokens (run on the master)
    kubeadm token list
    TOKEN TTL EXPIRES USAGES DESCRIPTION EXTRA GROUPS
    mutg2q.34gp9f77hfm3mzqt 23h 2024-07-03T22:43:22+08:00 authentication,signing The default bootstrap token generated by 'kubeadm init'. system:bootstrappers:kubeadm:default-node-token

    # create a new token
    kubeadm token create
    token:l0r2mh.0z5aojdur71posxt

    # discovery-token-ca-cert-hash: the sha256 hash of the cluster CA public key
    openssl x509 -pubkey -in /etc/kubernetes/pki/ca.crt | openssl rsa -pubin -outform der 2>/dev/null | openssl dgst -sha256 -hex | sed 's/^.* //'
    5aafdb66ba0d96855c0ec8a59577d4e7b3ba81e2f58069b9b407a0059998372e

    # join a new node (e.g. a newly added node, or one that has not joined yet)
    kubeadm join 192.168.1.20:6443 --token l0r2mh.0z5aojdur71posxt \
    --discovery-token-ca-cert-hash sha256:5aafdb66ba0d96855c0ec8a59577d4e7b3ba81e2f58069b9b407a0059998372e
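
    # A one-step shortcut supported by kubeadm: print the complete join command, token and hash included
    kubeadm token create --print-join-command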

Installing the network plugin

  • Until a network plugin is installed, the nodes stay NotReady

    kubectl get nodes
    NAME STATUS ROLES AGE VERSION
    k8s-master NotReady control-plane,master 41m v1.21.4
    k8s-node01 NotReady <none> 35m v1.21.4
    k8s-node02 NotReady <none> 8m21s v1.21.4
  • Install flannel:

  • If the image cannot be pulled:
    see the proxy.conf approach from the docker proxy configuration article

    # download the network plugin yaml
    wget https://raw.githubusercontent.com/coreos/flannel/master/Documentation/kube-flannel.yml

    # make sure the network config matches `--pod-network-cidr=10.244.0.0/16`
    vi kube-flannel.yml
    net-conf.json: |
    {
    "Network": "10.244.0.0/16",

    # apply the network plugin
    kubectl apply -f kube-flannel.yml


    # check the network plugin pods
    - by default every node runs one flannel pod; the cluster only becomes healthy once they are up
    kubectl get pod -n kube-flannel -o wide
    NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES
    kube-flannel-ds-nkjbh 1/1 Running 0 103s 192.168.1.18 k8s-node01 <none> <none>
    kube-flannel-ds-qs9sz 1/1 Running 0 103s 192.168.1.20 k8s-master <none> <none>
    kube-flannel-ds-xsg6c 1/1 Running 0 103s 192.168.1.19 k8s-node02 <none> <none>


    # check node status
    kubectl get nodes
    NAME STATUS ROLES AGE VERSION
    k8s-master Ready control-plane,master 43m v1.21.4
    k8s-node01 Ready <none> 33m v1.21.4
    k8s-node02 Ready <none> 32m v1.21.4
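
    # Not in the original text: a quick smoke test that pods can be scheduled and reached across nodes
    # (the nginx image and the names here are only examples)
    kubectl create deployment nginx --image=nginx --replicas=2
    kubectl expose deployment nginx --port=80 --type=NodePort
    kubectl get pods -o wide #pods should get 10.244.x.x IPs on different nodes
    kubectl get svc nginx #note the NodePort, then curl <node-ip>:<nodeport> from any node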

ipvs

  • Set the kube-proxy mode to ipvs (run on the master)
kubectl edit configmap kube-proxy -n kube-system
mode: "ipvs"
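
# Not in the original text: the running kube-proxy pods must be recreated to pick up the new mode,
# and ipvsadm can confirm it afterwards
kubectl -n kube-system rollout restart daemonset kube-proxy
ipvsadm -Ln #should now list virtual servers for the cluster/service IPs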

Cluster status

  • The component status is unhealthy:

    $ kubectl get cs
    NAME STATUS MESSAGE ERROR
    scheduler Unhealthy Get "http://127.0.0.1:10251/healthz": dial tcp 127.0.0.1:10251: connect: connection refused
    controller-manager Unhealthy Get "http://127.0.0.1:10252/healthz": dial tcp 127.0.0.1:10252: connect: connection refused
    etcd-0 Healthy {"health":"true"}
  • Cause: the default manifests set --port=0, which disables the insecure healthz ports (10251/10252) that kubectl get cs probes. Fix it as follows (on the master):

    $ vi /etc/kubernetes/manifests/kube-scheduler.yaml
    ...
    spec:
      containers:
      - command:
        - kube-scheduler
        - --kubeconfig=/etc/kubernetes/scheduler.conf
        - --leader-elect=true
        #- --port=0   # comment this line out
        image: registry.aliyuncs.com/google_containers/kube-scheduler:v1.21.4

    $ vi /etc/kubernetes/manifests/kube-controller-manager.yaml
    ...
    spec:
      containers:
      - command:
        - kube-controller-manager
        - --node-cidr-mask-size=24
        #- --port=0   # comment this line out
        - --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt

    # restart kubelet
    $ systemctl restart kubelet

    # check the status again
    $ kubectl get cs
    NAME STATUS MESSAGE ERROR
    scheduler Healthy ok
    controller-manager Healthy ok
    etcd-0 Healthy {"health":"true"}