# add host to your ssh known_hosts
ssh server-name-01
#
# note the setup:
# 1. Playbook.yml, 2. -i (inventory), 3. pass common vars, including password vault, 4. pass extra vars, including hostname and environment, 5. unlock the password vault
#
ansible-playbook configure-apt.yml -i "server-name-01," -e @common_vars/common_vars.yml --extra-vars 'env=local variable_host=server-name-01' -v -C --vault-password-file ~/vars/.common.txt

 

 
---
- name: Test Playbook to run a shell command
hosts: "{{ variable_host | default('host-group-name')}}"
become: yes
tasks:
- name: run this command and ignore the result
  shell: /usr/bin/somecommand
  ignore_errors: True
...

Pass the host list as a variable:

hosts: "{{ variable_host | default('web')}}"
 
# command
ansible-playbook server.yml --extra-vars "variable_host=server-name-01"

'나는 노동자 > LINUX' 카테고리의 다른 글

rpm으로 패키지 설치 유무 확인  (0) 2023.05.09
ansible 물리서버, 가상서버 확인  (0) 2023.02.07
ansible facts device check  (0) 2023.02.04
리눅스 임시 포트 오픈  (0) 2023.01.12
xfs volume extend  (0) 2020.07.10
:: 실서버 
# systemd-detect-virt
none

:: 가상서버 
 # systemd-detect-virt
kvm

root@DESKTOP-F4T7TCG:/home/ansible# cat vm.yml
---
- name: test
  hosts: vm
  tasks:
  - name: server check
    shell: systemd-detect-virt
    register: check

  - name: dfasf
    shell: echo " testesrsfd" > /home/ansible/a.txt
    when: '"wsl" in check.stdout'

'나는 노동자 > LINUX' 카테고리의 다른 글

rpm으로 패키지 설치 유무 확인  (0) 2023.05.09
ansible extra vars  (0) 2023.02.13
ansible facts device check  (0) 2023.02.04
리눅스 임시 포트 오픈  (0) 2023.01.12
xfs volume extend  (0) 2020.07.10
- hosts: localhost
  gather_facts: true
  tasks:
    - debug:
        msg: Disk nvme0n1 exists.
      when: "'nvme0n1' in ansible_devices.keys()|list"
    - debug:
        msg: Disk sdb does not exist.
      when: "'sdb' not in ansible_devices.keys()|list"

https://docs.ansible.com/ansible/latest/playbook_guide/playbooks_conditionals.html

 

```bash
root@DESKTOP-F4T7TCG:/home/ansible# more disk.yml
---
- name: test disk information check
  hosts: vm
  tasks:
  - name: disk check
    debug:
      msg: "{{ ansible_facts['devices'] }}"
  - name: check True or not
    debug:
      msg: Disk sdb doest not exist
    when: "'sdb' not in ansible_devices.keys()|list"
  - name: check disk not
    copy:
      content: "{{ ansible_facts['devices']['sdb']['size'] }}"
      dest: "/home/{{ansible_fqdn}}-diskinfo.txt"
    delegate_to: localhost
    when: "'sdb' in ansible_devices.keys()| list"

```

 

============

  - name: chel
    shell: |
            echo "
            node_kernel_size(name='{{ ansible_hostname }}')  '{{ ansible_kernel }}'
            node_disk_size(name='{{ ansible_hostname }}')  '{{ ansible_facts.devices.sdb.size }}'
            " >> /home/ansible/info.txt
#      dest: "/home/ansible/{{ansible_fqdn}}-info.txt"
#    delegate_to: localhost
    when: "'sdb' in ansible_devices.keys()| list"

 

=========== prom(Prometheus)으로 사용하려면 ========= 값 앞뒤에 \"를 추가해 줘야 한다

 

systemctl restart node_exporter
journalctl -eu node_exporter

 

workshop_student_is_happy{campus="campusX"} 1
curl localhost:9100/metrics
curl -s localhost:9100/metrics | grep workshop

  - name: chel
    shell: |
            echo "
            node_kernel_size{name=\""{{ ansible_hostname }}"\",output=\""{{ ansible_kernel }}"\"} 0
            node_disk_size{name=\""{{ ansible_hostname }}"\",size=\""{{ ansible_facts.devices.sdb.size }}"\"} 0
            " >> /home/ansible/info.txt
#      dest: "/home/ansible/{{ansible_fqdn}}-info.txt"
#    delegate_to: localhost
    when: "'sdb' in ansible_devices.keys()| list"

 

 

==============================================================

----
- name: test disk information check
  hosts: vm
  tasks:
  - name: disk check if yes
    shell: |
         echo "
         node_kernel_size{name=\""{{ ansible_hostname }}"\",output=\""{{ ansible_kernel }}"\",disk=\""{{ ansible_facts.devices.sdb.size}} "\"} 0
         " > /home/ansible/info.txt
#      dest: "/home/ansible/{{ansible_fqdn}}-info.txt"
#    delegate_to: localhost
    when: "'sdb' in ansible_devices.keys()| list"

  - name: disk check if no
    shell: |
         echo "
         node_kernel_size{name=\""{{ ansible_hostname }}"\",output=\""{{ ansible_kernel }}"\",disk=\""0"\"} 0
         " > /home/ansible/info.txt
#      dest: "/home/ansible/{{ansible_fqdn}}-info.txt"
#    delegate_to: localhost
    when: "'sdcxb' not in ansible_devices.keys()| list"                 

 

https://dywang.csie.cyut.edu.tw/dywang/ansible/node132.html                                                                    

'나는 노동자 > LINUX' 카테고리의 다른 글

ansible extra vars  (0) 2023.02.13
ansible 물리서버, 가상서버 확인  (0) 2023.02.07
리눅스 임시 포트 오픈  (0) 2023.01.12
xfs volume extend  (0) 2020.07.10
Log Rate Limiting in Linux  (0) 2018.09.17

nc 명령어를 사용하여 서버에 임시 포트 오픈 및 확인을 할 수 있다.



서버 1은 임시로 포트를 open

서버 2는 서버1의 오픈된 포트로 접근시도



* 서버1 192.168.10.10

$ nc -lk [port]

예: nc -lk 8080



* 서버2 192.168.20.10

$ nc -v [서버1 IP] [port]

예: nc -v 192.168.10.10 8080

'나는 노동자 > LINUX' 카테고리의 다른 글

ansible 물리서버, 가상서버 확인  (0) 2023.02.07
ansible facts device check  (0) 2023.02.04
xfs volume extend  (0) 2020.07.10
Log Rate Limiting in Linux  (0) 2018.09.17
repo_download and sync  (0) 2018.04.26

SSL 오류 때문에 fetch_openml로 mnist를 실제로 다운받지 못하는 상태에서..

from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784',version=1,data_home='./dataset', as_frame=False)

압축파일을 jupyter 위에 풀고 다시 한번 해보자.. 되려나?



ㅋㅋ 된다 된다

우선 현재 작업 중인 경로를 확인

import os
print(os.getcwd())

압축파일 다운로드 후 현재 작업공간에 업로드
이제 압축을 풀어준다

파일경로는 본인 경로에 맞게 수정

import zipfile
with zipfile.ZipFile(‘/home/jovyan/study/daraset.zip’, ‘r’) as zip_ref
zip_ref.extractall(‘/home/jovyan/study/‘)


이제 맨 위의 fetch_openml 코드를 다시 실행해서 mnist를 불러오면 됨

dataset.zip
14.76MB

'나는 노동자 > 이런저런 Tip' 카테고리의 다른 글

windows os hostname s/n ip  (0) 2023.03.20
grafana plugin 수동설치  (0) 2023.03.03
rancher password reset  (0) 2021.09.02
아이폰 벨소리 -대략 1분 정도  (0) 2020.09.29
gitlab file read 경로 문제  (0) 2020.08.19

참고로 minikube는 인증서가 /var/lib/minikube/certs에 있다

 

 

 

[root@minikube home]#  kubeadm certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'

CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY   EXTERNALLY MANAGED
admin.conf                 Nov 24, 2022 14:07 UTC   364d                                    no
apiserver                  Nov 24, 2022 14:07 UTC   364d            ca                      no
apiserver-etcd-client      Nov 24, 2022 14:07 UTC   364d            etcd-ca                 no
apiserver-kubelet-client   Nov 24, 2022 14:07 UTC   364d            ca                      no
controller-manager.conf    Nov 24, 2022 14:07 UTC   364d                                    no
etcd-healthcheck-client    Nov 24, 2022 14:07 UTC   364d            etcd-ca                 no
etcd-peer                  Nov 24, 2022 14:07 UTC   364d            etcd-ca                 no
etcd-server                Nov 24, 2022 14:07 UTC   364d            etcd-ca                 no
front-proxy-client         Nov 24, 2022 14:07 UTC   364d            front-proxy-ca          no
scheduler.conf             Nov 24, 2022 14:07 UTC   364d                                    no

CERTIFICATE AUTHORITY   EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
ca                      Nov 17, 2031 15:30 UTC   9y              no
etcd-ca                 Nov 17, 2031 15:32 UTC   9y              no
front-proxy-ca          Nov 17, 2031 15:32 UTC   9y              no


 kubeadm certs generate-csr  --cert-dir /home/GOOD --kubeconfig-dir /home/GOOD
 cp /etc/kubernetes/pki/ca.* /home/GOOD/
 
  openssl x509 -req -in apiserver.csr -CAcreateserial -CA ca.crt -CAkey ca.key -days 10000 -out apiserver.crt

cd /etc/kubernetes/pki/
rm  apiserver.crt
cp /home/GOOD/apiserver.crt .
 
 
 [root@minikube certs]#  kubeadm certs check-expiration
[check-expiration] Reading configuration from the cluster...
[check-expiration] FYI: You can look at this config file with 'kubectl -n kube-system get cm kubeadm-config -o yaml'

CERTIFICATE                EXPIRES                  RESIDUAL TIME   CERTIFICATE AUTHORITY   EXTERNALLY MANAGED
admin.conf                 Nov 24, 2022 14:07 UTC   364d                                    no
apiserver                  Apr 12, 2049 13:32 UTC   27y             ca                      no
apiserver-etcd-client      Nov 24, 2022 14:07 UTC   364d            etcd-ca                 no
apiserver-kubelet-client   Nov 24, 2022 14:07 UTC   364d            ca                      no
controller-manager.conf    Nov 24, 2022 14:07 UTC   364d                                    no
etcd-healthcheck-client    Nov 24, 2022 14:07 UTC   364d            etcd-ca                 no
etcd-peer                  Nov 24, 2022 14:07 UTC   364d            etcd-ca                 no
etcd-server                Nov 24, 2022 14:07 UTC   364d            etcd-ca                 no
front-proxy-client         Nov 24, 2022 14:07 UTC   364d            front-proxy-ca          no
scheduler.conf             Nov 24, 2022 14:07 UTC   364d                                    no

CERTIFICATE AUTHORITY   EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
ca                      Nov 17, 2031 15:30 UTC   9y              no
etcd-ca                 Nov 17, 2031 15:32 UTC   9y              no
front-proxy-ca          Nov 17, 2031 15:32 UTC   9y              no
[root@minikube certs]#

 

 

 

The Kubernetes cluster certificates have a lifespan of one year. If the Kubernetes cluster certificate expires on the Kubernetes master, then the kubelet service will fail. Issuing a kubectl command, such as kubectl get pods or kubectl exec -it container_name bash, will result in a message similar to Unable to connect to the server: x509: certificate has expired or is not yet valid.

Procedure

  1. Log on to the Kubernetes master node as the root user and run the following command to check when the Kubernetes certificates will expire.
    kubeadm alpha certs check-expiration
    The output will be similar to the following. In this case the certificates will expire in 273 days.
    CERTIFICATE                EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
    admin.conf                 Sep 17, 2020 21:24 UTC   273d            no
    apiserver                  Sep 17, 2020 21:24 UTC   273d            no
    apiserver-etcd-client      Sep 17, 2020 21:24 UTC   273d            no
    apiserver-kubelet-client   Sep 17, 2020 21:24 UTC   273d            no
    controller-manager.conf    Sep 17, 2020 21:24 UTC   273d            no
    etcd-healthcheck-client    Sep 17, 2020 21:24 UTC   273d            no
    etcd-peer                  Sep 17, 2020 21:24 UTC   273d            no
    etcd-server                Sep 17, 2020 21:24 UTC   273d            no
    front-proxy-client         Sep 17, 2020 21:24 UTC   273d            no
    scheduler.conf             Sep 17, 2020 21:24 UTC   273d            no
  2. Run the following commands to back up the existing Kubernetes certificates:
    mkdir -p $HOME/fcik8s-old-certs/pki
    /bin/cp -p /etc/kubernetes/pki/*.* $HOME/fcik8s-old-certs/pki
    ls -l $HOME/fcik8s-old-certs/pki/
    The output will be similar to the following:
    total 56
    -rw-r--r-- 1 root root 1261 Sep  4  2019 apiserver.crt
    -rw-r--r-- 1 root root 1090 Sep  4  2019 apiserver-etcd-client.crt
    -rw------- 1 root root 1679 Sep  4  2019 apiserver-etcd-client.key
    -rw------- 1 root root 1679 Sep  4  2019 apiserver.key
    -rw-r--r-- 1 root root 1099 Sep  4  2019 apiserver-kubelet-client.crt
    -rw------- 1 root root 1679 Sep  4  2019 apiserver-kubelet-client.key
    -rw-r--r-- 1 root root 1025 Sep  4  2019 ca.crt
    -rw------- 1 root root 1675 Sep  4  2019 ca.key
    -rw-r--r-- 1 root root 1038 Sep  4  2019 front-proxy-ca.crt
    -rw------- 1 root root 1675 Sep  4  2019 front-proxy-ca.key
    -rw-r--r-- 1 root root 1058 Sep  4  2019 front-proxy-client.crt
    -rw------- 1 root root 1679 Sep  4  2019 front-proxy-client.key
    -rw------- 1 root root 1675 Sep  4  2019 sa.key
    -rw------- 1 root root  451 Sep  4  2019 sa.pub
  3. Run the following commands to back up the existing configuration files:
    /bin/cp -p /etc/kubernetes/*.conf $HOME/fcik8s-old-certs
    ls -ltr $HOME/fcik8s-old-certs
    The output will be similar to the following:
    total 36
    -rw------- 1 root root 5451 Sep  4  2019 admin.conf
    -rw------- 1 root root 5595 Sep  4  2019 kubelet.conf
    -rw------- 1 root root 5483 Sep  4  2019 controller-manager.conf
    -rw------- 1 root root 5435 Sep  4  2019 scheduler.conf
    drwxr-xr-x 2 root root 4096 Dec 19 21:21 pki
  4. Run the following commands to back up your home configuration:
    mkdir -p $HOME/fcik8s-old-certs/.kube
    /bin/cp -p ~/.kube/config $HOME/fcik8s-old-certs/.kube/.
    ls -l $HOME/fcik8s-old-certs/.kube/.
    The output will be similar to the following:
    -rw------- 1 root root 5451 Sep  4  2019 config
  5. Run the following command to renew all the Kubernetes certificates:
    kubeadm alpha certs renew all
    The output of the command will be similar to the following:
    certificate embedded in the kubeconfig file for the admin to use and for kubeadm itself renewed
    certificate for serving the Kubernetes API renewed
    certificate the apiserver uses to access etcd renewed
    certificate for the API server to connect to kubelet renewed
    certificate embedded in the kubeconfig file for the controller manager to use renewed
    certificate for liveness probes to healtcheck etcd renewed
    certificate for etcd nodes to communicate with each other renewed
    certificate for serving etcd renewed
    certificate for the front proxy client renewed
    certificate embedded in the kubeconfig file for the scheduler manager to use renewed
  6. Run the following command to confirm the certificates have been renewed and will expire in 364 days:
    kubeadm alpha certs check-expiration
    The output should look similar to the following:
    CERTIFICATE                EXPIRES                  RESIDUAL TIME   EXTERNALLY MANAGED
    admin.conf                 Dec 20, 2021 02:35 UTC   364d            no      
    apiserver                  Dec 20, 2021 02:35 UTC   364d            no      
    apiserver-etcd-client      Dec 20, 2021 02:35 UTC   364d            no      
    apiserver-kubelet-client   Dec 20, 2021 02:35 UTC   364d            no      
    controller-manager.conf    Dec 20, 2021 02:35 UTC   364d            no      
    etcd-healthcheck-client    Dec 20, 2021 02:35 UTC   364d            no      
    etcd-peer                  Dec 20, 2021 02:35 UTC   364d            no      
    etcd-server                Dec 20, 2021 02:35 UTC   364d            no      
    front-proxy-client         Dec 20, 2021 02:35 UTC   364d            no      
    scheduler.conf             Dec 20, 2021 02:35 UTC   364d            no
  7. Confirm the kubelet services are running and communication between the worker nodes and the Kubernetes master is working.
  8. After waiting a few minutes, run the following command from the Kubernetes master node to confirm that the worker nodes are available:
    kubectl get nodes
    If you get a response similar to the following:
    The connection to the server 9.37.21.119:6443 was refused - did you specify the right host or port?
    
    continue with the next steps to resolve the issue. Otherwise, your Kubernetes cluster certificates have been successfully renewed.
  9. Run the following command:
    diff $HOME/fcik8s-old-certs/kubelet.conf /etc/kubernetes/kubelet.conf
    If there is no output, the kubelet.conf file was not updated with the new certificate information.
  10. Update the /etc/kubernetes/kubelet.conf file and display the difference from the old version to the new one:
    cd /etc/kubernetes
    sudo kubeadm alpha kubeconfig user --org system:nodes --client-name system:node:$(hostname) > kubelet.conf
    diff $HOME/fcik8s-old-certs/kubelet.conf /etc/kubernetes/kubelet.conf
    If the output shows a difference, the file kubelet.conf was updated with the new certificate information.
  11. Run the following command:
    diff ~/.kube/config $HOME/fcik8s-old-certs/.kube/config
    If there is no output, the config file still has the outdated keys and certificate values in it.
  12. Update client-certificate-data and client-key-data in ~/.kube/config with the values from the updated file in /etc/kubernetes/kubelet.conf:
    • cat /etc/kubernetes/kubelet.conf

      Select and copy the output after client-key-data:.

    • In the ~/.kube/config file, replace the information after client-key-data: with the text copied in the previous step.
    • cat /etc/kubernetes/kubelet.conf

      Select and copy the output after client-certificate-data:.

    • In the ~/.kube/config file, replace the information after client-certificate-data: with the text copied in the previous step.
  13. Restart the kubelet service:
    systemctl daemon-reload&&systemctl restart kubelet
    This command is successful if there is no output.
  14. Verify master and worker nodes are available:
    kubectl get nodes
  15. Verify all pods are in the running state:
    kubectl get pods

해당 기간을 확인해야 하는 이유는, 인증서를 renew할 경우 1.17 이하 버전에서는 나머지는 다 renew가 되지만

kubelet.conf 값은 renew되지 않는 버그가 존재하기 때문이다. 따라서 항상 체크하는 습관을 가지는 게 좋을 것 같다

 

kubelet.conf certification 기간 확인

음.. 우선 2가지를 확인해야한다..
[root@minikube kubernetes]# pwd
/etc/kubernetes
[root@minikube kubernetes]# cat kubelet.conf

- name: system:node:minikube
  user:
    client-certificate: /var/lib/kubelet/pki/kubelet-client-current.pem
    client-key: /var/lib/kubelet/pki/kubelet-client-current.pem

위 부분이 인코딩된(base64) 값으로 들어 있는 것도 있고, 위처럼 파일 경로로 된 것도 있다. 아마 파일 경로이면 minikube일 것이고, 나머지는 인코딩된 값일 것이다

우선 인코딩된(base64) 값일 경우
echo -n "암호화된 내용" |base64 -d > test.txt
openssl x509 -in test.txt  -text -noout

출력에서 인증서 유효기간(Not Before / Not After)을 확인하면 된다

 

 

 cd /etc/kubernetes

kubeadm alpha kubeconfig user --org system:nodes --client-name system:node:$(hostname) > kubelet.conf

systemctl restart kubelet

 

========== 참고 자료 =============

# On master - See https://kubernetes.io/docs/setup/certificates/#all-certificates

# Generate the new certificates - you may have to deal with AWS - see above re extra certificate SANs
sudo kubeadm alpha certs renew apiserver
sudo kubeadm alpha certs renew apiserver-etcd-client
sudo kubeadm alpha certs renew apiserver-kubelet-client
sudo kubeadm alpha certs renew front-proxy-client

# Generate new kube-configs with embedded certificates - Again you may need extra AWS specific content - see above
sudo kubeadm alpha kubeconfig user --org system:masters --client-name kubernetes-admin  > admin.conf
sudo kubeadm alpha kubeconfig user --client-name system:kube-controller-manager > controller-manager.conf
sudo kubeadm alpha kubeconfig user --org system:nodes --client-name system:node:$(hostname) > kubelet.conf
sudo kubeadm alpha kubeconfig user --client-name system:kube-scheduler > scheduler.conf

# chown and chmod so they match existing files
sudo chown root:root {admin,controller-manager,kubelet,scheduler}.conf
sudo chmod 600 {admin,controller-manager,kubelet,scheduler}.conf

# Move to replace existing kubeconfigs
sudo mv admin.conf /etc/kubernetes/
sudo mv controller-manager.conf /etc/kubernetes/
sudo mv kubelet.conf /etc/kubernetes/
sudo mv scheduler.conf /etc/kubernetes/

# Restart the master components
sudo kill -s SIGHUP $(pidof kube-apiserver)
sudo kill -s SIGHUP $(pidof kube-controller-manager)
sudo kill -s SIGHUP $(pidof kube-scheduler)

# Verify master component certificates - should all be 1 year in the future
# Cert from api-server
echo -n | openssl s_client -connect localhost:6443 2>&1 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | openssl x509 -text -noout | grep Not
# Cert from controller manager
echo -n | openssl s_client -connect localhost:10257 2>&1 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | openssl x509 -text -noout | grep Not
# Cert from scheduler
echo -n | openssl s_client -connect localhost:10259 2>&1 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | openssl x509 -text -noout | grep Not

# Generate kubelet.conf
sudo kubeadm alpha kubeconfig user --org system:nodes --client-name system:node:$(hostname) > kubelet.conf
sudo chown root:root kubelet.conf
sudo chmod 600 kubelet.conf

# Drain
kubectl drain --ignore-daemonsets $(hostname)
# Stop kubelet
sudo systemctl stop kubelet
# Delete files
sudo rm /var/lib/kubelet/pki/*
# Copy file
sudo mv kubelet.conf /etc/kubernetes/
# Restart
sudo systemctl start kubelet
# Uncordon
kubectl uncordon $(hostname)

# Check kubelet
echo -n | openssl s_client -connect localhost:10250 2>&1 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | openssl x509 -text -noout | grep Not

'나는 노동자 > KUBERNETES' 카테고리의 다른 글

인증서 기간 연장하기  (0) 2021.11.25
인증서 갱신 - 전통적인 방법  (0) 2021.11.24
계속 꺼지는 etcd 컨테이너 etcd 용량 줄이기  (0) 2021.11.21
minikube etcd 조각 모음 defrag  (0) 2021.11.21
metallb  (0) 2021.10.27

계속 꺼지는 etcd 컨테이너들

위 문제를 해결하기 위해서는 etcdctl 명령을 이용해서 etcd클러스터 구성원들의 과도한 키 스페이스 데이터들을 제거하고, 데이터베이스 조각모음을 수행해서 quota 범위 내로 크기를 되돌리는 과정이 필요합니다. 하지만, etcd 컨테이너들이 2~3분에 한번씩 죽어대는 바람에 제대로 작업을 진행하기가 불가능했습니다.

컨테이너가 계속 꺼지고 켜지기를 반복하는 이유는 컨테이너에 livenessProbe 설정이 세팅되어 있어서 etcd 컨테이너가 정상동작하지 않으면 healthcheck에 실패한 것으로 보고 컨테이너를 계속 재기동 하기 때문이었습니다. 우선 이 현상을 해결하기 위해서 etcd pod에 세팅되어 있는 livenessProbe 설정을 제거해 주기로 합니다. etcd는 kubernetes를 구성하는 핵심 구성요소 중 하나이기 때문에 /etc/kubernetes/manifests/ 디렉토리에 pod 구성정보가 존재합니다. 찾아서 수정해 줍니다.

 
# /etc/kubernetes/manifests/etcd.yaml
 
 
 
apiVersion: v1
 
kind: Pod
 
metadata:
 
creationTimestamp: null
 
labels:
 
component: etcd
 
tier: control-plane
 
name: etcd
 
namespace: kube-system
 
spec:
 
containers:
 
- command:
 
- etcd
 
- --advertise-client-urls=https://192.168.0.220:2379
 
- --cert-file=/etc/kubernetes/pki/etcd/server.crt
 
- --client-cert-auth=true
 
- --data-dir=/var/lib/etcd
 
- --election-timeout=5000
 
- --heartbeat-interval=250
 
- --initial-advertise-peer-urls=https://192.168.0.220:2380
 
- --initial-cluster=k8s-master1=https://192.168.0.220:2380
 
- --key-file=/etc/kubernetes/pki/etcd/server.key
 
- --listen-client-urls=https://127.0.0.1:2379,https://192.168.0.220:2379
 
- --listen-metrics-urls=http://127.0.0.1:2381
 
- --listen-peer-urls=https://192.168.0.220:2380
 
- --name=k8s-master1
 
- --peer-cert-file=/etc/kubernetes/pki/etcd/peer.crt
 
- --peer-client-cert-auth=true
 
- --peer-key-file=/etc/kubernetes/pki/etcd/peer.key
 
- --peer-trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
 
- --snapshot-count=10000
 
- --trusted-ca-file=/etc/kubernetes/pki/etcd/ca.crt
 
image: k8s.gcr.io/etcd:3.3.15-0
 
imagePullPolicy: IfNotPresent
 
# 컨테이너가 꺼지는 현상을 방지하기 위해 주석처리 해줍니다.
 
# livenessProbe:
 
# failureThreshold: 8
 
# httpGet:
 
# host: 127.0.0.1
 
# path: /health
 
# port: 2381
 
# scheme: HTTP
 
# initialDelaySeconds: 15
 
# timeoutSeconds: 15
 
name: etcd
 
resources: {}
 
volumeMounts:
 
- mountPath: /var/lib/etcd
 
name: etcd-data
 
- mountPath: /etc/kubernetes/pki/etcd
 
name: etcd-certs
 
hostNetwork: true
 
priorityClassName: system-cluster-critical
 
volumes:
 
- hostPath:
 
path: /etc/kubernetes/pki/etcd
 
type: DirectoryOrCreate
 
name: etcd-certs
 
- hostPath:
 
path: /var/lib/etcd
 
type: DirectoryOrCreate
 
name: etcd-data
 
status: {}

위와 같은 주석처리를 모든 master 노드의 해당 경로에 존재하는 yaml 파일에 작업해주면, etcd 컨테이너가 죽지 않게 됩니다.

etcdctl 명령어

konvoy로 설치한 kubernetes에서 etcdctl 명령어를 사용하기 위해서 가장 정석적인 방법은 etcd 컨테이너 내에 접속해서 etcdctl 명령어를 사용하는 것이지만, 본인은 귀찮아서 그냥 컨테이너 밖에서 찾아서 사용해보기로 했습니다. (당연히 etcd 컨테이너가 동작 중인 master 노드에서 해야 합니다.)

 
#bash
 
find / -type f -name etcdctl 2>/dev/null
 
#출력예시
 
[root@k8s-master1 manifests]# find / -type f -name etcdctl 2>/dev/null
 
/run/containerd/io.containerd.runtime.v1.linux/k8s.io/4fc80ceb99dfc0dca39e726d95104f5e424c53e618fd71d201b9b8b9c75a6d5d/rootfs/usr/local/bin/etcdctl
 
/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/9/fs/usr/local/bin/etcdctl

둘중에 아무거나 선택해서 alias를 걸어서 사용하도록 합니다. alias를 걸어줄때 etcdctl 명령어로 클러스터와 통신할 때 사용하기 위한 인증서등을 함께 세팅해서 걸어줍니다.

 
#bash
 
 
 
alias etcdctl="\
 
ETCDCTL_API=3 \
 
/var/lib/containerd/io.containerd.snapshotter.v1.overlayfs/snapshots/9/fs/usr/local/bin/etcdctl \
 
--cacert='/etc/kubernetes/pki/etcd/ca.crt' \
 
--cert='/etc/kubernetes/pki/etcd/server.crt' \
 
--key='/etc/kubernetes/pki/etcd/server.key' "

테스트

 
#bash
 
 
 
etcdctl member list

문제 해결

우선, 알람이 설정되어 있는 목록과 현재 클러스터 상태를 확인합니다.

 
#bash
 
 
 
etcdctl alarm list
 
etcdctl -w table endpoint status --cluster

etcd용량을 다이어트 해봅니다. 현재 상태를 제외한 나머지 오래된 revision들을 제거하기 위해 current revision 값을 가져옵니다.

 
#bash
 
c_revision=$(etcdctl endpoint status --write-out="json" | egrep -o '"revision":[0-9]*' | egrep -o '[0-9].*')
 
echo ${c_revision}

오래된 revision들을 날립니다.

 
#bash
 
etcdctl --endpoints=$(etcdctl member list | cut -d, -f5 | sed -e 's/ //g' | paste -sd ',') compact $c_revision

조각모음을 합니다. 본인의 경우에는 이 작업에서 용량이 드라마틱하게 줄어들었습니다.

 
#bash
 
etcdctl --endpoints=$(etcdctl member list | cut -d, -f5 | sed -e 's/ //g' | paste -sd ',') defrag

클러스터 상태를 확인합니다.

 
#bash
 
etcdctl -w table endpoint status --cluster
 
#출력결과
 
+----------------------------+------------------+---------+---------+-----------+-----------+------------+
 
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
 
+----------------------------+------------------+---------+---------+-----------+-----------+------------+
 
| https://192.168.0.221:2379 | 1806ccfb80e73faf | 3.3.15 | 7.8 MB | false | 602 | 66877835 |
 
| https://192.168.0.222:2379 | e7c82e12168d0897 | 3.3.15 | 7.8 MB | false | 602 | 66877835 |
 
| https://192.168.0.220:2379 | edabb0b65fe02a4c | 3.3.15 | 7.8 MB | true | 602 | 66877835 |
 
+----------------------------+------------------+---------+---------+-----------+-----------+------------+

경보를 해제하고 확인합니다.

 
#bash
 
etcdctl alarm disarm
 
etcdctl alarm list

'나는 노동자 > KUBERNETES' 카테고리의 다른 글

인증서 갱신 - 전통적인 방법  (0) 2021.11.24
kubelet.conf certification 기간 확인  (0) 2021.11.24
minikube etcd 조각 모음 defrag  (0) 2021.11.21
metallb  (0) 2021.10.27
etcd 설치 - 간략문서  (0) 2019.09.19

+ Recent posts