k8s学习(二十五) 使用prometheus监控集群节点

mac2022-11-25  5

1、 获取node-exporter

已经打好在“用到的离线包/node-exporter.tar”。在有网机器下载镜像：

docker pull prom/node-exporter:v0.16.0
docker save -o node-exporter.tar prom/node-exporter:v0.16.0

将tar包拷贝至无网（离线）的机器

2、 将node-exporter放入离线镜像服务器

docker load -i node-exporter.tar
docker tag prom/node-exporter:v0.16.0 192.168.100.94:80/node-exporter:v0.16.0
docker push 192.168.100.94:80/node-exporter:v0.16.0

3、 使用DaemonSet部署node-exporter

apiVersion: extensions/v1beta1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: kube-ops
  labels:
    name: node-exporter
spec:
  template:
    metadata:
      labels:
        name: node-exporter
    spec:
      hostPID: true
      hostIPC: true
      hostNetwork: true
      containers:
      - name: node-exporter
        image: 192.168.100.94:80/node-exporter:v0.16.0
        ports:
        - containerPort: 9100
        resources:
          requests:
            cpu: 0.15
        securityContext:
          privileged: true
        args:
        - --path.procfs
        - /host/proc
        - --path.sysfs
        - /host/sys
        - --collector.filesystem.ignored-mount-points
        - '"^/(sys|proc|dev|host|etc)($|/)"'
        volumeMounts:
        - name: dev
          mountPath: /host/dev
        - name: proc
          mountPath: /host/proc
        - name: sys
          mountPath: /host/sys
        - name: rootfs
          mountPath: /rootfs
      tolerations:
      - key: "node-role.kubernetes.io/master"
        operator: "Exists"
        effect: "NoSchedule"
      volumes:
      - name: proc
        hostPath:
          path: /proc
      - name: dev
        hostPath:
          path: /dev
      - name: sys
        hostPath:
          path: /sys
      - name: rootfs
        hostPath:
          path: /

kubectl apply -f prome-node-exporter.yaml

[root@master node-exporter]# kubectl get pods -n kube-ops -owide
NAME                          READY   STATUS    RESTARTS   AGE    IP               NODE     NOMINATED NODE   READINESS GATES
node-exporter-kg427           1/1     Running   0          17s    192.168.100.94   master   <none>           <none>
node-exporter-xk6vm           1/1     Running   0          17s    192.168.100.95   node2    <none>           <none>
node-exporter-z6bpr           1/1     Running   0          17s    192.168.100.96   node3    <none>           <none>
prometheus-5745dbdc87-rjnxj   1/1     Running   1          5d4h   10.244.2.30      node3    <none>           <none>
redis-55d48df69d-xzsvt        2/2     Running   2          5d3h   10.244.1.28      node2    <none>           <none>

4、 部署服务发现

修改prometheus-cm.yaml

apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: kube-ops
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 15s
    scrape_configs:
    - job_name: 'prometheus'
      static_configs:
      - targets: ['localhost:9090']
    - job_name: 'traefik'
      static_configs:
      - targets: ['traefik-service.kube-system.svc.cluster.local:8080']
    - job_name: 'redis'
      static_configs:
      - targets: ['redis:9121']
    - job_name: 'kubernetes-nodes'
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
    # - job_name: 'kubernetes-kubelet'
    #   kubernetes_sd_configs:
    #   - role: node
    #   scheme: https
    #   tls_config:
    #     ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    #     insecure_skip_verify: true
    #   bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
    #   relabel_configs:
    #   - action: labelmap
    #     regex: __meta_kubernetes_node_label_(.+)
    #   - target_label: __address__
    #     replacement: kubernetes.default.svc:443
    #   - source_labels: [__meta_kubernetes_node_name]
    #     regex: (.+)
    #     target_label: __metrics_path__
    #     replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

kubectl delete -f prometheus-cm.yaml
kubectl apply -f prometheus-cm.yaml

[root@master prometheus]# kubectl get services -n kube-ops
NAME         TYPE        CLUSTER-IP      EXTERNAL-IP   PORT(S)             AGE
prometheus   NodePort    10.105.89.6     <none>        9090:30229/TCP      5d4h
redis        ClusterIP   10.100.232.96   <none>        6379/TCP,9121/TCP   5d4h

[root@master prometheus]# curl -X POST http://10.105.89.6:9090/-/reload

访问http://192.168.100.94:30229/targets

使用检索条件

sum by (pod_name)(rate(container_cpu_usage_seconds_total{image!="", pod_name!=""}[1m] ))

最新回复(0)