# 添加 repo
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
helm repo update
# Install chart
helm install [RELEASE_NAME] prometheus-community/prometheus-blackbox-exporter

Blackbox 基本配置
# Blackbox Exporter module definitions. Each key under `modules` is a named
# probe profile that Prometheus selects through the `module` URL parameter.
modules:
  http_2xx:
    prober: http
    timeout: 15s
    http:
      fail_if_not_ssl: true
      ip_protocol_fallback: false
      method: GET
      follow_redirects: true
      preferred_ip_protocol: ip4
      # Responses are accepted only for the listed HTTP versions and status codes.
      valid_http_versions:
        - HTTP/1.1
        - HTTP/2.0
      valid_status_codes:
        - 200
        - 204

你可以相应地配置你自己的blackbox.yml,使探针 (probe) 根据你的配置返回成功/失败。以上面配置为例,详细说明下 module 和 http probe 的配置:
# Prometheus scrape job that probes a static list of URLs through the
# Blackbox Exporter using the http_2xx module.
scrape_configs:
  - job_name: blackbox-exporter
    params:
      module:
        - http_2xx
    scrape_interval: 1m
    scrape_timeout: 10s
    metrics_path: /probe
    scheme: http
    relabel_configs:
      # Pass the listed URL to the exporter as the `target` query parameter.
      - source_labels: [__address__]
        target_label: __param_target
      # Keep the probed URL as the `instance` label.
      - source_labels: [__param_target]
        target_label: instance
      # Send the actual scrape request to the Blackbox Exporter service.
      - target_label: __address__
        replacement: prometheus-blackbox-exporter.monitoring:9115
        action: replace
    static_configs:
      - targets:
          - https://ewhisper.cn
          - https://www.ewhisper.cn
          - https://rancher.ewhisper.cn
        labels:
          domain: ewhisper
          environment: test
          cluster: home-k3s

这样直接改 Prometheus 的配置是比较容易出错的,如果你已经安装了 Prometheus Operator, 则可以直接通过 probe CRD 来配置,非常方便:
# Prometheus Operator Probe resource: probes a static list of URLs through
# the Blackbox Exporter with the http_2xx module.
apiVersion: monitoring.coreos.com/v1
kind: Probe
metadata:
  name: ewhisper
  namespace: monitoring
spec:
  jobName: http-get
  interval: 60s
  module: http_2xx
  prober:
    url: prometheus-blackbox-exporter.monitoring:9115
    scheme: http
    path: /probe
  targets:
    staticConfig:
      # `static` is a plain list of target URLs (not a list of `targets:`
      # objects as in a Prometheus static_configs entry); `labels` is its
      # sibling under `staticConfig`.
      static:
        - https://ewhisper.cn
        - https://www.ewhisper.cn
        - https://rancher.ewhisper.cn
      labels:
        domain: ewhisper
        environment: test
        cluster: home-k3s

Blackbox Exporter 探测场景
# Scrape-job fragment (the job header is not part of this snippet): discovers
# Kubernetes Services and probes them through the http_2xx module.
kubernetes_sd_configs:
  - role: service
metrics_path: /probe
params:
  module:
    - http_2xx
relabel_configs:
  # Keep only Services whose prometheus.io/probe annotation is "true".
  - action: keep
    regex: true
    source_labels:
      - __meta_kubernetes_service_annotation_prometheus_io_probe
  # Pass the discovered address to the exporter as the `target` parameter.
  - source_labels:
      - __address__
    target_label: __param_target
  # Send the actual scrape request to the Blackbox Exporter service.
  - replacement: prometheus-blackbox-exporter.monitoring:9115
    target_label: __address__
  - source_labels:
      - __param_target
    target_label: instance
  # Copy every Service label onto the resulting series.
  - action: labelmap
    regex: __meta_kubernetes_service_label_(.+)
  - source_labels:
      - __meta_kubernetes_namespace
    target_label: kubernetes_namespace
  - source_labels:
      - __meta_kubernetes_service_name
    target_label: kubernetes_name

这里我们可以使用[__meta_kubernetes_service_annotation_prometheus_io_probe]来只检查那些有prometheus.io/probe = true注释的服务,示例如下:
➜ kubectl describe svc nginx
...
Annotations: prometheus.io/probe: true
...

场景三:探测 K8S 集群内部 Ingress
# Scrape job that discovers Kubernetes Ingresses and probes each one through
# the Blackbox Exporter with the http_2xx module.
- job_name: "blackbox-kubernetes-ingresses"
  metrics_path: /probe
  params:
    module: [http_2xx]
  kubernetes_sd_configs:
    - role: ingress
  relabel_configs:
    # Example relabeling that probes only ingresses carrying the
    # "prometheus.io/probe = true" annotation.
    # - source_labels: [__meta_kubernetes_ingress_annotation_prometheus_io_probe]
    #   action: keep
    #   regex: true
    # Build the probe URL as <scheme>://<address><path>; the three source
    # labels are joined with ";" before the regex is applied.
    - source_labels:
        - __meta_kubernetes_ingress_scheme
        - __address__
        - __meta_kubernetes_ingress_path
      regex: (.+);(.+);(.+)
      replacement: ${1}://${2}${3}
      target_label: __param_target
    # Send the actual scrape request to the Blackbox Exporter service.
    - target_label: __address__
      replacement: prometheus-blackbox-exporter.monitoring:9115
    - source_labels: [__param_target]
      target_label: instance
    # Copy every Ingress label onto the resulting series.
    - action: labelmap
      regex: __meta_kubernetes_ingress_label_(.+)
    - source_labels: [__meta_kubernetes_namespace]
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_ingress_name]
      target_label: ingress_name

场景四:探测 K8S 集群内部 Pod
# Scrape job that discovers Kubernetes Pods and probes each one through the
# Blackbox Exporter with the http_2xx module.
- job_name: "blackbox-kubernetes-pods"
  metrics_path: /probe
  params:
    module: [http_2xx]
  kubernetes_sd_configs:
    - role: pod
  relabel_configs:
    # Example relabeling that probes only pods carrying the
    # "prometheus.io/probe = true" annotation.
    # - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_probe]
    #   action: keep
    #   regex: true
    # Pass the discovered pod address to the exporter as the `target` parameter.
    - source_labels: [__address__]
      target_label: __param_target
    # Send the actual scrape request to the Blackbox Exporter service.
    - target_label: __address__
      replacement: prometheus-blackbox-exporter.monitoring:9115
    # NOTE(review): the "/health" suffix is added to the `instance` label only;
    # __param_target (the URL actually probed) stays the bare pod address.
    # Confirm whether the suffix was meant to apply to the probe target too.
    - source_labels: [__param_target]
      replacement: ${1}/health
      target_label: instance
    # Copy every Pod label onto the resulting series.
    - action: labelmap
      regex: __meta_kubernetes_pod_label_(.+)
    - source_labels: [__meta_kubernetes_namespace]
      target_label: kubernetes_namespace
    - source_labels: [__meta_kubernetes_pod_name]
      target_label: kubernetes_pod_name

在 Prometheus 验证生成的指标
。
指标名 | 功能 |
---|---|
probe_duration_seconds | 返回探针完成探测所花费的时间(秒)。 |
probe_http_status_code | 响应 HTTP 状态代码 |
probe_http_version | 返回探针响应的 HTTP 版本 |
probe_success | 显示探测是否成功 |
probe_dns_lookup_time_seconds | 返回探测 DNS 的时间,单位是秒。 |
probe_ip_protocol | 指定探针 IP 协议是 IPv4 还是 IPv6 |
probe_ssl_earliest_cert_expiry | 返回以 unixtime 为单位的最早的 SSL 证书到期时间 |
probe_tls_version_info | 包含所使用的 TLS 版本 |
probe_failed_due_to_regex | 表示探测是否因 regex 匹配而失败 |
probe_http_content_length | HTTP 内容响应的长度 |