«

K8s 部署 OpenTelemetry v0.41.0(适用于 Kubernetes v1.20 to v1.22)

myluzh 发布于 阅读:350 Kubernetes


0x00 前言

OpenTelemetry Operator:v0.41.0(api-versions:opentelemetry.io/v1alpha1)
Kubernetes:v1.20 to v1.22
Cert-Manager:1.6.1


业务 Pod 的遥测数据流向为:JavaAgent(自动注入) -> Sidecar Collector -> Center Collector -> 观测后端 (Loki/Jaeger)。

graph LR
    %% =======================
    %% Style definitions
    %% =======================
    %% Operator: green
    classDef operator fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px,rx:5,ry:5,color:#1b5e20;
    %% Container: white fill with a dark-blue border
    classDef container fill:#ffffff,stroke:#1565c0,stroke-width:2px,rx:5,ry:5,color:#0d47a1;
    %% OTel components: orange
    classDef otel fill:#fff3e0,stroke:#ef6c00,stroke-width:2px,rx:5,ry:5,color:#e65100;
    %% Backend storage: light indigo
    classDef backend fill:#e8eaf6,stroke:#3f51b5,stroke-width:2px,shape:cylinder,color:#1a237e;

    %% =======================
    %% 1. Control plane
    %% =======================
    subgraph ControlPlane [🎮 Control Plane]
        direction TB
        style ControlPlane fill:#f5f5f5,stroke:#bdbdbd,stroke-dasharray: 3 3,color:#616161

        Op(OTel Operator<br/>v0.41.0):::operator
        Cert(Cert-Manager<br/>v1.6.1):::operator

        CRD_Inst(CRD: Instrumentation<br/>自动注入配置):::operator
        CRD_Side(CRD: OTelCol Sidecar<br/>Sidecar 配置):::operator

        Cert -.->|提供证书| Op
    end

    %% =======================
    %% 2. Business namespace
    %% =======================
    subgraph NS_Biz [Namespace: xfsh 业务]
        direction TB
        style NS_Biz fill:#e3f2fd,stroke:#64b5f6,color:#1565c0

        subgraph Pod [Business Pod]
            direction LR
            style Pod fill:#ffffff,stroke:#2196f3,stroke-dasharray: 5 5,color:#1565c0

            %% Application container
            subgraph JavaCont [Java Container]
                direction TB
                style JavaCont fill:none,stroke:none
                App(Java App):::container
                Agent(OTel JavaAgent<br/>Auto-injected):::otel
                App --- Agent
            end

            %% Sidecar container
            Sidecar(Sidecar Collector<br/>Mode: Sidecar):::otel
        end
    end

    %% =======================
    %% 3. OTel namespace
    %% =======================
    subgraph NS_Otel [Namespace: opentelemetry]
        direction TB
        style NS_Otel fill:#fff8e1,stroke:#ffb74d,color:#ef6c00

        Center(Center Collector<br/>Mode: Deployment):::otel
    end

    %% =======================
    %% 4. Backend storage
    %% =======================
    subgraph Backends [🔭 Observability Backends]
        direction TB
        style Backends fill:#f3e5f5,stroke:#9575cd,color:#512da8

        Loki[(LokiLogs)]:::backend
        Jaeger[(JaegerTraces)]:::backend
        Prom[(PrometheusMetrics)]:::backend
    end

    %% =======================
    %% Edges
    %% =======================

    %% The Operator watches and injects into the Sidecar and Agent nodes
    Op -.->|监听 & 注入| Sidecar
    Op -.->|监听 & 注入| Agent

    %% 1. Traffic inside the Pod
    Agent ==>|OTLP HTTP<br/>localhost:4318| Sidecar

    %% 2. Cross-namespace traffic
    Sidecar ==>|OTLP gRPC<br/>batch send| Center

    %% 3. Center collector to the backends
    Center ==>|otlphttp/loki| Loki
    Center ==>|otlp/jaeger| Jaeger
    Prom -.->|Scrape :8889| Center

    %% Which CRD configures which component
    CRD_Inst -.-> Agent
    CRD_Side -.-> Sidecar

    %% =======================
    %% Edge styles
    %% =======================
    %% linkStyle indices count every edge in definition order, including the
    %% edges declared inside subgraphs: Cert->Op is 0 and App---Agent is 1,
    %% the two Operator edges are 2 and 3, so the data path starts at 4.
    %% 4, 5: OTLP data path (Agent -> Sidecar -> Center), OTel orange
    linkStyle 4 stroke:#ef6c00,stroke-width:3px;
    linkStyle 5 stroke:#ef6c00,stroke-width:3px;
    %% 6, 7, 8: backend edges (Center -> Loki, Center -> Jaeger, Prometheus scrape)
    linkStyle 6,7,8 stroke:#7986cb,stroke-width:2px;

0x01 安装 opentelemetry operator

安装cert-manager

# Install cert-manager v1.6.1: download the release manifest, apply it, then
# list the pods in the cert-manager namespace to check that they are Running.
root@iZbp12bkuvg20e1j3y9gtxZ:~/k8s-yaml/opentelemetry# wget -O cert-manager-v1.6.1.yaml https://github.com/cert-manager/cert-manager/releases/download/v1.6.1/cert-manager.yaml
root@iZbp12bkuvg20e1j3y9gtxZ:~/k8s-yaml/opentelemetry# kubectl apply -f cert-manager-v1.6.1.yaml
root@iZbp12bkuvg20e1j3y9gtxZ:~/k8s-yaml/opentelemetry# kubectl get pod -n cert-manager  
NAME                                      READY   STATUS    RESTARTS   AGE
cert-manager-55658cdf68-4crgm             1/1     Running   0          24s
cert-manager-cainjector-967788869-dwlb2   1/1     Running   0          24s
cert-manager-webhook-7b86bc6578-l6xr4     1/1     Running   0          24s

安装opentelemetry-operator

# Create the "opentelemetry" namespace
root@iZbp12bkuvg20e1j3y9gtxZ:~# kubectl create ns opentelemetry
# Install opentelemetry-operator v0.41.0: download the release manifest and apply it
root@iZbp12bkuvg20e1j3y9gtxZ:~/k8s-yaml/opentelemetry# wget -O opentelemetry-operator-v0.41.0.yaml https://github.com/open-telemetry/opentelemetry-operator/releases/download/v0.41.0/opentelemetry-operator.yaml
root@iZbp12bkuvg20e1j3y9gtxZ:~/k8s-yaml/opentelemetry# kubectl apply -f opentelemetry-operator-v0.41.0.yaml 

0x02 部署Collector

部署center

在 loki3.0 之前的版本,使用lokiexporter导出,参考文档:https://github.com/open-telemetry/opentelemetry-collector-contrib/tree/main/exporter/lokiexporter
但是 3.0 后的版本已弃用 lokiExporter,需要使用 otlphttp 导出,参考文档:https://grafana.com/docs/loki/latest/send-data/otel/

# Center collector: receives OTLP over gRPC/HTTP and forwards it to the
# observability backends. Only the logs pipeline is enabled below.
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
  name: center
  namespace: opentelemetry
spec:
  mode: deployment
  # The contrib image bundles more components than the core image.
  image: registry.sxhlcloud.com:5443/base/otel/opentelemetry-collector-contrib:0.44.0
  config: |
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317  # enable OTLP/gRPC
          http:
            endpoint: 0.0.0.0:4318  # enable OTLP/HTTP

    processors:  # applied to the collected data
      batch: {}  # send in batches for efficiency

    exporters:
      # 1. Debug output (in production set this to info or remove it)
      logging:
        loglevel: debug

      # 2. Traces -> Jaeger
      otlp/jaeger:
        endpoint: "jaeger-collector.opentelemetry.svc.cluster.local:4317"
        tls:
          insecure: true  # plaintext gRPC: client transport security is disabled

      # 3. Logs -> Loki
      # Alternative for Loki versions before 3.0 (lokiexporter):
      #loki:
      #  endpoint: "http://loki-gateway.loki.svc.cluster.local/loki/api/v1/push"
      otlphttp/loki:
        # Base URL only: the otlphttp exporter appends /v1/logs itself, so a
        # value ending in /otlp/v1/logs would be sent to /otlp/v1/logs/v1/logs.
        endpoint: "http://loki-gateway.loki.svc.cluster.local/otlp"
        tls:
          insecure: true

      # 4. Metrics -> Prometheus (exposes an endpoint to be scraped). Port 8889
      #    must also be exposed on the center collector's Kubernetes Service,
      #    otherwise the Prometheus server cannot scrape it.
      prometheus:
        endpoint: "0.0.0.0:8889"

    service:
      pipelines:
        # Traces: send to Jaeger
        #traces:
        #  receivers: [otlp]
        #  processors: [batch]
        #  exporters: [logging, otlp/jaeger]

        # Metrics: expose to Prometheus
        #metrics:
        #  receivers: [otlp]
        #  processors: [batch]
        #  exporters: [logging, prometheus]

        # Logs: send to Loki
        logs:
          receivers: [otlp]
          processors: [batch]  # batch for efficiency
          exporters: [logging, otlphttp/loki]

部署sidecar

这个版本的OpenTelemetry Operator,Sidecar 模式的 Collector 必须与其监控的应用程序部署在同一命名空间中。

# Sidecar collector: receives OTLP from the application inside the Pod and
# forwards it in batches to the center collector. Only the logs pipeline is
# enabled below.
apiVersion: opentelemetry.io/v1alpha1
kind: OpenTelemetryCollector
metadata:
  name: sidecar
  namespace: xfsh  # namespace of the business application
spec:
  mode: sidecar
  image: registry.sxhlcloud.com:5443/base/otel/opentelemetry-collector-contrib:0.44.0
  config: |
    receivers:
      otlp:
        protocols:
          grpc:
            endpoint: 0.0.0.0:4317
          http:
            endpoint: 0.0.0.0:4318

    processors:
      batch:
        timeout: 1s
        send_batch_size: 100

    exporters:
      otlp:
        # Points at the center collector. The gRPC exporter takes host:port
        # (no URL scheme); plaintext is selected by tls.insecure below.
        endpoint: "center-collector.opentelemetry.svc.cluster.local:4317"
        tls:
          insecure: true

    service:
      pipelines:
        # 1. Traces
        #traces:
        #  receivers: [otlp]
        #  processors: [batch]
        #  exporters: [otlp]
        # 2. Logs
        logs:
          receivers: [otlp]
          processors: [batch]
          exporters: [otlp]
        # 3. Metrics
        #metrics:
        #  receivers: [otlp]
        #  processors: [batch]
        #  exporters: [otlp]

0x03 自动埋点

编写 Instrumentation

Instrumentation也需要与其监控的应用程序部署在同一命名空间中。
由于 Operator v0.41.0 对协议参数支持有限,在 Instrumentation 中不能显式指定通过 gRPC 传输。我一开始尝试使用 gRPC 端口 4317,但 agent 仍然通过 HTTP 传输(Java agent 2.x 默认使用 http/protobuf 协议),因此报错。为了避免错误,这里使用 HTTP 端口 4318,把数据发给 sidecar。

# Java auto-instrumentation settings for workloads in the xfsh namespace.
apiVersion: opentelemetry.io/v1alpha1
kind: Instrumentation
metadata:
  namespace: xfsh  # namespace of the business application
  name: xfsh-instrumentation
spec:
  # Image that provides the Java agent to be injected.
  java:
    image: registry.sxhlcloud.com:5443/base/otel/autoinstrumentation-java:2.23.0

  # The agent exports to port 4318 on localhost.
  exporter:
    endpoint: http://localhost:4318

  # Parent-based trace-ID-ratio sampler with a ratio of 1.
  sampler:
    argument: "1"
    type: parentbased_traceidratio

  propagators: [tracecontext, baggage, b3]

设置应用自动埋点

1、添加注解
可以在需要的 Deployment 的 Pod 模板(spec.template.metadata.annotations)或者整个 namespace 上添加注解 sidecar.opentelemetry.io/inject: "true"(注解值必须是带引号的字符串)

# For a Deployment, this metadata block belongs to the Pod template
# (spec.template.metadata); the annotations can also be set on a Namespace.
metadata:
  annotations:
    # Annotation values must be strings: an unquoted true is parsed as a
    # boolean and the manifest is rejected by the API server.
    sidecar.opentelemetry.io/inject: "true"  # inject the sidecar collector container
    # Name of the Instrumentation resource whose Java agent is injected into
    # the application container.
    instrumentation.opentelemetry.io/inject-java: xfsh-instrumentation

2、重新部署
完成后,把旧的pod删掉,新起来的pod就会自带sidecar,自动注入agent。

kubernetes OpenTelemetry 观测


正文到此结束
版权声明:若无特殊注明,本文皆为 Myluzh Blog 原创,转载请保留文章出处。
文章内容:https://itho.cn/k8s/561.html
文章标题:《K8s 部署 OpenTelemetry v0.41.0(适用于 Kubernetes v1.20 to v1.22)》