How to configure Grafana Tempo properly?

I am trying to use Grafana Tempo for distributed tracing.

I start it from docker-compose:

version: "3.9"
services:

  # MY MICROSERVICES
  ...

  prometheus:
    image: prom/prometheus
    ports:
      - ${PROMETHEUS_EXTERNAL_PORT}:9090
    volumes:
      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:cached

  promtail:
    image: grafana/promtail
    volumes:
      - ./log:/var/log
      - ./promtail/:/mnt/config
    command: -config.file=/mnt/config/promtail-config.yaml

  loki:
    image: grafana/loki
    command: -config.file=/etc/loki/local-config.yaml

  tempo:
    image: grafana/tempo
    command: [ "-config.file=/etc/tempo.yaml" ]
    volumes:
      - ./tempo/tempo-local.yaml:/etc/tempo.yaml
      # - ./tempo-data/:/tmp/tempo
    ports:
      - "14268"  # jaeger ingest
      - "3200"   # tempo
      - "55680"  # otlp grpc
      - "55681"  # otlp http
      - "9411"   # zipkin

  tempo-query:
    image: grafana/tempo-query
    command: [ "--grpc-storage-plugin.configuration-file=/etc/tempo-query.yaml" ]
    volumes:
      - ./tempo/tempo-query.yaml:/etc/tempo-query.yaml
    ports:
      - "16686:16686"  # jaeger-ui
    depends_on:
      - tempo

  grafana:
    image: grafana/grafana
    volumes:
      - ./grafana/datasource-config/:/etc/grafana/provisioning/datasources:cached
      - ./grafana/dashboards/prometheus.json:/var/lib/grafana/dashboards/prometheus.json:cached
      - ./grafana/dashboards/loki.json:/var/lib/grafana/dashboards/loki.json:cached
      - ./grafana/dashboards-config/:/etc/grafana/provisioning/dashboards:cached
    ports:
      - ${GRAFANA_EXTERNAL_PORT}:3000
    environment:
      - GF_AUTH_ANONYMOUS_ENABLED=true
      - GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
      - GF_AUTH_DISABLE_LOGIN_FORM=true
    depends_on:
      - prometheus
      - loki

tempo-local.yaml:

server:
  http_listen_port: 3200

distributor:
  receivers:                           # this configuration will listen on all ports and protocols that tempo is capable of.
    jaeger:                            # the receivers all come from the OpenTelemetry collector.  more configuration information can
      protocols:                       # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver
        thrift_http:                   #
        grpc:                          # for a production deployment you should only enable the receivers you need!
        thrift_binary:
        thrift_compact:
    zipkin:
    otlp:
      protocols:
        http:
        grpc:
    opencensus:

ingester:
  trace_idle_period: 10s               # the length of time after a trace has not received spans to consider it complete and flush it
  max_block_bytes: 1_000_000           # cut the head block when it hits this size or ...
  max_block_duration: 5m               #   this much time passes

compactor:
  compaction:
    compaction_window: 1h              # blocks in this time window will be compacted together
    max_block_bytes: 100_000_000       # maximum size of compacted blocks
    block_retention: 1h
    compacted_block_retention: 10m

storage:
  trace:
    backend: local                     # backend configuration to use
    block:
      bloom_filter_false_positive: .05 # bloom filter false positive rate.  lower values create larger filters but fewer false positives
      index_downsample_bytes: 1000     # number of bytes per index record
      encoding: zstd                   # block encoding/compression.  options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2
    wal:
      path: /tmp/tempo/wal             # where to store the wal locally
      encoding: snappy                 # wal encoding/compression.  options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2
    local:
      path: /tmp/tempo/blocks
    pool:
      max_workers: 100                 # worker pool determines the number of parallel requests to the object store backend
      queue_depth: 10000

tempo-query.yaml:

backend: "tempo:3200"

datasource.yml, used to provision the data sources in Grafana:

apiVersion: 1

deleteDatasources:
  - name: Prometheus
  - name: Tempo
  - name: Loki

datasources:

  - name: Prometheus
    type: prometheus
    access: proxy
    orgId: 1
    url: http://prometheus:9090
    basicAuth: false
    isDefault: false
    version: 1
    editable: false

  - name: Tempo
    type: tempo
    access: proxy
    orgId: 1
    url: http://tempo-query:16686
    basicAuth: false
    isDefault: false
    version: 1
    editable: false
    apiVersion: 1
    uid: tempo

  - name: Tempo-Multitenant
    type: tempo
    access: proxy
    orgId: 1
    url: http://tempo-query:16686
    basicAuth: false
    isDefault: false
    version: 1
    editable: false
    apiVersion: 1
    uid: tempo-authed
    jsonData:
      httpHeaderName1: 'Authorization'
    secureJsonData:
      httpHeaderValue1: 'Bearer foo-bar-baz'

  - name: Loki
    type: loki
    access: proxy
    orgId: 1
    url: http://loki:3100
    basicAuth: false
    isDefault: false
    version: 1
    editable: false
    apiVersion: 1
    jsonData:
      derivedFields:
        - datasourceUid: tempo
          matcherRegex: \[.+,(.+),.+\]
          name: TraceID
          url: $${__value.raw}
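For context, the derivedFields block above is what produces the Tempo link in the Loki log view: the matcherRegex captures the middle comma-separated field inside the square brackets and passes it as TraceID to the data source with uid tempo. Assuming a Spring Sleuth-style log format (my assumption, the actual log format is not shown in the question), a line such as

2021-07-01 10:15:00 INFO [my-service,3f1c0e4f9a2bc8d1,77ab21c4] Handling request

would yield 3f1c0e4f9a2bc8d1 as the trace ID to open in Tempo.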

But if I test the data sources in Grafana, I get this error:

In the Loki view I can find the Tempo button to view the trace... but I cannot see the trace in Tempo, because I get an error:

In any case, if I take the trace ID and search for it in Jaeger, I can see it correctly.

What am I missing in the Tempo configuration? How do I configure it properly?

Grafana 7.5 and later can talk to Tempo natively, so the tempo-query proxy is no longer needed. I think that explains what is happening: Grafana is trying to use the Tempo-native API against tempo-query, which exposes the Jaeger API. Try changing the Grafana data source URL in datasource.yml to http://tempo:3200.
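A minimal sketch of the adjusted Tempo entry in datasource.yml, assuming the native HTTP port stays at 3200 as in tempo-local.yaml above; all other fields are kept from the original provisioning file:

  - name: Tempo
    type: tempo
    access: proxy
    orgId: 1
    url: http://tempo:3200   # Tempo's native HTTP API instead of the tempo-query / Jaeger endpoint
    basicAuth: false
    isDefault: false
    version: 1
    editable: false
    apiVersion: 1
    uid: tempo

With this change Grafana queries Tempo directly; the tempo-query container is only needed if you still want the Jaeger UI on port 16686.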