如何正确配置 Grafana Tempo?
How configure properly Grafana Tempo?
我尝试使用 Grafana Tempo 进行分布式跟踪。
我从 docker-compose 启动它:
version: "3.9"
services:
# MY MICROSERVICES
...
prometheus:
image: prom/prometheus
ports:
- ${PROMETHEUS_EXTERNAL_PORT}:9090
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:cached
promtail:
image: grafana/promtail
volumes:
- ./log:/var/log
- ./promtail/:/mnt/config
command: -config.file=/mnt/config/promtail-config.yaml
loki:
image: grafana/loki
command: -config.file=/etc/loki/local-config.yaml
tempo:
image: grafana/tempo
command: [ "-config.file=/etc/tempo.yaml" ]
volumes:
- ./tempo/tempo-local.yaml:/etc/tempo.yaml
# - ./tempo-data/:/tmp/tempo
ports:
- "14268" # jaeger ingest
- "3200" # tempo
- "55680" # otlp grpc
- "55681" # otlp http
- "9411" # zipkin
tempo-query:
image: grafana/tempo-query
command: [ "--grpc-storage-plugin.configuration-file=/etc/tempo-query.yaml" ]
volumes:
- ./tempo/tempo-query.yaml:/etc/tempo-query.yaml
ports:
- "16686:16686" # jaeger-ui
depends_on:
- tempo
grafana:
image: grafana/grafana
volumes:
- ./grafana/datasource-config/:/etc/grafana/provisioning/datasources:cached
- ./grafana/dashboards/prometheus.json:/var/lib/grafana/dashboards/prometheus.json:cached
- ./grafana/dashboards/loki.json:/var/lib/grafana/dashboards/loki.json:cached
- ./grafana/dashboards-config/:/etc/grafana/provisioning/dashboards:cached
ports:
- ${GRAFANA_EXTERNAL_PORT}:3000
environment:
- GF_AUTH_ANONYMOUS_ENABLED=true
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
- GF_AUTH_DISABLE_LOGIN_FORM=true
depends_on:
- prometheus
- loki
与 tempo-local.yaml
:
server:
http_listen_port: 3200
distributor:
receivers: # this configuration will listen on all ports and protocols that tempo is capable of.
jaeger: # the receives all come from the OpenTelemetry collector. more configuration information can
protocols: # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver
thrift_http: #
grpc: # for a production deployment you should only enable the receivers you need!
thrift_binary:
thrift_compact:
zipkin:
otlp:
protocols:
http:
grpc:
opencensus:
ingester:
trace_idle_period: 10s # the length of time after a trace has not received spans to consider it complete and flush it
max_block_bytes: 1_000_000 # cut the head block when it hits this size or ...
max_block_duration: 5m # this much time passes
compactor:
compaction:
compaction_window: 1h # blocks in this time window will be compacted together
max_block_bytes: 100_000_000 # maximum size of compacted blocks
block_retention: 1h
compacted_block_retention: 10m
storage:
trace:
backend: local # backend configuration to use
block:
bloom_filter_false_positive: .05 # bloom filter false positive rate. lower values create larger filters but fewer false positives
index_downsample_bytes: 1000 # number of bytes per index record
encoding: zstd # block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2
wal:
path: /tmp/tempo/wal # where to store the the wal locally
encoding: snappy # wal encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2
local:
path: /tmp/tempo/blocks
pool:
max_workers: 100 # worker pool determines the number of parallel requests to the object store backend
queue_depth: 10000
tempo-query.yaml
:
backend: "tempo:3200"
和datasource.yml
用于在 grafana 上检测数据源:
apiVersion: 1
deleteDatasources:
- name: Prometheus
- name: Tempo
- name: Loki
datasources:
- name: Prometheus
type: prometheus
access: proxy
orgId: 1
url: http://prometheus:9090
basicAuth: false
isDefault: false
version: 1
editable: false
- name: Tempo
type: tempo
access: proxy
orgId: 1
url: http://tempo-query:16686
basicAuth: false
isDefault: false
version: 1
editable: false
apiVersion: 1
uid: tempo
- name: Tempo-Multitenant
type: tempo
access: proxy
orgId: 1
url: http://tempo-query:16686
basicAuth: false
isDefault: false
version: 1
editable: false
apiVersion: 1
uid: tempo-authed
jsonData:
httpHeaderName1: 'Authorization'
secureJsonData:
httpHeaderValue1: 'Bearer foo-bar-baz'
- name: Loki
type: loki
access: proxy
orgId: 1
url: http://loki:3100
basicAuth: false
isDefault: false
version: 1
editable: false
apiVersion: 1
jsonData:
derivedFields:
- datasourceUid: tempo
matcherRegex: \[.+,(.+),.+\]
name: TraceID
url: $${__value.raw}
但是如果我在grafana中测试数据源,我有这个错误:
在 Loki 视图中,我可以找到用于查看痕迹的 Tempo 按钮...但是我在 Tempo 上看不到它,因为我有一个错误:
无论如何,如果我获取跟踪 ID 并在 Jaeger 上搜索它,我可以正确地看到它。
我在 Tempo 配置中缺少什么?如何正确配置?
Grafana 7.5 及更高版本可以原生与 Tempo 对话,不再需要速度查询代理。我认为这解释了正在发生的事情,Grafana 正试图使用 Tempo-native API 来对抗速度查询,这会暴露 Jaeger API。尝试将 datasource.yml 中的 Grafana 数据源更改为 http://tempo:3200
。
我尝试使用 Grafana Tempo 进行分布式跟踪。
我从 docker-compose 启动它:
version: "3.9"
services:
# MY MICROSERVICES
...
prometheus:
image: prom/prometheus
ports:
- ${PROMETHEUS_EXTERNAL_PORT}:9090
volumes:
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:cached
promtail:
image: grafana/promtail
volumes:
- ./log:/var/log
- ./promtail/:/mnt/config
command: -config.file=/mnt/config/promtail-config.yaml
loki:
image: grafana/loki
command: -config.file=/etc/loki/local-config.yaml
tempo:
image: grafana/tempo
command: [ "-config.file=/etc/tempo.yaml" ]
volumes:
- ./tempo/tempo-local.yaml:/etc/tempo.yaml
# - ./tempo-data/:/tmp/tempo
ports:
- "14268" # jaeger ingest
- "3200" # tempo
- "55680" # otlp grpc
- "55681" # otlp http
- "9411" # zipkin
tempo-query:
image: grafana/tempo-query
command: [ "--grpc-storage-plugin.configuration-file=/etc/tempo-query.yaml" ]
volumes:
- ./tempo/tempo-query.yaml:/etc/tempo-query.yaml
ports:
- "16686:16686" # jaeger-ui
depends_on:
- tempo
grafana:
image: grafana/grafana
volumes:
- ./grafana/datasource-config/:/etc/grafana/provisioning/datasources:cached
- ./grafana/dashboards/prometheus.json:/var/lib/grafana/dashboards/prometheus.json:cached
- ./grafana/dashboards/loki.json:/var/lib/grafana/dashboards/loki.json:cached
- ./grafana/dashboards-config/:/etc/grafana/provisioning/dashboards:cached
ports:
- ${GRAFANA_EXTERNAL_PORT}:3000
environment:
- GF_AUTH_ANONYMOUS_ENABLED=true
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
- GF_AUTH_DISABLE_LOGIN_FORM=true
depends_on:
- prometheus
- loki
与 tempo-local.yaml
:
server:
http_listen_port: 3200
distributor:
receivers: # this configuration will listen on all ports and protocols that tempo is capable of.
jaeger: # the receives all come from the OpenTelemetry collector. more configuration information can
protocols: # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver
thrift_http: #
grpc: # for a production deployment you should only enable the receivers you need!
thrift_binary:
thrift_compact:
zipkin:
otlp:
protocols:
http:
grpc:
opencensus:
ingester:
trace_idle_period: 10s # the length of time after a trace has not received spans to consider it complete and flush it
max_block_bytes: 1_000_000 # cut the head block when it hits this size or ...
max_block_duration: 5m # this much time passes
compactor:
compaction:
compaction_window: 1h # blocks in this time window will be compacted together
max_block_bytes: 100_000_000 # maximum size of compacted blocks
block_retention: 1h
compacted_block_retention: 10m
storage:
trace:
backend: local # backend configuration to use
block:
bloom_filter_false_positive: .05 # bloom filter false positive rate. lower values create larger filters but fewer false positives
index_downsample_bytes: 1000 # number of bytes per index record
encoding: zstd # block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2
wal:
path: /tmp/tempo/wal # where to store the the wal locally
encoding: snappy # wal encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd, s2
local:
path: /tmp/tempo/blocks
pool:
max_workers: 100 # worker pool determines the number of parallel requests to the object store backend
queue_depth: 10000
tempo-query.yaml
:
backend: "tempo:3200"
和datasource.yml
用于在 grafana 上检测数据源:
apiVersion: 1
deleteDatasources:
- name: Prometheus
- name: Tempo
- name: Loki
datasources:
- name: Prometheus
type: prometheus
access: proxy
orgId: 1
url: http://prometheus:9090
basicAuth: false
isDefault: false
version: 1
editable: false
- name: Tempo
type: tempo
access: proxy
orgId: 1
url: http://tempo-query:16686
basicAuth: false
isDefault: false
version: 1
editable: false
apiVersion: 1
uid: tempo
- name: Tempo-Multitenant
type: tempo
access: proxy
orgId: 1
url: http://tempo-query:16686
basicAuth: false
isDefault: false
version: 1
editable: false
apiVersion: 1
uid: tempo-authed
jsonData:
httpHeaderName1: 'Authorization'
secureJsonData:
httpHeaderValue1: 'Bearer foo-bar-baz'
- name: Loki
type: loki
access: proxy
orgId: 1
url: http://loki:3100
basicAuth: false
isDefault: false
version: 1
editable: false
apiVersion: 1
jsonData:
derivedFields:
- datasourceUid: tempo
matcherRegex: \[.+,(.+),.+\]
name: TraceID
url: $${__value.raw}
但是如果我在grafana中测试数据源,我有这个错误:
在 Loki 视图中,我可以找到用于查看痕迹的 Tempo 按钮...但是我在 Tempo 上看不到它,因为我有一个错误:
无论如何,如果我获取跟踪 ID 并在 Jaeger 上搜索它,我可以正确地看到它。
我在 Tempo 配置中缺少什么?如何正确配置?
Grafana 7.5 及更高版本可以原生与 Tempo 对话,不再需要速度查询代理。我认为这解释了正在发生的事情,Grafana 正试图使用 Tempo-native API 来对抗速度查询,这会暴露 Jaeger API。尝试将 datasource.yml 中的 Grafana 数据源更改为 http://tempo:3200
。