OKR/prod-upgrade/prom/prometheus.yaml

140 lines
4.0 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

# ConfigMap carrying the Prometheus server configuration (`prom-config`) and
# one recording-rule file (`prom-rule-0`). Both values are embedded YAML
# documents; the lines starting with `#` inside them are comments for
# Prometheus, not for Kubernetes.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prom-config
data:
  prom-config: |-
    global:
      scrape_interval: 15s
    scrape_configs:
      # Multi-target scrape: each MySQL instance below is probed through the
      # single exporter at mysql-exporter.base.svc:9104.
      - job_name: mysql # To get metrics about the mysql exporter's targets
        # Per-target scraping (the `target` and `auth_module` parameters) is
        # served by the exporter's /probe endpoint, not the default /metrics.
        metrics_path: /probe
        params:
          auth_module: [client]
        static_configs:
          - targets:
              # All mysql hostnames or unix sockets to monitor.
              - mysql-0.base.svc:3306
              - mysql-1.base.svc:3306
              - mysql-2.base.svc:3306
              - mysql-3.base.svc:3306
        relabel_configs:
          # Pass the listed address to the exporter as ?target=...
          - source_labels: [__address__]
            target_label: __param_target
          # Keep the real database address as the `instance` label.
          - source_labels: [__param_target]
            target_label: instance
          # Send the actual HTTP request to the exporter.
          - target_label: __address__
            replacement: mysql-exporter.base.svc:9104
      # Same relabel pattern for MongoDB, through mongo-exporter.base.svc:9216.
      # NOTE(review): this job scrapes the default metrics path. Confirm the
      # deployed exporter honours the `target` parameter on that path; if it
      # does not, all three targets will report the same instance.
      - job_name: mongo
        static_configs:
          - targets:
              # All MongoDB connection URIs to monitor.
              - mongodb://mongo-rs0-0.mongo-rs0.base.svc:27017
              - mongodb://mongo-rs0-1.mongo-rs0.base.svc:27017
              - mongodb://mongo-rs0-2.mongo-rs0.base.svc:27017
        relabel_configs:
          - source_labels: [__address__]
            target_label: __param_target
          - source_labels: [__param_target]
            target_label: instance
          - target_label: __address__
            replacement: mongo-exporter.base.svc:9216
    rule_files:
      - /etc/prometheus/rules/rule-0.yaml
    # No Alertmanager is configured; the lists are written out as empty
    # instead of being left as bare (null) keys.
    alerting:
      alert_relabel_configs: []
      alertmanagers: []
  prom-rule-0: |-
    groups:
      - name: mysqld_rules
        rules:
          # Record slave lag seconds for pre-computed timeseries that takes
          # `mysql_slave_status_sql_delay` into account
          - record: instance:mysql_slave_lag_seconds
            expr: mysql_slave_status_seconds_behind_master - mysql_slave_status_sql_delay
          # Record slave lag via heartbeat method
          - record: instance:mysql_heartbeat_lag_seconds
            expr: mysql_heartbeat_now_timestamp_seconds - mysql_heartbeat_stored_timestamp_seconds
          # Combined per-job rate of commit and rollback commands over 5 minutes
          - record: job:mysql_transactions:rate5m
            expr: sum without (command) (rate(mysql_global_status_commands_total{command=~"(commit|rollback)"}[5m]))
---
# Single-replica Prometheus server. The pod is pinned to one node by hostname
# and keeps its TSDB in a hostPath directory on that node.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: prometheus
spec:
  replicas: 1
  serviceName: prometheus
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      labels:
        app: prometheus
    spec:
      containers:
        - name: prometheus
          image: prom/prometheus:v2.51.2
          securityContext:
            # NOTE(review): presumably runs as root so the process can write
            # to the hostPath data directory - confirm before tightening.
            runAsUser: 0
            allowPrivilegeEscalation: false
          args:
            - --config.file=/etc/prometheus/prometheus.yaml
            - --storage.tsdb.path=/etc/prometheus/data
            - --storage.tsdb.retention.time=15d
            # Enables the HTTP reload/quit endpoints.
            - --web.enable-lifecycle
          ports:
            # Same port the `prometheus` Service targets.
            - name: web
              containerPort: 9090
              protocol: TCP
          # Keep the pod out of the Service endpoints until Prometheus
          # reports that it is ready to serve traffic.
          readinessProbe:
            httpGet:
              path: /-/ready
              port: web
            periodSeconds: 10
            failureThreshold: 3
          volumeMounts:
            - name: prometheus
              mountPath: /etc/prometheus/data
            # Both files are projected with subPath. Volumes mounted through
            # subPath do not receive ConfigMap updates, so edits to the
            # ConfigMap only reach the container after a pod restart.
            - name: prom-config
              mountPath: /etc/prometheus/prometheus.yaml
              subPath: prometheus.yaml
              readOnly: true
            - name: prom-config
              mountPath: /etc/prometheus/rules/rule-0.yaml
              subPath: rule-0.yaml
              readOnly: true
      volumes:
        # TSDB storage lives on the node's filesystem.
        - name: prometheus
          hostPath:
            path: /data/prometheus/data/
        # Maps the ConfigMap keys to the file names used by the mounts above.
        - name: prom-config
          configMap:
            name: prom-config
            items:
              - key: prom-config
                path: prometheus.yaml
              - key: prom-rule-0
                path: rule-0.yaml
      # Hard requirement: schedule only on the node that holds the hostPath
      # data directory.
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: "kubernetes.io/hostname"
                    operator: In
                    values:
                      - bfs-k8snode-10-2-2-7.hetzner.base.beaconfireinc.com
---
# Cluster-internal Service that forwards TCP 9090 to the pods labelled
# app=prometheus.
apiVersion: v1
kind: Service
metadata:
  labels:
    app: prometheus
  name: prometheus
spec:
  type: ClusterIP
  selector:
    app: prometheus
  ports:
    - port: 9090
      targetPort: 9090
      protocol: TCP