Skip to content
Snippets Groups Projects
Verified Commit 0f748dd0 authored by nd's avatar nd
Browse files

Initial commit

parents
Branches
No related tags found
No related merge requests found
# Role defaults for the Prometheus Alertmanager.
#   args:   command-line flags, keyed by flag name without the leading "--".
#   config: the Alertmanager configuration; it is serialized to YAML and
#           written to /etc/prometheus/alertmanager.yml.
prometheus_alertmanager:
  args:
    "web.listen-address": "[::1]:9093"
  config:
    global:
      # The SMTP sender used for mail notifications.
      # NOTE(review): no smtp_smarthost is set in these defaults - confirm
      # that mail delivery works with the installed Alertmanager version.
      smtp_from: 'alertmanager@{{ inventory_hostname }}'
    templates:
      - '/etc/prometheus/alertmanager_templates/*.tmpl'
    route:
      # The labels by which incoming alerts are grouped together. For example,
      # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
      # be batched into a single group.
      group_by: ['alertname', 'cluster', 'service']
      # When a new group of alerts is created by an incoming alert, wait at
      # least 'group_wait' to send the initial notification.
      # This ensures that multiple alerts for the same group that start
      # firing shortly after one another are batched together in the first
      # notification.
      group_wait: 30s
      # After the first notification was sent, wait 'group_interval' to send
      # a batch of new alerts that started firing for that group.
      group_interval: 5m
      # If an alert has successfully been sent, wait 'repeat_interval' to
      # resend it.
      repeat_interval: 3h
      # A default receiver
      receiver: mail-default
      # All the above attributes are inherited by all child routes and can
      # be overwritten on each.
      # The child route trees.
      routes: []
    # Inhibition rules allow muting a set of alerts given that another alert
    # is firing.
    # We use this to mute any warning-level notifications if the same alert is
    # already critical.
    inhibit_rules:
      - source_match:
          severity: 'critical'
        target_match:
          severity: 'warning'
        # Apply inhibition only if alertname, cluster and service all match.
        equal: ['alertname', 'cluster', 'service']
    receivers:
      - name: "blackhole"
      - name: 'mail-default'
        email_configs:
          - to: 'root@localhost'
            # send_resolved is a per-integration option, so it belongs to the
            # email_configs entry rather than to the receiver itself.
            send_resolved: true
# Handler: restarts the prometheus-alertmanager service. Tasks trigger it
# through `notify: restart alertmanager`, so the name must stay unchanged.
- name: restart alertmanager
  service:
    state: restarted
    name: prometheus-alertmanager
# Install the Alertmanager from the distribution's apt package.
- name: install alertmanager
  apt:
    # `name` is the canonical spelling of the `pkg` alias.
    name: prometheus-alertmanager
    # `present` is the module default; spelled out for clarity.
    state: present
# Render the service defaults file (holds the ARGS line passed to the
# server) and restart the service whenever the rendered content changes.
- name: write alertmanager service config
  template:
    src: prometheus-alertmanager.j2
    dest: /etc/default/prometheus-alertmanager
    # Explicit ownership and permissions, matching the alertmanager.yml task,
    # so the result does not depend on the umask or a pre-existing file.
    owner: root
    group: root
    mode: "0644"
  notify: restart alertmanager
# Serialize prometheus_alertmanager.config to YAML, write it to the
# Alertmanager configuration file, and restart the service on change.
- name: write alertmanager config
  copy:
    dest: /etc/prometheus/alertmanager.yml
    content: "{{ prometheus_alertmanager.config|to_nice_yaml(indent=2) }}"
    owner: root
    group: root
    # Quoted so the mode is passed as an octal string instead of relying on
    # the YAML parser's implicit octal-integer handling.
    mode: "0644"
  notify: restart alertmanager
# Set the command-line arguments to pass to the server.
# This line is generated from the keys of prometheus_alertmanager.args:
# every key is emitted as a "--key" flag, and "='value'" is appended only
# when the value is truthy and not an empty mapping; otherwise the bare
# flag is emitted on its own.
ARGS="{% for i in prometheus_alertmanager.args %} --{{ i }}{% if prometheus_alertmanager.args[i] and prometheus_alertmanager.args[i] != {} %}='{{ prometheus_alertmanager.args[i] }}'{% endif %} {% endfor %}"
# The alert manager supports the following options:
# --config.file="/etc/prometheus/alertmanager.yml"
# Alertmanager configuration file name.
# --storage.path="/var/lib/prometheus/alertmanager/"
# Base path for data storage.
# --data.retention=120h
# How long to keep data for.
# --alerts.gc-interval=30m
# Interval between alert GC.
# --log.level=info
# Only log messages with the given severity or above.
# --web.external-url=WEB.EXTERNAL-URL
# The URL under which Alertmanager is externally reachable (for example,
# if Alertmanager is served via a reverse proxy). Used for generating
# relative and absolute links back to Alertmanager itself. If the URL has
# a path portion, it will be used to prefix all HTTP endpoints served by
# Alertmanager. If omitted, relevant URL components will be derived
# automatically.
# --web.route-prefix=WEB.ROUTE-PREFIX
# Prefix for the internal routes of web endpoints. Defaults to path of
# --web.external-url.
# --web.listen-address=":9093"
# Address to listen on for the web interface and API.
# --web.ui-path="/usr/share/prometheus/alertmanager/ui/"
# Path to static UI directory.
# --template.default="/usr/share/prometheus/alertmanager/default.tmpl"
# Path to default notification template.
# --cluster.listen-address="0.0.0.0:9094"
# Listen address for cluster.
# --cluster.advertise-address=CLUSTER.ADVERTISE-ADDRESS
# Explicit address to advertise in cluster.
# --cluster.peer=CLUSTER.PEER ...
# Initial peers (may be repeated).
# --cluster.peer-timeout=15s
# Time to wait between peers to send notifications.
# --cluster.gossip-interval=200ms
# Interval between sending gossip messages. By lowering this value (more
# frequent) gossip messages are propagated across the cluster more
# quickly at the expense of increased bandwidth.
# --cluster.pushpull-interval=1m0s
# Interval for gossip state syncs. Setting this interval lower (more
# frequent) will increase convergence speeds across larger clusters at
# the expense of increased bandwidth usage.
# --cluster.tcp-timeout=10s Timeout for establishing a stream connection
# with a remote node for a full state sync, and for stream read and write
# operations.
# --cluster.probe-timeout=500ms
# Timeout to wait for an ack from a probed node before assuming it is
# unhealthy. This should be set to 99-percentile of RTT (round-trip time)
# on your network.
# --cluster.probe-interval=1s
# Interval between random node probes. Setting this lower (more frequent)
# will cause the cluster to detect failed nodes more quickly at the
# expense of increased bandwidth usage.
# --cluster.settle-timeout=1m0s
# Maximum time to wait for cluster connections to settle before
# evaluating notifications.
# --cluster.reconnect-interval=10s
# Interval between attempting to reconnect to lost peers.
# --cluster.reconnect-timeout=6h0m0s
# Length of time to attempt to reconnect to a lost peer.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment