203 lines
6.2 KiB
Nix
203 lines
6.2 KiB
Nix
{
|
|
lib,
|
|
pkgs,
|
|
config,
|
|
nodes,
|
|
ports,
|
|
...
|
|
}:
|
|
let
|
|
targets = lib.mapAttrsToList (_name: node: node.fqdn) nodes ++ [ "pek0.ny4.dev" ];
|
|
in
|
|
{
|
|
services.prometheus = {
|
|
enable = true;
|
|
listenAddress = "127.0.0.1";
|
|
port = ports.prometheus;
|
|
webExternalUrl = "https://prom.ny4.dev";
|
|
|
|
exporters.blackbox = {
|
|
enable = true;
|
|
listenAddress = "127.0.0.1";
|
|
port = ports.blackbox;
|
|
configFile = (pkgs.formats.yaml { }).generate "config.yaml" {
|
|
modules.http_2xx = {
|
|
prober = "http";
|
|
http.fail_if_not_ssl = true;
|
|
};
|
|
};
|
|
};
|
|
|
|
scrapeConfigs = [
|
|
{
|
|
job_name = "node_exporter";
|
|
scheme = "https";
|
|
metrics_path = "/metrics";
|
|
basic_auth = {
|
|
username = "prometheus";
|
|
password_file = config.sops.secrets."prometheus/auth".path;
|
|
};
|
|
static_configs = lib.singleton { inherit targets; };
|
|
}
|
|
{
|
|
job_name = "caddy";
|
|
scheme = "https";
|
|
metrics_path = "/caddy";
|
|
basic_auth = {
|
|
username = "prometheus";
|
|
password_file = config.sops.secrets."prometheus/auth".path;
|
|
};
|
|
static_configs = lib.singleton { inherit targets; };
|
|
}
|
|
{
|
|
job_name = "ntfy";
|
|
scheme = "https";
|
|
metrics_path = "/metrics";
|
|
static_configs = lib.singleton { targets = [ "ntfy.ny4.dev" ]; };
|
|
}
|
|
{
|
|
job_name = "forgejo";
|
|
scheme = "https";
|
|
metrics_path = "/metrics";
|
|
static_configs = lib.singleton { targets = [ "git.ny4.dev" ]; };
|
|
}
|
|
{
|
|
job_name = "miniflux";
|
|
scheme = "https";
|
|
metrics_path = "/metrics";
|
|
static_configs = lib.singleton { targets = [ "rss.ny4.dev" ]; };
|
|
}
|
|
{
|
|
job_name = "blackbox_exporter";
|
|
static_configs = lib.singleton { targets = [ "127.0.0.1:${toString ports.blackbox}" ]; };
|
|
}
|
|
{
|
|
job_name = "blackbox_probe";
|
|
metrics_path = "/probe";
|
|
params = {
|
|
module = [ "http_2xx" ];
|
|
};
|
|
static_configs = lib.singleton {
|
|
targets = [
|
|
"https://blog.ny4.dev"
|
|
"https://cinny.ny4.dev"
|
|
"https://element.ny4.dev"
|
|
"https://git.ny4.dev"
|
|
"https://id.ny4.dev"
|
|
"https://ip.ny4.dev"
|
|
"https://mastodon.ny4.dev"
|
|
"https://matrix.ny4.dev"
|
|
"https://ntfy.ny4.dev"
|
|
"https://pb.ny4.dev"
|
|
"https://reddit.ny4.dev"
|
|
"https://rss.ny4.dev"
|
|
"https://vault.ny4.dev"
|
|
];
|
|
};
|
|
relabel_configs = [
|
|
{
|
|
source_labels = [ "__address__" ];
|
|
target_label = "__param_target";
|
|
}
|
|
{
|
|
source_labels = [ "__param_target" ];
|
|
target_label = "instance";
|
|
}
|
|
{
|
|
target_label = "__address__";
|
|
replacement = "127.0.0.1:${toString ports.blackbox}";
|
|
}
|
|
];
|
|
}
|
|
|
|
];
|
|
|
|
rules = lib.singleton (
|
|
builtins.toJSON {
|
|
groups = lib.singleton {
|
|
name = "metrics";
|
|
rules = [
|
|
{
|
|
alert = "NodeDown";
|
|
expr = ''up{job="node_exporter"} == 0'';
|
|
for = "5m";
|
|
annotations = {
|
|
summary = "Node exporter down on {{ $labels.instance }}";
|
|
description = "Node exporter on {{ $labels.instance }} has been down for more than 5 minutes.";
|
|
};
|
|
}
|
|
{
|
|
alert = "HTTPDown";
|
|
expr = ''up{job="blackbox_probe"} == 0 or probe_success{job="blackbox_probe"} == 0'';
|
|
for = "5m";
|
|
annotations = {
|
|
summary = "HTTP probe failure on {{ $labels.instance }}";
|
|
description = "The HTTP blackbox probe on {{ $labels.instance }} has failed for more than 5 minutes.";
|
|
};
|
|
}
|
|
{
|
|
alert = "MemoryFull";
|
|
expr = ''node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes < 0.1'';
|
|
for = "5m";
|
|
annotations = {
|
|
summary = "Low available memory on {{ $labels.instance }}";
|
|
description = "{{ $labels.instance }} has less than 10% available memory for more than 5 minutes.";
|
|
};
|
|
}
|
|
{
|
|
alert = "DiskFull";
|
|
expr = ''node_filesystem_avail_bytes{mountpoint=~"/|/persist|/mnt"} / node_filesystem_size_bytes < 0.1'';
|
|
annotations = {
|
|
summary = "Low disk space on {{ $labels.instance }}";
|
|
description = "The disk {{ $labels.device }} mounted at {{ $labels.mountpoint }} on {{ $labels.instance }} has less than 10% of empty space available.";
|
|
};
|
|
}
|
|
{
|
|
alert = "UnitFailed";
|
|
expr = ''node_systemd_unit_state{state="failed"} == 1'';
|
|
annotations = {
|
|
summary = "Systemd unit {{ $labels.name }} failure on {{ $labels.instance }}";
|
|
description = "The systemd unit {{ $labels.name }} on {{ $labels.instance }} has entered a {{ $labels.state }} state.";
|
|
};
|
|
}
|
|
];
|
|
};
|
|
}
|
|
);
|
|
|
|
alertmanagers = lib.singleton {
|
|
static_configs = lib.singleton { targets = [ "127.0.0.1:${toString ports.alertmanager}" ]; };
|
|
};
|
|
|
|
alertmanager = {
|
|
enable = true;
|
|
checkConfig = false;
|
|
listenAddress = "127.0.0.1";
|
|
port = ports.alertmanager;
|
|
|
|
configuration = {
|
|
receivers = lib.singleton {
|
|
name = "ntfy";
|
|
webhook_configs = lib.singleton {
|
|
# https://docs.ntfy.sh/publish/#message-templating
|
|
url = "$ALERTMANAGER_WEBHOOK_URL";
|
|
};
|
|
};
|
|
route = {
|
|
receiver = "ntfy";
|
|
};
|
|
};
|
|
};
|
|
};
|
|
|
|
systemd.services."alertmanager".serviceConfig.EnvironmentFile =
|
|
config.sops.templates."alertmanager/environment".path;
|
|
|
|
services.caddy.settings.apps.http.servers.srv0.routes = lib.singleton {
|
|
match = lib.singleton { host = [ "prom.ny4.dev" ]; };
|
|
handle = lib.singleton {
|
|
handler = "reverse_proxy";
|
|
upstreams = [ { dial = "127.0.0.1:${toString ports.prometheus}"; } ];
|
|
};
|
|
};
|
|
}
|