#!/usr/bin/python3
import apt
import argparse
from cryptography import x509
from cryptography.hazmat.backends import default_backend
import datetime
import dbus
import mailbox
from git import Repo
import glob
import os
import psutil
import requests
import shlex
import subprocess
from systemd import login, journal
import time

from prometheus_client import CollectorRegistry, Gauge
from prometheus_client.exposition import generate_latest
from prometheus_client.parser import text_string_to_metric_families


# Package cache used by the packages() check; opened once when the module loads.
apt_cache = apt.Cache()
# Private registry: the script prints exactly what is registered here.
registry = CollectorRegistry()


def _gauge(name, documentation, *labelnames):
    """Create the gauge *name* with the given labels in the module registry."""
    return Gauge(name, documentation, list(labelnames), registry=registry)


# Each variable is named after its metric: run_in_machines() looks gauges up
# by metric name in globals().  The "ctn" label receives the --ctn value,
# which is empty by default and set to the machine name for nested runs.
eo_errors = _gauge("eo_errors", "failed tests", "ctn")
eo_certificates = _gauge("eo_certificates", "certificates", "ctn", "name")
eo_debian = _gauge("eo_debian", "debian os", "ctn")
eo_etckeeper = _gauge("eo_etckeeper", "etckeeper", "ctn", "name")
eo_exim = _gauge("eo_exim", "exim", "ctn", "name")
eo_journal = _gauge("eo_journal", "journald", "ctn", "name")
eo_local_changes = _gauge("eo_local_changes", "", "ctn")
eo_mailboxes = _gauge("eo_mailboxes", "emails in local mailboxes", "ctn", "name")
eo_munin = _gauge("eo_munin", "munin", "ctn", "name")
eo_nginx = _gauge("eo_nginx", "nginx", "ctn", "name")
eo_postgresql = _gauge("eo_postgresql", "postgresql", "ctn", "role", "name")
eo_rabbitmq = _gauge("eo_rabbitmq", "rabbitmq", "ctn")
eo_threads = _gauge("eo_threads", "system threads", "ctn")
eo_units = _gauge("eo_units", "systemd units", "ctn", "state")
eo_packages = _gauge("eo_packages", "packages", "ctn", "state")


def run(cmd):
    """Run *cmd* without a shell and return its standard output as text.

    The command line is split with ``shlex.split``.  Standard error is
    captured and discarded and the exit status is ignored.  Trailing
    whitespace is removed from every output line before the lines are
    joined back together with newlines.
    """
    argv = shlex.split(cmd)
    # subprocess.run() drains both pipes and waits for the child.  Reading
    # only stdout could block forever once the child had filled the unread
    # stderr pipe, and never waiting for the child left a zombie behind.
    completed = subprocess.run(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    lines = completed.stdout.decode().split("\n")
    if lines[-1] == "":
        # A final newline (or empty output) leaves one empty trailing item.
        lines.pop()
    return "\n".join(line.rstrip() for line in lines)


def debian(ctn):
    """Export the release number found in /etc/debian_version.

    A version string containing "sid" is exported as 0; any other content is
    stripped and parsed as a float.
    """
    with open('/etc/debian_version') as version_file:
        raw_version = version_file.read()
    # we shouldn't have non-stable versions, so those are reported as 0
    release = 0 if 'sid' in raw_version else float(raw_version.strip())
    eo_debian.labels(ctn).set(release)


def etckeeper(ctn):
    """Export the state of the git repository in /etc.

    "dirty" is 1 when ``Repo.is_dirty()`` is true and 0 otherwise;
    "untracked" is the number of untracked files.
    """
    etc_repo = Repo("/etc")
    dirty_gauge = eo_etckeeper.labels(ctn, "dirty")
    dirty_gauge.set(int(etc_repo.is_dirty()))
    untracked_gauge = eo_etckeeper.labels(ctn, "untracked")
    untracked_gauge.set(len(etc_repo.untracked_files))


def exim(ctn):
    """Export exim log errors and the message count printed by ``exim -bpc``.

    "errors" is incremented once for every line of the exim main log that
    contains " ** " (presumably exim's delivery-failure marker - confirm);
    the series is only created when at least one such line exists.
    "deferred" is set to the integer printed by ``exim -bpc``.
    """
    with open('/var/log/exim4/mainlog') as mainlog:
        flagged = [entry for entry in mainlog.readlines() if " ** " in entry]
        for _ in flagged:
            eo_exim.labels(ctn, "errors").inc()
    queue_size = int(run("exim -bpc"))
    eo_exim.labels(ctn, "deferred").set(queue_size)


def certificates(ctn):
    """Export the number of whole days left before each certificate expires.

    Certificates that are not present on this host are skipped.  The gauge
    is labelled with the certificate path.
    """
    certs = ["/etc/exim4/exim.crt"]
    # not_valid_after is a naive datetime expressed in UTC, so "now" has to be
    # naive UTC too; using local time skews the result by the UTC offset.
    now = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    for cert in certs:
        if not os.path.isfile(cert):
            continue
        # Read the PEM bytes directly and close the file deterministically.
        with open(cert, "rb") as fh:
            pem = fh.read()
        c = x509.load_pem_x509_certificate(pem, default_backend())
        remaining = (c.not_valid_after - now).days
        eo_certificates.labels(ctn, cert).set(remaining)


def journald(ctn):
    """Count high-priority journal entries from the last fifteen minutes.

    "critical" is set to the number of entries matched with PRIORITY=2.
    "error" is incremented for every entry of the second pass, and
    "network_failure" for those whose message mentions a network failure or
    an nfsd task.  The last two series only exist once an entry was seen.
    """
    reader = journal.Reader()
    window_start = time.time() - 15 * 60

    reader.seek_realtime(window_start)
    reader.add_match(PRIORITY=2)
    critical_gauge = eo_journal.labels(ctn, "critical")
    critical_gauge.set(sum(1 for _ in reader))

    # Second pass over the same window.
    # NOTE(review): the PRIORITY=2 match added above is still installed here;
    # journald presumably ORs matches on the same field, so this pass may
    # also count priority-2 entries - confirm that this is intended.
    reader.seek_realtime(window_start)
    reader.add_match(PRIORITY=3)
    for entry in reader:
        eo_journal.labels(ctn, "error").inc()
        message = entry["MESSAGE"]
        if "Connected -> NetworkFailure" in message or "task nfsd" in message:
            eo_journal.labels(ctn, "network_failure").inc()


def local_changes(ctn):
    """Export the number of lines in the check-local-changes log.

    Nothing is exported when the log file does not exist.
    """
    f = "/var/log/check-local-changes.log"
    if not os.path.isfile(f):
        return
    # Count lines while streaming so that the file is closed deterministically
    # and never held in memory as a whole.
    with open(f) as fh:
        n = sum(1 for _ in fh)
    eo_local_changes.labels(ctn).set(n)


def munin(ctn):
    """Export the number of error lines in the munin-node log.

    A line counts when it contains "rror" (which covers both "error" and
    "Error") and does not mention "mail_space".  Nothing is exported when
    the log file does not exist.
    """
    f = "/var/log/munin/munin-node.log"
    if not os.path.isfile(f):
        return
    # Stream the log so that the file is closed deterministically and never
    # held in memory as a whole.
    with open(f) as fh:
        n = sum(1 for line in fh if "rror" in line and "mail_space" not in line)
    eo_munin.labels(ctn, "errors").set(n)


def nginx(ctn):
    """Export the active connection count from the local nginx status page.

    Connection and SSL errors are treated as "no nginx here" and ignored, as
    is a non-successful HTTP status.  Any other request error, including a
    read timeout, propagates to the caller.
    """
    try:
        # Bound the request: without a timeout a web server that accepts the
        # connection but never answers would stall the whole exporter.
        r = requests.get("http://localhost/nginx_status", timeout=10)
    except (requests.exceptions.SSLError, requests.exceptions.ConnectionError):
        return
    if not r.ok:
        return
    for line in r.text.splitlines():
        if "Active connections" in line:
            # The value is whatever follows the first colon on that line.
            n = int(line.split(':')[1].strip())
            eo_nginx.labels(ctn, "connections").set(n)


def packages(ctn):
    """Export the number of packages for which an upgrade is available."""
    # get_changes() only lists packages that were marked for a change, and
    # nothing in this script marks any package on the cache, so it cannot be
    # used to find upgradable packages.  Ask every package directly instead,
    # using the current attribute name (isUpgradable is the legacy spelling).
    n = sum(1 for pkg in apt_cache if pkg.is_upgradable)
    eo_packages.labels(ctn, "upgradable").set(n)


def mailboxes(ctn):
    """Export the number of messages in each recently modified local mailbox.

    The gauge is labelled with the mailbox file name.  Only regular files
    under /var/spool/mail modified within the last 30 days are counted.
    """
    if os.path.exists('/etc/dovecot/dovecot.conf'):
        # skip servers where dovecot is installed as it's expected to have
        # mailboxes there.
        return
    days_ago = time.time() - 30 * 86400
    for m in glob.glob("/var/spool/mail/*"):
        if not os.path.isfile(m):
            continue
        if not os.stat(m).st_mtime > days_ago:
            # skip mailboxes that didn't change for a long time
            continue
        n = os.path.basename(m)
        box = mailbox.mbox(m)
        try:
            c = len(box)
        finally:
            # Release the underlying file instead of leaking one handle per
            # mailbox.
            box.close()
        eo_mailboxes.labels(ctn, n).set(c)


def postgresql(ctn):
    """Export backup age and replication process counts for PostgreSQL.

    Nothing is exported when /var/lib/postgresql does not exist.  A host
    without any recovery.conf is reported with role "primary" (backup age
    and number of WAL sender processes); otherwise the role is "secondary"
    (number of WAL receiver processes).
    """
    if not os.path.isdir("/var/lib/postgresql"):
        return

    def get_last_backup_delta():
        """Return the age in seconds of the newest base backup, or -1 if none."""
        backup_files = glob.glob('/var/lib/postgresql/backups/base/*')
        if not backup_files:
            return -1
        sorted_backup_files = sorted(backup_files, key=os.path.getmtime)
        # NOTE(review): entries are ordered by mtime but the age is computed
        # from ctime - confirm that mixing the two is intended.
        created = os.stat(sorted_backup_files[-1]).st_ctime
        return (datetime.datetime.now() - datetime.datetime.fromtimestamp(created)).total_seconds()

    def count_processes(*markers):
        """Count processes whose first command-line item contains a marker."""
        total = 0
        for p in psutil.process_iter():
            try:
                cmd = p.cmdline()
            except psutil.NoSuchProcess:
                # The process exited while the process table was being
                # walked; that must not abort the whole check.
                continue
            if cmd and any(marker in cmd[0] for marker in markers):
                total += 1
        return total

    # NOTE(review): PostgreSQL 12 and later presumably mark a standby with
    # standby.signal rather than recovery.conf - confirm whether such
    # clusters need to be detected here.
    recovery = glob.glob("/var/lib/postgresql/*/*/recovery.conf")
    if not recovery:
        role = "primary"
        eo_postgresql.labels(ctn, role, "backup_delta").set(get_last_backup_delta())
        eo_postgresql.labels(ctn, role, "replicators").set(count_processes('walsender', 'wal sender'))
    else:
        role = "secondary"
        eo_postgresql.labels(ctn, role, "replicating").set(count_processes('walreceiver'))


def rabbitmq(ctn):
    """Export the total number of messages held in RabbitMQ queues.

    Nothing is exported when rabbitmqctl is not installed.  Output lines
    that are not plain numbers are ignored.
    """
    rabbitmqctl = "/usr/sbin/rabbitmqctl"
    if not os.path.isfile(rabbitmqctl):
        return
    # run() returns a single string: iterating over it directly walks it
    # character by character and adds up individual digits.  Split it into
    # lines so that each per-queue count is added as a whole number.
    for line in run("%s list_queues messages" % rabbitmqctl).splitlines():
        count = line.strip()
        if count.isdigit():
            eo_rabbitmq.labels(ctn).inc(int(count))


def threads(ctn):
    """Export the total number of threads across all running processes."""
    total = 0
    for p in psutil.process_iter():
        try:
            total += p.num_threads()
        except psutil.NoSuchProcess:
            # The process exited while the process table was being walked;
            # skip it rather than failing the whole check.
            continue
    eo_threads.labels(ctn).set(total)


def units(ctn):
    """Export the number of systemd units reported as failed.

    Units are listed through the systemd manager interface on the system
    D-Bus.
    """
    system_bus = dbus.SystemBus()
    systemd_object = system_bus.get_object("org.freedesktop.systemd1", "/org/freedesktop/systemd1")
    manager = dbus.Interface(systemd_object, "org.freedesktop.systemd1.Manager")
    failed = 0
    for unit in manager.ListUnits():
        # index 3 is presumably the unit's active state - confirm against
        # the ListUnits() signature
        if unit[3] == "failed":
            failed += 1
    eo_units.labels(ctn, "failed").set(failed)


def run_in_machines(ctn):
    """Run the exporter inside every machine and merge what it prints.

    For each name returned by ``login.machine_names()`` the exporter is run
    in that machine through ``systemd-run`` with ``--ctn`` set to the machine
    name.  Its output is parsed as Prometheus text and every sample is copied
    into the module-level gauge that has the same name as the metric.

    The *ctn* argument is not used; it only keeps the signature in line with
    the other checks.
    """
    for machine in login.machine_names():
        command = (
            "systemd-run --wait --pipe -q --machine %s /usr/bin/prometheus-entrouvert-exporter.py --ctn %s"
            % (machine, machine)
        )
        exposition = run(command)
        for family in text_string_to_metric_families(exposition):
            for sample in family.samples:
                # Gauges are module globals named after their metric.
                gauge = globals()[family.name]
                gauge.labels(**sample.labels).set(sample.value)


if __name__ == "__main__":
    # Command line: --test re-raises the first failure instead of only
    # counting it; --ctn is the value used for the "ctn" label.
    cli = argparse.ArgumentParser()
    cli.add_argument("--test", action="store_true", help="raise errors")
    cli.add_argument("--ctn", default="")
    args = cli.parse_args()

    # Checks run in this order; run_in_machines comes last.
    checks = (
        certificates,
        debian,
        etckeeper,
        exim,
        journald,
        local_changes,
        mailboxes,
        munin,
        nginx,
        packages,
        postgresql,
        rabbitmq,
        threads,
        units,
        run_in_machines,
    )
    for check in checks:
        try:
            check(args.ctn)
        except Exception:
            # A failing check must not prevent the others from reporting;
            # it is counted in eo_errors instead.
            eo_errors.labels(ctn=args.ctn).inc()
            if args.test:
                raise
    # Emit everything collected in the registry on standard output.
    print(generate_latest(registry).decode())
