From 43d50700eccf7e16777220874adb27f36354f47d Mon Sep 17 00:00:00 2001
From: Justine
Date: Mon, 29 Aug 2022 18:29:39 +0200
Subject: [PATCH] first

---
 prometheus-amdgpu-power.py      | 99 +++++++++++++++++++++++++++++++++
 prometheus-amdgpu-power.service | 10 ++++
 2 files changed, 109 insertions(+)
 create mode 100755 prometheus-amdgpu-power.py
 create mode 100644 prometheus-amdgpu-power.service

diff --git a/prometheus-amdgpu-power.py b/prometheus-amdgpu-power.py
new file mode 100755
index 0000000..05cdc87
--- /dev/null
+++ b/prometheus-amdgpu-power.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+#coding: utf-8
+'''Prometheus exporter for the amdgpu power consumption found in debugfs.'''
+import requests, json, datetime
+import re
+from prometheus_client import start_http_server, Gauge, Summary, Info
+from time import sleep
+
+# debugfs file the power reading is parsed from.
+PM_INFO_PATH = '/sys/kernel/debug/dri/0/amdgpu_pm_info'
+# TCP port the metrics HTTP server listens on.
+LISTEN_PORT = 8064
+# Seconds to sleep between two readings.
+POLL_INTERVAL = 2
+# A reading such as "12.34 W"; the group captures the number of watts.
+WATTS_RE = re.compile(r'(\d{1,3}\.\d{1,2}) W')
+
+
+def gen_metrics():
+    '''
+    Generate empty metrics.
+    Return a list holding a single Gauge named 'pwr'.
+    '''
+    return [
+        Gauge('pwr', 'GPU power consumption in milliwatts'),
+    ]
+
+
+def populate_metrics(stats, metrics):
+    '''
+    Take a dict of stats and a list of metrics.
+    For each stat, update every metric whose name equals the stat's key:
+    a Summary observes the value, a Gauge is set to it.
+    Return the list of metrics.
+    '''
+    for statname, statvalue in stats.items():
+        for metric in metrics:
+            if statname != metric._name:
+                continue
+            if isinstance(metric, Summary):
+                metric.observe(statvalue)
+            elif isinstance(metric, Gauge):
+                metric.set(statvalue)
+            elif isinstance(metric, Info):
+                # NOTE(review): this overwrites a private attribute; the
+                # public way to fill an Info is metric.info({...}).
+                # Confirm the shape of statvalue before switching.
+                metric._value = statvalue
+
+    return metrics
+
+
+def get_power(stat):
+    '''
+    Read the gpu's power consumption from
+    /sys/kernel/debug/dri/0/amdgpu_pm_info and store it, converted to
+    milliwatts, in the metric.
+
+    stat: metric exposing set(), e.g. the Gauge from gen_metrics().
+    Return stat. It is left untouched when the file does not contain
+    'average' or holds no "<number> W" reading.
+    Raise OSError when the file cannot be opened or read.
+    '''
+    with open(PM_INFO_PATH, 'r') as pminfo:
+        text = pminfo.read()
+
+    if 'average' not in text:
+        return stat
+
+    reading = WATTS_RE.search(text)
+    if reading is None:
+        # Nothing to report: keep the previous value instead of
+        # crashing the exporter on an unexpected file layout.
+        return stat
+
+    # Convert before rounding so the fraction of a watt is kept:
+    # "12.34 W" is 12340 mW, not 12000 mW.
+    stat.set(int(round(float(reading.group(1)) * 1000)))
+    return stat
+
+
+def main():
+    '''
+    Start the metrics HTTP server, then refresh the power gauge every
+    POLL_INTERVAL seconds, forever.
+    '''
+    power_gauge = gen_metrics()[0]
+    # First reading, taken before the server starts listening.
+    get_power(power_gauge)
+
+    start_http_server(LISTEN_PORT)
+    print("Started http server on port {}".format(LISTEN_PORT))
+    while True:
+        get_power(power_gauge)
+        sleep(POLL_INTERVAL)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/prometheus-amdgpu-power.service b/prometheus-amdgpu-power.service
new file mode 100644
index 0000000..0edd8a8
--- /dev/null
+++ b/prometheus-amdgpu-power.service
@@ -0,0 +1,10 @@
+[Unit]
+Description=prometheus-amdgpu-power exporter
+
+[Service]
+User=root
+ExecStart=/usr/bin/prometheus-amdgpu-power
+Restart=on-failure
+
+[Install]
+WantedBy=multi-user.target