prometheus_amdgpu_power/prometheus-amdgpu-power.py
2022-08-29 18:29:39 +02:00

72 lines
1.8 KiB
Python
Executable File

#!/usr/bin/env python3
#coding: utf-8
import requests, json, datetime
from prometheus_client import start_http_server, Gauge, Summary, Info
from time import sleep
def gen_metrics():
    '''
    Create the exporter's metrics with no value recorded yet.
    Return them as a list; it currently holds a single Gauge named 'pwr'.
    '''
    power_gauge = Gauge('pwr', 'GPU power consumption in milliwatts')
    return [power_gauge]
def populate_metrics(stats, metrics):
    '''
    Copy each stat value into every metric whose name equals the stat's key.

    stats:   dict mapping a metric name to the value to record.
    metrics: list of prometheus_client metric objects.

    A Summary receives the value through observe(), a Gauge through set()
    and an Info through info() (prometheus_client expects a dict of string
    labels there). Metrics of any other type, and stats that have no
    same-named metric, are left untouched.

    Return the same metrics list that was passed in.
    '''
    for statname, statvalue in stats.items():
        for metric in metrics:
            # _name is a private prometheus_client attribute; it is what this
            # function matches the stat key against.
            if metric._name != statname:
                continue
            if isinstance(metric, Summary):
                metric.observe(statvalue)
            elif isinstance(metric, Gauge):
                metric.set(statvalue)
            elif isinstance(metric, Info):
                # Go through the public setter rather than assigning the
                # private _value attribute directly.
                metric.info(statvalue)
    return metrics
def get_power(stat, path='/sys/kernel/debug/dri/0/amdgpu_pm_info'):
    '''
    Read the GPU's average power draw from the amdgpu_pm_info file and
    store it, in milliwatts, in the given metric.

    stat: object exposing set(value); main() passes the 'pwr' Gauge.
    path: file to parse, /sys/kernel/debug/dri/0/amdgpu_pm_info by default.

    The reading is taken from the line of the form "<number> W (average ...".
    When the file contains no such line the metric is set to -1.

    Return stat.
    Raises OSError if the file cannot be opened or read.
    '''
    import re

    with open(path, 'r') as pminfo:
        report = pminfo.read()

    # Anchor on the "(average" label so that any other wattage line in the
    # report is not picked up in its place, and accept any number of decimals.
    reading = re.search(r'(\d+(?:\.\d+)?)\s*W\s*\(average', report)
    if reading is None:
        milliwatts = -1
    else:
        # Scale to milliwatts before converting to int so that the
        # fractional part of the watt reading is preserved.
        milliwatts = round(float(reading.group(1)) * 1000)
    stat.set(milliwatts)
    return stat
def main(port=8064, interval=2):
    '''
    Run the exporter: expose the 'pwr' gauge over HTTP and refresh it forever.

    port:     TCP port handed to start_http_server.
    interval: number of seconds to sleep between two readings.

    Does not return; the refresh loop runs until the process is stopped
    or get_power raises.
    '''
    wattage = gen_metrics()[0]
    # Take a first reading before the HTTP server starts.
    wattage = get_power(wattage)
    start_http_server(port)
    # Report the port the server was actually started on.
    print("Started http server on port {}".format(port))
    while True:
        wattage = get_power(wattage)
        sleep(interval)
# Start the exporter only when this file is executed as a script.
if __name__ == "__main__":
    main()