{"id":2709,"date":"2020-02-21T09:14:09","date_gmt":"2020-02-21T01:14:09","guid":{"rendered":"https:\/\/www.yaoge123.com\/blog\/?p=2709"},"modified":"2020-02-21T09:20:39","modified_gmt":"2020-02-21T01:20:39","slug":"prometheus-grafana-%e7%9b%91%e6%8e%a7-nvidia-gpu","status":"publish","type":"post","link":"https:\/\/www.yaoge123.com\/blog\/archives\/2709","title":{"rendered":"Prometheus + Grafana \u76d1\u63a7 NVIDIA GPU"},"content":{"rendered":"\n<p>1.\u9996\u5148\u5b89\u88c5 NVIDIA Data Center GPU Manager (DCGM)\uff0c\u4ece https:\/\/developer.nvidia.com\/dcgm \u4e0b\u8f7d\u5b89\u88c5<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>nv-hostengine -t\nyum erase -y datacenter-gpu-manager\nrpm -ivh datacenter-gpu-manager*\nsystemctl enable --now dcgm.service<\/code><\/pre>\n\n\n\n<p>2. \u5b89\u88c5 NVIDIA DCGM exporter for Prometheus\uff0c\u4ece https:\/\/github.com\/NVIDIA\/gpu-monitoring-tools\/tree\/master\/exporters\/prometheus-dcgm \u4e0b\u8f7d\u624b\u5de5\u5b89\u88c5<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>wget -q -O \/usr\/local\/bin\/dcgm-exporter https:\/\/raw.githubusercontent.com\/NVIDIA\/gpu-monitoring-tools\/master\/exporters\/prometheus-dcgm\/dcgm-exporter\/dcgm-exporter\nchmod +x \/usr\/local\/bin\/dcgm-exporter\nmkdir \/run\/prometheus \nwget -q -O \/etc\/systemd\/system\/prometheus-dcgm.service https:\/\/raw.githubusercontent.com\/NVIDIA\/gpu-monitoring-tools\/master\/exporters\/prometheus-dcgm\/bare-metal\/prometheus-dcgm.service\nsystemctl daemon-reload\nsystemctl enable --now prometheus-dcgm.service<\/code><\/pre>\n\n\n\n<p>3. 
\u4ece https:\/\/prometheus.io\/download\/#node_exporter \u4e0b\u8f7d node_exporter\uff0c\u624b\u5de5\u5b89\u88c5\u4e3a\u670d\u52a1\u5e76\u6dfb\u52a0 dcgm-exporter \u8d44\u6599<\/p>\n\n\n\n<pre class=\"wp-block-code\"><code>tar xf node_exporter*.tar.gz\nmv node_exporter-*\/node_exporter \/usr\/local\/bin\/\nchown root:root \/usr\/local\/bin\/node_exporter\nchmod +x \/usr\/local\/bin\/node_exporter\n\ncat > \/etc\/systemd\/system\/node_exporter.service &lt;&lt;EOF\n[Unit]\nDescription=Prometheus Node Exporter\nWants=network-online.target\nAfter=network-online.target\n\n[Service]\nType=simple\nExecStart=\/usr\/local\/bin\/node_exporter\n\n[Install]\nWantedBy=multi-user.target\nEOF\n\nsed -i '\/ExecStart=\\\/usr\\\/local\\\/bin\\\/node_exporter\/c\\ExecStart=\\\/usr\\\/local\\\/bin\\\/node_exporter --collector.textfile.directory=\\\/run\\\/prometheus' \/etc\/systemd\/system\/node_exporter.service\n\nsystemctl daemon-reload\nsystemctl enable --now node_exporter.service\n<\/code><\/pre>\n\n\n\n<p>4. 
\u5728 Grafana \u4e2d\u5bfc\u5165\u8fd9\u4e2a Dashboard\uff1a<br \/><a href=\"https:\/\/grafana.com\/grafana\/dashboards\/11752\">https:\/\/grafana.com\/grafana\/dashboards\/11752<\/a><\/p>\n\n\n","protected":false},"excerpt":{"rendered":"<p>1.\u9996\u5148\u5b89\u88c5 NVIDIA Data Center GPU Manager (DCGM)\uff0c\u4ece https:\/\/ [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"_monsterinsights_skip_tracking":false,"_monsterinsights_sitenote_active":false,"_monsterinsights_sitenote_note":"","_monsterinsights_sitenote_category":0,"jetpack_post_was_ever_published":false,"_jetpack_newsletter_access":"","_jetpack_dont_email_post_to_subs":false,"_jetpack_newsletter_tier_id":0,"_jetpack_memberships_contains_paywalled_content":false,"_jetpack_memberships_contains_paid_content":false,"footnotes":"","jetpack_publicize_message":"","jetpack_publicize_feature_enabled":true,"jetpack_social_post_already_shared":true,"jetpack_social_options":{"image_generator_settings":{"template":"highway","default_image_id":0,"font":"","enabled":false},"version":2}},"categories":[198,11],"tags":[],"class_list":["post-2709","post","type-post","status-publish","format-standard","hentry","category-hpc","category-xnix"],"jetpack_publicize_connections":[],"jetpack_featured_media_url":"","jetpack_sharing_enabled":true,"jetpack_shortlink":"https:\/\/wp.me\/paOwEq-HH","_links":{"self":[{"href":"https:\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/posts\/2709","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/comments?post=2709"}],"version-history":[{"count":3,"href":"https:
\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/posts\/2709\/revisions"}],"predecessor-version":[{"id":2712,"href":"https:\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/posts\/2709\/revisions\/2712"}],"wp:attachment":[{"href":"https:\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/media?parent=2709"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/categories?post=2709"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.yaoge123.com\/blog\/wp-json\/wp\/v2\/tags?post=2709"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}