Add container specs & cadvisor version info to prometheus metrics

This commit is contained in:
Jimmi Dyson 2015-09-09 13:43:25 +01:00
parent 4914057418
commit b8fc8cd2ae
4 changed files with 139 additions and 66 deletions

View File

@ -136,8 +136,7 @@ func newDockerContainerHandler(
handler.pid = ctnr.State.Pid
// Add the name and bare ID as aliases of the container.
handler.aliases = append(handler.aliases, strings.TrimPrefix(ctnr.Name, "/"))
handler.aliases = append(handler.aliases, id)
handler.aliases = append(handler.aliases, strings.TrimPrefix(ctnr.Name, "/"), id)
handler.labels = ctnr.Config.Labels
handler.image = ctnr.Config.Image
handler.networkMode = ctnr.HostConfig.NetworkMode

View File

@ -24,9 +24,11 @@ import (
)
// This will usually be manager.Manager, but can be swapped out for testing.
type subcontainersInfoProvider interface {
type infoProvider interface {
// Get information about all subcontainers of the specified container (includes self).
SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
// Get information about the machine.
GetVersionInfo() (*info.VersionInfo, error)
}
// metricValue describes a single metric value for a given set of label values
@ -60,19 +62,19 @@ type containerMetric struct {
getValues func(s *info.ContainerStats) metricValues
}
func (cm *containerMetric) desc() *prometheus.Desc {
return prometheus.NewDesc(cm.name, cm.help, append([]string{"name", "id", "image"}, cm.extraLabels...), nil)
func (cm *containerMetric) desc(baseLabels []string) *prometheus.Desc {
return prometheus.NewDesc(cm.name, cm.help, append(baseLabels, cm.extraLabels...), nil)
}
// PrometheusCollector implements prometheus.Collector.
type PrometheusCollector struct {
infoProvider subcontainersInfoProvider
infoProvider infoProvider
errors prometheus.Gauge
containerMetrics []containerMetric
}
// NewPrometheusCollector returns a new PrometheusCollector.
func NewPrometheusCollector(infoProvider subcontainersInfoProvider) *PrometheusCollector {
func NewPrometheusCollector(infoProvider infoProvider) *PrometheusCollector {
c := &PrometheusCollector{
infoProvider: infoProvider,
errors: prometheus.NewGauge(prometheus.GaugeOpts{
@ -441,18 +443,29 @@ func NewPrometheusCollector(infoProvider subcontainersInfoProvider) *PrometheusC
return c
}
var (
versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version & cadvisor version.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion"}, nil)
)
// Describe describes all the metrics ever exported by cadvisor. It
// implements prometheus.PrometheusCollector.
func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
c.errors.Describe(ch)
for _, cm := range c.containerMetrics {
ch <- cm.desc()
ch <- cm.desc([]string{})
}
ch <- versionInfoDesc
}
// Collect fetches the stats from all containers and delivers them as
// Prometheus metrics. It implements prometheus.PrometheusCollector.
func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
c.collectVersionInfo(ch)
c.collectContainersInfo(ch)
c.errors.Collect(ch)
}
func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) {
containers, err := c.infoProvider.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1})
if err != nil {
c.errors.Set(1)
@ -460,20 +473,63 @@ func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
return
}
for _, container := range containers {
baseLabels := []string{"id"}
id := container.Name
name := id
if len(container.Aliases) > 0 {
name = container.Aliases[0]
baseLabels = append(baseLabels, "name")
}
image := container.Spec.Image
stats := container.Stats[0]
if len(image) > 0 {
baseLabels = append(baseLabels, "image")
}
baseLabelValues := []string{id, name, image}[:len(baseLabels)]
// Container spec
desc := prometheus.NewDesc("container_start_time_seconds", "Start time of the container since unix epoch in seconds.", baseLabels, nil)
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(container.Spec.CreationTime.Unix()), baseLabelValues...)
if container.Spec.HasCpu {
desc := prometheus.NewDesc("container_spec_cpu_shares", "CPU share of the container.", baseLabels, nil)
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(container.Spec.Cpu.Limit), baseLabelValues...)
}
if container.Spec.HasMemory {
desc := prometheus.NewDesc("container_spec_memory_limit_bytes", "Memory limit for the container.", baseLabels, nil)
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(container.Spec.Memory.Limit), baseLabelValues...)
desc = prometheus.NewDesc("container_spec_memory_swap_limit_bytes", "Memory swap limit for the container.", baseLabels, nil)
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(container.Spec.Memory.SwapLimit), baseLabelValues...)
}
// Now for the actual metrics
stats := container.Stats[0]
for _, cm := range c.containerMetrics {
desc := cm.desc()
desc := cm.desc(baseLabels)
for _, metricValue := range cm.getValues(stats) {
ch <- prometheus.MustNewConstMetric(desc, cm.valueType, float64(metricValue.value), append([]string{name, id, image}, metricValue.labels...)...)
ch <- prometheus.MustNewConstMetric(desc, cm.valueType, float64(metricValue.value), append(baseLabelValues, metricValue.labels...)...)
}
}
}
c.errors.Collect(ch)
}
func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) {
versionInfo, err := c.infoProvider.GetVersionInfo()
if err != nil {
c.errors.Set(1)
glog.Warningf("Couldn't get version info: %s", err)
return
}
ch <- prometheus.MustNewConstMetric(versionInfoDesc, prometheus.GaugeValue, 1, []string{versionInfo.KernelVersion, versionInfo.ContainerOsVersion, versionInfo.DockerVersion, versionInfo.CadvisorVersion}...)
}
// Size after which we consider memory to be "unlimited". This is not
// MaxInt64 due to rounding by the kernel.
const maxMemorySize = uint64(1 << 62)
func specMemoryValue(v uint64) float64 {
if v > maxMemorySize {
return 0
}
return float64(v)
}

View File

@ -21,6 +21,7 @@ import (
"regexp"
"strings"
"testing"
"time"
info "github.com/google/cadvisor/info/v1"
"github.com/prometheus/client_golang/prometheus"
@ -28,14 +29,25 @@ import (
type testSubcontainersInfoProvider struct{}
func (p testSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
return &info.VersionInfo{
KernelVersion: "4.1.6-200.fc22.x86_64",
ContainerOsVersion: "Fedora 22 (Twenty Two)",
DockerVersion: "1.8.1",
CadvisorVersion: "0.16.0",
}, nil
}
func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
return []*info.ContainerInfo{
{
ContainerReference: info.ContainerReference{
Name: "testcontainer",
Aliases: []string{"testcontaineralias"},
},
Spec: info.ContainerSpec{
Image: "test",
CreationTime: time.Unix(1257894000, 0),
},
Stats: []*info.ContainerStats{
{

View File

@ -1,116 +1,122 @@
# HELP cadvisor_version_info A metric with a constant '1' value labeled by kernel version, OS version, docker version & cadvisor version.
# TYPE cadvisor_version_info gauge
cadvisor_version_info{cadvisorVersion="0.16.0",dockerVersion="1.8.1",kernelVersion="4.1.6-200.fc22.x86_64",osVersion="Fedora 22 (Twenty Two)"} 1
# HELP container_cpu_system_seconds_total Cumulative system cpu time consumed in seconds.
# TYPE container_cpu_system_seconds_total counter
container_cpu_system_seconds_total{id="testcontainer",image="test",name="testcontainer"} 7e-09
container_cpu_system_seconds_total{id="testcontainer",image="test",name="testcontaineralias"} 7e-09
# HELP container_cpu_usage_seconds_total Cumulative cpu time consumed per cpu in seconds.
# TYPE container_cpu_usage_seconds_total counter
container_cpu_usage_seconds_total{cpu="cpu00",id="testcontainer",image="test",name="testcontainer"} 2e-09
container_cpu_usage_seconds_total{cpu="cpu01",id="testcontainer",image="test",name="testcontainer"} 3e-09
container_cpu_usage_seconds_total{cpu="cpu02",id="testcontainer",image="test",name="testcontainer"} 4e-09
container_cpu_usage_seconds_total{cpu="cpu03",id="testcontainer",image="test",name="testcontainer"} 5e-09
container_cpu_usage_seconds_total{cpu="cpu00",id="testcontainer",image="test",name="testcontaineralias"} 2e-09
container_cpu_usage_seconds_total{cpu="cpu01",id="testcontainer",image="test",name="testcontaineralias"} 3e-09
container_cpu_usage_seconds_total{cpu="cpu02",id="testcontainer",image="test",name="testcontaineralias"} 4e-09
container_cpu_usage_seconds_total{cpu="cpu03",id="testcontainer",image="test",name="testcontaineralias"} 5e-09
# HELP container_cpu_user_seconds_total Cumulative user cpu time consumed in seconds.
# TYPE container_cpu_user_seconds_total counter
container_cpu_user_seconds_total{id="testcontainer",image="test",name="testcontainer"} 6e-09
container_cpu_user_seconds_total{id="testcontainer",image="test",name="testcontaineralias"} 6e-09
# HELP container_fs_io_current Number of I/Os currently in progress
# TYPE container_fs_io_current gauge
container_fs_io_current{device="sda1",id="testcontainer",image="test",name="testcontainer"} 42
container_fs_io_current{device="sda2",id="testcontainer",image="test",name="testcontainer"} 47
container_fs_io_current{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 42
container_fs_io_current{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 47
# HELP container_fs_io_time_seconds_total Cumulative count of seconds spent doing I/Os
# TYPE container_fs_io_time_seconds_total counter
container_fs_io_time_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 4.3e-08
container_fs_io_time_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 4.8e-08
container_fs_io_time_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 4.3e-08
container_fs_io_time_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 4.8e-08
# HELP container_fs_io_time_weighted_seconds_total Cumulative weighted I/O time in seconds
# TYPE container_fs_io_time_weighted_seconds_total counter
container_fs_io_time_weighted_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 4.4e-08
container_fs_io_time_weighted_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 4.9e-08
container_fs_io_time_weighted_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 4.4e-08
container_fs_io_time_weighted_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 4.9e-08
# HELP container_fs_limit_bytes Number of bytes that can be consumed by the container on this filesystem.
# TYPE container_fs_limit_bytes gauge
container_fs_limit_bytes{device="sda1",id="testcontainer",image="test",name="testcontainer"} 22
container_fs_limit_bytes{device="sda2",id="testcontainer",image="test",name="testcontainer"} 37
container_fs_limit_bytes{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 22
container_fs_limit_bytes{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 37
# HELP container_fs_read_seconds_total Cumulative count of seconds spent reading
# TYPE container_fs_read_seconds_total counter
container_fs_read_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 2.7e-08
container_fs_read_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 4.2e-08
container_fs_read_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 2.7e-08
container_fs_read_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 4.2e-08
# HELP container_fs_reads_merged_total Cumulative count of reads merged
# TYPE container_fs_reads_merged_total counter
container_fs_reads_merged_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 25
container_fs_reads_merged_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 40
container_fs_reads_merged_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 25
container_fs_reads_merged_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 40
# HELP container_fs_reads_total Cumulative count of reads completed
# TYPE container_fs_reads_total counter
container_fs_reads_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 24
container_fs_reads_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 39
container_fs_reads_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 24
container_fs_reads_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 39
# HELP container_fs_sector_reads_total Cumulative count of sector reads completed
# TYPE container_fs_sector_reads_total counter
container_fs_sector_reads_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 26
container_fs_sector_reads_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 41
container_fs_sector_reads_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 26
container_fs_sector_reads_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 41
# HELP container_fs_sector_writes_total Cumulative count of sector writes completed
# TYPE container_fs_sector_writes_total counter
container_fs_sector_writes_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 40
container_fs_sector_writes_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 45
container_fs_sector_writes_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 40
container_fs_sector_writes_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 45
# HELP container_fs_usage_bytes Number of bytes that are consumed by the container on this filesystem.
# TYPE container_fs_usage_bytes gauge
container_fs_usage_bytes{device="sda1",id="testcontainer",image="test",name="testcontainer"} 23
container_fs_usage_bytes{device="sda2",id="testcontainer",image="test",name="testcontainer"} 38
container_fs_usage_bytes{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 23
container_fs_usage_bytes{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 38
# HELP container_fs_write_seconds_total Cumulative count of seconds spent writing
# TYPE container_fs_write_seconds_total counter
container_fs_write_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 4.1e-08
container_fs_write_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 4.6e-08
container_fs_write_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 4.1e-08
container_fs_write_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 4.6e-08
# HELP container_fs_writes_merged_total Cumulative count of writes merged
# TYPE container_fs_writes_merged_total counter
container_fs_writes_merged_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 39
container_fs_writes_merged_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 44
container_fs_writes_merged_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 39
container_fs_writes_merged_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 44
# HELP container_fs_writes_total Cumulative count of writes completed
# TYPE container_fs_writes_total counter
container_fs_writes_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 28
container_fs_writes_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 43
container_fs_writes_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 28
container_fs_writes_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 43
# HELP container_last_seen Last time a container was seen by the exporter
# TYPE container_last_seen gauge
container_last_seen{id="testcontainer",image="test",name="testcontainer"} 1.426203694e+09
container_last_seen{id="testcontainer",image="test",name="testcontaineralias"} 1.426203694e+09
# HELP container_memory_failures_total Cumulative count of memory allocation failures.
# TYPE container_memory_failures_total counter
container_memory_failures_total{id="testcontainer",image="test",name="testcontainer",scope="container",type="pgfault"} 10
container_memory_failures_total{id="testcontainer",image="test",name="testcontainer",scope="container",type="pgmajfault"} 11
container_memory_failures_total{id="testcontainer",image="test",name="testcontainer",scope="hierarchy",type="pgfault"} 12
container_memory_failures_total{id="testcontainer",image="test",name="testcontainer",scope="hierarchy",type="pgmajfault"} 13
container_memory_failures_total{id="testcontainer",image="test",name="testcontaineralias",scope="container",type="pgfault"} 10
container_memory_failures_total{id="testcontainer",image="test",name="testcontaineralias",scope="container",type="pgmajfault"} 11
container_memory_failures_total{id="testcontainer",image="test",name="testcontaineralias",scope="hierarchy",type="pgfault"} 12
container_memory_failures_total{id="testcontainer",image="test",name="testcontaineralias",scope="hierarchy",type="pgmajfault"} 13
# HELP container_memory_usage_bytes Current memory usage in bytes.
# TYPE container_memory_usage_bytes gauge
container_memory_usage_bytes{id="testcontainer",image="test",name="testcontainer"} 8
container_memory_usage_bytes{id="testcontainer",image="test",name="testcontaineralias"} 8
# HELP container_memory_working_set_bytes Current working set in bytes.
# TYPE container_memory_working_set_bytes gauge
container_memory_working_set_bytes{id="testcontainer",image="test",name="testcontainer"} 9
container_memory_working_set_bytes{id="testcontainer",image="test",name="testcontaineralias"} 9
# HELP container_network_receive_bytes_total Cumulative count of bytes received
# TYPE container_network_receive_bytes_total counter
container_network_receive_bytes_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 14
container_network_receive_bytes_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 14
# HELP container_network_receive_errors_total Cumulative count of errors encountered while receiving
# TYPE container_network_receive_errors_total counter
container_network_receive_errors_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 16
container_network_receive_errors_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 16
# HELP container_network_receive_packets_dropped_total Cumulative count of packets dropped while receiving
# TYPE container_network_receive_packets_dropped_total counter
container_network_receive_packets_dropped_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 17
container_network_receive_packets_dropped_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 17
# HELP container_network_receive_packets_total Cumulative count of packets received
# TYPE container_network_receive_packets_total counter
container_network_receive_packets_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 15
container_network_receive_packets_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 15
# HELP container_network_transmit_bytes_total Cumulative count of bytes transmitted
# TYPE container_network_transmit_bytes_total counter
container_network_transmit_bytes_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 18
container_network_transmit_bytes_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 18
# HELP container_network_transmit_errors_total Cumulative count of errors encountered while transmitting
# TYPE container_network_transmit_errors_total counter
container_network_transmit_errors_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 20
container_network_transmit_errors_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 20
# HELP container_network_transmit_packets_dropped_total Cumulative count of packets dropped while transmitting
# TYPE container_network_transmit_packets_dropped_total counter
container_network_transmit_packets_dropped_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 21
container_network_transmit_packets_dropped_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 21
# HELP container_network_transmit_packets_total Cumulative count of packets transmitted
# TYPE container_network_transmit_packets_total counter
container_network_transmit_packets_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 19
container_network_transmit_packets_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 19
# HELP container_scrape_error 1 if there was an error while getting container metrics, 0 otherwise
# TYPE container_scrape_error gauge
container_scrape_error 0
# HELP container_start_time_seconds Start time of the container since unix epoch in seconds.
# TYPE container_start_time_seconds gauge
container_start_time_seconds{id="testcontainer",image="test",name="testcontaineralias"} 1.257894e+09
# HELP container_tasks_state Number of tasks in given state
# TYPE container_tasks_state gauge
container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="iowaiting"} 54
container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="running"} 51
container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="sleeping"} 50
container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="stopped"} 52
container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="uninterruptible"} 53
container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="iowaiting"} 54
container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="running"} 51
container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="sleeping"} 50
container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="stopped"} 52
container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="uninterruptible"} 53
# HELP http_request_duration_microseconds The HTTP request latencies in microseconds.
# TYPE http_request_duration_microseconds summary
http_request_duration_microseconds{handler="prometheus",quantile="0.5"} 0