diff --git a/container/docker/handler.go b/container/docker/handler.go index c2ff1af6..7bf57e46 100644 --- a/container/docker/handler.go +++ b/container/docker/handler.go @@ -136,8 +136,7 @@ func newDockerContainerHandler( handler.pid = ctnr.State.Pid // Add the name and bare ID as aliases of the container. - handler.aliases = append(handler.aliases, strings.TrimPrefix(ctnr.Name, "/")) - handler.aliases = append(handler.aliases, id) + handler.aliases = append(handler.aliases, strings.TrimPrefix(ctnr.Name, "/"), id) handler.labels = ctnr.Config.Labels handler.image = ctnr.Config.Image handler.networkMode = ctnr.HostConfig.NetworkMode diff --git a/metrics/prometheus.go b/metrics/prometheus.go index a0c05779..077853a9 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -24,9 +24,11 @@ import ( ) // This will usually be manager.Manager, but can be swapped out for testing. -type subcontainersInfoProvider interface { +type infoProvider interface { // Get information about all subcontainers of the specified container (includes self). SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) + // Get information about the machine. + GetVersionInfo() (*info.VersionInfo, error) } // metricValue describes a single metric value for a given set of label values @@ -60,19 +62,19 @@ type containerMetric struct { getValues func(s *info.ContainerStats) metricValues } -func (cm *containerMetric) desc() *prometheus.Desc { - return prometheus.NewDesc(cm.name, cm.help, append([]string{"name", "id", "image"}, cm.extraLabels...), nil) +func (cm *containerMetric) desc(baseLabels []string) *prometheus.Desc { + return prometheus.NewDesc(cm.name, cm.help, append(baseLabels, cm.extraLabels...), nil) } // PrometheusCollector implements prometheus.Collector. 
type PrometheusCollector struct { - infoProvider subcontainersInfoProvider + infoProvider infoProvider errors prometheus.Gauge containerMetrics []containerMetric } // NewPrometheusCollector returns a new PrometheusCollector. -func NewPrometheusCollector(infoProvider subcontainersInfoProvider) *PrometheusCollector { +func NewPrometheusCollector(infoProvider infoProvider) *PrometheusCollector { c := &PrometheusCollector{ infoProvider: infoProvider, errors: prometheus.NewGauge(prometheus.GaugeOpts{ @@ -441,18 +443,29 @@ func NewPrometheusCollector(infoProvider subcontainersInfoProvider) *PrometheusC return c } +var ( + versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version & cadvisor version.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion"}, nil) +) + // Describe describes all the metrics ever exported by cadvisor. It // implements prometheus.PrometheusCollector. func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) { c.errors.Describe(ch) for _, cm := range c.containerMetrics { - ch <- cm.desc() + ch <- cm.desc([]string{}) } + ch <- versionInfoDesc } // Collect fetches the stats from all containers and delivers them as // Prometheus metrics. It implements prometheus.PrometheusCollector. 
func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) { + c.collectVersionInfo(ch) + c.collectContainersInfo(ch) + c.errors.Collect(ch) +} + +func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) { containers, err := c.infoProvider.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1}) if err != nil { c.errors.Set(1) @@ -460,20 +473,63 @@ func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) { return } for _, container := range containers { + baseLabels := []string{"id"} id := container.Name name := id if len(container.Aliases) > 0 { name = container.Aliases[0] + baseLabels = append(baseLabels, "name") } image := container.Spec.Image - stats := container.Stats[0] + if len(image) > 0 { + baseLabels = append(baseLabels, "image") + } + baseLabelValues := []string{id}; if len(container.Aliases) > 0 { baseLabelValues = append(baseLabelValues, name) }; if len(image) > 0 { baseLabelValues = append(baseLabelValues, image) } + // Container spec + desc := prometheus.NewDesc("container_start_time_seconds", "Start time of the container since unix epoch in seconds.", baseLabels, nil) + ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(container.Spec.CreationTime.Unix()), baseLabelValues...) + + if container.Spec.HasCpu { + desc := prometheus.NewDesc("container_spec_cpu_shares", "CPU share of the container.", baseLabels, nil) + ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, float64(container.Spec.Cpu.Limit), baseLabelValues...) + } + + if container.Spec.HasMemory { + desc := prometheus.NewDesc("container_spec_memory_limit_bytes", "Memory limit for the container.", baseLabels, nil) + ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(container.Spec.Memory.Limit), baseLabelValues...) + desc = prometheus.NewDesc("container_spec_memory_swap_limit_bytes", "Memory swap limit for the container.", baseLabels, nil) + ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, specMemoryValue(container.Spec.Memory.SwapLimit), baseLabelValues...) 
+ } + + // Now for the actual metrics + stats := container.Stats[0] for _, cm := range c.containerMetrics { - desc := cm.desc() + desc := cm.desc(baseLabels) for _, metricValue := range cm.getValues(stats) { - ch <- prometheus.MustNewConstMetric(desc, cm.valueType, float64(metricValue.value), append([]string{name, id, image}, metricValue.labels...)...) + ch <- prometheus.MustNewConstMetric(desc, cm.valueType, float64(metricValue.value), append(baseLabelValues, metricValue.labels...)...) } } } - c.errors.Collect(ch) +} + +func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) { + versionInfo, err := c.infoProvider.GetVersionInfo() + if err != nil { + c.errors.Set(1) + glog.Warningf("Couldn't get version info: %s", err) + return + } + ch <- prometheus.MustNewConstMetric(versionInfoDesc, prometheus.GaugeValue, 1, []string{versionInfo.KernelVersion, versionInfo.ContainerOsVersion, versionInfo.DockerVersion, versionInfo.CadvisorVersion}...) +} + +// Size after which we consider memory to be "unlimited". This is not +// MaxInt64 due to rounding by the kernel. 
+const maxMemorySize = uint64(1 << 62) + +func specMemoryValue(v uint64) float64 { + if v > maxMemorySize { + return 0 + } + return float64(v) } diff --git a/metrics/prometheus_test.go b/metrics/prometheus_test.go index 0bb6c5e1..8c7370be 100644 --- a/metrics/prometheus_test.go +++ b/metrics/prometheus_test.go @@ -21,6 +21,7 @@ import ( "regexp" "strings" "testing" + "time" info "github.com/google/cadvisor/info/v1" "github.com/prometheus/client_golang/prometheus" @@ -28,14 +29,25 @@ import ( type testSubcontainersInfoProvider struct{} +func (p testSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) { + return &info.VersionInfo{ + KernelVersion: "4.1.6-200.fc22.x86_64", + ContainerOsVersion: "Fedora 22 (Twenty Two)", + DockerVersion: "1.8.1", + CadvisorVersion: "0.16.0", + }, nil +} + func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) { return []*info.ContainerInfo{ { ContainerReference: info.ContainerReference{ - Name: "testcontainer", + Name: "testcontainer", + Aliases: []string{"testcontaineralias"}, }, Spec: info.ContainerSpec{ - Image: "test", + Image: "test", + CreationTime: time.Unix(1257894000, 0), }, Stats: []*info.ContainerStats{ { diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index 5e7de5ef..9413fc20 100644 --- a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -1,116 +1,122 @@ +# HELP cadvisor_version_info A metric with a constant '1' value labeled by kernel version, OS version, docker version & cadvisor version. +# TYPE cadvisor_version_info gauge +cadvisor_version_info{cadvisorVersion="0.16.0",dockerVersion="1.8.1",kernelVersion="4.1.6-200.fc22.x86_64",osVersion="Fedora 22 (Twenty Two)"} 1 # HELP container_cpu_system_seconds_total Cumulative system cpu time consumed in seconds. 
# TYPE container_cpu_system_seconds_total counter -container_cpu_system_seconds_total{id="testcontainer",image="test",name="testcontainer"} 7e-09 +container_cpu_system_seconds_total{id="testcontainer",image="test",name="testcontaineralias"} 7e-09 # HELP container_cpu_usage_seconds_total Cumulative cpu time consumed per cpu in seconds. # TYPE container_cpu_usage_seconds_total counter -container_cpu_usage_seconds_total{cpu="cpu00",id="testcontainer",image="test",name="testcontainer"} 2e-09 -container_cpu_usage_seconds_total{cpu="cpu01",id="testcontainer",image="test",name="testcontainer"} 3e-09 -container_cpu_usage_seconds_total{cpu="cpu02",id="testcontainer",image="test",name="testcontainer"} 4e-09 -container_cpu_usage_seconds_total{cpu="cpu03",id="testcontainer",image="test",name="testcontainer"} 5e-09 +container_cpu_usage_seconds_total{cpu="cpu00",id="testcontainer",image="test",name="testcontaineralias"} 2e-09 +container_cpu_usage_seconds_total{cpu="cpu01",id="testcontainer",image="test",name="testcontaineralias"} 3e-09 +container_cpu_usage_seconds_total{cpu="cpu02",id="testcontainer",image="test",name="testcontaineralias"} 4e-09 +container_cpu_usage_seconds_total{cpu="cpu03",id="testcontainer",image="test",name="testcontaineralias"} 5e-09 # HELP container_cpu_user_seconds_total Cumulative user cpu time consumed in seconds. 
# TYPE container_cpu_user_seconds_total counter -container_cpu_user_seconds_total{id="testcontainer",image="test",name="testcontainer"} 6e-09 +container_cpu_user_seconds_total{id="testcontainer",image="test",name="testcontaineralias"} 6e-09 # HELP container_fs_io_current Number of I/Os currently in progress # TYPE container_fs_io_current gauge -container_fs_io_current{device="sda1",id="testcontainer",image="test",name="testcontainer"} 42 -container_fs_io_current{device="sda2",id="testcontainer",image="test",name="testcontainer"} 47 +container_fs_io_current{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 42 +container_fs_io_current{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 47 # HELP container_fs_io_time_seconds_total Cumulative count of seconds spent doing I/Os # TYPE container_fs_io_time_seconds_total counter -container_fs_io_time_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 4.3e-08 -container_fs_io_time_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 4.8e-08 +container_fs_io_time_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 4.3e-08 +container_fs_io_time_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 4.8e-08 # HELP container_fs_io_time_weighted_seconds_total Cumulative weighted I/O time in seconds # TYPE container_fs_io_time_weighted_seconds_total counter -container_fs_io_time_weighted_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 4.4e-08 -container_fs_io_time_weighted_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 4.9e-08 +container_fs_io_time_weighted_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 4.4e-08 +container_fs_io_time_weighted_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 4.9e-08 # HELP 
container_fs_limit_bytes Number of bytes that can be consumed by the container on this filesystem. # TYPE container_fs_limit_bytes gauge -container_fs_limit_bytes{device="sda1",id="testcontainer",image="test",name="testcontainer"} 22 -container_fs_limit_bytes{device="sda2",id="testcontainer",image="test",name="testcontainer"} 37 +container_fs_limit_bytes{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 22 +container_fs_limit_bytes{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 37 # HELP container_fs_read_seconds_total Cumulative count of seconds spent reading # TYPE container_fs_read_seconds_total counter -container_fs_read_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 2.7e-08 -container_fs_read_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 4.2e-08 +container_fs_read_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 2.7e-08 +container_fs_read_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 4.2e-08 # HELP container_fs_reads_merged_total Cumulative count of reads merged # TYPE container_fs_reads_merged_total counter -container_fs_reads_merged_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 25 -container_fs_reads_merged_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 40 +container_fs_reads_merged_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 25 +container_fs_reads_merged_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 40 # HELP container_fs_reads_total Cumulative count of reads completed # TYPE container_fs_reads_total counter -container_fs_reads_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 24 -container_fs_reads_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 39 
+container_fs_reads_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 24 +container_fs_reads_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 39 # HELP container_fs_sector_reads_total Cumulative count of sector reads completed # TYPE container_fs_sector_reads_total counter -container_fs_sector_reads_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 26 -container_fs_sector_reads_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 41 +container_fs_sector_reads_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 26 +container_fs_sector_reads_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 41 # HELP container_fs_sector_writes_total Cumulative count of sector writes completed # TYPE container_fs_sector_writes_total counter -container_fs_sector_writes_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 40 -container_fs_sector_writes_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 45 +container_fs_sector_writes_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 40 +container_fs_sector_writes_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 45 # HELP container_fs_usage_bytes Number of bytes that are consumed by the container on this filesystem. 
# TYPE container_fs_usage_bytes gauge -container_fs_usage_bytes{device="sda1",id="testcontainer",image="test",name="testcontainer"} 23 -container_fs_usage_bytes{device="sda2",id="testcontainer",image="test",name="testcontainer"} 38 +container_fs_usage_bytes{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 23 +container_fs_usage_bytes{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 38 # HELP container_fs_write_seconds_total Cumulative count of seconds spent writing # TYPE container_fs_write_seconds_total counter -container_fs_write_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 4.1e-08 -container_fs_write_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 4.6e-08 +container_fs_write_seconds_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 4.1e-08 +container_fs_write_seconds_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 4.6e-08 # HELP container_fs_writes_merged_total Cumulative count of writes merged # TYPE container_fs_writes_merged_total counter -container_fs_writes_merged_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 39 -container_fs_writes_merged_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 44 +container_fs_writes_merged_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 39 +container_fs_writes_merged_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 44 # HELP container_fs_writes_total Cumulative count of writes completed # TYPE container_fs_writes_total counter -container_fs_writes_total{device="sda1",id="testcontainer",image="test",name="testcontainer"} 28 -container_fs_writes_total{device="sda2",id="testcontainer",image="test",name="testcontainer"} 43 +container_fs_writes_total{device="sda1",id="testcontainer",image="test",name="testcontaineralias"} 28 
+container_fs_writes_total{device="sda2",id="testcontainer",image="test",name="testcontaineralias"} 43 # HELP container_last_seen Last time a container was seen by the exporter # TYPE container_last_seen gauge -container_last_seen{id="testcontainer",image="test",name="testcontainer"} 1.426203694e+09 +container_last_seen{id="testcontainer",image="test",name="testcontaineralias"} 1.426203694e+09 # HELP container_memory_failures_total Cumulative count of memory allocation failures. # TYPE container_memory_failures_total counter -container_memory_failures_total{id="testcontainer",image="test",name="testcontainer",scope="container",type="pgfault"} 10 -container_memory_failures_total{id="testcontainer",image="test",name="testcontainer",scope="container",type="pgmajfault"} 11 -container_memory_failures_total{id="testcontainer",image="test",name="testcontainer",scope="hierarchy",type="pgfault"} 12 -container_memory_failures_total{id="testcontainer",image="test",name="testcontainer",scope="hierarchy",type="pgmajfault"} 13 +container_memory_failures_total{id="testcontainer",image="test",name="testcontaineralias",scope="container",type="pgfault"} 10 +container_memory_failures_total{id="testcontainer",image="test",name="testcontaineralias",scope="container",type="pgmajfault"} 11 +container_memory_failures_total{id="testcontainer",image="test",name="testcontaineralias",scope="hierarchy",type="pgfault"} 12 +container_memory_failures_total{id="testcontainer",image="test",name="testcontaineralias",scope="hierarchy",type="pgmajfault"} 13 # HELP container_memory_usage_bytes Current memory usage in bytes. # TYPE container_memory_usage_bytes gauge -container_memory_usage_bytes{id="testcontainer",image="test",name="testcontainer"} 8 +container_memory_usage_bytes{id="testcontainer",image="test",name="testcontaineralias"} 8 # HELP container_memory_working_set_bytes Current working set in bytes. 
# TYPE container_memory_working_set_bytes gauge -container_memory_working_set_bytes{id="testcontainer",image="test",name="testcontainer"} 9 +container_memory_working_set_bytes{id="testcontainer",image="test",name="testcontaineralias"} 9 # HELP container_network_receive_bytes_total Cumulative count of bytes received # TYPE container_network_receive_bytes_total counter -container_network_receive_bytes_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 14 +container_network_receive_bytes_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 14 # HELP container_network_receive_errors_total Cumulative count of errors encountered while receiving # TYPE container_network_receive_errors_total counter -container_network_receive_errors_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 16 +container_network_receive_errors_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 16 # HELP container_network_receive_packets_dropped_total Cumulative count of packets dropped while receiving # TYPE container_network_receive_packets_dropped_total counter -container_network_receive_packets_dropped_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 17 +container_network_receive_packets_dropped_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 17 # HELP container_network_receive_packets_total Cumulative count of packets received # TYPE container_network_receive_packets_total counter -container_network_receive_packets_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 15 +container_network_receive_packets_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 15 # HELP container_network_transmit_bytes_total Cumulative count of bytes transmitted # TYPE container_network_transmit_bytes_total counter 
-container_network_transmit_bytes_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 18 +container_network_transmit_bytes_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 18 # HELP container_network_transmit_errors_total Cumulative count of errors encountered while transmitting # TYPE container_network_transmit_errors_total counter -container_network_transmit_errors_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 20 +container_network_transmit_errors_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 20 # HELP container_network_transmit_packets_dropped_total Cumulative count of packets dropped while transmitting # TYPE container_network_transmit_packets_dropped_total counter -container_network_transmit_packets_dropped_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 21 +container_network_transmit_packets_dropped_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 21 # HELP container_network_transmit_packets_total Cumulative count of packets transmitted # TYPE container_network_transmit_packets_total counter -container_network_transmit_packets_total{id="testcontainer",image="test",interface="eth0",name="testcontainer"} 19 +container_network_transmit_packets_total{id="testcontainer",image="test",interface="eth0",name="testcontaineralias"} 19 # HELP container_scrape_error 1 if there was an error while getting container metrics, 0 otherwise # TYPE container_scrape_error gauge container_scrape_error 0 +# HELP container_start_time_seconds Start time of the container since unix epoch in seconds. 
+# TYPE container_start_time_seconds gauge +container_start_time_seconds{id="testcontainer",image="test",name="testcontaineralias"} 1.257894e+09 # HELP container_tasks_state Number of tasks in given state # TYPE container_tasks_state gauge -container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="iowaiting"} 54 -container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="running"} 51 -container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="sleeping"} 50 -container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="stopped"} 52 -container_tasks_state{id="testcontainer",image="test",name="testcontainer",state="uninterruptible"} 53 +container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="iowaiting"} 54 +container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="running"} 51 +container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="sleeping"} 50 +container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="stopped"} 52 +container_tasks_state{id="testcontainer",image="test",name="testcontaineralias",state="uninterruptible"} 53 # HELP http_request_duration_microseconds The HTTP request latencies in microseconds. # TYPE http_request_duration_microseconds summary http_request_duration_microseconds{handler="prometheus",quantile="0.5"} 0