From bac3a5d839d8e3680e5142026c04a34a8cb86136 Mon Sep 17 00:00:00 2001
From: Julius Volz
Date: Thu, 12 Mar 2015 22:11:49 +0100
Subject: [PATCH] Add Prometheus tests and fix metric type bug.

---
 info/v1/container.go                |  37 ++++---
 metrics/prometheus.go               |  16 +--
 metrics/prometheus_test.go          | 145 ++++++++++++++++++++++++++
 metrics/testdata/prometheus_metrics | 155 ++++++++++++++++++++++++++++
 4 files changed, 330 insertions(+), 23 deletions(-)
 create mode 100644 metrics/prometheus_test.go
 create mode 100644 metrics/testdata/prometheus_metrics

diff --git a/info/v1/container.go b/info/v1/container.go
index 8f40d067..14ce7d95 100644
--- a/info/v1/container.go
+++ b/info/v1/container.go
@@ -234,25 +234,28 @@ type LoadStats struct {
 	NrIoWait uint64 `json:"nr_io_wait"`
 }
 
+// CPU usage time statistics.
+type CpuUsage struct {
+	// Total CPU usage.
+	// Units: nanoseconds
+	Total uint64 `json:"total"`
+
+	// Per CPU/core usage of the container.
+	// Unit: nanoseconds.
+	PerCpu []uint64 `json:"per_cpu_usage,omitempty"`
+
+	// Time spent in user space.
+	// Unit: nanoseconds
+	User uint64 `json:"user"`
+
+	// Time spent in kernel space.
+	// Unit: nanoseconds
+	System uint64 `json:"system"`
+}
+
 // All CPU usage metrics are cumulative from the creation of the container
 type CpuStats struct {
-	Usage struct {
-		// Total CPU usage.
-		// Units: nanoseconds
-		Total uint64 `json:"total"`
-
-		// Per CPU/core usage of the container.
-		// Unit: nanoseconds.
-		PerCpu []uint64 `json:"per_cpu_usage,omitempty"`
-
-		// Time spent in user space.
-		// Unit: nanoseconds
-		User uint64 `json:"user"`
-
-		// Time spent in kernel space.
-		// Unit: nanoseconds
-		System uint64 `json:"system"`
-	} `json:"usage"`
+	Usage CpuUsage `json:"usage"`
 	// Smoothed average of number of runnable threads x 1000.
 	// We multiply by thousand to avoid using floats, while preserving precision.
 	// Load is smoothed over the last 10 seconds. Instantaneous value can be read
diff --git a/metrics/prometheus.go b/metrics/prometheus.go
index c7805763..fcf40b54 100644
--- a/metrics/prometheus.go
+++ b/metrics/prometheus.go
@@ -20,10 +20,14 @@ import (
 
 	"github.com/golang/glog"
 	info "github.com/google/cadvisor/info/v1"
-	"github.com/google/cadvisor/manager"
 	"github.com/prometheus/client_golang/prometheus"
 )
 
+type subcontainersInfoProvider interface {
+	// Get information about all subcontainers of the specified container (includes self).
+	SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
+}
+
 type prometheusMetric struct {
 	valueType prometheus.ValueType
 	value     float64
@@ -32,7 +36,7 @@ type prometheusMetric struct {
 
 // PrometheusCollector implements prometheus.Collector.
 type PrometheusCollector struct {
-	manager manager.Manager
+	infoProvider subcontainersInfoProvider
 
 	errors   prometheus.Gauge
 	lastSeen *prometheus.Desc
@@ -77,9 +81,9 @@ type PrometheusCollector struct {
 }
 
 // NewPrometheusCollector returns a new PrometheusCollector.
-func NewPrometheusCollector(manager manager.Manager) *PrometheusCollector {
+func NewPrometheusCollector(infoProvider subcontainersInfoProvider) *PrometheusCollector {
 	c := &PrometheusCollector{
-		manager: manager,
+		infoProvider: infoProvider,
 		errors: prometheus.NewGauge(prometheus.GaugeOpts{
 			Namespace: "container",
 			Name:      "scrape_error",
@@ -283,7 +287,7 @@ func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
 // Collect fetches the stats from all containers and delivers them as
 // Prometheus metrics. It implements prometheus.Collector.
 func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
-	containers, err := c.manager.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1})
+	containers, err := c.infoProvider.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1})
 	if err != nil {
 		c.errors.Set(1)
 		glog.Warningf("Couldn't get containers: %s", err)
@@ -330,7 +334,7 @@ func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
 		c.networkTxDropped: {{valueType: prometheus.CounterValue, value: float64(stats.Network.TxDropped)}},
 	} {
 		for _, m := range metrics {
-			ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, float64(m.value), append(m.labels, name, id)...)
+			ch <- prometheus.MustNewConstMetric(desc, m.valueType, float64(m.value), append(m.labels, name, id)...)
 		}
 	}
 
diff --git a/metrics/prometheus_test.go b/metrics/prometheus_test.go
new file mode 100644
index 00000000..ac701891
--- /dev/null
+++ b/metrics/prometheus_test.go
@@ -0,0 +1,145 @@
+// Copyright 2014 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package metrics
+
+import (
+	"io/ioutil"
+	"net/http"
+	"net/http/httptest"
+	"regexp"
+	"strings"
+	"testing"
+
+	info "github.com/google/cadvisor/info/v1"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+type testSubcontainersInfoProvider struct{}
+
+func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
+	return []*info.ContainerInfo{
+		{
+			ContainerReference: info.ContainerReference{
+				Name: "testcontainer",
+			},
+			Stats: []*info.ContainerStats{
+				{
+					Cpu: info.CpuStats{
+						Usage: info.CpuUsage{
+							Total:  1,
+							PerCpu: []uint64{2, 3, 4, 5},
+							User:   6,
+							System: 7,
+						},
+					},
+					Memory: info.MemoryStats{
+						Usage:      8,
+						WorkingSet: 9,
+						ContainerData: info.MemoryStatsMemoryData{
+							Pgfault:    10,
+							Pgmajfault: 11,
+						},
+						HierarchicalData: info.MemoryStatsMemoryData{
+							Pgfault:    12,
+							Pgmajfault: 13,
+						},
+					},
+					Network: info.NetworkStats{
+						RxBytes:   14,
+						RxPackets: 15,
+						RxErrors:  16,
+						RxDropped: 17,
+						TxBytes:   18,
+						TxPackets: 19,
+						TxErrors:  20,
+						TxDropped: 21,
+					},
+					Filesystem: []info.FsStats{
+						{
+							Device:          "sda1",
+							Limit:           22,
+							Usage:           23,
+							ReadsCompleted:  24,
+							ReadsMerged:     25,
+							SectorsRead:     26,
+							ReadTime:        27,
+							WritesCompleted: 28,
+							WritesMerged:    39,
+							SectorsWritten:  40,
+							WriteTime:       41,
+							IoInProgress:    42,
+							IoTime:          43,
+							WeightedIoTime:  44,
+						},
+						{
+							Device:          "sda2",
+							Limit:           37,
+							Usage:           38,
+							ReadsCompleted:  39,
+							ReadsMerged:     40,
+							SectorsRead:     41,
+							ReadTime:        42,
+							WritesCompleted: 43,
+							WritesMerged:    44,
+							SectorsWritten:  45,
+							WriteTime:       46,
+							IoInProgress:    47,
+							IoTime:          48,
+							WeightedIoTime:  49,
+						},
+					},
+					TaskStats: info.LoadStats{
+						NrSleeping:        50,
+						NrRunning:         51,
+						NrStopped:         52,
+						NrUninterruptible: 53,
+						NrIoWait:          54,
+					},
+				},
+			},
+		},
+	}, nil
+}
+
+func TestPrometheusCollector(t *testing.T) {
+	prometheus.MustRegister(NewPrometheusCollector(testSubcontainersInfoProvider{}))
+
+	rw := httptest.NewRecorder()
+	prometheus.Handler().ServeHTTP(rw, &http.Request{})
+
+	metricsFile := "testdata/prometheus_metrics"
+	wantMetrics, err := ioutil.ReadFile(metricsFile)
+	if err != nil {
+		t.Fatalf("unable to read input test file %s: %v", metricsFile, err)
+	}
+
+	wantLines := strings.Split(string(wantMetrics), "\n")
+	gotLines := strings.Split(rw.Body.String(), "\n")
+
+	// Until the Prometheus Go client library offers better testability
+	// (https://github.com/prometheus/client_golang/issues/58), we simply compare
+	// verbatim text-format metrics outputs, but ignore certain metric lines
+	// whose value depends on the current time or local circumstances.
+	includeRe := regexp.MustCompile("^(# HELP |# TYPE |)container_")
+	ignoreRe := regexp.MustCompile("^container_last_seen{")
+	for i, want := range wantLines {
+		if !includeRe.MatchString(want) || ignoreRe.MatchString(want) {
+			continue
+		}
+		if want != gotLines[i] {
+			t.Fatalf("want %s, got %s", want, gotLines[i])
+		}
+	}
+}
diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics
new file mode 100644
index 00000000..70d5d977
--- /dev/null
+++ b/metrics/testdata/prometheus_metrics
@@ -0,0 +1,155 @@
+# HELP container_cpu_system_seconds_total Cumulative system cpu time consumed in seconds.
+# TYPE container_cpu_system_seconds_total counter
+container_cpu_system_seconds_total{id="testcontainer",name="testcontainer"} 7e-09
+# HELP container_cpu_usage_seconds_total Cumulative cpu time consumed per cpu in seconds.
+# TYPE container_cpu_usage_seconds_total counter
+container_cpu_usage_seconds_total{cpu="cpu00",id="testcontainer",name="testcontainer"} 2e-09
+container_cpu_usage_seconds_total{cpu="cpu01",id="testcontainer",name="testcontainer"} 3e-09
+container_cpu_usage_seconds_total{cpu="cpu02",id="testcontainer",name="testcontainer"} 4e-09
+container_cpu_usage_seconds_total{cpu="cpu03",id="testcontainer",name="testcontainer"} 5e-09
+# HELP container_cpu_user_seconds_total Cumulative user cpu time consumed in seconds.
+# TYPE container_cpu_user_seconds_total counter
+container_cpu_user_seconds_total{id="testcontainer",name="testcontainer"} 6e-09
+# HELP container_fs_io_current Number of I/Os currently in progress
+# TYPE container_fs_io_current gauge
+container_fs_io_current{device="sda1",id="testcontainer",name="testcontainer"} 42
+container_fs_io_current{device="sda2",id="testcontainer",name="testcontainer"} 47
+# HELP container_fs_io_time_seconds_total Cumulative count of seconds spent doing I/Os
+# TYPE container_fs_io_time_seconds_total counter
+container_fs_io_time_seconds_total{device="sda1",id="testcontainer",name="testcontainer"} 4.3e-08
+container_fs_io_time_seconds_total{device="sda2",id="testcontainer",name="testcontainer"} 4.8e-08
+# HELP container_fs_io_time_weighted_seconds_total Cumulative weighted I/O time in seconds
+# TYPE container_fs_io_time_weighted_seconds_total counter
+container_fs_io_time_weighted_seconds_total{device="sda1",id="testcontainer",name="testcontainer"} 4.4e-08
+container_fs_io_time_weighted_seconds_total{device="sda2",id="testcontainer",name="testcontainer"} 4.9e-08
+# HELP container_fs_limit_bytes Number of bytes that can be consumed by the container on this filesystem.
+# TYPE container_fs_limit_bytes gauge
+container_fs_limit_bytes{device="sda1",id="testcontainer",name="testcontainer"} 22
+container_fs_limit_bytes{device="sda2",id="testcontainer",name="testcontainer"} 37
+# HELP container_fs_read_seconds_total Cumulative count of seconds spent reading
+# TYPE container_fs_read_seconds_total counter
+container_fs_read_seconds_total{device="sda1",id="testcontainer",name="testcontainer"} 2.7e-08
+container_fs_read_seconds_total{device="sda2",id="testcontainer",name="testcontainer"} 4.2e-08
+# HELP container_fs_reads_merged_total Cumulative count of reads merged
+# TYPE container_fs_reads_merged_total counter
+container_fs_reads_merged_total{device="sda1",id="testcontainer",name="testcontainer"} 25
+container_fs_reads_merged_total{device="sda2",id="testcontainer",name="testcontainer"} 40
+# HELP container_fs_reads_total Cumulative count of reads completed
+# TYPE container_fs_reads_total counter
+container_fs_reads_total{device="sda1",id="testcontainer",name="testcontainer"} 24
+container_fs_reads_total{device="sda2",id="testcontainer",name="testcontainer"} 39
+# HELP container_fs_sector_reads_total Cumulative count of sector reads completed
+# TYPE container_fs_sector_reads_total counter
+container_fs_sector_reads_total{device="sda1",id="testcontainer",name="testcontainer"} 26
+container_fs_sector_reads_total{device="sda2",id="testcontainer",name="testcontainer"} 41
+# HELP container_fs_sector_writes_total Cumulative count of sector writes completed
+# TYPE container_fs_sector_writes_total counter
+container_fs_sector_writes_total{device="sda1",id="testcontainer",name="testcontainer"} 40
+container_fs_sector_writes_total{device="sda2",id="testcontainer",name="testcontainer"} 45
+# HELP container_fs_usage_bytes Number of bytes that are consumed by the container on this filesystem.
+# TYPE container_fs_usage_bytes gauge
+container_fs_usage_bytes{device="sda1",id="testcontainer",name="testcontainer"} 23
+container_fs_usage_bytes{device="sda2",id="testcontainer",name="testcontainer"} 38
+# HELP container_fs_write_seconds_total Cumulative count of seconds spent writing
+# TYPE container_fs_write_seconds_total counter
+container_fs_write_seconds_total{device="sda1",id="testcontainer",name="testcontainer"} 4.1e-08
+container_fs_write_seconds_total{device="sda2",id="testcontainer",name="testcontainer"} 4.6e-08
+# HELP container_fs_writes_merged_total Cumulative count of writes merged
+# TYPE container_fs_writes_merged_total counter
+container_fs_writes_merged_total{device="sda1",id="testcontainer",name="testcontainer"} 39
+container_fs_writes_merged_total{device="sda2",id="testcontainer",name="testcontainer"} 44
+# HELP container_fs_writes_total Cumulative count of writes completed
+# TYPE container_fs_writes_total counter
+container_fs_writes_total{device="sda1",id="testcontainer",name="testcontainer"} 28
+container_fs_writes_total{device="sda2",id="testcontainer",name="testcontainer"} 43
+# HELP container_last_seen Last time a container was seen by the exporter
+# TYPE container_last_seen gauge
+container_last_seen{id="testcontainer",name="testcontainer"} 1.426203694e+09
+# HELP container_memory_failures_total Cumulative count of memory allocation failures.
+# TYPE container_memory_failures_total counter
+container_memory_failures_total{id="testcontainer",name="testcontainer",scope="container",type="pgfault"} 10
+container_memory_failures_total{id="testcontainer",name="testcontainer",scope="container",type="pgmajfault"} 11
+container_memory_failures_total{id="testcontainer",name="testcontainer",scope="hierarchy",type="pgfault"} 12
+container_memory_failures_total{id="testcontainer",name="testcontainer",scope="hierarchy",type="pgmajfault"} 13
+# HELP container_memory_usage_bytes Current memory usage in bytes.
+# TYPE container_memory_usage_bytes gauge
+container_memory_usage_bytes{id="testcontainer",name="testcontainer"} 8
+# HELP container_memory_working_set_bytes Current working set in bytes.
+# TYPE container_memory_working_set_bytes gauge
+container_memory_working_set_bytes{id="testcontainer",name="testcontainer"} 9
+# HELP container_network_receive_bytes_total Cumulative count of bytes received
+# TYPE container_network_receive_bytes_total counter
+container_network_receive_bytes_total{id="testcontainer",name="testcontainer"} 14
+# HELP container_network_receive_errors_total Cumulative count of errors encountered while receiving
+# TYPE container_network_receive_errors_total counter
+container_network_receive_errors_total{id="testcontainer",name="testcontainer"} 16
+# HELP container_network_receive_packets_dropped_total Cumulative count of packets dropped while receiving
+# TYPE container_network_receive_packets_dropped_total counter
+container_network_receive_packets_dropped_total{id="testcontainer",name="testcontainer"} 17
+# HELP container_network_receive_packets_total Cumulative count of packets received
+# TYPE container_network_receive_packets_total counter
+container_network_receive_packets_total{id="testcontainer",name="testcontainer"} 15
+# HELP container_network_transmit_bytes_total Cumulative count of bytes transmitted
+# TYPE container_network_transmit_bytes_total counter
+container_network_transmit_bytes_total{id="testcontainer",name="testcontainer"} 18
+# HELP container_network_transmit_errors_total Cumulative count of errors encountered while transmitting
+# TYPE container_network_transmit_errors_total counter
+container_network_transmit_errors_total{id="testcontainer",name="testcontainer"} 20
+# HELP container_network_transmit_packets_dropped_total Cumulative count of packets dropped while transmitting
+# TYPE container_network_transmit_packets_dropped_total counter
+container_network_transmit_packets_dropped_total{id="testcontainer",name="testcontainer"} 21
+# HELP container_network_transmit_packets_total Cumulative count of packets transmitted
+# TYPE container_network_transmit_packets_total counter
+container_network_transmit_packets_total{id="testcontainer",name="testcontainer"} 19
+# HELP container_scrape_error 1 if there was an error while getting container metrics, 0 otherwise
+# TYPE container_scrape_error gauge
+container_scrape_error 0
+# HELP container_tasks_state Number of tasks in given state
+# TYPE container_tasks_state gauge
+container_tasks_state{id="testcontainer",name="testcontainer",state="iowaiting"} 54
+container_tasks_state{id="testcontainer",name="testcontainer",state="running"} 51
+container_tasks_state{id="testcontainer",name="testcontainer",state="sleeping"} 50
+container_tasks_state{id="testcontainer",name="testcontainer",state="stopped"} 52
+container_tasks_state{id="testcontainer",name="testcontainer",state="uninterruptible"} 53
+# HELP http_request_duration_microseconds The HTTP request latencies in microseconds.
+# TYPE http_request_duration_microseconds summary
+http_request_duration_microseconds{handler="prometheus",quantile="0.5"} 0
+http_request_duration_microseconds{handler="prometheus",quantile="0.9"} 0
+http_request_duration_microseconds{handler="prometheus",quantile="0.99"} 0
+http_request_duration_microseconds_sum{handler="prometheus"} 0
+http_request_duration_microseconds_count{handler="prometheus"} 0
+# HELP http_request_size_bytes The HTTP request sizes in bytes.
+# TYPE http_request_size_bytes summary
+http_request_size_bytes{handler="prometheus",quantile="0.5"} 0
+http_request_size_bytes{handler="prometheus",quantile="0.9"} 0
+http_request_size_bytes{handler="prometheus",quantile="0.99"} 0
+http_request_size_bytes_sum{handler="prometheus"} 0
+http_request_size_bytes_count{handler="prometheus"} 0
+# HELP http_response_size_bytes The HTTP response sizes in bytes.
+# TYPE http_response_size_bytes summary
+http_response_size_bytes{handler="prometheus",quantile="0.5"} 0
+http_response_size_bytes{handler="prometheus",quantile="0.9"} 0
+http_response_size_bytes{handler="prometheus",quantile="0.99"} 0
+http_response_size_bytes_sum{handler="prometheus"} 0
+http_response_size_bytes_count{handler="prometheus"} 0
+# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
+# TYPE process_cpu_seconds_total counter
+process_cpu_seconds_total 0
+# HELP process_goroutines Number of goroutines that currently exist.
+# TYPE process_goroutines gauge
+process_goroutines 16
+# HELP process_max_fds Maximum number of open file descriptors.
+# TYPE process_max_fds gauge
+process_max_fds 1024
+# HELP process_open_fds Number of open file descriptors.
+# TYPE process_open_fds gauge
+process_open_fds 4
+# HELP process_resident_memory_bytes Resident memory size in bytes.
+# TYPE process_resident_memory_bytes gauge
+process_resident_memory_bytes 7.74144e+06
+# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
+# TYPE process_start_time_seconds gauge
+process_start_time_seconds 1.42620369439e+09
+# HELP process_virtual_memory_bytes Virtual memory size in bytes.
+# TYPE process_virtual_memory_bytes gauge
+process_virtual_memory_bytes 1.16420608e+08
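
Side note on using the refactoring above: NewPrometheusCollector no longer needs a full manager.Manager, only something with a SubcontainersInfo method, which is what lets the test substitute the in-memory testSubcontainersInfoProvider stub. A minimal sketch of the production-side wiring under that assumption follows; the package name, the serveMetrics helper, and the :8080 address are illustrative only, while prometheus.MustRegister and prometheus.Handler are the same client_golang calls the test itself uses:

    // Package metricsutil is an illustrative sketch, not part of this patch.
    package metricsutil

    import (
            "net/http"

            "github.com/google/cadvisor/manager"
            "github.com/google/cadvisor/metrics"
            "github.com/prometheus/client_golang/prometheus"
    )

    // serveMetrics registers the cAdvisor collector with the default
    // Prometheus registry and exposes it over HTTP. A manager.Manager can be
    // passed in directly because it satisfies the unexported
    // subcontainersInfoProvider interface via its SubcontainersInfo method.
    func serveMetrics(m manager.Manager) error {
            prometheus.MustRegister(metrics.NewPrometheusCollector(m))
            http.Handle("/metrics", prometheus.Handler())
            return http.ListenAndServe(":8080", nil)
    }

Keeping the collector's dependency this narrow also lets the metrics package stop importing the manager package, as the first hunk of metrics/prometheus.go does.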