Add Prometheus tests and fix metric type bug.

This commit is contained in:
Julius Volz 2015-03-12 22:11:49 +01:00
parent cf9c76c06c
commit bac3a5d839
4 changed files with 330 additions and 23 deletions

View File

@ -234,9 +234,8 @@ type LoadStats struct {
NrIoWait uint64 `json:"nr_io_wait"` NrIoWait uint64 `json:"nr_io_wait"`
} }
// All CPU usage metrics are cumulative from the creation of the container // CPU usage time statistics.
type CpuStats struct { type CpuUsage struct {
Usage struct {
// Total CPU usage. // Total CPU usage.
// Units: nanoseconds // Units: nanoseconds
Total uint64 `json:"total"` Total uint64 `json:"total"`
@ -252,7 +251,11 @@ type CpuStats struct {
// Time spent in kernel space. // Time spent in kernel space.
// Unit: nanoseconds // Unit: nanoseconds
System uint64 `json:"system"` System uint64 `json:"system"`
} `json:"usage"` }
// All CPU usage metrics are cumulative from the creation of the container
type CpuStats struct {
Usage CpuUsage `json:"usage"`
// Smoothed average of number of runnable threads x 1000. // Smoothed average of number of runnable threads x 1000.
// We multiply by thousand to avoid using floats, but preserving precision. // We multiply by thousand to avoid using floats, but preserving precision.
// Load is smoothed over the last 10 seconds. Instantaneous value can be read // Load is smoothed over the last 10 seconds. Instantaneous value can be read

View File

@ -20,10 +20,14 @@ import (
"github.com/golang/glog" "github.com/golang/glog"
info "github.com/google/cadvisor/info/v1" info "github.com/google/cadvisor/info/v1"
"github.com/google/cadvisor/manager"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
) )
type subcontainersInfoProvider interface {
// Get information about all subcontainers of the specified container (includes self).
SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
}
type prometheusMetric struct { type prometheusMetric struct {
valueType prometheus.ValueType valueType prometheus.ValueType
value float64 value float64
@ -32,7 +36,7 @@ type prometheusMetric struct {
// PrometheusCollector implements prometheus.Collector. // PrometheusCollector implements prometheus.Collector.
type PrometheusCollector struct { type PrometheusCollector struct {
manager manager.Manager infoProvider subcontainersInfoProvider
errors prometheus.Gauge errors prometheus.Gauge
lastSeen *prometheus.Desc lastSeen *prometheus.Desc
@ -77,9 +81,9 @@ type PrometheusCollector struct {
} }
// NewPrometheusCollector returns a new PrometheusCollector. // NewPrometheusCollector returns a new PrometheusCollector.
func NewPrometheusCollector(manager manager.Manager) *PrometheusCollector { func NewPrometheusCollector(infoProvider subcontainersInfoProvider) *PrometheusCollector {
c := &PrometheusCollector{ c := &PrometheusCollector{
manager: manager, infoProvider: infoProvider,
errors: prometheus.NewGauge(prometheus.GaugeOpts{ errors: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "container", Namespace: "container",
Name: "scrape_error", Name: "scrape_error",
@ -283,7 +287,7 @@ func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
// Collect fetches the stats from all containers and delivers them as // Collect fetches the stats from all containers and delivers them as
// Prometheus metrics. It implements prometheus.PrometheusCollector. // Prometheus metrics. It implements prometheus.PrometheusCollector.
func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) { func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
containers, err := c.manager.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1}) containers, err := c.infoProvider.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1})
if err != nil { if err != nil {
c.errors.Set(1) c.errors.Set(1)
glog.Warning("Couldn't get containers: %s", err) glog.Warning("Couldn't get containers: %s", err)
@ -330,7 +334,7 @@ func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
c.networkTxDropped: {{valueType: prometheus.CounterValue, value: float64(stats.Network.TxDropped)}}, c.networkTxDropped: {{valueType: prometheus.CounterValue, value: float64(stats.Network.TxDropped)}},
} { } {
for _, m := range metrics { for _, m := range metrics {
ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, float64(m.value), append(m.labels, name, id)...) ch <- prometheus.MustNewConstMetric(desc, m.valueType, float64(m.value), append(m.labels, name, id)...)
} }
} }

145
metrics/prometheus_test.go Normal file
View File

@ -0,0 +1,145 @@
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"io/ioutil"
"net/http"
"net/http/httptest"
"regexp"
"strings"
"testing"
info "github.com/google/cadvisor/info/v1"
"github.com/prometheus/client_golang/prometheus"
)
// testSubcontainersInfoProvider is a stub implementation of the collector's
// subcontainersInfoProvider seam. It lets TestPrometheusCollector exercise
// PrometheusCollector without a real container manager.
type testSubcontainersInfoProvider struct{}

// SubcontainersInfo ignores its arguments and returns a single canned
// container whose stats use distinct values so that every exported metric
// line can be matched one-to-one against testdata/prometheus_metrics.
// NOTE(review): the values here are load-bearing — changing any of them
// breaks the golden-file comparison in TestPrometheusCollector.
func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	return []*info.ContainerInfo{
		{
			ContainerReference: info.ContainerReference{
				Name: "testcontainer",
			},
			Stats: []*info.ContainerStats{
				{
					Cpu: info.CpuStats{
						Usage: info.CpuUsage{
							Total:  1,
							PerCpu: []uint64{2, 3, 4, 5},
							User:   6,
							System: 7,
						},
					},
					Memory: info.MemoryStats{
						Usage:      8,
						WorkingSet: 9,
						ContainerData: info.MemoryStatsMemoryData{
							Pgfault:    10,
							Pgmajfault: 11,
						},
						HierarchicalData: info.MemoryStatsMemoryData{
							Pgfault:    12,
							Pgmajfault: 13,
						},
					},
					Network: info.NetworkStats{
						RxBytes:   14,
						RxPackets: 15,
						RxErrors:  16,
						RxDropped: 17,
						TxBytes:   18,
						TxPackets: 19,
						TxErrors:  20,
						TxDropped: 21,
					},
					Filesystem: []info.FsStats{
						{
							Device:          "sda1",
							Limit:           22,
							Usage:           23,
							ReadsCompleted:  24,
							ReadsMerged:     25,
							SectorsRead:     26,
							ReadTime:        27,
							WritesCompleted: 28,
							// Jumps from 28 to 39 here — presumably deliberate
							// to keep values unique across both devices; the
							// golden file encodes the same numbers.
							WritesMerged:   39,
							SectorsWritten: 40,
							WriteTime:      41,
							IoInProgress:   42,
							IoTime:         43,
							WeightedIoTime: 44,
						},
						{
							Device:          "sda2",
							Limit:           37,
							Usage:           38,
							ReadsCompleted:  39,
							ReadsMerged:     40,
							SectorsRead:     41,
							ReadTime:        42,
							WritesCompleted: 43,
							WritesMerged:    44,
							SectorsWritten:  45,
							WriteTime:       46,
							IoInProgress:    47,
							IoTime:          48,
							WeightedIoTime:  49,
						},
					},
					TaskStats: info.LoadStats{
						NrSleeping:        50,
						NrRunning:         51,
						NrStopped:         52,
						NrUninterruptible: 53,
						NrIoWait:          54,
					},
				},
			},
		},
	}, nil
}
// TestPrometheusCollector registers a collector backed by canned container
// stats, scrapes it through the standard Prometheus HTTP handler, and
// compares the text-format output line-by-line against the golden file in
// testdata/prometheus_metrics.
func TestPrometheusCollector(t *testing.T) {
	prometheus.MustRegister(NewPrometheusCollector(testSubcontainersInfoProvider{}))

	rw := httptest.NewRecorder()
	prometheus.Handler().ServeHTTP(rw, &http.Request{})

	metricsFile := "testdata/prometheus_metrics"
	wantMetrics, err := ioutil.ReadFile(metricsFile)
	if err != nil {
		// Include the underlying error so a failure explains itself.
		t.Fatalf("unable to read input test file %s: %v", metricsFile, err)
	}

	wantLines := strings.Split(string(wantMetrics), "\n")
	gotLines := strings.Split(rw.Body.String(), "\n")

	// Until the Prometheus Go client library offers better testability
	// (https://github.com/prometheus/client_golang/issues/58), we simply compare
	// verbatim text-format metrics outputs, but ignore certain metric lines
	// whose value depends on the current time or local circumstances.
	includeRe := regexp.MustCompile("^(# HELP |# TYPE |)container_")
	ignoreRe := regexp.MustCompile("^container_last_seen{")
	for i, want := range wantLines {
		if !includeRe.MatchString(want) || ignoreRe.MatchString(want) {
			continue
		}
		// Guard the index: if the scrape output is shorter than the golden
		// file, unchecked gotLines[i] would panic instead of reporting a
		// useful failure.
		if i >= len(gotLines) {
			t.Fatalf("want %q at line %d, but output ended after %d lines", want, i, len(gotLines))
		}
		// %q makes whitespace/empty-string mismatches visible in the diff.
		if want != gotLines[i] {
			t.Fatalf("want %q, got %q", want, gotLines[i])
		}
	}
}

155
metrics/testdata/prometheus_metrics vendored Normal file
View File

@ -0,0 +1,155 @@
# HELP container_cpu_system_seconds_total Cumulative system cpu time consumed in seconds.
# TYPE container_cpu_system_seconds_total counter
container_cpu_system_seconds_total{id="testcontainer",name="testcontainer"} 7e-09
# HELP container_cpu_usage_seconds_total Cumulative cpu time consumed per cpu in seconds.
# TYPE container_cpu_usage_seconds_total counter
container_cpu_usage_seconds_total{cpu="cpu00",id="testcontainer",name="testcontainer"} 2e-09
container_cpu_usage_seconds_total{cpu="cpu01",id="testcontainer",name="testcontainer"} 3e-09
container_cpu_usage_seconds_total{cpu="cpu02",id="testcontainer",name="testcontainer"} 4e-09
container_cpu_usage_seconds_total{cpu="cpu03",id="testcontainer",name="testcontainer"} 5e-09
# HELP container_cpu_user_seconds_total Cumulative user cpu time consumed in seconds.
# TYPE container_cpu_user_seconds_total counter
container_cpu_user_seconds_total{id="testcontainer",name="testcontainer"} 6e-09
# HELP container_fs_io_current Number of I/Os currently in progress
# TYPE container_fs_io_current gauge
container_fs_io_current{device="sda1",id="testcontainer",name="testcontainer"} 42
container_fs_io_current{device="sda2",id="testcontainer",name="testcontainer"} 47
# HELP container_fs_io_time_seconds_total Cumulative count of seconds spent doing I/Os
# TYPE container_fs_io_time_seconds_total counter
container_fs_io_time_seconds_total{device="sda1",id="testcontainer",name="testcontainer"} 4.3e-08
container_fs_io_time_seconds_total{device="sda2",id="testcontainer",name="testcontainer"} 4.8e-08
# HELP container_fs_io_time_weighted_seconds_total Cumulative weighted I/O time in seconds
# TYPE container_fs_io_time_weighted_seconds_total counter
container_fs_io_time_weighted_seconds_total{device="sda1",id="testcontainer",name="testcontainer"} 4.4e-08
container_fs_io_time_weighted_seconds_total{device="sda2",id="testcontainer",name="testcontainer"} 4.9e-08
# HELP container_fs_limit_bytes Number of bytes that can be consumed by the container on this filesystem.
# TYPE container_fs_limit_bytes gauge
container_fs_limit_bytes{device="sda1",id="testcontainer",name="testcontainer"} 22
container_fs_limit_bytes{device="sda2",id="testcontainer",name="testcontainer"} 37
# HELP container_fs_read_seconds_total Cumulative count of seconds spent reading
# TYPE container_fs_read_seconds_total counter
container_fs_read_seconds_total{device="sda1",id="testcontainer",name="testcontainer"} 2.7e-08
container_fs_read_seconds_total{device="sda2",id="testcontainer",name="testcontainer"} 4.2e-08
# HELP container_fs_reads_merged_total Cumulative count of reads merged
# TYPE container_fs_reads_merged_total counter
container_fs_reads_merged_total{device="sda1",id="testcontainer",name="testcontainer"} 25
container_fs_reads_merged_total{device="sda2",id="testcontainer",name="testcontainer"} 40
# HELP container_fs_reads_total Cumulative count of reads completed
# TYPE container_fs_reads_total counter
container_fs_reads_total{device="sda1",id="testcontainer",name="testcontainer"} 24
container_fs_reads_total{device="sda2",id="testcontainer",name="testcontainer"} 39
# HELP container_fs_sector_reads_total Cumulative count of sector reads completed
# TYPE container_fs_sector_reads_total counter
container_fs_sector_reads_total{device="sda1",id="testcontainer",name="testcontainer"} 26
container_fs_sector_reads_total{device="sda2",id="testcontainer",name="testcontainer"} 41
# HELP container_fs_sector_writes_total Cumulative count of sector writes completed
# TYPE container_fs_sector_writes_total counter
container_fs_sector_writes_total{device="sda1",id="testcontainer",name="testcontainer"} 40
container_fs_sector_writes_total{device="sda2",id="testcontainer",name="testcontainer"} 45
# HELP container_fs_usage_bytes Number of bytes that are consumed by the container on this filesystem.
# TYPE container_fs_usage_bytes gauge
container_fs_usage_bytes{device="sda1",id="testcontainer",name="testcontainer"} 23
container_fs_usage_bytes{device="sda2",id="testcontainer",name="testcontainer"} 38
# HELP container_fs_write_seconds_total Cumulative count of seconds spent writing
# TYPE container_fs_write_seconds_total counter
container_fs_write_seconds_total{device="sda1",id="testcontainer",name="testcontainer"} 4.1e-08
container_fs_write_seconds_total{device="sda2",id="testcontainer",name="testcontainer"} 4.6e-08
# HELP container_fs_writes_merged_total Cumulative count of writes merged
# TYPE container_fs_writes_merged_total counter
container_fs_writes_merged_total{device="sda1",id="testcontainer",name="testcontainer"} 39
container_fs_writes_merged_total{device="sda2",id="testcontainer",name="testcontainer"} 44
# HELP container_fs_writes_total Cumulative count of writes completed
# TYPE container_fs_writes_total counter
container_fs_writes_total{device="sda1",id="testcontainer",name="testcontainer"} 28
container_fs_writes_total{device="sda2",id="testcontainer",name="testcontainer"} 43
# HELP container_last_seen Last time a container was seen by the exporter
# TYPE container_last_seen gauge
container_last_seen{id="testcontainer",name="testcontainer"} 1.426203694e+09
# HELP container_memory_failures_total Cumulative count of memory allocation failures.
# TYPE container_memory_failures_total counter
container_memory_failures_total{id="testcontainer",name="testcontainer",scope="container",type="pgfault"} 10
container_memory_failures_total{id="testcontainer",name="testcontainer",scope="container",type="pgmajfault"} 11
container_memory_failures_total{id="testcontainer",name="testcontainer",scope="hierarchy",type="pgfault"} 12
container_memory_failures_total{id="testcontainer",name="testcontainer",scope="hierarchy",type="pgmajfault"} 13
# HELP container_memory_usage_bytes Current memory usage in bytes.
# TYPE container_memory_usage_bytes gauge
container_memory_usage_bytes{id="testcontainer",name="testcontainer"} 8
# HELP container_memory_working_set_bytes Current working set in bytes.
# TYPE container_memory_working_set_bytes gauge
container_memory_working_set_bytes{id="testcontainer",name="testcontainer"} 9
# HELP container_network_receive_bytes_total Cumulative count of bytes received
# TYPE container_network_receive_bytes_total counter
container_network_receive_bytes_total{id="testcontainer",name="testcontainer"} 14
# HELP container_network_receive_errors_total Cumulative count of errors encountered while receiving
# TYPE container_network_receive_errors_total counter
container_network_receive_errors_total{id="testcontainer",name="testcontainer"} 16
# HELP container_network_receive_packets_dropped_total Cumulative count of packets dropped while receiving
# TYPE container_network_receive_packets_dropped_total counter
container_network_receive_packets_dropped_total{id="testcontainer",name="testcontainer"} 17
# HELP container_network_receive_packets_total Cumulative count of packets received
# TYPE container_network_receive_packets_total counter
container_network_receive_packets_total{id="testcontainer",name="testcontainer"} 15
# HELP container_network_transmit_bytes_total Cumulative count of bytes transmitted
# TYPE container_network_transmit_bytes_total counter
container_network_transmit_bytes_total{id="testcontainer",name="testcontainer"} 18
# HELP container_network_transmit_errors_total Cumulative count of errors encountered while transmitting
# TYPE container_network_transmit_errors_total counter
container_network_transmit_errors_total{id="testcontainer",name="testcontainer"} 20
# HELP container_network_transmit_packets_dropped_total Cumulative count of packets dropped while transmitting
# TYPE container_network_transmit_packets_dropped_total counter
container_network_transmit_packets_dropped_total{id="testcontainer",name="testcontainer"} 21
# HELP container_network_transmit_packets_total Cumulative count of packets transmitted
# TYPE container_network_transmit_packets_total counter
container_network_transmit_packets_total{id="testcontainer",name="testcontainer"} 19
# HELP container_scrape_error 1 if there was an error while getting container metrics, 0 otherwise
# TYPE container_scrape_error gauge
container_scrape_error 0
# HELP container_tasks_state Number of tasks in given state
# TYPE container_tasks_state gauge
container_tasks_state{id="testcontainer",name="testcontainer",state="iowaiting"} 54
container_tasks_state{id="testcontainer",name="testcontainer",state="running"} 51
container_tasks_state{id="testcontainer",name="testcontainer",state="sleeping"} 50
container_tasks_state{id="testcontainer",name="testcontainer",state="stopped"} 52
container_tasks_state{id="testcontainer",name="testcontainer",state="uninterruptible"} 53
# HELP http_request_duration_microseconds The HTTP request latencies in microseconds.
# TYPE http_request_duration_microseconds summary
http_request_duration_microseconds{handler="prometheus",quantile="0.5"} 0
http_request_duration_microseconds{handler="prometheus",quantile="0.9"} 0
http_request_duration_microseconds{handler="prometheus",quantile="0.99"} 0
http_request_duration_microseconds_sum{handler="prometheus"} 0
http_request_duration_microseconds_count{handler="prometheus"} 0
# HELP http_request_size_bytes The HTTP request sizes in bytes.
# TYPE http_request_size_bytes summary
http_request_size_bytes{handler="prometheus",quantile="0.5"} 0
http_request_size_bytes{handler="prometheus",quantile="0.9"} 0
http_request_size_bytes{handler="prometheus",quantile="0.99"} 0
http_request_size_bytes_sum{handler="prometheus"} 0
http_request_size_bytes_count{handler="prometheus"} 0
# HELP http_response_size_bytes The HTTP response sizes in bytes.
# TYPE http_response_size_bytes summary
http_response_size_bytes{handler="prometheus",quantile="0.5"} 0
http_response_size_bytes{handler="prometheus",quantile="0.9"} 0
http_response_size_bytes{handler="prometheus",quantile="0.99"} 0
http_response_size_bytes_sum{handler="prometheus"} 0
http_response_size_bytes_count{handler="prometheus"} 0
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 0
# HELP process_goroutines Number of goroutines that currently exist.
# TYPE process_goroutines gauge
process_goroutines 16
# HELP process_max_fds Maximum number of open file descriptors.
# TYPE process_max_fds gauge
process_max_fds 1024
# HELP process_open_fds Number of open file descriptors.
# TYPE process_open_fds gauge
process_open_fds 4
# HELP process_resident_memory_bytes Resident memory size in bytes.
# TYPE process_resident_memory_bytes gauge
process_resident_memory_bytes 7.74144e+06
# HELP process_start_time_seconds Start time of the process since unix epoch in seconds.
# TYPE process_start_time_seconds gauge
process_start_time_seconds 1.42620369439e+09
# HELP process_virtual_memory_bytes Virtual memory size in bytes.
# TYPE process_virtual_memory_bytes gauge
process_virtual_memory_bytes 1.16420608e+08