367 lines
13 KiB
Go
367 lines
13 KiB
Go
// Copyright 2014 Google Inc. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package metrics
|
|
|
|
import (
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/golang/glog"
|
|
info "github.com/google/cadvisor/info/v1"
|
|
"github.com/google/cadvisor/manager"
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
type prometheusMetric struct {
|
|
valueType prometheus.ValueType
|
|
value float64
|
|
labels []string
|
|
}
|
|
|
|
// PrometheusCollector implements prometheus.Collector.
|
|
type PrometheusCollector struct {
|
|
manager manager.Manager
|
|
|
|
errors prometheus.Gauge
|
|
lastSeen *prometheus.Desc
|
|
|
|
cpuUsageUserSeconds *prometheus.Desc
|
|
cpuUsageSystemSeconds *prometheus.Desc
|
|
cpuUsageSecondsPerCPU *prometheus.Desc
|
|
|
|
memoryUsageBytes *prometheus.Desc
|
|
memoryWorkingSet *prometheus.Desc
|
|
memoryFailures *prometheus.Desc
|
|
|
|
fsLimit *prometheus.Desc
|
|
fsUsage *prometheus.Desc
|
|
fsReads *prometheus.Desc
|
|
fsReadsSectors *prometheus.Desc
|
|
fsReadsMerged *prometheus.Desc
|
|
fsReadTime *prometheus.Desc
|
|
|
|
fsWrites *prometheus.Desc
|
|
fsWritesSectors *prometheus.Desc
|
|
fsWritesMerged *prometheus.Desc
|
|
fsWriteTime *prometheus.Desc
|
|
|
|
fsIoInProgress *prometheus.Desc
|
|
fsIoTime *prometheus.Desc
|
|
|
|
fsWeightedIoTime *prometheus.Desc
|
|
|
|
networkRxBytes *prometheus.Desc
|
|
networkRxPackets *prometheus.Desc
|
|
networkRxErrors *prometheus.Desc
|
|
networkRxDropped *prometheus.Desc
|
|
networkTxBytes *prometheus.Desc
|
|
networkTxPackets *prometheus.Desc
|
|
networkTxErrors *prometheus.Desc
|
|
networkTxDropped *prometheus.Desc
|
|
|
|
tasks *prometheus.Desc
|
|
|
|
descs []*prometheus.Desc
|
|
}
|
|
|
|
// NewPrometheusCollector returns a new PrometheusCollector.
|
|
func NewPrometheusCollector(manager manager.Manager) *PrometheusCollector {
|
|
c := &PrometheusCollector{
|
|
manager: manager,
|
|
errors: prometheus.NewGauge(prometheus.GaugeOpts{
|
|
Namespace: "container",
|
|
Name: "scrape_error",
|
|
Help: "1 if there was an error while getting container metrics, 0 otherwise",
|
|
}),
|
|
lastSeen: prometheus.NewDesc(
|
|
"container_last_seen",
|
|
"Last time a container was seen by the exporter",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
cpuUsageUserSeconds: prometheus.NewDesc(
|
|
"container_cpu_user_seconds_total",
|
|
"Cumulative user cpu time consumed in seconds.",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
cpuUsageSystemSeconds: prometheus.NewDesc(
|
|
"container_cpu_system_seconds_total",
|
|
"Cumulative system cpu time consumed in seconds.",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
cpuUsageSecondsPerCPU: prometheus.NewDesc(
|
|
"container_cpu_usage_seconds_total",
|
|
"Cumulative cpu time consumed per cpu in seconds.",
|
|
[]string{"name", "id", "cpu"},
|
|
nil),
|
|
memoryUsageBytes: prometheus.NewDesc(
|
|
"container_memory_usage_bytes",
|
|
"Current memory usage in bytes.",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
memoryWorkingSet: prometheus.NewDesc(
|
|
"container_memory_working_set_bytes",
|
|
"Current working set in bytes.",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
memoryFailures: prometheus.NewDesc(
|
|
"container_memory_failures_total",
|
|
"Cumulative count of memory allocation failures.",
|
|
[]string{"type", "scope", "name", "id"},
|
|
nil),
|
|
|
|
fsLimit: prometheus.NewDesc(
|
|
"container_fs_limit_bytes",
|
|
"Number of bytes that can be consumed by the container on this filesystem.",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsUsage: prometheus.NewDesc(
|
|
"container_fs_usage_bytes",
|
|
"Number of bytes that are consumed by the container on this filesystem.",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsReads: prometheus.NewDesc(
|
|
"container_fs_reads_total",
|
|
"Cumulative count of reads completed",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsReadsSectors: prometheus.NewDesc(
|
|
"container_fs_sector_reads_total",
|
|
"Cumulative count of sector reads completed",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsReadsMerged: prometheus.NewDesc(
|
|
"container_fs_reads_merged_total",
|
|
"Cumulative count of reads merged",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsReadTime: prometheus.NewDesc(
|
|
"container_fs_read_seconds_total",
|
|
"Cumulative count of seconds spent reading",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsWrites: prometheus.NewDesc(
|
|
"container_fs_writes_total",
|
|
"Cumulative count of writes completed",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsWritesSectors: prometheus.NewDesc(
|
|
"container_fs_sector_writes_total",
|
|
"Cumulative count of sector writes completed",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsWritesMerged: prometheus.NewDesc(
|
|
"container_fs_writes_merged_total",
|
|
"Cumulative count of writes merged",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsWriteTime: prometheus.NewDesc(
|
|
"container_fs_write_seconds_total",
|
|
"Cumulative count of seconds spent writing",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsIoInProgress: prometheus.NewDesc(
|
|
"container_fs_io_current",
|
|
"Number of I/Os currently in progress",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsIoTime: prometheus.NewDesc(
|
|
"container_fs_io_time_seconds_total",
|
|
"Cumulative count of seconds spent doing I/Os",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
fsWeightedIoTime: prometheus.NewDesc(
|
|
"container_fs_io_time_weighted_seconds_total",
|
|
"Cumulative count of seconds spent doing I/Os",
|
|
[]string{"name", "id", "device"},
|
|
nil),
|
|
networkRxBytes: prometheus.NewDesc(
|
|
"container_network_receive_bytes_total",
|
|
"Cumulative count of bytes received",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
networkRxPackets: prometheus.NewDesc(
|
|
"container_network_receive_packets_total",
|
|
"Cumulative count of packets received",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
networkRxDropped: prometheus.NewDesc(
|
|
"container_network_receive_packets_dropped_total",
|
|
"Cumulative count of bytes received",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
networkRxErrors: prometheus.NewDesc(
|
|
"container_network_receive_errors_total",
|
|
"Cumulative count of errors encountered while receiving",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
networkTxBytes: prometheus.NewDesc(
|
|
"container_network_transmit_bytes_total",
|
|
"Cumulative count of bytes transmitted",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
networkTxPackets: prometheus.NewDesc(
|
|
"container_network_transmit_packets_total",
|
|
"Cumulative count of packets transmitted",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
networkTxDropped: prometheus.NewDesc(
|
|
"container_network_transmit_packets_dropped_total",
|
|
"Cumulative count of bytes dropped",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
networkTxErrors: prometheus.NewDesc(
|
|
"container_network_transmit_errors_total",
|
|
"Cumulative count of errors encountered while transmitting",
|
|
[]string{"name", "id"},
|
|
nil),
|
|
|
|
tasks: prometheus.NewDesc(
|
|
"container_tasks_state",
|
|
"Number of tasks in given state",
|
|
[]string{"state", "name", "id"},
|
|
nil),
|
|
}
|
|
c.descs = []*prometheus.Desc{
|
|
c.lastSeen,
|
|
|
|
c.cpuUsageUserSeconds,
|
|
c.cpuUsageSystemSeconds,
|
|
|
|
c.memoryUsageBytes,
|
|
c.memoryWorkingSet,
|
|
c.memoryFailures,
|
|
|
|
c.fsLimit,
|
|
c.fsUsage,
|
|
c.fsReads,
|
|
c.fsReadsSectors,
|
|
c.fsReadsMerged,
|
|
c.fsReadTime,
|
|
c.fsWrites,
|
|
c.fsWritesSectors,
|
|
c.fsWritesMerged,
|
|
c.fsWriteTime,
|
|
c.fsIoInProgress,
|
|
c.fsIoTime,
|
|
c.fsWeightedIoTime,
|
|
|
|
c.networkRxBytes,
|
|
c.networkRxPackets,
|
|
c.networkRxErrors,
|
|
c.networkRxDropped,
|
|
c.networkTxBytes,
|
|
c.networkTxPackets,
|
|
c.networkTxErrors,
|
|
c.networkTxDropped,
|
|
|
|
c.tasks,
|
|
}
|
|
return c
|
|
}
|
|
|
|
// Describe describes all the metrics ever exported by cadvisor. It
|
|
// implements prometheus.PrometheusCollector.
|
|
func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
|
|
c.errors.Describe(ch)
|
|
for _, d := range c.descs {
|
|
ch <- d
|
|
}
|
|
}
|
|
|
|
// Collect fetches the stats from all containers and delivers them as
|
|
// Prometheus metrics. It implements prometheus.PrometheusCollector.
|
|
func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
|
|
containers, err := c.manager.SubcontainersInfo("/", &info.ContainerInfoRequest{NumStats: 1})
|
|
if err != nil {
|
|
c.errors.Set(1)
|
|
glog.Warning("Couldn't get containers: %s", err)
|
|
return
|
|
}
|
|
for _, container := range containers {
|
|
id := container.Name
|
|
name := id
|
|
if len(container.Aliases) > 0 {
|
|
name = container.Aliases[0]
|
|
}
|
|
stats := container.Stats[0]
|
|
|
|
for desc, metrics := range map[*prometheus.Desc][]prometheusMetric{
|
|
c.cpuUsageUserSeconds: {{valueType: prometheus.CounterValue, value: float64(stats.Cpu.Usage.User) / float64(time.Second)}},
|
|
c.cpuUsageSystemSeconds: {{valueType: prometheus.CounterValue, value: float64(stats.Cpu.Usage.System) / float64(time.Second)}},
|
|
|
|
c.memoryFailures: {
|
|
{valueType: prometheus.CounterValue, labels: []string{"pgfault", "container"}, value: float64(stats.Memory.ContainerData.Pgfault)},
|
|
{valueType: prometheus.CounterValue, labels: []string{"pgmajfault", "container"}, value: float64(stats.Memory.ContainerData.Pgmajfault)},
|
|
{valueType: prometheus.CounterValue, labels: []string{"pgfault", "hierarchy"}, value: float64(stats.Memory.HierarchicalData.Pgfault)},
|
|
{valueType: prometheus.CounterValue, labels: []string{"pgmajfault", "hierarchy"}, value: float64(stats.Memory.HierarchicalData.Pgmajfault)},
|
|
},
|
|
c.tasks: {
|
|
{valueType: prometheus.GaugeValue, labels: []string{"sleeping"}, value: float64(stats.TaskStats.NrSleeping)},
|
|
{valueType: prometheus.GaugeValue, labels: []string{"running"}, value: float64(stats.TaskStats.NrRunning)},
|
|
{valueType: prometheus.GaugeValue, labels: []string{"stopped"}, value: float64(stats.TaskStats.NrStopped)},
|
|
{valueType: prometheus.GaugeValue, labels: []string{"uninterruptible"}, value: float64(stats.TaskStats.NrUinterruptible)},
|
|
{valueType: prometheus.GaugeValue, labels: []string{"iowaiting"}, value: float64(stats.TaskStats.NrIoWait)},
|
|
},
|
|
|
|
c.lastSeen: {{valueType: prometheus.GaugeValue, value: float64(time.Now().Unix())}},
|
|
|
|
c.memoryUsageBytes: {{valueType: prometheus.GaugeValue, value: float64(stats.Memory.Usage)}},
|
|
c.memoryWorkingSet: {{valueType: prometheus.GaugeValue, value: float64(stats.Memory.WorkingSet)}},
|
|
|
|
c.networkRxBytes: {{valueType: prometheus.CounterValue, value: float64(stats.Network.RxBytes)}},
|
|
c.networkRxPackets: {{valueType: prometheus.CounterValue, value: float64(stats.Network.RxPackets)}},
|
|
c.networkRxErrors: {{valueType: prometheus.CounterValue, value: float64(stats.Network.RxErrors)}},
|
|
c.networkRxDropped: {{valueType: prometheus.CounterValue, value: float64(stats.Network.RxDropped)}},
|
|
c.networkTxBytes: {{valueType: prometheus.CounterValue, value: float64(stats.Network.TxBytes)}},
|
|
c.networkTxPackets: {{valueType: prometheus.CounterValue, value: float64(stats.Network.TxPackets)}},
|
|
c.networkTxErrors: {{valueType: prometheus.CounterValue, value: float64(stats.Network.TxErrors)}},
|
|
c.networkTxDropped: {{valueType: prometheus.CounterValue, value: float64(stats.Network.TxDropped)}},
|
|
} {
|
|
for _, m := range metrics {
|
|
ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, float64(m.value), append(m.labels, name, id)...)
|
|
}
|
|
}
|
|
|
|
// Metrics with dynamic labels
|
|
for i, value := range stats.Cpu.Usage.PerCpu {
|
|
ch <- prometheus.MustNewConstMetric(c.cpuUsageSecondsPerCPU, prometheus.CounterValue, float64(value)/float64(time.Second), name, id, fmt.Sprintf("cpu%02d", i))
|
|
}
|
|
|
|
for _, stat := range stats.Filesystem {
|
|
for desc, m := range map[*prometheus.Desc]prometheusMetric{
|
|
c.fsReads: {valueType: prometheus.CounterValue, value: float64(stat.ReadsCompleted)},
|
|
c.fsReadsSectors: {valueType: prometheus.CounterValue, value: float64(stat.SectorsRead)},
|
|
c.fsReadsMerged: {valueType: prometheus.CounterValue, value: float64(stat.ReadsMerged)},
|
|
c.fsReadTime: {valueType: prometheus.CounterValue, value: float64(stat.ReadTime) / float64(time.Second)},
|
|
|
|
c.fsWrites: {valueType: prometheus.CounterValue, value: float64(stat.WritesCompleted)},
|
|
c.fsWritesSectors: {valueType: prometheus.CounterValue, value: float64(stat.SectorsWritten)},
|
|
c.fsWritesMerged: {valueType: prometheus.CounterValue, value: float64(stat.WritesMerged)},
|
|
c.fsWriteTime: {valueType: prometheus.CounterValue, value: float64(stat.WriteTime) / float64(time.Second)},
|
|
|
|
c.fsIoTime: {valueType: prometheus.CounterValue, value: float64(stat.IoInProgress) / float64(time.Second)},
|
|
c.fsWeightedIoTime: {valueType: prometheus.CounterValue, value: float64(stat.IoTime) / float64(time.Second)},
|
|
|
|
c.fsIoInProgress: {valueType: prometheus.GaugeValue, value: float64(stat.IoInProgress)},
|
|
c.fsLimit: {valueType: prometheus.GaugeValue, value: float64(stat.Limit)},
|
|
c.fsUsage: {valueType: prometheus.GaugeValue, value: float64(stat.Usage)},
|
|
} {
|
|
ch <- prometheus.MustNewConstMetric(desc, m.valueType, m.value, name, id, stat.Device)
|
|
}
|
|
}
|
|
}
|
|
c.errors.Collect(ch)
|
|
}
|