From 8aed6e9cae1e897b99130a170edc93227ff3ceb2 Mon Sep 17 00:00:00 2001 From: wujiaxing Date: Thu, 8 Jun 2017 14:48:24 +0800 Subject: [PATCH] Add cpu load,tcp/udp usage statistics in prom `/metrics` endpoint. --- metrics/prometheus.go | 86 +++++++++++++++++++++++++++++ metrics/prometheus_test.go | 20 +++++++ metrics/testdata/prometheus_metrics | 22 ++++++++ 3 files changed, 128 insertions(+) diff --git a/metrics/prometheus.go b/metrics/prometheus.go index 87d53e94..2c766d66 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -189,6 +189,13 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo getValues: func(s *info.ContainerStats) metricValues { return metricValues{{value: float64(s.Cpu.CFS.ThrottledTime) / float64(time.Second)}} }, + }, { + name: "container_cpu_load_average_10s", + help: "Value of container cpu load average over the last 10 seconds.", + valueType: prometheus.GaugeValue, + getValues: func(s *info.ContainerStats) metricValues { + return metricValues{{value: float64(s.Cpu.LoadAverage)}} + }, }, { + name: "container_memory_cache", help: "Number of bytes of page cache memory.", @@ -578,6 +585,84 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo } return values }, + }, { + name: "container_network_tcp_usage_total", + help: "tcp connection usage statistic for container", + valueType: prometheus.GaugeValue, + extraLabels: []string{"tcp_state"}, + getValues: func(s *info.ContainerStats) metricValues { + return metricValues{ + { + value: float64(s.Network.Tcp.Established), + labels: []string{"established"}, + }, + { + value: float64(s.Network.Tcp.SynSent), + labels: []string{"synsent"}, + }, + { + value: float64(s.Network.Tcp.SynRecv), + labels: []string{"synrecv"}, + }, + { + value: float64(s.Network.Tcp.FinWait1), + labels: []string{"finwait1"}, + }, + { + value: float64(s.Network.Tcp.FinWait2), + labels: []string{"finwait2"}, + }, + { + value: float64(s.Network.Tcp.TimeWait), + 
labels: []string{"timewait"}, + }, + { + value: float64(s.Network.Tcp.Close), + labels: []string{"close"}, + }, + { + value: float64(s.Network.Tcp.CloseWait), + labels: []string{"closewait"}, + }, + { + value: float64(s.Network.Tcp.LastAck), + labels: []string{"lastack"}, + }, + { + value: float64(s.Network.Tcp.Listen), + labels: []string{"listen"}, + }, + { + value: float64(s.Network.Tcp.Closing), + labels: []string{"closing"}, + }, + } + }, + }, { + name: "container_network_udp_usage_total", + help: "udp connection usage statistic for container", + valueType: prometheus.GaugeValue, + extraLabels: []string{"udp_state"}, + getValues: func(s *info.ContainerStats) metricValues { + return metricValues{ + { + value: float64(s.Network.Udp.Listen), + labels: []string{"listen"}, + }, + { + value: float64(s.Network.Udp.Dropped), + labels: []string{"dropped"}, + }, + { + value: float64(s.Network.Udp.RxQueued), + labels: []string{"rxqueued"}, + }, + { + value: float64(s.Network.Udp.TxQueued), + labels: []string{"txqueued"}, + }, + } + }, }, { name: "container_tasks_state", help: "Number of tasks in given state", @@ -610,6 +695,7 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo }, }, } + return c } diff --git a/metrics/prometheus_test.go b/metrics/prometheus_test.go index c02da60b..6f0a22c2 100644 --- a/metrics/prometheus_test.go +++ b/metrics/prometheus_test.go @@ -85,6 +85,7 @@ func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.Container ThrottledPeriods: 18, ThrottledTime: 1724314000, }, + LoadAverage: 2, }, Memory: info.MemoryStats{ Usage: 8, @@ -126,6 +127,25 @@ func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.Container TxDropped: 21, }, }, + Tcp: info.TcpStat{ + Established: 13, + SynSent: 0, + SynRecv: 0, + FinWait1: 0, + FinWait2: 0, + TimeWait: 0, + Close: 0, + CloseWait: 0, + LastAck: 0, + Listen: 3, + Closing: 0, + }, + Udp: info.UdpStat{ + Listen: 0, + Dropped: 0, + RxQueued: 0, + 
TxQueued: 0, + }, }, Filesystem: []info.FsStats{ { diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index 954c6a5a..2f588567 100644 --- a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -10,6 +10,9 @@ container_cpu_cfs_throttled_periods_total{container_env_foo_env="prod",container # HELP container_cpu_cfs_throttled_seconds_total Total time duration the container has been throttled. # TYPE container_cpu_cfs_throttled_seconds_total counter container_cpu_cfs_throttled_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1.724314 +# HELP container_cpu_load_average_10s Value of container cpu load average over the last 10 seconds. +# TYPE container_cpu_load_average_10s gauge +container_cpu_load_average_10s{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 2 # HELP container_cpu_system_seconds_total Cumulative system cpu time consumed in seconds. 
# TYPE container_cpu_system_seconds_total counter container_cpu_system_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 7e-09 @@ -121,6 +124,19 @@ container_network_receive_packets_dropped_total{container_env_foo_env="prod",con # HELP container_network_receive_packets_total Cumulative count of packets received # TYPE container_network_receive_packets_total counter container_network_receive_packets_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",interface="eth0",name="testcontaineralias",zone_name="hello"} 15 +# HELP container_network_tcp_usage_total tcp connection usage statistic for container +# TYPE container_network_tcp_usage_total gauge +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="close",zone_name="hello"} 0 +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="closewait",zone_name="hello"} 0 +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="closing",zone_name="hello"} 0 +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="established",zone_name="hello"} 13 +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="finwait1",zone_name="hello"} 0 +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="finwait2",zone_name="hello"} 0 
+container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="lastack",zone_name="hello"} 0 +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="listen",zone_name="hello"} 3 +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="synrecv",zone_name="hello"} 0 +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="synsent",zone_name="hello"} 0 +container_network_tcp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="timewait",zone_name="hello"} 0 # HELP container_network_transmit_bytes_total Cumulative count of bytes transmitted # TYPE container_network_transmit_bytes_total counter container_network_transmit_bytes_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",interface="eth0",name="testcontaineralias",zone_name="hello"} 18 @@ -133,6 +149,12 @@ container_network_transmit_packets_dropped_total{container_env_foo_env="prod",co # HELP container_network_transmit_packets_total Cumulative count of packets transmitted # TYPE container_network_transmit_packets_total counter container_network_transmit_packets_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",interface="eth0",name="testcontaineralias",zone_name="hello"} 19 +# HELP container_network_udp_usage_total udp connection usage statistic for container +# TYPE container_network_udp_usage_total gauge 
+container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="dropped",zone_name="hello"} 0 +container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="listen",zone_name="hello"} 0 +container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="rxqueued",zone_name="hello"} 0 +container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="txqueued",zone_name="hello"} 0 # HELP container_scrape_error 1 if there was an error while getting container metrics, 0 otherwise # TYPE container_scrape_error gauge container_scrape_error 0