diff --git a/docs/storage/prometheus.md b/docs/storage/prometheus.md index c64c4a5f..c6969263 100644 --- a/docs/storage/prometheus.md +++ b/docs/storage/prometheus.md @@ -78,6 +78,8 @@ Metric name | Type | Description | Unit (where applicable) `container_spec_memory_reservation_limit_bytes` | Gauge | Memory reservation limit for the container | bytes `container_start_time_seconds` | Gauge | Start time of the container since unix epoch | seconds `container_tasks_state` | Gauge | Number of tasks in given state (`sleeping`, `running`, `stopped`, `uninterruptible`, or `ioawaiting`) | +`container_perf_*` | Counter | Scaled counter of perf event. See [perf event configuration](../runtime_options.md#perf-events) | +`container_perf_*_scaling_ratio` | Gauge | Scaling ratio for perf event counter. See [perf event configuration](../runtime_options.md#perf-events) | ## Prometheus hardware metrics diff --git a/metrics/prometheus.go b/metrics/prometheus.go index d0b0a924..43409857 100644 --- a/metrics/prometheus.go +++ b/metrics/prometheus.go @@ -1647,7 +1647,7 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) containerLabels := c.containerLabelsFunc(cont) for l := range rawLabels { duplicate := false - sl := sanitizeLabelName(l) + sl := sanitizeName(l) for _, x := range labels { if sl == x { duplicate = true @@ -1709,7 +1709,7 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) copy(clabels, labels) copy(cvalues, values) for label, value := range metric.Labels { - clabels = append(clabels, sanitizeLabelName("app_"+label)) + clabels = append(clabels, sanitizeName("app_"+label)) cvalues = append(cvalues, value) } desc := prometheus.NewDesc(metricLabel, "Custom application metric.", clabels, nil) @@ -1717,12 +1717,20 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric) } } } + perfLabelNames := append(labels, "cpu") if c.includedMetrics.Has(container.PerfMetrics) { for _, metric := 
range stats.PerfStats { - labelNames := append(labels, "cpu") labelValues := append(values, strconv.Itoa(metric.Cpu)) - desc := prometheus.NewDesc(metric.Name, "Perf event metric", labelNames, nil) - scalingDesc := prometheus.NewDesc(fmt.Sprintf("%s_scaling_ratio", metric.Name), "Perf event metric scaling ratio", labelNames, nil) + desc := prometheus.NewDesc( + fmt.Sprintf("container_perf_%s", sanitizeName(metric.Name)), + "Perf event metric", + perfLabelNames, + nil) + scalingDesc := prometheus.NewDesc( + fmt.Sprintf("container_perf_%s_scaling_ratio", sanitizeName(metric.Name)), + "Perf event metric scaling ratio", + perfLabelNames, + nil) ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, float64(metric.Value), labelValues...) ch <- prometheus.MustNewConstMetric(scalingDesc, prometheus.GaugeValue, metric.ScalingRatio, labelValues...) } @@ -1752,10 +1760,10 @@ func specMemoryValue(v uint64) float64 { return float64(v) } -var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) +var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) -// sanitizeLabelName replaces anything that doesn't match +// sanitizeName replaces anything that doesn't match // client_label.LabelNameRE with an underscore. 
-func sanitizeLabelName(name string) string { - return invalidLabelCharRE.ReplaceAllString(name, "_") +func sanitizeName(name string) string { + return invalidNameCharRE.ReplaceAllString(name, "_") } diff --git a/metrics/testdata/prometheus_metrics b/metrics/testdata/prometheus_metrics index e4760a3c..e769a6e3 100644 --- a/metrics/testdata/prometheus_metrics +++ b/metrics/testdata/prometheus_metrics @@ -327,6 +327,22 @@ container_network_udp_usage_total{container_env_foo_env="prod",container_label_f container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="listen",zone_name="hello"} 0 1395066363000 container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="rxqueued",zone_name="hello"} 0 1395066363000 container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="txqueued",zone_name="hello"} 0 1395066363000 +# HELP container_perf_instructions Perf event metric +# TYPE container_perf_instructions counter +container_perf_instructions{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 123 +container_perf_instructions{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 456 +# HELP container_perf_instructions_retired Perf event metric +# TYPE container_perf_instructions_retired counter +container_perf_instructions_retired{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 321 
+container_perf_instructions_retired{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 789 +# HELP container_perf_instructions_retired_scaling_ratio Perf event metric scaling ratio +# TYPE container_perf_instructions_retired_scaling_ratio gauge +container_perf_instructions_retired_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.66666666666 +container_perf_instructions_retired_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.33333333333 +# HELP container_perf_instructions_scaling_ratio Perf event metric scaling ratio +# TYPE container_perf_instructions_scaling_ratio gauge +container_perf_instructions_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 +container_perf_instructions_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 # HELP container_processes Number of processes running inside the container. # TYPE container_processes gauge container_processes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000 @@ -364,22 +380,6 @@ container_threads_max{container_env_foo_env="prod",container_label_foo_label="ba # HELP container_ulimits_soft Soft ulimit values for the container root process. 
Unlimited if -1, except priority and nice # TYPE container_ulimits_soft gauge container_ulimits_soft{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",ulimit="max_open_files",zone_name="hello"} 16384 1395066363000 -# HELP instructions Perf event metric -# TYPE instructions counter -instructions{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 123 -instructions{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 456 -# HELP instructions_retired Perf event metric -# TYPE instructions_retired counter -instructions_retired{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 321 -instructions_retired{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 789 -# HELP instructions_retired_scaling_ratio Perf event metric scaling ratio -# TYPE instructions_retired_scaling_ratio gauge -instructions_retired_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.66666666666 -instructions_retired_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.33333333333 -# HELP instructions_scaling_ratio Perf event metric scaling ratio -# TYPE instructions_scaling_ratio gauge -instructions_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 
-instructions_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5 # HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. # TYPE process_cpu_seconds_total counter process_cpu_seconds_total 0