Sanitizing perf metrics names and documenting newly added metrics

Signed-off-by: Maciej "Iwan" Iwanowski <maciej.iwanowski@intel.com>
This commit is contained in:
Maciej "Iwan" Iwanowski 2020-04-16 11:00:35 +02:00
parent 95460a3ca6
commit bf1f3ccbda
No known key found for this signature in database
GPG Key ID: 2484258A4DD3EE84
3 changed files with 35 additions and 25 deletions

View File

@ -78,6 +78,8 @@ Metric name | Type | Description | Unit (where applicable)
`container_spec_memory_reservation_limit_bytes` | Gauge | Memory reservation limit for the container | bytes
`container_start_time_seconds` | Gauge | Start time of the container since unix epoch | seconds
`container_tasks_state` | Gauge | Number of tasks in given state (`sleeping`, `running`, `stopped`, `uninterruptible`, or `ioawaiting`) |
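`container_perf_*` | Counter | Scaled counter of a perf event, where `*` is the perf event name with every character outside `[a-zA-Z0-9_]` replaced by an underscore. See [perf event configuration](docs/runtime_options.md#perf-events) |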
`container_perf_*_scaling_ratio` | Gauge | Scaling ratio of the corresponding `container_perf_*` perf event counter (`*` is the same sanitized perf event name). See [perf event configuration](docs/runtime_options.md#perf-events) |
## Prometheus hardware metrics

View File

@ -1647,7 +1647,7 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric)
containerLabels := c.containerLabelsFunc(cont)
for l := range rawLabels {
duplicate := false
sl := sanitizeLabelName(l)
sl := sanitizeName(l)
for _, x := range labels {
if sl == x {
duplicate = true
@ -1709,7 +1709,7 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric)
copy(clabels, labels)
copy(cvalues, values)
for label, value := range metric.Labels {
clabels = append(clabels, sanitizeLabelName("app_"+label))
clabels = append(clabels, sanitizeName("app_"+label))
cvalues = append(cvalues, value)
}
desc := prometheus.NewDesc(metricLabel, "Custom application metric.", clabels, nil)
@ -1717,12 +1717,20 @@ func (c *PrometheusCollector) collectContainersInfo(ch chan<- prometheus.Metric)
}
}
}
perfLabelNames := append(labels, "cpu")
if c.includedMetrics.Has(container.PerfMetrics) {
for _, metric := range stats.PerfStats {
labelNames := append(labels, "cpu")
labelValues := append(values, strconv.Itoa(metric.Cpu))
desc := prometheus.NewDesc(metric.Name, "Perf event metric", labelNames, nil)
scalingDesc := prometheus.NewDesc(fmt.Sprintf("%s_scaling_ratio", metric.Name), "Perf event metric scaling ratio", labelNames, nil)
desc := prometheus.NewDesc(
fmt.Sprintf("container_perf_%s", sanitizeName(metric.Name)),
"Perf event metric",
perfLabelNames,
nil)
scalingDesc := prometheus.NewDesc(
fmt.Sprintf("container_perf_%s_scaling_ratio", sanitizeName(metric.Name)),
"Perf event metric scaling ratio",
perfLabelNames,
nil)
ch <- prometheus.MustNewConstMetric(desc, prometheus.CounterValue, float64(metric.Value), labelValues...)
ch <- prometheus.MustNewConstMetric(scalingDesc, prometheus.GaugeValue, metric.ScalingRatio, labelValues...)
}
@ -1752,10 +1760,10 @@ func specMemoryValue(v uint64) float64 {
return float64(v)
}
var invalidLabelCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`)
// sanitizeLabelName replaces anything that doesn't match
// sanitizeName replaces anything that doesn't match
// client_label.LabelNameRE with an underscore.
func sanitizeLabelName(name string) string {
return invalidLabelCharRE.ReplaceAllString(name, "_")
func sanitizeName(name string) string {
return invalidNameCharRE.ReplaceAllString(name, "_")
}

View File

@ -327,6 +327,22 @@ container_network_udp_usage_total{container_env_foo_env="prod",container_label_f
container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="listen",zone_name="hello"} 0 1395066363000
container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="rxqueued",zone_name="hello"} 0 1395066363000
container_network_udp_usage_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",udp_state="txqueued",zone_name="hello"} 0 1395066363000
# HELP container_perf_instructions Perf event metric
# TYPE container_perf_instructions counter
container_perf_instructions{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 123
container_perf_instructions{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 456
# HELP container_perf_instructions_retired Perf event metric
# TYPE container_perf_instructions_retired counter
container_perf_instructions_retired{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 321
container_perf_instructions_retired{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 789
# HELP container_perf_instructions_retired_scaling_ratio Perf event metric scaling ratio
# TYPE container_perf_instructions_retired_scaling_ratio gauge
container_perf_instructions_retired_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.66666666666
container_perf_instructions_retired_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.33333333333
# HELP container_perf_instructions_scaling_ratio Perf event metric scaling ratio
# TYPE container_perf_instructions_scaling_ratio gauge
container_perf_instructions_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1
container_perf_instructions_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5
# HELP container_processes Number of processes running inside the container.
# TYPE container_processes gauge
container_processes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000
@ -364,22 +380,6 @@ container_threads_max{container_env_foo_env="prod",container_label_foo_label="ba
# HELP container_ulimits_soft Soft ulimit values for the container root process. Unlimited if -1, except priority and nice
# TYPE container_ulimits_soft gauge
container_ulimits_soft{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",ulimit="max_open_files",zone_name="hello"} 16384 1395066363000
# HELP instructions Perf event metric
# TYPE instructions counter
instructions{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 123
instructions{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 456
# HELP instructions_retired Perf event metric
# TYPE instructions_retired counter
instructions_retired{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 321
instructions_retired{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 789
# HELP instructions_retired_scaling_ratio Perf event metric scaling ratio
# TYPE instructions_retired_scaling_ratio gauge
instructions_retired_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.66666666666
instructions_retired_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.33333333333
# HELP instructions_scaling_ratio Perf event metric scaling ratio
# TYPE instructions_scaling_ratio gauge
instructions_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="0",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1
instructions_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",cpu="1",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.5
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 0