diff --git a/cmd/cadvisor.go b/cmd/cadvisor.go
index d0e26f71..64e96c2c 100644
--- a/cmd/cadvisor.go
+++ b/cmd/cadvisor.go
@@ -88,6 +88,7 @@ var (
container.ProcessMetrics: struct{}{},
container.HugetlbUsageMetrics: struct{}{},
container.ReferencedMemoryMetrics: struct{}{},
+ container.CPUTopologyMetrics: struct{}{},
}}
// List of metrics that can be ignored.
@@ -103,6 +104,7 @@ var (
container.ProcessMetrics: struct{}{},
container.HugetlbUsageMetrics: struct{}{},
container.ReferencedMemoryMetrics: struct{}{},
+ container.CPUTopologyMetrics: struct{}{},
}
)
@@ -134,7 +136,7 @@ func (ml *metricSetValue) Set(value string) error {
}
func init() {
- flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'disk', 'diskIO', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb', 'referenced_memory'.")
+ flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'cpu_topology','disk', 'diskIO', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb', 'referenced_memory'.")
// Default logging verbosity to V(2)
flag.Set("v", "2")
diff --git a/cmd/cadvisor_test.go b/cmd/cadvisor_test.go
index 4b417fce..57439e1a 100644
--- a/cmd/cadvisor_test.go
+++ b/cmd/cadvisor_test.go
@@ -46,6 +46,12 @@ func TestReferencedMemoryMetricsIsDisabledByDefault(t *testing.T) {
assert.True(t, ignoreMetrics.Has(container.ReferencedMemoryMetrics))
}
+// TestCPUTopologyMetricsAreDisabledByDefault checks that the cpu_topology
+// metric kind is in the ignored set both before and after flags are parsed.
+func TestCPUTopologyMetricsAreDisabledByDefault(t *testing.T) {
+	disabledBeforeParse := ignoreMetrics.Has(container.CPUTopologyMetrics)
+	assert.True(t, disabledBeforeParse)
+
+	flag.Parse()
+
+	disabledAfterParse := ignoreMetrics.Has(container.CPUTopologyMetrics)
+	assert.True(t, disabledAfterParse)
+}
+
func TestIgnoreMetrics(t *testing.T) {
tests := []struct {
value string
@@ -93,6 +99,7 @@ func TestToIncludedMetrics(t *testing.T) {
container.HugetlbUsageMetrics: struct{}{},
container.PerfMetrics: struct{}{},
container.ReferencedMemoryMetrics: struct{}{},
+ container.CPUTopologyMetrics: struct{}{},
},
container.AllMetrics,
{},
diff --git a/cmd/internal/http/handlers.go b/cmd/internal/http/handlers.go
index 867a11c9..f00385b1 100644
--- a/cmd/internal/http/handlers.go
+++ b/cmd/internal/http/handlers.go
@@ -99,7 +99,7 @@ func RegisterPrometheusHandler(mux httpmux.Mux, resourceManager manager.Manager,
r := prometheus.NewRegistry()
r.MustRegister(
metrics.NewPrometheusCollector(resourceManager, f, includedMetrics, clock.RealClock{}),
- metrics.NewPrometheusMachineCollector(resourceManager),
+ metrics.NewPrometheusMachineCollector(resourceManager, includedMetrics),
prometheus.NewGoCollector(),
prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}),
)
diff --git a/container/factory.go b/container/factory.go
index e8a15616..2db30671 100644
--- a/container/factory.go
+++ b/container/factory.go
@@ -60,6 +60,7 @@ const (
HugetlbUsageMetrics MetricKind = "hugetlb"
PerfMetrics MetricKind = "perf_event"
ReferencedMemoryMetrics MetricKind = "referenced_memory"
+ CPUTopologyMetrics MetricKind = "cpu_topology"
)
// AllMetrics represents all kinds of metrics that cAdvisor supported.
@@ -81,6 +82,7 @@ var AllMetrics = MetricSet{
HugetlbUsageMetrics: struct{}{},
PerfMetrics: struct{}{},
ReferencedMemoryMetrics: struct{}{},
+ CPUTopologyMetrics: struct{}{},
}
func (mk MetricKind) String() string {
diff --git a/docs/storage/prometheus.md b/docs/storage/prometheus.md
index 0cde28b7..45596b67 100644
--- a/docs/storage/prometheus.md
+++ b/docs/storage/prometheus.md
@@ -88,10 +88,15 @@ The table below lists the Prometheus hardware metrics exposed by cAdvisor (in al
Metric name | Type | Description | Unit (where applicable)
:-----------|:-----|:------------|:-----------------------
+`machine_cpu_cache_capacity_bytes` | Gauge | Cache size in bytes assigned to NUMA node and CPU core | bytes
`machine_cpu_cores` | Gauge | Number of physical CPU cores |
`machine_cpu_physical_cores` | Gauge | Number of logical CPU cores |
`machine_cpu_sockets` | Gauge | Number of CPU sockets |
`machine_dimm_capacity_bytes` | Gauge | Total RAM DIMM capacity (all types memory modules) value labeled by dimm type,
information is retrieved from sysfs edac per-DIMM API (/sys/devices/system/edac/mc/) introduced in kernel 3.6 | bytes
`machine_dimm_count` | Gauge | Number of RAM DIMM (all types memory modules) value labeled by dimm type,
information is retrieved from sysfs edac per-DIMM API (/sys/devices/system/edac/mc/) introduced in kernel 3.6 |
`machine_memory_bytes` | Gauge | Amount of memory installed on the machine | bytes
+`machine_node_hugepages_count` | Gauge | Number of hugepages assigned to NUMA node |
+`machine_node_memory_capacity_bytes` | Gauge | Amount of memory assigned to NUMA node | bytes
+`machine_nvm_avg_power_budget_watts` | Gauge | NVM power budget | watts
`machine_nvm_capacity` | Gauge | NVM capacity value labeled by NVM mode (memory mode or app direct mode) | bytes
+`machine_thread_siblings_count` | Gauge | Number of CPU thread siblings |
diff --git a/info/v1/machine.go b/info/v1/machine.go
index 04ad666f..9dd989d4 100644
--- a/info/v1/machine.go
+++ b/info/v1/machine.go
@@ -14,6 +14,8 @@
package v1
+import "time"
+
type FsInfo struct {
// Block device associated with the filesystem.
Device string `json:"device"`
@@ -156,6 +158,9 @@ const (
)
type MachineInfo struct {
+ // The time of this information point.
+ Timestamp time.Time `json:"timestamp"`
+
// The number of cores in this machine.
NumCores int `json:"num_cores"`
@@ -227,6 +232,7 @@ func (m *MachineInfo) Clone() *MachineInfo {
}
}
copy := MachineInfo{
+ Timestamp: m.Timestamp,
NumCores: m.NumCores,
NumPhysicalCores: m.NumPhysicalCores,
NumSockets: m.NumSockets,
diff --git a/machine/info.go b/machine/info.go
index 61b88a41..72a77130 100644
--- a/machine/info.go
+++ b/machine/info.go
@@ -20,6 +20,7 @@ import (
"io/ioutil"
"path/filepath"
"strings"
+ "time"
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
@@ -119,6 +120,7 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach
instanceID := realCloudInfo.GetInstanceID()
machineInfo := &info.MachineInfo{
+ Timestamp: time.Now(),
NumCores: numCores,
NumPhysicalCores: GetPhysicalCores(cpuinfo),
NumSockets: GetSockets(cpuinfo),
diff --git a/metrics/prometheus_fake.go b/metrics/prometheus_fake.go
index 433bd600..823d6d0b 100644
--- a/metrics/prometheus_fake.go
+++ b/metrics/prometheus_fake.go
@@ -35,6 +35,7 @@ func (p testSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, erro
func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
return &info.MachineInfo{
+ Timestamp: time.Unix(1395066363, 0),
NumCores: 4,
NumPhysicalCores: 1,
NumSockets: 1,
@@ -50,6 +51,216 @@ func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, erro
MachineID: "machine-id-test",
SystemUUID: "system-uuid-test",
BootID: "boot-id-test",
+ Topology: []info.Node{
+ {
+ Id: 0,
+ Memory: 33604804608,
+ HugePages: []info.HugePagesInfo{
+ {
+ PageSize: uint64(1048576),
+ NumPages: uint64(0),
+ },
+ {
+ PageSize: uint64(2048),
+ NumPages: uint64(0),
+ },
+ },
+ Cores: []info.Core{
+ {
+ Id: 0,
+ Threads: []int{0, 1},
+ Caches: []info.Cache{
+ {
+ Size: 32768,
+ Type: "Data",
+ Level: 1,
+ },
+ {
+ Size: 32768,
+ Type: "Instruction",
+ Level: 1,
+ },
+ {
+ Size: 262144,
+ Type: "Unified",
+ Level: 2,
+ },
+ },
+ },
+ {
+ Id: 1,
+ Threads: []int{2, 3},
+ Caches: []info.Cache{
+ {
+ Size: 32764,
+ Type: "Data",
+ Level: 1,
+ },
+ {
+ Size: 32764,
+ Type: "Instruction",
+ Level: 1,
+ },
+ {
+ Size: 262148,
+ Type: "Unified",
+ Level: 2,
+ },
+ },
+ },
+
+ {
+ Id: 2,
+ Threads: []int{4, 5},
+ Caches: []info.Cache{
+ {
+ Size: 32768,
+ Type: "Data",
+ Level: 1,
+ },
+ {
+ Size: 32768,
+ Type: "Instruction",
+ Level: 1,
+ },
+ {
+ Size: 262144,
+ Type: "Unified",
+ Level: 2,
+ },
+ },
+ },
+ {
+ Id: 3,
+ Threads: []int{6, 7},
+ Caches: []info.Cache{
+ {
+ Size: 32764,
+ Type: "Data",
+ Level: 1,
+ },
+ {
+ Size: 32764,
+ Type: "Instruction",
+ Level: 1,
+ },
+ {
+ Size: 262148,
+ Type: "Unified",
+ Level: 2,
+ },
+ },
+ },
+ },
+ },
+ {
+ Id: 1,
+ Memory: 33604804606,
+ HugePages: []info.HugePagesInfo{
+ {
+ PageSize: uint64(1048576),
+ NumPages: uint64(2),
+ },
+ {
+ PageSize: uint64(2048),
+ NumPages: uint64(4),
+ },
+ },
+ Cores: []info.Core{
+ {
+ Id: 4,
+ Threads: []int{8, 9},
+ Caches: []info.Cache{
+ {
+ Size: 32768,
+ Type: "Data",
+ Level: 1,
+ },
+ {
+ Size: 32768,
+ Type: "Instruction",
+ Level: 1,
+ },
+ {
+ Size: 262144,
+ Type: "Unified",
+ Level: 2,
+ },
+ },
+ },
+ {
+ Id: 5,
+ Threads: []int{10, 11},
+ Caches: []info.Cache{
+ {
+ Size: 32764,
+ Type: "Data",
+ Level: 1,
+ },
+ {
+ Size: 32764,
+ Type: "Instruction",
+ Level: 1,
+ },
+ {
+ Size: 262148,
+ Type: "Unified",
+ Level: 2,
+ },
+ },
+ },
+ {
+ Id: 6,
+ Threads: []int{12, 13},
+ Caches: []info.Cache{
+ {
+ Size: 32768,
+ Type: "Data",
+ Level: 1,
+ },
+ {
+ Size: 32768,
+ Type: "Instruction",
+ Level: 1,
+ },
+ {
+ Size: 262144,
+ Type: "Unified",
+ Level: 2,
+ },
+ },
+ },
+ {
+ Id: 7,
+ Threads: []int{14, 15},
+ Caches: []info.Cache{
+ {
+ Size: 32764,
+ Type: "Data",
+ Level: 1,
+ },
+ {
+ Size: 32764,
+ Type: "Instruction",
+ Level: 1,
+ },
+ {
+ Size: 262148,
+ Type: "Unified",
+ Level: 2,
+ },
+ },
+ },
+ },
+ Caches: []info.Cache{
+ {
+ Size: 8388608,
+ Type: "Unified",
+ Level: 3,
+ },
+ },
+ },
+ },
}, nil
}
diff --git a/metrics/prometheus_machine.go b/metrics/prometheus_machine.go
index 8be21489..6a9c55ab 100644
--- a/metrics/prometheus_machine.go
+++ b/metrics/prometheus_machine.go
@@ -15,6 +15,9 @@
package metrics
import (
+ "strconv"
+
+ "github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
"github.com/prometheus/client_golang/prometheus"
@@ -24,14 +27,21 @@ import (
var baseLabelsNames = []string{"machine_id", "system_uuid", "boot_id"}
const (
- prometheusModeLabelName = "mode"
- prometheusTypeLabelName = "type"
+ prometheusModeLabelName = "mode"
+ prometheusTypeLabelName = "type"
+ prometheusLevelLabelName = "level"
+ prometheusNodeLabelName = "node_id"
+ prometheusCoreLabelName = "core_id"
+ prometheusThreadLabelName = "thread_id"
+ prometheusPageSizeLabelName = "page_size"
nvmMemoryMode = "memory_mode"
nvmAppDirectMode = "app_direct_mode"
memoryByTypeDimmCountKey = "DimmCount"
memoryByTypeDimmCapacityKey = "Capacity"
+
+ emptyLabelValue = ""
)
// machineMetric describes a multi-dimensional metric used for exposing a
@@ -57,8 +67,9 @@ type PrometheusMachineCollector struct {
}
// NewPrometheusMachineCollector returns a new PrometheusCollector.
-func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector {
+func NewPrometheusMachineCollector(i infoProvider, includedMetrics container.MetricSet) *PrometheusMachineCollector {
c := &PrometheusMachineCollector{
+
infoProvider: i,
errors: prometheus.NewGauge(prometheus.GaugeOpts{
Namespace: "machine",
@@ -71,7 +82,7 @@ func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector {
help: "Number of physical CPU cores.",
valueType: prometheus.GaugeValue,
getValues: func(machineInfo *info.MachineInfo) metricValues {
- return metricValues{{value: float64(machineInfo.NumPhysicalCores)}}
+ return metricValues{{value: float64(machineInfo.NumPhysicalCores), timestamp: machineInfo.Timestamp}}
},
},
{
@@ -79,7 +90,7 @@ func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector {
help: "Number of logical CPU cores.",
valueType: prometheus.GaugeValue,
getValues: func(machineInfo *info.MachineInfo) metricValues {
- return metricValues{{value: float64(machineInfo.NumCores)}}
+ return metricValues{{value: float64(machineInfo.NumCores), timestamp: machineInfo.Timestamp}}
},
},
{
@@ -87,7 +98,7 @@ func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector {
help: "Number of CPU sockets.",
valueType: prometheus.GaugeValue,
getValues: func(machineInfo *info.MachineInfo) metricValues {
- return metricValues{{value: float64(machineInfo.NumSockets)}}
+ return metricValues{{value: float64(machineInfo.NumSockets), timestamp: machineInfo.Timestamp}}
},
},
{
@@ -95,7 +106,7 @@ func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector {
help: "Amount of memory installed on the machine.",
valueType: prometheus.GaugeValue,
getValues: func(machineInfo *info.MachineInfo) metricValues {
- return metricValues{{value: float64(machineInfo.MemoryCapacity)}}
+ return metricValues{{value: float64(machineInfo.MemoryCapacity), timestamp: machineInfo.Timestamp}}
},
},
{
@@ -125,13 +136,62 @@ func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector {
extraLabels: []string{prometheusModeLabelName},
getValues: func(machineInfo *info.MachineInfo) metricValues {
return metricValues{
- {value: float64(machineInfo.NVMInfo.MemoryModeCapacity), labels: []string{nvmMemoryMode}},
- {value: float64(machineInfo.NVMInfo.AppDirectModeCapacity), labels: []string{nvmAppDirectMode}},
+ {value: float64(machineInfo.NVMInfo.MemoryModeCapacity), labels: []string{nvmMemoryMode}, timestamp: machineInfo.Timestamp},
+ {value: float64(machineInfo.NVMInfo.AppDirectModeCapacity), labels: []string{nvmAppDirectMode}, timestamp: machineInfo.Timestamp},
}
},
},
+ {
+ name: "machine_nvm_avg_power_budget_watts",
+ help: "NVM power budget.",
+ valueType: prometheus.GaugeValue,
+ getValues: func(machineInfo *info.MachineInfo) metricValues {
+ return metricValues{{value: float64(machineInfo.NVMInfo.AvgPowerBudget), timestamp: machineInfo.Timestamp}}
+ },
+ },
},
}
+
+ if includedMetrics.Has(container.CPUTopologyMetrics) {
+ c.machineMetrics = append(c.machineMetrics, []machineMetric{
+ {
+ name: "machine_cpu_cache_capacity_bytes",
+ help: "Cache size in bytes assigned to NUMA node and CPU core.",
+ valueType: prometheus.GaugeValue,
+ extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusTypeLabelName, prometheusLevelLabelName},
+ getValues: func(machineInfo *info.MachineInfo) metricValues {
+ return getCaches(machineInfo)
+ },
+ },
+ {
+ name: "machine_thread_siblings_count",
+ help: "Number of CPU thread siblings.",
+ valueType: prometheus.GaugeValue,
+ extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusThreadLabelName},
+ getValues: func(machineInfo *info.MachineInfo) metricValues {
+ return getThreadsSiblingsCount(machineInfo)
+ },
+ },
+ {
+ name: "machine_node_memory_capacity_bytes",
+ help: "Amount of memory assigned to NUMA node.",
+ valueType: prometheus.GaugeValue,
+ extraLabels: []string{prometheusNodeLabelName},
+ getValues: func(machineInfo *info.MachineInfo) metricValues {
+ return getNodeMemory(machineInfo)
+ },
+ },
+ {
+ name: "machine_node_hugepages_count",
+ help: "Numer of hugepages assigned to NUMA node.",
+ valueType: prometheus.GaugeValue,
+ extraLabels: []string{prometheusNodeLabelName, prometheusPageSizeLabelName},
+ getValues: func(machineInfo *info.MachineInfo) metricValues {
+ return getHugePagesCount(machineInfo)
+ },
+ },
+ }...)
+ }
return c
}
@@ -173,8 +233,15 @@ func (collector *PrometheusMachineCollector) collectMachineInfo(ch chan<- promet
if len(metric.extraLabels) != 0 {
labelValues = append(labelValues, metricValue.labels...)
}
- ch <- prometheus.MustNewConstMetric(metric.desc(baseLabelsNames),
+
+ prometheusMetric := prometheus.MustNewConstMetric(metric.desc(baseLabelsNames),
metric.valueType, metricValue.value, labelValues...)
+
+ if metricValue.timestamp.IsZero() {
+ ch <- prometheusMetric
+ } else {
+ ch <- prometheus.NewMetricWithTimestamp(metricValue.timestamp, prometheusMetric)
+ }
}
}
@@ -193,7 +260,90 @@ func getMemoryByType(machineInfo *info.MachineInfo, property string) metricValue
klog.Warningf("Incorrect propery name for MemoryByType, property %s", property)
return metricValues{}
}
- mValues = append(mValues, metricValue{value: propertyValue, labels: []string{memoryType}})
+ mValues = append(mValues, metricValue{value: propertyValue, labels: []string{memoryType}, timestamp: machineInfo.Timestamp})
+ }
+ return mValues
+}
+
+// getThreadsSiblingsCount emits one value per CPU thread listed in the machine
+// topology. Each value is the number of threads on that thread's core and is
+// labelled with the node ID, core ID and thread ID, in that order.
+func getThreadsSiblingsCount(machineInfo *info.MachineInfo) metricValues {
+	// NumCores is only a capacity hint; append grows the slice if the
+	// topology lists more threads than that.
+	values := make(metricValues, 0, machineInfo.NumCores)
+	for n := range machineInfo.Topology {
+		numaNode := &machineInfo.Topology[n]
+		nodeLabel := strconv.Itoa(numaNode.Id)
+
+		for c := range numaNode.Cores {
+			cpuCore := &numaNode.Cores[c]
+			coreLabel := strconv.Itoa(cpuCore.Id)
+			siblings := float64(len(cpuCore.Threads))
+
+			for _, threadID := range cpuCore.Threads {
+				values = append(values, metricValue{
+					value:     siblings,
+					labels:    []string{nodeLabel, coreLabel, strconv.Itoa(threadID)},
+					timestamp: machineInfo.Timestamp,
+				})
+			}
+		}
+	}
+	return values
+}
+
+// getNodeMemory emits one value per node in the machine topology: the node's
+// Memory field, labelled with the node ID.
+func getNodeMemory(machineInfo *info.MachineInfo) metricValues {
+	nodes := machineInfo.Topology
+	values := make(metricValues, 0, len(nodes))
+	for i := range nodes {
+		values = append(values, metricValue{
+			value:     float64(nodes[i].Memory),
+			labels:    []string{strconv.Itoa(nodes[i].Id)},
+			timestamp: machineInfo.Timestamp,
+		})
+	}
+	return values
+}
+
+// getHugePagesCount emits one value per (node, huge page size) pair found in
+// the machine topology. The value is the page count; the labels are the node
+// ID and the page size rendered in base 10.
+func getHugePagesCount(machineInfo *info.MachineInfo) metricValues {
+	values := metricValues{}
+	for n := range machineInfo.Topology {
+		numaNode := &machineInfo.Topology[n]
+		nodeLabel := strconv.Itoa(numaNode.Id)
+
+		for _, pages := range numaNode.HugePages {
+			pageSizeLabel := strconv.FormatUint(pages.PageSize, 10)
+			values = append(values, metricValue{
+				value:     float64(pages.NumPages),
+				labels:    []string{nodeLabel, pageSizeLabel},
+				timestamp: machineInfo.Timestamp,
+			})
+		}
+	}
+	return values
+}
+
+// getCaches emits one value per cache found in the machine topology. For each
+// node the per-core caches come first, followed by the caches attached to the
+// node itself. Labels are node ID, core ID, cache type and cache level; the
+// core ID label is empty for node-level caches.
+func getCaches(machineInfo *info.MachineInfo) metricValues {
+	mValues := metricValues{}
+
+	// newValue builds the metric value for a single cache entry.
+	newValue := func(nodeLabel, coreLabel string, c info.Cache) metricValue {
+		return metricValue{
+			value:     float64(c.Size),
+			labels:    []string{nodeLabel, coreLabel, c.Type, strconv.Itoa(c.Level)},
+			timestamp: machineInfo.Timestamp,
+		}
+	}
+
+	for n := range machineInfo.Topology {
+		numaNode := &machineInfo.Topology[n]
+		nodeLabel := strconv.Itoa(numaNode.Id)
+
+		// Per-core caches first, in topology order.
+		for c := range numaNode.Cores {
+			cpuCore := &numaNode.Cores[c]
+			coreLabel := strconv.Itoa(cpuCore.Id)
+			for _, coreCache := range cpuCore.Caches {
+				mValues = append(mValues, newValue(nodeLabel, coreLabel, coreCache))
+			}
+		}
+
+		// Node-level caches are not tied to one core, so core_id stays empty.
+		for _, nodeCache := range numaNode.Caches {
+			mValues = append(mValues, newValue(nodeLabel, emptyLabelValue, nodeCache))
+		}
 	}
 	return mValues
 }
diff --git a/metrics/prometheus_machine_test.go b/metrics/prometheus_machine_test.go
index 284bac78..8095415f 100644
--- a/metrics/prometheus_machine_test.go
+++ b/metrics/prometheus_machine_test.go
@@ -17,8 +17,11 @@ package metrics
import (
"bytes"
"io/ioutil"
+ "reflect"
"testing"
+ "time"
+ "github.com/google/cadvisor/container"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
"github.com/stretchr/testify/assert"
@@ -28,7 +31,7 @@ const machineMetricsFile = "testdata/prometheus_machine_metrics"
const machineMetricsFailureFile = "testdata/prometheus_machine_metrics_failure"
func TestPrometheusMachineCollector(t *testing.T) {
- collector := NewPrometheusMachineCollector(testSubcontainersInfoProvider{})
+ collector := NewPrometheusMachineCollector(testSubcontainersInfoProvider{}, container.AllMetrics)
registry := prometheus.NewRegistry()
registry.MustRegister(collector)
@@ -41,6 +44,7 @@ func TestPrometheusMachineCollector(t *testing.T) {
assert.Nil(t, err)
}
collectedMetrics := metricBuffer.String()
+
expectedMetrics, err := ioutil.ReadFile(machineMetricsFile)
assert.Nil(t, err)
assert.Equal(t, string(expectedMetrics), collectedMetrics)
@@ -51,7 +55,7 @@ func TestPrometheusMachineCollectorWithFailure(t *testing.T) {
successfulProvider: testSubcontainersInfoProvider{},
shouldFail: true,
}
- collector := NewPrometheusMachineCollector(provider)
+ collector := NewPrometheusMachineCollector(provider, container.AllMetrics)
registry := prometheus.NewRegistry()
registry.MustRegister(collector)
@@ -87,3 +91,105 @@ func TestGetMemoryByTypeWithWrongProperty(t *testing.T) {
metricVals := getMemoryByType(machineInfo, "wrong_property_name")
assert.Equal(t, 0, len(metricVals))
}
+
+// TestGetCaches verifies that getCaches reports every cache of the fake
+// machine topology: three caches for each of the eight cores, plus the single
+// level-3 cache attached to node 1 (reported with an empty core ID label).
+func TestGetCaches(t *testing.T) {
+	machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo()
+	assert.Nil(t, err)
+
+	got := getCaches(machineInfo)
+	assert.Equal(t, 25, len(got))
+
+	// Every value carries the timestamp of the fake MachineInfo.
+	ts := time.Unix(1395066363, 0)
+	want := []metricValue{
+		{value: 32768, labels: []string{"0", "0", "Data", "1"}, timestamp: ts},
+		{value: 32768, labels: []string{"0", "0", "Instruction", "1"}, timestamp: ts},
+		{value: 262144, labels: []string{"0", "0", "Unified", "2"}, timestamp: ts},
+		{value: 32764, labels: []string{"0", "1", "Data", "1"}, timestamp: ts},
+		{value: 32764, labels: []string{"0", "1", "Instruction", "1"}, timestamp: ts},
+		{value: 262148, labels: []string{"0", "1", "Unified", "2"}, timestamp: ts},
+		{value: 32768, labels: []string{"0", "2", "Data", "1"}, timestamp: ts},
+		{value: 32768, labels: []string{"0", "2", "Instruction", "1"}, timestamp: ts},
+		{value: 262144, labels: []string{"0", "2", "Unified", "2"}, timestamp: ts},
+		{value: 32764, labels: []string{"0", "3", "Data", "1"}, timestamp: ts},
+		{value: 32764, labels: []string{"0", "3", "Instruction", "1"}, timestamp: ts},
+		{value: 262148, labels: []string{"0", "3", "Unified", "2"}, timestamp: ts},
+		{value: 32768, labels: []string{"1", "4", "Data", "1"}, timestamp: ts},
+		{value: 32768, labels: []string{"1", "4", "Instruction", "1"}, timestamp: ts},
+		{value: 262144, labels: []string{"1", "4", "Unified", "2"}, timestamp: ts},
+		{value: 32764, labels: []string{"1", "5", "Data", "1"}, timestamp: ts},
+		{value: 32764, labels: []string{"1", "5", "Instruction", "1"}, timestamp: ts},
+		{value: 262148, labels: []string{"1", "5", "Unified", "2"}, timestamp: ts},
+		{value: 32768, labels: []string{"1", "6", "Data", "1"}, timestamp: ts},
+		{value: 32768, labels: []string{"1", "6", "Instruction", "1"}, timestamp: ts},
+		{value: 262144, labels: []string{"1", "6", "Unified", "2"}, timestamp: ts},
+		{value: 32764, labels: []string{"1", "7", "Data", "1"}, timestamp: ts},
+		{value: 32764, labels: []string{"1", "7", "Instruction", "1"}, timestamp: ts},
+		{value: 262148, labels: []string{"1", "7", "Unified", "2"}, timestamp: ts},
+		{value: 8388608, labels: []string{"1", "", "Unified", "3"}, timestamp: ts},
+	}
+	// The message names the data under test so a failure points at caches
+	// rather than at node memory.
+	assertMetricValues(t, want, got, "Unexpected information about CPU caches")
+}
+
+// TestGetThreadsSiblingsCount verifies that getThreadsSiblingsCount reports a
+// sibling count of 2 for each of the 16 threads in the fake machine topology,
+// labelled with node ID, core ID and thread ID.
+func TestGetThreadsSiblingsCount(t *testing.T) {
+	machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo()
+	assert.Nil(t, err)
+
+	got := getThreadsSiblingsCount(machineInfo)
+	assert.Equal(t, 16, len(got))
+
+	// Every value carries the timestamp of the fake MachineInfo.
+	ts := time.Unix(1395066363, 0)
+	want := []metricValue{
+		{value: 2, labels: []string{"0", "0", "0"}, timestamp: ts},
+		{value: 2, labels: []string{"0", "0", "1"}, timestamp: ts},
+		{value: 2, labels: []string{"0", "1", "2"}, timestamp: ts},
+		{value: 2, labels: []string{"0", "1", "3"}, timestamp: ts},
+		{value: 2, labels: []string{"0", "2", "4"}, timestamp: ts},
+		{value: 2, labels: []string{"0", "2", "5"}, timestamp: ts},
+		{value: 2, labels: []string{"0", "3", "6"}, timestamp: ts},
+		{value: 2, labels: []string{"0", "3", "7"}, timestamp: ts},
+		{value: 2, labels: []string{"1", "4", "8"}, timestamp: ts},
+		{value: 2, labels: []string{"1", "4", "9"}, timestamp: ts},
+		{value: 2, labels: []string{"1", "5", "10"}, timestamp: ts},
+		{value: 2, labels: []string{"1", "5", "11"}, timestamp: ts},
+		{value: 2, labels: []string{"1", "6", "12"}, timestamp: ts},
+		{value: 2, labels: []string{"1", "6", "13"}, timestamp: ts},
+		{value: 2, labels: []string{"1", "7", "14"}, timestamp: ts},
+		{value: 2, labels: []string{"1", "7", "15"}, timestamp: ts},
+	}
+	assertMetricValues(t, want, got, "Unexpected information about CPU threads")
+}
+
+// TestGetNodeMemory verifies that getNodeMemory reports the memory of both
+// nodes in the fake machine topology, labelled with the node ID.
+func TestGetNodeMemory(t *testing.T) {
+	machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo()
+	assert.Nil(t, err)
+
+	got := getNodeMemory(machineInfo)
+	assert.Equal(t, 2, len(got))
+
+	// Every value carries the timestamp of the fake MachineInfo.
+	ts := time.Unix(1395066363, 0)
+	want := []metricValue{
+		{value: 33604804608, labels: []string{"0"}, timestamp: ts},
+		{value: 33604804606, labels: []string{"1"}, timestamp: ts},
+	}
+	assertMetricValues(t, want, got, "Unexpected information about Node memory")
+}
+
+// TestGetHugePagesCount verifies that getHugePagesCount reports the page count
+// for both huge page sizes on both nodes of the fake machine topology,
+// labelled with node ID and page size.
+func TestGetHugePagesCount(t *testing.T) {
+	machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo()
+	assert.Nil(t, err)
+
+	got := getHugePagesCount(machineInfo)
+	assert.Equal(t, 4, len(got))
+
+	// Every value carries the timestamp of the fake MachineInfo.
+	ts := time.Unix(1395066363, 0)
+	want := []metricValue{
+		{value: 0, labels: []string{"0", "1048576"}, timestamp: ts},
+		{value: 0, labels: []string{"0", "2048"}, timestamp: ts},
+		{value: 2, labels: []string{"1", "1048576"}, timestamp: ts},
+		{value: 4, labels: []string{"1", "2048"}, timestamp: ts},
+	}
+	// The message names the data under test so a failure points at huge pages
+	// rather than at node memory.
+	assertMetricValues(t, want, got, "Unexpected information about Node huge pages")
+}
+
+// assertMetricValues fails the test unless actual matches expected element by
+// element, in order. message is used as the prefix of every failure report.
+func assertMetricValues(t *testing.T, expected metricValues, actual metricValues, message string) {
+	t.Helper()
+
+	// Compare lengths first: indexing expected by the positions of actual
+	// would panic if actual were longer, and would silently ignore missing
+	// values if it were shorter.
+	if !assert.Equalf(t, len(expected), len(actual), "%s: unexpected number of metric values", message) {
+		return
+	}
+
+	for i := range actual {
+		assert.Truef(t, reflect.DeepEqual(expected[i], actual[i]),
+			"%s expected %#v but found %#v\n", message, expected[i], actual[i])
+	}
+}
diff --git a/metrics/testdata/prometheus_machine_metrics b/metrics/testdata/prometheus_machine_metrics
index 81a2f186..17604278 100644
--- a/metrics/testdata/prometheus_machine_metrics
+++ b/metrics/testdata/prometheus_machine_metrics
@@ -1,27 +1,85 @@
+# HELP machine_cpu_cache_capacity_bytes Cache size in bytes assigned to NUMA node and CPU core.
+# TYPE machine_cpu_cache_capacity_bytes gauge
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="",level="3",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 8.388608e+06 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="0",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Data"} 32768 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="0",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Instruction"} 32768 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="0",level="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Unified"} 262144 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="1",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Data"} 32764 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="1",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Instruction"} 32764 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="1",level="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Unified"} 262148 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="2",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Data"} 32768 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="2",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Instruction"} 32768 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="2",level="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Unified"} 262144 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="3",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Data"} 32764 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="3",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Instruction"} 32764 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="3",level="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Unified"} 262148 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="4",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Data"} 32768 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="4",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Instruction"} 32768 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="4",level="2",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 262144 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="5",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Data"} 32764 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="5",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Instruction"} 32764 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="5",level="2",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 262148 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="6",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Data"} 32768 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="6",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Instruction"} 32768 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="6",level="2",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 262144 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="7",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Data"} 32764 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="7",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Instruction"} 32764 1395066363000
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="7",level="2",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 262148 1395066363000
# HELP machine_cpu_cores Number of logical CPU cores.
# TYPE machine_cpu_cores gauge
-machine_cpu_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 4
+machine_cpu_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 4 1395066363000
# HELP machine_cpu_physical_cores Number of physical CPU cores.
# TYPE machine_cpu_physical_cores gauge
-machine_cpu_physical_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1
+machine_cpu_physical_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1 1395066363000
# HELP machine_cpu_sockets Number of CPU sockets.
# TYPE machine_cpu_sockets gauge
-machine_cpu_sockets{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1
+machine_cpu_sockets{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1 1395066363000
# HELP machine_dimm_capacity_bytes Total RAM DIMM capacity (all types memory modules) value labeled by dimm type.
# TYPE machine_dimm_capacity_bytes gauge
-machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 2.168421613568e+12
-machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 4.12316860416e+11
+machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 2.168421613568e+12 1395066363000
+machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 4.12316860416e+11 1395066363000
# HELP machine_dimm_count Number of RAM DIMM (all types memory modules) value labeled by dimm type.
# TYPE machine_dimm_count gauge
-machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 8
-machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 12
+machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 8 1395066363000
+machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 12 1395066363000
# HELP machine_memory_bytes Amount of memory installed on the machine.
# TYPE machine_memory_bytes gauge
-machine_memory_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1024
+machine_memory_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1024 1395066363000
+# HELP machine_node_hugepages_count Numer of hugepages assigned to NUMA node.
+# TYPE machine_node_hugepages_count gauge
+machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",page_size="1048576",system_uuid="system-uuid-test"} 0 1395066363000
+machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",page_size="2048",system_uuid="system-uuid-test"} 0 1395066363000
+machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",page_size="1048576",system_uuid="system-uuid-test"} 2 1395066363000
+machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",page_size="2048",system_uuid="system-uuid-test"} 4 1395066363000
+# HELP machine_node_memory_capacity_bytes Amount of memory assigned to NUMA node.
+# TYPE machine_node_memory_capacity_bytes gauge
+machine_node_memory_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test"} 3.3604804608e+10 1395066363000
+machine_node_memory_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test"} 3.3604804606e+10 1395066363000
+# HELP machine_nvm_avg_power_budget_watts NVM power budget.
+# TYPE machine_nvm_avg_power_budget_watts gauge
+machine_nvm_avg_power_budget_watts{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 0 1395066363000
# HELP machine_nvm_capacity NVM capacity value labeled by NVM mode (memory mode or app direct mode).
# TYPE machine_nvm_capacity gauge
-machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="app_direct_mode",system_uuid="system-uuid-test"} 1.735166787584e+12
-machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="memory_mode",system_uuid="system-uuid-test"} 4.294967296e+11
+machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="app_direct_mode",system_uuid="system-uuid-test"} 1.735166787584e+12 1395066363000
+machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="memory_mode",system_uuid="system-uuid-test"} 4.294967296e+11 1395066363000
# HELP machine_scrape_error 1 if there was an error while getting machine metrics, 0 otherwise.
# TYPE machine_scrape_error gauge
machine_scrape_error 0
+# HELP machine_thread_siblings_count Number of CPU thread siblings.
+# TYPE machine_thread_siblings_count gauge
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="0",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="0"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="0",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="1"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="2"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="3"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="4"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="5"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="3",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="6"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="3",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="7"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="4",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="8"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="4",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="9"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="5",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="10"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="5",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="11"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="6",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="12"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="6",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="13"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="7",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="14"} 2 1395066363000
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="7",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="15"} 2 1395066363000