diff --git a/cmd/cadvisor.go b/cmd/cadvisor.go index d0e26f71..64e96c2c 100644 --- a/cmd/cadvisor.go +++ b/cmd/cadvisor.go @@ -88,6 +88,7 @@ var ( container.ProcessMetrics: struct{}{}, container.HugetlbUsageMetrics: struct{}{}, container.ReferencedMemoryMetrics: struct{}{}, + container.CPUTopologyMetrics: struct{}{}, }} // List of metrics that can be ignored. @@ -103,6 +104,7 @@ var ( container.ProcessMetrics: struct{}{}, container.HugetlbUsageMetrics: struct{}{}, container.ReferencedMemoryMetrics: struct{}{}, + container.CPUTopologyMetrics: struct{}{}, } ) @@ -134,7 +136,7 @@ func (ml *metricSetValue) Set(value string) error { } func init() { - flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'disk', 'diskIO', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb', 'referenced_memory'.") + flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'cpu_topology','disk', 'diskIO', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb', 'referenced_memory'.") // Default logging verbosity to V(2) flag.Set("v", "2") diff --git a/cmd/cadvisor_test.go b/cmd/cadvisor_test.go index 4b417fce..57439e1a 100644 --- a/cmd/cadvisor_test.go +++ b/cmd/cadvisor_test.go @@ -46,6 +46,12 @@ func TestReferencedMemoryMetricsIsDisabledByDefault(t *testing.T) { assert.True(t, ignoreMetrics.Has(container.ReferencedMemoryMetrics)) } +func TestCPUTopologyMetricsAreDisabledByDefault(t *testing.T) { + assert.True(t, ignoreMetrics.Has(container.CPUTopologyMetrics)) + flag.Parse() + assert.True(t, ignoreMetrics.Has(container.CPUTopologyMetrics)) +} + func TestIgnoreMetrics(t *testing.T) { tests := []struct { value string @@ -93,6 +99,7 @@ func TestToIncludedMetrics(t *testing.T) { container.HugetlbUsageMetrics: struct{}{}, container.PerfMetrics: struct{}{}, container.ReferencedMemoryMetrics: struct{}{}, + container.CPUTopologyMetrics: struct{}{}, }, container.AllMetrics, 
{}, diff --git a/cmd/internal/http/handlers.go b/cmd/internal/http/handlers.go index 867a11c9..f00385b1 100644 --- a/cmd/internal/http/handlers.go +++ b/cmd/internal/http/handlers.go @@ -99,7 +99,7 @@ func RegisterPrometheusHandler(mux httpmux.Mux, resourceManager manager.Manager, r := prometheus.NewRegistry() r.MustRegister( metrics.NewPrometheusCollector(resourceManager, f, includedMetrics, clock.RealClock{}), - metrics.NewPrometheusMachineCollector(resourceManager), + metrics.NewPrometheusMachineCollector(resourceManager, includedMetrics), prometheus.NewGoCollector(), prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}), ) diff --git a/container/factory.go b/container/factory.go index e8a15616..2db30671 100644 --- a/container/factory.go +++ b/container/factory.go @@ -60,6 +60,7 @@ const ( HugetlbUsageMetrics MetricKind = "hugetlb" PerfMetrics MetricKind = "perf_event" ReferencedMemoryMetrics MetricKind = "referenced_memory" + CPUTopologyMetrics MetricKind = "cpu_topology" ) // AllMetrics represents all kinds of metrics that cAdvisor supported. 
@@ -81,6 +82,7 @@ var AllMetrics = MetricSet{ HugetlbUsageMetrics: struct{}{}, PerfMetrics: struct{}{}, ReferencedMemoryMetrics: struct{}{}, + CPUTopologyMetrics: struct{}{}, } func (mk MetricKind) String() string { diff --git a/docs/storage/prometheus.md b/docs/storage/prometheus.md index 0cde28b7..45596b67 100644 --- a/docs/storage/prometheus.md +++ b/docs/storage/prometheus.md @@ -88,10 +88,15 @@ The table below lists the Prometheus hardware metrics exposed by cAdvisor (in al Metric name | Type | Description | Unit (where applicable) :-----------|:-----|:------------|:----------------------- +`machine_cpu_cache_capacity_bytes` | Gauge | Cache size in bytes assigned to NUMA node and CPU core | bytes `machine_cpu_cores` | Gauge | Number of physical CPU cores | `machine_cpu_physical_cores` | Gauge | Number of logical CPU cores | `machine_cpu_sockets` | Gauge | Number of CPU sockets | `machine_dimm_capacity_bytes` | Gauge | Total RAM DIMM capacity (all types memory modules) value labeled by dimm type,
information is retrieved from sysfs edac per-DIMM API (/sys/devices/system/edac/mc/) introduced in kernel 3.6 | bytes `machine_dimm_count` | Gauge | Number of RAM DIMM (all types memory modules) value labeled by dimm type,
information is retrieved from sysfs edac per-DIMM API (/sys/devices/system/edac/mc/) introduced in kernel 3.6 | `machine_memory_bytes` | Gauge | Amount of memory installed on the machine | bytes +`machine_node_hugepages_count` | Gauge | Number of hugepages assigned to NUMA node | +`machine_node_memory_capacity_bytes` | Gauge | Amount of memory assigned to NUMA node | bytes +`machine_nvm_avg_power_budget_watts` | Gauge | NVM power budget | watts `machine_nvm_capacity` | Gauge | NVM capacity value labeled by NVM mode (memory mode or app direct mode) | bytes +`machine_thread_siblings_count` | Gauge | Number of CPU thread siblings | diff --git a/info/v1/machine.go b/info/v1/machine.go index 04ad666f..9dd989d4 100644 --- a/info/v1/machine.go +++ b/info/v1/machine.go @@ -14,6 +14,8 @@ package v1 +import "time" + type FsInfo struct { // Block device associated with the filesystem. Device string `json:"device"` @@ -156,6 +158,9 @@ const ( ) type MachineInfo struct { + // The time of this information point. + Timestamp time.Time `json:"timestamp"` + // The number of cores in this machine. 
NumCores int `json:"num_cores"` @@ -227,6 +232,7 @@ func (m *MachineInfo) Clone() *MachineInfo { } } copy := MachineInfo{ + Timestamp: m.Timestamp, NumCores: m.NumCores, NumPhysicalCores: m.NumPhysicalCores, NumSockets: m.NumSockets, diff --git a/machine/info.go b/machine/info.go index 61b88a41..72a77130 100644 --- a/machine/info.go +++ b/machine/info.go @@ -20,6 +20,7 @@ import ( "io/ioutil" "path/filepath" "strings" + "time" "github.com/google/cadvisor/fs" info "github.com/google/cadvisor/info/v1" @@ -119,6 +120,7 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach instanceID := realCloudInfo.GetInstanceID() machineInfo := &info.MachineInfo{ + Timestamp: time.Now(), NumCores: numCores, NumPhysicalCores: GetPhysicalCores(cpuinfo), NumSockets: GetSockets(cpuinfo), diff --git a/metrics/prometheus_fake.go b/metrics/prometheus_fake.go index 433bd600..823d6d0b 100644 --- a/metrics/prometheus_fake.go +++ b/metrics/prometheus_fake.go @@ -35,6 +35,7 @@ func (p testSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, erro func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) { return &info.MachineInfo{ + Timestamp: time.Unix(1395066363, 0), NumCores: 4, NumPhysicalCores: 1, NumSockets: 1, @@ -50,6 +51,216 @@ func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, erro MachineID: "machine-id-test", SystemUUID: "system-uuid-test", BootID: "boot-id-test", + Topology: []info.Node{ + { + Id: 0, + Memory: 33604804608, + HugePages: []info.HugePagesInfo{ + { + PageSize: uint64(1048576), + NumPages: uint64(0), + }, + { + PageSize: uint64(2048), + NumPages: uint64(0), + }, + }, + Cores: []info.Core{ + { + Id: 0, + Threads: []int{0, 1}, + Caches: []info.Cache{ + { + Size: 32768, + Type: "Data", + Level: 1, + }, + { + Size: 32768, + Type: "Instruction", + Level: 1, + }, + { + Size: 262144, + Type: "Unified", + Level: 2, + }, + }, + }, + { + Id: 1, + Threads: []int{2, 3}, + Caches: []info.Cache{ 
+ { + Size: 32764, + Type: "Data", + Level: 1, + }, + { + Size: 32764, + Type: "Instruction", + Level: 1, + }, + { + Size: 262148, + Type: "Unified", + Level: 2, + }, + }, + }, + + { + Id: 2, + Threads: []int{4, 5}, + Caches: []info.Cache{ + { + Size: 32768, + Type: "Data", + Level: 1, + }, + { + Size: 32768, + Type: "Instruction", + Level: 1, + }, + { + Size: 262144, + Type: "Unified", + Level: 2, + }, + }, + }, + { + Id: 3, + Threads: []int{6, 7}, + Caches: []info.Cache{ + { + Size: 32764, + Type: "Data", + Level: 1, + }, + { + Size: 32764, + Type: "Instruction", + Level: 1, + }, + { + Size: 262148, + Type: "Unified", + Level: 2, + }, + }, + }, + }, + }, + { + Id: 1, + Memory: 33604804606, + HugePages: []info.HugePagesInfo{ + { + PageSize: uint64(1048576), + NumPages: uint64(2), + }, + { + PageSize: uint64(2048), + NumPages: uint64(4), + }, + }, + Cores: []info.Core{ + { + Id: 4, + Threads: []int{8, 9}, + Caches: []info.Cache{ + { + Size: 32768, + Type: "Data", + Level: 1, + }, + { + Size: 32768, + Type: "Instruction", + Level: 1, + }, + { + Size: 262144, + Type: "Unified", + Level: 2, + }, + }, + }, + { + Id: 5, + Threads: []int{10, 11}, + Caches: []info.Cache{ + { + Size: 32764, + Type: "Data", + Level: 1, + }, + { + Size: 32764, + Type: "Instruction", + Level: 1, + }, + { + Size: 262148, + Type: "Unified", + Level: 2, + }, + }, + }, + { + Id: 6, + Threads: []int{12, 13}, + Caches: []info.Cache{ + { + Size: 32768, + Type: "Data", + Level: 1, + }, + { + Size: 32768, + Type: "Instruction", + Level: 1, + }, + { + Size: 262144, + Type: "Unified", + Level: 2, + }, + }, + }, + { + Id: 7, + Threads: []int{14, 15}, + Caches: []info.Cache{ + { + Size: 32764, + Type: "Data", + Level: 1, + }, + { + Size: 32764, + Type: "Instruction", + Level: 1, + }, + { + Size: 262148, + Type: "Unified", + Level: 2, + }, + }, + }, + }, + Caches: []info.Cache{ + { + Size: 8388608, + Type: "Unified", + Level: 3, + }, + }, + }, + }, }, nil } diff --git a/metrics/prometheus_machine.go 
b/metrics/prometheus_machine.go index 8be21489..6a9c55ab 100644 --- a/metrics/prometheus_machine.go +++ b/metrics/prometheus_machine.go @@ -15,6 +15,9 @@ package metrics import ( + "strconv" + + "github.com/google/cadvisor/container" info "github.com/google/cadvisor/info/v1" "github.com/prometheus/client_golang/prometheus" @@ -24,14 +27,21 @@ import ( var baseLabelsNames = []string{"machine_id", "system_uuid", "boot_id"} const ( - prometheusModeLabelName = "mode" - prometheusTypeLabelName = "type" + prometheusModeLabelName = "mode" + prometheusTypeLabelName = "type" + prometheusLevelLabelName = "level" + prometheusNodeLabelName = "node_id" + prometheusCoreLabelName = "core_id" + prometheusThreadLabelName = "thread_id" + prometheusPageSizeLabelName = "page_size" nvmMemoryMode = "memory_mode" nvmAppDirectMode = "app_direct_mode" memoryByTypeDimmCountKey = "DimmCount" memoryByTypeDimmCapacityKey = "Capacity" + + emptyLabelValue = "" ) // machineMetric describes a multi-dimensional metric used for exposing a @@ -57,8 +67,9 @@ type PrometheusMachineCollector struct { } // NewPrometheusMachineCollector returns a new PrometheusCollector. 
-func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector { +func NewPrometheusMachineCollector(i infoProvider, includedMetrics container.MetricSet) *PrometheusMachineCollector { c := &PrometheusMachineCollector{ + infoProvider: i, errors: prometheus.NewGauge(prometheus.GaugeOpts{ Namespace: "machine", @@ -71,7 +82,7 @@ func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector { help: "Number of physical CPU cores.", valueType: prometheus.GaugeValue, getValues: func(machineInfo *info.MachineInfo) metricValues { - return metricValues{{value: float64(machineInfo.NumPhysicalCores)}} + return metricValues{{value: float64(machineInfo.NumPhysicalCores), timestamp: machineInfo.Timestamp}} }, }, { @@ -79,7 +90,7 @@ func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector { help: "Number of logical CPU cores.", valueType: prometheus.GaugeValue, getValues: func(machineInfo *info.MachineInfo) metricValues { - return metricValues{{value: float64(machineInfo.NumCores)}} + return metricValues{{value: float64(machineInfo.NumCores), timestamp: machineInfo.Timestamp}} }, }, { @@ -87,7 +98,7 @@ func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector { help: "Number of CPU sockets.", valueType: prometheus.GaugeValue, getValues: func(machineInfo *info.MachineInfo) metricValues { - return metricValues{{value: float64(machineInfo.NumSockets)}} + return metricValues{{value: float64(machineInfo.NumSockets), timestamp: machineInfo.Timestamp}} }, }, { @@ -95,7 +106,7 @@ func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector { help: "Amount of memory installed on the machine.", valueType: prometheus.GaugeValue, getValues: func(machineInfo *info.MachineInfo) metricValues { - return metricValues{{value: float64(machineInfo.MemoryCapacity)}} + return metricValues{{value: float64(machineInfo.MemoryCapacity), timestamp: machineInfo.Timestamp}} }, }, { @@ -125,13 +136,62 @@ func 
NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector { extraLabels: []string{prometheusModeLabelName}, getValues: func(machineInfo *info.MachineInfo) metricValues { return metricValues{ - {value: float64(machineInfo.NVMInfo.MemoryModeCapacity), labels: []string{nvmMemoryMode}}, - {value: float64(machineInfo.NVMInfo.AppDirectModeCapacity), labels: []string{nvmAppDirectMode}}, + {value: float64(machineInfo.NVMInfo.MemoryModeCapacity), labels: []string{nvmMemoryMode}, timestamp: machineInfo.Timestamp}, + {value: float64(machineInfo.NVMInfo.AppDirectModeCapacity), labels: []string{nvmAppDirectMode}, timestamp: machineInfo.Timestamp}, } }, }, + { + name: "machine_nvm_avg_power_budget_watts", + help: "NVM power budget.", + valueType: prometheus.GaugeValue, + getValues: func(machineInfo *info.MachineInfo) metricValues { + return metricValues{{value: float64(machineInfo.NVMInfo.AvgPowerBudget), timestamp: machineInfo.Timestamp}} + }, + }, }, } + + if includedMetrics.Has(container.CPUTopologyMetrics) { + c.machineMetrics = append(c.machineMetrics, []machineMetric{ + { + name: "machine_cpu_cache_capacity_bytes", + help: "Cache size in bytes assigned to NUMA node and CPU core.", + valueType: prometheus.GaugeValue, + extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusTypeLabelName, prometheusLevelLabelName}, + getValues: func(machineInfo *info.MachineInfo) metricValues { + return getCaches(machineInfo) + }, + }, + { + name: "machine_thread_siblings_count", + help: "Number of CPU thread siblings.", + valueType: prometheus.GaugeValue, + extraLabels: []string{prometheusNodeLabelName, prometheusCoreLabelName, prometheusThreadLabelName}, + getValues: func(machineInfo *info.MachineInfo) metricValues { + return getThreadsSiblingsCount(machineInfo) + }, + }, + { + name: "machine_node_memory_capacity_bytes", + help: "Amount of memory assigned to NUMA node.", + valueType: prometheus.GaugeValue, + extraLabels: 
[]string{prometheusNodeLabelName}, + getValues: func(machineInfo *info.MachineInfo) metricValues { + return getNodeMemory(machineInfo) + }, + }, + { + name: "machine_node_hugepages_count", + help: "Numer of hugepages assigned to NUMA node.", + valueType: prometheus.GaugeValue, + extraLabels: []string{prometheusNodeLabelName, prometheusPageSizeLabelName}, + getValues: func(machineInfo *info.MachineInfo) metricValues { + return getHugePagesCount(machineInfo) + }, + }, + }...) + } return c } @@ -173,8 +233,15 @@ func (collector *PrometheusMachineCollector) collectMachineInfo(ch chan<- promet if len(metric.extraLabels) != 0 { labelValues = append(labelValues, metricValue.labels...) } - ch <- prometheus.MustNewConstMetric(metric.desc(baseLabelsNames), + + prometheusMetric := prometheus.MustNewConstMetric(metric.desc(baseLabelsNames), metric.valueType, metricValue.value, labelValues...) + + if metricValue.timestamp.IsZero() { + ch <- prometheusMetric + } else { + ch <- prometheus.NewMetricWithTimestamp(metricValue.timestamp, prometheusMetric) + } } } @@ -193,7 +260,90 @@ func getMemoryByType(machineInfo *info.MachineInfo, property string) metricValue klog.Warningf("Incorrect propery name for MemoryByType, property %s", property) return metricValues{} } - mValues = append(mValues, metricValue{value: propertyValue, labels: []string{memoryType}}) + mValues = append(mValues, metricValue{value: propertyValue, labels: []string{memoryType}, timestamp: machineInfo.Timestamp}) + } + return mValues +} + +func getThreadsSiblingsCount(machineInfo *info.MachineInfo) metricValues { + mValues := make(metricValues, 0, machineInfo.NumCores) + for _, node := range machineInfo.Topology { + nodeID := strconv.Itoa(node.Id) + + for _, core := range node.Cores { + coreID := strconv.Itoa(core.Id) + siblingsCount := len(core.Threads) + + for _, thread := range core.Threads { + mValues = append(mValues, + metricValue{ + value: float64(siblingsCount), + labels: []string{nodeID, coreID, 
strconv.Itoa(thread)}, + timestamp: machineInfo.Timestamp, + }) + } + } + } + return mValues +} + +func getNodeMemory(machineInfo *info.MachineInfo) metricValues { + mValues := make(metricValues, 0, len(machineInfo.Topology)) + for _, node := range machineInfo.Topology { + nodeID := strconv.Itoa(node.Id) + mValues = append(mValues, + metricValue{ + value: float64(node.Memory), + labels: []string{nodeID}, + timestamp: machineInfo.Timestamp, + }) + } + return mValues +} + +func getHugePagesCount(machineInfo *info.MachineInfo) metricValues { + mValues := make(metricValues, 0) + for _, node := range machineInfo.Topology { + nodeID := strconv.Itoa(node.Id) + + for _, hugePage := range node.HugePages { + mValues = append(mValues, + metricValue{ + value: float64(hugePage.NumPages), + labels: []string{nodeID, strconv.FormatUint(hugePage.PageSize, 10)}, + timestamp: machineInfo.Timestamp, + }) + } + } + return mValues +} + +func getCaches(machineInfo *info.MachineInfo) metricValues { + mValues := make(metricValues, 0) + for _, node := range machineInfo.Topology { + nodeID := strconv.Itoa(node.Id) + + for _, core := range node.Cores { + coreID := strconv.Itoa(core.Id) + + for _, cache := range core.Caches { + mValues = append(mValues, + metricValue{ + value: float64(cache.Size), + labels: []string{nodeID, coreID, cache.Type, strconv.Itoa(cache.Level)}, + timestamp: machineInfo.Timestamp, + }) + } + } + + for _, cache := range node.Caches { + mValues = append(mValues, + metricValue{ + value: float64(cache.Size), + labels: []string{nodeID, emptyLabelValue, cache.Type, strconv.Itoa(cache.Level)}, + timestamp: machineInfo.Timestamp, + }) + } } return mValues } diff --git a/metrics/prometheus_machine_test.go b/metrics/prometheus_machine_test.go index 284bac78..8095415f 100644 --- a/metrics/prometheus_machine_test.go +++ b/metrics/prometheus_machine_test.go @@ -17,8 +17,11 @@ package metrics import ( "bytes" "io/ioutil" + "reflect" "testing" + "time" + 
"github.com/google/cadvisor/container" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/expfmt" "github.com/stretchr/testify/assert" @@ -28,7 +31,7 @@ const machineMetricsFile = "testdata/prometheus_machine_metrics" const machineMetricsFailureFile = "testdata/prometheus_machine_metrics_failure" func TestPrometheusMachineCollector(t *testing.T) { - collector := NewPrometheusMachineCollector(testSubcontainersInfoProvider{}) + collector := NewPrometheusMachineCollector(testSubcontainersInfoProvider{}, container.AllMetrics) registry := prometheus.NewRegistry() registry.MustRegister(collector) @@ -41,6 +44,7 @@ func TestPrometheusMachineCollector(t *testing.T) { assert.Nil(t, err) } collectedMetrics := metricBuffer.String() + expectedMetrics, err := ioutil.ReadFile(machineMetricsFile) assert.Nil(t, err) assert.Equal(t, string(expectedMetrics), collectedMetrics) @@ -51,7 +55,7 @@ func TestPrometheusMachineCollectorWithFailure(t *testing.T) { successfulProvider: testSubcontainersInfoProvider{}, shouldFail: true, } - collector := NewPrometheusMachineCollector(provider) + collector := NewPrometheusMachineCollector(provider, container.AllMetrics) registry := prometheus.NewRegistry() registry.MustRegister(collector) @@ -87,3 +91,105 @@ func TestGetMemoryByTypeWithWrongProperty(t *testing.T) { metricVals := getMemoryByType(machineInfo, "wrong_property_name") assert.Equal(t, 0, len(metricVals)) } + +func TestGetCaches(t *testing.T) { + machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo() + assert.Nil(t, err) + + metricVals := getCaches(machineInfo) + + assert.Equal(t, 25, len(metricVals)) + expectedMetricVals := []metricValue{ + {value: 32768, labels: []string{"0", "0", "Data", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32768, labels: []string{"0", "0", "Instruction", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 262144, labels: []string{"0", "0", "Unified", "2"}, timestamp: time.Unix(1395066363, 0)}, + {value: 
32764, labels: []string{"0", "1", "Data", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32764, labels: []string{"0", "1", "Instruction", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 262148, labels: []string{"0", "1", "Unified", "2"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32768, labels: []string{"0", "2", "Data", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32768, labels: []string{"0", "2", "Instruction", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 262144, labels: []string{"0", "2", "Unified", "2"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32764, labels: []string{"0", "3", "Data", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32764, labels: []string{"0", "3", "Instruction", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 262148, labels: []string{"0", "3", "Unified", "2"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32768, labels: []string{"1", "4", "Data", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32768, labels: []string{"1", "4", "Instruction", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 262144, labels: []string{"1", "4", "Unified", "2"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32764, labels: []string{"1", "5", "Data", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32764, labels: []string{"1", "5", "Instruction", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 262148, labels: []string{"1", "5", "Unified", "2"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32768, labels: []string{"1", "6", "Data", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32768, labels: []string{"1", "6", "Instruction", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 262144, labels: []string{"1", "6", "Unified", "2"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32764, labels: []string{"1", "7", "Data", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 32764, labels: []string{"1", "7", "Instruction", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 
262148, labels: []string{"1", "7", "Unified", "2"}, timestamp: time.Unix(1395066363, 0)}, + {value: 8388608, labels: []string{"1", "", "Unified", "3"}, timestamp: time.Unix(1395066363, 0)}, + } + assertMetricValues(t, expectedMetricVals, metricVals, "Unexpected information about Node memory") +} + +func TestGetThreadsSiblingsCount(t *testing.T) { + machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo() + assert.Nil(t, err) + + metricVals := getThreadsSiblingsCount(machineInfo) + + assert.Equal(t, 16, len(metricVals)) + expectedMetricVals := []metricValue{ + {value: 2, labels: []string{"0", "0", "0"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"0", "0", "1"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"0", "1", "2"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"0", "1", "3"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"0", "2", "4"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"0", "2", "5"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"0", "3", "6"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"0", "3", "7"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"1", "4", "8"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"1", "4", "9"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"1", "5", "10"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"1", "5", "11"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"1", "6", "12"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"1", "6", "13"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"1", "7", "14"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"1", "7", "15"}, timestamp: time.Unix(1395066363, 0)}, + } + assertMetricValues(t, expectedMetricVals, metricVals, "Unexpected 
information about CPU threads") +} + +func TestGetNodeMemory(t *testing.T) { + machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo() + assert.Nil(t, err) + + metricVals := getNodeMemory(machineInfo) + + assert.Equal(t, 2, len(metricVals)) + expectedMetricVals := []metricValue{ + {value: 33604804608, labels: []string{"0"}, timestamp: time.Unix(1395066363, 0)}, + {value: 33604804606, labels: []string{"1"}, timestamp: time.Unix(1395066363, 0)}, + } + assertMetricValues(t, expectedMetricVals, metricVals, "Unexpected information about Node memory") +} + +func TestGetHugePagesCount(t *testing.T) { + machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo() + assert.Nil(t, err) + + metricVals := getHugePagesCount(machineInfo) + + assert.Equal(t, 4, len(metricVals)) + expectedMetricVals := []metricValue{ + {value: 0, labels: []string{"0", "1048576"}, timestamp: time.Unix(1395066363, 0)}, + {value: 0, labels: []string{"0", "2048"}, timestamp: time.Unix(1395066363, 0)}, + {value: 2, labels: []string{"1", "1048576"}, timestamp: time.Unix(1395066363, 0)}, + {value: 4, labels: []string{"1", "2048"}, timestamp: time.Unix(1395066363, 0)}, + } + assertMetricValues(t, expectedMetricVals, metricVals, "Unexpected information about Node memory") +} + +func assertMetricValues(t *testing.T, expected metricValues, actual metricValues, message string) { + for i := range actual { + assert.Truef(t, reflect.DeepEqual(expected[i], actual[i]), + "%s expected %#v but found %#v\n", message, expected[i], actual[i]) + } +} diff --git a/metrics/testdata/prometheus_machine_metrics b/metrics/testdata/prometheus_machine_metrics index 81a2f186..17604278 100644 --- a/metrics/testdata/prometheus_machine_metrics +++ b/metrics/testdata/prometheus_machine_metrics @@ -1,27 +1,85 @@ +# HELP machine_cpu_cache_capacity_bytes Cache size in bytes assigned to NUMA node and CPU core. 
+# TYPE machine_cpu_cache_capacity_bytes gauge +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="",level="3",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 8.388608e+06 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="0",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Data"} 32768 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="0",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Instruction"} 32768 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="0",level="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Unified"} 262144 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="1",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Data"} 32764 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="1",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Instruction"} 32764 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="1",level="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Unified"} 262148 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="2",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Data"} 32768 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="2",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Instruction"} 32768 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="2",level="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Unified"} 262144 1395066363000 
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="3",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Data"} 32764 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="3",level="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Instruction"} 32764 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="3",level="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",type="Unified"} 262148 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="4",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Data"} 32768 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="4",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Instruction"} 32768 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="4",level="2",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 262144 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="5",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Data"} 32764 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="5",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Instruction"} 32764 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="5",level="2",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 262148 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="6",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Data"} 32768 1395066363000 
+machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="6",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Instruction"} 32768 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="6",level="2",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 262144 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="7",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Data"} 32764 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="7",level="1",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Instruction"} 32764 1395066363000 +machine_cpu_cache_capacity_bytes{boot_id="boot-id-test",core_id="7",level="2",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",type="Unified"} 262148 1395066363000 # HELP machine_cpu_cores Number of logical CPU cores. # TYPE machine_cpu_cores gauge -machine_cpu_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 4 +machine_cpu_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 4 1395066363000 # HELP machine_cpu_physical_cores Number of physical CPU cores. # TYPE machine_cpu_physical_cores gauge -machine_cpu_physical_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1 +machine_cpu_physical_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1 1395066363000 # HELP machine_cpu_sockets Number of CPU sockets. 
# TYPE machine_cpu_sockets gauge -machine_cpu_sockets{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1 +machine_cpu_sockets{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1 1395066363000 # HELP machine_dimm_capacity_bytes Total RAM DIMM capacity (all types memory modules) value labeled by dimm type. # TYPE machine_dimm_capacity_bytes gauge -machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 2.168421613568e+12 -machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 4.12316860416e+11 +machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 2.168421613568e+12 1395066363000 +machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 4.12316860416e+11 1395066363000 # HELP machine_dimm_count Number of RAM DIMM (all types memory modules) value labeled by dimm type. # TYPE machine_dimm_count gauge -machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 8 -machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 12 +machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 8 1395066363000 +machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 12 1395066363000 # HELP machine_memory_bytes Amount of memory installed on the machine. 
# TYPE machine_memory_bytes gauge -machine_memory_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1024 +machine_memory_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1024 1395066363000 +# HELP machine_node_hugepages_count Numer of hugepages assigned to NUMA node. +# TYPE machine_node_hugepages_count gauge +machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",page_size="1048576",system_uuid="system-uuid-test"} 0 1395066363000 +machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",page_size="2048",system_uuid="system-uuid-test"} 0 1395066363000 +machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",page_size="1048576",system_uuid="system-uuid-test"} 2 1395066363000 +machine_node_hugepages_count{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",page_size="2048",system_uuid="system-uuid-test"} 4 1395066363000 +# HELP machine_node_memory_capacity_bytes Amount of memory assigned to NUMA node. +# TYPE machine_node_memory_capacity_bytes gauge +machine_node_memory_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test"} 3.3604804608e+10 1395066363000 +machine_node_memory_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test"} 3.3604804606e+10 1395066363000 +# HELP machine_nvm_avg_power_budget_watts NVM power budget. +# TYPE machine_nvm_avg_power_budget_watts gauge +machine_nvm_avg_power_budget_watts{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 0 1395066363000 # HELP machine_nvm_capacity NVM capacity value labeled by NVM mode (memory mode or app direct mode). 
# TYPE machine_nvm_capacity gauge -machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="app_direct_mode",system_uuid="system-uuid-test"} 1.735166787584e+12 -machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="memory_mode",system_uuid="system-uuid-test"} 4.294967296e+11 +machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="app_direct_mode",system_uuid="system-uuid-test"} 1.735166787584e+12 1395066363000 +machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="memory_mode",system_uuid="system-uuid-test"} 4.294967296e+11 1395066363000 # HELP machine_scrape_error 1 if there was an error while getting machine metrics, 0 otherwise. # TYPE machine_scrape_error gauge machine_scrape_error 0 +# HELP machine_thread_siblings_count Number of CPU thread siblings. +# TYPE machine_thread_siblings_count gauge +machine_thread_siblings_count{boot_id="boot-id-test",core_id="0",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="0"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="0",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="1"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="2"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="1",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="3"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="4"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="2",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="5"} 2 1395066363000 
+machine_thread_siblings_count{boot_id="boot-id-test",core_id="3",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="6"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="3",machine_id="machine-id-test",node_id="0",system_uuid="system-uuid-test",thread_id="7"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="4",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="8"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="4",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="9"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="5",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="10"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="5",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="11"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="6",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="12"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="6",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="13"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="7",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="14"} 2 1395066363000 +machine_thread_siblings_count{boot_id="boot-id-test",core_id="7",machine_id="machine-id-test",node_id="1",system_uuid="system-uuid-test",thread_id="15"} 2 1395066363000