Extending Prometheus metrics by hardware metrics (#2444)

* Extending Prometheus metrics by hardware metrics:
- machine_cpu_physical_cores
- machine_cpu_sockets
- machine_dimm_capacity_bytes
- machine_dimm_count
- machine_nvm_capacity

Signed-off-by: Katarzyna Kujawa <katarzyna.kujawa@intel.com>
This commit is contained in:
Katarzyna Kujawa 2020-04-02 20:43:47 +02:00 committed by GitHub
parent e0c7caafcc
commit 921d0824c4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 835 additions and 470 deletions

View File

@ -157,9 +157,10 @@ func main() {
collectorHttpClient := createCollectorHttpClient(*collectorCert, *collectorKey)
containerManager, err := manager.New(memoryStorage, sysFs, *maxHousekeepingInterval, *allowDynamicHousekeeping, includedMetrics, &collectorHttpClient, strings.Split(*rawCgroupPrefixWhiteList, ","))
// Create a new manager for containers and machine
resourceManager, err := manager.New(memoryStorage, sysFs, *maxHousekeepingInterval, *allowDynamicHousekeeping, includedMetrics, &collectorHttpClient, strings.Split(*rawCgroupPrefixWhiteList, ","))
if err != nil {
klog.Fatalf("Failed to create a Container Manager: %s", err)
klog.Fatalf("Failed to create a manager: %s", err)
}
mux := http.NewServeMux()
@ -172,7 +173,7 @@ func main() {
}
// Register all HTTP handlers.
err = cadvisorhttp.RegisterHandlers(mux, containerManager, *httpAuthFile, *httpAuthRealm, *httpDigestFile, *httpDigestRealm, *urlBasePrefix)
err = cadvisorhttp.RegisterHandlers(mux, resourceManager, *httpAuthFile, *httpAuthRealm, *httpDigestFile, *httpDigestRealm, *urlBasePrefix)
if err != nil {
klog.Fatalf("Failed to register HTTP handlers: %v", err)
}
@ -183,15 +184,16 @@ func main() {
containerLabelFunc = metrics.BaseContainerLabels(whitelistedLabels)
}
cadvisorhttp.RegisterPrometheusHandler(mux, containerManager, *prometheusEndpoint, containerLabelFunc, includedMetrics)
// Register Prometheus collector to gather information about containers, Go runtime, processes, and machine
cadvisorhttp.RegisterPrometheusHandler(mux, resourceManager, *prometheusEndpoint, containerLabelFunc, includedMetrics)
// Start the manager.
if err := containerManager.Start(); err != nil {
klog.Fatalf("Failed to start container manager: %v", err)
if err := resourceManager.Start(); err != nil {
klog.Fatalf("Failed to start manager: %v", err)
}
// Install signal handler.
installSignalHandler(containerManager)
installSignalHandler(resourceManager)
klog.V(1).Infof("Starting cAdvisor version: %s-%s on port %d", version.Info["version"], version.Info["revision"], *argPort)

View File

@ -92,11 +92,12 @@ func RegisterHandlers(mux httpmux.Mux, containerManager manager.Manager, httpAut
// RegisterPrometheusHandler creates a new PrometheusCollector and configures
// the provided HTTP mux to handle the given Prometheus endpoint.
func RegisterPrometheusHandler(mux httpmux.Mux, containerManager manager.Manager, prometheusEndpoint string,
func RegisterPrometheusHandler(mux httpmux.Mux, resourceManager manager.Manager, prometheusEndpoint string,
f metrics.ContainerLabelsFunc, includedMetrics container.MetricSet) {
r := prometheus.NewRegistry()
r.MustRegister(
metrics.NewPrometheusCollector(containerManager, f, includedMetrics),
metrics.NewPrometheusCollector(resourceManager, f, includedMetrics),
metrics.NewPrometheusMachineCollector(resourceManager),
prometheus.NewGoCollector(),
prometheus.NewProcessCollector(prometheus.ProcessCollectorOpts{}),
)

View File

@ -1,6 +1,6 @@
# Monitoring cAdvisor with Prometheus
cAdvisor exposes container statistics as [Prometheus](https://prometheus.io) metrics out of the box. By default, these metrics are served under the `/metrics` HTTP endpoint. This endpoint may be customized by setting the `-prometheus_endpoint` command-line flag.
cAdvisor exposes container and hardware statistics as [Prometheus](https://prometheus.io) metrics out of the box. By default, these metrics are served under the `/metrics` HTTP endpoint. This endpoint may be customized by setting the `-prometheus_endpoint` command-line flag.
To monitor cAdvisor with Prometheus, simply configure one or more jobs in Prometheus which scrape the relevant cAdvisor processes at that metrics endpoint. For details, see Prometheus's [Configuration](https://prometheus.io/docs/operating/configuration/) documentation, as well as the [Getting started](https://prometheus.io/docs/introduction/getting_started/) guide.
@ -10,9 +10,9 @@ To monitor cAdvisor with Prometheus, simply configure one or more jobs in Promet
* [vegasbrianc](https://github.com/vegasbrianc) provides a [starter project](https://github.com/vegasbrianc/prometheus) for cAdvisor and Prometheus monitoring, alongside a ready-to-use [Grafana dashboard](https://github.com/vegasbrianc/grafana_dashboard).
## Prometheus metrics
## Prometheus container metrics
The table below lists the Prometheus metrics exposed by cAdvisor (in alphabetical order by metric name):
The table below lists the Prometheus container metrics exposed by cAdvisor (in alphabetical order by metric name):
Metric name | Type | Description | Unit (where applicable)
:-----------|:-----|:------------|:-----------------------
@ -78,3 +78,17 @@ Metric name | Type | Description | Unit (where applicable)
`container_spec_memory_reservation_limit_bytes` | Gauge | Memory reservation limit for the container | bytes
`container_start_time_seconds` | Gauge | Start time of the container since unix epoch | seconds
`container_tasks_state` | Gauge | Number of tasks in given state (`sleeping`, `running`, `stopped`, `uninterruptible`, or `ioawaiting`) |
## Prometheus hardware metrics
The table below lists the Prometheus hardware metrics exposed by cAdvisor (in alphabetical order by metric name):
Metric name | Type | Description | Unit (where applicable)
:-----------|:-----|:------------|:-----------------------
`machine_cpu_cores` | Gauge | Number of logical CPU cores |
`machine_cpu_physical_cores` | Gauge | Number of physical CPU cores |
`machine_cpu_sockets` | Gauge | Number of CPU sockets |
`machine_dimm_capacity_bytes` | Gauge | Total RAM DIMM capacity (all types memory modules) value labeled by dimm type,<br>information is retrieved from sysfs edac per-DIMM API (/sys/devices/system/edac/mc/) introduced in kernel 3.6 | bytes
`machine_dimm_count` | Gauge | Number of RAM DIMM (all types memory modules) value labeled by dimm type,<br>information is retrieved from sysfs edac per-DIMM API (/sys/devices/system/edac/mc/) introduced in kernel 3.6 |
`machine_memory_bytes` | Gauge | Amount of memory installed on the machine | bytes
`machine_nvm_capacity` | Gauge | NVM capacity value labeled by NVM mode (memory mode or app direct mode) | bytes

42
metrics/metrics.go Normal file
View File

@ -0,0 +1,42 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"time"
info "github.com/google/cadvisor/info/v1"
)
// metricValue describes a single metric value for a given set of label values
// within a parent metric definition.
type metricValue struct {
	// value is the numeric sample.
	value float64
	// labels holds the label values that accompany the sample.
	labels []string
	// timestamp is the time associated with the sample.
	timestamp time.Time
}

// metricValues is a list of metricValue samples.
type metricValues []metricValue
// infoProvider is the data source the collectors in this package read from.
// It will usually be manager.Manager, but can be swapped out for testing.
type infoProvider interface {
	// SubcontainersInfo provides information about all subcontainers of the
	// specified container including itself.
	SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
	// GetVersionInfo provides information about the version.
	GetVersionInfo() (*info.VersionInfo, error)
	// GetMachineInfo provides information about the machine.
	GetMachineInfo() (*info.MachineInfo, error)
}

View File

@ -25,27 +25,6 @@ import (
"k8s.io/klog"
)
// infoProvider will usually be manager.Manager, but can be swapped out for testing.
type infoProvider interface {
// SubcontainersInfo provides information about all subcontainers of the
// specified container including itself.
SubcontainersInfo(containerName string, query *info.ContainerInfoRequest) ([]*info.ContainerInfo, error)
// GetVersionInfo provides information about the version.
GetVersionInfo() (*info.VersionInfo, error)
// GetMachineInfo provides information about the machine.
GetMachineInfo() (*info.MachineInfo, error)
}
// metricValue describes a single metric value for a given set of label values
// within a parent containerMetric.
type metricValue struct {
value float64
labels []string
timestamp time.Time
}
type metricValues []metricValue
// asFloat64 converts a uint64 into a float64.
func asFloat64(v uint64) float64 { return float64(v) }
@ -1568,11 +1547,7 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
return c
}
var (
versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion", "cadvisorRevision"}, nil)
machineInfoCoresDesc = prometheus.NewDesc("machine_cpu_cores", "Number of CPU cores on the machine.", nil, nil)
machineInfoMemoryDesc = prometheus.NewDesc("machine_memory_bytes", "Amount of memory installed on the machine.", nil, nil)
)
var versionInfoDesc = prometheus.NewDesc("cadvisor_version_info", "A metric with a constant '1' value labeled by kernel version, OS version, docker version, cadvisor version & cadvisor revision.", []string{"kernelVersion", "osVersion", "dockerVersion", "cadvisorVersion", "cadvisorRevision"}, nil)
// Describe describes all the metrics ever exported by cadvisor. It
// implements prometheus.PrometheusCollector.
@ -1582,15 +1557,12 @@ func (c *PrometheusCollector) Describe(ch chan<- *prometheus.Desc) {
ch <- cm.desc([]string{})
}
ch <- versionInfoDesc
ch <- machineInfoCoresDesc
ch <- machineInfoMemoryDesc
}
// Collect fetches the stats from all containers and delivers them as
// Prometheus metrics. It implements prometheus.PrometheusCollector.
func (c *PrometheusCollector) Collect(ch chan<- prometheus.Metric) {
c.errors.Set(0)
c.collectMachineInfo(ch)
c.collectVersionInfo(ch)
c.collectContainersInfo(ch)
c.errors.Collect(ch)
@ -1758,17 +1730,6 @@ func (c *PrometheusCollector) collectVersionInfo(ch chan<- prometheus.Metric) {
ch <- prometheus.MustNewConstMetric(versionInfoDesc, prometheus.GaugeValue, 1, []string{versionInfo.KernelVersion, versionInfo.ContainerOsVersion, versionInfo.DockerVersion, versionInfo.CadvisorVersion, versionInfo.CadvisorRevision}...)
}
func (c *PrometheusCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
machineInfo, err := c.infoProvider.GetMachineInfo()
if err != nil {
c.errors.Set(1)
klog.Warningf("Couldn't get machine info: %s", err)
return
}
ch <- prometheus.MustNewConstMetric(machineInfoCoresDesc, prometheus.GaugeValue, float64(machineInfo.NumCores))
ch <- prometheus.MustNewConstMetric(machineInfoMemoryDesc, prometheus.GaugeValue, float64(machineInfo.MemoryCapacity))
}
// Size after which we consider memory to be "unlimited". This is not
// MaxInt64 due to rounding by the kernel.
const maxMemorySize = uint64(1 << 62)

445
metrics/prometheus_fake.go Normal file
View File

@ -0,0 +1,445 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"errors"
"time"
info "github.com/google/cadvisor/info/v1"
)
// testSubcontainersInfoProvider is a stateless provider whose methods return
// canned version, machine and container information and never fail.
type testSubcontainersInfoProvider struct{}
// GetVersionInfo returns a fixed set of kernel, OS, Docker and cAdvisor
// version strings. The returned error is always nil.
func (p testSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
	version := new(info.VersionInfo)
	version.KernelVersion = "4.1.6-200.fc22.x86_64"
	version.ContainerOsVersion = "Fedora 22 (Twenty Two)"
	version.DockerVersion = "1.8.1"
	version.CadvisorVersion = "0.16.0"
	version.CadvisorRevision = "abcdef"
	return version, nil
}
// GetMachineInfo returns a fixed machine description: CPU topology counts,
// memory capacity, two DIMM types, NVM capacities for both modes and the three
// identifiers (machine ID, system UUID, boot ID). The returned error is always
// nil.
func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
	dimmsByType := map[string]*info.MemoryInfo{
		"Non-volatile-RAM": {Capacity: 2168421613568, DimmCount: 8},
		"Unbuffered-DDR4":  {Capacity: 412316860416, DimmCount: 12},
	}
	nvm := info.NVMInfo{
		MemoryModeCapacity:    429496729600,
		AppDirectModeCapacity: 1735166787584,
	}

	machine := &info.MachineInfo{
		NumCores:         4,
		NumPhysicalCores: 1,
		NumSockets:       1,
		MemoryCapacity:   1024,
		MemoryByType:     dimmsByType,
		NVMInfo:          nvm,
		MachineID:        "machine-id-test",
		SystemUUID:       "system-uuid-test",
		BootID:           "boot-id-test",
	}
	return machine, nil
}
// SubcontainersInfo returns a single canned container named "testcontainer"
// carrying one stats sample. Both arguments are ignored and the returned error
// is always nil. The custom metric timestamps are taken from time.Now(), so
// they differ between calls; every other value is fixed.
func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	return []*info.ContainerInfo{
		{
			ContainerReference: info.ContainerReference{
				Name:    "testcontainer",
				Aliases: []string{"testcontaineralias"},
			},
			Spec: info.ContainerSpec{
				Image:  "test",
				HasCpu: true,
				Cpu: info.CpuSpec{
					Limit:  1000,
					Period: 100000,
					Quota:  10000,
				},
				Memory: info.MemorySpec{
					Limit:       2048,
					Reservation: 1024,
					SwapLimit:   4096,
				},
				HasHugetlb:   true,
				HasProcesses: true,
				Processes: info.ProcessSpec{
					Limit: 100,
				},
				CreationTime: time.Unix(1257894000, 0),
				Labels: map[string]string{
					"foo.label": "bar",
				},
				Envs: map[string]string{
					"foo+env": "prod",
				},
			},
			Stats: []*info.ContainerStats{
				{
					Timestamp: time.Unix(1395066363, 0),
					Cpu: info.CpuStats{
						Usage: info.CpuUsage{
							Total:  1,
							PerCpu: []uint64{2, 3, 4, 5},
							User:   6,
							System: 7,
						},
						CFS: info.CpuCFS{
							Periods:          723,
							ThrottledPeriods: 18,
							ThrottledTime:    1724314000,
						},
						Schedstat: info.CpuSchedstat{
							RunTime:      53643567,
							RunqueueTime: 479424566378,
							RunPeriods:   984285,
						},
						LoadAverage: 2,
					},
					Memory: info.MemoryStats{
						Usage:      8,
						MaxUsage:   8,
						WorkingSet: 9,
						ContainerData: info.MemoryStatsMemoryData{
							Pgfault:    10,
							Pgmajfault: 11,
						},
						HierarchicalData: info.MemoryStatsMemoryData{
							Pgfault:    12,
							Pgmajfault: 13,
						},
						Cache:      14,
						RSS:        15,
						MappedFile: 16,
						Swap:       8192,
					},
					Hugetlb: map[string]info.HugetlbStats{
						"2Mi": {
							Usage:    4,
							MaxUsage: 10,
							Failcnt:  1,
						},
						"1Gi": {
							Usage:    0,
							MaxUsage: 0,
							Failcnt:  0,
						},
					},
					Network: info.NetworkStats{
						InterfaceStats: info.InterfaceStats{
							Name:      "eth0",
							RxBytes:   14,
							RxPackets: 15,
							RxErrors:  16,
							RxDropped: 17,
							TxBytes:   18,
							TxPackets: 19,
							TxErrors:  20,
							TxDropped: 21,
						},
						Interfaces: []info.InterfaceStats{
							{
								Name:      "eth0",
								RxBytes:   14,
								RxPackets: 15,
								RxErrors:  16,
								RxDropped: 17,
								TxBytes:   18,
								TxPackets: 19,
								TxErrors:  20,
								TxDropped: 21,
							},
						},
						Tcp: info.TcpStat{
							Established: 13,
							SynSent:     0,
							SynRecv:     0,
							FinWait1:    0,
							FinWait2:    0,
							TimeWait:    0,
							Close:       0,
							CloseWait:   0,
							LastAck:     0,
							Listen:      3,
							Closing:     0,
						},
						Tcp6: info.TcpStat{
							Established: 11,
							SynSent:     0,
							SynRecv:     0,
							FinWait1:    0,
							FinWait2:    0,
							TimeWait:    0,
							Close:       0,
							CloseWait:   0,
							LastAck:     0,
							Listen:      3,
							Closing:     0,
						},
						TcpAdvanced: info.TcpAdvancedStat{
							TCPFullUndo:               2361,
							TCPMD5NotFound:            0,
							TCPDSACKRecv:              83680,
							TCPSackShifted:            2,
							TCPSackShiftFallback:      298,
							PFMemallocDrop:            0,
							EstabResets:               37,
							InSegs:                    140370590,
							TCPPureAcks:               24251339,
							TCPDSACKOldSent:           15633,
							IPReversePathFilter:       0,
							TCPFastOpenPassiveFail:    0,
							InCsumErrors:              0,
							TCPRenoFailures:           43414,
							TCPMemoryPressuresChrono:  0,
							TCPDeferAcceptDrop:        0,
							TW:                        10436427,
							TCPSpuriousRTOs:           0,
							TCPDSACKIgnoredNoUndo:     71885,
							RtoMax:                    120000,
							ActiveOpens:               11038621,
							EmbryonicRsts:             0,
							RcvPruned:                 0,
							TCPLossProbeRecovery:      401,
							TCPHPHits:                 56096478,
							TCPPartialUndo:            3,
							TCPAbortOnMemory:          0,
							AttemptFails:              48997,
							RetransSegs:               462961,
							SyncookiesFailed:          0,
							OfoPruned:                 0,
							TCPAbortOnLinger:          0,
							TCPAbortFailed:            0,
							TCPRenoReorder:            839,
							TCPRcvCollapsed:           0,
							TCPDSACKIgnoredOld:        0,
							TCPReqQFullDrop:           0,
							OutOfWindowIcmps:          0,
							TWKilled:                  0,
							TCPLossProbes:             88648,
							TCPRenoRecoveryFail:       394,
							TCPFastOpenCookieReqd:     0,
							TCPHPAcks:                 21490641,
							TCPSACKReneging:           0,
							TCPTSReorder:              3,
							TCPSlowStartRetrans:       290832,
							MaxConn:                   -1,
							SyncookiesRecv:            0,
							TCPSackFailures:           60,
							DelayedACKLocked:          90,
							TCPDSACKOfoSent:           1,
							TCPSynRetrans:             988,
							TCPDSACKOfoRecv:           10,
							TCPSACKDiscard:            0,
							TCPMD5Unexpected:          0,
							TCPSackMerged:             6,
							RtoMin:                    200,
							CurrEstab:                 22,
							TCPTimeWaitOverflow:       0,
							ListenOverflows:           0,
							DelayedACKs:               503975,
							TCPLossUndo:               61374,
							TCPOrigDataSent:           130698387,
							TCPBacklogDrop:            0,
							TCPReqQFullDoCookies:      0,
							TCPFastOpenPassive:        0,
							PAWSActive:                0,
							OutRsts:                   91699,
							TCPSackRecoveryFail:       2,
							DelayedACKLost:            18843,
							TCPAbortOnData:            8,
							TCPMinTTLDrop:             0,
							PruneCalled:               0,
							TWRecycled:                0,
							ListenDrops:               0,
							TCPAbortOnTimeout:         0,
							SyncookiesSent:            0,
							TCPSACKReorder:            11,
							TCPDSACKUndo:              33,
							TCPMD5Failure:             0,
							TCPLostRetransmit:         0,
							TCPAbortOnClose:           7,
							TCPFastOpenListenOverflow: 0,
							OutSegs:                   211580512,
							InErrs:                    31,
							TCPTimeouts:               27422,
							TCPLossFailures:           729,
							TCPSackRecovery:           159,
							RtoAlgorithm:              1,
							PassiveOpens:              59,
							LockDroppedIcmps:          0,
							TCPRenoRecovery:           3519,
							TCPFACKReorder:            0,
							TCPFastRetrans:            11794,
							TCPRetransFail:            0,
							TCPMemoryPressures:        0,
							TCPFastOpenActive:         0,
							TCPFastOpenActiveFail:     0,
							PAWSEstab:                 0,
						},
						Udp: info.UdpStat{
							Listen:   0,
							Dropped:  0,
							RxQueued: 0,
							TxQueued: 0,
						},
						Udp6: info.UdpStat{
							Listen:   0,
							Dropped:  0,
							RxQueued: 0,
							TxQueued: 0,
						},
					},
					Filesystem: []info.FsStats{
						{
							Device:          "sda1",
							InodesFree:      524288,
							Inodes:          2097152,
							Limit:           22,
							Usage:           23,
							ReadsCompleted:  24,
							ReadsMerged:     25,
							SectorsRead:     26,
							ReadTime:        27,
							WritesCompleted: 28,
							WritesMerged:    39,
							SectorsWritten:  40,
							WriteTime:       41,
							IoInProgress:    42,
							IoTime:          43,
							WeightedIoTime:  44,
						},
						{
							Device:          "sda2",
							InodesFree:      262144,
							Inodes:          2097152,
							Limit:           37,
							Usage:           38,
							ReadsCompleted:  39,
							ReadsMerged:     40,
							SectorsRead:     41,
							ReadTime:        42,
							WritesCompleted: 43,
							WritesMerged:    44,
							SectorsWritten:  45,
							WriteTime:       46,
							IoInProgress:    47,
							IoTime:          48,
							WeightedIoTime:  49,
						},
					},
					Accelerators: []info.AcceleratorStats{
						{
							Make:        "nvidia",
							Model:       "tesla-p100",
							ID:          "GPU-deadbeef-1234-5678-90ab-feedfacecafe",
							MemoryTotal: 20304050607,
							MemoryUsed:  2030405060,
							DutyCycle:   12,
						},
						{
							Make:        "nvidia",
							Model:       "tesla-k80",
							ID:          "GPU-deadbeef-0123-4567-89ab-feedfacecafe",
							MemoryTotal: 10203040506,
							MemoryUsed:  1020304050,
							DutyCycle:   6,
						},
					},
					Processes: info.ProcessStats{
						ProcessCount:   1,
						FdCount:        5,
						SocketCount:    3,
						ThreadsCurrent: 5,
						ThreadsMax:     100,
						Ulimits: []info.UlimitSpec{
							{
								Name:      "max_open_files",
								SoftLimit: 16384,
								HardLimit: 16384,
							},
						},
					},
					TaskStats: info.LoadStats{
						NrSleeping:        50,
						NrRunning:         51,
						NrStopped:         52,
						NrUninterruptible: 53,
						NrIoWait:          54,
					},
					CustomMetrics: map[string][]info.MetricVal{
						"container_custom_app_metric_1": {
							{
								FloatValue: float64(1.1),
								Timestamp:  time.Now(),
								Label:      "testlabel_1_1_1",
								Labels:     map[string]string{"test_label": "1_1", "test_label_2": "2_1"},
							},
							{
								FloatValue: float64(1.2),
								Timestamp:  time.Now(),
								Label:      "testlabel_1_1_2",
								Labels:     map[string]string{"test_label": "1_2", "test_label_2": "2_2"},
							},
						},
						"container_custom_app_metric_2": {
							{
								FloatValue: float64(2),
								Timestamp:  time.Now(),
								Label:      "testlabel2",
								Labels:     map[string]string{"test_label": "test_value"},
							},
						},
						"container_custom_app_metric_3": {
							{
								FloatValue: float64(3),
								Timestamp:  time.Now(),
								Label:      "testlabel3",
								Labels:     map[string]string{"test_label": "test_value"},
							},
						},
					},
				},
			},
		},
	}, nil
}
// erroringSubcontainersInfoProvider wraps testSubcontainersInfoProvider and
// can be switched into a failing mode for error-path tests.
type erroringSubcontainersInfoProvider struct {
	// successfulProvider answers every call while shouldFail is false.
	successfulProvider testSubcontainersInfoProvider
	// shouldFail makes every method return an error instead of delegating.
	shouldFail bool
}
// GetVersionInfo delegates to the wrapped provider, unless shouldFail is set,
// in which case it returns a nil result and the error "Oops 1".
func (p *erroringSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.GetVersionInfo()
	}
	return nil, errors.New("Oops 1")
}
// GetMachineInfo delegates to the wrapped provider, unless shouldFail is set,
// in which case it returns a nil result and the error "Oops 2".
func (p *erroringSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.GetMachineInfo()
	}
	return nil, errors.New("Oops 2")
}
// SubcontainersInfo delegates to the wrapped provider, unless shouldFail is
// set, in which case it returns an empty (non-nil) slice and the error
// "Oops 3".
func (p *erroringSubcontainersInfoProvider) SubcontainersInfo(
	a string, r *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.SubcontainersInfo(a, r)
	}
	return []*info.ContainerInfo{}, errors.New("Oops 3")
}

View File

@ -0,0 +1,199 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
info "github.com/google/cadvisor/info/v1"
"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog"
)
// baseLabelsNames lists the label names attached to every collected machine
// metric. The matching values are MachineInfo's MachineID, SystemUUID and
// BootID, in that order.
var baseLabelsNames = []string{"machine_id", "system_uuid", "boot_id"}
const (
	// Names of the extra labels used by the NVM and DIMM metrics.
	prometheusModeLabelName = "mode"
	prometheusTypeLabelName = "type"

	// Values of the "mode" label on machine_nvm_capacity.
	nvmMemoryMode    = "memory_mode"
	nvmAppDirectMode = "app_direct_mode"

	// Property selectors accepted by getMemoryByType.
	memoryByTypeDimmCountKey    = "DimmCount"
	memoryByTypeDimmCapacityKey = "Capacity"
)
// machineMetric describes a multi-dimensional metric used for exposing a
// certain type of machine statistic.
type machineMetric struct {
	// name is the metric name passed to prometheus.NewDesc.
	name string
	// help is the help text passed to prometheus.NewDesc.
	help string
	// valueType is the Prometheus value type used when emitting samples.
	valueType prometheus.ValueType
	// extraLabels names the labels this metric carries in addition to the
	// base labels; the matching values come from metricValue.labels.
	extraLabels []string
	// condition is optional. When it is non-nil and returns false, the metric
	// is skipped for that collection.
	condition func(machineInfo *info.MachineInfo) bool
	// getValues extracts the samples for this metric from the machine info.
	getValues func(machineInfo *info.MachineInfo) metricValues
}
// desc builds the Prometheus descriptor for this metric. Its variable labels
// are baseLabels followed by the metric's extraLabels.
//
// The label names are copied into a freshly allocated slice rather than
// appended to baseLabels directly: appending to a caller-owned slice that has
// spare capacity would write into the caller's backing array, so descriptors
// built from the same baseLabels could end up sharing (and overwriting) each
// other's label names.
func (metric *machineMetric) desc(baseLabels []string) *prometheus.Desc {
	labelNames := make([]string, 0, len(baseLabels)+len(metric.extraLabels))
	labelNames = append(labelNames, baseLabels...)
	labelNames = append(labelNames, metric.extraLabels...)
	return prometheus.NewDesc(metric.name, metric.help, labelNames, nil)
}
// PrometheusMachineCollector implements prometheus.Collector. It exposes
// machine-level information obtained from an infoProvider.
type PrometheusMachineCollector struct {
	// infoProvider supplies the machine information on every collection.
	infoProvider infoProvider
	// errors is set to 1 when fetching machine information fails during a
	// collection and to 0 otherwise.
	errors prometheus.Gauge
	// machineMetrics is the list of metric definitions to emit.
	machineMetrics []machineMetric
}
// NewPrometheusMachineCollector returns a new PrometheusMachineCollector that
// reads machine information from i.
//
// The collector exposes the machine_scrape_error gauge plus the CPU, memory,
// DIMM and NVM metrics defined below. The two DIMM metrics are emitted only
// when the machine info carries per-type memory data.
func NewPrometheusMachineCollector(i infoProvider) *PrometheusMachineCollector {
	// Shared guard for the DIMM metrics.
	hasMemoryByType := func(mi *info.MachineInfo) bool {
		return len(mi.MemoryByType) != 0
	}

	scrapeError := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "machine",
		Name:      "scrape_error",
		Help:      "1 if there was an error while getting machine metrics, 0 otherwise.",
	})

	definitions := []machineMetric{
		{
			name:      "machine_cpu_physical_cores",
			help:      "Number of physical CPU cores.",
			valueType: prometheus.GaugeValue,
			getValues: func(mi *info.MachineInfo) metricValues {
				return metricValues{{value: float64(mi.NumPhysicalCores)}}
			},
		},
		{
			name:      "machine_cpu_cores",
			help:      "Number of logical CPU cores.",
			valueType: prometheus.GaugeValue,
			getValues: func(mi *info.MachineInfo) metricValues {
				return metricValues{{value: float64(mi.NumCores)}}
			},
		},
		{
			name:      "machine_cpu_sockets",
			help:      "Number of CPU sockets.",
			valueType: prometheus.GaugeValue,
			getValues: func(mi *info.MachineInfo) metricValues {
				return metricValues{{value: float64(mi.NumSockets)}}
			},
		},
		{
			name:      "machine_memory_bytes",
			help:      "Amount of memory installed on the machine.",
			valueType: prometheus.GaugeValue,
			getValues: func(mi *info.MachineInfo) metricValues {
				return metricValues{{value: float64(mi.MemoryCapacity)}}
			},
		},
		{
			name:        "machine_dimm_count",
			help:        "Number of RAM DIMM (all types memory modules) value labeled by dimm type.",
			valueType:   prometheus.GaugeValue,
			extraLabels: []string{prometheusTypeLabelName},
			condition:   hasMemoryByType,
			getValues: func(mi *info.MachineInfo) metricValues {
				return getMemoryByType(mi, memoryByTypeDimmCountKey)
			},
		},
		{
			name:        "machine_dimm_capacity_bytes",
			help:        "Total RAM DIMM capacity (all types memory modules) value labeled by dimm type.",
			valueType:   prometheus.GaugeValue,
			extraLabels: []string{prometheusTypeLabelName},
			condition:   hasMemoryByType,
			getValues: func(mi *info.MachineInfo) metricValues {
				return getMemoryByType(mi, memoryByTypeDimmCapacityKey)
			},
		},
		{
			name:        "machine_nvm_capacity",
			help:        "NVM capacity value labeled by NVM mode (memory mode or app direct mode).",
			valueType:   prometheus.GaugeValue,
			extraLabels: []string{prometheusModeLabelName},
			getValues: func(mi *info.MachineInfo) metricValues {
				memoryMode := metricValue{
					value:  float64(mi.NVMInfo.MemoryModeCapacity),
					labels: []string{nvmMemoryMode},
				}
				appDirectMode := metricValue{
					value:  float64(mi.NVMInfo.AppDirectModeCapacity),
					labels: []string{nvmAppDirectMode},
				}
				return metricValues{memoryMode, appDirectMode}
			},
		},
	}

	return &PrometheusMachineCollector{
		infoProvider:   i,
		errors:         scrapeError,
		machineMetrics: definitions,
	}
}
// Describe sends the descriptor of the scrape-error gauge followed by one
// descriptor per machine metric definition. The per-metric descriptors are
// built without base labels. It implements prometheus.Collector.
func (collector *PrometheusMachineCollector) Describe(ch chan<- *prometheus.Desc) {
	collector.errors.Describe(ch)

	noBaseLabels := []string{}
	for idx := range collector.machineMetrics {
		ch <- collector.machineMetrics[idx].desc(noBaseLabels)
	}
}
// Collect fetches information about the machine and delivers it as
// Prometheus metrics. It implements prometheus.Collector.
func (collector *PrometheusMachineCollector) Collect(ch chan<- prometheus.Metric) {
	// Reset the error gauge first; collectMachineInfo sets it to 1 on failure.
	collector.errors.Set(0)
	collector.collectMachineInfo(ch)
	// Emit the gauge last so it reflects the outcome of this collection.
	collector.errors.Collect(ch)
}
// collectMachineInfo fetches the machine info from the provider and sends one
// constant metric per value of every applicable machine metric to ch.
//
// If the provider returns an error, the error gauge is set to 1, a warning is
// logged and nothing is sent. A metric whose condition is non-nil and returns
// false is skipped. Every sample carries the machine ID, system UUID and boot
// ID as base label values; the sample's own label values are appended only
// when the metric declares extra labels.
func (collector *PrometheusMachineCollector) collectMachineInfo(ch chan<- prometheus.Metric) {
	machineInfo, err := collector.infoProvider.GetMachineInfo()
	if err != nil {
		collector.errors.Set(1)
		klog.Warningf("Couldn't get machine info: %s", err)
		return
	}

	baseLabelsValues := []string{machineInfo.MachineID, machineInfo.SystemUUID, machineInfo.BootID}

	for i := range collector.machineMetrics {
		metric := &collector.machineMetrics[i]
		if metric.condition != nil && !metric.condition(machineInfo) {
			continue
		}

		// The descriptor depends only on the metric definition, so build it
		// once per metric instead of once per emitted value.
		desc := metric.desc(baseLabelsNames)

		for _, sample := range metric.getValues(machineInfo) {
			// Each sample gets its own label slice, sized up front so the
			// append below does not reallocate.
			labelValues := make([]string, len(baseLabelsValues), len(baseLabelsValues)+len(sample.labels))
			copy(labelValues, baseLabelsValues)
			if len(metric.extraLabels) != 0 {
				labelValues = append(labelValues, sample.labels...)
			}
			ch <- prometheus.MustNewConstMetric(desc, metric.valueType, sample.value, labelValues...)
		}
	}
}
// getMemoryByType returns one metric value per entry of
// machineInfo.MemoryByType, labeled with the memory type. property selects
// which field is reported: memoryByTypeDimmCapacityKey for the capacity or
// memoryByTypeDimmCountKey for the DIMM count.
//
// An unknown property is logged as a warning and yields an empty result. The
// property is validated once, before iterating, so the warning is also issued
// when MemoryByType is empty.
func getMemoryByType(machineInfo *info.MachineInfo, property string) metricValues {
	var valueOf func(memoryInfo *info.MemoryInfo) float64
	switch property {
	case memoryByTypeDimmCapacityKey:
		valueOf = func(memoryInfo *info.MemoryInfo) float64 { return float64(memoryInfo.Capacity) }
	case memoryByTypeDimmCountKey:
		valueOf = func(memoryInfo *info.MemoryInfo) float64 { return float64(memoryInfo.DimmCount) }
	default:
		klog.Warningf("Incorrect property name for MemoryByType, property %s", property)
		return metricValues{}
	}

	mValues := make(metricValues, 0, len(machineInfo.MemoryByType))
	for memoryType, memoryInfo := range machineInfo.MemoryByType {
		mValues = append(mValues, metricValue{value: valueOf(memoryInfo), labels: []string{memoryType}})
	}
	return mValues
}

View File

@ -0,0 +1,89 @@
// Copyright 2020 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"bytes"
"io/ioutil"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
"github.com/stretchr/testify/assert"
)
// machineMetricsFile holds the expected text output of a successful scrape.
const machineMetricsFile = "testdata/prometheus_machine_metrics"

// machineMetricsFailureFile holds the expected text output when the provider
// fails.
const machineMetricsFailureFile = "testdata/prometheus_machine_metrics_failure"
// TestPrometheusMachineCollector gathers the machine metrics produced from the
// canned provider and compares their text exposition with the golden file.
func TestPrometheusMachineCollector(t *testing.T) {
	collector := NewPrometheusMachineCollector(testSubcontainersInfoProvider{})
	registry := prometheus.NewRegistry()
	registry.MustRegister(collector)

	metricsFamily, err := registry.Gather()
	assert.NoError(t, err)

	var metricBuffer bytes.Buffer
	for _, metricFamily := range metricsFamily {
		_, err := expfmt.MetricFamilyToText(&metricBuffer, metricFamily)
		assert.NoError(t, err)
	}
	// Buffer.String avoids the extra []byte -> string conversion.
	collectedMetrics := metricBuffer.String()

	expectedMetrics, err := ioutil.ReadFile(machineMetricsFile)
	assert.NoError(t, err)
	assert.Equal(t, string(expectedMetrics), collectedMetrics)
}
// TestPrometheusMachineCollectorWithFailure gathers metrics from a provider
// that always fails and compares the text exposition with the failure golden
// file.
func TestPrometheusMachineCollectorWithFailure(t *testing.T) {
	provider := &erroringSubcontainersInfoProvider{
		successfulProvider: testSubcontainersInfoProvider{},
		shouldFail:         true,
	}
	collector := NewPrometheusMachineCollector(provider)
	registry := prometheus.NewRegistry()
	registry.MustRegister(collector)

	metricsFamily, err := registry.Gather()
	assert.NoError(t, err)

	var metricBuffer bytes.Buffer
	for _, metricFamily := range metricsFamily {
		_, err := expfmt.MetricFamilyToText(&metricBuffer, metricFamily)
		assert.NoError(t, err)
	}
	// Buffer.String avoids the extra []byte -> string conversion.
	collectedMetrics := metricBuffer.String()

	expectedMetrics, err := ioutil.ReadFile(machineMetricsFailureFile)
	assert.NoError(t, err)
	assert.Equal(t, string(expectedMetrics), collectedMetrics)
}
// TestGetMemoryByType checks that both supported properties produce one value
// per memory type of the canned machine info, which has two types.
func TestGetMemoryByType(t *testing.T) {
	machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo()
	assert.Nil(t, err)

	// Capacity first, then count, matching the original call order.
	for _, property := range []string{memoryByTypeDimmCapacityKey, memoryByTypeDimmCountKey} {
		values := getMemoryByType(machineInfo, property)
		assert.Equal(t, 2, len(values))
	}
}
// TestGetMemoryByTypeWithWrongProperty checks that an unknown property name
// yields no values.
func TestGetMemoryByTypeWithWrongProperty(t *testing.T) {
	machineInfo, err := testSubcontainersInfoProvider{}.GetMachineInfo()
	assert.Nil(t, err)

	const unknownProperty = "wrong_property_name"
	assert.Equal(t, 0, len(getMemoryByType(machineInfo, unknownProperty)))
}

View File

@ -15,14 +15,12 @@
package metrics
import (
"errors"
"io/ioutil"
"net/http"
"net/http/httptest"
"regexp"
"strings"
"testing"
"time"
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
@ -30,389 +28,6 @@ import (
"github.com/prometheus/client_golang/prometheus/promhttp"
)
type testSubcontainersInfoProvider struct{}
func (p testSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
return &info.VersionInfo{
KernelVersion: "4.1.6-200.fc22.x86_64",
ContainerOsVersion: "Fedora 22 (Twenty Two)",
DockerVersion: "1.8.1",
CadvisorVersion: "0.16.0",
CadvisorRevision: "abcdef",
}, nil
}
func (p testSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
return &info.MachineInfo{
NumCores: 4,
MemoryCapacity: 1024,
}, nil
}
// SubcontainersInfo returns a hard-coded fixture for tests: a single container
// named "testcontainer" (alias "testcontaineralias") carrying one spec and one
// stats sample. Both arguments are unnamed and therefore ignored, and the
// returned error is always nil.
//
// The custom metric timestamps are taken from time.Now(), so those values
// differ between calls; every other value is constant.
func (p testSubcontainersInfoProvider) SubcontainersInfo(string, *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
return []*info.ContainerInfo{
{
ContainerReference: info.ContainerReference{
Name: "testcontainer",
Aliases: []string{"testcontaineralias"},
},
// Static container specification: image, CPU/memory/process limits,
// creation time, labels and environment.
Spec: info.ContainerSpec{
Image: "test",
HasCpu: true,
Cpu: info.CpuSpec{
Limit: 1000,
Period: 100000,
Quota: 10000,
},
Memory: info.MemorySpec{
Limit: 2048,
Reservation: 1024,
SwapLimit: 4096,
},
HasHugetlb: true,
HasProcesses: true,
Processes: info.ProcessSpec{
Limit: 100,
},
CreationTime: time.Unix(1257894000, 0),
Labels: map[string]string{
"foo.label": "bar",
},
Envs: map[string]string{
"foo+env": "prod",
},
},
// Exactly one stats sample, stamped with a fixed Unix time.
Stats: []*info.ContainerStats{
{
Timestamp: time.Unix(1395066363, 0),
// CPU usage, CFS throttling and scheduler statistics.
Cpu: info.CpuStats{
Usage: info.CpuUsage{
Total: 1,
PerCpu: []uint64{2, 3, 4, 5},
User: 6,
System: 7,
},
CFS: info.CpuCFS{
Periods: 723,
ThrottledPeriods: 18,
ThrottledTime: 1724314000,
},
Schedstat: info.CpuSchedstat{
RunTime: 53643567,
RunqueueTime: 479424566378,
RunPeriods: 984285,
},
LoadAverage: 2,
},
// Memory usage and page-fault counters.
Memory: info.MemoryStats{
Usage: 8,
MaxUsage: 8,
WorkingSet: 9,
ContainerData: info.MemoryStatsMemoryData{
Pgfault: 10,
Pgmajfault: 11,
},
HierarchicalData: info.MemoryStatsMemoryData{
Pgfault: 12,
Pgmajfault: 13,
},
Cache: 14,
RSS: 15,
MappedFile: 16,
Swap: 8192,
},
// Huge page statistics keyed by page size; the "1Gi" entry is all zeros.
Hugetlb: map[string]info.HugetlbStats{
"2Mi": {
Usage: 4,
MaxUsage: 10,
Failcnt: 1,
},
"1Gi": {
Usage: 0,
MaxUsage: 0,
Failcnt: 0,
},
},
// Network statistics: the embedded InterfaceStats and the single entry
// in Interfaces carry the same "eth0" values.
Network: info.NetworkStats{
InterfaceStats: info.InterfaceStats{
Name: "eth0",
RxBytes: 14,
RxPackets: 15,
RxErrors: 16,
RxDropped: 17,
TxBytes: 18,
TxPackets: 19,
TxErrors: 20,
TxDropped: 21,
},
Interfaces: []info.InterfaceStats{
{
Name: "eth0",
RxBytes: 14,
RxPackets: 15,
RxErrors: 16,
RxDropped: 17,
TxBytes: 18,
TxPackets: 19,
TxErrors: 20,
TxDropped: 21,
},
},
// TCP connection counts per state.
Tcp: info.TcpStat{
Established: 13,
SynSent: 0,
SynRecv: 0,
FinWait1: 0,
FinWait2: 0,
TimeWait: 0,
Close: 0,
CloseWait: 0,
LastAck: 0,
Listen: 3,
Closing: 0,
},
// Same layout as Tcp, with a different Established count.
Tcp6: info.TcpStat{
Established: 11,
SynSent: 0,
SynRecv: 0,
FinWait1: 0,
FinWait2: 0,
TimeWait: 0,
Close: 0,
CloseWait: 0,
LastAck: 0,
Listen: 3,
Closing: 0,
},
// Extended TCP counters; MaxConn is the only negative value in the set.
TcpAdvanced: info.TcpAdvancedStat{
TCPFullUndo: 2361,
TCPMD5NotFound: 0,
TCPDSACKRecv: 83680,
TCPSackShifted: 2,
TCPSackShiftFallback: 298,
PFMemallocDrop: 0,
EstabResets: 37,
InSegs: 140370590,
TCPPureAcks: 24251339,
TCPDSACKOldSent: 15633,
IPReversePathFilter: 0,
TCPFastOpenPassiveFail: 0,
InCsumErrors: 0,
TCPRenoFailures: 43414,
TCPMemoryPressuresChrono: 0,
TCPDeferAcceptDrop: 0,
TW: 10436427,
TCPSpuriousRTOs: 0,
TCPDSACKIgnoredNoUndo: 71885,
RtoMax: 120000,
ActiveOpens: 11038621,
EmbryonicRsts: 0,
RcvPruned: 0,
TCPLossProbeRecovery: 401,
TCPHPHits: 56096478,
TCPPartialUndo: 3,
TCPAbortOnMemory: 0,
AttemptFails: 48997,
RetransSegs: 462961,
SyncookiesFailed: 0,
OfoPruned: 0,
TCPAbortOnLinger: 0,
TCPAbortFailed: 0,
TCPRenoReorder: 839,
TCPRcvCollapsed: 0,
TCPDSACKIgnoredOld: 0,
TCPReqQFullDrop: 0,
OutOfWindowIcmps: 0,
TWKilled: 0,
TCPLossProbes: 88648,
TCPRenoRecoveryFail: 394,
TCPFastOpenCookieReqd: 0,
TCPHPAcks: 21490641,
TCPSACKReneging: 0,
TCPTSReorder: 3,
TCPSlowStartRetrans: 290832,
MaxConn: -1,
SyncookiesRecv: 0,
TCPSackFailures: 60,
DelayedACKLocked: 90,
TCPDSACKOfoSent: 1,
TCPSynRetrans: 988,
TCPDSACKOfoRecv: 10,
TCPSACKDiscard: 0,
TCPMD5Unexpected: 0,
TCPSackMerged: 6,
RtoMin: 200,
CurrEstab: 22,
TCPTimeWaitOverflow: 0,
ListenOverflows: 0,
DelayedACKs: 503975,
TCPLossUndo: 61374,
TCPOrigDataSent: 130698387,
TCPBacklogDrop: 0,
TCPReqQFullDoCookies: 0,
TCPFastOpenPassive: 0,
PAWSActive: 0,
OutRsts: 91699,
TCPSackRecoveryFail: 2,
DelayedACKLost: 18843,
TCPAbortOnData: 8,
TCPMinTTLDrop: 0,
PruneCalled: 0,
TWRecycled: 0,
ListenDrops: 0,
TCPAbortOnTimeout: 0,
SyncookiesSent: 0,
TCPSACKReorder: 11,
TCPDSACKUndo: 33,
TCPMD5Failure: 0,
TCPLostRetransmit: 0,
TCPAbortOnClose: 7,
TCPFastOpenListenOverflow: 0,
OutSegs: 211580512,
InErrs: 31,
TCPTimeouts: 27422,
TCPLossFailures: 729,
TCPSackRecovery: 159,
RtoAlgorithm: 1,
PassiveOpens: 59,
LockDroppedIcmps: 0,
TCPRenoRecovery: 3519,
TCPFACKReorder: 0,
TCPFastRetrans: 11794,
TCPRetransFail: 0,
TCPMemoryPressures: 0,
TCPFastOpenActive: 0,
TCPFastOpenActiveFail: 0,
PAWSEstab: 0,
},
// UDP statistics are all zero for both Udp and Udp6.
Udp: info.UdpStat{
Listen: 0,
Dropped: 0,
RxQueued: 0,
TxQueued: 0,
},
Udp6: info.UdpStat{
Listen: 0,
Dropped: 0,
RxQueued: 0,
TxQueued: 0,
},
},
// Two filesystem entries, for devices "sda1" and "sda2".
Filesystem: []info.FsStats{
{
Device: "sda1",
InodesFree: 524288,
Inodes: 2097152,
Limit: 22,
Usage: 23,
ReadsCompleted: 24,
ReadsMerged: 25,
SectorsRead: 26,
ReadTime: 27,
WritesCompleted: 28,
WritesMerged: 39,
SectorsWritten: 40,
WriteTime: 41,
IoInProgress: 42,
IoTime: 43,
WeightedIoTime: 44,
},
{
Device: "sda2",
InodesFree: 262144,
Inodes: 2097152,
Limit: 37,
Usage: 38,
ReadsCompleted: 39,
ReadsMerged: 40,
SectorsRead: 41,
ReadTime: 42,
WritesCompleted: 43,
WritesMerged: 44,
SectorsWritten: 45,
WriteTime: 46,
IoInProgress: 47,
IoTime: 48,
WeightedIoTime: 49,
},
},
// Two accelerator entries with the same make but different models and IDs.
Accelerators: []info.AcceleratorStats{
{
Make: "nvidia",
Model: "tesla-p100",
ID: "GPU-deadbeef-1234-5678-90ab-feedfacecafe",
MemoryTotal: 20304050607,
MemoryUsed: 2030405060,
DutyCycle: 12,
},
{
Make: "nvidia",
Model: "tesla-k80",
ID: "GPU-deadbeef-0123-4567-89ab-feedfacecafe",
MemoryTotal: 10203040506,
MemoryUsed: 1020304050,
DutyCycle: 6,
},
},
// Process, file descriptor, socket and thread counts plus one ulimit entry.
Processes: info.ProcessStats{
ProcessCount: 1,
FdCount: 5,
SocketCount: 3,
ThreadsCurrent: 5,
ThreadsMax: 100,
Ulimits: []info.UlimitSpec{
{
Name: "max_open_files",
SoftLimit: 16384,
HardLimit: 16384,
},
},
},
TaskStats: info.LoadStats{
NrSleeping: 50,
NrRunning: 51,
NrStopped: 52,
NrUninterruptible: 53,
NrIoWait: 54,
},
// Custom metrics keyed by metric name. Timestamps use time.Now(), so they
// are the only values in this fixture that change from call to call.
CustomMetrics: map[string][]info.MetricVal{
"container_custom_app_metric_1": {
{
FloatValue: float64(1.1),
Timestamp: time.Now(),
Label: "testlabel_1_1_1",
Labels: map[string]string{"test_label": "1_1", "test_label_2": "2_1"},
},
{
FloatValue: float64(1.2),
Timestamp: time.Now(),
Label: "testlabel_1_1_2",
Labels: map[string]string{"test_label": "1_2", "test_label_2": "2_2"},
},
},
"container_custom_app_metric_2": {
{
FloatValue: float64(2),
Timestamp: time.Now(),
Label: "testlabel2",
Labels: map[string]string{"test_label": "test_value"},
},
},
"container_custom_app_metric_3": {
{
FloatValue: float64(3),
Timestamp: time.Now(),
Label: "testlabel3",
Labels: map[string]string{"test_label": "test_value"},
},
},
},
},
},
},
}, nil
}
var (
includeRe = regexp.MustCompile(`^(?:(?:# HELP |# TYPE )?container_|cadvisor_version_info\{)`)
ignoreRe = regexp.MustCompile(`^container_last_seen\{`)
@ -456,33 +71,6 @@ func testPrometheusCollector(t *testing.T, c *PrometheusCollector, metricsFile s
}
}
// erroringSubcontainersInfoProvider wraps a testSubcontainersInfoProvider and
// can be switched to return errors instead of delegating to it.
type erroringSubcontainersInfoProvider struct {
// successfulProvider supplies the results whenever shouldFail is false.
successfulProvider testSubcontainersInfoProvider
// shouldFail, when true, makes the provider's methods return an error.
shouldFail bool
}
// GetVersionInfo delegates to the wrapped successful provider unless
// shouldFail is set, in which case it returns nil and the error "Oops 1".
func (p *erroringSubcontainersInfoProvider) GetVersionInfo() (*info.VersionInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.GetVersionInfo()
	}
	return nil, errors.New("Oops 1")
}
// GetMachineInfo delegates to the wrapped successful provider unless
// shouldFail is set, in which case it returns nil and the error "Oops 2".
func (p *erroringSubcontainersInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.GetMachineInfo()
	}
	return nil, errors.New("Oops 2")
}
// SubcontainersInfo forwards both arguments to the wrapped successful provider
// unless shouldFail is set, in which case it returns an empty, non-nil slice
// together with the error "Oops 3".
func (p *erroringSubcontainersInfoProvider) SubcontainersInfo(
	a string, r *info.ContainerInfoRequest) ([]*info.ContainerInfo, error) {
	if !p.shouldFail {
		return p.successfulProvider.SubcontainersInfo(a, r)
	}
	return []*info.ContainerInfo{}, errors.New("Oops 3")
}
func TestPrometheusCollector_scrapeFailure(t *testing.T) {
provider := &erroringSubcontainersInfoProvider{
successfulProvider: testSubcontainersInfoProvider{},

View File

@ -0,0 +1,27 @@
# HELP machine_cpu_cores Number of logical CPU cores.
# TYPE machine_cpu_cores gauge
machine_cpu_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 4
# HELP machine_cpu_physical_cores Number of physical CPU cores.
# TYPE machine_cpu_physical_cores gauge
machine_cpu_physical_cores{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1
# HELP machine_cpu_sockets Number of CPU sockets.
# TYPE machine_cpu_sockets gauge
machine_cpu_sockets{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1
# HELP machine_dimm_capacity_bytes Total RAM DIMM capacity (all types memory modules) value labeled by dimm type.
# TYPE machine_dimm_capacity_bytes gauge
machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 2.168421613568e+12
machine_dimm_capacity_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 4.12316860416e+11
# HELP machine_dimm_count Number of RAM DIMM (all types memory modules) value labeled by dimm type.
# TYPE machine_dimm_count gauge
machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Non-volatile-RAM"} 8
machine_dimm_count{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test",type="Unbuffered-DDR4"} 12
# HELP machine_memory_bytes Amount of memory installed on the machine.
# TYPE machine_memory_bytes gauge
machine_memory_bytes{boot_id="boot-id-test",machine_id="machine-id-test",system_uuid="system-uuid-test"} 1024
# HELP machine_nvm_capacity NVM capacity value labeled by NVM mode (memory mode or app direct mode).
# TYPE machine_nvm_capacity gauge
machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="app_direct_mode",system_uuid="system-uuid-test"} 1.735166787584e+12
machine_nvm_capacity{boot_id="boot-id-test",machine_id="machine-id-test",mode="memory_mode",system_uuid="system-uuid-test"} 4.294967296e+11
# HELP machine_scrape_error 1 if there was an error while getting machine metrics, 0 otherwise.
# TYPE machine_scrape_error gauge
machine_scrape_error 0

View File

@ -0,0 +1,3 @@
# HELP machine_scrape_error 1 if there was an error while getting machine metrics, 0 otherwise.
# TYPE machine_scrape_error gauge
machine_scrape_error 1

View File

@ -364,12 +364,6 @@ container_threads_max{container_env_foo_env="prod",container_label_foo_label="ba
# HELP container_ulimits_soft Soft ulimit values for the container root process. Unlimited if -1, except priority and nice
# TYPE container_ulimits_soft gauge
container_ulimits_soft{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",ulimit="max_open_files",zone_name="hello"} 16384 1395066363000
# HELP machine_cpu_cores Number of CPU cores on the machine.
# TYPE machine_cpu_cores gauge
machine_cpu_cores 4
# HELP machine_memory_bytes Amount of memory installed on the machine.
# TYPE machine_memory_bytes gauge
machine_memory_bytes 1024
# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds.
# TYPE process_cpu_seconds_total counter
process_cpu_seconds_total 0