Report container FS metrics into prometheus /metrics

PerDiskStats reported from cgroups were not being surfaced into
prometheus. In order to properly correlate the metrics, we need to
assign a device label to each metric (which is the FS or device path).
Since the blkio cgroup identifies devices only by major and minor number,
we create a synthetic device name `/dev/NAME` for the metric.

Assign a Device label to each PerDiskStat for the handlers up front, and
then surface the PerDiskStat values into the prometheus metrics. Report
two new metrics - total bytes read and total bytes written.
This commit is contained in:
Clayton Coleman 2017-04-19 01:08:59 -04:00
parent 0073fcb9c7
commit 4e25a7951f
No known key found for this signature in database
GPG Key ID: 3D16906B4F1C5CB3
8 changed files with 230 additions and 43 deletions

View File

@ -223,3 +223,71 @@ func ListContainers(name string, cgroupPaths map[string]string, listType contain
return ret, nil return ret, nil
} }
// AssignDeviceNamesToDiskStats populates the Device field of every per-disk
// entry held by stats. Each entry's major and minor ids are resolved to a
// name through namer. All of the per-disk series listed below are handed to
// the helper in one call.
func AssignDeviceNamesToDiskStats(namer DeviceNamer, stats *info.DiskIoStats) {
	series := [][]info.PerDiskStats{
		stats.IoMerged,
		stats.IoQueued,
		stats.IoServiceBytes,
		stats.IoServiceTime,
		stats.IoServiced,
		stats.IoTime,
		stats.IoWaitTime,
		stats.Sectors,
	}
	assignDeviceNamesToPerDiskStats(namer, series...)
}
// assignDeviceNamesToPerDiskStats sets the Device field on each entry of the
// given per-disk stat slices. Names are resolved through namer and remembered
// by (major, minor) in a map that lives for the duration of this call, so the
// namer is asked at most once per distinct device.
func assignDeviceNamesToPerDiskStats(namer DeviceNamer, diskStats ...[]info.PerDiskStats) {
	cache := make(deviceIdentifierMap)
	for _, perDisk := range diskStats {
		for idx := range perDisk {
			// Write through a pointer so the slice element itself is updated.
			entry := &perDisk[idx]
			entry.Device = cache.Find(entry.Major, entry.Minor, namer)
		}
	}
}
// DeviceNamer resolves a device's major and minor ids to a string name.
type DeviceNamer interface {
	// DeviceName reports the name of the device identified by major and
	// minor. ok is false when no such device is recognized.
	DeviceName(major uint64, minor uint64) (name string, ok bool)
}
// MachineInfoNamer lets an info.MachineInfo value be used as a DeviceNamer.
type MachineInfoNamer info.MachineInfo

// DeviceName returns the name of the device with the given major and minor
// ids. DiskMap is consulted first, and a match there is reported as "/dev/"
// followed by the disk's Name. Otherwise the Device of a matching Filesystems
// entry is returned. The boolean result is false when neither has a match.
func (n *MachineInfoNamer) DeviceName(major, minor uint64) (string, bool) {
	for _, disk := range n.DiskMap {
		if disk.Major != major || disk.Minor != minor {
			continue
		}
		return "/dev/" + disk.Name, true
	}
	for _, filesystem := range n.Filesystems {
		if filesystem.DeviceMajor != major || filesystem.DeviceMinor != minor {
			continue
		}
		return filesystem.Device, true
	}
	return "", false
}
// deviceIdentifier is the (major, minor) pair used as the key when caching
// device names.
type deviceIdentifier struct {
	major, minor uint64
}

// deviceIdentifierMap caches device names by device identifier.
type deviceIdentifierMap map[deviceIdentifier]string

// Find returns the name of the device with the given major and minor ids. A
// name already stored in m is returned as is. Otherwise namer is consulted
// and the name it returns is stored in m before being handed back.
func (m deviceIdentifierMap) Find(major, minor uint64, namer DeviceNamer) string {
	key := deviceIdentifier{major: major, minor: minor}
	name, cached := m[key]
	if !cached {
		// The namer's "recognized" flag is not inspected: whatever name it
		// returns is cached, so an unrecognized device is not looked up again.
		name, _ = namer.DeviceName(major, minor)
		m[key] = name
	}
	return name
}

View File

@ -390,6 +390,15 @@ func (self *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
} }
func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error { func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error {
mi, err := self.machineInfoFactory.GetMachineInfo()
if err != nil {
return err
}
if !self.ignoreMetrics.Has(container.DiskIOMetrics) {
common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo)
}
if self.ignoreMetrics.Has(container.DiskUsageMetrics) { if self.ignoreMetrics.Has(container.DiskUsageMetrics) {
return nil return nil
} }
@ -411,11 +420,6 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
return nil return nil
} }
mi, err := self.machineInfoFactory.GetMachineInfo()
if err != nil {
return err
}
var ( var (
limit uint64 limit uint64
fsType string fsType string

View File

@ -197,6 +197,7 @@ func fsToFsStats(fs *fs.Fs) info.FsStats {
} }
func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error { func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
var allFs []fs.Fs
// Get Filesystem information only for the root cgroup. // Get Filesystem information only for the root cgroup.
if isRootCgroup(self.name) { if isRootCgroup(self.name) {
filesystems, err := self.fsInfo.GetGlobalFsInfo() filesystems, err := self.fsInfo.GetGlobalFsInfo()
@ -207,6 +208,7 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
fs := filesystems[i] fs := filesystems[i]
stats.Filesystem = append(stats.Filesystem, fsToFsStats(&fs)) stats.Filesystem = append(stats.Filesystem, fsToFsStats(&fs))
} }
allFs = filesystems
} else if len(self.externalMounts) > 0 { } else if len(self.externalMounts) > 0 {
var mountSet map[string]struct{} var mountSet map[string]struct{}
mountSet = make(map[string]struct{}) mountSet = make(map[string]struct{})
@ -221,7 +223,10 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
fs := filesystems[i] fs := filesystems[i]
stats.Filesystem = append(stats.Filesystem, fsToFsStats(&fs)) stats.Filesystem = append(stats.Filesystem, fsToFsStats(&fs))
} }
allFs = filesystems
} }
common.AssignDeviceNamesToDiskStats(&fsNamer{fs: allFs, factory: self.machineInfoFactory}, &stats.DiskIo)
return nil return nil
} }
@ -272,3 +277,25 @@ func (self *rawContainerHandler) Exists() bool {
func (self *rawContainerHandler) Type() container.ContainerType { func (self *rawContainerHandler) Type() container.ContainerType {
return container.ContainerTypeRaw return container.ContainerTypeRaw
} }
// fsNamer is a DeviceNamer that answers from a list of filesystems first and
// falls back to a namer built from machine info, which is obtained from
// factory the first time the fallback is needed.
type fsNamer struct {
	fs      []fs.Fs
	factory info.MachineInfoFactory
	info    common.DeviceNamer
}

// DeviceName returns the Device of the first entry in n.fs whose major and
// minor ids match. When none matches, the lookup is delegated to the
// machine-info namer. If machine info cannot be fetched, ("", false) is
// returned and nothing is cached, so the next call tries again.
func (n *fsNamer) DeviceName(major, minor uint64) (string, bool) {
	for i := range n.fs {
		candidate := &n.fs[i]
		if uint64(candidate.Major) == major && uint64(candidate.Minor) == minor {
			return candidate.Device, true
		}
	}
	if n.info != nil {
		return n.info.DeviceName(major, minor)
	}
	mi, err := n.factory.GetMachineInfo()
	if err != nil {
		return "", false
	}
	n.info = (*common.MachineInfoNamer)(mi)
	return n.info.DeviceName(major, minor)
}

View File

@ -202,6 +202,15 @@ func (handler *rktContainerHandler) GetSpec() (info.ContainerSpec, error) {
} }
func (handler *rktContainerHandler) getFsStats(stats *info.ContainerStats) error { func (handler *rktContainerHandler) getFsStats(stats *info.ContainerStats) error {
mi, err := handler.machineInfoFactory.GetMachineInfo()
if err != nil {
return err
}
if !handler.ignoreMetrics.Has(container.DiskIOMetrics) {
common.AssignDeviceNamesToDiskStats((*common.MachineInfoNamer)(mi), &stats.DiskIo)
}
if handler.ignoreMetrics.Has(container.DiskUsageMetrics) { if handler.ignoreMetrics.Has(container.DiskUsageMetrics) {
return nil return nil
} }
@ -211,10 +220,6 @@ func (handler *rktContainerHandler) getFsStats(stats *info.ContainerStats) error
return err return err
} }
mi, err := handler.machineInfoFactory.GetMachineInfo()
if err != nil {
return err
}
var limit uint64 = 0 var limit uint64 = 0
// Use capacity as limit. // Use capacity as limit.

View File

@ -307,6 +307,7 @@ type CpuStats struct {
} }
type PerDiskStats struct { type PerDiskStats struct {
Device string `json:"-"`
Major uint64 `json:"major"` Major uint64 `json:"major"`
Minor uint64 `json:"minor"` Minor uint64 `json:"minor"`
Stats map[string]uint64 `json:"stats"` Stats map[string]uint64 `json:"stats"`

View File

@ -17,6 +17,10 @@ package v1
type FsInfo struct { type FsInfo struct {
// Block device associated with the filesystem. // Block device associated with the filesystem.
Device string `json:"device"` Device string `json:"device"`
// DeviceMajor is the major identifier of the device, used for correlation with blkio stats
DeviceMajor uint64 `json:"-"`
// DeviceMinor is the minor identifier of the device, used for correlation with blkio stats
DeviceMinor uint64 `json:"-"`
// Total number of bytes available on the filesystem. // Total number of bytes available on the filesystem.
Capacity uint64 `json:"capacity"` Capacity uint64 `json:"capacity"`

View File

@ -116,7 +116,7 @@ func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.Mach
if fs.Inodes != nil { if fs.Inodes != nil {
inodes = *fs.Inodes inodes = *fs.Inodes
} }
machineInfo.Filesystems = append(machineInfo.Filesystems, info.FsInfo{Device: fs.Device, Type: fs.Type.String(), Capacity: fs.Capacity, Inodes: inodes, HasInodes: fs.Inodes != nil}) machineInfo.Filesystems = append(machineInfo.Filesystems, info.FsInfo{Device: fs.Device, DeviceMajor: uint64(fs.Major), DeviceMinor: uint64(fs.Minor), Type: fs.Type.String(), Capacity: fs.Capacity, Inodes: inodes, HasInodes: fs.Inodes != nil})
} }
return machineInfo, nil return machineInfo, nil

View File

@ -45,6 +45,14 @@ type metricValue struct {
type metricValues []metricValue type metricValues []metricValue
// asFloat64 returns v converted to a float64.
func asFloat64(v uint64) float64 {
	return float64(v)
}
// asNanosecondsToSeconds treats v as a count of nanoseconds and returns the
// equivalent number of seconds as a float64.
func asNanosecondsToSeconds(v uint64) float64 {
	const nanosPerSecond = float64(time.Second)
	nanos := float64(v)
	return nanos / nanosPerSecond
}
// fsValues is a helper method for assembling per-filesystem stats. // fsValues is a helper method for assembling per-filesystem stats.
func fsValues(fsStats []info.FsStats, valueFn func(*info.FsStats) float64) metricValues { func fsValues(fsStats []info.FsStats, valueFn func(*info.FsStats) float64) metricValues {
values := make(metricValues, 0, len(fsStats)) values := make(metricValues, 0, len(fsStats))
@ -57,6 +65,24 @@ func fsValues(fsStats []info.FsStats, valueFn func(*info.FsStats) float64) metri
return values return values
} }
// ioValues builds the metric values for one metric from two sources: the
// per-disk stats in ioStats and the per-filesystem stats in fsStats.
//
// For each per-disk entry, the counter stored under the ioType key of its
// Stats map is converted with ioValueFn. For each filesystem entry, the value
// is produced by valueFn. Every value carries a single label, the entry's
// Device. Per-disk values come first, followed by filesystem values.
func ioValues(ioStats []info.PerDiskStats, ioType string, ioValueFn func(uint64) float64, fsStats []info.FsStats, valueFn func(*info.FsStats) float64) metricValues {
	result := make(metricValues, 0, len(ioStats)+len(fsStats))
	for i := range ioStats {
		disk := &ioStats[i]
		result = append(result, metricValue{
			value:  ioValueFn(disk.Stats[ioType]),
			labels: []string{disk.Device},
		})
	}
	for i := range fsStats {
		// valueFn receives a pointer to a copy, not to the slice element.
		fsStat := fsStats[i]
		result = append(result, metricValue{
			value:  valueFn(&fsStat),
			labels: []string{fsStat.Device},
		})
	}
	return result
}
// containerMetric describes a multi-dimensional metric used for exposing a // containerMetric describes a multi-dimensional metric used for exposing a
// certain type of container statistic. // certain type of container statistic.
type containerMetric struct { type containerMetric struct {
@ -270,15 +296,29 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
return float64(fs.Usage) return float64(fs.Usage)
}) })
}, },
}, {
name: "container_fs_reads_bytes_total",
help: "Cumulative count of bytes read",
valueType: prometheus.CounterValue,
extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues {
return ioValues(
s.DiskIo.IoServiceBytes, "Read", asFloat64,
nil, nil,
)
},
}, { }, {
name: "container_fs_reads_total", name: "container_fs_reads_total",
help: "Cumulative count of reads completed", help: "Cumulative count of reads completed",
valueType: prometheus.CounterValue, valueType: prometheus.CounterValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.IoServiced, "Read", asFloat64,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(fs.ReadsCompleted) return float64(fs.ReadsCompleted)
}) },
)
}, },
}, { }, {
name: "container_fs_sector_reads_total", name: "container_fs_sector_reads_total",
@ -286,9 +326,12 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
valueType: prometheus.CounterValue, valueType: prometheus.CounterValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.Sectors, "Read", asFloat64,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(fs.SectorsRead) return float64(fs.SectorsRead)
}) },
)
}, },
}, { }, {
name: "container_fs_reads_merged_total", name: "container_fs_reads_merged_total",
@ -296,9 +339,12 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
valueType: prometheus.CounterValue, valueType: prometheus.CounterValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.IoMerged, "Read", asFloat64,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(fs.ReadsMerged) return float64(fs.ReadsMerged)
}) },
)
}, },
}, { }, {
name: "container_fs_read_seconds_total", name: "container_fs_read_seconds_total",
@ -306,9 +352,23 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
valueType: prometheus.CounterValue, valueType: prometheus.CounterValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.IoServiceTime, "Read", asNanosecondsToSeconds,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(fs.ReadTime) / float64(time.Second) return float64(fs.ReadTime) / float64(time.Second)
}) },
)
},
}, {
name: "container_fs_writes_bytes_total",
help: "Cumulative count of bytes written",
valueType: prometheus.CounterValue,
extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues {
return ioValues(
s.DiskIo.IoServiceBytes, "Write", asFloat64,
nil, nil,
)
}, },
}, { }, {
name: "container_fs_writes_total", name: "container_fs_writes_total",
@ -316,9 +376,12 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
valueType: prometheus.CounterValue, valueType: prometheus.CounterValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.IoServiced, "Write", asFloat64,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(fs.WritesCompleted) return float64(fs.WritesCompleted)
}) },
)
}, },
}, { }, {
name: "container_fs_sector_writes_total", name: "container_fs_sector_writes_total",
@ -326,9 +389,12 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
valueType: prometheus.CounterValue, valueType: prometheus.CounterValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.Sectors, "Write", asFloat64,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(fs.SectorsWritten) return float64(fs.SectorsWritten)
}) },
)
}, },
}, { }, {
name: "container_fs_writes_merged_total", name: "container_fs_writes_merged_total",
@ -336,9 +402,12 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
valueType: prometheus.CounterValue, valueType: prometheus.CounterValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.IoMerged, "Write", asFloat64,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(fs.WritesMerged) return float64(fs.WritesMerged)
}) },
)
}, },
}, { }, {
name: "container_fs_write_seconds_total", name: "container_fs_write_seconds_total",
@ -346,9 +415,12 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
valueType: prometheus.CounterValue, valueType: prometheus.CounterValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.IoServiceTime, "Write", asNanosecondsToSeconds,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(fs.WriteTime) / float64(time.Second) return float64(fs.WriteTime) / float64(time.Second)
}) },
)
}, },
}, { }, {
name: "container_fs_io_current", name: "container_fs_io_current",
@ -356,9 +428,12 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
valueType: prometheus.GaugeValue, valueType: prometheus.GaugeValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.IoQueued, "Total", asFloat64,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(fs.IoInProgress) return float64(fs.IoInProgress)
}) },
)
}, },
}, { }, {
name: "container_fs_io_time_seconds_total", name: "container_fs_io_time_seconds_total",
@ -366,9 +441,12 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc) *PrometheusCo
valueType: prometheus.CounterValue, valueType: prometheus.CounterValue,
extraLabels: []string{"device"}, extraLabels: []string{"device"},
getValues: func(s *info.ContainerStats) metricValues { getValues: func(s *info.ContainerStats) metricValues {
return fsValues(s.Filesystem, func(fs *info.FsStats) float64 { return ioValues(
s.DiskIo.IoServiceTime, "Total", asNanosecondsToSeconds,
s.Filesystem, func(fs *info.FsStats) float64 {
return float64(float64(fs.IoTime) / float64(time.Second)) return float64(float64(fs.IoTime) / float64(time.Second))
}) },
)
}, },
}, { }, {
name: "container_fs_io_time_weighted_seconds_total", name: "container_fs_io_time_weighted_seconds_total",