Add stats to InfluxDB storage (#2593)

* Fix unit tests for InfluxDB; Add stats to InfluxDB storage
- memory stats
- hugetlb stats
- perf stats
- resctrl stats
- referenced memory

Signed-off-by: Katarzyna Kujawa <katarzyna.kujawa@intel.com>
This commit is contained in:
Katarzyna Kujawa 2020-06-23 18:56:33 +02:00 committed by GitHub
parent 0ac6b77bee
commit 3fcc88c533
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 266 additions and 20 deletions

View File

@ -58,8 +58,22 @@ const (
serLoadAverage string = "load_average"
// Memory Usage
serMemoryUsage string = "memory_usage"
// Maximum memory usage recorded
serMemoryMaxUsage string = "memory_max_usage"
// Number of bytes of page cache memory
serMemoryCache string = "memory_cache"
// Size of RSS
serMemoryRss string = "memory_rss"
// Container swap usage
serMemorySwap string = "memory_swap"
// Size of memory mapped files in bytes
serMemoryMappedFile string = "memory_mapped_file"
// Working set size
serMemoryWorkingSet string = "memory_working_set"
// Number of times memory usage hit the limit
serMemoryFailcnt string = "memory_failcnt"
// Cumulative count of memory allocation failures
serMemoryFailure string = "memory_failure"
// Cumulative count of bytes received.
serRxBytes string = "rx_bytes"
// Cumulative count of receive errors encountered.
@ -72,6 +86,22 @@ const (
serFsLimit string = "fs_limit"
// Filesystem usage.
serFsUsage string = "fs_usage"
// Hugetlb stat - current res_counter usage for hugetlb
setHugetlbUsage = "hugetlb_usage"
// Hugetlb stat - maximum usage ever recorded
setHugetlbMaxUsage = "hugetlb_max_usage"
// Hugetlb stat - number of hugetlb usage allocation failures
setHugetlbFailcnt = "hugetlb_failcnt"
// Perf statistics
serPerfStat = "perf_stat"
// Referenced memory
serReferencedMemory = "referenced_memory"
// Resctrl - Total memory bandwidth
serResctrlMemoryBandwidthTotal = "resctrl_memory_bandwidth_total"
// Resctrl - Local memory bandwidth
serResctrlMemoryBandwidthLocal = "resctrl_memory_bandwidth_local"
// Resctrl - Last level cache usage
serResctrlLLCOccupancy = "resctrl_llc_occupancy"
)
func new() (storage.StorageDriver, error) {
@ -194,18 +224,152 @@ func (s *influxdbStorage) containerStatsToPoints(
// Load Average
points = append(points, makePoint(serLoadAverage, stats.Cpu.LoadAverage))
// Memory Usage
points = append(points, makePoint(serMemoryUsage, stats.Memory.Usage))
// Working Set Size
points = append(points, makePoint(serMemoryWorkingSet, stats.Memory.WorkingSet))
// Network Stats
points = append(points, makePoint(serRxBytes, stats.Network.RxBytes))
points = append(points, makePoint(serRxErrors, stats.Network.RxErrors))
points = append(points, makePoint(serTxBytes, stats.Network.TxBytes))
points = append(points, makePoint(serTxErrors, stats.Network.TxErrors))
// Referenced Memory
points = append(points, makePoint(serReferencedMemory, stats.ReferencedMemory))
s.tagPoints(cInfo, stats, points)
return points
}
// memoryStatsToPoints converts the memory statistics of a container into
// InfluxDB points.
//
// One point is emitted for each of: usage, max usage, cache, RSS, swap, mapped
// file, working set and failcnt. Four more points are emitted in the
// serMemoryFailure series, one per combination of the "failure_type" tag
// (pgfault, pgmajfault) and the "scope" tag (container, hierarchical). All
// points are passed through s.tagPoints before being returned.
func (s *influxdbStorage) memoryStatsToPoints(
	cInfo *info.ContainerInfo,
	stats *info.ContainerStats,
) (points []*influxdb.Point) {
	// Memory Usage
	points = append(points, makePoint(serMemoryUsage, stats.Memory.Usage))
	// Maximum memory usage recorded
	points = append(points, makePoint(serMemoryMaxUsage, stats.Memory.MaxUsage))
	// Number of bytes of page cache memory
	points = append(points, makePoint(serMemoryCache, stats.Memory.Cache))
	// Size of RSS
	points = append(points, makePoint(serMemoryRss, stats.Memory.RSS))
	// Container swap usage
	points = append(points, makePoint(serMemorySwap, stats.Memory.Swap))
	// Size of memory mapped files in bytes
	points = append(points, makePoint(serMemoryMappedFile, stats.Memory.MappedFile))
	// Working Set Size
	points = append(points, makePoint(serMemoryWorkingSet, stats.Memory.WorkingSet))
	// Number of times memory usage hit the limit
	points = append(points, makePoint(serMemoryFailcnt, stats.Memory.Failcnt))

	// Cumulative count of memory allocation failures, in the same order as
	// before: container pgfault, container pgmajfault, hierarchical pgfault,
	// hierarchical pgmajfault.
	failures := []struct {
		failureType string
		scope       string
		value       interface{}
	}{
		{"pgfault", "container", stats.Memory.ContainerData.Pgfault},
		{"pgmajfault", "container", stats.Memory.ContainerData.Pgmajfault},
		{"pgfault", "hierarchical", stats.Memory.HierarchicalData.Pgfault},
		{"pgmajfault", "hierarchical", stats.Memory.HierarchicalData.Pgmajfault},
	}
	for _, failure := range failures {
		point := makePoint(serMemoryFailure, failure.value)
		// Each point gets its own tag map. addTagsToPoint may retain the map
		// it is given (its implementation is not visible here); sharing one
		// map and mutating it between points would then leave every point
		// carrying the last failure_type/scope values.
		addTagsToPoint(point, map[string]string{
			"failure_type": failure.failureType,
			"scope":        failure.scope,
		})
		points = append(points, point)
	}

	s.tagPoints(cInfo, stats, points)

	return points
}
// hugetlbStatsToPoints builds InfluxDB points from the per-page-size hugetlb
// statistics in stats.Hugetlb.
//
// Every entry yields three points (usage, max usage and failcnt, in that
// order), each tagged with "page_size" set to the entry's map key. The
// collected points are handed to s.tagPoints before being returned.
func (s *influxdbStorage) hugetlbStatsToPoints(
	cInfo *info.ContainerInfo,
	stats *info.ContainerStats,
) (points []*influxdb.Point) {
	for size, entry := range stats.Hugetlb {
		pageTags := map[string]string{"page_size": size}

		// Series name / value pairs emitted for this page size.
		series := []struct {
			name  string
			value interface{}
		}{
			// Hugepage usage
			{setHugetlbUsage, entry.Usage},
			// Maximum hugepage usage recorded
			{setHugetlbMaxUsage, entry.MaxUsage},
			// Number of hugepage usage hits limits
			{setHugetlbFailcnt, entry.Failcnt},
		}
		for _, sr := range series {
			p := makePoint(sr.name, sr.value)
			addTagsToPoint(p, pageTags)
			points = append(points, p)
		}
	}

	s.tagPoints(cInfo, stats, points)

	return points
}
// perfStatsToPoints builds one InfluxDB point in the serPerfStat series for
// each element of stats.PerfStats.
//
// The point's value is the element's Value; its Cpu, Name and ScalingRatio
// fields are attached as the "cpu", "name" and "scaling_ratio" tags (Cpu and
// ScalingRatio rendered with fmt's default formatting). The collected points
// are handed to s.tagPoints before being returned.
func (s *influxdbStorage) perfStatsToPoints(
	cInfo *info.ContainerInfo,
	stats *info.ContainerStats,
) (points []*influxdb.Point) {
	for i := range stats.PerfStats {
		sample := &stats.PerfStats[i]

		sampleTags := map[string]string{
			"cpu":           fmt.Sprint(sample.Cpu),
			"name":          sample.Name,
			"scaling_ratio": fmt.Sprint(sample.ScalingRatio),
		}

		p := makePoint(serPerfStat, sample.Value)
		addTagsToPoint(p, sampleTags)
		points = append(points, p)
	}

	s.tagPoints(cInfo, stats, points)

	return points
}
func (s *influxdbStorage) resctrlStatsToPoints(
cInfo *info.ContainerInfo,
stats *info.ContainerStats,
) (points []*influxdb.Point) {
// Memory bandwidth
for nodeID, rdtMemoryBandwidth := range stats.Resctrl.MemoryBandwidth {
tags := map[string]string{
"node_id": fmt.Sprintf("%v", nodeID),
}
point := makePoint(serResctrlMemoryBandwidthTotal, rdtMemoryBandwidth.TotalBytes)
addTagsToPoint(point, tags)
points = append(points, point)
point = makePoint(serResctrlMemoryBandwidthLocal, rdtMemoryBandwidth.LocalBytes)
addTagsToPoint(point, tags)
points = append(points, point)
}
// Cache
for nodeID, rdtCache := range stats.Resctrl.Cache {
tags := map[string]string{
"node_id": fmt.Sprintf("%v", nodeID),
}
point := makePoint(serResctrlLLCOccupancy, rdtCache.LLCOccupancy)
addTagsToPoint(point, tags)
points = append(points, point)
}
s.tagPoints(cInfo, stats, points)
return points
@ -230,6 +394,10 @@ func (s *influxdbStorage) AddStats(cInfo *info.ContainerInfo, stats *info.Contai
defer s.lock.Unlock()
s.points = append(s.points, s.containerStatsToPoints(cInfo, stats)...)
s.points = append(s.points, s.memoryStatsToPoints(cInfo, stats)...)
s.points = append(s.points, s.hugetlbStatsToPoints(cInfo, stats)...)
s.points = append(s.points, s.perfStatsToPoints(cInfo, stats)...)
s.points = append(s.points, s.resctrlStatsToPoints(cInfo, stats)...)
s.points = append(s.points, s.containerFilesystemStatsToPoints(cInfo, stats)...)
if s.readyToFlush() {
pointsToFlush = s.points

View File

@ -199,11 +199,14 @@ func TestContainerFileSystemStatsToPoints(t *testing.T) {
false, 2*time.Minute)
assert.Nil(err)
ref := info.ContainerReference{
Name: "containerName",
cInfo := &info.ContainerInfo{
ContainerReference: info.ContainerReference{
Name: "containerName",
},
}
stats := &info.ContainerStats{}
points := storage.containerFilesystemStatsToPoints(ref, stats)
points := storage.containerFilesystemStatsToPoints(cInfo, stats)
// stats.Filesystem is always nil, not sure why
assert.Nil(points)
@ -215,30 +218,75 @@ func TestContainerStatsToPoints(t *testing.T) {
require.Nil(t, err)
require.NotNil(t, storage)
ref, stats := createTestStats()
cInfo, stats := createTestStats()
require.Nil(t, err)
require.NotNil(t, stats)
// When
points := storage.containerStatsToPoints(*ref, stats)
points := storage.containerStatsToPoints(cInfo, stats)
points = append(points, storage.memoryStatsToPoints(cInfo, stats)...)
points = append(points, storage.hugetlbStatsToPoints(cInfo, stats)...)
points = append(points, storage.perfStatsToPoints(cInfo, stats)...)
points = append(points, storage.resctrlStatsToPoints(cInfo, stats)...)
// Then
assert.NotEmpty(t, points)
assert.Len(t, points, 10+len(stats.Cpu.Usage.PerCpu))
assert.Len(t, points, 34+len(stats.Cpu.Usage.PerCpu))
// CPU stats
assertContainsPointWithValue(t, points, serCpuUsageTotal, stats.Cpu.Usage.Total)
assertContainsPointWithValue(t, points, serCpuUsageSystem, stats.Cpu.Usage.System)
assertContainsPointWithValue(t, points, serCpuUsageUser, stats.Cpu.Usage.User)
assertContainsPointWithValue(t, points, serMemoryUsage, stats.Memory.Usage)
assertContainsPointWithValue(t, points, serLoadAverage, stats.Cpu.LoadAverage)
for _, cpu_usage := range stats.Cpu.Usage.PerCpu {
assertContainsPointWithValue(t, points, serCpuUsagePerCpu, cpu_usage)
}
// Memory stats
assertContainsPointWithValue(t, points, serMemoryUsage, stats.Memory.Usage)
assertContainsPointWithValue(t, points, serMemoryMaxUsage, stats.Memory.MaxUsage)
assertContainsPointWithValue(t, points, serMemoryCache, stats.Memory.Cache)
assertContainsPointWithValue(t, points, serMemoryRss, stats.Memory.RSS)
assertContainsPointWithValue(t, points, serMemorySwap, stats.Memory.Swap)
assertContainsPointWithValue(t, points, serMemoryMappedFile, stats.Memory.MappedFile)
assertContainsPointWithValue(t, points, serMemoryUsage, stats.Memory.Usage)
assertContainsPointWithValue(t, points, serMemoryWorkingSet, stats.Memory.WorkingSet)
assertContainsPointWithValue(t, points, serMemoryFailcnt, stats.Memory.Failcnt)
assertContainsPointWithValue(t, points, serMemoryFailure, stats.Memory.ContainerData.Pgfault)
assertContainsPointWithValue(t, points, serMemoryFailure, stats.Memory.ContainerData.Pgmajfault)
assertContainsPointWithValue(t, points, serMemoryFailure, stats.Memory.HierarchicalData.Pgfault)
assertContainsPointWithValue(t, points, serMemoryFailure, stats.Memory.HierarchicalData.Pgmajfault)
// Hugetlb stats
for _, hugetlbStat := range stats.Hugetlb {
assertContainsPointWithValue(t, points, setHugetlbUsage, hugetlbStat.Usage)
assertContainsPointWithValue(t, points, setHugetlbMaxUsage, hugetlbStat.MaxUsage)
assertContainsPointWithValue(t, points, setHugetlbFailcnt, hugetlbStat.Failcnt)
}
// Network stats
assertContainsPointWithValue(t, points, serRxBytes, stats.Network.RxBytes)
assertContainsPointWithValue(t, points, serRxErrors, stats.Network.RxErrors)
assertContainsPointWithValue(t, points, serTxBytes, stats.Network.TxBytes)
// TxErrors is written under serTxErrors by containerStatsToPoints, so it must
// be looked up in that series rather than in serTxBytes.
assertContainsPointWithValue(t, points, serTxErrors, stats.Network.TxErrors)
for _, cpu_usage := range stats.Cpu.Usage.PerCpu {
assertContainsPointWithValue(t, points, serCpuUsagePerCpu, cpu_usage)
// Perf stats
for _, perfStat := range stats.PerfStats {
assertContainsPointWithValue(t, points, serPerfStat, perfStat.Value)
}
// Reference memory
assertContainsPointWithValue(t, points, serReferencedMemory, stats.ReferencedMemory)
// Resource Control stats - memory bandwidth
for _, rdtMemoryBandwidth := range stats.Resctrl.MemoryBandwidth {
assertContainsPointWithValue(t, points, serResctrlMemoryBandwidthTotal, rdtMemoryBandwidth.TotalBytes)
assertContainsPointWithValue(t, points, serResctrlMemoryBandwidthLocal, rdtMemoryBandwidth.LocalBytes)
}
// Resource Control stats - cache
for _, rdtCache := range stats.Resctrl.Cache {
assertContainsPointWithValue(t, points, serResctrlLLCOccupancy, rdtCache.LLCOccupancy)
}
}
@ -274,10 +322,12 @@ func createTestStorage() (*influxdbStorage, error) {
return storage, err
}
func createTestStats() (*info.ContainerReference, *info.ContainerStats) {
ref := &info.ContainerReference{
Name: "testContainername",
Aliases: []string{"testContainerAlias1", "testContainerAlias2"},
func createTestStats() (*info.ContainerInfo, *info.ContainerStats) {
cInfo := &info.ContainerInfo{
ContainerReference: info.ContainerReference{
Name: "testContainername",
Aliases: []string{"testContainerAlias1", "testContainerAlias2"},
},
}
cpuUsage := info.CpuUsage{
@ -293,6 +343,34 @@ func createTestStats() (*info.ContainerReference, *info.ContainerStats) {
Usage: cpuUsage,
LoadAverage: int32(rand.Intn(1000)),
},
Memory: info.MemoryStats{
Usage: 26767396864,
MaxUsage: 30429605888,
Cache: 7837376512,
RSS: 18930020352,
Swap: 1024,
MappedFile: 1025327104,
WorkingSet: 23630012416,
Failcnt: 1,
ContainerData: info.MemoryStatsMemoryData{Pgfault: 100328455, Pgmajfault: 97},
HierarchicalData: info.MemoryStatsMemoryData{Pgfault: 100328454, Pgmajfault: 96},
},
Hugetlb: map[string]info.HugetlbStats{
"1GB": {Usage: 1234, MaxUsage: 5678, Failcnt: 9},
"2GB": {Usage: 9876, MaxUsage: 5432, Failcnt: 1},
},
ReferencedMemory: 12345,
PerfStats: []info.PerfStat{{Cpu: 1, Name: "cycles", ScalingRatio: 1.5, Value: 4589}},
Resctrl: info.ResctrlStats{
MemoryBandwidth: []info.MemoryBandwidthStats{
{TotalBytes: 11234, LocalBytes: 4567},
{TotalBytes: 55678, LocalBytes: 9876},
},
Cache: []info.CacheStats{
{LLCOccupancy: 3},
{LLCOccupancy: 5},
},
},
}
return ref, stats
return cInfo, stats
}