From d2e11efba250b49fa4699d274783343b6131ac1e Mon Sep 17 00:00:00 2001 From: Euan Kemp Date: Thu, 3 Aug 2017 15:33:51 -0700 Subject: [PATCH] libcontainer: use real number of CPUs for usage As of the 4.7 kernel, the cpustats field returned from libcontainer contains values for every possible cpu (including nonexistent ones). The extra values are all 0s. If we assume that hotplug events won't happen, we can get a more accurage cpu count by using runtime.NumCPU and then ignoring any values beyond that. --- container/libcontainer/helpers.go | 51 ++++++++++++++++++++++++-- container/libcontainer/helpers_test.go | 48 ++++++++++++++++++++++++ 2 files changed, 96 insertions(+), 3 deletions(-) diff --git a/container/libcontainer/helpers.go b/container/libcontainer/helpers.go index da25bf07..b51477ee 100644 --- a/container/libcontainer/helpers.go +++ b/container/libcontainer/helpers.go @@ -33,6 +33,11 @@ import ( "github.com/opencontainers/runc/libcontainer/cgroups" ) +/* +#include +*/ +import "C" + type CgroupSubsystems struct { // Cgroup subsystem mounts. // e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"] @@ -433,15 +438,40 @@ func DiskStatsCopy(blkio_stats []cgroups.BlkioStatEntry) (stat []info.PerDiskSta return DiskStatsCopy1(disk_stat) } +func minUint32(x, y uint32) uint32 { + if x < y { + return x + } + return y +} + +// var to allow unit tests to stub it out +var numCpusFunc = getNumberOnlineCPUs + // Convert libcontainer stats to info.ContainerStats. func setCpuStats(s *cgroups.Stats, ret *info.ContainerStats) { ret.Cpu.Usage.User = s.CpuStats.CpuUsage.UsageInUsermode ret.Cpu.Usage.System = s.CpuStats.CpuUsage.UsageInKernelmode - n := len(s.CpuStats.CpuUsage.PercpuUsage) - ret.Cpu.Usage.PerCpu = make([]uint64, n) + numPossible := uint32(len(s.CpuStats.CpuUsage.PercpuUsage)) + // Note that as of https://patchwork.kernel.org/patch/8607101/ (kernel v4.7), + // the percpu usage information includes extra zero values for all additional + // possible CPUs. This is to allow statistic collection after CPU-hotplug. + // We intentionally ignore these extra zeroes. + numActual, err := numCpusFunc() + if err != nil { + glog.Errorf("unable to determine number of actual cpus; defaulting to maximum possible number: errno %v", err) + numActual = numPossible + } + if numActual > numPossible { + // The real number of cores should never be greater than the number of + // datapoints reported in cpu usage. + glog.Errorf("PercpuUsage had %v cpus, but the actual number is %v; ignoring extra CPUs", numPossible, numActual) + } + numActual = minUint32(numPossible, numActual) + ret.Cpu.Usage.PerCpu = make([]uint64, numActual) ret.Cpu.Usage.Total = 0 - for i := 0; i < n; i++ { + for i := uint32(0); i < numActual; i++ { ret.Cpu.Usage.PerCpu[i] = s.CpuStats.CpuUsage.PercpuUsage[i] ret.Cpu.Usage.Total += s.CpuStats.CpuUsage.PercpuUsage[i] } @@ -451,6 +481,21 @@ func setCpuStats(s *cgroups.Stats, ret *info.ContainerStats) { ret.Cpu.CFS.ThrottledTime = s.CpuStats.ThrottlingData.ThrottledTime } +// Copied from +// https://github.com/moby/moby/blob/8b1adf55c2af329a4334f21d9444d6a169000c81/daemon/stats/collector_unix.go#L73 +// Apache 2.0, Copyright Docker, Inc. +func getNumberOnlineCPUs() (uint32, error) { + i, err := C.sysconf(C._SC_NPROCESSORS_ONLN) + // According to POSIX - errno is undefined after successful + // sysconf, and can be non-zero in several cases, so look for + // error in returned value not in errno. + // (https://sourceware.org/bugzilla/show_bug.cgi?id=21536) + if i == -1 { + return 0, err + } + return uint32(i), nil +} + func setDiskIoStats(s *cgroups.Stats, ret *info.ContainerStats) { ret.DiskIo.IoServiceBytes = DiskStatsCopy(s.BlkioStats.IoServiceBytesRecursive) ret.DiskIo.IoServiced = DiskStatsCopy(s.BlkioStats.IoServicedRecursive) diff --git a/container/libcontainer/helpers_test.go b/container/libcontainer/helpers_test.go index c491e287..662fd89c 100644 --- a/container/libcontainer/helpers_test.go +++ b/container/libcontainer/helpers_test.go @@ -19,6 +19,8 @@ import ( "testing" info "github.com/google/cadvisor/info/v1" + "github.com/opencontainers/runc/libcontainer/cgroups" + "github.com/opencontainers/runc/libcontainer/system" ) func TestScanInterfaceStats(t *testing.T) { @@ -86,3 +88,49 @@ func TestScanUDPStats(t *testing.T) { t.Errorf("Expected %#v, got %#v", udpstats, stats) } } + +// https://github.com/docker/libcontainer/blob/v2.2.1/cgroups/fs/cpuacct.go#L19 +const nanosecondsInSeconds = 1000000000 + +var clockTicks = uint64(system.GetClockTicks()) + +func TestMorePossibleCPUs(t *testing.T) { + realNumCPUs := uint32(8) + numCpusFunc = func() (uint32, error) { + return realNumCPUs, nil + } + possibleCPUs := uint32(31) + + perCpuUsage := make([]uint64, possibleCPUs) + for i := uint32(0); i < realNumCPUs; i++ { + perCpuUsage[i] = 8562955455524 + } + + s := &cgroups.Stats{ + CpuStats: cgroups.CpuStats{ + CpuUsage: cgroups.CpuUsage{ + PercpuUsage: perCpuUsage, + TotalUsage: 33802947350272, + UsageInKernelmode: 734746 * nanosecondsInSeconds / clockTicks, + UsageInUsermode: 2767637 * nanosecondsInSeconds / clockTicks, + }, + }, + } + var ret info.ContainerStats + setCpuStats(s, &ret) + + expected := info.ContainerStats{ + Cpu: info.CpuStats{ + Usage: info.CpuUsage{ + PerCpu: perCpuUsage[0:realNumCPUs], + User: s.CpuStats.CpuUsage.UsageInUsermode, + System: s.CpuStats.CpuUsage.UsageInKernelmode, + Total: 8562955455524 * uint64(realNumCPUs), + }, + }, + } + + if !ret.Eq(&expected) { + t.Fatalf("expected %+v == %+v", ret, expected) + } +}