Merge pull request #7 from monnand/correct-stats

Correct information in samples
This commit is contained in:
Victor Marmol 2014-06-12 14:41:59 -07:00
commit 1429dcb4d9
6 changed files with 415 additions and 140 deletions

View File

@ -35,12 +35,12 @@ type ContainerHandler interface {
ListContainers(listType ListType) ([]string, error)
ListThreads(listType ListType) ([]int, error)
ListProcesses(listType ListType) ([]int, error)
StatsSummary() (*info.ContainerStatsSummary, error)
StatsSummary() (*info.ContainerStatsPercentiles, error)
}
type NoStatsSummary struct {
}
func (self *NoStatsSummary) StatsSummary() (*info.ContainerStatsSummary, error) {
func (self *NoStatsSummary) StatsSummary() (*info.ContainerStatsPercentiles, error) {
return nil, fmt.Errorf("This method (StatsSummary) should never be called")
}

View File

@ -15,7 +15,7 @@
package container
import (
"math/big"
"fmt"
"sync"
"time"
@ -23,81 +23,104 @@ import (
"github.com/google/cadvisor/sampling"
)
type statsSummaryContainerHandlerWrapper struct {
type percentilesContainerHandlerWrapper struct {
handler ContainerHandler
currentSummary *info.ContainerStatsSummary
totalMemoryUsage *big.Int
containerPercentiles *info.ContainerStatsPercentiles
prevStats *info.ContainerStats
numStats uint64
sampler sampling.Sampler
lock sync.Mutex
}
func (self *statsSummaryContainerHandlerWrapper) GetSpec() (*info.ContainerSpec, error) {
func (self *percentilesContainerHandlerWrapper) GetSpec() (*info.ContainerSpec, error) {
return self.handler.GetSpec()
}
func (self *statsSummaryContainerHandlerWrapper) GetStats() (*info.ContainerStats, error) {
// updatePrevStats remembers stats as the "previous" observation that the
// next call to GetStats will diff against.
//
// Observations that are nil, or that lack either the Cpu or the Memory
// section, are not stored; instead any remembered observation is dropped.
// Otherwise the timestamp, the CPU stats (with a freshly allocated PerCpu
// slice) and the memory stats are copied by value into self.prevStats, so
// the stored observation does not share the PerCpu backing array with stats.
func (self *percentilesContainerHandlerWrapper) updatePrevStats(stats *info.ContainerStats) {
	complete := stats != nil && stats.Cpu != nil && stats.Memory != nil
	if !complete {
		// discard incomplete stats
		self.prevStats = nil
		return
	}

	// Lazily allocate the holder the first time a complete observation
	// arrives (or after it was discarded).
	if self.prevStats == nil {
		self.prevStats = &info.ContainerStats{
			Cpu:    &info.CpuStats{},
			Memory: &info.MemoryStats{},
		}
	}

	prev := self.prevStats
	prev.Timestamp = stats.Timestamp

	// Struct assignment copies the slice header only, so give the stored
	// copy its own PerCpu storage before filling it.
	*prev.Cpu = *stats.Cpu
	prev.Cpu.Usage.PerCpu = make([]uint64, len(stats.Cpu.Usage.PerCpu))
	copy(prev.Cpu.Usage.PerCpu, stats.Cpu.Usage.PerCpu)

	*prev.Memory = *stats.Memory
}
func (self *percentilesContainerHandlerWrapper) GetStats() (*info.ContainerStats, error) {
stats, err := self.handler.GetStats()
if err != nil {
return nil, err
}
if stats == nil {
return nil, nil
return nil, fmt.Errorf("container handler returns a nil error and a nil stats")
}
if stats.Timestamp.IsZero() {
return nil, fmt.Errorf("container handler did not set timestamp")
}
stats.Timestamp = time.Now()
self.lock.Lock()
defer self.lock.Unlock()
self.sampler.Update(stats)
if self.currentSummary == nil {
self.currentSummary = new(info.ContainerStatsSummary)
if self.prevStats != nil {
sample, err := info.NewSample(self.prevStats, stats)
if err != nil {
return nil, fmt.Errorf("wrong stats: %v", err)
}
if sample != nil {
self.sampler.Update(sample)
}
}
self.updatePrevStats(stats)
if self.containerPercentiles == nil {
self.containerPercentiles = new(info.ContainerStatsPercentiles)
}
self.numStats++
if stats.Memory != nil {
if stats.Memory.Usage > self.currentSummary.MaxMemoryUsage {
self.currentSummary.MaxMemoryUsage = stats.Memory.Usage
if stats.Memory.Usage > self.containerPercentiles.MaxMemoryUsage {
self.containerPercentiles.MaxMemoryUsage = stats.Memory.Usage
}
// XXX(dengnan): Very inefficient!
if self.totalMemoryUsage == nil {
self.totalMemoryUsage = new(big.Int)
}
usage := (&big.Int{}).SetUint64(stats.Memory.Usage)
self.totalMemoryUsage = self.totalMemoryUsage.Add(self.totalMemoryUsage, usage)
n := (&big.Int{}).SetUint64(self.numStats)
avg := (&big.Int{}).Div(self.totalMemoryUsage, n)
self.currentSummary.AvgMemoryUsage = avg.Uint64()
}
return stats, nil
}
func (self *statsSummaryContainerHandlerWrapper) ListContainers(listType ListType) ([]string, error) {
func (self *percentilesContainerHandlerWrapper) ListContainers(listType ListType) ([]string, error) {
return self.handler.ListContainers(listType)
}
func (self *statsSummaryContainerHandlerWrapper) ListThreads(listType ListType) ([]int, error) {
func (self *percentilesContainerHandlerWrapper) ListThreads(listType ListType) ([]int, error) {
return self.handler.ListThreads(listType)
}
func (self *statsSummaryContainerHandlerWrapper) ListProcesses(listType ListType) ([]int, error) {
func (self *percentilesContainerHandlerWrapper) ListProcesses(listType ListType) ([]int, error) {
return self.handler.ListProcesses(listType)
}
func (self *statsSummaryContainerHandlerWrapper) StatsSummary() (*info.ContainerStatsSummary, error) {
func (self *percentilesContainerHandlerWrapper) StatsSummary() (*info.ContainerStatsPercentiles, error) {
self.lock.Lock()
defer self.lock.Unlock()
samples := make([]*info.ContainerStats, 0, self.sampler.Len())
samples := make([]*info.ContainerStatsSample, 0, self.sampler.Len())
self.sampler.Map(func(d interface{}) {
stats := d.(*info.ContainerStats)
stats := d.(*info.ContainerStatsSample)
samples = append(samples, stats)
})
self.currentSummary.Samples = samples
self.containerPercentiles.Samples = samples
// XXX(dengnan): probably add to StatsParameter?
self.currentSummary.FillPercentiles(
self.containerPercentiles.FillPercentiles(
[]int{50, 80, 90, 95, 99},
[]int{50, 80, 90, 95, 99},
)
return self.currentSummary, nil
return self.containerPercentiles, nil
}
type StatsParameter struct {
@ -112,9 +135,9 @@ func AddStatsSummary(handler ContainerHandler, parameter *StatsParameter) (Conta
if err != nil {
return nil, err
}
return &statsSummaryContainerHandlerWrapper{
return &percentilesContainerHandlerWrapper{
handler: handler,
currentSummary: &info.ContainerStatsSummary{},
containerPercentiles: &info.ContainerStatsPercentiles{},
sampler: sampler,
}, nil
}

View File

@ -15,53 +15,40 @@
package container
import (
crand "crypto/rand"
"encoding/binary"
"math/rand"
"sync"
"testing"
"time"
"github.com/google/cadvisor/info"
)
func init() {
// NOTE(dengnan): Even if we picked a good random seed,
// the random number from math/rand is still not cryptographically secure!
var seed int64
binary.Read(crand.Reader, binary.LittleEndian, &seed)
rand.Seed(seed)
type mockContainer struct {
}
type randomStatsContainer struct {
NoStatsSummary
func (self *mockContainer) GetSpec() (*info.ContainerSpec, error) {
return nil, nil
}
func (self *randomStatsContainer) GetSpec() (*info.ContainerSpec, error) {
func (self *mockContainer) ListContainers(listType ListType) ([]string, error) {
return nil, nil
}
func (self *randomStatsContainer) GetStats() (*info.ContainerStats, error) {
stats := new(info.ContainerStats)
stats.Cpu = new(info.CpuStats)
stats.Memory = new(info.MemoryStats)
stats.Memory.Usage = uint64(rand.Intn(2048))
return stats, nil
}
func (self *randomStatsContainer) ListContainers(listType ListType) ([]string, error) {
func (self *mockContainer) ListThreads(listType ListType) ([]int, error) {
return nil, nil
}
func (self *randomStatsContainer) ListThreads(listType ListType) ([]int, error) {
func (self *mockContainer) ListProcesses(listType ListType) ([]int, error) {
return nil, nil
}
func (self *randomStatsContainer) ListProcesses(listType ListType) ([]int, error) {
return nil, nil
}
func TestAvgMaxMemoryUsage(t *testing.T) {
func TestMaxMemoryUsage(t *testing.T) {
N := 100
memTrace := make([]uint64, N)
for i := 0; i < N; i++ {
memTrace[i] = uint64(i + 1)
}
handler, err := AddStatsSummary(
&randomStatsContainer{},
containerWithTrace(1*time.Second, nil, memTrace),
&StatsParameter{
Sampler: "uniform",
NumSamples: 10,
@ -70,19 +57,13 @@ func TestAvgMaxMemoryUsage(t *testing.T) {
if err != nil {
t.Error(err)
}
var maxUsage uint64
var totalUsage uint64
N := 100
maxUsage := uint64(N)
for i := 0; i < N; i++ {
stats, err := handler.GetStats()
_, err := handler.GetStats()
if err != nil {
t.Errorf("Error when get stats: %v", err)
continue
}
if stats.Memory.Usage > maxUsage {
maxUsage = stats.Memory.Usage
}
totalUsage += stats.Memory.Usage
}
summary, err := handler.StatsSummary()
if err != nil {
@ -91,8 +72,109 @@ func TestAvgMaxMemoryUsage(t *testing.T) {
if summary.MaxMemoryUsage != maxUsage {
t.Fatalf("Max memory usage should be %v; received %v", maxUsage, summary.MaxMemoryUsage)
}
avg := totalUsage / uint64(N)
if summary.AvgMemoryUsage != avg {
t.Fatalf("Avg memory usage should be %v; received %v", avg, summary.AvgMemoryUsage)
}
// replayTrace is a test container handler whose GetStats replays
// pre-recorded CPU and memory usage values, one entry per call.
type replayTrace struct {
NoStatsSummary
mockContainer
// Remaining CPU usage increments; GetStats consumes the first entry on
// each call and adds it to totalUsage.
cpuTrace []uint64
// Remaining memory usage values; GetStats consumes the first entry on
// each call and reports it as the memory usage.
memTrace []uint64
// Running sum of the consumed cpuTrace entries, reported by GetStats as
// the cumulative CPU usage.
totalUsage uint64
// Timestamp stamped on the next stats returned by GetStats; advanced by
// duration after every call.
currenttime time.Time
// Simulated time between two consecutive observations.
duration time.Duration
// Acquired by GetStats.
lock sync.Mutex
}
// containerWithTrace builds a replayTrace handler that will replay
// cpuUsages and memUsages, spacing consecutive observations by duration.
// The first observation is stamped with the time at which this function
// is called.
func containerWithTrace(duration time.Duration, cpuUsages []uint64, memUsages []uint64) ContainerHandler {
	trace := new(replayTrace)
	trace.duration = duration
	trace.cpuTrace = cpuUsages
	trace.memTrace = memUsages
	trace.currenttime = time.Now()
	return trace
}
// GetStats returns the next observation of the recorded trace.
//
// Each call consumes the head of memTrace (reported as the memory usage)
// and the head of cpuTrace (added to the running total, which is reported
// as the cumulative total, per-CPU and user CPU usage; system usage is
// always 0). Once a trace is exhausted, memory usage is reported as 0 and
// the cumulative CPU usage stops growing. The returned timestamp advances
// by self.duration on every call. The error is always nil.
//
// The lock is taken before any trace state is touched so that concurrent
// callers never read or re-slice memTrace/cpuTrace at the same time.
func (self *replayTrace) GetStats() (*info.ContainerStats, error) {
	self.lock.Lock()
	defer self.lock.Unlock()

	stats := new(info.ContainerStats)
	stats.Cpu = new(info.CpuStats)
	stats.Memory = new(info.MemoryStats)

	if len(self.memTrace) > 0 {
		stats.Memory.Usage = self.memTrace[0]
		self.memTrace = self.memTrace[1:]
	}

	// CPU usage is cumulative: add this step's increment to the total.
	cpuTotal := self.totalUsage
	if len(self.cpuTrace) > 0 {
		cpuTotal += self.cpuTrace[0]
		self.cpuTrace = self.cpuTrace[1:]
	}
	self.totalUsage = cpuTotal

	stats.Timestamp = self.currenttime
	self.currenttime = self.currenttime.Add(self.duration)

	stats.Cpu.Usage.Total = cpuTotal
	stats.Cpu.Usage.PerCpu = []uint64{cpuTotal}
	stats.Cpu.Usage.User = cpuTotal
	stats.Cpu.Usage.System = 0
	return stats, nil
}
// TestSampleCpuUsage feeds a random CPU/memory trace through the wrapper
// returned by AddStatsSummary and checks that every stored sample spans
// exactly one sample period and carries a CPU usage that appears in the
// trace of per-step increments.
func TestSampleCpuUsage(t *testing.T) {
	// Number of samples
	N := 10
	cpuTrace := make([]uint64, 0, N)
	memTrace := make([]uint64, 0, N)
	// We need N+1 observations to get N samples
	for i := 0; i < N+1; i++ {
		cpuusage := uint64(rand.Intn(1000))
		memusage := uint64(rand.Intn(1000))
		cpuTrace = append(cpuTrace, cpuusage)
		memTrace = append(memTrace, memusage)
	}
	samplePeriod := 1 * time.Second
	handler, err := AddStatsSummary(
		containerWithTrace(samplePeriod, cpuTrace, memTrace),
		&StatsParameter{
			// Use uniform sampler with sample size of N, so that
			// we will be guaranteed to store the first N samples.
			Sampler:    "uniform",
			NumSamples: N,
		},
	)
	if err != nil {
		// Without a handler nothing below can run; continuing would
		// only dereference a nil handler and panic.
		t.Fatal(err)
	}
	// request stats/observation N+1 times, so that there will be N samples
	for i := 0; i < N+1; i++ {
		_, err = handler.GetStats()
		if err != nil {
			t.Fatal(err)
		}
	}
	s, err := handler.StatsSummary()
	if err != nil {
		t.Fatal(err)
	}
	for _, sample := range s.Samples {
		if sample.Duration != samplePeriod {
			t.Errorf("sample duration is %v, not %v", sample.Duration, samplePeriod)
		}
		cpuUsage := sample.Cpu.Usage
		found := false
		for _, u := range cpuTrace {
			if u == cpuUsage {
				found = true
				break
			}
		}
		if !found {
			t.Errorf("unable to find cpu usage %v", cpuUsage)
		}
	}
}

View File

@ -15,6 +15,7 @@
package info
import (
"fmt"
"sort"
"time"
)
@ -61,7 +62,7 @@ type ContainerInfo struct {
// Historical statistics gathered from the container.
Stats []*ContainerStats `json:"stats,omitempty"`
StatsSummary *ContainerStatsSummary `json:"stats_summary,omitempty"`
StatsSummary *ContainerStatsPercentiles `json:"stats_summary,omitempty"`
}
func (self *ContainerInfo) StatsAfter(ref time.Time) []*ContainerStats {
@ -99,11 +100,23 @@ func (self *ContainerInfo) StatsEndTime() time.Time {
return ret
}
// All CPU usage metrics are cumulative from the creation of the container
type CpuStats struct {
Usage struct {
// Total CPU usage.
// Units: nanoseconds
Total uint64 `json:"total"`
// Per CPU/core usage of the container.
// Unit: nanoseconds.
PerCpu []uint64 `json:"per_cpu,omitempty"`
// Time spent in user space.
// Unit: nanoseconds
User uint64 `json:"user"`
// Time spent in kernel space.
// Unit: nanoseconds
System uint64 `json:"system"`
} `json:"usage"`
Load int32 `json:"load"`
@ -142,6 +155,21 @@ type ContainerStats struct {
Memory *MemoryStats `json:"memory,omitempty"`
}
// ContainerStatsSample describes resource usage over one sample period,
// i.e. the interval between two consecutive ContainerStats observations.
// Samples are built by NewSample.
type ContainerStatsSample struct {
// Timestamp of the end of the sample period (the timestamp of the later
// of the two observations).
Timestamp time.Time `json:"timestamp"`
// Duration of the sample period (later timestamp minus earlier timestamp).
Duration time.Duration `json:"duration"`
Cpu struct {
// Number of nanoseconds of CPU time used by the container during the
// sample period: the difference between the cumulative totals of the
// two observations.
Usage uint64 `json:"usage"`
} `json:"cpu"`
Memory struct {
// Memory usage reported by the later observation.
// Units: Bytes.
Usage uint64 `json:"usage"`
} `json:"memory"`
}
// This is not exported.
// Use FillPercentile to calculate percentiles
type percentile struct {
@ -149,15 +177,45 @@ type percentile struct {
Value uint64 `json:"value"`
}
type ContainerStatsSummary struct {
type ContainerStatsPercentiles struct {
// TODO(dengnan): More things?
MaxMemoryUsage uint64 `json:"max_memory_usage,omitempty"`
AvgMemoryUsage uint64 `json:"avg_memory_usage,omitempty"`
Samples []*ContainerStats `json:"samples,omitempty"`
Samples []*ContainerStatsSample `json:"samples,omitempty"`
MemoryUsagePercentiles []percentile `json:"memory_usage_percentiles,omitempty"`
CpuUsagePercentiles []percentile `json:"cpu_usage_percentiles,omitempty"`
}
// NewSample turns two consecutive observations into one sample.
//
// Two observations are required because the CPU usage stored in
// ContainerStats is cumulative: the sample's CPU usage is the difference
// between the totals of current and prev, while its memory usage is simply
// the value reported by current. The sample is stamped with current's
// timestamp and its duration is the time elapsed since prev.
//
// An error is returned, and no sample, when:
//   - either observation is nil;
//   - either observation lacks its Cpu or Memory section;
//   - current is not strictly later than prev;
//   - the cumulative CPU total went backwards.
//
// NewSample only reads prev and current and takes no locks; callers that
// share them between goroutines must synchronise on their own.
func NewSample(prev, current *ContainerStats) (*ContainerStatsSample, error) {
	// The cases are evaluated top to bottom, so later cases may safely
	// dereference what earlier cases have validated.
	switch {
	case prev == nil || current == nil:
		return nil, fmt.Errorf("empty stats")
	case prev.Cpu == nil || prev.Memory == nil || current.Cpu == nil || current.Memory == nil:
		// Incomplete observations cannot be diffed.
		return nil, fmt.Errorf("incomplete stats")
	case !current.Timestamp.After(prev.Timestamp):
		// prev must be the earlier observation.
		return nil, fmt.Errorf("wrong stats order")
	case current.Cpu.Usage.Total < prev.Cpu.Usage.Total:
		// A cumulative counter that decreased is invalid data.
		return nil, fmt.Errorf("current CPU usage is less than prev CPU usage (cumulative).")
	}

	sample := &ContainerStatsSample{
		Timestamp: current.Timestamp,
		Duration:  current.Timestamp.Sub(prev.Timestamp),
	}
	// CPU time consumed within the interval.
	sample.Cpu.Usage = current.Cpu.Usage.Total - prev.Cpu.Usage.Total
	// Memory usage is the instantaneous value at the end of the interval.
	sample.Memory.Usage = current.Memory.Usage
	return sample, nil
}
type uint64Slice []uint64
func (self uint64Slice) Len() int {
@ -172,40 +230,26 @@ func (self uint64Slice) Swap(i, j int) {
self[i], self[j] = self[j], self[i]
}
func (self uint64Slice) Percentiles(ps ...int) []uint64 {
func (self uint64Slice) Percentiles(requestedPercentiles ...int) []percentile {
if len(self) == 0 {
return nil
}
ret := make([]uint64, 0, len(ps))
ret := make([]percentile, 0, len(requestedPercentiles))
sort.Sort(self)
for _, p := range ps {
idx := (float64(p) / 100.0) * float64(len(self)+1)
if idx > float64(len(self)-1) {
ret = append(ret, self[len(self)-1])
} else {
ret = append(ret, self[int(idx)])
}
for _, p := range requestedPercentiles {
idx := (len(self) * p / 100) - 1
ret = append(
ret,
percentile{
Percentage: p,
Value: self[idx],
},
)
}
return ret
}
// len(bs) <= len(as)
func float64Zipuint64(as []int, bs []uint64) []percentile {
if len(bs) == 0 {
return nil
}
ret := make([]percentile, len(bs))
for i, b := range bs {
a := as[i]
ret[i] = percentile{
Percentage: a,
Value: b,
}
}
return ret
}
func (self *ContainerStatsSummary) FillPercentiles(cpuPercentages, memoryPercentages []int) {
func (self *ContainerStatsPercentiles) FillPercentiles(cpuPercentages, memoryPercentages []int) {
if len(self.Samples) == 0 {
return
}
@ -216,16 +260,10 @@ func (self *ContainerStatsSummary) FillPercentiles(cpuPercentages, memoryPercent
if sample == nil {
continue
}
if sample.Cpu != nil {
cpuUsages = append(cpuUsages, sample.Cpu.Usage.Total)
}
if sample.Memory != nil {
cpuUsages = append(cpuUsages, sample.Cpu.Usage)
memUsages = append(memUsages, sample.Memory.Usage)
}
}
cpuPercentiles := uint64Slice(cpuUsages).Percentiles(cpuPercentages...)
memPercentiles := uint64Slice(memUsages).Percentiles(memoryPercentages...)
self.CpuUsagePercentiles = float64Zipuint64(cpuPercentages, cpuPercentiles)
self.MemoryUsagePercentiles = float64Zipuint64(memoryPercentages, memPercentiles)
self.CpuUsagePercentiles = uint64Slice(cpuUsages).Percentiles(cpuPercentages...)
self.MemoryUsagePercentiles = uint64Slice(memUsages).Percentiles(memoryPercentages...)
}

View File

@ -67,20 +67,152 @@ func TestPercentiles(t *testing.T) {
N := 100
data := make([]uint64, N)
for i := 0; i < N; i++ {
data[i] = uint64(i)
for i := 1; i < N+1; i++ {
data[i-1] = uint64(i)
}
ps := []float64{
0.8,
0.9,
0.5,
percentages := []int{
80,
90,
50,
}
ss := uint64Slice(data).Percentiles(ps...)
for i, s := range ss {
p := ps[i]
d := uint64(float64(N) * p)
percentiles := uint64Slice(data).Percentiles(percentages...)
for _, s := range percentiles {
if s.Value != uint64(s.Percentage) {
t.Errorf("%v percentile data should be %v, but got %v", s.Percentage, s.Percentage, s.Value)
}
/*
p := percentages[i]
d := uint64(N * p / 100)
if d != s {
t.Errorf("%v \\%tile data should be %v, but got %v", p*float64(100), d, s)
t.Errorf("%v percentile data should be %v, but got %v", p, d, s)
}
*/
}
}
// TestNewSampleNilStats checks that NewSample reports an error when either
// the previous or the current observation is nil.
func TestNewSampleNilStats(t *testing.T) {
	stats := &ContainerStats{Cpu: &CpuStats{}, Memory: &MemoryStats{}}
	usage := &stats.Cpu.Usage
	usage.PerCpu = []uint64{10}
	usage.Total = 10
	usage.System = 2
	usage.User = 8
	stats.Memory.Usage = 200

	// nil as the earlier observation
	if sample, err := NewSample(nil, stats); err == nil {
		t.Errorf("generated an unexpected sample: %+v", sample)
	}

	// nil as the later observation
	if sample, err := NewSample(stats, nil); err == nil {
		t.Errorf("generated an unexpected sample: %+v", sample)
	}
}
// createStats builds a complete ContainerStats observation taken at
// timestamp. cpuUsage is used as the total, the user and the single
// per-CPU usage value (system usage is 0); memUsage is the memory usage.
func createStats(cpuUsage, memUsage uint64, timestamp time.Time) *ContainerStats {
	cpu := new(CpuStats)
	cpu.Usage.Total = cpuUsage
	cpu.Usage.User = cpuUsage
	cpu.Usage.System = 0
	cpu.Usage.PerCpu = []uint64{cpuUsage}

	mem := new(MemoryStats)
	mem.Usage = memUsage

	return &ContainerStats{
		Timestamp: timestamp,
		Cpu:       cpu,
		Memory:    mem,
	}
}
// TestAddSample checks the happy path of NewSample: two well-ordered,
// complete observations yield a sample whose memory usage is the current
// value and whose CPU usage is the difference of the cumulative totals.
func TestAddSample(t *testing.T) {
	const (
		prevCPU    uint64 = 10
		currentCPU uint64 = 15
		currentMem uint64 = 200
	)
	start := time.Now()
	prev := createStats(prevCPU, currentMem, start)
	current := createStats(currentCPU, currentMem, start.Add(1*time.Second))

	sample, err := NewSample(prev, current)
	if err != nil {
		t.Errorf("should be able to generate a sample. but received error: %v", err)
	}
	if sample == nil {
		t.Fatalf("nil sample and nil error. unexpected result!")
	}

	if got := sample.Memory.Usage; got != currentMem {
		t.Errorf("wrong memory usage: %v. should be %v", got, currentMem)
	}
	if got, want := sample.Cpu.Usage, currentCPU-prevCPU; got != want {
		t.Errorf("wrong CPU usage: %v. should be %v", got, want)
	}
}
// TestAddSampleIncompleteStats checks that NewSample rejects an observation
// that lacks its Memory or its Cpu section, whether it is passed as the
// previous or as the current observation.
func TestAddSampleIncompleteStats(t *testing.T) {
	const (
		prevCPU    uint64 = 10
		currentCPU uint64 = 15
		currentMem uint64 = 200
	)
	start := time.Now()
	prev := createStats(prevCPU, currentMem, start)
	current := createStats(currentCPU, currentMem, start.Add(1*time.Second))

	incomplete := []*ContainerStats{
		{Cpu: prev.Cpu, Memory: nil}, // memory section missing
		{Cpu: nil, Memory: prev.Memory}, // cpu section missing
	}
	for _, stats := range incomplete {
		// incomplete observation in the "previous" position
		if sample, err := NewSample(stats, current); err == nil {
			t.Errorf("generated an unexpected sample: %+v", sample)
		}
		// incomplete observation in the "current" position
		if sample, err := NewSample(prev, stats); err == nil {
			t.Errorf("generated an unexpected sample: %+v", sample)
		}
	}
}
// TestAddSampleWrongOrder checks that NewSample rejects a pair of
// observations passed in reverse chronological order.
func TestAddSampleWrongOrder(t *testing.T) {
	const (
		prevCPU    uint64 = 10
		currentCPU uint64 = 15
		currentMem uint64 = 200
	)
	start := time.Now()
	earlier := createStats(prevCPU, currentMem, start)
	later := createStats(currentCPU, currentMem, start.Add(1*time.Second))

	// Deliberately swapped: the later observation is passed as prev.
	if sample, err := NewSample(later, earlier); err == nil {
		t.Errorf("generated an unexpected sample: %+v", sample)
	}
}
// TestAddSampleWrongCpuUsage checks that NewSample rejects observations
// whose cumulative CPU usage decreases over time.
func TestAddSampleWrongCpuUsage(t *testing.T) {
	const (
		prevCPU    uint64 = 15
		currentCPU uint64 = 10 // lower than prevCPU on purpose
		currentMem uint64 = 200
	)
	start := time.Now()
	prev := createStats(prevCPU, currentMem, start)
	current := createStats(currentCPU, currentMem, start.Add(1*time.Second))

	if sample, err := NewSample(prev, current); err == nil {
		t.Errorf("generated an unexpected sample: %+v", sample)
	}
}

View File

@ -39,7 +39,7 @@ type containerInfo struct {
Subcontainers []string
Spec *info.ContainerSpec
Stats *list.List
StatsSummary *info.ContainerStatsSummary
StatsSummary *info.ContainerStatsPercentiles
}
type containerData struct {