Merge pull request #491 from rjnagal/summary
WIP: Add utility to calculate derived stats.
This commit is contained in:
commit
48c41ba273
74
summary/buffer.go
Normal file
74
summary/buffer.go
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package summary
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/google/cadvisor/info"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Manages a buffer of usage samples.
|
||||||
|
// This is similar to stats buffer in storage/memory.
|
||||||
|
// The main difference is that we do not pre-allocate the buffer as most containers
|
||||||
|
// won't live that long.
|
||||||
|
type SamplesBuffer struct {
|
||||||
|
// list of collected samples.
|
||||||
|
samples []info.Usage
|
||||||
|
// maximum size this buffer can grow to.
|
||||||
|
maxSize int
|
||||||
|
// index for the latest sample.
|
||||||
|
index int
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initializes an empty buffer.
|
||||||
|
func NewSamplesBuffer(size int) *SamplesBuffer {
|
||||||
|
return &SamplesBuffer{
|
||||||
|
index: -1,
|
||||||
|
maxSize: size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the current number of samples in the buffer.
|
||||||
|
func (s *SamplesBuffer) Size() int {
|
||||||
|
return len(s.samples)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add an element to the buffer. Oldest one is overwritten if required.
|
||||||
|
func (s *SamplesBuffer) Add(stat info.Usage) {
|
||||||
|
if len(s.samples) < s.maxSize {
|
||||||
|
s.samples = append(s.samples, stat)
|
||||||
|
s.index++
|
||||||
|
return
|
||||||
|
}
|
||||||
|
s.index = (s.index + 1) % s.maxSize
|
||||||
|
s.samples[s.index] = stat
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns pointers to the last 'n' stats.
|
||||||
|
func (s *SamplesBuffer) RecentStats(n int) []*info.Usage {
|
||||||
|
if n > len(s.samples) {
|
||||||
|
n = len(s.samples)
|
||||||
|
}
|
||||||
|
start := s.index - (n - 1)
|
||||||
|
if start < 0 {
|
||||||
|
start += len(s.samples)
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make([]*info.Usage, n)
|
||||||
|
for i := 0; i < n; i++ {
|
||||||
|
index := (start + i) % len(s.samples)
|
||||||
|
out[i] = &s.samples[index]
|
||||||
|
}
|
||||||
|
return out
|
||||||
|
}
|
118
summary/buffer_test.go
Normal file
118
summary/buffer_test.go
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package summary
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/google/cadvisor/info"
|
||||||
|
)
|
||||||
|
|
||||||
|
func createSample(i uint64) info.Usage {
|
||||||
|
usage := info.Usage{}
|
||||||
|
usage.PercentComplete = 100
|
||||||
|
usage.Cpu = info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: i * 50,
|
||||||
|
Max: i * 100,
|
||||||
|
Ninety: i * 90,
|
||||||
|
}
|
||||||
|
usage.Memory = info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: i * 50 * 1024,
|
||||||
|
Max: i * 100 * 1024,
|
||||||
|
Ninety: i * 90 * 1024,
|
||||||
|
}
|
||||||
|
return usage
|
||||||
|
}
|
||||||
|
|
||||||
|
func expectSize(t *testing.T, b *SamplesBuffer, expectedSize int) {
|
||||||
|
if b.Size() != expectedSize {
|
||||||
|
t.Errorf("Expected size %d, got %d", expectedSize, b.Size())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func expectElements(t *testing.T, b *SamplesBuffer, expected []info.Usage) {
|
||||||
|
|
||||||
|
out := b.RecentStats(b.Size())
|
||||||
|
if len(out) != len(expected) {
|
||||||
|
t.Errorf("Expected %d elements, got %d", len(expected), len(out))
|
||||||
|
}
|
||||||
|
for i, el := range out {
|
||||||
|
if !reflect.DeepEqual(*el, expected[i]) {
|
||||||
|
t.Errorf("Expected elements %v, got %v", expected[i], *el)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestEmpty(t *testing.T) {
|
||||||
|
b := NewSamplesBuffer(5)
|
||||||
|
expectSize(t, b, 0)
|
||||||
|
expectElements(t, b, []info.Usage{})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAddSingleSample(t *testing.T) {
|
||||||
|
b := NewSamplesBuffer(5)
|
||||||
|
|
||||||
|
sample := createSample(1)
|
||||||
|
b.Add(sample)
|
||||||
|
expectSize(t, b, 1)
|
||||||
|
expectElements(t, b, []info.Usage{sample})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFullBuffer(t *testing.T) {
|
||||||
|
maxSize := 5
|
||||||
|
b := NewSamplesBuffer(maxSize)
|
||||||
|
samples := []info.Usage{}
|
||||||
|
for i := 0; i < maxSize; i++ {
|
||||||
|
sample := createSample(uint64(i))
|
||||||
|
samples = append(samples, sample)
|
||||||
|
b.Add(sample)
|
||||||
|
}
|
||||||
|
expectSize(t, b, maxSize)
|
||||||
|
expectElements(t, b, samples)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestOverflow(t *testing.T) {
|
||||||
|
maxSize := 5
|
||||||
|
overflow := 2
|
||||||
|
b := NewSamplesBuffer(maxSize)
|
||||||
|
samples := []info.Usage{}
|
||||||
|
for i := 0; i < maxSize+overflow; i++ {
|
||||||
|
sample := createSample(uint64(i))
|
||||||
|
if i >= overflow {
|
||||||
|
samples = append(samples, sample)
|
||||||
|
}
|
||||||
|
b.Add(sample)
|
||||||
|
}
|
||||||
|
expectSize(t, b, maxSize)
|
||||||
|
expectElements(t, b, samples)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReplaceAll(t *testing.T) {
|
||||||
|
maxSize := 5
|
||||||
|
b := NewSamplesBuffer(maxSize)
|
||||||
|
samples := []info.Usage{}
|
||||||
|
for i := 0; i < maxSize*2; i++ {
|
||||||
|
sample := createSample(uint64(i))
|
||||||
|
if i >= maxSize {
|
||||||
|
samples = append(samples, sample)
|
||||||
|
}
|
||||||
|
b.Add(sample)
|
||||||
|
}
|
||||||
|
expectSize(t, b, maxSize)
|
||||||
|
expectElements(t, b, samples)
|
||||||
|
}
|
183
summary/percentiles.go
Normal file
183
summary/percentiles.go
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Utility methods to calculate percentiles.
|
||||||
|
|
||||||
|
package summary
|
||||||
|
|
||||||
|
import (
|
||||||
|
"math"
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
"github.com/golang/glog"
|
||||||
|
"github.com/google/cadvisor/info"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Unit conversion factors between seconds, milliseconds, and nanoseconds.
const (
	secondsToMilliSeconds     = 1000
	milliSecondsToNanoSeconds = 1000000
	secondsToNanoSeconds      = secondsToMilliSeconds * milliSecondsToNanoSeconds
)
|
||||||
|
|
||||||
|
// uint64Slice is a slice of uint64 values that implements sort.Interface in
// ascending order.
type uint64Slice []uint64

func (a uint64Slice) Len() int           { return len(a) }
func (a uint64Slice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a uint64Slice) Less(i, j int) bool { return a[i] < a[j] }

// Get90Percentile returns the 90th percentile of the samples, truncated to an
// integer. An empty slice yields 0.
//
// The slice is sorted in place as a side effect.
func (a uint64Slice) Get90Percentile() uint64 {
	count := a.Len()
	if count == 0 {
		return 0
	}
	sort.Sort(a)
	// 1-based fractional rank of the 90th percentile within the sorted samples.
	rank := 0.9 * (float64(count) + 1)
	whole, frac := math.Modf(rank)
	index := int(whole)
	percentile := float64(a[index-1])
	// Interpolate towards the next sample only when one exists. With 2 to 9
	// samples the rank lands on the last element, and reading a[index] there
	// would be out of range.
	if index < count {
		percentile += frac * float64(a[index]-a[index-1])
	}
	return uint64(percentile)
}
|
||||||
|
|
||||||
|
// mean maintains a running arithmetic mean over a stream of values.
type mean struct {
	// count is the number of values added so far.
	count uint64
	// Mean is the current average of those values.
	Mean float64
}

// Add folds value into the running average.
func (m *mean) Add(value uint64) {
	m.count++
	sample := float64(value)
	switch m.count {
	case 1:
		// The first value is its own average.
		m.Mean = sample
	default:
		total := float64(m.count)
		m.Mean = (m.Mean*(total-1) + sample) / total
	}
}
|
||||||
|
|
||||||
|
type resource struct {
|
||||||
|
// list of samples being tracked.
|
||||||
|
samples uint64Slice
|
||||||
|
// average from existing samples.
|
||||||
|
mean mean
|
||||||
|
// maximum value seen so far in the added samples.
|
||||||
|
max uint64
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adds a new percentile sample.
|
||||||
|
func (self *resource) Add(p info.Percentiles) {
|
||||||
|
if !p.Present {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if p.Max > self.max {
|
||||||
|
self.max = p.Max
|
||||||
|
}
|
||||||
|
self.mean.Add(p.Mean)
|
||||||
|
// Selecting 90p of 90p :(
|
||||||
|
self.samples = append(self.samples, p.Ninety)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a single sample. Internally, we convert it to a fake percentile sample.
|
||||||
|
func (self *resource) AddSample(val uint64) {
|
||||||
|
sample := info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: val,
|
||||||
|
Max: val,
|
||||||
|
Ninety: val,
|
||||||
|
}
|
||||||
|
self.Add(sample)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get max, average, and 90p from existing samples.
|
||||||
|
func (self *resource) GetPercentile() info.Percentiles {
|
||||||
|
p := info.Percentiles{}
|
||||||
|
p.Mean = uint64(self.mean.Mean)
|
||||||
|
p.Max = self.max
|
||||||
|
p.Ninety = self.samples.Get90Percentile()
|
||||||
|
p.Present = true
|
||||||
|
return p
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewResource(size int) *resource {
|
||||||
|
return &resource{
|
||||||
|
samples: make(uint64Slice, 0, size),
|
||||||
|
mean: mean{count: 0, Mean: 0},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return aggregated percentiles from the provided percentile samples.
|
||||||
|
func GetDerivedPercentiles(stats []*info.Usage) info.Usage {
|
||||||
|
cpu := NewResource(len(stats))
|
||||||
|
memory := NewResource(len(stats))
|
||||||
|
for _, stat := range stats {
|
||||||
|
cpu.Add(stat.Cpu)
|
||||||
|
memory.Add(stat.Memory)
|
||||||
|
}
|
||||||
|
usage := info.Usage{}
|
||||||
|
usage.Cpu = cpu.GetPercentile()
|
||||||
|
usage.Memory = memory.GetPercentile()
|
||||||
|
return usage
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate part of a minute this sample set represent.
|
||||||
|
func getPercentComplete(stats []*secondSample) (percent int32) {
|
||||||
|
numSamples := len(stats)
|
||||||
|
if numSamples > 1 {
|
||||||
|
percent = 100
|
||||||
|
timeRange := stats[numSamples-1].Timestamp.Sub(stats[0].Timestamp).Nanoseconds()
|
||||||
|
// allow some slack
|
||||||
|
if timeRange < 58*secondsToNanoSeconds {
|
||||||
|
percent = int32((timeRange * 100) / 60 * secondsToNanoSeconds)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns a percentile sample for a minute by aggregating seconds samples.
|
||||||
|
func GetMinutePercentiles(stats []*secondSample) info.Usage {
|
||||||
|
lastSample := secondSample{}
|
||||||
|
cpu := NewResource(len(stats))
|
||||||
|
memory := NewResource(len(stats))
|
||||||
|
for _, stat := range stats {
|
||||||
|
var elapsed int64
|
||||||
|
if !lastSample.Timestamp.IsZero() {
|
||||||
|
elapsed = stat.Timestamp.Sub(lastSample.Timestamp).Nanoseconds()
|
||||||
|
if elapsed < 10*milliSecondsToNanoSeconds {
|
||||||
|
glog.Infof("Elapsed time too small: %d ns: time now %s last %s", elapsed, stat.Timestamp.String(), lastSample.Timestamp.String())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
glog.V(2).Infof("Read sample: cpu %d, memory %d", stat.Cpu, memory)
|
||||||
|
cpuRate := (stat.Cpu - lastSample.Cpu) * secondsToMilliSeconds / uint64(elapsed)
|
||||||
|
if cpuRate < 0 {
|
||||||
|
glog.Infof("cpu rate too small: %f ns", cpuRate)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
glog.V(2).Infof("Adding cpu rate sample : %d", cpuRate)
|
||||||
|
cpu.AddSample(cpuRate)
|
||||||
|
memory.AddSample(stat.Memory)
|
||||||
|
} else {
|
||||||
|
memory.AddSample(stat.Memory)
|
||||||
|
}
|
||||||
|
lastSample = *stat
|
||||||
|
}
|
||||||
|
percent := getPercentComplete(stats)
|
||||||
|
return info.Usage{
|
||||||
|
PercentComplete: percent,
|
||||||
|
Cpu: cpu.GetPercentile(),
|
||||||
|
Memory: memory.GetPercentile(),
|
||||||
|
}
|
||||||
|
}
|
182
summary/percentiles_test.go
Normal file
182
summary/percentiles_test.go
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
package summary
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/cadvisor/info"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Nanosecond is the number of nanoseconds in one second; the tests multiply
// by it to express cumulative cpu usage.
const Nanosecond = 1000000000
|
||||||
|
|
||||||
|
func Test90Percentile(t *testing.T) {
|
||||||
|
N := 100
|
||||||
|
stats := make(uint64Slice, 0, N)
|
||||||
|
for i := N; i > 0; i-- {
|
||||||
|
stats = append(stats, uint64(i))
|
||||||
|
}
|
||||||
|
p := stats.Get90Percentile()
|
||||||
|
if p != 90 {
|
||||||
|
t.Errorf("90th percentile is %d, should be 90.", p)
|
||||||
|
}
|
||||||
|
// 90p should be between 94 and 95. Promoted to 95.
|
||||||
|
N = 105
|
||||||
|
for i := 101; i <= N; i++ {
|
||||||
|
stats = append(stats, uint64(i))
|
||||||
|
}
|
||||||
|
p = stats.Get90Percentile()
|
||||||
|
if p != 95 {
|
||||||
|
t.Errorf("90th percentile is %d, should be 95.", p)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMean(t *testing.T) {
|
||||||
|
var i, N uint64
|
||||||
|
N = 100
|
||||||
|
mean := mean{count: 0, Mean: 0}
|
||||||
|
for i = 1; i < N; i++ {
|
||||||
|
mean.Add(i)
|
||||||
|
}
|
||||||
|
if mean.Mean != 50.0 {
|
||||||
|
t.Errorf("Mean is %f, should be 50.0", mean.Mean)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAggregates(t *testing.T) {
|
||||||
|
N := uint64(100)
|
||||||
|
var i uint64
|
||||||
|
ct := time.Now()
|
||||||
|
stats := make([]*secondSample, 0, N)
|
||||||
|
for i = 1; i < N; i++ {
|
||||||
|
s := &secondSample{
|
||||||
|
Timestamp: ct.Add(time.Duration(i) * time.Second),
|
||||||
|
// cpu rate is 1 s/s
|
||||||
|
Cpu: i * Nanosecond,
|
||||||
|
// Memory grows by a KB every second.
|
||||||
|
Memory: i * 1024,
|
||||||
|
}
|
||||||
|
stats = append(stats, s)
|
||||||
|
}
|
||||||
|
usage := GetMinutePercentiles(stats)
|
||||||
|
// Cpu mean, max, and 90p should all be 1000 ms/s.
|
||||||
|
cpuExpected := info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: 1000,
|
||||||
|
Max: 1000,
|
||||||
|
Ninety: 1000,
|
||||||
|
}
|
||||||
|
if usage.Cpu != cpuExpected {
|
||||||
|
t.Errorf("cpu stats are %+v. Expected %+v", usage.Cpu, cpuExpected)
|
||||||
|
}
|
||||||
|
memExpected := info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: 50 * 1024,
|
||||||
|
Max: 99 * 1024,
|
||||||
|
Ninety: 90 * 1024,
|
||||||
|
}
|
||||||
|
if usage.Memory != memExpected {
|
||||||
|
t.Errorf("memory stats are mean %+v. Expected %+v", usage.Memory, memExpected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
func TestSamplesCloseInTimeIgnored(t *testing.T) {
|
||||||
|
N := uint64(100)
|
||||||
|
var i uint64
|
||||||
|
ct := time.Now()
|
||||||
|
stats := make([]*secondSample, 0, N*2)
|
||||||
|
for i = 1; i < N; i++ {
|
||||||
|
s1 := &secondSample{
|
||||||
|
Timestamp: ct.Add(time.Duration(i) * time.Second),
|
||||||
|
// cpu rate is 1 s/s
|
||||||
|
Cpu: i * Nanosecond,
|
||||||
|
// Memory grows by a KB every second.
|
||||||
|
Memory: i * 1024,
|
||||||
|
}
|
||||||
|
stats = append(stats, s1)
|
||||||
|
|
||||||
|
// Add another dummy sample too close in time to the last one.
|
||||||
|
s2 := &secondSample{
|
||||||
|
// Add extra millisecond.
|
||||||
|
Timestamp: ct.Add(time.Duration(i) * time.Second).Add(time.Duration(1) * time.Millisecond),
|
||||||
|
Cpu: i * 100 * Nanosecond,
|
||||||
|
Memory: i * 1024 * 1024,
|
||||||
|
}
|
||||||
|
stats = append(stats, s2)
|
||||||
|
}
|
||||||
|
usage := GetMinutePercentiles(stats)
|
||||||
|
// Cpu mean, max, and 90p should all be 1000 ms/s. All high-value samples are discarded.
|
||||||
|
cpuExpected := info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: 1000,
|
||||||
|
Max: 1000,
|
||||||
|
Ninety: 1000,
|
||||||
|
}
|
||||||
|
if usage.Cpu != cpuExpected {
|
||||||
|
t.Errorf("cpu stats are %+v. Expected %+v", usage.Cpu, cpuExpected)
|
||||||
|
}
|
||||||
|
memExpected := info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: 50 * 1024,
|
||||||
|
Max: 99 * 1024,
|
||||||
|
Ninety: 90 * 1024,
|
||||||
|
}
|
||||||
|
if usage.Memory != memExpected {
|
||||||
|
t.Errorf("memory stats are mean %+v. Expected %+v", usage.Memory, memExpected)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDerivedStats(t *testing.T) {
|
||||||
|
N := uint64(100)
|
||||||
|
var i uint64
|
||||||
|
stats := make([]*info.Usage, 0, N)
|
||||||
|
for i = 1; i < N; i++ {
|
||||||
|
s := &info.Usage{
|
||||||
|
PercentComplete: 100,
|
||||||
|
Cpu: info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: i * Nanosecond,
|
||||||
|
Max: i * Nanosecond,
|
||||||
|
Ninety: i * Nanosecond,
|
||||||
|
},
|
||||||
|
Memory: info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: i * 1024,
|
||||||
|
Max: i * 1024,
|
||||||
|
Ninety: i * 1024,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
stats = append(stats, s)
|
||||||
|
}
|
||||||
|
usage := GetDerivedPercentiles(stats)
|
||||||
|
cpuExpected := info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: 50 * Nanosecond,
|
||||||
|
Max: 99 * Nanosecond,
|
||||||
|
Ninety: 90 * Nanosecond,
|
||||||
|
}
|
||||||
|
if usage.Cpu != cpuExpected {
|
||||||
|
t.Errorf("cpu stats are %+v. Expected %+v", usage.Cpu, cpuExpected)
|
||||||
|
}
|
||||||
|
memExpected := info.Percentiles{
|
||||||
|
Present: true,
|
||||||
|
Mean: 50 * 1024,
|
||||||
|
Max: 99 * 1024,
|
||||||
|
Ninety: 90 * 1024,
|
||||||
|
}
|
||||||
|
if usage.Memory != memExpected {
|
||||||
|
t.Errorf("memory stats are mean %+v. Expected %+v", usage.Memory, memExpected)
|
||||||
|
}
|
||||||
|
}
|
147
summary/summary.go
Normal file
147
summary/summary.go
Normal file
@ -0,0 +1,147 @@
|
|||||||
|
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
|
||||||
|
// Maintains the summary of aggregated minute, hour, and day stats.
|
||||||
|
// For a container running for more than a day, amount of tracked data can go up to
|
||||||
|
// 40 KB when cpu and memory are tracked. We'll start by enabling collection for the
|
||||||
|
// node, followed by docker, and then all containers as we understand the usage pattern
|
||||||
|
// better.
|
||||||
|
// TODO(rjnagal): Optimize the size if we start running it for every container.
|
||||||
|
package summary
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/google/cadvisor/info"
|
||||||
|
)
|
||||||
|
|
||||||
|
// secondSample holds the usage fields tracked for generating percentiles.
type secondSample struct {
	// Timestamp is the time when the sample was recorded.
	Timestamp time.Time
	// Cpu is the cpu usage.
	Cpu uint64
	// Memory is the memory usage.
	Memory uint64
}
|
||||||
|
|
||||||
|
// availableResources flags which resources are tracked for a container.
type availableResources struct {
	// Cpu is true when cpu usage is tracked.
	Cpu bool
	// Memory is true when memory usage is tracked.
	Memory bool
}
|
||||||
|
|
||||||
|
type StatsSummary struct {
|
||||||
|
// Resources being tracked for this container.
|
||||||
|
available availableResources
|
||||||
|
// list of second samples. The list is cleared when a new minute samples is generated.
|
||||||
|
secondSamples []*secondSample
|
||||||
|
// minute percentiles. We track 24 * 60 maximum samples.
|
||||||
|
minuteSamples *SamplesBuffer
|
||||||
|
// latest derived minute, hour, and day stats. Updated every minute.
|
||||||
|
derivedStats info.DerivedStats // Guarded by dataLock.
|
||||||
|
dataLock sync.RWMutex
|
||||||
|
}
|
||||||
|
|
||||||
|
// Adds a new seconds sample.
|
||||||
|
// If enough seconds samples are collected, a minute sample is generated and derived
|
||||||
|
// stats are updated.
|
||||||
|
func (s *StatsSummary) AddSample(stat info.ContainerStats) error {
|
||||||
|
sample := secondSample{}
|
||||||
|
if s.available.Cpu {
|
||||||
|
sample.Cpu = stat.Cpu.Usage.Total
|
||||||
|
}
|
||||||
|
if s.available.Memory {
|
||||||
|
sample.Memory = stat.Memory.WorkingSet
|
||||||
|
}
|
||||||
|
s.secondSamples = append(s.secondSamples, &sample)
|
||||||
|
// TODO(jnagal): Use 'available' to avoid unnecessary computation.
|
||||||
|
if len(s.secondSamples) == 60 {
|
||||||
|
// Make a minute stat.
|
||||||
|
minuteSample := GetMinutePercentiles(s.secondSamples)
|
||||||
|
// clear seconds samples.
|
||||||
|
s.secondSamples = s.secondSamples[:0]
|
||||||
|
s.minuteSamples.Add(minuteSample)
|
||||||
|
err := s.updateDerivedStats()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate new derived stats based on current minute stats samples.
|
||||||
|
func (s *StatsSummary) updateDerivedStats() error {
|
||||||
|
derived := info.DerivedStats{}
|
||||||
|
derived.Timestamp = time.Now()
|
||||||
|
minuteSamples := s.minuteSamples.RecentStats(1)
|
||||||
|
if len(minuteSamples) != 1 {
|
||||||
|
return fmt.Errorf("failed to retrieve minute stats")
|
||||||
|
}
|
||||||
|
derived.MinuteUsage = *minuteSamples[0]
|
||||||
|
hourUsage, err := s.getDerivedUsage(60)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to compute hour stats: %v", err)
|
||||||
|
}
|
||||||
|
dayUsage, err := s.getDerivedUsage(60 * 24)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to compute day usage: %v", err)
|
||||||
|
}
|
||||||
|
derived.HourUsage = hourUsage
|
||||||
|
derived.DayUsage = dayUsage
|
||||||
|
|
||||||
|
s.dataLock.Lock()
|
||||||
|
defer s.dataLock.Unlock()
|
||||||
|
s.derivedStats = derived
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// helper method to get hour and daily derived stats
|
||||||
|
func (s *StatsSummary) getDerivedUsage(n int) (info.Usage, error) {
|
||||||
|
if n < 1 {
|
||||||
|
return info.Usage{}, fmt.Errorf("invalid number of samples requested: %d", n)
|
||||||
|
}
|
||||||
|
samples := s.minuteSamples.RecentStats(n)
|
||||||
|
numSamples := len(samples)
|
||||||
|
if numSamples < 1 {
|
||||||
|
return info.Usage{}, fmt.Errorf("failed to retrieve any minute stats.")
|
||||||
|
}
|
||||||
|
// We generate derived stats even with partial data.
|
||||||
|
usage := GetDerivedPercentiles(samples)
|
||||||
|
// Assumes we have equally placed minute samples.
|
||||||
|
usage.PercentComplete = int32(numSamples / n)
|
||||||
|
return usage, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the latest calculated derived stats.
|
||||||
|
func (s *StatsSummary) DerivedStats() (info.DerivedStats, error) {
|
||||||
|
s.dataLock.RLock()
|
||||||
|
defer s.dataLock.RUnlock()
|
||||||
|
|
||||||
|
return s.derivedStats, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func New(spec info.ContainerSpec) (*StatsSummary, error) {
|
||||||
|
summary := StatsSummary{}
|
||||||
|
if spec.HasCpu {
|
||||||
|
summary.available.Cpu = true
|
||||||
|
}
|
||||||
|
if spec.HasMemory {
|
||||||
|
summary.available.Memory = true
|
||||||
|
}
|
||||||
|
if !summary.available.Cpu && !summary.available.Memory {
|
||||||
|
return nil, fmt.Errorf("none of the resources are being tracked.")
|
||||||
|
}
|
||||||
|
summary.minuteSamples = NewSamplesBuffer(60 /* one hour */)
|
||||||
|
return &summary, nil
|
||||||
|
}
|
@ -1,135 +0,0 @@
|
|||||||
// Copyright 2015 Google Inc. All Rights Reserved.
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
// you may not use this file except in compliance with the License.
|
|
||||||
// You may obtain a copy of the License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
|
|
||||||
package utils
|
|
||||||
|
|
||||||
import (
|
|
||||||
"math"
|
|
||||||
"sort"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/golang/glog"
|
|
||||||
"github.com/google/cadvisor/info"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Unit conversion factors between seconds, milliseconds, and nanoseconds.
const (
	milliSecondsToNanoSeconds = 1000000
	secondsToMilliSeconds     = 1000
)
|
|
||||||
|
|
||||||
// uint64Slice is a slice of uint64 values that implements sort.Interface in
// ascending order.
type uint64Slice []uint64

func (a uint64Slice) Len() int           { return len(a) }
func (a uint64Slice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a uint64Slice) Less(i, j int) bool { return a[i] < a[j] }

// Percentiles summarizes a set of collected samples.
// TODO(rjnagal): Move out when we update API.
type Percentiles struct {
	// Average over the collected sample.
	Mean uint64 `json:"mean"`
	// Max seen over the collected sample.
	Max uint64 `json:"max"`
	// 90th percentile over the collected sample.
	Ninety uint64 `json:"ninety"`
}

// Get90Percentile returns the 90th percentile of the samples, truncated to an
// integer. An empty slice yields 0.
//
// The slice is sorted in place as a side effect.
func (a uint64Slice) Get90Percentile() uint64 {
	count := a.Len()
	if count == 0 {
		return 0
	}
	sort.Sort(a)
	// 1-based fractional rank of the 90th percentile within the sorted samples.
	rank := 0.9 * (float64(count) + 1)
	whole, frac := math.Modf(rank)
	index := int(whole)
	percentile := float64(a[index-1])
	// Interpolate towards the next sample only when one exists. With 2 to 9
	// samples the rank lands on the last element, and reading a[index] there
	// would be out of range.
	if index < count {
		percentile += frac * float64(a[index]-a[index-1])
	}
	return uint64(percentile)
}
|
|
||||||
|
|
||||||
// Mean maintains a running arithmetic mean over a stream of values.
type Mean struct {
	// count is the number of values added so far.
	count uint64
	// Mean is the current average of those values.
	Mean float64
}

// Add folds value into the running average.
func (m *Mean) Add(value uint64) {
	m.count++
	sample := float64(value)
	switch m.count {
	case 1:
		// The first value is its own average.
		m.Mean = sample
	default:
		total := float64(m.count)
		m.Mean = (m.Mean*(total-1) + sample) / total
	}
}
|
|
||||||
|
|
||||||
// Returns cpu and memory usage percentiles.
|
|
||||||
func GetPercentiles(stats []*info.ContainerStats) (Percentiles, Percentiles) {
|
|
||||||
lastCpu := uint64(0)
|
|
||||||
lastTime := time.Time{}
|
|
||||||
memorySamples := make(uint64Slice, 0, len(stats))
|
|
||||||
cpuSamples := make(uint64Slice, 0, len(stats)-1)
|
|
||||||
numSamples := 0
|
|
||||||
memoryMean := Mean{count: 0, Mean: 0}
|
|
||||||
cpuMean := Mean{count: 0, Mean: 0}
|
|
||||||
memoryPercentiles := Percentiles{}
|
|
||||||
cpuPercentiles := Percentiles{}
|
|
||||||
for _, stat := range stats {
|
|
||||||
var elapsed int64
|
|
||||||
time := stat.Timestamp
|
|
||||||
if !lastTime.IsZero() {
|
|
||||||
elapsed = time.UnixNano() - lastTime.UnixNano()
|
|
||||||
if elapsed < 10*milliSecondsToNanoSeconds {
|
|
||||||
glog.Infof("Elapsed time too small: %d ns: time now %s last %s", elapsed, time.String(), lastTime.String())
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
numSamples++
|
|
||||||
cpuNs := stat.Cpu.Usage.Total
|
|
||||||
// Ignore actual usage and only focus on working set.
|
|
||||||
memory := stat.Memory.WorkingSet
|
|
||||||
if memory > memoryPercentiles.Max {
|
|
||||||
memoryPercentiles.Max = memory
|
|
||||||
}
|
|
||||||
glog.V(2).Infof("Read sample: cpu %d, memory %d", cpuNs, memory)
|
|
||||||
memoryMean.Add(memory)
|
|
||||||
memorySamples = append(memorySamples, memory)
|
|
||||||
if lastTime.IsZero() {
|
|
||||||
lastCpu = cpuNs
|
|
||||||
lastTime = time
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
cpuRate := (cpuNs - lastCpu) * secondsToMilliSeconds / uint64(elapsed)
|
|
||||||
if cpuRate < 0 {
|
|
||||||
glog.Infof("cpu rate too small: %f ns", cpuRate)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
glog.V(2).Infof("Adding cpu rate sample : %d", cpuRate)
|
|
||||||
lastCpu = cpuNs
|
|
||||||
lastTime = time
|
|
||||||
cpuSamples = append(cpuSamples, cpuRate)
|
|
||||||
if cpuRate > cpuPercentiles.Max {
|
|
||||||
cpuPercentiles.Max = cpuRate
|
|
||||||
}
|
|
||||||
cpuMean.Add(cpuRate)
|
|
||||||
}
|
|
||||||
cpuPercentiles.Mean = uint64(cpuMean.Mean)
|
|
||||||
memoryPercentiles.Mean = uint64(memoryMean.Mean)
|
|
||||||
cpuPercentiles.Ninety = cpuSamples.Get90Percentile()
|
|
||||||
memoryPercentiles.Ninety = memorySamples.Get90Percentile()
|
|
||||||
return cpuPercentiles, memoryPercentiles
|
|
||||||
}
|
|
@ -1,144 +0,0 @@
|
|||||||
// Copyright 2015 Google Inc. All Rights Reserved.
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
// you may not use this file except in compliance with the License.
|
|
||||||
// You may obtain a copy of the License at
|
|
||||||
//
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
|
|
||||||
package utils
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/google/cadvisor/info"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Nanosecond is the number of nanoseconds in one second (1e9). The tests
// multiply a sample index by it to build cumulative CPU usage totals that
// advance by one second of CPU time per sample.
const Nanosecond = 1000000000
|
|
||||||
|
|
||||||
func Test90Percentile(t *testing.T) {
|
|
||||||
N := 100
|
|
||||||
stats := make(uint64Slice, 0, N)
|
|
||||||
for i := N; i > 0; i-- {
|
|
||||||
stats = append(stats, uint64(i))
|
|
||||||
}
|
|
||||||
p := stats.Get90Percentile()
|
|
||||||
if p != 90 {
|
|
||||||
t.Errorf("90th percentile is %d, should be 90.", p)
|
|
||||||
}
|
|
||||||
// 90p should be between 94 and 95. Promoted to 95.
|
|
||||||
N = 105
|
|
||||||
for i := 101; i <= N; i++ {
|
|
||||||
stats = append(stats, uint64(i))
|
|
||||||
}
|
|
||||||
p = stats.Get90Percentile()
|
|
||||||
if p != 95 {
|
|
||||||
t.Errorf("90th percentile is %d, should be 95.", p)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestMean(t *testing.T) {
|
|
||||||
var i, N uint64
|
|
||||||
N = 100
|
|
||||||
mean := Mean{count: 0, Mean: 0}
|
|
||||||
for i = 1; i < N; i++ {
|
|
||||||
mean.Add(i)
|
|
||||||
}
|
|
||||||
if mean.Mean != 50.0 {
|
|
||||||
t.Errorf("Mean is %f, should be 50.0", mean.Mean)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAggregates(t *testing.T) {
|
|
||||||
N := uint64(100)
|
|
||||||
var i uint64
|
|
||||||
ct := time.Now()
|
|
||||||
stats := make([]*info.ContainerStats, 0, N)
|
|
||||||
for i = 1; i < N; i++ {
|
|
||||||
s := &info.ContainerStats{
|
|
||||||
Cpu: info.CpuStats{},
|
|
||||||
Timestamp: ct.Add(time.Duration(i) * time.Second),
|
|
||||||
Memory: info.MemoryStats{
|
|
||||||
// Memory grows by a KB every second.
|
|
||||||
WorkingSet: i * 1024,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
// cpu rate is 1 s/s
|
|
||||||
s.Cpu.Usage.Total = i * Nanosecond
|
|
||||||
stats = append(stats, s)
|
|
||||||
}
|
|
||||||
cpu, mem := GetPercentiles(stats)
|
|
||||||
// Cpu mean, max, and 90p should all be 1000 ms/s.
|
|
||||||
cpuExpected := Percentiles{
|
|
||||||
Mean: 1000,
|
|
||||||
Max: 1000,
|
|
||||||
Ninety: 1000,
|
|
||||||
}
|
|
||||||
if cpu != cpuExpected {
|
|
||||||
t.Errorf("cpu stats are %+v. Expected %+v", cpu, cpuExpected)
|
|
||||||
}
|
|
||||||
memExpected := Percentiles{
|
|
||||||
Mean: 50 * 1024,
|
|
||||||
Max: 99 * 1024,
|
|
||||||
Ninety: 90 * 1024,
|
|
||||||
}
|
|
||||||
if mem != memExpected {
|
|
||||||
t.Errorf("memory stats are mean %+v. Expected %+v", mem, memExpected)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
func TestSamplesCloseInTimeIgnored(t *testing.T) {
|
|
||||||
N := uint64(100)
|
|
||||||
var i uint64
|
|
||||||
ct := time.Now()
|
|
||||||
stats := make([]*info.ContainerStats, 0, N*2)
|
|
||||||
for i = 1; i < N; i++ {
|
|
||||||
s1 := &info.ContainerStats{
|
|
||||||
Cpu: info.CpuStats{},
|
|
||||||
Timestamp: ct.Add(time.Duration(i) * time.Second),
|
|
||||||
Memory: info.MemoryStats{
|
|
||||||
// Memory grows by a KB every second.
|
|
||||||
WorkingSet: i * 1024,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
// cpu rate is 1 s/s
|
|
||||||
s1.Cpu.Usage.Total = i * Nanosecond
|
|
||||||
stats = append(stats, s1)
|
|
||||||
|
|
||||||
// Add another dummy sample too close in time to the last one.
|
|
||||||
s2 := &info.ContainerStats{
|
|
||||||
Cpu: info.CpuStats{},
|
|
||||||
// Add extra millisecond.
|
|
||||||
Timestamp: ct.Add(time.Duration(i) * time.Second).Add(time.Duration(1) * time.Millisecond),
|
|
||||||
Memory: info.MemoryStats{
|
|
||||||
WorkingSet: i * 1024 * 1024,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
s2.Cpu.Usage.Total = i * 100 * Nanosecond
|
|
||||||
stats = append(stats, s2)
|
|
||||||
}
|
|
||||||
cpu, mem := GetPercentiles(stats)
|
|
||||||
// Cpu mean, max, and 90p should all be 1000 ms/s. All high-value samples are discarded.
|
|
||||||
cpuExpected := Percentiles{
|
|
||||||
Mean: 1000,
|
|
||||||
Max: 1000,
|
|
||||||
Ninety: 1000,
|
|
||||||
}
|
|
||||||
if cpu != cpuExpected {
|
|
||||||
t.Errorf("cpu stats are %+v. Expected %+v", cpu, cpuExpected)
|
|
||||||
}
|
|
||||||
memExpected := Percentiles{
|
|
||||||
Mean: 50 * 1024,
|
|
||||||
Max: 99 * 1024,
|
|
||||||
Ninety: 90 * 1024,
|
|
||||||
}
|
|
||||||
if mem != memExpected {
|
|
||||||
t.Errorf("memory stats are mean %+v. Expected %+v", mem, memExpected)
|
|
||||||
}
|
|
||||||
}
|
|
Loading…
Reference in New Issue
Block a user