Merge pull request #491 from rjnagal/summary
WIP: Add utility to calculate derived stats.
This commit is contained in:
commit
48c41ba273
74
summary/buffer.go
Normal file
74
summary/buffer.go
Normal file
@ -0,0 +1,74 @@
|
||||
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package summary
|
||||
|
||||
import (
|
||||
"github.com/google/cadvisor/info"
|
||||
)
|
||||
|
||||
// Manages a buffer of usage samples.
|
||||
// This is similar to stats buffer in storage/memory.
|
||||
// The main difference is that we do not pre-allocate the buffer as most containers
|
||||
// won't live that long.
|
||||
type SamplesBuffer struct {
|
||||
// list of collected samples.
|
||||
samples []info.Usage
|
||||
// maximum size this buffer can grow to.
|
||||
maxSize int
|
||||
// index for the latest sample.
|
||||
index int
|
||||
}
|
||||
|
||||
// Initializes an empty buffer.
|
||||
func NewSamplesBuffer(size int) *SamplesBuffer {
|
||||
return &SamplesBuffer{
|
||||
index: -1,
|
||||
maxSize: size,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the current number of samples in the buffer.
|
||||
func (s *SamplesBuffer) Size() int {
|
||||
return len(s.samples)
|
||||
}
|
||||
|
||||
// Add an element to the buffer. Oldest one is overwritten if required.
|
||||
func (s *SamplesBuffer) Add(stat info.Usage) {
|
||||
if len(s.samples) < s.maxSize {
|
||||
s.samples = append(s.samples, stat)
|
||||
s.index++
|
||||
return
|
||||
}
|
||||
s.index = (s.index + 1) % s.maxSize
|
||||
s.samples[s.index] = stat
|
||||
}
|
||||
|
||||
// Returns pointers to the last 'n' stats.
|
||||
func (s *SamplesBuffer) RecentStats(n int) []*info.Usage {
|
||||
if n > len(s.samples) {
|
||||
n = len(s.samples)
|
||||
}
|
||||
start := s.index - (n - 1)
|
||||
if start < 0 {
|
||||
start += len(s.samples)
|
||||
}
|
||||
|
||||
out := make([]*info.Usage, n)
|
||||
for i := 0; i < n; i++ {
|
||||
index := (start + i) % len(s.samples)
|
||||
out[i] = &s.samples[index]
|
||||
}
|
||||
return out
|
||||
}
|
118
summary/buffer_test.go
Normal file
118
summary/buffer_test.go
Normal file
@ -0,0 +1,118 @@
|
||||
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package summary
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"github.com/google/cadvisor/info"
|
||||
)
|
||||
|
||||
func createSample(i uint64) info.Usage {
|
||||
usage := info.Usage{}
|
||||
usage.PercentComplete = 100
|
||||
usage.Cpu = info.Percentiles{
|
||||
Present: true,
|
||||
Mean: i * 50,
|
||||
Max: i * 100,
|
||||
Ninety: i * 90,
|
||||
}
|
||||
usage.Memory = info.Percentiles{
|
||||
Present: true,
|
||||
Mean: i * 50 * 1024,
|
||||
Max: i * 100 * 1024,
|
||||
Ninety: i * 90 * 1024,
|
||||
}
|
||||
return usage
|
||||
}
|
||||
|
||||
func expectSize(t *testing.T, b *SamplesBuffer, expectedSize int) {
|
||||
if b.Size() != expectedSize {
|
||||
t.Errorf("Expected size %d, got %d", expectedSize, b.Size())
|
||||
}
|
||||
}
|
||||
|
||||
func expectElements(t *testing.T, b *SamplesBuffer, expected []info.Usage) {
|
||||
|
||||
out := b.RecentStats(b.Size())
|
||||
if len(out) != len(expected) {
|
||||
t.Errorf("Expected %d elements, got %d", len(expected), len(out))
|
||||
}
|
||||
for i, el := range out {
|
||||
if !reflect.DeepEqual(*el, expected[i]) {
|
||||
t.Errorf("Expected elements %v, got %v", expected[i], *el)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEmpty(t *testing.T) {
|
||||
b := NewSamplesBuffer(5)
|
||||
expectSize(t, b, 0)
|
||||
expectElements(t, b, []info.Usage{})
|
||||
}
|
||||
|
||||
func TestAddSingleSample(t *testing.T) {
|
||||
b := NewSamplesBuffer(5)
|
||||
|
||||
sample := createSample(1)
|
||||
b.Add(sample)
|
||||
expectSize(t, b, 1)
|
||||
expectElements(t, b, []info.Usage{sample})
|
||||
}
|
||||
|
||||
func TestFullBuffer(t *testing.T) {
|
||||
maxSize := 5
|
||||
b := NewSamplesBuffer(maxSize)
|
||||
samples := []info.Usage{}
|
||||
for i := 0; i < maxSize; i++ {
|
||||
sample := createSample(uint64(i))
|
||||
samples = append(samples, sample)
|
||||
b.Add(sample)
|
||||
}
|
||||
expectSize(t, b, maxSize)
|
||||
expectElements(t, b, samples)
|
||||
}
|
||||
|
||||
func TestOverflow(t *testing.T) {
|
||||
maxSize := 5
|
||||
overflow := 2
|
||||
b := NewSamplesBuffer(maxSize)
|
||||
samples := []info.Usage{}
|
||||
for i := 0; i < maxSize+overflow; i++ {
|
||||
sample := createSample(uint64(i))
|
||||
if i >= overflow {
|
||||
samples = append(samples, sample)
|
||||
}
|
||||
b.Add(sample)
|
||||
}
|
||||
expectSize(t, b, maxSize)
|
||||
expectElements(t, b, samples)
|
||||
}
|
||||
|
||||
func TestReplaceAll(t *testing.T) {
|
||||
maxSize := 5
|
||||
b := NewSamplesBuffer(maxSize)
|
||||
samples := []info.Usage{}
|
||||
for i := 0; i < maxSize*2; i++ {
|
||||
sample := createSample(uint64(i))
|
||||
if i >= maxSize {
|
||||
samples = append(samples, sample)
|
||||
}
|
||||
b.Add(sample)
|
||||
}
|
||||
expectSize(t, b, maxSize)
|
||||
expectElements(t, b, samples)
|
||||
}
|
183
summary/percentiles.go
Normal file
183
summary/percentiles.go
Normal file
@ -0,0 +1,183 @@
|
||||
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Utility methods to calculate percentiles.
|
||||
|
||||
package summary
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/google/cadvisor/info"
|
||||
)
|
||||
|
||||
// Unit conversion factors used when turning cumulative usage into rates.
const (
	// secondsToMilliSeconds is the number of milliseconds in a second.
	secondsToMilliSeconds = 1000
	// milliSecondsToNanoSeconds is the number of nanoseconds in a millisecond.
	milliSecondsToNanoSeconds = 1000000
	// secondsToNanoSeconds is the number of nanoseconds in a second.
	secondsToNanoSeconds = secondsToMilliSeconds * milliSecondsToNanoSeconds
)
|
||||
|
||||
// uint64Slice is a []uint64 that implements sort.Interface in ascending order.
type uint64Slice []uint64

func (a uint64Slice) Len() int           { return len(a) }
func (a uint64Slice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a uint64Slice) Less(i, j int) bool { return a[i] < a[j] }

// Get90Percentile returns the 90th percentile of the samples, truncated to an
// integer. It returns 0 for an empty slice. The slice is sorted in place as a
// side effect.
//
// The percentile sits at 1-based rank 0.9*(count+1); a fractional rank is
// linearly interpolated between the two neighbouring samples.
func (a uint64Slice) Get90Percentile() uint64 {
	count := a.Len()
	if count == 0 {
		return 0
	}
	sort.Sort(a)
	whole, frac := math.Modf(0.9 * (float64(count) + 1))
	index := int(whole)
	percentile := float64(a[index-1])
	// Interpolate only when a higher neighbour exists. For small inputs the
	// rank lands on the last element (index == count) and a[index] would be
	// out of range.
	if index < count {
		percentile += frac * float64(a[index]-a[index-1])
	}
	return uint64(percentile)
}
|
||||
|
||||
// mean maintains a running arithmetic mean of the values passed to Add.
type mean struct {
	// count is the number of values added so far.
	count uint64
	// Mean is the average of all values added so far.
	Mean float64
}

// Add folds value into the running mean.
func (m *mean) Add(value uint64) {
	m.count++
	sample := float64(value)
	if m.count == 1 {
		// The first value is the mean by definition.
		m.Mean = sample
		return
	}
	total := float64(m.count)
	// Re-weight the previous mean by the old count, then add the new value.
	m.Mean = (m.Mean*(total-1) + sample) / total
}
|
||||
|
||||
type resource struct {
|
||||
// list of samples being tracked.
|
||||
samples uint64Slice
|
||||
// average from existing samples.
|
||||
mean mean
|
||||
// maximum value seen so far in the added samples.
|
||||
max uint64
|
||||
}
|
||||
|
||||
// Adds a new percentile sample.
|
||||
func (self *resource) Add(p info.Percentiles) {
|
||||
if !p.Present {
|
||||
return
|
||||
}
|
||||
if p.Max > self.max {
|
||||
self.max = p.Max
|
||||
}
|
||||
self.mean.Add(p.Mean)
|
||||
// Selecting 90p of 90p :(
|
||||
self.samples = append(self.samples, p.Ninety)
|
||||
}
|
||||
|
||||
// Add a single sample. Internally, we convert it to a fake percentile sample.
|
||||
func (self *resource) AddSample(val uint64) {
|
||||
sample := info.Percentiles{
|
||||
Present: true,
|
||||
Mean: val,
|
||||
Max: val,
|
||||
Ninety: val,
|
||||
}
|
||||
self.Add(sample)
|
||||
}
|
||||
|
||||
// Get max, average, and 90p from existing samples.
|
||||
func (self *resource) GetPercentile() info.Percentiles {
|
||||
p := info.Percentiles{}
|
||||
p.Mean = uint64(self.mean.Mean)
|
||||
p.Max = self.max
|
||||
p.Ninety = self.samples.Get90Percentile()
|
||||
p.Present = true
|
||||
return p
|
||||
}
|
||||
|
||||
func NewResource(size int) *resource {
|
||||
return &resource{
|
||||
samples: make(uint64Slice, 0, size),
|
||||
mean: mean{count: 0, Mean: 0},
|
||||
}
|
||||
}
|
||||
|
||||
// Return aggregated percentiles from the provided percentile samples.
|
||||
func GetDerivedPercentiles(stats []*info.Usage) info.Usage {
|
||||
cpu := NewResource(len(stats))
|
||||
memory := NewResource(len(stats))
|
||||
for _, stat := range stats {
|
||||
cpu.Add(stat.Cpu)
|
||||
memory.Add(stat.Memory)
|
||||
}
|
||||
usage := info.Usage{}
|
||||
usage.Cpu = cpu.GetPercentile()
|
||||
usage.Memory = memory.GetPercentile()
|
||||
return usage
|
||||
}
|
||||
|
||||
// Calculate part of a minute this sample set represent.
|
||||
func getPercentComplete(stats []*secondSample) (percent int32) {
|
||||
numSamples := len(stats)
|
||||
if numSamples > 1 {
|
||||
percent = 100
|
||||
timeRange := stats[numSamples-1].Timestamp.Sub(stats[0].Timestamp).Nanoseconds()
|
||||
// allow some slack
|
||||
if timeRange < 58*secondsToNanoSeconds {
|
||||
percent = int32((timeRange * 100) / 60 * secondsToNanoSeconds)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Returns a percentile sample for a minute by aggregating seconds samples.
|
||||
func GetMinutePercentiles(stats []*secondSample) info.Usage {
|
||||
lastSample := secondSample{}
|
||||
cpu := NewResource(len(stats))
|
||||
memory := NewResource(len(stats))
|
||||
for _, stat := range stats {
|
||||
var elapsed int64
|
||||
if !lastSample.Timestamp.IsZero() {
|
||||
elapsed = stat.Timestamp.Sub(lastSample.Timestamp).Nanoseconds()
|
||||
if elapsed < 10*milliSecondsToNanoSeconds {
|
||||
glog.Infof("Elapsed time too small: %d ns: time now %s last %s", elapsed, stat.Timestamp.String(), lastSample.Timestamp.String())
|
||||
continue
|
||||
}
|
||||
glog.V(2).Infof("Read sample: cpu %d, memory %d", stat.Cpu, memory)
|
||||
cpuRate := (stat.Cpu - lastSample.Cpu) * secondsToMilliSeconds / uint64(elapsed)
|
||||
if cpuRate < 0 {
|
||||
glog.Infof("cpu rate too small: %f ns", cpuRate)
|
||||
continue
|
||||
}
|
||||
glog.V(2).Infof("Adding cpu rate sample : %d", cpuRate)
|
||||
cpu.AddSample(cpuRate)
|
||||
memory.AddSample(stat.Memory)
|
||||
} else {
|
||||
memory.AddSample(stat.Memory)
|
||||
}
|
||||
lastSample = *stat
|
||||
}
|
||||
percent := getPercentComplete(stats)
|
||||
return info.Usage{
|
||||
PercentComplete: percent,
|
||||
Cpu: cpu.GetPercentile(),
|
||||
Memory: memory.GetPercentile(),
|
||||
}
|
||||
}
|
182
summary/percentiles_test.go
Normal file
182
summary/percentiles_test.go
Normal file
@ -0,0 +1,182 @@
|
||||
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package summary
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/cadvisor/info"
|
||||
)
|
||||
|
||||
// Nanosecond is the number of nanoseconds in one second; the tests use it to
// build cumulative cpu counters.
const Nanosecond = 1000 * 1000 * 1000
|
||||
|
||||
func Test90Percentile(t *testing.T) {
|
||||
N := 100
|
||||
stats := make(uint64Slice, 0, N)
|
||||
for i := N; i > 0; i-- {
|
||||
stats = append(stats, uint64(i))
|
||||
}
|
||||
p := stats.Get90Percentile()
|
||||
if p != 90 {
|
||||
t.Errorf("90th percentile is %d, should be 90.", p)
|
||||
}
|
||||
// 90p should be between 94 and 95. Promoted to 95.
|
||||
N = 105
|
||||
for i := 101; i <= N; i++ {
|
||||
stats = append(stats, uint64(i))
|
||||
}
|
||||
p = stats.Get90Percentile()
|
||||
if p != 95 {
|
||||
t.Errorf("90th percentile is %d, should be 95.", p)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMean(t *testing.T) {
|
||||
var i, N uint64
|
||||
N = 100
|
||||
mean := mean{count: 0, Mean: 0}
|
||||
for i = 1; i < N; i++ {
|
||||
mean.Add(i)
|
||||
}
|
||||
if mean.Mean != 50.0 {
|
||||
t.Errorf("Mean is %f, should be 50.0", mean.Mean)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAggregates(t *testing.T) {
|
||||
N := uint64(100)
|
||||
var i uint64
|
||||
ct := time.Now()
|
||||
stats := make([]*secondSample, 0, N)
|
||||
for i = 1; i < N; i++ {
|
||||
s := &secondSample{
|
||||
Timestamp: ct.Add(time.Duration(i) * time.Second),
|
||||
// cpu rate is 1 s/s
|
||||
Cpu: i * Nanosecond,
|
||||
// Memory grows by a KB every second.
|
||||
Memory: i * 1024,
|
||||
}
|
||||
stats = append(stats, s)
|
||||
}
|
||||
usage := GetMinutePercentiles(stats)
|
||||
// Cpu mean, max, and 90p should all be 1000 ms/s.
|
||||
cpuExpected := info.Percentiles{
|
||||
Present: true,
|
||||
Mean: 1000,
|
||||
Max: 1000,
|
||||
Ninety: 1000,
|
||||
}
|
||||
if usage.Cpu != cpuExpected {
|
||||
t.Errorf("cpu stats are %+v. Expected %+v", usage.Cpu, cpuExpected)
|
||||
}
|
||||
memExpected := info.Percentiles{
|
||||
Present: true,
|
||||
Mean: 50 * 1024,
|
||||
Max: 99 * 1024,
|
||||
Ninety: 90 * 1024,
|
||||
}
|
||||
if usage.Memory != memExpected {
|
||||
t.Errorf("memory stats are mean %+v. Expected %+v", usage.Memory, memExpected)
|
||||
}
|
||||
}
|
||||
func TestSamplesCloseInTimeIgnored(t *testing.T) {
|
||||
N := uint64(100)
|
||||
var i uint64
|
||||
ct := time.Now()
|
||||
stats := make([]*secondSample, 0, N*2)
|
||||
for i = 1; i < N; i++ {
|
||||
s1 := &secondSample{
|
||||
Timestamp: ct.Add(time.Duration(i) * time.Second),
|
||||
// cpu rate is 1 s/s
|
||||
Cpu: i * Nanosecond,
|
||||
// Memory grows by a KB every second.
|
||||
Memory: i * 1024,
|
||||
}
|
||||
stats = append(stats, s1)
|
||||
|
||||
// Add another dummy sample too close in time to the last one.
|
||||
s2 := &secondSample{
|
||||
// Add extra millisecond.
|
||||
Timestamp: ct.Add(time.Duration(i) * time.Second).Add(time.Duration(1) * time.Millisecond),
|
||||
Cpu: i * 100 * Nanosecond,
|
||||
Memory: i * 1024 * 1024,
|
||||
}
|
||||
stats = append(stats, s2)
|
||||
}
|
||||
usage := GetMinutePercentiles(stats)
|
||||
// Cpu mean, max, and 90p should all be 1000 ms/s. All high-value samples are discarded.
|
||||
cpuExpected := info.Percentiles{
|
||||
Present: true,
|
||||
Mean: 1000,
|
||||
Max: 1000,
|
||||
Ninety: 1000,
|
||||
}
|
||||
if usage.Cpu != cpuExpected {
|
||||
t.Errorf("cpu stats are %+v. Expected %+v", usage.Cpu, cpuExpected)
|
||||
}
|
||||
memExpected := info.Percentiles{
|
||||
Present: true,
|
||||
Mean: 50 * 1024,
|
||||
Max: 99 * 1024,
|
||||
Ninety: 90 * 1024,
|
||||
}
|
||||
if usage.Memory != memExpected {
|
||||
t.Errorf("memory stats are mean %+v. Expected %+v", usage.Memory, memExpected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDerivedStats(t *testing.T) {
|
||||
N := uint64(100)
|
||||
var i uint64
|
||||
stats := make([]*info.Usage, 0, N)
|
||||
for i = 1; i < N; i++ {
|
||||
s := &info.Usage{
|
||||
PercentComplete: 100,
|
||||
Cpu: info.Percentiles{
|
||||
Present: true,
|
||||
Mean: i * Nanosecond,
|
||||
Max: i * Nanosecond,
|
||||
Ninety: i * Nanosecond,
|
||||
},
|
||||
Memory: info.Percentiles{
|
||||
Present: true,
|
||||
Mean: i * 1024,
|
||||
Max: i * 1024,
|
||||
Ninety: i * 1024,
|
||||
},
|
||||
}
|
||||
stats = append(stats, s)
|
||||
}
|
||||
usage := GetDerivedPercentiles(stats)
|
||||
cpuExpected := info.Percentiles{
|
||||
Present: true,
|
||||
Mean: 50 * Nanosecond,
|
||||
Max: 99 * Nanosecond,
|
||||
Ninety: 90 * Nanosecond,
|
||||
}
|
||||
if usage.Cpu != cpuExpected {
|
||||
t.Errorf("cpu stats are %+v. Expected %+v", usage.Cpu, cpuExpected)
|
||||
}
|
||||
memExpected := info.Percentiles{
|
||||
Present: true,
|
||||
Mean: 50 * 1024,
|
||||
Max: 99 * 1024,
|
||||
Ninety: 90 * 1024,
|
||||
}
|
||||
if usage.Memory != memExpected {
|
||||
t.Errorf("memory stats are mean %+v. Expected %+v", usage.Memory, memExpected)
|
||||
}
|
||||
}
|
147
summary/summary.go
Normal file
147
summary/summary.go
Normal file
@ -0,0 +1,147 @@
|
||||
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Maintains the summary of aggregated minute, hour, and day stats.
|
||||
// For a container running for more than a day, amount of tracked data can go up to
|
||||
// 40 KB when cpu and memory are tracked. We'll start by enabling collection for the
|
||||
// node, followed by docker, and then all containers as we understand the usage pattern
|
||||
// better
|
||||
// TODO(rjnagal): Optimize the size if we start running it for every container.
|
||||
package summary
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/cadvisor/info"
|
||||
)
|
||||
|
||||
// secondSample holds the usage fields kept from one raw stats reading; these
// are the inputs for generating percentiles.
type secondSample struct {
	// Timestamp is the time at which the sample was recorded.
	Timestamp time.Time
	// Cpu is the cpu usage value of the sample.
	Cpu uint64
	// Memory is the memory usage value of the sample.
	Memory uint64
}
|
||||
|
||||
// availableResources flags which resources are tracked for a container.
type availableResources struct {
	// Cpu is true when cpu usage is tracked.
	Cpu bool
	// Memory is true when memory usage is tracked.
	Memory bool
}
|
||||
|
||||
type StatsSummary struct {
|
||||
// Resources being tracked for this container.
|
||||
available availableResources
|
||||
// list of second samples. The list is cleared when a new minute samples is generated.
|
||||
secondSamples []*secondSample
|
||||
// minute percentiles. We track 24 * 60 maximum samples.
|
||||
minuteSamples *SamplesBuffer
|
||||
// latest derived minute, hour, and day stats. Updated every minute.
|
||||
derivedStats info.DerivedStats // Guarded by dataLock.
|
||||
dataLock sync.RWMutex
|
||||
}
|
||||
|
||||
// Adds a new seconds sample.
|
||||
// If enough seconds samples are collected, a minute sample is generated and derived
|
||||
// stats are updated.
|
||||
func (s *StatsSummary) AddSample(stat info.ContainerStats) error {
|
||||
sample := secondSample{}
|
||||
if s.available.Cpu {
|
||||
sample.Cpu = stat.Cpu.Usage.Total
|
||||
}
|
||||
if s.available.Memory {
|
||||
sample.Memory = stat.Memory.WorkingSet
|
||||
}
|
||||
s.secondSamples = append(s.secondSamples, &sample)
|
||||
// TODO(jnagal): Use 'available' to avoid unnecessary computation.
|
||||
if len(s.secondSamples) == 60 {
|
||||
// Make a minute stat.
|
||||
minuteSample := GetMinutePercentiles(s.secondSamples)
|
||||
// clear seconds samples.
|
||||
s.secondSamples = s.secondSamples[:0]
|
||||
s.minuteSamples.Add(minuteSample)
|
||||
err := s.updateDerivedStats()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Generate new derived stats based on current minute stats samples.
|
||||
func (s *StatsSummary) updateDerivedStats() error {
|
||||
derived := info.DerivedStats{}
|
||||
derived.Timestamp = time.Now()
|
||||
minuteSamples := s.minuteSamples.RecentStats(1)
|
||||
if len(minuteSamples) != 1 {
|
||||
return fmt.Errorf("failed to retrieve minute stats")
|
||||
}
|
||||
derived.MinuteUsage = *minuteSamples[0]
|
||||
hourUsage, err := s.getDerivedUsage(60)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to compute hour stats: %v", err)
|
||||
}
|
||||
dayUsage, err := s.getDerivedUsage(60 * 24)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to compute day usage: %v", err)
|
||||
}
|
||||
derived.HourUsage = hourUsage
|
||||
derived.DayUsage = dayUsage
|
||||
|
||||
s.dataLock.Lock()
|
||||
defer s.dataLock.Unlock()
|
||||
s.derivedStats = derived
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// helper method to get hour and daily derived stats
|
||||
func (s *StatsSummary) getDerivedUsage(n int) (info.Usage, error) {
|
||||
if n < 1 {
|
||||
return info.Usage{}, fmt.Errorf("invalid number of samples requested: %d", n)
|
||||
}
|
||||
samples := s.minuteSamples.RecentStats(n)
|
||||
numSamples := len(samples)
|
||||
if numSamples < 1 {
|
||||
return info.Usage{}, fmt.Errorf("failed to retrieve any minute stats.")
|
||||
}
|
||||
// We generate derived stats even with partial data.
|
||||
usage := GetDerivedPercentiles(samples)
|
||||
// Assumes we have equally placed minute samples.
|
||||
usage.PercentComplete = int32(numSamples / n)
|
||||
return usage, nil
|
||||
}
|
||||
|
||||
// Return the latest calculated derived stats.
|
||||
func (s *StatsSummary) DerivedStats() (info.DerivedStats, error) {
|
||||
s.dataLock.RLock()
|
||||
defer s.dataLock.RUnlock()
|
||||
|
||||
return s.derivedStats, nil
|
||||
}
|
||||
|
||||
func New(spec info.ContainerSpec) (*StatsSummary, error) {
|
||||
summary := StatsSummary{}
|
||||
if spec.HasCpu {
|
||||
summary.available.Cpu = true
|
||||
}
|
||||
if spec.HasMemory {
|
||||
summary.available.Memory = true
|
||||
}
|
||||
if !summary.available.Cpu && !summary.available.Memory {
|
||||
return nil, fmt.Errorf("none of the resources are being tracked.")
|
||||
}
|
||||
summary.minuteSamples = NewSamplesBuffer(60 /* one hour */)
|
||||
return &summary, nil
|
||||
}
|
@ -1,135 +0,0 @@
|
||||
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package utils
|
||||
|
||||
import (
|
||||
"math"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/golang/glog"
|
||||
"github.com/google/cadvisor/info"
|
||||
)
|
||||
|
||||
// Unit conversion factors used when computing cpu rates.
const (
	// milliSecondsToNanoSeconds is the number of nanoseconds in a millisecond.
	milliSecondsToNanoSeconds = 1000000
	// secondsToMilliSeconds is the number of milliseconds in a second.
	secondsToMilliSeconds = 1000
)
|
||||
|
||||
// uint64Slice is a []uint64 that implements sort.Interface in ascending order.
type uint64Slice []uint64

func (a uint64Slice) Len() int           { return len(a) }
func (a uint64Slice) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a uint64Slice) Less(i, j int) bool { return a[i] < a[j] }

// Percentiles summarizes a set of collected samples.
// TODO(rjnagal): Move out when we update API.
type Percentiles struct {
	// Average over the collected sample.
	Mean uint64 `json:"mean"`
	// Max seen over the collected sample.
	Max uint64 `json:"max"`
	// 90th percentile over the collected sample.
	Ninety uint64 `json:"ninety"`
}

// Get90Percentile returns the 90th percentile of the samples, truncated to an
// integer. It returns 0 for an empty slice. The slice is sorted in place as a
// side effect.
//
// The percentile sits at 1-based rank 0.9*(count+1); a fractional rank is
// linearly interpolated between the two neighbouring samples.
func (a uint64Slice) Get90Percentile() uint64 {
	count := a.Len()
	if count == 0 {
		return 0
	}
	sort.Sort(a)
	whole, frac := math.Modf(0.9 * (float64(count) + 1))
	index := int(whole)
	percentile := float64(a[index-1])
	// Interpolate only when a higher neighbour exists. For small inputs the
	// rank lands on the last element (index == count) and a[index] would be
	// out of range.
	if index < count {
		percentile += frac * float64(a[index]-a[index-1])
	}
	return uint64(percentile)
}
|
||||
|
||||
// Mean maintains a running arithmetic mean of the values passed to Add.
type Mean struct {
	// count is the number of values added so far.
	count uint64
	// Mean is the average of all values added so far.
	Mean float64
}

// Add folds value into the running mean.
func (m *Mean) Add(value uint64) {
	m.count++
	sample := float64(value)
	if m.count == 1 {
		// The first value is the mean by definition.
		m.Mean = sample
		return
	}
	total := float64(m.count)
	// Re-weight the previous mean by the old count, then add the new value.
	m.Mean = (m.Mean*(total-1) + sample) / total
}
|
||||
|
||||
// Returns cpu and memory usage percentiles.
|
||||
func GetPercentiles(stats []*info.ContainerStats) (Percentiles, Percentiles) {
|
||||
lastCpu := uint64(0)
|
||||
lastTime := time.Time{}
|
||||
memorySamples := make(uint64Slice, 0, len(stats))
|
||||
cpuSamples := make(uint64Slice, 0, len(stats)-1)
|
||||
numSamples := 0
|
||||
memoryMean := Mean{count: 0, Mean: 0}
|
||||
cpuMean := Mean{count: 0, Mean: 0}
|
||||
memoryPercentiles := Percentiles{}
|
||||
cpuPercentiles := Percentiles{}
|
||||
for _, stat := range stats {
|
||||
var elapsed int64
|
||||
time := stat.Timestamp
|
||||
if !lastTime.IsZero() {
|
||||
elapsed = time.UnixNano() - lastTime.UnixNano()
|
||||
if elapsed < 10*milliSecondsToNanoSeconds {
|
||||
glog.Infof("Elapsed time too small: %d ns: time now %s last %s", elapsed, time.String(), lastTime.String())
|
||||
continue
|
||||
}
|
||||
}
|
||||
numSamples++
|
||||
cpuNs := stat.Cpu.Usage.Total
|
||||
// Ignore actual usage and only focus on working set.
|
||||
memory := stat.Memory.WorkingSet
|
||||
if memory > memoryPercentiles.Max {
|
||||
memoryPercentiles.Max = memory
|
||||
}
|
||||
glog.V(2).Infof("Read sample: cpu %d, memory %d", cpuNs, memory)
|
||||
memoryMean.Add(memory)
|
||||
memorySamples = append(memorySamples, memory)
|
||||
if lastTime.IsZero() {
|
||||
lastCpu = cpuNs
|
||||
lastTime = time
|
||||
continue
|
||||
}
|
||||
cpuRate := (cpuNs - lastCpu) * secondsToMilliSeconds / uint64(elapsed)
|
||||
if cpuRate < 0 {
|
||||
glog.Infof("cpu rate too small: %f ns", cpuRate)
|
||||
continue
|
||||
}
|
||||
glog.V(2).Infof("Adding cpu rate sample : %d", cpuRate)
|
||||
lastCpu = cpuNs
|
||||
lastTime = time
|
||||
cpuSamples = append(cpuSamples, cpuRate)
|
||||
if cpuRate > cpuPercentiles.Max {
|
||||
cpuPercentiles.Max = cpuRate
|
||||
}
|
||||
cpuMean.Add(cpuRate)
|
||||
}
|
||||
cpuPercentiles.Mean = uint64(cpuMean.Mean)
|
||||
memoryPercentiles.Mean = uint64(memoryMean.Mean)
|
||||
cpuPercentiles.Ninety = cpuSamples.Get90Percentile()
|
||||
memoryPercentiles.Ninety = memorySamples.Get90Percentile()
|
||||
return cpuPercentiles, memoryPercentiles
|
||||
}
|
@ -1,144 +0,0 @@
|
||||
// Copyright 2015 Google Inc. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package utils
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/cadvisor/info"
|
||||
)
|
||||
|
||||
// Nanosecond is the number of nanoseconds in one second; the tests use it to
// build cumulative cpu counters.
const Nanosecond = 1000 * 1000 * 1000
|
||||
|
||||
func Test90Percentile(t *testing.T) {
|
||||
N := 100
|
||||
stats := make(uint64Slice, 0, N)
|
||||
for i := N; i > 0; i-- {
|
||||
stats = append(stats, uint64(i))
|
||||
}
|
||||
p := stats.Get90Percentile()
|
||||
if p != 90 {
|
||||
t.Errorf("90th percentile is %d, should be 90.", p)
|
||||
}
|
||||
// 90p should be between 94 and 95. Promoted to 95.
|
||||
N = 105
|
||||
for i := 101; i <= N; i++ {
|
||||
stats = append(stats, uint64(i))
|
||||
}
|
||||
p = stats.Get90Percentile()
|
||||
if p != 95 {
|
||||
t.Errorf("90th percentile is %d, should be 95.", p)
|
||||
}
|
||||
}
|
||||
|
||||
func TestMean(t *testing.T) {
|
||||
var i, N uint64
|
||||
N = 100
|
||||
mean := Mean{count: 0, Mean: 0}
|
||||
for i = 1; i < N; i++ {
|
||||
mean.Add(i)
|
||||
}
|
||||
if mean.Mean != 50.0 {
|
||||
t.Errorf("Mean is %f, should be 50.0", mean.Mean)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAggregates(t *testing.T) {
|
||||
N := uint64(100)
|
||||
var i uint64
|
||||
ct := time.Now()
|
||||
stats := make([]*info.ContainerStats, 0, N)
|
||||
for i = 1; i < N; i++ {
|
||||
s := &info.ContainerStats{
|
||||
Cpu: info.CpuStats{},
|
||||
Timestamp: ct.Add(time.Duration(i) * time.Second),
|
||||
Memory: info.MemoryStats{
|
||||
// Memory grows by a KB every second.
|
||||
WorkingSet: i * 1024,
|
||||
},
|
||||
}
|
||||
// cpu rate is 1 s/s
|
||||
s.Cpu.Usage.Total = i * Nanosecond
|
||||
stats = append(stats, s)
|
||||
}
|
||||
cpu, mem := GetPercentiles(stats)
|
||||
// Cpu mean, max, and 90p should all be 1000 ms/s.
|
||||
cpuExpected := Percentiles{
|
||||
Mean: 1000,
|
||||
Max: 1000,
|
||||
Ninety: 1000,
|
||||
}
|
||||
if cpu != cpuExpected {
|
||||
t.Errorf("cpu stats are %+v. Expected %+v", cpu, cpuExpected)
|
||||
}
|
||||
memExpected := Percentiles{
|
||||
Mean: 50 * 1024,
|
||||
Max: 99 * 1024,
|
||||
Ninety: 90 * 1024,
|
||||
}
|
||||
if mem != memExpected {
|
||||
t.Errorf("memory stats are mean %+v. Expected %+v", mem, memExpected)
|
||||
}
|
||||
}
|
||||
func TestSamplesCloseInTimeIgnored(t *testing.T) {
|
||||
N := uint64(100)
|
||||
var i uint64
|
||||
ct := time.Now()
|
||||
stats := make([]*info.ContainerStats, 0, N*2)
|
||||
for i = 1; i < N; i++ {
|
||||
s1 := &info.ContainerStats{
|
||||
Cpu: info.CpuStats{},
|
||||
Timestamp: ct.Add(time.Duration(i) * time.Second),
|
||||
Memory: info.MemoryStats{
|
||||
// Memory grows by a KB every second.
|
||||
WorkingSet: i * 1024,
|
||||
},
|
||||
}
|
||||
// cpu rate is 1 s/s
|
||||
s1.Cpu.Usage.Total = i * Nanosecond
|
||||
stats = append(stats, s1)
|
||||
|
||||
// Add another dummy sample too close in time to the last one.
|
||||
s2 := &info.ContainerStats{
|
||||
Cpu: info.CpuStats{},
|
||||
// Add extra millisecond.
|
||||
Timestamp: ct.Add(time.Duration(i) * time.Second).Add(time.Duration(1) * time.Millisecond),
|
||||
Memory: info.MemoryStats{
|
||||
WorkingSet: i * 1024 * 1024,
|
||||
},
|
||||
}
|
||||
s2.Cpu.Usage.Total = i * 100 * Nanosecond
|
||||
stats = append(stats, s2)
|
||||
}
|
||||
cpu, mem := GetPercentiles(stats)
|
||||
// Cpu mean, max, and 90p should all be 1000 ms/s. All high-value samples are discarded.
|
||||
cpuExpected := Percentiles{
|
||||
Mean: 1000,
|
||||
Max: 1000,
|
||||
Ninety: 1000,
|
||||
}
|
||||
if cpu != cpuExpected {
|
||||
t.Errorf("cpu stats are %+v. Expected %+v", cpu, cpuExpected)
|
||||
}
|
||||
memExpected := Percentiles{
|
||||
Mean: 50 * 1024,
|
||||
Max: 99 * 1024,
|
||||
Ninety: 90 * 1024,
|
||||
}
|
||||
if mem != memExpected {
|
||||
t.Errorf("memory stats are mean %+v. Expected %+v", mem, memExpected)
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user