505 lines
15 KiB
Go
505 lines
15 KiB
Go
// Copyright 2014 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
|
|
|
package influxdb
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/google/cadvisor/info"
|
|
"github.com/google/cadvisor/storage"
|
|
influxdb "github.com/influxdb/influxdb/client"
|
|
)
|
|
|
|
type influxdbStorage struct {
|
|
client *influxdb.Client
|
|
prevStats *info.ContainerStats
|
|
machineName string
|
|
tableName string
|
|
windowLen time.Duration
|
|
}
|
|
|
|
// Column names used for the points stored in InfluxDB. The two *Prefix
// constants are completed with a decimal core index to form the final
// column name.
const (
	// Time at which the stats were collected, formatted as RFC 3339 with
	// nanoseconds.
	colTimestamp string = "timestamp"

	// Name of the machine the stats came from.
	colMachineName string = "machine"

	// Name of the container the stats belong to.
	colContainerName string = "container_name"

	// Cumulative total CPU usage.
	colCpuCumulativeUsage string = "cpu_cumulative_usage"

	// Cumulative CPU usage in system mode.
	colCpuCumulativeUsageSystem string = "cpu_cumulative_usage_system"

	// Cumulative CPU usage in user mode.
	colCpuCumulativeUsageUser string = "cpu_cumulative_usage_user"

	// Memory usage.
	colMemoryUsage string = "memory_usage"

	// Working set size.
	colMemoryWorkingSet string = "memory_working_set"

	// Container page faults.
	colMemoryContainerPgfault string = "memory_container_pgfault"

	// Container major page faults.
	colMemoryContainerPgmajfault string = "memory_container_pgmajfault"

	// Hierarchical page faults.
	colMemoryHierarchicalPgfault string = "memory_hierarchical_pgfault"

	// Hierarchical major page faults.
	colMemoryHierarchicalPgmajfault string = "memory_hierarchical_pgmajfault"

	// Prefix of the cumulative per-core usage columns.
	colPerCoreCumulativeUsagePrefix string = "per_core_cumulative_usage_core_"

	// Optional: sample duration. Unit: nanosecond.
	colSampleDuration string = "sample_duration"

	// Optional: instant CPU usage.
	colCpuInstantUsage string = "cpu_instant_usage"

	// Optional: prefix of the instant per-core usage columns.
	colPerCoreInstantUsagePrefix string = "per_core_instant_usage_core_"
)
|
|
|
|
func (self *influxdbStorage) containerStatsToValues(
|
|
ref info.ContainerReference,
|
|
stats *info.ContainerStats,
|
|
) (columns []string, values []interface{}) {
|
|
|
|
// Timestamp
|
|
columns = append(columns, colTimestamp)
|
|
values = append(values, stats.Timestamp.Format(time.RFC3339Nano))
|
|
|
|
// Machine name
|
|
columns = append(columns, colMachineName)
|
|
values = append(values, self.machineName)
|
|
|
|
// Container name
|
|
columns = append(columns, colContainerName)
|
|
values = append(values, ref.Name)
|
|
|
|
// Cumulative Cpu Usage
|
|
columns = append(columns, colCpuCumulativeUsage)
|
|
values = append(values, stats.Cpu.Usage.Total)
|
|
|
|
// Cumulative Cpu Usage in system mode
|
|
columns = append(columns, colCpuCumulativeUsageSystem)
|
|
values = append(values, stats.Cpu.Usage.System)
|
|
|
|
// Cumulative Cpu Usage in user mode
|
|
columns = append(columns, colCpuCumulativeUsageUser)
|
|
values = append(values, stats.Cpu.Usage.User)
|
|
|
|
// Memory Usage
|
|
columns = append(columns, colMemoryUsage)
|
|
values = append(values, stats.Memory.Usage)
|
|
|
|
// Working set size
|
|
columns = append(columns, colMemoryWorkingSet)
|
|
values = append(values, stats.Memory.WorkingSet)
|
|
|
|
// container page fault
|
|
columns = append(columns, colMemoryContainerPgfault)
|
|
values = append(values, stats.Memory.ContainerData.Pgfault)
|
|
|
|
// container major page fault
|
|
columns = append(columns, colMemoryContainerPgmajfault)
|
|
values = append(values, stats.Memory.ContainerData.Pgmajfault)
|
|
|
|
// hierarchical page fault
|
|
columns = append(columns, colMemoryHierarchicalPgfault)
|
|
values = append(values, stats.Memory.HierarchicalData.Pgfault)
|
|
|
|
// hierarchical major page fault
|
|
columns = append(columns, colMemoryHierarchicalPgmajfault)
|
|
values = append(values, stats.Memory.HierarchicalData.Pgmajfault)
|
|
|
|
// per cpu cumulative usage
|
|
for i, u := range stats.Cpu.Usage.PerCpu {
|
|
columns = append(columns, fmt.Sprintf("%v%v", colPerCoreCumulativeUsagePrefix, i))
|
|
values = append(values, u)
|
|
}
|
|
|
|
sample, err := info.NewSample(self.prevStats, stats)
|
|
if err != nil || sample == nil {
|
|
return columns, values
|
|
}
|
|
|
|
// Optional: sample duration. Unit: Nanosecond.
|
|
columns = append(columns, colSampleDuration)
|
|
values = append(values, sample.Duration.String())
|
|
|
|
// Optional: Instant cpu usage
|
|
columns = append(columns, colCpuInstantUsage)
|
|
values = append(values, sample.Cpu.Usage)
|
|
|
|
// Optional: Instant per core usage
|
|
for i, u := range sample.Cpu.PerCpuUsage {
|
|
columns = append(columns, fmt.Sprintf("%v%v", colPerCoreInstantUsagePrefix, i))
|
|
values = append(values, u)
|
|
}
|
|
|
|
return columns, values
|
|
}
|
|
|
|
// convertToUint64 converts a dynamically typed numeric value to uint64.
//
// A nil value converts to 0 without error. All built-in signed and unsigned
// integer types and both float types are accepted. Floats are truncated
// toward zero.
//
// An error is returned when the value is negative, when a float is NaN or
// does not fit in a uint64 (including +Inf), or when the dynamic type is not
// one of the supported numeric types.
func convertToUint64(v interface{}) (uint64, error) {
	if v == nil {
		return 0, nil
	}
	switch x := v.(type) {
	case uint64:
		return x, nil
	case uint:
		return uint64(x), nil
	case uint32:
		return uint64(x), nil
	case uint16:
		return uint64(x), nil
	case uint8:
		return uint64(x), nil
	case int:
		return signedToUint64(int64(x))
	case int64:
		return signedToUint64(x)
	case int32:
		return signedToUint64(int64(x))
	case int16:
		return signedToUint64(int64(x))
	case int8:
		return signedToUint64(int64(x))
	case float64:
		return floatToUint64(x)
	case float32:
		return floatToUint64(float64(x))
	}
	return 0, fmt.Errorf("Unknown type")
}

// signedToUint64 converts a signed integer to uint64, rejecting negatives.
func signedToUint64(x int64) (uint64, error) {
	if x < 0 {
		return 0, fmt.Errorf("negative value: %v", x)
	}
	return uint64(x), nil
}

// floatToUint64 truncates a float to uint64, rejecting values for which the
// conversion is not well defined.
func floatToUint64(x float64) (uint64, error) {
	// 2^64 is the smallest float64 that no longer fits in a uint64.
	const uint64Limit = float64(1 << 64)

	switch {
	case x != x:
		// Only NaN compares unequal to itself.
		return 0, fmt.Errorf("value is not a number: %v", x)
	case x < 0:
		return 0, fmt.Errorf("negative value: %v", x)
	case x >= uint64Limit:
		// Also catches +Inf. Converting such a value yields an
		// implementation-dependent result, so refuse instead.
		return 0, fmt.Errorf("value overflows uint64: %v", x)
	}
	return uint64(x), nil
}
|
|
|
|
func (self *influxdbStorage) valuesToContainerStats(columns []string, values []interface{}) (*info.ContainerStats, error) {
|
|
stats := &info.ContainerStats{
|
|
Cpu: &info.CpuStats{},
|
|
Memory: &info.MemoryStats{},
|
|
}
|
|
perCoreUsage := make(map[int]uint64, 32)
|
|
var err error
|
|
for i, col := range columns {
|
|
v := values[i]
|
|
switch {
|
|
case col == colTimestamp:
|
|
if str, ok := v.(string); ok {
|
|
stats.Timestamp, err = time.Parse(time.RFC3339Nano, str)
|
|
}
|
|
case col == colMachineName:
|
|
if m, ok := v.(string); ok {
|
|
if m != self.machineName {
|
|
return nil, fmt.Errorf("different machine")
|
|
}
|
|
} else {
|
|
return nil, fmt.Errorf("machine name field is not a string: %v", v)
|
|
}
|
|
// Cumulative Cpu Usage
|
|
case col == colCpuCumulativeUsage:
|
|
stats.Cpu.Usage.Total, err = convertToUint64(v)
|
|
// Cumulative Cpu used by the system
|
|
case col == colCpuCumulativeUsageSystem:
|
|
stats.Cpu.Usage.System, err = convertToUint64(v)
|
|
// Cumulative Cpu Usage in user mode
|
|
case col == colCpuCumulativeUsageUser:
|
|
stats.Cpu.Usage.User, err = convertToUint64(v)
|
|
// Memory Usage
|
|
case col == colMemoryUsage:
|
|
stats.Memory.Usage, err = convertToUint64(v)
|
|
// Working set size
|
|
case col == colMemoryWorkingSet:
|
|
stats.Memory.WorkingSet, err = convertToUint64(v)
|
|
// container page fault
|
|
case col == colMemoryContainerPgfault:
|
|
stats.Memory.ContainerData.Pgfault, err = convertToUint64(v)
|
|
// container major page fault
|
|
case col == colMemoryContainerPgmajfault:
|
|
stats.Memory.ContainerData.Pgmajfault, err = convertToUint64(v)
|
|
// hierarchical page fault
|
|
case col == colMemoryHierarchicalPgfault:
|
|
stats.Memory.HierarchicalData.Pgfault, err = convertToUint64(v)
|
|
// hierarchical major page fault
|
|
case col == colMemoryHierarchicalPgmajfault:
|
|
stats.Memory.HierarchicalData.Pgmajfault, err = convertToUint64(v)
|
|
case strings.HasPrefix(col, colPerCoreCumulativeUsagePrefix):
|
|
idxStr := col[len(colPerCoreCumulativeUsagePrefix):]
|
|
idx, err := strconv.Atoi(idxStr)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
perCoreUsage[idx], err = convertToUint64(v)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("column %v has invalid value %v: %v", col, v, err)
|
|
}
|
|
}
|
|
stats.Cpu.Usage.PerCpu = make([]uint64, len(perCoreUsage))
|
|
for idx, usage := range perCoreUsage {
|
|
stats.Cpu.Usage.PerCpu[idx] = usage
|
|
}
|
|
return stats, nil
|
|
}
|
|
|
|
func (self *influxdbStorage) valuesToContainerSample(columns []string, values []interface{}) (*info.ContainerStatsSample, error) {
|
|
sample := &info.ContainerStatsSample{}
|
|
perCoreUsage := make(map[int]uint64, 32)
|
|
var err error
|
|
for i, col := range columns {
|
|
v := values[i]
|
|
switch {
|
|
case col == colTimestamp:
|
|
if str, ok := v.(string); ok {
|
|
sample.Timestamp, err = time.Parse(time.RFC3339Nano, str)
|
|
}
|
|
case col == colMachineName:
|
|
if m, ok := v.(string); ok {
|
|
if m != self.machineName {
|
|
return nil, fmt.Errorf("different machine")
|
|
}
|
|
} else {
|
|
return nil, fmt.Errorf("machine name field is not a string: %v", v)
|
|
}
|
|
// Memory Usage
|
|
case col == colMemoryUsage:
|
|
sample.Memory.Usage, err = convertToUint64(v)
|
|
// sample duration. Unit: Nanosecond.
|
|
case col == colSampleDuration:
|
|
if v == nil {
|
|
// this record does not have sample_duration, so it's the first stats.
|
|
return nil, nil
|
|
}
|
|
sample.Duration, err = time.ParseDuration(v.(string))
|
|
// Instant cpu usage
|
|
case col == colCpuInstantUsage:
|
|
sample.Cpu.Usage, err = convertToUint64(v)
|
|
case strings.HasPrefix(col, colPerCoreInstantUsagePrefix):
|
|
idxStr := col[len(colPerCoreInstantUsagePrefix):]
|
|
idx, err := strconv.Atoi(idxStr)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
perCoreUsage[idx], err = convertToUint64(v)
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("column %v has invalid value %v: %v", col, v, err)
|
|
}
|
|
}
|
|
sample.Cpu.PerCpuUsage = make([]uint64, len(perCoreUsage))
|
|
for idx, usage := range perCoreUsage {
|
|
sample.Cpu.PerCpuUsage[idx] = usage
|
|
}
|
|
if sample.Duration.Nanoseconds() == 0 {
|
|
return nil, nil
|
|
}
|
|
return sample, nil
|
|
}
|
|
|
|
func (self *influxdbStorage) AddStats(ref info.ContainerReference, stats *info.ContainerStats) error {
|
|
series := &influxdb.Series{
|
|
Name: self.tableName,
|
|
// There's only one point for each stats
|
|
Points: make([][]interface{}, 1),
|
|
}
|
|
if stats == nil || stats.Cpu == nil || stats.Memory == nil {
|
|
return nil
|
|
}
|
|
series.Columns, series.Points[0] = self.containerStatsToValues(ref, stats)
|
|
|
|
self.prevStats = stats.Copy(self.prevStats)
|
|
err := self.client.WriteSeries([]*influxdb.Series{series})
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (self *influxdbStorage) RecentStats(containerName string, numStats int) ([]*info.ContainerStats, error) {
|
|
if numStats == 0 {
|
|
return nil, nil
|
|
}
|
|
// TODO(dengnan): select only columns that we need
|
|
// TODO(dengnan): escape names
|
|
query := fmt.Sprintf("select * from %v where %v='%v' and %v='%v'", self.tableName, colContainerName, containerName, colMachineName, self.machineName)
|
|
if numStats > 0 {
|
|
query = fmt.Sprintf("%v limit %v", query, numStats)
|
|
}
|
|
series, err := self.client.Query(query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
statsList := make([]*info.ContainerStats, 0, len(series))
|
|
// By default, influxDB returns data in time descending order.
|
|
// RecentStats() requires stats in time increasing order,
|
|
// so we need to go through from the last one to the first one.
|
|
for i := len(series) - 1; i >= 0; i-- {
|
|
s := series[i]
|
|
for j := len(s.Points) - 1; j >= 0; j-- {
|
|
values := s.Points[j]
|
|
stats, err := self.valuesToContainerStats(s.Columns, values)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if stats == nil {
|
|
continue
|
|
}
|
|
statsList = append(statsList, stats)
|
|
}
|
|
}
|
|
return statsList, nil
|
|
}
|
|
|
|
func (self *influxdbStorage) Samples(containerName string, numSamples int) ([]*info.ContainerStatsSample, error) {
|
|
if numSamples == 0 {
|
|
return nil, nil
|
|
}
|
|
// TODO(dengnan): select only columns that we need
|
|
// TODO(dengnan): escape names
|
|
query := fmt.Sprintf("select * from %v where %v='%v' and %v='%v'", self.tableName, colContainerName, containerName, colMachineName, self.machineName)
|
|
if numSamples > 0 {
|
|
query = fmt.Sprintf("%v limit %v", query, numSamples)
|
|
}
|
|
series, err := self.client.Query(query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
sampleList := make([]*info.ContainerStatsSample, 0, len(series))
|
|
for i := len(series) - 1; i >= 0; i-- {
|
|
s := series[i]
|
|
for j := len(s.Points) - 1; j >= 0; j-- {
|
|
values := s.Points[j]
|
|
sample, err := self.valuesToContainerSample(s.Columns, values)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if sample == nil {
|
|
continue
|
|
}
|
|
sampleList = append(sampleList, sample)
|
|
}
|
|
}
|
|
return sampleList, nil
|
|
}
|
|
|
|
func (self *influxdbStorage) Close() error {
|
|
self.client = nil
|
|
return nil
|
|
}
|
|
|
|
func (self *influxdbStorage) Percentiles(
|
|
containerName string,
|
|
cpuUsagePercentiles []int,
|
|
memUsagePercentiles []int,
|
|
) (*info.ContainerStatsPercentiles, error) {
|
|
selectedCol := make([]string, 0, len(cpuUsagePercentiles)+len(memUsagePercentiles)+1)
|
|
|
|
selectedCol = append(selectedCol, fmt.Sprintf("max(%v)", colMemoryUsage))
|
|
for _, p := range cpuUsagePercentiles {
|
|
selectedCol = append(selectedCol, fmt.Sprintf("percentile(%v, %v)", colCpuInstantUsage, p))
|
|
}
|
|
for _, p := range memUsagePercentiles {
|
|
selectedCol = append(selectedCol, fmt.Sprintf("percentile(%v, %v)", colMemoryUsage, p))
|
|
}
|
|
|
|
query := fmt.Sprintf("select %v from %v where %v='%v' and %v='%v' and time > now() - %v",
|
|
strings.Join(selectedCol, ","),
|
|
self.tableName,
|
|
colContainerName,
|
|
containerName,
|
|
colMachineName,
|
|
self.machineName,
|
|
fmt.Sprintf("%vs", self.windowLen.Seconds()),
|
|
)
|
|
series, err := self.client.Query(query)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(series) != 1 {
|
|
return nil, nil
|
|
}
|
|
if len(series[0].Points) == 0 {
|
|
return nil, nil
|
|
}
|
|
|
|
point := series[0].Points[0]
|
|
|
|
ret := new(info.ContainerStatsPercentiles)
|
|
ret.MaxMemoryUsage, err = convertToUint64(point[1])
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid max memory usage: %v", err)
|
|
}
|
|
retrievedCpuPercentiles := point[2 : 2+len(cpuUsagePercentiles)]
|
|
for i, p := range cpuUsagePercentiles {
|
|
v, err := convertToUint64(retrievedCpuPercentiles[i])
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid cpu usage: %v", err)
|
|
}
|
|
ret.CpuUsagePercentiles = append(
|
|
ret.CpuUsagePercentiles,
|
|
info.Percentile{
|
|
Percentage: p,
|
|
Value: v,
|
|
},
|
|
)
|
|
}
|
|
retrievedMemoryPercentiles := point[2+len(cpuUsagePercentiles):]
|
|
for i, p := range memUsagePercentiles {
|
|
v, err := convertToUint64(retrievedMemoryPercentiles[i])
|
|
if err != nil {
|
|
return nil, fmt.Errorf("invalid memory usage: %v", err)
|
|
}
|
|
ret.MemoryUsagePercentiles = append(
|
|
ret.MemoryUsagePercentiles,
|
|
info.Percentile{
|
|
Percentage: p,
|
|
Value: v,
|
|
},
|
|
)
|
|
}
|
|
return ret, nil
|
|
}
|
|
|
|
// machineName: A unique identifier to identify the host that current cAdvisor
|
|
// instance is running on.
|
|
// influxdbHost: The host which runs influxdb.
|
|
// percentilesDuration: Time window which will be considered when calls Percentiles()
|
|
func New(machineName,
|
|
tablename,
|
|
database,
|
|
username,
|
|
password,
|
|
influxdbHost string,
|
|
isSecure bool,
|
|
percentilesDuration time.Duration,
|
|
) (storage.StorageDriver, error) {
|
|
config := &influxdb.ClientConfig{
|
|
Host: influxdbHost,
|
|
Username: username,
|
|
Password: password,
|
|
Database: database,
|
|
IsSecure: isSecure,
|
|
}
|
|
client, err := influxdb.NewClient(config)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// TODO(monnand): With go 1.3, we cannot compress data now.
|
|
client.DisableCompression()
|
|
if percentilesDuration.Seconds() < 1.0 {
|
|
percentilesDuration = 5 * time.Minute
|
|
}
|
|
|
|
ret := &influxdbStorage{
|
|
client: client,
|
|
windowLen: percentilesDuration,
|
|
machineName: machineName,
|
|
tableName: tablename,
|
|
}
|
|
return ret, nil
|
|
}
|