Merge pull request #1286 from timstclair/subcontainers2
Make manager multi-container functions robust to partial failures
This commit is contained in:
commit
05809d5936
@ -361,7 +361,10 @@ func (self *version2_0) HandleRequest(requestType string, request []string, m ma
|
||||
glog.V(4).Infof("Api - Stats: Looking for stats for container %q, options %+v", name, opt)
|
||||
infos, err := m.GetRequestedContainersInfo(name, opt)
|
||||
if err != nil {
|
||||
return err
|
||||
if len(infos) == 0 {
|
||||
return err
|
||||
}
|
||||
glog.Errorf("Error calling GetRequestedContainersInfo: %v", err)
|
||||
}
|
||||
contStats := make(map[string][]v2.DeprecatedContainerStats, 0)
|
||||
for name, cinfo := range infos {
|
||||
@ -482,7 +485,10 @@ func (self *version2_1) HandleRequest(requestType string, request []string, m ma
|
||||
glog.V(4).Infof("Api - MachineStats(%v)", request)
|
||||
cont, err := m.GetRequestedContainersInfo("/", opt)
|
||||
if err != nil {
|
||||
return err
|
||||
if len(cont) == 0 {
|
||||
return err
|
||||
}
|
||||
glog.Errorf("Error calling GetRequestedContainersInfo: %v", err)
|
||||
}
|
||||
return writeResult(v2.MachineStatsFromV1(cont["/"]), w)
|
||||
case statsApi:
|
||||
@ -490,7 +496,10 @@ func (self *version2_1) HandleRequest(requestType string, request []string, m ma
|
||||
glog.V(4).Infof("Api - Stats: Looking for stats for container %q, options %+v", name, opt)
|
||||
conts, err := m.GetRequestedContainersInfo(name, opt)
|
||||
if err != nil {
|
||||
return err
|
||||
if len(conts) == 0 {
|
||||
return err
|
||||
}
|
||||
glog.Errorf("Error calling GetRequestedContainersInfo: %v", err)
|
||||
}
|
||||
contStats := make(map[string]v2.ContainerInfo, len(conts))
|
||||
for name, cont := range conts {
|
||||
|
@ -70,6 +70,8 @@ type Manager interface {
|
||||
GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)
|
||||
|
||||
// Get V2 information about a container.
|
||||
// Recursive (subcontainer) requests are best-effort, and may return a partial result alongside an
|
||||
// error in the partial failure case.
|
||||
GetContainerInfoV2(containerName string, options v2.RequestOptions) (map[string]v2.ContainerInfo, error)
|
||||
|
||||
// Get information about all subcontainers of the specified container (includes self).
|
||||
@ -394,15 +396,16 @@ func (self *manager) GetDerivedStats(containerName string, options v2.RequestOpt
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var errs partialFailure
|
||||
stats := make(map[string]v2.DerivedStats)
|
||||
for name, cont := range conts {
|
||||
d, err := cont.DerivedStats()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
errs.append(name, "DerivedStats", err)
|
||||
}
|
||||
stats[name] = d
|
||||
}
|
||||
return stats, nil
|
||||
return stats, errs
|
||||
}
|
||||
|
||||
func (self *manager) GetContainerSpec(containerName string, options v2.RequestOptions) (map[string]v2.ContainerSpec, error) {
|
||||
@ -410,16 +413,17 @@ func (self *manager) GetContainerSpec(containerName string, options v2.RequestOp
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var errs partialFailure
|
||||
specs := make(map[string]v2.ContainerSpec)
|
||||
for name, cont := range conts {
|
||||
cinfo, err := cont.GetInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
errs.append(name, "GetInfo", err)
|
||||
}
|
||||
spec := self.getV2Spec(cinfo)
|
||||
specs[name] = spec
|
||||
}
|
||||
return specs, nil
|
||||
return specs, errs
|
||||
}
|
||||
|
||||
// Get V2 container spec from v1 container info.
|
||||
@ -455,26 +459,32 @@ func (self *manager) GetContainerInfoV2(containerName string, options v2.Request
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var errs partialFailure
|
||||
var nilTime time.Time // Ignored.
|
||||
|
||||
infos := make(map[string]v2.ContainerInfo, len(containers))
|
||||
for name, container := range containers {
|
||||
result := v2.ContainerInfo{}
|
||||
cinfo, err := container.GetInfo()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
errs.append(name, "GetInfo", err)
|
||||
infos[name] = result
|
||||
continue
|
||||
}
|
||||
result.Spec = self.getV2Spec(cinfo)
|
||||
|
||||
var nilTime time.Time // Ignored.
|
||||
stats, err := self.memoryCache.RecentStats(name, nilTime, nilTime, options.Count)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
errs.append(name, "RecentStats", err)
|
||||
infos[name] = result
|
||||
continue
|
||||
}
|
||||
|
||||
infos[name] = v2.ContainerInfo{
|
||||
Spec: self.getV2Spec(cinfo),
|
||||
Stats: v2.ContainerStatsFromV1(&cinfo.Spec, stats),
|
||||
}
|
||||
result.Stats = v2.ContainerStatsFromV1(&cinfo.Spec, stats)
|
||||
infos[name] = result
|
||||
}
|
||||
|
||||
return infos, nil
|
||||
return infos, errs
|
||||
}
|
||||
|
||||
func (self *manager) containerDataToContainerInfo(cont *containerData, query *info.ContainerInfoRequest) (*info.ContainerInfo, error) {
|
||||
@ -615,6 +625,7 @@ func (self *manager) GetRequestedContainersInfo(containerName string, options v2
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var errs partialFailure
|
||||
containersMap := make(map[string]*info.ContainerInfo)
|
||||
query := info.ContainerInfoRequest{
|
||||
NumStats: options.Count,
|
||||
@ -622,12 +633,11 @@ func (self *manager) GetRequestedContainersInfo(containerName string, options v2
|
||||
for name, data := range containers {
|
||||
info, err := self.containerDataToContainerInfo(data, &query)
|
||||
if err != nil {
|
||||
// Skip containers with errors, we try to degrade gracefully.
|
||||
continue
|
||||
errs.append(name, "containerDataToContainerInfo", err)
|
||||
}
|
||||
containersMap[name] = info
|
||||
}
|
||||
return containersMap, nil
|
||||
return containersMap, errs
|
||||
}
|
||||
|
||||
func (self *manager) getRequestedContainers(containerName string, options v2.RequestOptions) (map[string]*containerData, error) {
|
||||
@ -1058,19 +1068,18 @@ func (self *manager) watchForNewContainers(quit chan error) error {
|
||||
glog.Warningf("Failed to process watch event %+v: %v", event, err)
|
||||
}
|
||||
case <-quit:
|
||||
errors := []string{}
|
||||
var errs partialFailure
|
||||
|
||||
// Stop processing events if asked to quit.
|
||||
for _, watcher := range self.containerWatchers {
|
||||
for i, watcher := range self.containerWatchers {
|
||||
err := watcher.Stop()
|
||||
if err != nil {
|
||||
errors = append(errors, err.Error())
|
||||
errs.append(fmt.Sprintf("watcher %d", i), "Stop", err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(errors) > 0 {
|
||||
err_str := strings.Join(errors, ", ")
|
||||
quit <- fmt.Errorf("Error quiting watchers: %v", err_str)
|
||||
if len(errs) > 0 {
|
||||
quit <- errs
|
||||
} else {
|
||||
quit <- nil
|
||||
glog.Infof("Exiting thread watching subcontainers")
|
||||
@ -1244,3 +1253,14 @@ func getVersionInfo() (*info.VersionInfo, error) {
|
||||
CadvisorRevision: version.Info["revision"],
|
||||
}, nil
|
||||
}
|
||||
|
||||
// partialFailure accumulates human-readable descriptions of individual
// failures hit while processing several items (containers, watchers, ...),
// so a caller can return whatever results did succeed together with one
// aggregated error.
//
// The zero value is an empty, ready-to-use accumulator.
//
// NOTE: converting an empty partialFailure directly to the error interface
// yields a NON-nil error (the interface holds a typed nil slice). Use OrNil
// when returning an accumulator from a function that reports success as a
// nil error.
type partialFailure []string

// append records that operation failed with err for the item identified by id.
func (f *partialFailure) append(id, operation string, err error) {
	*f = append(*f, fmt.Sprintf("[%q: %s: %s]", id, operation, err))
}

// Error implements the error interface by joining every recorded failure
// into a single comma-separated message.
func (f partialFailure) Error() string {
	return fmt.Sprintf("partial failures: %s", strings.Join(f, ", "))
}

// OrNil returns nil when no failures have been recorded and f itself
// otherwise, so that callers checking "err != nil" do not see a spurious
// error on the all-successful path.
func (f partialFailure) OrNil() error {
	if len(f) == 0 {
		return nil
	}
	return f
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
package manager
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
@ -28,7 +29,10 @@ import (
|
||||
"github.com/google/cadvisor/container/docker"
|
||||
info "github.com/google/cadvisor/info/v1"
|
||||
itest "github.com/google/cadvisor/info/v1/test"
|
||||
"github.com/google/cadvisor/info/v2"
|
||||
"github.com/google/cadvisor/utils/sysfs/fakesysfs"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TODO(vmarmol): Refactor these tests.
|
||||
@ -151,6 +155,88 @@ func TestGetContainerInfo(t *testing.T) {
|
||||
|
||||
}
|
||||
|
||||
func TestGetContainerInfoV2(t *testing.T) {
|
||||
containers := []string{
|
||||
"/",
|
||||
"/c1",
|
||||
"/c2",
|
||||
}
|
||||
|
||||
options := v2.RequestOptions{
|
||||
IdType: v2.TypeName,
|
||||
Count: 1,
|
||||
Recursive: true,
|
||||
}
|
||||
query := &info.ContainerInfoRequest{
|
||||
NumStats: 2,
|
||||
}
|
||||
|
||||
m, _, handlerMap := expectManagerWithContainers(containers, query, t)
|
||||
|
||||
infos, err := m.GetContainerInfoV2("/", options)
|
||||
require.NoError(t, err, "Error calling GetContainerInfoV2")
|
||||
|
||||
for container, handler := range handlerMap {
|
||||
handler.AssertExpectations(t)
|
||||
info, ok := infos[container]
|
||||
assert.True(t, ok, "Missing info for container %q", container)
|
||||
assert.NotEqual(t, v2.ContainerSpec{}, info.Spec, "Empty spec for container %q", container)
|
||||
assert.NotEmpty(t, info.Stats, "Missing stats for container %q", container)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetContainerInfoV2Failure(t *testing.T) {
|
||||
successful := "/"
|
||||
statless := "/c1"
|
||||
failing := "/c2"
|
||||
containers := []string{
|
||||
successful, statless, failing,
|
||||
}
|
||||
|
||||
options := v2.RequestOptions{
|
||||
IdType: v2.TypeName,
|
||||
Count: 1,
|
||||
Recursive: true,
|
||||
}
|
||||
query := &info.ContainerInfoRequest{
|
||||
NumStats: 2,
|
||||
}
|
||||
|
||||
m, _, handlerMap := expectManagerWithContainers(containers, query, t)
|
||||
|
||||
// Remove /c1 stats
|
||||
require.NoError(t, m.memoryCache.RemoveContainer(statless))
|
||||
|
||||
// Make GetSpec fail on /c2
|
||||
mockErr := fmt.Errorf("intentional GetSpec failure")
|
||||
failingHandler := container.NewMockContainerHandler(failing)
|
||||
failingHandler.On("GetSpec").Return(info.ContainerSpec{}, mockErr)
|
||||
failingHandler.On("Exists").Return(true)
|
||||
*handlerMap[failing] = *failingHandler
|
||||
m.containers[namespacedContainerName{Name: failing}].lastUpdatedTime = time.Time{} // Force GetSpec.
|
||||
|
||||
infos, err := m.GetContainerInfoV2("/", options)
|
||||
assert.Error(t, err, "Expected error calling GetContainerInfoV2")
|
||||
|
||||
// Successful containers still successful.
|
||||
info, ok := infos[successful]
|
||||
assert.True(t, ok, "Missing info for container %q", successful)
|
||||
assert.NotEqual(t, v2.ContainerSpec{}, info.Spec, "Empty spec for container %q", successful)
|
||||
assert.NotEmpty(t, info.Stats, "Missing stats for container %q", successful)
|
||||
|
||||
// "/c1" present with spec.
|
||||
info, ok = infos[statless]
|
||||
assert.True(t, ok, "Missing info for container %q", statless)
|
||||
assert.NotEqual(t, v2.ContainerSpec{}, info.Spec, "Empty spec for container %q", statless)
|
||||
assert.Empty(t, info.Stats, "Missing stats for container %q", successful)
|
||||
|
||||
// "/c2" should be present but empty.
|
||||
info, ok = infos[failing]
|
||||
assert.True(t, ok, "Missing info for failed container")
|
||||
assert.Equal(t, v2.ContainerInfo{}, info, "Empty spec for failed container")
|
||||
assert.Empty(t, info.Stats, "Missing stats for failed container")
|
||||
}
|
||||
|
||||
func TestSubcontainersInfo(t *testing.T) {
|
||||
containers := []string{
|
||||
"/c1",
|
||||
|
Loading…
Reference in New Issue
Block a user