Ignore update errors for dead containers.

This adds an Exists() interface to detect when the container is dead.
Before reporting an update error we check is Exists() is true.

Some documentation was added as well.
This commit is contained in:
Victor Marmol 2014-11-21 17:34:18 +08:00
parent c005bc1722
commit e3ab15417c
9 changed files with 117 additions and 59 deletions

View File

@ -60,6 +60,7 @@ func RegisterHandlers(m manager.Manager) error {
func handleRequest(m manager.Manager, w http.ResponseWriter, r *http.Request) error {
start := time.Now()
defer glog.V(2).Infof("Request took %s", time.Since(start))
request := r.URL.Path
requestElements := strings.Split(r.URL.Path, "/")
@ -195,7 +196,6 @@ func handleRequest(m manager.Manager, w http.ResponseWriter, r *http.Request) er
return fmt.Errorf("unknown API request type %q", requestType)
}
glog.V(2).Infof("Request took %s", time.Since(start))
return nil
}

View File

@ -154,7 +154,7 @@ func (self *Client) httpGetJsonData(data, postData interface{}, url, infoName st
return err
}
if err = json.Unmarshal(body, data); err != nil {
err = fmt.Errorf("unable to unmarshal %q (%v): %v", infoName, string(body), err)
err = fmt.Errorf("unable to unmarshal %q (Body: %q) with error: %v", infoName, string(body), err)
return err
}
return nil

View File

@ -46,12 +46,30 @@ type SubcontainerEvent struct {
// Interface for container operation handlers.
type ContainerHandler interface {
// Returns the ContainerReference
ContainerReference() (info.ContainerReference, error)
// Returns container's isolation spec.
GetSpec() (info.ContainerSpec, error)
// Returns the current stats values of the container.
GetStats() (*info.ContainerStats, error)
// Returns the subcontainers of this container.
ListContainers(listType ListType) ([]info.ContainerReference, error)
// Returns the threads inside this container.
ListThreads(listType ListType) ([]int, error)
// Returns the processes inside this container.
ListProcesses(listType ListType) ([]int, error)
// Registers a channel to listen for events affecting subcontainers (recursively).
WatchSubcontainers(events chan SubcontainerEvent) error
// Stops watching for subcontainer changes.
StopWatchingSubcontainers() error
// Returns whether the container still exists.
Exists() bool
}

View File

@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// Handler for Docker containers.
package docker
import (
"encoding/json"
"errors"
"fmt"
"math"
"os"
@ -27,7 +27,6 @@ import (
"github.com/docker/libcontainer/cgroups"
cgroup_fs "github.com/docker/libcontainer/cgroups/fs"
"github.com/fsouza/go-dockerclient"
"github.com/golang/glog"
"github.com/google/cadvisor/container"
containerLibcontainer "github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/fs"
@ -43,15 +42,23 @@ const pathToLibcontainerState = "execdriver/native"
// aufs/mnt contains the mount points used to compose the rootfs. Hence it is also ignored.
var pathToAufsDir = "aufs/diff"
var fileNotFound = errors.New("file not found")
type dockerContainerHandler struct {
client *docker.Client
name string
id string
aliases []string
machineInfoFactory info.MachineInfoFactory
libcontainerStateDir string
// Path to the libcontainer config file.
libcontainerConfigPath string
// Path to the libcontainer state file.
libcontainerStatePath string
// TODO(vmarmol): Remove when we depend on a newer Docker.
// Path to the libcontainer pid file.
libcontainerPidPath string
cgroup cgroups.Cgroup
usesAufsDriver bool
fsInfo fs.FsInfo
@ -69,11 +76,15 @@ func newDockerContainerHandler(
if err != nil {
return nil, err
}
id := ContainerNameToDockerId(name)
handler := &dockerContainerHandler{
id: id,
client: client,
name: name,
machineInfoFactory: machineInfoFactory,
libcontainerStateDir: path.Join(dockerRootDir, pathToLibcontainerState),
libcontainerConfigPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "container.json"),
libcontainerStatePath: path.Join(dockerRootDir, pathToLibcontainerState, id, "state.json"),
libcontainerPidPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "pid"),
cgroup: cgroups.Cgroup{
Parent: "/",
Name: name,
@ -82,12 +93,11 @@ func newDockerContainerHandler(
fsInfo: fsInfo,
}
handler.storageDirs = append(handler.storageDirs, path.Join(dockerRootDir, pathToAufsDir, path.Base(name)))
id := ContainerNameToDockerId(name)
handler.id = id
ctnr, err := client.InspectContainer(id)
// We assume that if Inspect fails then the container is not known to docker.
ctnr, err := client.InspectContainer(id)
if err != nil {
return nil, fmt.Errorf("failed to inspect container %s - %s\n", id, err)
return nil, fmt.Errorf("failed to inspect container %q: %v", id, err)
}
// Add the name and bare ID as aliases of the container.
@ -107,15 +117,9 @@ func (self *dockerContainerHandler) ContainerReference() (info.ContainerReferenc
// TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API.
func (self *dockerContainerHandler) readLibcontainerConfig() (config *libcontainer.Config, err error) {
configPath := path.Join(self.libcontainerStateDir, self.id, "container.json")
if !utils.FileExists(configPath) {
// TODO(vishh): Return file name as well once we have a better error interface.
err = fileNotFound
return
}
f, err := os.Open(configPath)
f, err := os.Open(self.libcontainerConfigPath)
if err != nil {
return nil, fmt.Errorf("failed to open %s - %s\n", configPath, err)
return nil, fmt.Errorf("failed to open %s - %s\n", self.libcontainerConfigPath, err)
}
defer f.Close()
d := json.NewDecoder(f)
@ -134,23 +138,17 @@ func (self *dockerContainerHandler) readLibcontainerConfig() (config *libcontain
}
func (self *dockerContainerHandler) readLibcontainerState() (state *libcontainer.State, err error) {
statePath := path.Join(self.libcontainerStateDir, self.id, "state.json")
if !utils.FileExists(statePath) {
// TODO(vmarmol): Remove this once we can depend on a newer Docker.
// Libcontainer changed how its state was stored, try the old way of a "pid" file
if utils.FileExists(path.Join(self.libcontainerStateDir, self.id, "pid")) {
if !utils.FileExists(self.libcontainerStatePath) {
if utils.FileExists(self.libcontainerPidPath) {
// We don't need the old state, return an empty state and we'll gracefully degrade.
state = new(libcontainer.State)
return
return &libcontainer.State{}, nil
}
// TODO(vishh): Return file name as well once we have a better error interface.
err = fileNotFound
return
}
f, err := os.Open(statePath)
f, err := os.Open(self.libcontainerStatePath)
if err != nil {
return nil, fmt.Errorf("failed to open %s - %s\n", statePath, err)
return nil, fmt.Errorf("failed to open %s - %s\n", self.libcontainerStatePath, err)
}
defer f.Close()
d := json.NewDecoder(f)
@ -258,10 +256,6 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err error) {
state, err := self.readLibcontainerState()
if err != nil {
if err == fileNotFound {
glog.Errorf("Libcontainer state not found for container %q", self.name)
return &info.ContainerStats{}, nil
}
return
}
@ -322,3 +316,8 @@ func (self *dockerContainerHandler) StopWatchingSubcontainers() error {
// No-op for Docker driver.
return nil
}
func (self *dockerContainerHandler) Exists() bool {
// We consider the container existing if both libcontainer config and state files exist.
return utils.FileExists(self.libcontainerConfigPath) && utils.FileExists(self.libcontainerStatePath)
}

View File

@ -85,6 +85,11 @@ func (self *MockContainerHandler) StopWatchingSubcontainers() error {
return args.Error(0)
}
func (self *MockContainerHandler) Exists() bool {
args := self.Called()
return args.Get(0).(bool)
}
type FactoryForMockContainerHandler struct {
Name string
PrepareContainerHandlerFunc func(name string, handler *MockContainerHandler)

View File

@ -25,9 +25,11 @@ import (
type cgroupSubsystems struct {
// Cgroup subsystem mounts.
// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
mounts []cgroups.Mount
// Cgroup subsystem to their mount location.
// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
mountPoints map[string]string
}

View File

@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
// TODO(cAdvisor): Package comment.
// Handler for "raw" containers.
package raw
import (
@ -478,3 +478,13 @@ func (self *rawContainerHandler) StopWatchingSubcontainers() error {
self.stopWatcher <- nil
return <-self.stopWatcher
}
func (self *rawContainerHandler) Exists() bool {
// If any cgroup exists, the container is still alive.
for _, subsystem := range self.cgroupSubsystems.mounts {
if utils.FileExists(path.Join(subsystem.Mountpoint, self.name)) {
return true
}
}
return false
}

View File

@ -118,11 +118,11 @@ func (self *containerData) nextHousekeeping(lastHousekeeping time.Time) time.Tim
if self.housekeepingInterval > *maxHousekeepingInterval {
self.housekeepingInterval = *maxHousekeepingInterval
}
glog.V(2).Infof("Raising housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
glog.V(3).Infof("Raising housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
} else if self.housekeepingInterval != *HousekeepingInterval {
// Lower interval back to the baseline.
self.housekeepingInterval = *HousekeepingInterval
glog.V(1).Infof("Lowering housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
glog.V(3).Infof("Lowering housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
}
}
}
@ -193,6 +193,10 @@ func (c *containerData) housekeepingTick() {
func (c *containerData) updateSpec() error {
spec, err := c.handler.GetSpec()
if err != nil {
// Ignore errors if the container is dead.
if !c.handler.Exists() {
return nil
}
return err
}
c.lock.Lock()
@ -204,6 +208,10 @@ func (c *containerData) updateSpec() error {
func (c *containerData) updateStats() error {
stats, err := c.handler.GetStats()
if err != nil {
// Ignore errors if the container is dead.
if !c.handler.Exists() {
return nil
}
return err
}
if stats == nil {
@ -211,6 +219,10 @@ func (c *containerData) updateStats() error {
}
ref, err := c.handler.ContainerReference()
if err != nil {
// Ignore errors if the container is dead.
if !c.handler.Exists() {
return nil
}
return err
}
err = c.storageDriver.AddStats(ref, stats)
@ -223,6 +235,10 @@ func (c *containerData) updateStats() error {
func (c *containerData) updateSubcontainers() error {
subcontainers, err := c.handler.ListContainers(container.ListSelf)
if err != nil {
// Ignore errors if the container is dead.
if !c.handler.Exists() {
return nil
}
return err
}
c.lock.Lock()

View File

@ -26,6 +26,7 @@ import (
"github.com/google/cadvisor/info"
itest "github.com/google/cadvisor/info/test"
stest "github.com/google/cadvisor/storage/test"
"github.com/stretchr/testify/assert"
)
const containerName = "/container"
@ -83,15 +84,22 @@ func TestUpdateSubcontainersWithError(t *testing.T) {
[]info.ContainerReference{},
fmt.Errorf("some error"),
)
mockHandler.On("Exists").Return(true)
err := cd.updateSubcontainers()
if err == nil {
t.Fatal("updateSubcontainers should return error")
}
if len(cd.info.Subcontainers) != 0 {
t.Errorf("Received %v subcontainers, should be 0", len(cd.info.Subcontainers))
assert.NotNil(t, cd.updateSubcontainers())
assert.Empty(t, cd.info.Subcontainers, "subcontainers should not be populated on failure")
mockHandler.AssertExpectations(t)
}
func TestUpdateSubcontainersWithErrorOnDeadContainer(t *testing.T) {
cd, mockHandler, _ := newTestContainerData(t)
mockHandler.On("ListContainers", container.ListSelf).Return(
[]info.ContainerReference{},
fmt.Errorf("some error"),
)
mockHandler.On("Exists").Return(false)
assert.Nil(t, cd.updateSubcontainers())
mockHandler.AssertExpectations(t)
}