Ignore update errors for dead containers.
This adds an Exists() interface to detect when the container is dead. Before reporting an update error we check is Exists() is true. Some documentation was added as well.
This commit is contained in:
parent
c005bc1722
commit
e3ab15417c
@ -60,6 +60,7 @@ func RegisterHandlers(m manager.Manager) error {
|
||||
|
||||
func handleRequest(m manager.Manager, w http.ResponseWriter, r *http.Request) error {
|
||||
start := time.Now()
|
||||
defer glog.V(2).Infof("Request took %s", time.Since(start))
|
||||
|
||||
request := r.URL.Path
|
||||
requestElements := strings.Split(r.URL.Path, "/")
|
||||
@ -195,7 +196,6 @@ func handleRequest(m manager.Manager, w http.ResponseWriter, r *http.Request) er
|
||||
return fmt.Errorf("unknown API request type %q", requestType)
|
||||
}
|
||||
|
||||
glog.V(2).Infof("Request took %s", time.Since(start))
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -154,7 +154,7 @@ func (self *Client) httpGetJsonData(data, postData interface{}, url, infoName st
|
||||
return err
|
||||
}
|
||||
if err = json.Unmarshal(body, data); err != nil {
|
||||
err = fmt.Errorf("unable to unmarshal %q (%v): %v", infoName, string(body), err)
|
||||
err = fmt.Errorf("unable to unmarshal %q (Body: %q) with error: %v", infoName, string(body), err)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
|
@ -46,12 +46,30 @@ type SubcontainerEvent struct {
|
||||
|
||||
// Interface for container operation handlers.
|
||||
type ContainerHandler interface {
|
||||
// Returns the ContainerReference
|
||||
ContainerReference() (info.ContainerReference, error)
|
||||
|
||||
// Returns container's isolation spec.
|
||||
GetSpec() (info.ContainerSpec, error)
|
||||
|
||||
// Returns the current stats values of the container.
|
||||
GetStats() (*info.ContainerStats, error)
|
||||
|
||||
// Returns the subcontainers of this container.
|
||||
ListContainers(listType ListType) ([]info.ContainerReference, error)
|
||||
|
||||
// Returns the threads inside this container.
|
||||
ListThreads(listType ListType) ([]int, error)
|
||||
|
||||
// Returns the processes inside this container.
|
||||
ListProcesses(listType ListType) ([]int, error)
|
||||
|
||||
// Registers a channel to listen for events affecting subcontainers (recursively).
|
||||
WatchSubcontainers(events chan SubcontainerEvent) error
|
||||
|
||||
// Stops watching for subcontainer changes.
|
||||
StopWatchingSubcontainers() error
|
||||
|
||||
// Returns whether the container still exists.
|
||||
Exists() bool
|
||||
}
|
||||
|
@ -12,11 +12,11 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Handler for Docker containers.
|
||||
package docker
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
@ -27,7 +27,6 @@ import (
|
||||
"github.com/docker/libcontainer/cgroups"
|
||||
cgroup_fs "github.com/docker/libcontainer/cgroups/fs"
|
||||
"github.com/fsouza/go-dockerclient"
|
||||
"github.com/golang/glog"
|
||||
"github.com/google/cadvisor/container"
|
||||
containerLibcontainer "github.com/google/cadvisor/container/libcontainer"
|
||||
"github.com/google/cadvisor/fs"
|
||||
@ -43,15 +42,23 @@ const pathToLibcontainerState = "execdriver/native"
|
||||
// aufs/mnt contains the mount points used to compose the rootfs. Hence it is also ignored.
|
||||
var pathToAufsDir = "aufs/diff"
|
||||
|
||||
var fileNotFound = errors.New("file not found")
|
||||
|
||||
type dockerContainerHandler struct {
|
||||
client *docker.Client
|
||||
name string
|
||||
id string
|
||||
aliases []string
|
||||
machineInfoFactory info.MachineInfoFactory
|
||||
libcontainerStateDir string
|
||||
|
||||
// Path to the libcontainer config file.
|
||||
libcontainerConfigPath string
|
||||
|
||||
// Path to the libcontainer state file.
|
||||
libcontainerStatePath string
|
||||
|
||||
// TODO(vmarmol): Remove when we depend on a newer Docker.
|
||||
// Path to the libcontainer pid file.
|
||||
libcontainerPidPath string
|
||||
|
||||
cgroup cgroups.Cgroup
|
||||
usesAufsDriver bool
|
||||
fsInfo fs.FsInfo
|
||||
@ -69,11 +76,15 @@ func newDockerContainerHandler(
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
id := ContainerNameToDockerId(name)
|
||||
handler := &dockerContainerHandler{
|
||||
id: id,
|
||||
client: client,
|
||||
name: name,
|
||||
machineInfoFactory: machineInfoFactory,
|
||||
libcontainerStateDir: path.Join(dockerRootDir, pathToLibcontainerState),
|
||||
libcontainerConfigPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "container.json"),
|
||||
libcontainerStatePath: path.Join(dockerRootDir, pathToLibcontainerState, id, "state.json"),
|
||||
libcontainerPidPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "pid"),
|
||||
cgroup: cgroups.Cgroup{
|
||||
Parent: "/",
|
||||
Name: name,
|
||||
@ -82,12 +93,11 @@ func newDockerContainerHandler(
|
||||
fsInfo: fsInfo,
|
||||
}
|
||||
handler.storageDirs = append(handler.storageDirs, path.Join(dockerRootDir, pathToAufsDir, path.Base(name)))
|
||||
id := ContainerNameToDockerId(name)
|
||||
handler.id = id
|
||||
ctnr, err := client.InspectContainer(id)
|
||||
|
||||
// We assume that if Inspect fails then the container is not known to docker.
|
||||
ctnr, err := client.InspectContainer(id)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to inspect container %s - %s\n", id, err)
|
||||
return nil, fmt.Errorf("failed to inspect container %q: %v", id, err)
|
||||
}
|
||||
|
||||
// Add the name and bare ID as aliases of the container.
|
||||
@ -107,15 +117,9 @@ func (self *dockerContainerHandler) ContainerReference() (info.ContainerReferenc
|
||||
|
||||
// TODO(vmarmol): Switch to getting this from libcontainer once we have a solid API.
|
||||
func (self *dockerContainerHandler) readLibcontainerConfig() (config *libcontainer.Config, err error) {
|
||||
configPath := path.Join(self.libcontainerStateDir, self.id, "container.json")
|
||||
if !utils.FileExists(configPath) {
|
||||
// TODO(vishh): Return file name as well once we have a better error interface.
|
||||
err = fileNotFound
|
||||
return
|
||||
}
|
||||
f, err := os.Open(configPath)
|
||||
f, err := os.Open(self.libcontainerConfigPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open %s - %s\n", configPath, err)
|
||||
return nil, fmt.Errorf("failed to open %s - %s\n", self.libcontainerConfigPath, err)
|
||||
}
|
||||
defer f.Close()
|
||||
d := json.NewDecoder(f)
|
||||
@ -134,23 +138,17 @@ func (self *dockerContainerHandler) readLibcontainerConfig() (config *libcontain
|
||||
}
|
||||
|
||||
func (self *dockerContainerHandler) readLibcontainerState() (state *libcontainer.State, err error) {
|
||||
statePath := path.Join(self.libcontainerStateDir, self.id, "state.json")
|
||||
if !utils.FileExists(statePath) {
|
||||
// TODO(vmarmol): Remove this once we can depend on a newer Docker.
|
||||
// Libcontainer changed how its state was stored, try the old way of a "pid" file
|
||||
if utils.FileExists(path.Join(self.libcontainerStateDir, self.id, "pid")) {
|
||||
if !utils.FileExists(self.libcontainerStatePath) {
|
||||
if utils.FileExists(self.libcontainerPidPath) {
|
||||
// We don't need the old state, return an empty state and we'll gracefully degrade.
|
||||
state = new(libcontainer.State)
|
||||
return
|
||||
return &libcontainer.State{}, nil
|
||||
}
|
||||
|
||||
// TODO(vishh): Return file name as well once we have a better error interface.
|
||||
err = fileNotFound
|
||||
return
|
||||
}
|
||||
f, err := os.Open(statePath)
|
||||
f, err := os.Open(self.libcontainerStatePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open %s - %s\n", statePath, err)
|
||||
return nil, fmt.Errorf("failed to open %s - %s\n", self.libcontainerStatePath, err)
|
||||
}
|
||||
defer f.Close()
|
||||
d := json.NewDecoder(f)
|
||||
@ -258,10 +256,6 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
|
||||
func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err error) {
|
||||
state, err := self.readLibcontainerState()
|
||||
if err != nil {
|
||||
if err == fileNotFound {
|
||||
glog.Errorf("Libcontainer state not found for container %q", self.name)
|
||||
return &info.ContainerStats{}, nil
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@ -322,3 +316,8 @@ func (self *dockerContainerHandler) StopWatchingSubcontainers() error {
|
||||
// No-op for Docker driver.
|
||||
return nil
|
||||
}
|
||||
|
||||
func (self *dockerContainerHandler) Exists() bool {
|
||||
// We consider the container existing if both libcontainer config and state files exist.
|
||||
return utils.FileExists(self.libcontainerConfigPath) && utils.FileExists(self.libcontainerStatePath)
|
||||
}
|
||||
|
@ -85,6 +85,11 @@ func (self *MockContainerHandler) StopWatchingSubcontainers() error {
|
||||
return args.Error(0)
|
||||
}
|
||||
|
||||
func (self *MockContainerHandler) Exists() bool {
|
||||
args := self.Called()
|
||||
return args.Get(0).(bool)
|
||||
}
|
||||
|
||||
type FactoryForMockContainerHandler struct {
|
||||
Name string
|
||||
PrepareContainerHandlerFunc func(name string, handler *MockContainerHandler)
|
||||
|
@ -25,9 +25,11 @@ import (
|
||||
|
||||
type cgroupSubsystems struct {
|
||||
// Cgroup subsystem mounts.
|
||||
// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
|
||||
mounts []cgroups.Mount
|
||||
|
||||
// Cgroup subsystem to their mount location.
|
||||
// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
|
||||
mountPoints map[string]string
|
||||
}
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// TODO(cAdvisor): Package comment.
|
||||
// Handler for "raw" containers.
|
||||
package raw
|
||||
|
||||
import (
|
||||
@ -478,3 +478,13 @@ func (self *rawContainerHandler) StopWatchingSubcontainers() error {
|
||||
self.stopWatcher <- nil
|
||||
return <-self.stopWatcher
|
||||
}
|
||||
|
||||
func (self *rawContainerHandler) Exists() bool {
|
||||
// If any cgroup exists, the container is still alive.
|
||||
for _, subsystem := range self.cgroupSubsystems.mounts {
|
||||
if utils.FileExists(path.Join(subsystem.Mountpoint, self.name)) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
@ -118,11 +118,11 @@ func (self *containerData) nextHousekeeping(lastHousekeeping time.Time) time.Tim
|
||||
if self.housekeepingInterval > *maxHousekeepingInterval {
|
||||
self.housekeepingInterval = *maxHousekeepingInterval
|
||||
}
|
||||
glog.V(2).Infof("Raising housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
|
||||
glog.V(3).Infof("Raising housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
|
||||
} else if self.housekeepingInterval != *HousekeepingInterval {
|
||||
// Lower interval back to the baseline.
|
||||
self.housekeepingInterval = *HousekeepingInterval
|
||||
glog.V(1).Infof("Lowering housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
|
||||
glog.V(3).Infof("Lowering housekeeping interval for %q to %v", self.info.Name, self.housekeepingInterval)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -193,6 +193,10 @@ func (c *containerData) housekeepingTick() {
|
||||
func (c *containerData) updateSpec() error {
|
||||
spec, err := c.handler.GetSpec()
|
||||
if err != nil {
|
||||
// Ignore errors if the container is dead.
|
||||
if !c.handler.Exists() {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
c.lock.Lock()
|
||||
@ -204,6 +208,10 @@ func (c *containerData) updateSpec() error {
|
||||
func (c *containerData) updateStats() error {
|
||||
stats, err := c.handler.GetStats()
|
||||
if err != nil {
|
||||
// Ignore errors if the container is dead.
|
||||
if !c.handler.Exists() {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if stats == nil {
|
||||
@ -211,6 +219,10 @@ func (c *containerData) updateStats() error {
|
||||
}
|
||||
ref, err := c.handler.ContainerReference()
|
||||
if err != nil {
|
||||
// Ignore errors if the container is dead.
|
||||
if !c.handler.Exists() {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
err = c.storageDriver.AddStats(ref, stats)
|
||||
@ -223,6 +235,10 @@ func (c *containerData) updateStats() error {
|
||||
func (c *containerData) updateSubcontainers() error {
|
||||
subcontainers, err := c.handler.ListContainers(container.ListSelf)
|
||||
if err != nil {
|
||||
// Ignore errors if the container is dead.
|
||||
if !c.handler.Exists() {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
c.lock.Lock()
|
||||
|
@ -26,6 +26,7 @@ import (
|
||||
"github.com/google/cadvisor/info"
|
||||
itest "github.com/google/cadvisor/info/test"
|
||||
stest "github.com/google/cadvisor/storage/test"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
const containerName = "/container"
|
||||
@ -83,15 +84,22 @@ func TestUpdateSubcontainersWithError(t *testing.T) {
|
||||
[]info.ContainerReference{},
|
||||
fmt.Errorf("some error"),
|
||||
)
|
||||
mockHandler.On("Exists").Return(true)
|
||||
|
||||
err := cd.updateSubcontainers()
|
||||
if err == nil {
|
||||
t.Fatal("updateSubcontainers should return error")
|
||||
}
|
||||
if len(cd.info.Subcontainers) != 0 {
|
||||
t.Errorf("Received %v subcontainers, should be 0", len(cd.info.Subcontainers))
|
||||
assert.NotNil(t, cd.updateSubcontainers())
|
||||
assert.Empty(t, cd.info.Subcontainers, "subcontainers should not be populated on failure")
|
||||
mockHandler.AssertExpectations(t)
|
||||
}
|
||||
|
||||
func TestUpdateSubcontainersWithErrorOnDeadContainer(t *testing.T) {
|
||||
cd, mockHandler, _ := newTestContainerData(t)
|
||||
mockHandler.On("ListContainers", container.ListSelf).Return(
|
||||
[]info.ContainerReference{},
|
||||
fmt.Errorf("some error"),
|
||||
)
|
||||
mockHandler.On("Exists").Return(false)
|
||||
|
||||
assert.Nil(t, cd.updateSubcontainers())
|
||||
mockHandler.AssertExpectations(t)
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user