diff --git a/container/containerd/handler.go b/container/containerd/handler.go index e755f774..a8095f42 100644 --- a/container/containerd/handler.go +++ b/container/containerd/handler.go @@ -20,7 +20,9 @@ import ( "fmt" "path" "strings" + "time" + "github.com/containerd/containerd/errdefs" cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs" libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs" "golang.org/x/net/context" @@ -91,10 +93,28 @@ func newContainerdContainerHandler( return nil, err } - taskPid, err := client.TaskPid(ctx, id) - if err != nil { - return nil, err + // Cgroup is created during task creation. When cadvisor sees the cgroup, + // task may not be fully created yet. Use a retry+backoff to tolerate the + // race condition. + // TODO(random-liu): Use cri-containerd client to talk with cri-containerd + // instead. cri-containerd has some internal synchronization to make sure + // `ContainerStatus` only returns result after `StartContainer` finishes. + var taskPid uint32 + backoff := 100 * time.Millisecond + retry := 5 + for { + taskPid, err = client.TaskPid(ctx, id) + if err == nil { + break + } + retry-- + if !errdefs.IsNotFound(err) || retry == 0 { + return nil, err + } + time.Sleep(backoff) + backoff *= 2 } + rootfs := "/" if !inHostNamespace { rootfs = "/rootfs"