Merge pull request #1117 from vishh/timeout-du
Timeout `du` after a minute.
This commit is contained in:
commit
5fdd1ee185
@ -36,6 +36,7 @@ type realFsHandler struct {
|
||||
usageBytes uint64
|
||||
baseUsageBytes uint64
|
||||
period time.Duration
|
||||
minPeriod time.Duration
|
||||
rootfs string
|
||||
extraDir string
|
||||
fsInfo fs.FsInfo
|
||||
@ -43,7 +44,11 @@ type realFsHandler struct {
|
||||
stopChan chan struct{}
|
||||
}
|
||||
|
||||
const longDu = time.Second
|
||||
const (
|
||||
longDu = time.Second
|
||||
duTimeout = time.Minute
|
||||
maxDuBackoffFactor = 20
|
||||
)
|
||||
|
||||
var _ fsHandler = &realFsHandler{}
|
||||
|
||||
@ -53,6 +58,7 @@ func newFsHandler(period time.Duration, rootfs, extraDir string, fsInfo fs.FsInf
|
||||
usageBytes: 0,
|
||||
baseUsageBytes: 0,
|
||||
period: period,
|
||||
minPeriod: period,
|
||||
rootfs: rootfs,
|
||||
extraDir: extraDir,
|
||||
fsInfo: fsInfo,
|
||||
@ -60,18 +66,14 @@ func newFsHandler(period time.Duration, rootfs, extraDir string, fsInfo fs.FsInf
|
||||
}
|
||||
}
|
||||
|
||||
func (fh *realFsHandler) needsUpdate() bool {
|
||||
return time.Now().After(fh.lastUpdate.Add(fh.period))
|
||||
}
|
||||
|
||||
func (fh *realFsHandler) update() error {
|
||||
// TODO(vishh): Add support for external mounts.
|
||||
baseUsage, err := fh.fsInfo.GetDirUsage(fh.rootfs)
|
||||
baseUsage, err := fh.fsInfo.GetDirUsage(fh.rootfs, duTimeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
extraDirUsage, err := fh.fsInfo.GetDirUsage(fh.extraDir)
|
||||
extraDirUsage, err := fh.fsInfo.GetDirUsage(fh.extraDir, duTimeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@ -93,11 +95,17 @@ func (fh *realFsHandler) trackUsage() {
|
||||
case <-time.After(fh.period):
|
||||
start := time.Now()
|
||||
if err := fh.update(); err != nil {
|
||||
glog.V(2).Infof("failed to collect filesystem stats - %v", err)
|
||||
glog.Errorf("failed to collect filesystem stats - %v", err)
|
||||
fh.period = fh.period * 2
|
||||
if fh.period > maxDuBackoffFactor*fh.minPeriod {
|
||||
fh.period = maxDuBackoffFactor * fh.minPeriod
|
||||
}
|
||||
} else {
|
||||
fh.period = fh.minPeriod
|
||||
}
|
||||
duration := time.Since(start)
|
||||
if duration > longDu {
|
||||
glog.V(3).Infof("`du` on following dirs took %v: %v", duration, []string{fh.rootfs, fh.extraDir})
|
||||
glog.V(2).Infof("`du` on following dirs took %v: %v", duration, []string{fh.rootfs, fh.extraDir})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -129,6 +129,7 @@ func newDockerContainerHandler(
|
||||
rootFs := "/"
|
||||
if !inHostNamespace {
|
||||
rootFs = "/rootfs"
|
||||
storageDir = path.Join(rootFs, storageDir)
|
||||
}
|
||||
|
||||
id := ContainerNameToDockerId(name)
|
||||
|
@ -210,7 +210,7 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
|
||||
spec.Cpu.Period = readUInt64(cpuRoot, "cpu.cfs_period_us")
|
||||
quota := readString(cpuRoot, "cpu.cfs_quota_us")
|
||||
|
||||
if quota != "-1" {
|
||||
if quota != "" && quota != "-1" {
|
||||
val, err := strconv.ParseUint(quota, 10, 64)
|
||||
if err != nil {
|
||||
glog.Errorf("raw driver: Failed to parse CPUQuota from %q: %s", path.Join(cpuRoot, "cpu.cfs_quota_us"), err)
|
||||
|
35
fs/fs.go
35
fs/fs.go
@ -21,6 +21,7 @@ import (
|
||||
"bufio"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
@ -29,6 +30,7 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/pkg/mount"
|
||||
"github.com/golang/glog"
|
||||
@ -355,14 +357,37 @@ func (self *RealFsInfo) GetDirFsDevice(dir string) (*DeviceInfo, error) {
|
||||
return nil, fmt.Errorf("could not find device with major: %d, minor: %d in cached partitions map", major, minor)
|
||||
}
|
||||
|
||||
func (self *RealFsInfo) GetDirUsage(dir string) (uint64, error) {
|
||||
out, err := exec.Command("nice", "-n", "19", "du", "-s", dir).CombinedOutput()
|
||||
func (self *RealFsInfo) GetDirUsage(dir string, timeout time.Duration) (uint64, error) {
|
||||
cmd := exec.Command("nice", "-n", "19", "du", "-s", dir)
|
||||
stdoutp, err := cmd.StdoutPipe()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("du command failed on %s with output %s - %s", dir, out, err)
|
||||
return 0, fmt.Errorf("failed to setup stdout for cmd %v - %v", cmd.Args, err)
|
||||
}
|
||||
usageInKb, err := strconv.ParseUint(strings.Fields(string(out))[0], 10, 64)
|
||||
stderrp, err := cmd.StderrPipe()
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot parse 'du' output %s - %s", out, err)
|
||||
return 0, fmt.Errorf("failed to setup stderr for cmd %v - %v", cmd.Args, err)
|
||||
}
|
||||
|
||||
if err := cmd.Start(); err != nil {
|
||||
return 0, fmt.Errorf("failed to exec du - %v", err)
|
||||
}
|
||||
stdoutb, souterr := ioutil.ReadAll(stdoutp)
|
||||
stderrb, _ := ioutil.ReadAll(stderrp)
|
||||
timer := time.AfterFunc(timeout, func() {
|
||||
glog.Infof("killing cmd %v due to timeout(%s)", cmd.Args, timeout.String())
|
||||
cmd.Process.Kill()
|
||||
})
|
||||
if err := cmd.Wait(); err != nil {
|
||||
return 0, fmt.Errorf("du command failed on %s with output stdout: %s, stderr: %s - %v", dir, string(stdoutb), string(stderrb), err)
|
||||
}
|
||||
stdout := string(stdoutb)
|
||||
if souterr != nil {
|
||||
glog.Errorf("failed to read from stdout for cmd %v - %v", cmd.Args, souterr)
|
||||
}
|
||||
timer.Stop()
|
||||
usageInKb, err := strconv.ParseUint(strings.Fields(stdout)[0], 10, 64)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("cannot parse 'du' output %s - %s", stdout, err)
|
||||
}
|
||||
return usageInKb * 1024, nil
|
||||
}
|
||||
|
@ -20,6 +20,7 @@ import (
|
||||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
@ -98,7 +99,7 @@ func TestDirUsage(t *testing.T) {
|
||||
fi, err := f.Stat()
|
||||
as.NoError(err)
|
||||
expectedSize := uint64(fi.Size())
|
||||
size, err := fsInfo.GetDirUsage(dir)
|
||||
size, err := fsInfo.GetDirUsage(dir, time.Minute)
|
||||
as.NoError(err)
|
||||
as.True(expectedSize <= size, "expected dir size to be at-least %d; got size: %d", expectedSize, size)
|
||||
}
|
||||
|
@ -14,6 +14,8 @@
|
||||
|
||||
package fs
|
||||
|
||||
import "time"
|
||||
|
||||
type DeviceInfo struct {
|
||||
Device string
|
||||
Major uint
|
||||
@ -50,7 +52,7 @@ type FsInfo interface {
|
||||
GetFsInfoForPath(mountSet map[string]struct{}) ([]Fs, error)
|
||||
|
||||
// Returns number of bytes occupied by 'dir'.
|
||||
GetDirUsage(dir string) (uint64, error)
|
||||
GetDirUsage(dir string, timeout time.Duration) (uint64, error)
|
||||
|
||||
// Returns the block device info of the filesystem on which 'dir' resides.
|
||||
GetDirFsDevice(dir string) (*DeviceInfo, error)
|
||||
|
@ -39,13 +39,17 @@ import (
|
||||
// must be able to ssh into hosts without password
|
||||
// godep go run ./integration/runner/runner.go --logtostderr --v 2 --ssh-config <.ssh/config file> <list of hosts>
|
||||
|
||||
const cadvisorBinary = "cadvisor"
|
||||
const (
|
||||
cadvisorBinary = "cadvisor"
|
||||
testTimeout = 15 * time.Minute
|
||||
)
|
||||
|
||||
var cadvisorTimeout = flag.Duration("cadvisor_timeout", 15*time.Second, "Time to wait for cAdvisor to come up on the remote host")
|
||||
var port = flag.Int("port", 8080, "Port in which to start cAdvisor in the remote host")
|
||||
var testRetryCount = flag.Int("test-retry-count", 3, "Number of times to retry failed tests before failing.")
|
||||
var testRetryWhitelist = flag.String("test-retry-whitelist", "", "Path to newline separated list of regexexp for test failures that should be retried. If empty, no tests are retried.")
|
||||
var sshOptions = flag.String("ssh-options", "", "Commandline options passed to ssh.")
|
||||
var testArgs = flag.String("test_args", "", "arguments to be passed to the integrationt tests")
|
||||
var retryRegex *regexp.Regexp
|
||||
|
||||
func getAttributes(ipAddress, portStr string) (*cadvisorApi.Attributes, error) {
|
||||
@ -158,7 +162,7 @@ func PushAndRunTests(host, testDir string) error {
|
||||
}
|
||||
// Run the command
|
||||
|
||||
err = RunCommand("godep", "go", "test", "github.com/google/cadvisor/integration/tests/...", "--host", host, "--port", portStr, "--ssh-options", *sshOptions)
|
||||
err = RunCommand("godep", "go", "test", "--timeout", testTimeout.String(), *testArgs, "github.com/google/cadvisor/integration/tests/...", "--host", host, "--port", portStr, "--ssh-options", *sshOptions)
|
||||
if err == nil {
|
||||
// On success, break out of retry loop
|
||||
break
|
||||
@ -166,7 +170,7 @@ func PushAndRunTests(host, testDir string) error {
|
||||
|
||||
// Only retry on test failures caused by these known flaky failure conditions
|
||||
if retryRegex == nil || !retryRegex.Match([]byte(err.Error())) {
|
||||
glog.Warningf("Skipping retry for tests on host %s because error is not whitelisted: %s", host, err.Error())
|
||||
glog.Warningf("Skipping retry for tests on host %s because error is not whitelisted", host)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
@ -292,36 +292,62 @@ func TestDockerContainerNetworkStats(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestDockerFilesystemStats(t *testing.T) {
|
||||
t.Skip("enable this once this test does not cause timeouts.")
|
||||
fm := framework.New(t)
|
||||
defer fm.Cleanup()
|
||||
|
||||
storageDriver := fm.Docker().StorageDriver()
|
||||
switch storageDriver {
|
||||
case framework.Aufs:
|
||||
case framework.Overlay:
|
||||
default:
|
||||
t.Skip("skipping filesystem stats test")
|
||||
}
|
||||
const (
|
||||
ddUsage = uint64(1 << 3) // 1 KB
|
||||
sleepDuration = 10 * time.Second
|
||||
)
|
||||
// Wait for the container to show up.
|
||||
containerId := fm.Docker().RunBusybox("/bin/sh", "-c", "dd if=/dev/zero of=/file count=1 bs=1M & ping www.google.com")
|
||||
containerId := fm.Docker().RunBusybox("/bin/sh", "-c", fmt.Sprintf("'dd if=/dev/zero of=/file count=2 bs=%d & sleep 10000'", ddUsage))
|
||||
|
||||
waitForContainer(containerId, fm)
|
||||
time.Sleep(time.Minute)
|
||||
request := &v2.RequestOptions{
|
||||
IdType: v2.TypeDocker,
|
||||
Count: 1,
|
||||
}
|
||||
containerInfo, err := fm.Cadvisor().ClientV2().Stats(containerId, request)
|
||||
time.Sleep(time.Minute)
|
||||
require.NoError(t, err)
|
||||
require.True(t, len(containerInfo) == 1)
|
||||
var info v2.ContainerInfo
|
||||
for _, cInfo := range containerInfo {
|
||||
info = cInfo
|
||||
needsBaseUsageCheck := false
|
||||
storageDriver := fm.Docker().StorageDriver()
|
||||
switch storageDriver {
|
||||
case framework.Aufs, framework.Overlay:
|
||||
needsBaseUsageCheck = true
|
||||
}
|
||||
pass := false
|
||||
// We need to wait for the `dd` operation to complete.
|
||||
for i := 0; i < 10; i++ {
|
||||
containerInfo, err := fm.Cadvisor().ClientV2().Stats(containerId, request)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, len(containerInfo), 1)
|
||||
var info v2.ContainerInfo
|
||||
// There is only one container in containerInfo. Since it is a map with unknown key,
|
||||
// use the value blindly.
|
||||
for _, cInfo := range containerInfo {
|
||||
info = cInfo
|
||||
}
|
||||
sanityCheckV2(containerId, info, t)
|
||||
|
||||
require.NotNil(t, info.Stats[0].Filesystem.TotalUsageBytes)
|
||||
if *info.Stats[0].Filesystem.TotalUsageBytes >= ddUsage {
|
||||
if !needsBaseUsageCheck {
|
||||
pass = true
|
||||
break
|
||||
}
|
||||
require.NotNil(t, info.Stats[0].Filesystem.BaseUsageBytes)
|
||||
if *info.Stats[0].Filesystem.BaseUsageBytes >= ddUsage {
|
||||
pass = true
|
||||
break
|
||||
}
|
||||
}
|
||||
t.Logf("expected total usage %d bytes to be greater than %d bytes", *info.Stats[0].Filesystem.TotalUsageBytes, ddUsage)
|
||||
if needsBaseUsageCheck {
|
||||
t.Logf("expected base %d bytes to be greater than %d bytes", *info.Stats[0].Filesystem.BaseUsageBytes, ddUsage)
|
||||
}
|
||||
t.Logf("retrying after %s...", sleepDuration.String())
|
||||
time.Sleep(sleepDuration)
|
||||
}
|
||||
|
||||
if !pass {
|
||||
t.Fail()
|
||||
}
|
||||
sanityCheckV2(containerId, info, t)
|
||||
require.NotNil(t, info.Stats[0].Filesystem.BaseUsageBytes)
|
||||
assert.True(t, *info.Stats[0].Filesystem.BaseUsageBytes > (1<<6), "expected base fs usage to be greater than 1MB")
|
||||
require.NotNil(t, info.Stats[0].Filesystem.TotalUsageBytes)
|
||||
assert.True(t, *info.Stats[0].Filesystem.TotalUsageBytes > (1<<6), "expected total fs usage to be greater than 1MB")
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user