Merge branch 'master' into patch-2

This commit is contained in:
Mathieu Cornic 2017-03-25 17:14:14 +01:00 committed by GitHub
commit 8a0fa01342
14 changed files with 249 additions and 32 deletions

View File

@ -1,5 +1,12 @@
# Changelog
### 0.25.0 (2017-03-09)
- Disable thin_ls due to excessive iops
- Ignore .mount cgroups, fixing dissappearing stats
- Fix wc goroutine leak
- Update aws-sdk-go dependency to 1.6.10
- Update to go 1.7 for releases
### 0.24.1 (2016-10-10)
- Fix issue with running cAdvisor in a container on some distributions.

View File

@ -35,7 +35,7 @@ fi
docker run --rm \
-w "/go/src/github.com/google/cadvisor" \
-v "${GOPATH}/src/github.com/google/cadvisor:/go/src/github.com/google/cadvisor" \
golang:1.7.1 make all test-runner
golang:1.7 make all test-runner
# Nodes that are currently stable. When tests fail on a specific node, and the failure is not remedied within a week, that node will be removed from this list.
golden_nodes=(

View File

@ -48,8 +48,7 @@ docker build -t $docker_tag -t $gcr_tag -f deploy/Dockerfile .
echo
echo "Release info:"
echo "VERSION=$VERSION"
sha1sum --tag cadvisor
md5sum --tag cadvisor
sha256sum --tag cadvisor
echo "docker image: $docker_tag"
echo "gcr.io image: $gcr_tag"

View File

@ -117,10 +117,7 @@ func main() {
glog.Fatalf("Failed to initialize storage driver: %s", err)
}
sysFs, err := sysfs.NewRealSysFs()
if err != nil {
glog.Fatalf("Failed to create a system interface: %s", err)
}
sysFs := sysfs.NewRealSysFs()
collectorHttpClient := createCollectorHttpClient(*collectorCert, *collectorKey)

View File

@ -33,6 +33,7 @@ import (
"github.com/google/cadvisor/machine"
"github.com/google/cadvisor/manager/watcher"
dockerutil "github.com/google/cadvisor/utils/docker"
"github.com/google/cadvisor/zfs"
docker "github.com/docker/engine-api/client"
"github.com/golang/glog"
@ -105,6 +106,8 @@ type dockerFactory struct {
ignoreMetrics container.MetricSet
thinPoolWatcher *devicemapper.ThinPoolWatcher
zfsWatcher *zfs.ZfsWatcher
}
func (self *dockerFactory) String() string {
@ -132,6 +135,7 @@ func (self *dockerFactory) NewContainerHandler(name string, inHostNamespace bool
self.dockerVersion,
self.ignoreMetrics,
self.thinPoolWatcher,
self.zfsWatcher,
)
return
}
@ -218,6 +222,21 @@ func startThinPoolWatcher(dockerInfo *dockertypes.Info) (*devicemapper.ThinPoolW
return thinPoolWatcher, nil
}
func startZfsWatcher(dockerInfo *dockertypes.Info) (*zfs.ZfsWatcher, error) {
filesystem, err := dockerutil.DockerZfsFilesystem(*dockerInfo)
if err != nil {
return nil, err
}
zfsWatcher, err := zfs.NewZfsWatcher(filesystem)
if err != nil {
return nil, err
}
go zfsWatcher.Start()
return zfsWatcher, nil
}
func ensureThinLsKernelVersion(kernelVersion string) error {
// kernel 4.4.0 has the proper bug fixes to allow thin_ls to work without corrupting the thin pool
minKernelVersion := semver.MustParse("4.4.0")
@ -306,6 +325,14 @@ func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo, ignoreMetrics c
}
}
var zfsWatcher *zfs.ZfsWatcher
if storageDriver(dockerInfo.Driver) == zfsStorageDriver {
zfsWatcher, err = startZfsWatcher(dockerInfo)
if err != nil {
glog.Errorf("zfs filesystem stats will not be reported: %v", err)
}
}
glog.Infof("Registering Docker factory")
f := &dockerFactory{
cgroupSubsystems: cgroupSubsystems,
@ -317,6 +344,7 @@ func Register(factory info.MachineInfoFactory, fsInfo fs.FsInfo, ignoreMetrics c
storageDir: RootDir(),
ignoreMetrics: ignoreMetrics,
thinPoolWatcher: thinPoolWatcher,
zfsWatcher: zfsWatcher,
}
container.RegisterContainerHandlerFactory(f, []watcher.ContainerWatchSource{watcher.Raw})

View File

@ -29,6 +29,7 @@ import (
"github.com/google/cadvisor/fs"
info "github.com/google/cadvisor/info/v1"
dockerutil "github.com/google/cadvisor/utils/docker"
"github.com/google/cadvisor/zfs"
docker "github.com/docker/engine-api/client"
dockercontainer "github.com/docker/engine-api/types/container"
@ -42,6 +43,7 @@ import (
const (
// The read write layers exist here.
aufsRWLayer = "diff"
// Path to the directory where docker stores log files if the json logging driver is enabled.
pathToContainersDir = "containers"
)
@ -72,6 +74,12 @@ type dockerContainerHandler struct {
// the devicemapper device id for the container
deviceID string
// zfs Filesystem
zfsFilesystem string
// zfsParent is the parent for docker zfs
zfsParent string
// Time at which this container was created.
creationTime time.Time
@ -101,6 +109,9 @@ type dockerContainerHandler struct {
// thin pool watcher
thinPoolWatcher *devicemapper.ThinPoolWatcher
// zfs watcher
zfsWatcher *zfs.ZfsWatcher
}
var _ container.ContainerHandler = &dockerContainerHandler{}
@ -136,6 +147,7 @@ func newDockerContainerHandler(
dockerVersion []int,
ignoreMetrics container.MetricSet,
thinPoolWatcher *devicemapper.ThinPoolWatcher,
zfsWatcher *zfs.ZfsWatcher,
) (container.ContainerHandler, error) {
// Create the cgroup paths.
cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
@ -172,12 +184,21 @@ func newDockerContainerHandler(
var (
rootfsStorageDir string
poolName string
zfsFilesystem string
zfsParent string
)
switch storageDriver {
case aufsStorageDriver:
rootfsStorageDir = path.Join(storageDir, string(aufsStorageDriver), aufsRWLayer, rwLayerID)
case overlayStorageDriver:
rootfsStorageDir = path.Join(storageDir, string(overlayStorageDriver), rwLayerID)
case zfsStorageDriver:
status, err := Status()
if err != nil {
return nil, fmt.Errorf("unable to determine docker status: %v", err)
}
zfsParent = status.DriverStatus[dockerutil.DriverStatusParentDataset]
zfsFilesystem = path.Join(zfsParent, rwLayerID)
case devicemapperStorageDriver:
status, err := Status()
if err != nil {
@ -199,10 +220,13 @@ func newDockerContainerHandler(
fsInfo: fsInfo,
rootFs: rootFs,
poolName: poolName,
zfsFilesystem: zfsFilesystem,
rootfsStorageDir: rootfsStorageDir,
envs: make(map[string]string),
ignoreMetrics: ignoreMetrics,
thinPoolWatcher: thinPoolWatcher,
zfsWatcher: zfsWatcher,
zfsParent: zfsParent,
}
// We assume that if Inspect fails then the container is not known to docker.
@ -245,7 +269,9 @@ func newDockerContainerHandler(
handler.fsHandler = &dockerFsHandler{
fsHandler: common.NewFsHandler(common.DefaultPeriod, rootfsStorageDir, otherStorageDir, fsInfo),
thinPoolWatcher: thinPoolWatcher,
zfsWatcher: zfsWatcher,
deviceID: handler.deviceID,
zfsFilesystem: zfsFilesystem,
}
}
@ -265,7 +291,7 @@ func newDockerContainerHandler(
}
// dockerFsHandler is a composite FsHandler implementation the incorporates
// the common fs handler and a devicemapper ThinPoolWatcher.
// the common fs handler, a devicemapper ThinPoolWatcher, and a zfsWatcher
type dockerFsHandler struct {
fsHandler common.FsHandler
@ -273,6 +299,11 @@ type dockerFsHandler struct {
thinPoolWatcher *devicemapper.ThinPoolWatcher
// deviceID is the id of the container's fs device
deviceID string
// zfsWatcher is the zfs filesystem watcher
zfsWatcher *zfs.ZfsWatcher
// zfsFilesystem is the docker zfs filesystem
zfsFilesystem string
}
var _ common.FsHandler = &dockerFsHandler{}
@ -306,6 +337,15 @@ func (h *dockerFsHandler) Usage() common.FsUsage {
}
}
if h.zfsWatcher != nil {
zfsUsage, err := h.zfsWatcher.GetUsage(h.zfsFilesystem)
if err != nil {
glog.V(5).Infof("unable to get fs usage from zfs for filesystem %s: %v", h.zfsFilesystem, err)
} else {
usage.BaseUsageBytes = zfsUsage
usage.TotalUsageBytes += zfsUsage
}
}
return usage
}
@ -359,12 +399,14 @@ func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error
// Device has to be the pool name to correlate with the device name as
// set in the machine info filesystems.
device = self.poolName
case aufsStorageDriver, overlayStorageDriver, zfsStorageDriver:
case aufsStorageDriver, overlayStorageDriver:
deviceInfo, err := self.fsInfo.GetDirFsDevice(self.rootfsStorageDir)
if err != nil {
return fmt.Errorf("unable to determine device info for dir: %v: %v", self.rootfsStorageDir, err)
}
device = deviceInfo.Device
case zfsStorageDriver:
device = self.zfsParent
default:
return nil
}

View File

@ -47,7 +47,7 @@ Command: `make release`
- Try to build it from the release branch, since we include that in the binary version
- Verify the ldflags output, in particular check the Version, BuildUser, and GoVersion are expected
Once the build is complete, check the VERSION and note the sha1 and md5 hashes.
Once the build is complete, check the VERSION and note the sha256 hash.
## 4. Push the Docker images
@ -93,9 +93,9 @@ Once you are satisfied with the release quality (consider waiting a week for bug
1. Edit the github release a final time, and uncheck the "Pre-release" checkbox
2. Tag the docker & gcr.io releases with the latest version
```
$ docker pull google/cadvisor:$VERSION
$ docker tag -f google/cadvisor:$VERSION google/cadvisor:latest
$ docker tag -f google/cadvisor:$VERSION gcr.io/google_containers/cadvisor:latest
```
```
$ docker pull google/cadvisor:$VERSION
$ docker tag -f google/cadvisor:$VERSION google/cadvisor:latest
$ docker tag -f google/cadvisor:$VERSION gcr.io/google_containers/cadvisor:latest
```
3. Repeat steps 4.a and 4.b to push the image tagged with latest

View File

@ -18,6 +18,10 @@ sudo docker run \
cAdvisor is now running (in the background) on `http://localhost:8080/`. The setup includes directories with Docker state cAdvisor needs to observe.
**Note**: If docker daemon is running with [user namespace enabled](https://docs.docker.com/engine/reference/commandline/dockerd/#starting-the-daemon-with-user-namespaces-enabled),
You need to add `--userns=host` option in order for cAdvisor to monitor Docker containers,
otherwise cAdvisor can not connect to docker daemon.
## Latest Canary
The latest cAdvisor canary release is continuously built from HEAD and available
@ -78,7 +82,3 @@ cAdvisor is now running (in the foreground) on `http://localhost:8080/`.
## Runtime Options
cAdvisor has a series of flags that can be used to configure its runtime behavior. More details can be found in runtime [options](runtime_options.md).
## I need help!
We aim to have cAdvisor run everywhere! If you run into issues getting it running, feel free to file an issue. We are very responsive in supporting our users and update our documentation with new setups.

View File

@ -149,6 +149,31 @@ func processMounts(mounts []*mount.Info, excludedMountpointPrefixes []string) ma
continue
}
// btrfs fix: following workaround fixes wrong btrfs Major and Minor Ids reported in /proc/self/mountinfo.
// instead of using values from /proc/self/mountinfo we use stat to get Ids from btrfs mount point
if mount.Fstype == "btrfs" && mount.Major == 0 && strings.HasPrefix(mount.Source, "/dev/") {
buf := new(syscall.Stat_t)
err := syscall.Stat(mount.Source, buf)
if err != nil {
glog.Warningf("stat failed on %s with error: %s", mount.Source, err)
} else {
glog.Infof("btrfs mount %#v", mount)
if buf.Mode&syscall.S_IFMT == syscall.S_IFBLK {
err := syscall.Stat(mount.Mountpoint, buf)
if err != nil {
glog.Warningf("stat failed on %s with error: %s", mount.Mountpoint, err)
} else {
glog.Infof("btrfs dev major:minor %d:%d\n", int(major(buf.Dev)), int(minor(buf.Dev)))
glog.Infof("btrfs rdev major:minor %d:%d\n", int(major(buf.Rdev)), int(minor(buf.Rdev)))
mount.Major = int(major(buf.Dev))
mount.Minor = int(minor(buf.Dev))
}
}
}
}
partitions[mount.Source] = partition{
fsType: mount.Fstype,
mountpoint: mount.Mountpoint,

View File

@ -49,8 +49,7 @@ func (self *Client) Send(namespace, containerName, key string, value uint64) err
formatted := fmt.Sprintf("%s.%s.%s:%d|g", namespace, containerName, key, value)
_, err := fmt.Fprintf(self.conn, formatted)
if err != nil {
glog.V(3).Infof("failed to send data %q: %v", formatted, err)
return err
return fmt.Errorf("failed to send data %q: %v", formatted, err)
}
return nil
}

View File

@ -23,11 +23,12 @@ import (
)
const (
DockerInfoDriver = "Driver"
DockerInfoDriverStatus = "DriverStatus"
DriverStatusPoolName = "Pool Name"
DriverStatusDataLoopFile = "Data loop file"
DriverStatusMetadataFile = "Metadata file"
DockerInfoDriver = "Driver"
DockerInfoDriverStatus = "DriverStatus"
DriverStatusPoolName = "Pool Name"
DriverStatusDataLoopFile = "Data loop file"
DriverStatusMetadataFile = "Metadata file"
DriverStatusParentDataset = "Parent Dataset"
)
func DriverStatusValue(status [][2]string, target string) string {
@ -68,3 +69,12 @@ func DockerMetadataDevice(info dockertypes.Info) (string, error) {
return metadataDevice, nil
}
func DockerZfsFilesystem(info dockertypes.Info) (string, error) {
filesystem := DriverStatusValue(info.DriverStatus, DriverStatusParentDataset)
if len(filesystem) == 0 {
return "", fmt.Errorf("Could not get zfs filesystem")
}
return filesystem, nil
}

View File

@ -70,8 +70,8 @@ type SysFs interface {
type realSysFs struct{}
func NewRealSysFs() (SysFs, error) {
return &realSysFs{}, nil
func NewRealSysFs() SysFs {
return &realSysFs{}
}
func (self *realSysFs) GetBlockDevices() ([]os.FileInfo, error) {

View File

@ -155,10 +155,7 @@ func GetCacheInfo(sysFs sysfs.SysFs, id int) ([]sysfs.CacheInfo, error) {
func GetNetworkStats(name string) (info.InterfaceStats, error) {
// TODO(rjnagal): Take syfs as an argument.
sysFs, err := sysfs.NewRealSysFs()
if err != nil {
return info.InterfaceStats{}, err
}
sysFs := sysfs.NewRealSysFs()
return getNetworkStats(name, sysFs)
}

113
zfs/watcher.go Normal file
View File

@ -0,0 +1,113 @@
// Copyright 2016 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zfs
import (
"fmt"
"sync"
"time"
"github.com/golang/glog"
zfs "github.com/mistifyio/go-zfs"
)
// zfsWatcher maintains a cache of filesystem -> usage stats for a
// zfs filesystem
type ZfsWatcher struct {
filesystem string
lock *sync.RWMutex
cache map[string]uint64
period time.Duration
stopChan chan struct{}
}
// NewThinPoolWatcher returns a new ThinPoolWatcher for the given devicemapper
// thin pool name and metadata device or an error.
func NewZfsWatcher(filesystem string) (*ZfsWatcher, error) {
return &ZfsWatcher{
filesystem: filesystem,
lock: &sync.RWMutex{},
cache: make(map[string]uint64),
period: 15 * time.Second,
stopChan: make(chan struct{}),
}, nil
}
// Start starts the ZfsWatcher.
func (w *ZfsWatcher) Start() {
err := w.Refresh()
if err != nil {
glog.Errorf("encountered error refreshing zfs watcher: %v", err)
}
for {
select {
case <-w.stopChan:
return
case <-time.After(w.period):
start := time.Now()
err = w.Refresh()
if err != nil {
glog.Errorf("encountered error refreshing zfs watcher: %v", err)
}
// print latency for refresh
duration := time.Since(start)
glog.V(5).Infof("zfs(%d) took %s", start.Unix(), duration)
}
}
}
// Stop stops the ZfsWatcher.
func (w *ZfsWatcher) Stop() {
close(w.stopChan)
}
// GetUsage gets the cached usage value of the given filesystem.
func (w *ZfsWatcher) GetUsage(filesystem string) (uint64, error) {
w.lock.RLock()
defer w.lock.RUnlock()
v, ok := w.cache[filesystem]
if !ok {
return 0, fmt.Errorf("no cached value for usage of filesystem %v", filesystem)
}
return v, nil
}
// Refresh performs a zfs get
func (w *ZfsWatcher) Refresh() error {
w.lock.Lock()
defer w.lock.Unlock()
newCache := make(map[string]uint64)
parent, err := zfs.GetDataset(w.filesystem)
if err != nil {
glog.Errorf("encountered error getting zfs filesystem: %s: %v", w.filesystem, err)
return err
}
children, err := parent.Children(0)
if err != nil {
glog.Errorf("encountered error getting children of zfs filesystem: %s: %v", w.filesystem, err)
return err
}
for _, ds := range children {
newCache[ds.Name] = ds.Used
}
w.cache = newCache
return nil
}