Merge pull request #358 from vmarmol/update-libcontainer

Update libcontainer dependency
This commit is contained in:
Rohit Jnagal 2014-12-15 17:01:44 -08:00
commit 8eec529106
69 changed files with 2021 additions and 1150 deletions

4
Godeps/Godeps.json generated
View File

@ -55,8 +55,8 @@
},
{
"ImportPath": "github.com/docker/libcontainer",
"Comment": "v1.2.0-99-gfe3801c",
"Rev": "fe3801ccd2f5d0cc3ec5d063067fc4a1c312fa81"
"Comment": "v1.2.0-173-g58fc931",
"Rev": "58fc93160e03387a4f41dcf4aed2e376c4a92db4"
},
{
"ImportPath": "github.com/fsouza/go-dockerclient",

View File

@ -0,0 +1,9 @@
image: dockercore/libcontainer
script:
# Setup the DockerInDocker environment.
- /dind
- sed -i 's!docker/docker!docker/libcontainer!' /go/src/github.com/docker/docker/hack/make/.validate
- bash /go/src/github.com/docker/docker/hack/make/validate-dco
- bash /go/src/github.com/docker/docker/hack/make/validate-gofmt
- export GOPATH="$GOPATH:/go:$(pwd)/vendor" # Drone mucks with our GOPATH
- make direct-test

View File

@ -1,36 +0,0 @@
language: go
go: 1.3
# let us have pretty experimental Docker-based Travis workers
sudo: false
env:
- TRAVIS_GLOBAL_WTF=1
- _GOOS=linux _GOARCH=amd64 CGO_ENABLED=1
- _GOOS=linux _GOARCH=amd64 CGO_ENABLED=0
# - _GOOS=linux _GOARCH=386 CGO_ENABLED=1 # TODO add this once Travis can handle it (https://github.com/travis-ci/travis-ci/issues/2207#issuecomment-49625061)
- _GOOS=linux _GOARCH=386 CGO_ENABLED=0
- _GOOS=linux _GOARCH=arm CGO_ENABLED=0
install:
- go get code.google.com/p/go.tools/cmd/cover
- mkdir -pv "${GOPATH%%:*}/src/github.com/docker" && [ -d "${GOPATH%%:*}/src/github.com/docker/libcontainer" ] || ln -sv "$(readlink -f .)" "${GOPATH%%:*}/src/github.com/docker/libcontainer"
- if [ -z "$TRAVIS_GLOBAL_WTF" ]; then
gvm cross "$_GOOS" "$_GOARCH";
export GOOS="$_GOOS" GOARCH="$_GOARCH";
fi
- export GOPATH="$GOPATH:$(pwd)/vendor"
- if [ -z "$TRAVIS_GLOBAL_WTF" ]; then go env; fi
- go get -d -v ./... # TODO remove this if /docker/docker gets purged from our includes
- if [ "$TRAVIS_GLOBAL_WTF" ]; then
export DOCKER_PATH="${GOPATH%%:*}/src/github.com/docker/docker";
mkdir -p "$DOCKER_PATH/hack/make";
( cd "$DOCKER_PATH/hack/make" && wget -c 'https://raw.githubusercontent.com/docker/docker/master/hack/make/'{.validate,validate-dco,validate-gofmt} );
sed -i 's!docker/docker!docker/libcontainer!' "$DOCKER_PATH/hack/make/.validate";
fi
script:
- if [ "$TRAVIS_GLOBAL_WTF" ]; then bash "$DOCKER_PATH/hack/make/validate-dco"; fi
- if [ "$TRAVIS_GLOBAL_WTF" ]; then bash "$DOCKER_PATH/hack/make/validate-gofmt"; fi
- if [ -z "$TRAVIS_GLOBAL_WTF" ]; then make direct-build; fi
- if [ -z "$TRAVIS_GLOBAL_WTF" -a "$GOARCH" != 'arm' ]; then make direct-test-short; fi

View File

@ -1,7 +1,7 @@
FROM crosbymichael/golang
RUN apt-get update && apt-get install -y gcc make
RUN go get code.google.com/p/go.tools/cmd/cover
RUN go get golang.org/x/tools/cmd/cover
ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor
RUN go get github.com/docker/docker/pkg/term
@ -10,7 +10,7 @@ RUN go get github.com/docker/docker/pkg/term
RUN mkdir /busybox && \
curl -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.02/rootfs.tar' | tar -xC /busybox
RUN curl -sSL https://raw.githubusercontent.com/docker/docker/master/hack/dind -o /dind && \
RUN curl -sSL https://raw.githubusercontent.com/docker/docker/master/project/dind -o /dind && \
chmod +x /dind
COPY . /go/src/github.com/docker/libcontainer

View File

@ -2,5 +2,4 @@ Michael Crosby <michael@docker.com> (@crosbymichael)
Rohit Jnagal <jnagal@google.com> (@rjnagal)
Victor Marmol <vmarmol@google.com> (@vmarmol)
Mrunal Patel <mpatel@redhat.com> (@mrunalp)
.travis.yml: Tianon Gravi <admwiggin@gmail.com> (@tianon)
update-vendor.sh: Tianon Gravi <admwiggin@gmail.com> (@tianon)

View File

@ -12,10 +12,10 @@ sh:
GO_PACKAGES = $(shell find . -not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u)
direct-test:
go test -cover -v $(GO_PACKAGES)
go test $(TEST_TAGS) -cover -v $(GO_PACKAGES)
direct-test-short:
go test -cover -test.short -v $(GO_PACKAGES)
go test $(TEST_TAGS) -cover -test.short -v $(GO_PACKAGES)
direct-build:
go build -v $(GO_PACKAGES)

View File

@ -1,4 +1,4 @@
## libcontainer - reference implementation for containers [![Build Status](https://travis-ci.org/docker/libcontainer.png?branch=master)](https://travis-ci.org/docker/libcontainer)
## libcontainer - reference implementation for containers [![Build Status](https://ci.dockerproject.com/github.com/docker/libcontainer/status.svg?branch=master)](https://ci.dockerproject.com/github.com/docker/libcontainer)
### Note on API changes:

View File

@ -0,0 +1,321 @@
## Container Specification - v1
This is the standard configuration for version 1 containers. It includes
namespaces, standard filesystem setup, a default Linux capability set, and
information about resource reservations. It also has information about any
populated environment settings for the processes running inside a container.
Along with the configuration of how a container is created the standard also
discusses actions that can be performed on a container to manage and inspect
information about the processes running inside.
The v1 profile is meant to be able to accommodate the majority of applications
with a strong security configuration.
### System Requirements and Compatibility
Minimum requirements:
* Kernel version - 3.8 recommended 2.6.2x minimum(with backported patches)
* Mounted cgroups with each subsystem in its own hierarchy
### Namespaces
| Flag | Enabled |
| ------------ | ------- |
| CLONE_NEWPID | 1 |
| CLONE_NEWUTS | 1 |
| CLONE_NEWIPC | 1 |
| CLONE_NEWNET | 1 |
| CLONE_NEWNS | 1 |
| CLONE_NEWUSER | 0 |
In v1 the user namespace is not enabled by default for support of older kernels
where the user namespace feature is not fully implemented. Namespaces are
created for the container via the `clone` syscall.
### Filesystem
A root filesystem must be provided to a container for execution. The container
will use this root filesystem (rootfs) to jail and spawn processes inside where
the binaries and system libraries are local to that directory. Any binaries
to be executed must be contained within this rootfs.
Mounts that happen inside the container are automatically cleaned up when the
container exits as the mount namespace is destroyed and the kernel will
unmount all the mounts that were setup within that namespace.
For a container to execute properly there are certain filesystems that
are required to be mounted within the rootfs that the runtime will setup.
| Path | Type | Flags | Data |
| ----------- | ------ | -------------------------------------- | --------------------------------------- |
| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 |
| /dev/shm | shm | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k |
| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid5 |
| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | |
After a container's filesystems are mounted within the newly created
mount namespace `/dev` will need to be populated with a set of device nodes.
It is expected that a rootfs does not need to have any device nodes specified
for `/dev` witin the rootfs as the container will setup the correct devices
that are required for executing a container's process.
| Path | Mode | Access |
| ------------ | ---- | ---------- |
| /dev/null | 0666 | rwm |
| /dev/zero | 0666 | rwm |
| /dev/full | 0666 | rwm |
| /dev/tty | 0666 | rwm |
| /dev/random | 0666 | rwm |
| /dev/urandom | 0666 | rwm |
| /dev/fuse | 0666 | rwm |
**ptmx**
`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within
the container.
The use of a pseudo TTY is optional within a container and it should support both.
If a pseudo is provided to the container `/dev/console` will need to be
setup by binding the console in `/dev/` after it has been populated and mounted
in tmpfs.
| Source | Destination | UID GID | Mode | Type |
| --------------- | ------------ | ------- | ---- | ---- |
| *pty host path* | /dev/console | 0 0 | 0600 | bind |
After `/dev/null` has been setup we check for any external links between
the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing
to `/dev/null` outside the container we close and `dup2` the the `/dev/null`
that is local to the container's rootfs.
After the container has `/proc` mounted a few standard symlinks are setup
within `/dev/` for the io.
| Source | Destination |
| ------------ | ----------- |
| /proc/1/fd | /dev/fd |
| /proc/1/fd/0 | /dev/stdin |
| /proc/1/fd/1 | /dev/stdout |
| /proc/1/fd/2 | /dev/stderr |
A `pivot_root` is used to change the root for the process, effectively
jailing the process inside the rootfs.
```c
put_old = mkdir(...);
pivot_root(rootfs, put_old);
chdir("/");
unmount(put_old, MS_DETACH);
rmdir(put_old);
```
For container's running with a rootfs inside `ramfs` a `MS_MOVE` combined
with a `chroot` is required as `pivot_root` is not supported in `ramfs`.
```c
mount(rootfs, "/", NULL, MS_MOVE, NULL);
chroot(".");
chdir("/");
```
The `umask` is set back to `0022` after the filesystem setup has been completed.
### Resources
Cgroups are used to handle resource allocation for containers. This includes
system resources like cpu, memory, and device access.
| Subsystem | Enabled |
| ---------- | ------- |
| devices | 1 |
| memory | 1 |
| cpu | 1 |
| cpuacct | 1 |
| cpuset | 1 |
| blkio | 1 |
| perf_event | 1 |
| freezer | 1 |
All cgroup subsystem are joined so that statistics can be collected from
each of the subsystems. Freezer does not expose any stats but is joined
so that containers can be paused and resumed.
The parent process of the container's init must place the init pid inside
the correct cgroups before the initialization begins. This is done so
that no processes or threads escape the cgroups. This sync is
done via a pipe ( specified in the runtime section below ) that the container's
init process will block waiting for the parent to finish setup.
### Security
The standard set of Linux capabilities that are set in a container
provide a good default for security and flexibility for the applications.
| Capability | Enabled |
| -------------------- | ------- |
| CAP_NET_RAW | 1 |
| CAP_NET_BIND_SERVICE | 1 |
| CAP_AUDIT_WRITE | 1 |
| CAP_DAC_OVERRIDE | 1 |
| CAP_SETFCAP | 1 |
| CAP_SETPCAP | 1 |
| CAP_SETGID | 1 |
| CAP_SETUID | 1 |
| CAP_MKNOD | 1 |
| CAP_CHOWN | 1 |
| CAP_FOWNER | 1 |
| CAP_FSETID | 1 |
| CAP_KILL | 1 |
| CAP_SYS_CHROOT | 1 |
| CAP_NET_BROADCAST | 0 |
| CAP_SYS_MODULE | 0 |
| CAP_SYS_RAWIO | 0 |
| CAP_SYS_PACCT | 0 |
| CAP_SYS_ADMIN | 0 |
| CAP_SYS_NICE | 0 |
| CAP_SYS_RESOURCE | 0 |
| CAP_SYS_TIME | 0 |
| CAP_SYS_TTY_CONFIG | 0 |
| CAP_AUDIT_CONTROL | 0 |
| CAP_MAC_OVERRIDE | 0 |
| CAP_MAC_ADMIN | 0 |
| CAP_NET_ADMIN | 0 |
| CAP_SYSLOG | 0 |
| CAP_DAC_READ_SEARCH | 0 |
| CAP_LINUX_IMMUTABLE | 0 |
| CAP_IPC_LOCK | 0 |
| CAP_IPC_OWNER | 0 |
| CAP_SYS_PTRACE | 0 |
| CAP_SYS_BOOT | 0 |
| CAP_LEASE | 0 |
| CAP_WAKE_ALARM | 0 |
| CAP_BLOCK_SUSPE | 0 |
Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor)
and [selinux](http://selinuxproject.org/page/Main_Page) can be used with
the containers. A container should support setting an apparmor profile or
selinux process and mount labels if provided in the configuration.
Standard apparmor profile:
```c
#include <tunables/global>
profile <profile_name> flags=(attach_disconnected,mediate_deleted) {
#include <abstractions/base>
network,
capability,
file,
umount,
mount fstype=tmpfs,
mount fstype=mqueue,
mount fstype=fuse.*,
mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,
mount fstype=efivarfs -> /sys/firmware/efi/efivars/,
mount fstype=fusectl -> /sys/fs/fuse/connections/,
mount fstype=securityfs -> /sys/kernel/security/,
mount fstype=debugfs -> /sys/kernel/debug/,
mount fstype=proc -> /proc/,
mount fstype=sysfs -> /sys/,
deny @{PROC}/sys/fs/** wklx,
deny @{PROC}/sysrq-trigger rwklx,
deny @{PROC}/mem rwklx,
deny @{PROC}/kmem rwklx,
deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
deny @{PROC}/sys/kernel/*/** wklx,
deny mount options=(ro, remount) -> /,
deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
deny mount fstype=devpts,
deny /sys/[^f]*/** wklx,
deny /sys/f[^s]*/** wklx,
deny /sys/fs/[^c]*/** wklx,
deny /sys/fs/c[^g]*/** wklx,
deny /sys/fs/cg[^r]*/** wklx,
deny /sys/firmware/efi/efivars/** rwklx,
deny /sys/kernel/security/** rwklx,
}
```
*TODO: seccomp work is being done to find a good default config*
### Runtime and Init Process
During container creation the parent process needs to talk to the container's init
process and have a form of synchronization. This is accomplished by creating
a pipe that is passed to the container's init. When the init process first spawns
it will block on its side of the pipe until the parent closes its side. This
allows the parent to have time to set the new process inside a cgroup hierarchy
and/or write any uid/gid mappings required for user namespaces.
The pipe is passed to the init process via FD 3.
The application consuming libcontainer should be compiled statically. libcontainer
does not define any init process and the arguments provided are used to `exec` the
process inside the application. There should be no long running init within the
container spec.
If a pseudo tty is provided to a container it will open and `dup2` the console
as the container's STDIN, STDOUT, STDERR as well as mounting the console
as `/dev/console`.
An extra set of mounts are provided to a container and setup for use. A container's
rootfs can contain some non portable files inside that can cause side effects during
execution of a process. These files are usually created and populated with the container
specific information via the runtime.
**Extra runtime files:**
* /etc/hosts
* /etc/resolv.conf
* /etc/hostname
* /etc/localtime
#### Defaults
There are a few defaults that can be overridden by users, but in their omission
these apply to processes within a container.
| Type | Value |
| ------------------- | ------------------------------ |
| Parent Death Signal | SIGKILL |
| UID | 0 |
| GID | 0 |
| GROUPS | 0, NULL |
| CWD | "/" |
| $HOME | Current user's home dir or "/" |
| Readonly rootfs | false |
| Pseudo TTY | false |
## Actions
After a container is created there is a standard set of actions that can
be done to the container. These actions are part of the public API for
a container.
| Action | Description |
| -------------- | ------------------------------------------------------------------ |
| Get processes | Return all the pids for processes running inside a container |
| Get Stats | Return resource statistics for the container as a whole |
| Wait | Wait waits on the container's init process ( pid 1 ) |
| Wait Process | Wait on any of the container's processes returning the exit status |
| Destroy | Kill the container's init process and remove any filesystem state |
| Signal | Send a signal to the container's init process |
| Signal Process | Send a signal to any of the container's processes |
| Pause | Pause all processes inside the container |
| Resume | Resume all processes inside the container if paused |
| Exec | Execute a new process inside of the container ( requires setns ) |

View File

@ -5,30 +5,17 @@ package libcontainer
import (
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
"github.com/docker/libcontainer/network"
)
// TODO(vmarmol): Complete Stats() in final libcontainer API and move users to that.
// DEPRECATED: The below portions are only to be used during the transition to the official API.
// Returns all available stats for the given container.
func GetStats(container *Config, state *State) (*ContainerStats, error) {
var (
err error
stats = &ContainerStats{}
)
if systemd.UseSystemd() {
stats.CgroupStats, err = systemd.GetStats(container.Cgroups)
} else {
stats.CgroupStats, err = fs.GetStats(container.Cgroups)
}
if err != nil {
func GetStats(container *Config, state *State) (stats *ContainerStats, err error) {
stats = &ContainerStats{}
if stats.CgroupStats, err = fs.GetStats(state.CgroupPaths); err != nil {
return stats, err
}
stats.NetworkStats, err = network.GetStats(&state.NetworkState)
return stats, err
}

View File

@ -50,11 +50,7 @@ type Cgroup struct {
CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use
CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use
Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process
Slice string `json:"slice,omitempty"` // Parent slice to use for systemd
}
type ActiveCgroup interface {
Cleanup() error
Paths() (map[string]string, error)
}

View File

@ -1,264 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"log"
"os"
"syscall"
"time"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
)
var createCommand = cli.Command{
Name: "create",
Usage: "Create a cgroup container using the supplied configuration and initial process.",
Flags: []cli.Flag{
cli.StringFlag{Name: "config, c", Value: "cgroup.json", Usage: "path to container configuration (cgroups.Cgroup object)"},
cli.IntFlag{Name: "pid, p", Value: 0, Usage: "pid of the initial process in the container"},
},
Action: createAction,
}
var destroyCommand = cli.Command{
Name: "destroy",
Usage: "Destroy an existing cgroup container.",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: destroyAction,
}
var statsCommand = cli.Command{
Name: "stats",
Usage: "Get stats for cgroup",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: statsAction,
}
var pauseCommand = cli.Command{
Name: "pause",
Usage: "Pause cgroup",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: pauseAction,
}
var resumeCommand = cli.Command{
Name: "resume",
Usage: "Resume a paused cgroup",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: resumeAction,
}
var psCommand = cli.Command{
Name: "ps",
Usage: "Get list of pids for a cgroup",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: psAction,
}
func getConfigFromFile(c *cli.Context) (*cgroups.Cgroup, error) {
f, err := os.Open(c.String("config"))
if err != nil {
return nil, err
}
defer f.Close()
var config *cgroups.Cgroup
if err := json.NewDecoder(f).Decode(&config); err != nil {
log.Fatal(err)
}
return config, nil
}
func openLog(name string) error {
f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0755)
if err != nil {
return err
}
log.SetOutput(f)
return nil
}
func getConfig(context *cli.Context) (*cgroups.Cgroup, error) {
name := context.String("name")
if name == "" {
log.Fatal(fmt.Errorf("Missing container name"))
}
parent := context.String("parent")
return &cgroups.Cgroup{
Name: name,
Parent: parent,
}, nil
}
func killAll(config *cgroups.Cgroup) {
// We could use freezer here to prevent process spawning while we are trying
// to kill everything. But going with more portable solution of retrying for
// now.
pids := getPids(config)
retry := 10
for len(pids) != 0 || retry > 0 {
killPids(pids)
time.Sleep(100 * time.Millisecond)
retry--
pids = getPids(config)
}
if len(pids) != 0 {
log.Fatal(fmt.Errorf("Could not kill existing processes in the container."))
}
}
func getPids(config *cgroups.Cgroup) []int {
pids, err := fs.GetPids(config)
if err != nil {
log.Fatal(err)
}
return pids
}
func killPids(pids []int) {
for _, pid := range pids {
// pids might go away on their own. Ignore errors.
syscall.Kill(pid, syscall.SIGKILL)
}
}
func setFreezerState(context *cli.Context, state cgroups.FreezerState) {
config, err := getConfig(context)
if err != nil {
log.Fatal(err)
}
if systemd.UseSystemd() {
err = systemd.Freeze(config, state)
} else {
err = fs.Freeze(config, state)
}
if err != nil {
log.Fatal(err)
}
}
func createAction(context *cli.Context) {
config, err := getConfigFromFile(context)
if err != nil {
log.Fatal(err)
}
pid := context.Int("pid")
if pid <= 0 {
log.Fatal(fmt.Errorf("Invalid pid : %d", pid))
}
if systemd.UseSystemd() {
_, err := systemd.Apply(config, pid)
if err != nil {
log.Fatal(err)
}
} else {
_, err := fs.Apply(config, pid)
if err != nil {
log.Fatal(err)
}
}
}
func destroyAction(context *cli.Context) {
config, err := getConfig(context)
if err != nil {
log.Fatal(err)
}
killAll(config)
// Systemd will clean up cgroup state for empty container.
if !systemd.UseSystemd() {
err := fs.Cleanup(config)
if err != nil {
log.Fatal(err)
}
}
}
func statsAction(context *cli.Context) {
config, err := getConfig(context)
if err != nil {
log.Fatal(err)
}
stats, err := fs.GetStats(config)
if err != nil {
log.Fatal(err)
}
out, err := json.MarshalIndent(stats, "", "\t")
if err != nil {
log.Fatal(err)
}
fmt.Printf("Usage stats for '%s':\n %v\n", config.Name, string(out))
}
func pauseAction(context *cli.Context) {
setFreezerState(context, cgroups.Frozen)
}
func resumeAction(context *cli.Context) {
setFreezerState(context, cgroups.Thawed)
}
func psAction(context *cli.Context) {
config, err := getConfig(context)
if err != nil {
log.Fatal(err)
}
pids, err := fs.GetPids(config)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Pids in '%s':\n", config.Name)
fmt.Println(pids)
}
func main() {
logPath := os.Getenv("log")
if logPath != "" {
if err := openLog(logPath); err != nil {
log.Fatal(err)
}
}
app := cli.NewApp()
app.Name = "cgutil"
app.Usage = "Test utility for libcontainer cgroups package"
app.Version = "0.1"
app.Commands = []cli.Command{
createCommand,
destroyCommand,
statsCommand,
pauseCommand,
resumeCommand,
psCommand,
}
if err := app.Run(os.Args); err != nil {
log.Fatal(err)
}
}

View File

@ -1,10 +0,0 @@
{
"name": "luke",
"parent": "darth",
"allow_all_devices": true,
"memory": 1073741824,
"memory_swap": -1,
"cpu_shares": 2048,
"cpu_quota": 500000,
"cpu_period": 250000
}

View File

@ -57,20 +57,35 @@ type data struct {
pid int
}
func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
d, err := getCgroupData(c, pid)
if err != nil {
return nil, err
}
for _, sys := range subsystems {
paths := make(map[string]string)
defer func() {
if err != nil {
cgroups.RemovePaths(paths)
}
}()
for name, sys := range subsystems {
if err := sys.Set(d); err != nil {
d.Cleanup()
return nil, err
}
// FIXME: Apply should, ideally, be reentrant or be broken up into a separate
// create and join phase so that the cgroup hierarchy for a container can be
// created then join consists of writing the process pids to cgroup.procs
p, err := d.path(name)
if err != nil {
if cgroups.IsNotFound(err) {
continue
}
return nil, err
}
paths[name] = p
}
return d, nil
return paths, nil
}
// Symmetrical public function to update device based cgroups. Also available
@ -86,33 +101,13 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error {
return devices.Set(d)
}
func Cleanup(c *cgroups.Cgroup) error {
d, err := getCgroupData(c, 0)
if err != nil {
return fmt.Errorf("Could not get Cgroup data %s", err)
}
return d.Cleanup()
}
func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
func GetStats(systemPaths map[string]string) (*cgroups.Stats, error) {
stats := cgroups.NewStats()
d, err := getCgroupData(c, 0)
if err != nil {
return nil, fmt.Errorf("getting CgroupData %s", err)
}
for sysname, sys := range subsystems {
path, err := d.path(sysname)
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return nil, err
for name, path := range systemPaths {
sys, ok := subsystems[name]
if !ok {
continue
}
if err := sys.GetStats(path, stats); err != nil {
return nil, err
}
@ -176,26 +171,6 @@ func (raw *data) parent(subsystem string) (string, error) {
return filepath.Join(raw.root, subsystem, initPath), nil
}
func (raw *data) Paths() (map[string]string, error) {
paths := make(map[string]string)
for sysname := range subsystems {
path, err := raw.path(sysname)
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return nil, err
}
paths[sysname] = path
}
return paths, nil
}
func (raw *data) path(subsystem string) (string, error) {
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(raw.cgroup) {
@ -234,13 +209,6 @@ func (raw *data) join(subsystem string) (string, error) {
return path, nil
}
func (raw *data) Cleanup() error {
for _, sys := range subsystems {
sys.Remove(raw)
}
return nil
}
func writeFile(dir, file, data string) error {
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}

View File

@ -14,17 +14,11 @@ type CpusetGroup struct {
}
func (s *CpusetGroup) Set(d *data) error {
// we don't want to join this cgroup unless it is specified
if d.c.CpusetCpus != "" {
dir, err := d.path("cpuset")
if err != nil {
return err
}
return s.SetDir(dir, d.c.CpusetCpus, d.pid)
dir, err := d.path("cpuset")
if err != nil {
return err
}
return nil
return s.SetDir(dir, d.c.CpusetCpus, d.c.CpusetMems, d.pid)
}
func (s *CpusetGroup) Remove(d *data) error {
@ -35,7 +29,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil
}
func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error {
if err := s.ensureParent(dir); err != nil {
return err
}
@ -46,8 +40,17 @@ func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
return err
}
if err := writeFile(dir, "cpuset.cpus", value); err != nil {
return err
// If we don't use --cpuset-xxx, the default value inherit from parent cgroup
// is set in s.ensureParent, otherwise, use the value we set
if cpus != "" {
if err := writeFile(dir, "cpuset.cpus", cpus); err != nil {
return err
}
}
if mems != "" {
if err := writeFile(dir, "cpuset.mems", mems); err != nil {
return err
}
}
return nil

View File

@ -57,7 +57,7 @@ func TestGetCgroupParamsInt(t *testing.T) {
if err != nil {
t.Fatal(err)
} else if value != 0 {
t.Fatalf("Expected %d to equal %f", value, 0)
t.Fatalf("Expected %d to equal %d", value, 0)
}
// Success with negative values lesser than min int64
@ -70,7 +70,7 @@ func TestGetCgroupParamsInt(t *testing.T) {
if err != nil {
t.Fatal(err)
} else if value != 0 {
t.Fatalf("Expected %d to equal %f", value, 0)
t.Fatalf("Expected %d to equal %d", value, 0)
}
// Not a float.

View File

@ -27,7 +27,7 @@ type CpuUsage struct {
type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throlling_data,omitempty"`
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
}
type MemoryStats struct {

View File

@ -12,7 +12,7 @@ func UseSystemd() bool {
return false
}
func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
return nil, fmt.Errorf("Systemd not supported")
}
@ -27,7 +27,3 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error {
func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error {
return fmt.Errorf("Systemd not supported")
}
func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
return nil, fmt.Errorf("Systemd not supported")
}

View File

@ -31,18 +31,15 @@ var (
connLock sync.Mutex
theConn *systemd.Conn
hasStartTransientUnit bool
subsystems = map[string]subsystem{
"devices": &fs.DevicesGroup{},
"memory": &fs.MemoryGroup{},
"cpu": &fs.CpuGroup{},
"cpuset": &fs.CpusetGroup{},
"cpuacct": &fs.CpuacctGroup{},
"blkio": &fs.BlkioGroup{},
"perf_event": &fs.PerfEventGroup{},
"freezer": &fs.FreezerGroup{},
}
)
func newProp(name string, units interface{}) systemd.Property {
return systemd.Property{
Name: name,
Value: dbus.MakeVariant(units),
}
}
func UseSystemd() bool {
s, err := os.Stat("/run/systemd/system")
if err != nil || !s.IsDir() {
@ -84,7 +81,7 @@ func getIfaceForUnit(unitName string) string {
return "Unit"
}
func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
var (
unitName = getUnitName(c)
slice = "system.slice"
@ -99,27 +96,27 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
}
properties = append(properties,
systemd.Property{"Slice", dbus.MakeVariant(slice)},
systemd.Property{"Description", dbus.MakeVariant("docker container " + c.Name)},
systemd.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})},
systemd.PropSlice(slice),
systemd.PropDescription("docker container "+c.Name),
newProp("PIDs", []uint32{uint32(pid)}),
)
// Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time.
properties = append(properties,
systemd.Property{"MemoryAccounting", dbus.MakeVariant(true)},
systemd.Property{"CPUAccounting", dbus.MakeVariant(true)},
systemd.Property{"BlockIOAccounting", dbus.MakeVariant(true)})
newProp("MemoryAccounting", true),
newProp("CPUAccounting", true),
newProp("BlockIOAccounting", true))
if c.Memory != 0 {
properties = append(properties,
systemd.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))})
newProp("MemoryLimit", uint64(c.Memory)))
}
// TODO: MemoryReservation and MemorySwap not available in systemd
if c.CpuShares != 0 {
properties = append(properties,
systemd.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))})
newProp("CPUShares", uint64(c.CpuShares)))
}
if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
@ -140,57 +137,42 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
}
// we need to manually join the freezer cgroup in systemd because it does not currently support it
// via the dbus api
// we need to manually join the freezer and cpuset cgroup in systemd
// because it does not currently support it via the dbus api.
if err := joinFreezer(c, pid); err != nil {
return nil, err
}
if c.CpusetCpus != "" {
if err := joinCpuset(c, pid); err != nil {
return nil, err
}
if err := joinCpuset(c, pid); err != nil {
return nil, err
}
return res, nil
}
func writeFile(dir, file, data string) error {
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func (c *systemdCgroup) Paths() (map[string]string, error) {
paths := make(map[string]string)
for sysname := range subsystems {
subsystemPath, err := getSubsystemPath(c.cgroup, sysname)
for _, sysname := range []string{
"devices",
"memory",
"cpu",
"cpuset",
"cpuacct",
"blkio",
"perf_event",
"freezer",
} {
subsystemPath, err := getSubsystemPath(res.cgroup, sysname)
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return nil, err
}
paths[sysname] = subsystemPath
}
return paths, nil
}
func (c *systemdCgroup) Cleanup() error {
// systemd cleans up, we don't need to do much
paths, err := c.Paths()
if err != nil {
return err
}
for _, path := range paths {
os.RemoveAll(path)
}
return nil
func writeFile(dir, file, data string) error {
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func joinFreezer(c *cgroups.Cgroup, pid int) error {
@ -260,35 +242,6 @@ func getUnitName(c *cgroups.Cgroup) string {
return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name)
}
/*
* This would be nicer to get from the systemd API when accounting
* is enabled, but sadly there is no way to do that yet.
* The lack of this functionality in the API & the approach taken
* is guided by
* http://www.freedesktop.org/wiki/Software/systemd/ControlGroupInterface/#readingaccountinginformation.
*/
func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
stats := cgroups.NewStats()
for sysname, sys := range subsystems {
subsystemPath, err := getSubsystemPath(c, sysname)
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return nil, err
}
if err := sys.GetStats(subsystemPath, stats); err != nil {
return nil, err
}
}
return stats, nil
}
// Atm we can't use the systemd device support because of two missing things:
// * Support for wildcards to allow mknod on any device
// * Support for wildcards to allow /dev/pts support
@ -360,5 +313,5 @@ func joinCpuset(c *cgroups.Cgroup, pid int) error {
s := &fs.CpusetGroup{}
return s.SetDir(path, c.CpusetCpus, pid)
return s.SetDir(path, c.CpusetCpus, c.CpusetMems, pid)
}

View File

@ -189,6 +189,17 @@ func EnterPid(cgroupPaths map[string]string, pid int) error {
}
}
}
return nil
}
// RemovePaths iterates over the provided paths removing them.
// If an error is encountered the removal proceeds and the first error is
// returned to ensure a partial removal is not possible.
func RemovePaths(paths map[string]string) (err error) {
for _, path := range paths {
if rerr := os.RemoveAll(path); err == nil {
err = rerr
}
}
return err
}

View File

@ -10,6 +10,13 @@ type MountConfig mount.MountConfig
type Network network.Network
// Namespace defines configuration for each namespace. It specifies an
// alternate path that is able to be joined via setns.
type Namespace struct {
Name string `json:"name"`
Path string `json:"path,omitempty"`
}
// Config defines configuration options for executing a process inside a contained environment.
type Config struct {
// Mount specific options.
@ -38,7 +45,7 @@ type Config struct {
// Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process
Namespaces map[string]bool `json:"namespaces,omitempty"`
Namespaces []Namespace `json:"namespaces,omitempty"`
// Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask
@ -65,6 +72,10 @@ type Config struct {
// RestrictSys will remount /proc/sys, /sys, and mask over sysrq-trigger as well as /proc/irq and
// /proc/bus
RestrictSys bool `json:"restrict_sys,omitempty"`
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits,omitempty"`
}
// Routes can be specified to create entries in the route table as the container is started
@ -87,3 +98,9 @@ type Route struct {
// The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name,omitempty"`
}
type Rlimit struct {
Type int `json:"type,omitempty"`
Hard uint64 `json:"hard,omitempty"`
Soft uint64 `json:"soft,omitempty"`
}

View File

@ -64,12 +64,12 @@ func TestConfigJsonFormat(t *testing.T) {
t.Fail()
}
if !container.Namespaces["NEWNET"] {
if getNamespaceIndex(container, "NEWNET") == -1 {
t.Log("namespaces should contain NEWNET")
t.Fail()
}
if container.Namespaces["NEWUSER"] {
if getNamespaceIndex(container, "NEWUSER") != -1 {
t.Log("namespaces should not contain NEWUSER")
t.Fail()
}
@ -158,3 +158,12 @@ func TestSelinuxLabels(t *testing.T) {
t.Fatalf("expected mount label %q but received %q", label, container.MountConfig.MountLabel)
}
}
func getNamespaceIndex(config *Config, name string) int {
for i, v := range config.Namespaces {
if v.Name == name {
return i
}
}
return -1
}

View File

@ -103,7 +103,7 @@ func getDeviceNodes(path string) ([]*Device, error) {
switch {
case f.IsDir():
switch f.Name() {
case "pts", "shm", "fd":
case "pts", "shm", "fd", "mqueue":
continue
default:
sub, err := getDeviceNodes(filepath.Join(path, f.Name()))

View File

@ -1,8 +1,11 @@
package integration
import (
"os"
"strings"
"testing"
"github.com/docker/libcontainer"
)
func TestExecPS(t *testing.T) {
@ -36,3 +39,152 @@ func TestExecPS(t *testing.T) {
t.Fatalf("expected output %q but received %q", expected, actual)
}
}
func TestIPCPrivate(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootFs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
l, err := os.Readlink("/proc/1/ns/ipc")
if err != nil {
t.Fatal(err)
}
config := newTemplateConfig(rootfs)
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil {
t.Fatal(err)
}
if exitCode != 0 {
t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
}
if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual == l {
t.Fatalf("ipc link should be private to the conatiner but equals host %q %q", actual, l)
}
}
func TestIPCHost(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootFs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
l, err := os.Readlink("/proc/1/ns/ipc")
if err != nil {
t.Fatal(err)
}
config := newTemplateConfig(rootfs)
i := getNamespaceIndex(config, "NEWIPC")
config.Namespaces = append(config.Namespaces[:i], config.Namespaces[i+1:]...)
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil {
t.Fatal(err)
}
if exitCode != 0 {
t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
}
if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
t.Fatalf("ipc link not equal to host link %q %q", actual, l)
}
}
func TestIPCJoinPath(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootFs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
l, err := os.Readlink("/proc/1/ns/ipc")
if err != nil {
t.Fatal(err)
}
config := newTemplateConfig(rootfs)
i := getNamespaceIndex(config, "NEWIPC")
config.Namespaces[i].Path = "/proc/1/ns/ipc"
buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
if err != nil {
t.Fatal(err)
}
if exitCode != 0 {
t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
}
if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
t.Fatalf("ipc link not equal to host link %q %q", actual, l)
}
}
func TestIPCBadPath(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootFs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
i := getNamespaceIndex(config, "NEWIPC")
config.Namespaces[i].Path = "/proc/1/ns/ipcc"
_, _, err = runContainer(config, "", "true")
if err == nil {
t.Fatal("container succeded with bad ipc path")
}
}
func TestRlimit(t *testing.T) {
if testing.Short() {
return
}
rootfs, err := newRootFs()
if err != nil {
t.Fatal(err)
}
defer remove(rootfs)
config := newTemplateConfig(rootfs)
out, _, err := runContainer(config, "", "/bin/sh", "-c", "ulimit -n")
if err != nil {
t.Fatal(err)
}
if limit := strings.TrimSpace(out.Stdout.String()); limit != "1024" {
t.Fatalf("expected rlimit to be 1024, got %s", limit)
}
}
func getNamespaceIndex(config *libcontainer.Config, name string) int {
for i, v := range config.Namespaces {
if v.Name == name {
return i
}
}
return -1
}

View File

@ -6,7 +6,6 @@ import (
"runtime"
"github.com/docker/libcontainer/namespaces"
"github.com/docker/libcontainer/syncpipe"
)
// init runs the libcontainer initialization code because of the busybox style needs
@ -27,12 +26,7 @@ func init() {
log.Fatal(err)
}
syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3)
if err != nil {
log.Fatalf("unable to create sync pipe: %s", err)
}
if err := namespaces.Init(container, rootfs, "", syncPipe, os.Args[3:]); err != nil {
if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil {
log.Fatalf("unable to initialize for container: %s", err)
}
os.Exit(1)

View File

@ -1,6 +1,8 @@
package integration
import (
"syscall"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/devices"
@ -30,12 +32,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config {
"KILL",
"AUDIT_WRITE",
},
Namespaces: map[string]bool{
"NEWNS": true,
"NEWUTS": true,
"NEWIPC": true,
"NEWPID": true,
"NEWNET": true,
Namespaces: []libcontainer.Namespace{
{Name: "NEWNS"},
{Name: "NEWUTS"},
{Name: "NEWIPC"},
{Name: "NEWPID"},
{Name: "NEWNET"},
},
Cgroups: &cgroups.Cgroup{
Parent: "integration",
@ -60,5 +62,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config {
Gateway: "localhost",
},
},
Rlimits: []libcontainer.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: uint64(1024),
Soft: uint64(1024),
},
},
}
}

View File

@ -25,6 +25,10 @@ func SetFileLabel(path string, fileLabel string) error {
return nil
}
func SetFileCreateLabel(fileLabel string) error {
return nil
}
func Relabel(path string, fileLabel string, relabel string) error {
return nil
}
@ -43,3 +47,15 @@ func ReserveLabel(label string) error {
func UnreserveLabel(label string) error {
return nil
}
// DupSecOpt takes an process label and returns security options that
// can be used to set duplicate labels on future container processes
func DupSecOpt(src string) []string {
return nil
}
// DisableSecOpt returns a security opt that can disable labeling
// support for future container processes
func DisableSecOpt() []string {
return nil
}

View File

@ -17,7 +17,6 @@ func InitLabels(options []string) (string, string, error) {
if !selinux.SelinuxEnabled() {
return "", "", nil
}
var err error
processLabel, mountLabel := selinux.GetLxcContexts()
if processLabel != "" {
pcon := selinux.NewContext(processLabel)
@ -38,7 +37,7 @@ func InitLabels(options []string) (string, string, error) {
processLabel = pcon.Get()
mountLabel = mcon.Get()
}
return processLabel, mountLabel, err
return processLabel, mountLabel, nil
}
// DEPRECATED: The GenLabels function is only to be used during the transition to the official API.
@ -88,6 +87,14 @@ func SetFileLabel(path string, fileLabel string) error {
return nil
}
// Tell the kernel the label for all files to be created
func SetFileCreateLabel(fileLabel string) error {
if selinux.SelinuxEnabled() {
return selinux.Setfscreatecon(fileLabel)
}
return nil
}
// Change the label of path to the filelabel string. If the relabel string
// is "z", relabel will change the MCS label to s0. This will allow all
// containers to share the content. If the relabel string is a "Z" then
@ -130,3 +137,15 @@ func UnreserveLabel(label string) error {
selinux.FreeLxcContexts(label)
return nil
}
// DupSecOpt takes an process label and returns security options that
// can be used to set duplicate labels on future container processes
func DupSecOpt(src string) []string {
return selinux.DupSecOpt(src)
}
// DisableSecOpt returns a security opt that can disable labeling
// support for future container processes
func DisableSecOpt() []string {
return selinux.DisableSecOpt()
}

View File

@ -3,6 +3,7 @@
package label
import (
"strings"
"testing"
"github.com/docker/libcontainer/selinux"
@ -33,7 +34,7 @@ func TestInit(t *testing.T) {
t.Fatal(err)
}
if plabel != "user_u:user_r:user_t:s0:c1,c15" || mlabel != "user_u:object_r:svirt_sandbox_file_t:s0:c1,c15" {
t.Log("InitLabels User Failed")
t.Log("InitLabels User Match Failed")
t.Log(plabel, mlabel)
t.Fatal(err)
}
@ -46,3 +47,43 @@ func TestInit(t *testing.T) {
}
}
}
func TestDuplicateLabel(t *testing.T) {
secopt := DupSecOpt("system_u:system_r:svirt_lxc_net_t:s0:c1,c2")
t.Log(secopt)
for _, opt := range secopt {
con := strings.SplitN(opt, ":", 3)
if len(con) != 3 || con[0] != "label" {
t.Errorf("Invalid DupSecOpt return value")
continue
}
if con[1] == "user" {
if con[2] != "system_u" {
t.Errorf("DupSecOpt Failed user incorrect")
}
continue
}
if con[1] == "role" {
if con[2] != "system_r" {
t.Errorf("DupSecOpt Failed role incorrect")
}
continue
}
if con[1] == "type" {
if con[2] != "svirt_lxc_net_t" {
t.Errorf("DupSecOpt Failed type incorrect")
}
continue
}
if con[1] == "level" {
if con[2] != "s0:c1,c2" {
t.Errorf("DupSecOpt Failed level incorrect")
}
continue
}
t.Errorf("DupSecOpt Failed invalid field %q", con[1])
}
secopt = DisableSecOpt()
if secopt[0] != "label:disable" {
t.Errorf("DisableSecOpt Failed level incorrect")
}
}

View File

@ -97,7 +97,7 @@ func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountCon
return nil
}
// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts
// mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts
// inside the mount namespace
func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) error {
for _, m := range newSystemMounts(rootfs, mountConfig.MountLabel, sysReadonly) {
@ -168,6 +168,7 @@ func newSystemMounts(rootfs, mountLabel string, sysReadonly bool) []mount {
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
{source: "mqueue", path: filepath.Join(rootfs, "dev", "mqueue"), device: "mqueue", flags: defaultMountFlags},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
}

View File

@ -3,6 +3,7 @@
package namespaces
import (
"encoding/json"
"io"
"os"
"os/exec"
@ -13,7 +14,6 @@ import (
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
"github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/syncpipe"
"github.com/docker/libcontainer/system"
)
@ -22,19 +22,17 @@ import (
// Exec performs setup outside of a namespace so that a container can be
// executed. Exec is a high level function for working with container namespaces.
func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
var (
err error
)
var err error
// create a pipe so that we can syncronize with the namespaced process and
// pass the veth name to the child
syncPipe, err := syncpipe.NewSyncPipe()
// pass the state and configuration to the child process
parent, child, err := newInitPipe()
if err != nil {
return -1, err
}
defer syncPipe.Close()
defer parent.Close()
command := createCommand(container, console, dataPath, os.Args[0], syncPipe.Child(), args)
command := createCommand(container, console, dataPath, os.Args[0], child, args)
// Note: these are only used in non-tty mode
// if there is a tty for the container it will be opened within the namespace and the
// fds will be duped to stdin, stdiout, and stderr
@ -43,39 +41,42 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
command.Stderr = stderr
if err := command.Start(); err != nil {
child.Close()
return -1, err
}
child.Close()
// Now we passed the pipe to the child, close our side
syncPipe.CloseChild()
terminate := func(terr error) (int, error) {
// TODO: log the errors for kill and wait
command.Process.Kill()
command.Wait()
return -1, terr
}
started, err := system.GetProcessStartTime(command.Process.Pid)
if err != nil {
return -1, err
return terminate(err)
}
// Do this before syncing with child so that no children
// can escape the cgroup
cgroupRef, err := SetupCgroups(container, command.Process.Pid)
cgroupPaths, err := SetupCgroups(container, command.Process.Pid)
if err != nil {
command.Process.Kill()
command.Wait()
return -1, err
}
defer cgroupRef.Cleanup()
cgroupPaths, err := cgroupRef.Paths()
if err != nil {
command.Process.Kill()
command.Wait()
return -1, err
return terminate(err)
}
defer cgroups.RemovePaths(cgroupPaths)
var networkState network.NetworkState
if err := InitializeNetworking(container, command.Process.Pid, syncPipe, &networkState); err != nil {
command.Process.Kill()
command.Wait()
return -1, err
if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
return terminate(err)
}
// send the state to the container's init process then shutdown writes for the parent
if err := json.NewEncoder(parent).Encode(networkState); err != nil {
return terminate(err)
}
// shutdown writes for the parent side of the pipe
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
return terminate(err)
}
state := &libcontainer.State{
@ -86,17 +87,18 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
}
if err := libcontainer.SaveState(dataPath, state); err != nil {
command.Process.Kill()
command.Wait()
return -1, err
return terminate(err)
}
defer libcontainer.DeleteState(dataPath)
// Sync with child
if err := syncPipe.ReadFromChild(); err != nil {
command.Process.Kill()
command.Wait()
return -1, err
// wait for the child process to fully complete and receive an error message
// if one was encoutered
var ierr *initError
if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
return terminate(err)
}
if ierr != nil {
return terminate(ierr)
}
if startCallback != nil {
@ -108,7 +110,6 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
return -1, err
}
}
return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
}
@ -129,16 +130,6 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini
"data_path=" + dataPath,
}
/*
TODO: move user and wd into env
if user != "" {
env = append(env, "user="+user)
}
if workingDir != "" {
env = append(env, "wd="+workingDir)
}
*/
command := exec.Command(init, append([]string{"init", "--"}, args...)...)
// make sure the process is executed inside the context of the rootfs
command.Dir = container.RootFs
@ -157,23 +148,20 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini
// SetupCgroups applies the cgroup restrictions to the process running in the container based
// on the container's configuration
func SetupCgroups(container *libcontainer.Config, nspid int) (cgroups.ActiveCgroup, error) {
func SetupCgroups(container *libcontainer.Config, nspid int) (map[string]string, error) {
if container.Cgroups != nil {
c := container.Cgroups
if systemd.UseSystemd() {
return systemd.Apply(c, nspid)
}
return fs.Apply(c, nspid)
}
return nil, nil
return map[string]string{}, nil
}
// InitializeNetworking creates the container's network stack outside of the namespace and moves
// interfaces into the container's net namespaces if necessary
func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *syncpipe.SyncPipe, networkState *network.NetworkState) error {
func InitializeNetworking(container *libcontainer.Config, nspid int, networkState *network.NetworkState) error {
for _, config := range container.Networks {
strategy, err := network.GetStrategy(config.Type)
if err != nil {
@ -183,18 +171,5 @@ func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *syncp
return err
}
}
return pipe.SendToChild(networkState)
}
// GetNamespaceFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare, and setns
func GetNamespaceFlags(namespaces map[string]bool) (flag int) {
for key, enabled := range namespaces {
if enabled {
if ns := GetNamespace(key); ns != nil {
flag |= ns.Value
}
}
}
return flag
return nil
}

View File

@ -3,6 +3,7 @@
package namespaces
import (
"encoding/json"
"fmt"
"io"
"os"
@ -15,7 +16,6 @@ import (
"github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/syncpipe"
"github.com/docker/libcontainer/system"
)
@ -41,11 +41,11 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs
}
}
pipe, err := syncpipe.NewSyncPipe()
parent, child, err := newInitPipe()
if err != nil {
return -1, err
}
defer pipe.Close()
defer parent.Close()
// Note: these are only used in non-tty mode
// if there is a tty for the container it will be opened within the namespace and the
@ -53,23 +53,28 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs
cmd.Stdin = stdin
cmd.Stdout = stdout
cmd.Stderr = stderr
cmd.ExtraFiles = []*os.File{pipe.Child()}
cmd.ExtraFiles = []*os.File{child}
if err := cmd.Start(); err != nil {
child.Close()
return -1, err
}
pipe.CloseChild()
child.Close()
terminate := func(terr error) (int, error) {
// TODO: log the errors for kill and wait
cmd.Process.Kill()
cmd.Wait()
return -1, terr
}
// Enter cgroups.
if err := EnterCgroups(state, cmd.Process.Pid); err != nil {
return -1, err
return terminate(err)
}
if err := pipe.SendToChild(container); err != nil {
cmd.Process.Kill()
cmd.Wait()
return -1, err
if err := json.NewEncoder(parent).Encode(container); err != nil {
return terminate(err)
}
if startCallback != nil {
@ -81,7 +86,6 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs
return -1, err
}
}
return cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
}
@ -107,7 +111,7 @@ func FinalizeSetns(container *libcontainer.Config, args []string) error {
}
}
if err := system.Execv(args[0], args[0:], container.Env); err != nil {
if err := system.Execv(args[0], args[0:], os.Environ()); err != nil {
return err
}

View File

@ -3,7 +3,9 @@
package namespaces
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"strings"
"syscall"
@ -17,7 +19,6 @@ import (
"github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/security/capabilities"
"github.com/docker/libcontainer/security/restrict"
"github.com/docker/libcontainer/syncpipe"
"github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user"
"github.com/docker/libcontainer/utils"
@ -29,11 +30,22 @@ import (
// and other options required for the new container.
// The caller of Init function has to ensure that the go runtime is locked to an OS thread
// (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended.
func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *syncpipe.SyncPipe, args []string) (err error) {
func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) {
defer func() {
// if we have an error during the initialization of the container's init then send it back to the
// parent process in the form of an initError.
if err != nil {
syncPipe.ReportChildError(err)
// ensure that any data sent from the parent is consumed so it doesn't
// receive ECONNRESET when the child writes to the pipe.
ioutil.ReadAll(pipe)
if err := json.NewEncoder(pipe).Encode(initError{
Message: err.Error(),
}); err != nil {
panic(err)
}
}
// ensure that this pipe is always closed
pipe.Close()
}()
rootfs, err := utils.ResolveRootfs(uncleanRootfs)
@ -49,10 +61,13 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
// We always read this as it is a way to sync with the parent as well
var networkState *network.NetworkState
if err := syncPipe.ReadFromParent(&networkState); err != nil {
if err := json.NewDecoder(pipe).Decode(&networkState); err != nil {
return err
}
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(container.Namespaces); err != nil {
return err
}
if consolePath != "" {
if err := console.OpenAndDup(consolePath); err != nil {
return err
@ -66,6 +81,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
return fmt.Errorf("setctty %s", err)
}
}
if err := setupNetwork(container, networkState); err != nil {
return fmt.Errorf("setup networking %s", err)
}
@ -73,6 +89,10 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
return fmt.Errorf("setup route %s", err)
}
if err := setupRlimits(container); err != nil {
return fmt.Errorf("setup rlimits %s", err)
}
label.Init()
if err := mount.InitializeMountNamespace(rootfs,
@ -84,7 +104,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
if container.Hostname != "" {
if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
return fmt.Errorf("sethostname %s", err)
return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
}
}
@ -151,26 +171,43 @@ func RestoreParentDeathSignal(old int) error {
// SetupUser changes the groups, gid, and uid for the user inside the container
func SetupUser(u string) error {
uid, gid, suppGids, home, err := user.GetUserGroupSupplementaryHome(u, syscall.Getuid(), syscall.Getgid(), "/")
// Set up defaults.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
Gid: syscall.Getgid(),
Home: "/",
}
passwdFile, err := user.GetPasswdFile()
if err != nil {
return err
}
groupFile, err := user.GetGroupFile()
if err != nil {
return err
}
execUser, err := user.GetExecUserFile(u, &defaultExecUser, passwdFile, groupFile)
if err != nil {
return fmt.Errorf("get supplementary groups %s", err)
}
if err := syscall.Setgroups(suppGids); err != nil {
if err := syscall.Setgroups(execUser.Sgids); err != nil {
return fmt.Errorf("setgroups %s", err)
}
if err := syscall.Setgid(gid); err != nil {
if err := system.Setgid(execUser.Gid); err != nil {
return fmt.Errorf("setgid %s", err)
}
if err := syscall.Setuid(uid); err != nil {
if err := system.Setuid(execUser.Uid); err != nil {
return fmt.Errorf("setuid %s", err)
}
// if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", home); err != nil {
if err := os.Setenv("HOME", execUser.Home); err != nil {
return fmt.Errorf("set HOME %s", err)
}
}
@ -205,6 +242,16 @@ func setupRoute(container *libcontainer.Config) error {
return nil
}
func setupRlimits(container *libcontainer.Config) error {
for _, rlimit := range container.Rlimits {
l := &syscall.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}
if err := syscall.Setrlimit(rlimit.Type, l); err != nil {
return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err)
}
}
return nil
}
// FinalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaky file descriptors
// before execing the command inside the namespace
@ -261,3 +308,22 @@ func LoadContainerEnvironment(container *libcontainer.Config) error {
}
return nil
}
// joinExistingNamespaces gets all the namespace paths specified for the container and
// does a setns on the namespace fd so that the current process joins the namespace.
func joinExistingNamespaces(namespaces []libcontainer.Namespace) error {
for _, ns := range namespaces {
if ns.Path != "" {
f, err := os.OpenFile(ns.Path, os.O_RDONLY, 0)
if err != nil {
return err
}
err = system.Setns(f.Fd(), uintptr(namespaceInfo[ns.Name]))
f.Close()
if err != nil {
return err
}
}
}
return nil
}

View File

@ -10,6 +10,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/types.h>
#include <unistd.h>
#include <getopt.h>
@ -88,6 +89,11 @@ void nsenter()
return;
}
if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) {
fprintf(stderr, "nsenter: failed to set child subreaper: %s", strerror(errno));
exit(1);
}
static const struct option longopts[] = {
{"nspid", required_argument, NULL, 'n'},
{"console", required_argument, NULL, 't'},

View File

@ -1,50 +0,0 @@
package namespaces
import "errors"
type (
Namespace struct {
Key string `json:"key,omitempty"`
Value int `json:"value,omitempty"`
File string `json:"file,omitempty"`
}
Namespaces []*Namespace
)
// namespaceList is used to convert the libcontainer types
// into the names of the files located in /proc/<pid>/ns/* for
// each namespace
var (
namespaceList = Namespaces{}
ErrUnkownNamespace = errors.New("Unknown namespace")
ErrUnsupported = errors.New("Unsupported method")
)
func (ns *Namespace) String() string {
return ns.Key
}
func GetNamespace(key string) *Namespace {
for _, ns := range namespaceList {
if ns.Key == key {
cpy := *ns
return &cpy
}
}
return nil
}
// Contains returns true if the specified Namespace is
// in the slice
func (n Namespaces) Contains(ns string) bool {
return n.Get(ns) != nil
}
func (n Namespaces) Get(ns string) *Namespace {
for _, nsp := range n {
if nsp != nil && nsp.Key == ns {
return nsp
}
}
return nil
}

View File

@ -1,16 +0,0 @@
package namespaces
import (
"syscall"
)
func init() {
namespaceList = Namespaces{
{Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"},
{Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"},
{Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"},
{Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"},
{Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"},
{Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"},
}
}

View File

@ -1,30 +0,0 @@
package namespaces
import (
"testing"
)
func TestNamespacesContains(t *testing.T) {
ns := Namespaces{
GetNamespace("NEWPID"),
GetNamespace("NEWNS"),
GetNamespace("NEWUTS"),
}
if ns.Contains("NEWNET") {
t.Fatal("namespaces should not contain NEWNET")
}
if !ns.Contains("NEWPID") {
t.Fatal("namespaces should contain NEWPID but does not")
}
withNil := Namespaces{
GetNamespace("UNDEFINED"), // this element will be nil
GetNamespace("NEWPID"),
}
if !withNil.Contains("NEWPID") {
t.Fatal("namespaces should contain NEWPID but does not")
}
}

View File

@ -0,0 +1,45 @@
// +build linux
package namespaces
import (
"os"
"syscall"
"github.com/docker/libcontainer"
)
type initError struct {
Message string `json:"message,omitempty"`
}
func (i initError) Error() string {
return i.Message
}
var namespaceInfo = map[string]int{
"NEWNET": syscall.CLONE_NEWNET,
"NEWNS": syscall.CLONE_NEWNS,
"NEWUSER": syscall.CLONE_NEWUSER,
"NEWIPC": syscall.CLONE_NEWIPC,
"NEWUTS": syscall.CLONE_NEWUTS,
"NEWPID": syscall.CLONE_NEWPID,
}
// New returns a newly initialized Pipe for communication between processes
func newInitPipe() (parent *os.File, child *os.File, err error) {
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return nil, nil, err
}
return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil
}
// GetNamespaceFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare, and setns
func GetNamespaceFlags(namespaces []libcontainer.Namespace) (flag int) {
for _, v := range namespaces {
flag |= namespaceInfo[v.Name]
}
return flag
}

View File

@ -7,6 +7,7 @@ import (
"math/rand"
"net"
"os"
"path/filepath"
"sync/atomic"
"syscall"
"unsafe"
@ -575,6 +576,31 @@ func NetworkSetMTU(iface *net.Interface, mtu int) error {
return s.HandleAck(wb.Seq)
}
// Set link queue length
// This is identical to running: ip link set dev $name txqueuelen $QLEN
func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)
msg := newIfInfomsg(syscall.AF_UNSPEC)
msg.Type = syscall.RTM_SETLINK
msg.Flags = syscall.NLM_F_REQUEST
msg.Index = int32(iface.Index)
msg.Change = DEFAULT_CHANGE
wb.AddData(msg)
wb.AddData(uint32Attr(syscall.IFLA_TXQLEN, uint32(txQueueLen)))
if err := s.Send(wb); err != nil {
return err
}
return s.HandleAck(wb.Seq)
}
func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error {
s, err := getNetlinkSocket()
if err != nil {
@ -768,26 +794,38 @@ func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error {
return s.HandleAck(wb.Seq)
}
// Add MAC VLAN network interface with masterDev as its upper device
// This is identical to running:
// ip link add name $name link $masterdev type macvlan mode $mode
func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
// MacVlan link has LowerDev, UpperDev and operates in Mode mode
// This simplifies the code when creating MacVlan or MacVtap interface
type MacVlanLink struct {
MasterDev string
SlaveDev string
mode string
}
macVlan := map[string]uint32{
func (m MacVlanLink) Mode() uint32 {
modeMap := map[string]uint32{
"private": MACVLAN_MODE_PRIVATE,
"vepa": MACVLAN_MODE_VEPA,
"bridge": MACVLAN_MODE_BRIDGE,
"passthru": MACVLAN_MODE_PASSTHRU,
}
return modeMap[m.mode]
}
// Add MAC VLAN network interface with masterDev as its upper device
// This is identical to running:
// ip link add name $name link $masterdev type macvlan mode $mode
func networkLinkMacVlan(dev_type string, mcvln *MacVlanLink) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
masterDevIfc, err := net.InterfaceByName(masterDev)
masterDevIfc, err := net.InterfaceByName(mcvln.MasterDev)
if err != nil {
return err
}
@ -796,16 +834,16 @@ func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
wb.AddData(msg)
nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("macvlan"))
newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated(dev_type))
nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
macVlanData := make([]byte, 4)
native.PutUint32(macVlanData, macVlan[mode])
native.PutUint32(macVlanData, mcvln.Mode())
newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData)
wb.AddData(nest1)
wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(macVlanDev)))
wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(mcvln.SlaveDev)))
if err := s.Send(wb); err != nil {
return err
@ -813,6 +851,22 @@ func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
return s.HandleAck(wb.Seq)
}
func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
return networkLinkMacVlan("macvlan", &MacVlanLink{
MasterDev: masterDev,
SlaveDev: macVlanDev,
mode: mode,
})
}
func NetworkLinkAddMacVtap(masterDev, macVlanDev string, mode string) error {
return networkLinkMacVlan("macvtap", &MacVlanLink{
MasterDev: masterDev,
SlaveDev: macVlanDev,
mode: mode,
})
}
func networkLinkIpAction(action, flags int, ifa IfAddr) error {
s, err := getNetlinkSocket()
if err != nil {
@ -1002,28 +1056,23 @@ func AddRoute(destination, source, gateway, device string) error {
}
if source != "" {
srcIP, srcNet, err := net.ParseCIDR(source)
if err != nil {
return fmt.Errorf("source CIDR %s couldn't be parsed", source)
srcIP := net.ParseIP(source)
if srcIP == nil {
return fmt.Errorf("source IP %s couldn't be parsed", source)
}
srcFamily := getIpFamily(srcIP)
if currentFamily != -1 && currentFamily != srcFamily {
return fmt.Errorf("source and destination ip were not the same IP family")
}
currentFamily = srcFamily
srcLen, bits := srcNet.Mask.Size()
if srcLen == 0 && bits == 0 {
return fmt.Errorf("source CIDR %s generated a non-canonical Mask", source)
}
msg.Family = uint8(srcFamily)
msg.Src_len = uint8(srcLen)
var srcData []byte
if srcFamily == syscall.AF_INET {
srcData = srcIP.To4()
} else {
srcData = srcIP.To16()
}
rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_SRC, srcData))
rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_PREFSRC, srcData))
}
if gateway != "" {
@ -1204,6 +1253,28 @@ func SetMacAddress(name, addr string) error {
return nil
}
func SetHairpinMode(iface *net.Interface, enabled bool) error {
sysPath := filepath.Join("/sys/class/net", iface.Name, "brport/hairpin_mode")
sysFile, err := os.OpenFile(sysPath, os.O_WRONLY, 0)
if err != nil {
return err
}
defer sysFile.Close()
var writeVal []byte
if enabled {
writeVal = []byte("1")
} else {
writeVal = []byte("0")
}
if _, err := sysFile.Write(writeVal); err != nil {
return err
}
return nil
}
func ChangeName(iface *net.Interface, newName string) error {
if len(newName) >= IFNAMSIZ {
return fmt.Errorf("Interface name %s too long", newName)
@ -1224,5 +1295,6 @@ func ChangeName(iface *net.Interface, newName string) error {
if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 {
return errno
}
return nil
}

View File

@ -116,7 +116,7 @@ func TestNetworkSetMacAddress(t *testing.T) {
ifcBeforeSet := readLink(t, tl.name)
if err := NetworkSetMacAddress(ifcBeforeSet, macaddr); err != nil {
t.Fatalf("Could not set %s MAC address on %#v interface: err", macaddr, tl, err)
t.Fatalf("Could not set %s MAC address on %#v interface: %s", macaddr, tl, err)
}
ifcAfterSet := readLink(t, tl.name)
@ -140,7 +140,7 @@ func TestNetworkSetMTU(t *testing.T) {
ifcBeforeSet := readLink(t, tl.name)
if err := NetworkSetMTU(ifcBeforeSet, mtu); err != nil {
t.Fatalf("Could not set %d MTU on %#v interface: err", mtu, tl, err)
t.Fatalf("Could not set %d MTU on %#v interface: %s", mtu, tl, err)
}
ifcAfterSet := readLink(t, tl.name)
@ -248,6 +248,30 @@ func TestNetworkLinkAddMacVlan(t *testing.T) {
readLink(t, tl.name)
}
func TestNetworkLinkAddMacVtap(t *testing.T) {
if testing.Short() {
return
}
tl := struct {
name string
mode string
}{
name: "tstVtap",
mode: "private",
}
masterLink := testLink{"tstEth", "dummy"}
addLink(t, masterLink.name, masterLink.linkType)
defer deleteLink(t, masterLink.name)
if err := NetworkLinkAddMacVtap(masterLink.name, tl.name, tl.mode); err != nil {
t.Fatalf("Unable to create %#v MAC VTAP interface: %s", tl, err)
}
readLink(t, tl.name)
}
func TestAddDelNetworkIp(t *testing.T) {
if testing.Short() {
return
@ -280,6 +304,34 @@ func TestAddDelNetworkIp(t *testing.T) {
}
}
func TestAddRouteSourceSelection(t *testing.T) {
tstIp := "127.1.1.1"
tl := testLink{name: "tstEth", linkType: "dummy"}
addLink(t, tl.name, tl.linkType)
defer deleteLink(t, tl.name)
ip := net.ParseIP(tstIp)
mask := net.IPv4Mask(255, 255, 255, 255)
ipNet := &net.IPNet{IP: ip, Mask: mask}
iface, err := net.InterfaceByName(tl.name)
if err != nil {
t.Fatalf("Lost created link %#v", tl)
}
if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil {
t.Fatalf("Could not add IP address %s to interface %#v: %s", ip.String(), iface, err)
}
upLink(t, tl.name)
defer downLink(t, tl.name)
if err := AddRoute("127.0.0.0/8", tstIp, "", tl.name); err != nil {
t.Fatalf("Failed to add route with source address")
}
}
func TestCreateVethPair(t *testing.T) {
if testing.Short() {
return

View File

@ -47,6 +47,10 @@ func NetworkSetMTU(iface *net.Interface, mtu int) error {
return ErrNotImplemented
}
func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
return ErrNotImplemented
}
func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error {
return ErrNotImplemented
}

View File

@ -1,39 +0,0 @@
// +build linux
package network
import (
"fmt"
"os"
"syscall"
"github.com/docker/libcontainer/system"
)
// crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace
type NetNS struct {
}
func (v *NetNS) Create(n *Network, nspid int, networkState *NetworkState) error {
networkState.NsPath = n.NsPath
return nil
}
func (v *NetNS) Initialize(config *Network, networkState *NetworkState) error {
if networkState.NsPath == "" {
return fmt.Errorf("nspath does is not specified in NetworkState")
}
f, err := os.OpenFile(networkState.NsPath, os.O_RDONLY, 0)
if err != nil {
return fmt.Errorf("failed get network namespace fd: %v", err)
}
if err := system.Setns(f.Fd(), syscall.CLONE_NEWNET); err != nil {
f.Close()
return fmt.Errorf("failed to setns current network namespace: %v", err)
}
f.Close()
return nil
}

View File

@ -95,3 +95,11 @@ func SetMtu(name string, mtu int) error {
}
return netlink.NetworkSetMTU(iface, mtu)
}
func SetHairpinMode(name string, enabled bool) error {
iface, err := net.InterfaceByName(name)
if err != nil {
return err
}
return netlink.SetHairpinMode(iface, enabled)
}

View File

@ -13,7 +13,6 @@ var (
var strategies = map[string]NetworkStrategy{
"veth": &Veth{},
"loopback": &Loopback{},
"netns": &NetNS{},
}
// NetworkStrategy represents a specific network configuration for

View File

@ -8,9 +8,6 @@ type Network struct {
// Type sets the networks type, commonly veth and loopback
Type string `json:"type,omitempty"`
// Path to network namespace
NsPath string `json:"ns_path,omitempty"`
// The bridge to use.
Bridge string `json:"bridge,omitempty"`
@ -50,6 +47,4 @@ type NetworkState struct {
VethHost string `json:"veth_host,omitempty"`
// The name of the veth interface created inside the container for the child.
VethChild string `json:"veth_child,omitempty"`
// Net namespace path.
NsPath string `json:"ns_path,omitempty"`
}

View File

@ -8,7 +8,6 @@ import (
"github.com/codegangsta/cli"
"github.com/docker/libcontainer/namespaces"
"github.com/docker/libcontainer/syncpipe"
)
var (
@ -41,12 +40,8 @@ func initAction(context *cli.Context) {
log.Fatal(err)
}
syncPipe, err := syncpipe.NewSyncPipeFromFd(0, uintptr(pipeFd))
if err != nil {
log.Fatalf("unable to create sync pipe: %s", err)
}
if err := namespaces.Init(container, rootfs, console, syncPipe, []string(context.Args())); err != nil {
pipe := os.NewFile(uintptr(pipeFd), "pipe")
if err := namespaces.Init(container, rootfs, console, pipe, []string(context.Args())); err != nil {
log.Fatalf("unable to initialize for container: %s", err)
}
}

View File

@ -8,7 +8,6 @@ import (
"github.com/codegangsta/cli"
"github.com/docker/libcontainer"
"github.com/docker/libcontainer/syncpipe"
)
// rFunc is a function registration for calling after an execin
@ -59,16 +58,13 @@ func findUserArgs() []string {
// loadConfigFromFd loads a container's config from the sync pipe that is provided by
// fd 3 when running a process
func loadConfigFromFd() (*libcontainer.Config, error) {
syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3)
if err != nil {
return nil, err
}
pipe := os.NewFile(3, "pipe")
defer pipe.Close()
var config *libcontainer.Config
if err := syncPipe.ReadFromParent(&config); err != nil {
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err
}
return config, nil
}

View File

@ -176,13 +176,13 @@
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"namespaces": [
{"name":"NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",

View File

@ -175,13 +175,13 @@
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"namespaces": [
{"name": "NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",

View File

@ -181,13 +181,13 @@
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"namespaces": [
{"name": "NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",

View File

@ -0,0 +1,209 @@
{
"capabilities": [
"CHOWN",
"DAC_OVERRIDE",
"FOWNER",
"MKNOD",
"NET_RAW",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"SYS_CHROOT",
"KILL"
],
"cgroups": {
"allowed_devices": [
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 98
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 1,
"path": "/dev/console",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"path": "/dev/tty0",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"minor_number": 1,
"path": "/dev/tty1",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 136,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 2,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 10,
"minor_number": 200,
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
],
"name": "docker-koye",
"parent": "docker"
},
"restrict_sys": true,
"mount_config": {
"device_nodes": [
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
]
},
"environment": [
"HOME=/",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=koye",
"TERM=xterm"
],
"hostname": "koye",
"namespaces": [
{"name": "NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",
"gateway": "localhost",
"mtu": 1500,
"type": "loopback"
},
{
"address": "172.17.0.101/16",
"bridge": "docker0",
"veth_prefix": "veth",
"mtu": 1500,
"type": "veth"
}
],
"routes": [
{
"destination": "0.0.0.0/0",
"source": "172.17.0.101",
"gateway": "172.17.42.1",
"interface_name": "eth0"
}
],
"tty": true
}

View File

@ -177,13 +177,13 @@
"TERM=xterm"
],
"hostname": "koye",
"namespaces": {
"NEWIPC": true,
"NEWNET": true,
"NEWNS": true,
"NEWPID": true,
"NEWUTS": true
},
"namespaces": [
{"name": "NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",

View File

@ -434,3 +434,28 @@ func Chcon(fpath string, scon string, recurse bool) error {
return Setfilecon(fpath, scon)
}
// DupSecOpt takes an SELinux process label and returns security options that
// can will set the SELinux Type and Level for future container processes
func DupSecOpt(src string) []string {
if src == "" {
return nil
}
con := NewContext(src)
if con["user"] == "" ||
con["role"] == "" ||
con["type"] == "" ||
con["level"] == "" {
return nil
}
return []string{"label:user:" + con["user"],
"label:role:" + con["role"],
"label:type:" + con["type"],
"label:level:" + con["level"]}
}
// DisableSecOpt returns a security opt that can be used to disabling SELinux
// labeling support for future container processes
func DisableSecOpt() []string {
return []string{"label:disable"}
}

View File

@ -42,7 +42,7 @@ func TestSELinux(t *testing.T) {
t.Log("getenforce ", selinux.SelinuxGetEnforce())
t.Log("getenforcemode ", selinux.SelinuxGetEnforceMode())
pid := os.Getpid()
t.Log("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023))
t.Logf("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023))
err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0")
if err == nil {
t.Log(selinux.Getfscreatecon())

View File

@ -1,105 +0,0 @@
package syncpipe
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"syscall"
)
// SyncPipe allows communication to and from the child processes
// to it's parent and allows the two independent processes to
// syncronize their state.
type SyncPipe struct {
parent, child *os.File
}
func NewSyncPipeFromFd(parentFd, childFd uintptr) (*SyncPipe, error) {
s := &SyncPipe{}
if parentFd > 0 {
s.parent = os.NewFile(parentFd, "parentPipe")
} else if childFd > 0 {
s.child = os.NewFile(childFd, "childPipe")
} else {
return nil, fmt.Errorf("no valid sync pipe fd specified")
}
return s, nil
}
func (s *SyncPipe) Child() *os.File {
return s.child
}
func (s *SyncPipe) Parent() *os.File {
return s.parent
}
func (s *SyncPipe) SendToChild(v interface{}) error {
data, err := json.Marshal(v)
if err != nil {
return err
}
s.parent.Write(data)
return syscall.Shutdown(int(s.parent.Fd()), syscall.SHUT_WR)
}
func (s *SyncPipe) ReadFromChild() error {
data, err := ioutil.ReadAll(s.parent)
if err != nil {
return err
}
if len(data) > 0 {
return fmt.Errorf("%s", data)
}
return nil
}
func (s *SyncPipe) ReadFromParent(v interface{}) error {
data, err := ioutil.ReadAll(s.child)
if err != nil {
return fmt.Errorf("error reading from sync pipe %s", err)
}
if len(data) > 0 {
if err := json.Unmarshal(data, v); err != nil {
return err
}
}
return nil
}
func (s *SyncPipe) ReportChildError(err error) {
// ensure that any data sent from the parent is consumed so it doesn't
// receive ECONNRESET when the child writes to the pipe.
ioutil.ReadAll(s.child)
s.child.Write([]byte(err.Error()))
s.CloseChild()
}
func (s *SyncPipe) Close() error {
if s.parent != nil {
s.parent.Close()
}
if s.child != nil {
s.child.Close()
}
return nil
}
func (s *SyncPipe) CloseChild() {
if s.child != nil {
s.child.Close()
s.child = nil
}
}

View File

@ -1,20 +0,0 @@
package syncpipe
import (
"os"
"syscall"
)
func NewSyncPipe() (s *SyncPipe, err error) {
s = &SyncPipe{}
fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
if err != nil {
return nil, err
}
s.child = os.NewFile(uintptr(fds[0]), "child syncpipe")
s.parent = os.NewFile(uintptr(fds[1]), "parent syncpipe")
return s, nil
}

View File

@ -1,72 +0,0 @@
package syncpipe
import (
"fmt"
"syscall"
"testing"
)
type testStruct struct {
Name string
}
func TestSendErrorFromChild(t *testing.T) {
pipe, err := NewSyncPipe()
if err != nil {
t.Fatal(err)
}
defer func() {
if err := pipe.Close(); err != nil {
t.Fatal(err)
}
}()
childfd, err := syscall.Dup(int(pipe.Child().Fd()))
if err != nil {
t.Fatal(err)
}
childPipe, _ := NewSyncPipeFromFd(0, uintptr(childfd))
pipe.CloseChild()
pipe.SendToChild(nil)
expected := "something bad happened"
childPipe.ReportChildError(fmt.Errorf(expected))
childError := pipe.ReadFromChild()
if childError == nil {
t.Fatal("expected an error to be returned but did not receive anything")
}
if childError.Error() != expected {
t.Fatalf("expected %q but received error message %q", expected, childError.Error())
}
}
func TestSendPayloadToChild(t *testing.T) {
pipe, err := NewSyncPipe()
if err != nil {
t.Fatal(err)
}
defer func() {
if err := pipe.Close(); err != nil {
t.Fatal(err)
}
}()
expected := "libcontainer"
if err := pipe.SendToChild(testStruct{Name: expected}); err != nil {
t.Fatal(err)
}
var s *testStruct
if err := pipe.ReadFromParent(&s); err != nil {
t.Fatal(err)
}
if s.Name != expected {
t.Fatalf("expected name %q but received %q", expected, s.Name)
}
}

View File

@ -11,9 +11,12 @@ import (
// We need different setns values for the different platforms and arch
// We are declaring the macro here because the SETNS syscall does not exist in th stdlib
var setNsMap = map[string]uintptr{
"linux/386": 346,
"linux/amd64": 308,
"linux/arm": 374,
"linux/386": 346,
"linux/amd64": 308,
"linux/arm": 374,
"linux/ppc64": 350,
"linux/ppc64le": 350,
"linux/s390x": 339,
}
func Setns(fd uintptr, flags uintptr) error {

View File

@ -1,4 +1,5 @@
// +build linux,amd64
// +build linux,amd64 linux,ppc64 linux,ppc64le linux,s390x
package system
import (

View File

@ -7,7 +7,7 @@ import (
// Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0)
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 {
err = e1
}

View File

@ -0,0 +1,108 @@
package user
import (
"errors"
"fmt"
"syscall"
)
var (
// The current operating system does not provide the required data for user lookups.
ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
)
func lookupUser(filter func(u User) bool) (User, error) {
// Get operating system-specific passwd reader-closer.
passwd, err := GetPasswd()
if err != nil {
return User{}, err
}
defer passwd.Close()
// Get the users.
users, err := ParsePasswdFilter(passwd, filter)
if err != nil {
return User{}, err
}
// No user entries found.
if len(users) == 0 {
return User{}, fmt.Errorf("no matching entries in passwd file")
}
// Assume the first entry is the "correct" one.
return users[0], nil
}
// CurrentUser looks up the current user by their user id in /etc/passwd. If the
// user cannot be found (or there is no /etc/passwd file on the filesystem),
// then CurrentUser returns an error.
func CurrentUser() (User, error) {
return LookupUid(syscall.Getuid())
}
// LookupUser looks up a user by their username in /etc/passwd. If the user
// cannot be found (or there is no /etc/passwd file on the filesystem), then
// LookupUser returns an error.
func LookupUser(username string) (User, error) {
return lookupUser(func(u User) bool {
return u.Name == username
})
}
// LookupUid looks up a user by their user id in /etc/passwd. If the user cannot
// be found (or there is no /etc/passwd file on the filesystem), then LookupId
// returns an error.
func LookupUid(uid int) (User, error) {
return lookupUser(func(u User) bool {
return u.Uid == uid
})
}
func lookupGroup(filter func(g Group) bool) (Group, error) {
// Get operating system-specific group reader-closer.
group, err := GetGroup()
if err != nil {
return Group{}, err
}
defer group.Close()
// Get the users.
groups, err := ParseGroupFilter(group, filter)
if err != nil {
return Group{}, err
}
// No user entries found.
if len(groups) == 0 {
return Group{}, fmt.Errorf("no matching entries in group file")
}
// Assume the first entry is the "correct" one.
return groups[0], nil
}
// CurrentGroup looks up the current user's group by their primary group id's
// entry in /etc/passwd. If the group cannot be found (or there is no
// /etc/group file on the filesystem), then CurrentGroup returns an error.
func CurrentGroup() (Group, error) {
return LookupGid(syscall.Getgid())
}
// LookupGroup looks up a group by its name in /etc/group. If the group cannot
// be found (or there is no /etc/group file on the filesystem), then LookupGroup
// returns an error.
func LookupGroup(groupname string) (Group, error) {
return lookupGroup(func(g Group) bool {
return g.Name == groupname
})
}
// LookupGid looks up a group by its group id in /etc/group. If the group cannot
// be found (or there is no /etc/group file on the filesystem), then LookupGid
// returns an error.
func LookupGid(gid int) (Group, error) {
return lookupGroup(func(g Group) bool {
return g.Gid == gid
})
}

View File

@ -0,0 +1,30 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package user
import (
"io"
"os"
)
// Unix-specific path to the passwd and group formatted files.
const (
unixPasswdFile = "/etc/passwd"
unixGroupFile = "/etc/group"
)
func GetPasswdFile() (string, error) {
return unixPasswdFile, nil
}
func GetPasswd() (io.ReadCloser, error) {
return os.Open(unixPasswdFile)
}
func GetGroupFile() (string, error) {
return unixGroupFile, nil
}
func GetGroup() (io.ReadCloser, error) {
return os.Open(unixGroupFile)
}

View File

@ -0,0 +1,21 @@
// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris
package user
import "io"
func GetPasswdFile() (string, error) {
return "", ErrUnsupported
}
func GetPasswd() (io.ReadCloser, error) {
return nil, ErrUnsupported
}
func GetGroupFile() (string, error) {
return "", ErrUnsupported
}
func GetGroup() (io.ReadCloser, error) {
return nil, ErrUnsupported
}

View File

@ -69,23 +69,36 @@ func parseLine(line string, v ...interface{}) {
}
}
func ParsePasswd() ([]*User, error) {
return ParsePasswdFilter(nil)
}
func ParsePasswdFilter(filter func(*User) bool) ([]*User, error) {
f, err := os.Open("/etc/passwd")
func ParsePasswdFile(path string) ([]User, error) {
passwd, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
return parsePasswdFile(f, filter)
defer passwd.Close()
return ParsePasswd(passwd)
}
func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) {
func ParsePasswd(passwd io.Reader) ([]User, error) {
return ParsePasswdFilter(passwd, nil)
}
func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) {
passwd, err := os.Open(path)
if err != nil {
return nil, err
}
defer passwd.Close()
return ParsePasswdFilter(passwd, filter)
}
func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
if r == nil {
return nil, fmt.Errorf("nil source for passwd-formatted data")
}
var (
s = bufio.NewScanner(r)
out = []*User{}
out = []User{}
)
for s.Scan() {
@ -103,7 +116,7 @@ func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) {
// Name:Pass:Uid:Gid:Gecos:Home:Shell
// root:x:0:0:root:/root:/bin/bash
// adm:x:3:4:adm:/var/adm:/bin/false
p := &User{}
p := User{}
parseLine(
text,
&p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell,
@ -117,23 +130,36 @@ func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) {
return out, nil
}
func ParseGroup() ([]*Group, error) {
return ParseGroupFilter(nil)
}
func ParseGroupFilter(filter func(*Group) bool) ([]*Group, error) {
f, err := os.Open("/etc/group")
func ParseGroupFile(path string) ([]Group, error) {
group, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
return parseGroupFile(f, filter)
defer group.Close()
return ParseGroup(group)
}
func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) {
func ParseGroup(group io.Reader) ([]Group, error) {
return ParseGroupFilter(group, nil)
}
func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) {
group, err := os.Open(path)
if err != nil {
return nil, err
}
defer group.Close()
return ParseGroupFilter(group, filter)
}
func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
if r == nil {
return nil, fmt.Errorf("nil source for group-formatted data")
}
var (
s = bufio.NewScanner(r)
out = []*Group{}
out = []Group{}
)
for s.Scan() {
@ -151,7 +177,7 @@ func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) {
// Name:Pass:Gid:List
// root:x:0:root
// adm:x:4:root,adm,daemon
p := &Group{}
p := Group{}
parseLine(
text,
&p.Name, &p.Pass, &p.Gid, &p.List,
@ -165,94 +191,160 @@ func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) {
return out, nil
}
// Given a string like "user", "1000", "user:group", "1000:1000", returns the uid, gid, list of supplementary group IDs, and home directory, if available and/or applicable.
func GetUserGroupSupplementaryHome(userSpec string, defaultUid, defaultGid int, defaultHome string) (int, int, []int, string, error) {
var (
uid = defaultUid
gid = defaultGid
suppGids = []int{}
home = defaultHome
type ExecUser struct {
Uid, Gid int
Sgids []int
Home string
}
// GetExecUserFile is a wrapper for GetExecUser. It reads data from each of the
// given file paths and uses that data as the arguments to GetExecUser. If the
// files cannot be opened for any reason, the error is ignored and a nil
// io.Reader is passed instead.
func GetExecUserFile(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
passwd, err := os.Open(passwdPath)
if err != nil {
passwd = nil
} else {
defer passwd.Close()
}
group, err := os.Open(groupPath)
if err != nil {
group = nil
} else {
defer group.Close()
}
return GetExecUser(userSpec, defaults, passwd, group)
}
// GetExecUser parses a user specification string (using the passwd and group
// readers as sources for /etc/passwd and /etc/group data, respectively). In
// the case of blank fields or missing data from the sources, the values in
// defaults is used.
//
// GetExecUser will return an error if a user or group literal could not be
// found in any entry in passwd and group respectively.
//
// Examples of valid user specifications are:
// * ""
// * "user"
// * "uid"
// * "user:group"
// * "uid:gid
// * "user:gid"
// * "uid:group"
func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) {
var (
userArg, groupArg string
name string
)
if defaults == nil {
defaults = new(ExecUser)
}
// Copy over defaults.
user := &ExecUser{
Uid: defaults.Uid,
Gid: defaults.Gid,
Sgids: defaults.Sgids,
Home: defaults.Home,
}
// Sgids slice *cannot* be nil.
if user.Sgids == nil {
user.Sgids = []int{}
}
// allow for userArg to have either "user" syntax, or optionally "user:group" syntax
parseLine(userSpec, &userArg, &groupArg)
users, err := ParsePasswdFilter(func(u *User) bool {
users, err := ParsePasswdFilter(passwd, func(u User) bool {
if userArg == "" {
return u.Uid == uid
return u.Uid == user.Uid
}
return u.Name == userArg || strconv.Itoa(u.Uid) == userArg
})
if err != nil && !os.IsNotExist(err) {
if err != nil && passwd != nil {
if userArg == "" {
userArg = strconv.Itoa(uid)
userArg = strconv.Itoa(user.Uid)
}
return 0, 0, nil, "", fmt.Errorf("Unable to find user %v: %v", userArg, err)
return nil, fmt.Errorf("Unable to find user %v: %v", userArg, err)
}
haveUser := users != nil && len(users) > 0
if haveUser {
// if we found any user entries that matched our filter, let's take the first one as "correct"
uid = users[0].Uid
gid = users[0].Gid
home = users[0].Home
name = users[0].Name
user.Uid = users[0].Uid
user.Gid = users[0].Gid
user.Home = users[0].Home
} else if userArg != "" {
// we asked for a user but didn't find them... let's check to see if we wanted a numeric user
uid, err = strconv.Atoi(userArg)
user.Uid, err = strconv.Atoi(userArg)
if err != nil {
// not numeric - we have to bail
return 0, 0, nil, "", fmt.Errorf("Unable to find user %v", userArg)
return nil, fmt.Errorf("Unable to find user %v", userArg)
}
if uid < minId || uid > maxId {
return 0, 0, nil, "", ErrRange
// Must be inside valid uid range.
if user.Uid < minId || user.Uid > maxId {
return nil, ErrRange
}
// if userArg couldn't be found in /etc/passwd but is numeric, just roll with it - this is legit
}
if groupArg != "" || (haveUser && users[0].Name != "") {
groups, err := ParseGroupFilter(func(g *Group) bool {
if groupArg != "" || name != "" {
groups, err := ParseGroupFilter(group, func(g Group) bool {
// Explicit group format takes precedence.
if groupArg != "" {
return g.Name == groupArg || strconv.Itoa(g.Gid) == groupArg
}
// Check if user is a member.
for _, u := range g.List {
if u == users[0].Name {
if u == name {
return true
}
}
return false
})
if err != nil && !os.IsNotExist(err) {
return 0, 0, nil, "", fmt.Errorf("Unable to find groups for user %v: %v", users[0].Name, err)
if err != nil && group != nil {
return nil, fmt.Errorf("Unable to find groups for user %v: %v", users[0].Name, err)
}
haveGroup := groups != nil && len(groups) > 0
if groupArg != "" {
if haveGroup {
// if we found any group entries that matched our filter, let's take the first one as "correct"
gid = groups[0].Gid
user.Gid = groups[0].Gid
} else {
// we asked for a group but didn't find id... let's check to see if we wanted a numeric group
gid, err = strconv.Atoi(groupArg)
user.Gid, err = strconv.Atoi(groupArg)
if err != nil {
// not numeric - we have to bail
return 0, 0, nil, "", fmt.Errorf("Unable to find group %v", groupArg)
return nil, fmt.Errorf("Unable to find group %v", groupArg)
}
if gid < minId || gid > maxId {
return 0, 0, nil, "", ErrRange
// Ensure gid is inside gid range.
if user.Gid < minId || user.Gid > maxId {
return nil, ErrRange
}
// if groupArg couldn't be found in /etc/group but is numeric, just roll with it - this is legit
}
} else if haveGroup {
suppGids = make([]int, len(groups))
// If implicit group format, fill supplementary gids.
user.Sgids = make([]int, len(groups))
for i, group := range groups {
suppGids[i] = group.Gid
user.Sgids[i] = group.Gid
}
}
}
return uid, gid, suppGids, home, nil
return user, nil
}

View File

@ -1,6 +1,8 @@
package user
import (
"io"
"reflect"
"strings"
"testing"
)
@ -54,7 +56,7 @@ func TestUserParseLine(t *testing.T) {
}
func TestUserParsePasswd(t *testing.T) {
users, err := parsePasswdFile(strings.NewReader(`
users, err := ParsePasswdFilter(strings.NewReader(`
root:x:0:0:root:/root:/bin/bash
adm:x:3:4:adm:/var/adm:/bin/false
this is just some garbage data
@ -74,7 +76,7 @@ this is just some garbage data
}
func TestUserParseGroup(t *testing.T) {
groups, err := parseGroupFile(strings.NewReader(`
groups, err := ParseGroupFilter(strings.NewReader(`
root:x:0:root
adm:x:4:root,adm,daemon
this is just some garbage data
@ -92,3 +94,259 @@ this is just some garbage data
t.Fatalf("Expected groups[1] to be 4 - adm - 3 members, got %v - %v - %v", groups[1].Gid, groups[1].Name, len(groups[1].List))
}
}
func TestValidGetExecUser(t *testing.T) {
const passwdContent = `
root:x:0:0:root user:/root:/bin/bash
adm:x:42:43:adm:/var/adm:/bin/false
this is just some garbage data
`
const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
this is just some garbage data
`
defaultExecUser := ExecUser{
Uid: 8888,
Gid: 8888,
Sgids: []int{8888},
Home: "/8888",
}
tests := []struct {
ref string
expected ExecUser
}{
{
ref: "root",
expected: ExecUser{
Uid: 0,
Gid: 0,
Sgids: []int{0, 1234},
Home: "/root",
},
},
{
ref: "adm",
expected: ExecUser{
Uid: 42,
Gid: 43,
Sgids: []int{1234},
Home: "/var/adm",
},
},
{
ref: "root:adm",
expected: ExecUser{
Uid: 0,
Gid: 43,
Sgids: defaultExecUser.Sgids,
Home: "/root",
},
},
{
ref: "adm:1234",
expected: ExecUser{
Uid: 42,
Gid: 1234,
Sgids: defaultExecUser.Sgids,
Home: "/var/adm",
},
},
{
ref: "42:1234",
expected: ExecUser{
Uid: 42,
Gid: 1234,
Sgids: defaultExecUser.Sgids,
Home: "/var/adm",
},
},
{
ref: "1337:1234",
expected: ExecUser{
Uid: 1337,
Gid: 1234,
Sgids: defaultExecUser.Sgids,
Home: defaultExecUser.Home,
},
},
{
ref: "1337",
expected: ExecUser{
Uid: 1337,
Gid: defaultExecUser.Gid,
Sgids: defaultExecUser.Sgids,
Home: defaultExecUser.Home,
},
},
{
ref: "",
expected: ExecUser{
Uid: defaultExecUser.Uid,
Gid: defaultExecUser.Gid,
Sgids: defaultExecUser.Sgids,
Home: defaultExecUser.Home,
},
},
}
for _, test := range tests {
passwd := strings.NewReader(passwdContent)
group := strings.NewReader(groupContent)
execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group)
if err != nil {
t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error())
t.Fail()
continue
}
if !reflect.DeepEqual(test.expected, *execUser) {
t.Logf("got: %#v", execUser)
t.Logf("expected: %#v", test.expected)
t.Fail()
continue
}
}
}
func TestInvalidGetExecUser(t *testing.T) {
const passwdContent = `
root:x:0:0:root user:/root:/bin/bash
adm:x:42:43:adm:/var/adm:/bin/false
this is just some garbage data
`
const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
this is just some garbage data
`
tests := []string{
// No such user/group.
"notuser",
"notuser:notgroup",
"root:notgroup",
"notuser:adm",
"8888:notgroup",
"notuser:8888",
// Invalid user/group values.
"-1:0",
"0:-3",
"-5:-2",
}
for _, test := range tests {
passwd := strings.NewReader(passwdContent)
group := strings.NewReader(groupContent)
execUser, err := GetExecUser(test, nil, passwd, group)
if err == nil {
t.Logf("got unexpected success when parsing '%s': %#v", test, execUser)
t.Fail()
continue
}
}
}
func TestGetExecUserNilSources(t *testing.T) {
const passwdContent = `
root:x:0:0:root user:/root:/bin/bash
adm:x:42:43:adm:/var/adm:/bin/false
this is just some garbage data
`
const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
this is just some garbage data
`
defaultExecUser := ExecUser{
Uid: 8888,
Gid: 8888,
Sgids: []int{8888},
Home: "/8888",
}
tests := []struct {
ref string
passwd, group bool
expected ExecUser
}{
{
ref: "",
passwd: false,
group: false,
expected: ExecUser{
Uid: 8888,
Gid: 8888,
Sgids: []int{8888},
Home: "/8888",
},
},
{
ref: "root",
passwd: true,
group: false,
expected: ExecUser{
Uid: 0,
Gid: 0,
Sgids: []int{8888},
Home: "/root",
},
},
{
ref: "0",
passwd: false,
group: false,
expected: ExecUser{
Uid: 0,
Gid: 8888,
Sgids: []int{8888},
Home: "/8888",
},
},
{
ref: "0:0",
passwd: false,
group: false,
expected: ExecUser{
Uid: 0,
Gid: 0,
Sgids: []int{8888},
Home: "/8888",
},
},
}
for _, test := range tests {
var passwd, group io.Reader
if test.passwd {
passwd = strings.NewReader(passwdContent)
}
if test.group {
group = strings.NewReader(groupContent)
}
execUser, err := GetExecUser(test.ref, &defaultExecUser, passwd, group)
if err != nil {
t.Logf("got unexpected error when parsing '%s': %s", test.ref, err.Error())
t.Fail()
continue
}
if !reflect.DeepEqual(test.expected, *execUser) {
t.Logf("got: %#v", execUser)
t.Logf("expected: %#v", test.expected)
t.Fail()
continue
}
}
}

View File

@ -27,6 +27,7 @@ import (
"github.com/fsouza/go-dockerclient"
"github.com/golang/glog"
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/info"
"github.com/google/cadvisor/utils"
)
@ -68,6 +69,9 @@ type dockerFactory struct {
usesAufsDriver bool
client *docker.Client
// Information about the mounted cgroup subsystems.
cgroupSubsystems libcontainer.CgroupSubsystems
}
func (self *dockerFactory) String() string {
@ -85,6 +89,7 @@ func (self *dockerFactory) NewContainerHandler(name string) (handler container.C
self.machineInfoFactory,
*dockerRootDir,
self.usesAufsDriver,
&self.cgroupSubsystems,
)
return
}
@ -218,11 +223,17 @@ func Register(factory info.MachineInfoFactory) error {
glog.Infof("System is using systemd")
}
cgroupSubsystems, err := libcontainer.GetCgroupSubsystems()
if err != nil {
return fmt.Errorf("failed to get cgroup subsystems: %v", err)
}
glog.Infof("Registering Docker factory")
f := &dockerFactory{
machineInfoFactory: factory,
client: client,
usesAufsDriver: usesAufsDriver,
cgroupSubsystems: cgroupSubsystems,
}
container.RegisterContainerHandlerFactory(f)
return nil

View File

@ -59,6 +59,10 @@ type dockerContainerHandler struct {
// Path to the libcontainer pid file.
libcontainerPidPath string
// Absolute path to the cgroup hierarchies of this container.
// (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test")
cgroupPaths map[string]string
cgroup cgroups.Cgroup
usesAufsDriver bool
fsInfo fs.FsInfo
@ -71,11 +75,19 @@ func newDockerContainerHandler(
machineInfoFactory info.MachineInfoFactory,
dockerRootDir string,
usesAufsDriver bool,
cgroupSubsystems *containerLibcontainer.CgroupSubsystems,
) (container.ContainerHandler, error) {
fsInfo, err := fs.NewFsInfo()
if err != nil {
return nil, err
}
// Create the cgroup paths.
cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
for key, val := range cgroupSubsystems.MountPoints {
cgroupPaths[key] = path.Join(val, name)
}
id := ContainerNameToDockerId(name)
handler := &dockerContainerHandler{
id: id,
@ -85,6 +97,7 @@ func newDockerContainerHandler(
libcontainerConfigPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "container.json"),
libcontainerStatePath: path.Join(dockerRootDir, pathToLibcontainerState, id, "state.json"),
libcontainerPidPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "pid"),
cgroupPaths: cgroupPaths,
cgroup: cgroups.Cgroup{
Parent: "/",
Name: name,
@ -159,6 +172,11 @@ func (self *dockerContainerHandler) readLibcontainerState() (state *libcontainer
}
state = retState
// Create cgroup paths if they don't exist. This is since older Docker clients don't write it.
if len(state.CgroupPaths) == 0 {
state.CgroupPaths = self.cgroupPaths
}
return
}
@ -259,7 +277,7 @@ func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err
return
}
stats, err = containerLibcontainer.GetStats(&self.cgroup, state)
stats, err = containerLibcontainer.GetStats(state)
if err != nil {
return
}

View File

@ -15,6 +15,7 @@
package libcontainer
import (
"fmt"
"time"
"github.com/docker/libcontainer"
@ -24,13 +25,60 @@ import (
"github.com/google/cadvisor/info"
)
type CgroupSubsystems struct {
// Cgroup subsystem mounts.
// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
Mounts []cgroups.Mount
// Cgroup subsystem to their mount location.
// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
MountPoints map[string]string
}
// Get information about the cgroup subsystems.
func GetCgroupSubsystems() (CgroupSubsystems, error) {
// Get all cgroup mounts.
allCgroups, err := cgroups.GetCgroupMounts()
if err != nil {
return CgroupSubsystems{}, err
}
if len(allCgroups) == 0 {
return CgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts")
}
// Trim the mounts to only the subsystems we care about.
supportedCgroups := make([]cgroups.Mount, 0, len(allCgroups))
mountPoints := make(map[string]string, len(allCgroups))
for _, mount := range allCgroups {
for _, subsystem := range mount.Subsystems {
if _, ok := supportedSubsystems[subsystem]; ok {
supportedCgroups = append(supportedCgroups, mount)
mountPoints[subsystem] = mount.Mountpoint
}
}
}
return CgroupSubsystems{
Mounts: supportedCgroups,
MountPoints: mountPoints,
}, nil
}
// Cgroup subsystems we support listing (should be the minimal set we need stats from).
var supportedSubsystems map[string]struct{} = map[string]struct{}{
"cpu": {},
"cpuacct": {},
"memory": {},
"cpuset": {},
}
// Get stats of the specified container
func GetStats(cgroup *cgroups.Cgroup, state *libcontainer.State) (*info.ContainerStats, error) {
func GetStats(state *libcontainer.State) (*info.ContainerStats, error) {
// TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready.
stats := &libcontainer.ContainerStats{}
var err error
stats.CgroupStats, err = cgroupfs.GetStats(cgroup)
stats.CgroupStats, err = cgroupfs.GetStats(state.CgroupPaths)
if err != nil {
return &info.ContainerStats{}, err
}
@ -43,14 +91,6 @@ func GetStats(cgroup *cgroups.Cgroup, state *libcontainer.State) (*info.Containe
return toContainerStats(stats), nil
}
func GetStatsCgroupOnly(cgroup *cgroups.Cgroup) (*info.ContainerStats, error) {
s, err := cgroupfs.GetStats(cgroup)
if err != nil {
return nil, err
}
return toContainerStats(&libcontainer.ContainerStats{CgroupStats: s}), nil
}
func DiskStatsCopy(blkio_stats []cgroups.BlkioStatEntry) (stat []info.PerDiskStats) {
if len(blkio_stats) == 0 {
return

View File

@ -17,26 +17,18 @@ package raw
import (
"fmt"
"github.com/docker/libcontainer/cgroups"
"github.com/golang/glog"
"github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/info"
)
type cgroupSubsystems struct {
// Cgroup subsystem mounts.
// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
mounts []cgroups.Mount
// Cgroup subsystem to their mount location.
// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
mountPoints map[string]string
}
type rawFactory struct {
// Factory for machine information.
machineInfoFactory info.MachineInfoFactory
cgroupSubsystems *cgroupSubsystems
// Information about the cgroup subsystems.
cgroupSubsystems *libcontainer.CgroupSubsystems
}
func (self *rawFactory) String() string {
@ -53,46 +45,19 @@ func (self *rawFactory) CanHandle(name string) (bool, error) {
}
func Register(machineInfoFactory info.MachineInfoFactory) error {
// Get all cgroup mounts.
allCgroups, err := cgroups.GetCgroupMounts()
cgroupSubsystems, err := libcontainer.GetCgroupSubsystems()
if err != nil {
return err
return fmt.Errorf("failed to get cgroup subsystems: %v", err)
}
if len(allCgroups) == 0 {
return fmt.Errorf("failed to find cgroup mounts for the raw factory")
}
// Trim the mounts to only the subsystems we care about.
supportedCgroups := make([]cgroups.Mount, 0, len(allCgroups))
mountPoints := make(map[string]string, len(allCgroups))
for _, mount := range allCgroups {
for _, subsystem := range mount.Subsystems {
if _, ok := supportedSubsystems[subsystem]; ok {
supportedCgroups = append(supportedCgroups, mount)
mountPoints[subsystem] = mount.Mountpoint
}
}
}
if len(supportedCgroups) == 0 {
if len(cgroupSubsystems.Mounts) == 0 {
return fmt.Errorf("failed to find supported cgroup mounts for the raw factory")
}
glog.Infof("Registering Raw factory")
factory := &rawFactory{
machineInfoFactory: machineInfoFactory,
cgroupSubsystems: &cgroupSubsystems{
mounts: supportedCgroups,
mountPoints: mountPoints,
},
cgroupSubsystems: &cgroupSubsystems,
}
container.RegisterContainerHandlerFactory(factory)
return nil
}
// Cgroup subsystems we support listing (should be the minimal set we need stats from).
var supportedSubsystems map[string]struct{} = map[string]struct{}{
"cpu": {},
"cpuacct": {},
"memory": {},
"cpuset": {},
}

View File

@ -39,7 +39,7 @@ type rawContainerHandler struct {
// Name of the container for this handler.
name string
cgroup *cgroups.Cgroup
cgroupSubsystems *cgroupSubsystems
cgroupSubsystems *libcontainer.CgroupSubsystems
machineInfoFactory info.MachineInfoFactory
// Inotify event watcher.
@ -54,12 +54,16 @@ type rawContainerHandler struct {
// Cgroup paths being watchd for new subcontainers
cgroupWatches map[string]struct{}
// Absolute path to the cgroup hierarchies of this container.
// (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test")
cgroupPaths map[string]string
fsInfo fs.FsInfo
networkInterface *networkInterface
externalMounts []mount
}
func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) {
func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) {
fsInfo, err := fs.NewFsInfo()
if err != nil {
return nil, err
@ -77,6 +81,13 @@ func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, mac
break
}
}
// Create the cgroup paths.
cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
for key, val := range cgroupSubsystems.MountPoints {
cgroupPaths[key] = path.Join(val, name)
}
return &rawContainerHandler{
name: name,
cgroup: &cgroups.Cgroup{
@ -88,6 +99,7 @@ func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, mac
stopWatcher: make(chan error),
watches: make(map[string]struct{}),
cgroupWatches: make(map[string]struct{}),
cgroupPaths: cgroupPaths,
fsInfo: fsInfo,
networkInterface: networkInterface,
externalMounts: externalMounts,
@ -145,9 +157,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
}
// CPU.
cpuRoot, ok := self.cgroupSubsystems.mountPoints["cpu"]
cpuRoot, ok := self.cgroupPaths["cpu"]
if ok {
cpuRoot = path.Join(cpuRoot, self.name)
if utils.FileExists(cpuRoot) {
spec.HasCpu = true
spec.Cpu.Limit = readInt64(cpuRoot, "cpu.shares")
@ -156,9 +167,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
// Cpu Mask.
// This will fail for non-unified hierarchies. We'll return the whole machine mask in that case.
cpusetRoot, ok := self.cgroupSubsystems.mountPoints["cpuset"]
cpusetRoot, ok := self.cgroupPaths["cpuset"]
if ok {
cpusetRoot = path.Join(cpusetRoot, self.name)
if utils.FileExists(cpusetRoot) {
spec.HasCpu = true
spec.Cpu.Mask = readString(cpusetRoot, "cpuset.cpus")
@ -169,9 +179,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
}
// Memory.
memoryRoot, ok := self.cgroupSubsystems.mountPoints["memory"]
memoryRoot, ok := self.cgroupPaths["memory"]
if ok {
memoryRoot = path.Join(memoryRoot, self.name)
if utils.FileExists(memoryRoot) {
spec.HasMemory = true
spec.Memory.Limit = readInt64(memoryRoot, "memory.limit_in_bytes")
@ -227,7 +236,10 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
}
for _, fs := range filesystems {
stats.Filesystem = append(stats.Filesystem,
info.FsStats{fs.Device, fs.Capacity, fs.Capacity - fs.Free,
info.FsStats{
fs.Device,
fs.Capacity,
fs.Capacity - fs.Free,
fs.DiskStats.ReadsCompleted,
fs.DiskStats.ReadsMerged,
fs.DiskStats.SectorsRead,
@ -246,18 +258,20 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
}
func (self *rawContainerHandler) GetStats() (*info.ContainerStats, error) {
state := dockerlibcontainer.State{}
// TODO(vmarmol): Don't re-create this every time.
state := dockerlibcontainer.State{
CgroupPaths: self.cgroupPaths,
}
if self.networkInterface != nil {
state = dockerlibcontainer.State{
NetworkState: network.NetworkState{
VethHost: self.networkInterface.VethHost,
VethChild: self.networkInterface.VethChild,
NsPath: "unknown",
},
}
}
stats, err := libcontainer.GetStats(self.cgroup, &state)
stats, err := libcontainer.GetStats(&state)
if err != nil {
return nil, err
}
@ -301,8 +315,8 @@ func listDirectories(dirpath string, parent string, recursive bool, output map[s
func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
containers := make(map[string]struct{})
for _, subsystem := range self.cgroupSubsystems.mounts {
err := listDirectories(path.Join(subsystem.Mountpoint, self.name), self.name, listType == container.ListRecursive, containers)
for _, cgroupPath := range self.cgroupPaths {
err := listDirectories(cgroupPath, self.name, listType == container.ListRecursive, containers)
if err != nil {
return nil, err
}
@ -372,7 +386,7 @@ func (self *rawContainerHandler) processEvent(event *inotify.Event, events chan
// Derive the container name from the path name.
var containerName string
for _, mount := range self.cgroupSubsystems.mounts {
for _, mount := range self.cgroupSubsystems.Mounts {
mountLocation := path.Clean(mount.Mountpoint) + "/"
if strings.HasPrefix(event.Name, mountLocation) {
containerName = event.Name[len(mountLocation)-1:]
@ -437,8 +451,8 @@ func (self *rawContainerHandler) WatchSubcontainers(events chan container.Subcon
}
// Watch this container (all its cgroups) and all subdirectories.
for _, mnt := range self.cgroupSubsystems.mounts {
err := self.watchDirectory(path.Join(mnt.Mountpoint, self.name), self.name)
for _, cgroupPath := range self.cgroupPaths {
err := self.watchDirectory(cgroupPath, self.name)
if err != nil {
return err
}
@ -481,8 +495,8 @@ func (self *rawContainerHandler) StopWatchingSubcontainers() error {
func (self *rawContainerHandler) Exists() bool {
// If any cgroup exists, the container is still alive.
for _, subsystem := range self.cgroupSubsystems.mounts {
if utils.FileExists(path.Join(subsystem.Mountpoint, self.name)) {
for _, cgroupPath := range self.cgroupPaths {
if utils.FileExists(cgroupPath) {
return true
}
}