Merge pull request #358 from vmarmol/update-libcontainer

Update libcontainer dependency
This commit is contained in:
Rohit Jnagal 2014-12-15 17:01:44 -08:00
commit 8eec529106
69 changed files with 2021 additions and 1150 deletions

4
Godeps/Godeps.json generated
View File

@ -55,8 +55,8 @@
}, },
{ {
"ImportPath": "github.com/docker/libcontainer", "ImportPath": "github.com/docker/libcontainer",
"Comment": "v1.2.0-99-gfe3801c", "Comment": "v1.2.0-173-g58fc931",
"Rev": "fe3801ccd2f5d0cc3ec5d063067fc4a1c312fa81" "Rev": "58fc93160e03387a4f41dcf4aed2e376c4a92db4"
}, },
{ {
"ImportPath": "github.com/fsouza/go-dockerclient", "ImportPath": "github.com/fsouza/go-dockerclient",

View File

@ -0,0 +1,9 @@
image: dockercore/libcontainer
script:
# Setup the DockerInDocker environment.
- /dind
- sed -i 's!docker/docker!docker/libcontainer!' /go/src/github.com/docker/docker/hack/make/.validate
- bash /go/src/github.com/docker/docker/hack/make/validate-dco
- bash /go/src/github.com/docker/docker/hack/make/validate-gofmt
- export GOPATH="$GOPATH:/go:$(pwd)/vendor" # Drone mucks with our GOPATH
- make direct-test

View File

@ -1,36 +0,0 @@
language: go
go: 1.3
# let us have pretty experimental Docker-based Travis workers
sudo: false
env:
- TRAVIS_GLOBAL_WTF=1
- _GOOS=linux _GOARCH=amd64 CGO_ENABLED=1
- _GOOS=linux _GOARCH=amd64 CGO_ENABLED=0
# - _GOOS=linux _GOARCH=386 CGO_ENABLED=1 # TODO add this once Travis can handle it (https://github.com/travis-ci/travis-ci/issues/2207#issuecomment-49625061)
- _GOOS=linux _GOARCH=386 CGO_ENABLED=0
- _GOOS=linux _GOARCH=arm CGO_ENABLED=0
install:
- go get code.google.com/p/go.tools/cmd/cover
- mkdir -pv "${GOPATH%%:*}/src/github.com/docker" && [ -d "${GOPATH%%:*}/src/github.com/docker/libcontainer" ] || ln -sv "$(readlink -f .)" "${GOPATH%%:*}/src/github.com/docker/libcontainer"
- if [ -z "$TRAVIS_GLOBAL_WTF" ]; then
gvm cross "$_GOOS" "$_GOARCH";
export GOOS="$_GOOS" GOARCH="$_GOARCH";
fi
- export GOPATH="$GOPATH:$(pwd)/vendor"
- if [ -z "$TRAVIS_GLOBAL_WTF" ]; then go env; fi
- go get -d -v ./... # TODO remove this if /docker/docker gets purged from our includes
- if [ "$TRAVIS_GLOBAL_WTF" ]; then
export DOCKER_PATH="${GOPATH%%:*}/src/github.com/docker/docker";
mkdir -p "$DOCKER_PATH/hack/make";
( cd "$DOCKER_PATH/hack/make" && wget -c 'https://raw.githubusercontent.com/docker/docker/master/hack/make/'{.validate,validate-dco,validate-gofmt} );
sed -i 's!docker/docker!docker/libcontainer!' "$DOCKER_PATH/hack/make/.validate";
fi
script:
- if [ "$TRAVIS_GLOBAL_WTF" ]; then bash "$DOCKER_PATH/hack/make/validate-dco"; fi
- if [ "$TRAVIS_GLOBAL_WTF" ]; then bash "$DOCKER_PATH/hack/make/validate-gofmt"; fi
- if [ -z "$TRAVIS_GLOBAL_WTF" ]; then make direct-build; fi
- if [ -z "$TRAVIS_GLOBAL_WTF" -a "$GOARCH" != 'arm' ]; then make direct-test-short; fi

View File

@ -1,7 +1,7 @@
FROM crosbymichael/golang FROM crosbymichael/golang
RUN apt-get update && apt-get install -y gcc make RUN apt-get update && apt-get install -y gcc make
RUN go get code.google.com/p/go.tools/cmd/cover RUN go get golang.org/x/tools/cmd/cover
ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor ENV GOPATH $GOPATH:/go/src/github.com/docker/libcontainer/vendor
RUN go get github.com/docker/docker/pkg/term RUN go get github.com/docker/docker/pkg/term
@ -10,7 +10,7 @@ RUN go get github.com/docker/docker/pkg/term
RUN mkdir /busybox && \ RUN mkdir /busybox && \
curl -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.02/rootfs.tar' | tar -xC /busybox curl -sSL 'https://github.com/jpetazzo/docker-busybox/raw/buildroot-2014.02/rootfs.tar' | tar -xC /busybox
RUN curl -sSL https://raw.githubusercontent.com/docker/docker/master/hack/dind -o /dind && \ RUN curl -sSL https://raw.githubusercontent.com/docker/docker/master/project/dind -o /dind && \
chmod +x /dind chmod +x /dind
COPY . /go/src/github.com/docker/libcontainer COPY . /go/src/github.com/docker/libcontainer

View File

@ -2,5 +2,4 @@ Michael Crosby <michael@docker.com> (@crosbymichael)
Rohit Jnagal <jnagal@google.com> (@rjnagal) Rohit Jnagal <jnagal@google.com> (@rjnagal)
Victor Marmol <vmarmol@google.com> (@vmarmol) Victor Marmol <vmarmol@google.com> (@vmarmol)
Mrunal Patel <mpatel@redhat.com> (@mrunalp) Mrunal Patel <mpatel@redhat.com> (@mrunalp)
.travis.yml: Tianon Gravi <admwiggin@gmail.com> (@tianon)
update-vendor.sh: Tianon Gravi <admwiggin@gmail.com> (@tianon) update-vendor.sh: Tianon Gravi <admwiggin@gmail.com> (@tianon)

View File

@ -12,10 +12,10 @@ sh:
GO_PACKAGES = $(shell find . -not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u) GO_PACKAGES = $(shell find . -not \( -wholename ./vendor -prune -o -wholename ./.git -prune \) -name '*.go' -print0 | xargs -0n1 dirname | sort -u)
direct-test: direct-test:
go test -cover -v $(GO_PACKAGES) go test $(TEST_TAGS) -cover -v $(GO_PACKAGES)
direct-test-short: direct-test-short:
go test -cover -test.short -v $(GO_PACKAGES) go test $(TEST_TAGS) -cover -test.short -v $(GO_PACKAGES)
direct-build: direct-build:
go build -v $(GO_PACKAGES) go build -v $(GO_PACKAGES)

View File

@ -1,4 +1,4 @@
## libcontainer - reference implementation for containers [![Build Status](https://travis-ci.org/docker/libcontainer.png?branch=master)](https://travis-ci.org/docker/libcontainer) ## libcontainer - reference implementation for containers [![Build Status](https://ci.dockerproject.com/github.com/docker/libcontainer/status.svg?branch=master)](https://ci.dockerproject.com/github.com/docker/libcontainer)
### Note on API changes: ### Note on API changes:

View File

@ -0,0 +1,321 @@
## Container Specification - v1
This is the standard configuration for version 1 containers. It includes
namespaces, standard filesystem setup, a default Linux capability set, and
information about resource reservations. It also has information about any
populated environment settings for the processes running inside a container.
Along with the configuration of how a container is created the standard also
discusses actions that can be performed on a container to manage and inspect
information about the processes running inside.
The v1 profile is meant to be able to accommodate the majority of applications
with a strong security configuration.
### System Requirements and Compatibility
Minimum requirements:
* Kernel version - 3.8 recommended, 2.6.2x minimum (with backported patches)
* Mounted cgroups with each subsystem in its own hierarchy
### Namespaces
| Flag | Enabled |
| ------------ | ------- |
| CLONE_NEWPID | 1 |
| CLONE_NEWUTS | 1 |
| CLONE_NEWIPC | 1 |
| CLONE_NEWNET | 1 |
| CLONE_NEWNS | 1 |
| CLONE_NEWUSER | 0 |
In v1 the user namespace is not enabled by default for support of older kernels
where the user namespace feature is not fully implemented. Namespaces are
created for the container via the `clone` syscall.
### Filesystem
A root filesystem must be provided to a container for execution. The container
will use this root filesystem (rootfs) to jail and spawn processes inside where
the binaries and system libraries are local to that directory. Any binaries
to be executed must be contained within this rootfs.
Mounts that happen inside the container are automatically cleaned up when the
container exits as the mount namespace is destroyed and the kernel will
unmount all the mounts that were setup within that namespace.
For a container to execute properly there are certain filesystems that
are required to be mounted within the rootfs that the runtime will setup.
| Path | Type | Flags | Data |
| ----------- | ------ | -------------------------------------- | --------------------------------------- |
| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 |
| /dev/shm | shm | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k |
| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | |
| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 |
| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | |
After a container's filesystems are mounted within the newly created
mount namespace `/dev` will need to be populated with a set of device nodes.
It is expected that a rootfs does not need to have any device nodes specified
for `/dev` within the rootfs as the container will setup the correct devices
that are required for executing a container's process.
| Path | Mode | Access |
| ------------ | ---- | ---------- |
| /dev/null | 0666 | rwm |
| /dev/zero | 0666 | rwm |
| /dev/full | 0666 | rwm |
| /dev/tty | 0666 | rwm |
| /dev/random | 0666 | rwm |
| /dev/urandom | 0666 | rwm |
| /dev/fuse | 0666 | rwm |
**ptmx**
`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within
the container.
The use of a pseudo TTY is optional within a container and it should support both.
If a pseudo TTY is provided to the container `/dev/console` will need to be
setup by binding the console in `/dev/` after it has been populated and mounted
in tmpfs.
| Source | Destination | UID GID | Mode | Type |
| --------------- | ------------ | ------- | ---- | ---- |
| *pty host path* | /dev/console | 0 0 | 0600 | bind |
After `/dev/null` has been setup we check for any external links between
the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing
to `/dev/null` outside the container we close and `dup2` the `/dev/null`
that is local to the container's rootfs.
After the container has `/proc` mounted a few standard symlinks are setup
within `/dev/` for the io.
| Source | Destination |
| ------------ | ----------- |
| /proc/1/fd | /dev/fd |
| /proc/1/fd/0 | /dev/stdin |
| /proc/1/fd/1 | /dev/stdout |
| /proc/1/fd/2 | /dev/stderr |
A `pivot_root` is used to change the root for the process, effectively
jailing the process inside the rootfs.
```c
put_old = mkdir(...);
pivot_root(rootfs, put_old);
chdir("/");
unmount(put_old, MS_DETACH);
rmdir(put_old);
```
For containers running with a rootfs inside `ramfs` a `MS_MOVE` combined
with a `chroot` is required as `pivot_root` is not supported in `ramfs`.
```c
mount(rootfs, "/", NULL, MS_MOVE, NULL);
chroot(".");
chdir("/");
```
The `umask` is set back to `0022` after the filesystem setup has been completed.
### Resources
Cgroups are used to handle resource allocation for containers. This includes
system resources like cpu, memory, and device access.
| Subsystem | Enabled |
| ---------- | ------- |
| devices | 1 |
| memory | 1 |
| cpu | 1 |
| cpuacct | 1 |
| cpuset | 1 |
| blkio | 1 |
| perf_event | 1 |
| freezer | 1 |
All cgroup subsystems are joined so that statistics can be collected from
each of the subsystems. Freezer does not expose any stats but is joined
so that containers can be paused and resumed.
The parent process of the container's init must place the init pid inside
the correct cgroups before the initialization begins. This is done so
that no processes or threads escape the cgroups. This sync is
done via a pipe ( specified in the runtime section below ) that the container's
init process will block waiting for the parent to finish setup.
### Security
The standard set of Linux capabilities that are set in a container
provide a good default for security and flexibility for the applications.
| Capability | Enabled |
| -------------------- | ------- |
| CAP_NET_RAW | 1 |
| CAP_NET_BIND_SERVICE | 1 |
| CAP_AUDIT_WRITE | 1 |
| CAP_DAC_OVERRIDE | 1 |
| CAP_SETFCAP | 1 |
| CAP_SETPCAP | 1 |
| CAP_SETGID | 1 |
| CAP_SETUID | 1 |
| CAP_MKNOD | 1 |
| CAP_CHOWN | 1 |
| CAP_FOWNER | 1 |
| CAP_FSETID | 1 |
| CAP_KILL | 1 |
| CAP_SYS_CHROOT | 1 |
| CAP_NET_BROADCAST | 0 |
| CAP_SYS_MODULE | 0 |
| CAP_SYS_RAWIO | 0 |
| CAP_SYS_PACCT | 0 |
| CAP_SYS_ADMIN | 0 |
| CAP_SYS_NICE | 0 |
| CAP_SYS_RESOURCE | 0 |
| CAP_SYS_TIME | 0 |
| CAP_SYS_TTY_CONFIG | 0 |
| CAP_AUDIT_CONTROL | 0 |
| CAP_MAC_OVERRIDE | 0 |
| CAP_MAC_ADMIN | 0 |
| CAP_NET_ADMIN | 0 |
| CAP_SYSLOG | 0 |
| CAP_DAC_READ_SEARCH | 0 |
| CAP_LINUX_IMMUTABLE | 0 |
| CAP_IPC_LOCK | 0 |
| CAP_IPC_OWNER | 0 |
| CAP_SYS_PTRACE | 0 |
| CAP_SYS_BOOT | 0 |
| CAP_LEASE | 0 |
| CAP_WAKE_ALARM | 0 |
| CAP_BLOCK_SUSPEND | 0 |
Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor)
and [selinux](http://selinuxproject.org/page/Main_Page) can be used with
the containers. A container should support setting an apparmor profile or
selinux process and mount labels if provided in the configuration.
Standard apparmor profile:
```c
#include <tunables/global>
profile <profile_name> flags=(attach_disconnected,mediate_deleted) {
#include <abstractions/base>
network,
capability,
file,
umount,
mount fstype=tmpfs,
mount fstype=mqueue,
mount fstype=fuse.*,
mount fstype=binfmt_misc -> /proc/sys/fs/binfmt_misc/,
mount fstype=efivarfs -> /sys/firmware/efi/efivars/,
mount fstype=fusectl -> /sys/fs/fuse/connections/,
mount fstype=securityfs -> /sys/kernel/security/,
mount fstype=debugfs -> /sys/kernel/debug/,
mount fstype=proc -> /proc/,
mount fstype=sysfs -> /sys/,
deny @{PROC}/sys/fs/** wklx,
deny @{PROC}/sysrq-trigger rwklx,
deny @{PROC}/mem rwklx,
deny @{PROC}/kmem rwklx,
deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx,
deny @{PROC}/sys/kernel/*/** wklx,
deny mount options=(ro, remount) -> /,
deny mount fstype=debugfs -> /var/lib/ureadahead/debugfs/,
deny mount fstype=devpts,
deny /sys/[^f]*/** wklx,
deny /sys/f[^s]*/** wklx,
deny /sys/fs/[^c]*/** wklx,
deny /sys/fs/c[^g]*/** wklx,
deny /sys/fs/cg[^r]*/** wklx,
deny /sys/firmware/efi/efivars/** rwklx,
deny /sys/kernel/security/** rwklx,
}
```
*TODO: seccomp work is being done to find a good default config*
### Runtime and Init Process
During container creation the parent process needs to talk to the container's init
process and have a form of synchronization. This is accomplished by creating
a pipe that is passed to the container's init. When the init process first spawns
it will block on its side of the pipe until the parent closes its side. This
allows the parent to have time to set the new process inside a cgroup hierarchy
and/or write any uid/gid mappings required for user namespaces.
The pipe is passed to the init process via FD 3.
The application consuming libcontainer should be compiled statically. libcontainer
does not define any init process and the arguments provided are used to `exec` the
process inside the application. There should be no long running init within the
container spec.
If a pseudo tty is provided to a container it will open and `dup2` the console
as the container's STDIN, STDOUT, STDERR as well as mounting the console
as `/dev/console`.
An extra set of mounts are provided to a container and setup for use. A container's
rootfs can contain some non portable files inside that can cause side effects during
execution of a process. These files are usually created and populated with the container
specific information via the runtime.
**Extra runtime files:**
* /etc/hosts
* /etc/resolv.conf
* /etc/hostname
* /etc/localtime
#### Defaults
There are a few defaults that can be overridden by users, but in their omission
these apply to processes within a container.
| Type | Value |
| ------------------- | ------------------------------ |
| Parent Death Signal | SIGKILL |
| UID | 0 |
| GID | 0 |
| GROUPS | 0, NULL |
| CWD | "/" |
| $HOME | Current user's home dir or "/" |
| Readonly rootfs | false |
| Pseudo TTY | false |
## Actions
After a container is created there is a standard set of actions that can
be done to the container. These actions are part of the public API for
a container.
| Action | Description |
| -------------- | ------------------------------------------------------------------ |
| Get processes | Return all the pids for processes running inside a container |
| Get Stats | Return resource statistics for the container as a whole |
| Wait | Wait on the container's init process ( pid 1 ) |
| Wait Process | Wait on any of the container's processes returning the exit status |
| Destroy | Kill the container's init process and remove any filesystem state |
| Signal | Send a signal to the container's init process |
| Signal Process | Send a signal to any of the container's processes |
| Pause | Pause all processes inside the container |
| Resume | Resume all processes inside the container if paused |
| Exec | Execute a new process inside of the container ( requires setns ) |

View File

@ -5,30 +5,17 @@ package libcontainer
import ( import (
"github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
"github.com/docker/libcontainer/network" "github.com/docker/libcontainer/network"
) )
// TODO(vmarmol): Complete Stats() in final libcontainer API and move users to that. // TODO(vmarmol): Complete Stats() in final libcontainer API and move users to that.
// DEPRECATED: The below portions are only to be used during the transition to the official API. // DEPRECATED: The below portions are only to be used during the transition to the official API.
// Returns all available stats for the given container. // Returns all available stats for the given container.
func GetStats(container *Config, state *State) (*ContainerStats, error) { func GetStats(container *Config, state *State) (stats *ContainerStats, err error) {
var (
err error
stats = &ContainerStats{} stats = &ContainerStats{}
) if stats.CgroupStats, err = fs.GetStats(state.CgroupPaths); err != nil {
if systemd.UseSystemd() {
stats.CgroupStats, err = systemd.GetStats(container.Cgroups)
} else {
stats.CgroupStats, err = fs.GetStats(container.Cgroups)
}
if err != nil {
return stats, err return stats, err
} }
stats.NetworkStats, err = network.GetStats(&state.NetworkState) stats.NetworkStats, err = network.GetStats(&state.NetworkState)
return stats, err return stats, err
} }

View File

@ -50,11 +50,7 @@ type Cgroup struct {
CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period. CpuQuota int64 `json:"cpu_quota,omitempty"` // CPU hardcap limit (in usecs). Allowed cpu time in a given period.
CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default. CpuPeriod int64 `json:"cpu_period,omitempty"` // CPU period to be used for hardcapping (in usecs). 0 to use system default.
CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use CpusetCpus string `json:"cpuset_cpus,omitempty"` // CPU to use
CpusetMems string `json:"cpuset_mems,omitempty"` // MEM to use
Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process Freezer FreezerState `json:"freezer,omitempty"` // set the freeze value for the process
Slice string `json:"slice,omitempty"` // Parent slice to use for systemd Slice string `json:"slice,omitempty"` // Parent slice to use for systemd
} }
type ActiveCgroup interface {
Cleanup() error
Paths() (map[string]string, error)
}

View File

@ -1,264 +0,0 @@
package main
import (
"encoding/json"
"fmt"
"log"
"os"
"syscall"
"time"
"github.com/codegangsta/cli"
"github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd"
)
// createCommand describes the "create" subcommand. It declares a "config, c"
// string flag (default "cgroup.json") naming the container configuration file
// and a "pid, p" int flag (default 0) for the initial process, and dispatches
// to createAction.
var createCommand = cli.Command{
Name: "create",
Usage: "Create a cgroup container using the supplied configuration and initial process.",
Flags: []cli.Flag{
cli.StringFlag{Name: "config, c", Value: "cgroup.json", Usage: "path to container configuration (cgroups.Cgroup object)"},
cli.IntFlag{Name: "pid, p", Value: 0, Usage: "pid of the initial process in the container"},
},
Action: createAction,
}
// destroyCommand describes the "destroy" subcommand. It declares the
// "name, n" and "parent, p" string flags (both empty by default) that identify
// the container, and dispatches to destroyAction.
var destroyCommand = cli.Command{
Name: "destroy",
Usage: "Destroy an existing cgroup container.",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: destroyAction,
}
// statsCommand describes the "stats" subcommand. It declares the "name, n"
// and "parent, p" string flags (both empty by default) that identify the
// container, and dispatches to statsAction.
var statsCommand = cli.Command{
Name: "stats",
Usage: "Get stats for cgroup",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: statsAction,
}
// pauseCommand describes the "pause" subcommand. It declares the "name, n"
// and "parent, p" string flags (both empty by default) that identify the
// container, and dispatches to pauseAction.
var pauseCommand = cli.Command{
Name: "pause",
Usage: "Pause cgroup",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: pauseAction,
}
// resumeCommand describes the "resume" subcommand. It declares the "name, n"
// and "parent, p" string flags (both empty by default) that identify the
// container, and dispatches to resumeAction.
var resumeCommand = cli.Command{
Name: "resume",
Usage: "Resume a paused cgroup",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: resumeAction,
}
// psCommand describes the "ps" subcommand. It declares the "name, n" and
// "parent, p" string flags (both empty by default) that identify the
// container, and dispatches to psAction.
var psCommand = cli.Command{
Name: "ps",
Usage: "Get list of pids for a cgroup",
Flags: []cli.Flag{
cli.StringFlag{Name: "name, n", Value: "", Usage: "container name"},
cli.StringFlag{Name: "parent, p", Value: "", Usage: "container parent"},
},
Action: psAction,
}
// getConfigFromFile reads the cgroup configuration from the JSON file named by
// the "config" flag of c.
//
// It returns the decoded *cgroups.Cgroup, or a non-nil error when the file
// cannot be opened or its contents cannot be decoded as JSON. Both failure
// modes are reported through the error return so that the caller decides how
// to react and the deferred Close always runs.
func getConfigFromFile(c *cli.Context) (*cgroups.Cgroup, error) {
	f, err := os.Open(c.String("config"))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var config *cgroups.Cgroup
	if err := json.NewDecoder(f).Decode(&config); err != nil {
		// Return instead of log.Fatal: exiting here would bypass the deferred
		// Close and make the error return value meaningless for this path.
		return nil, err
	}
	return config, nil
}
// openLog redirects the standard logger to the file called name.
//
// The file is created if it does not exist and is opened in append mode, so
// existing content is preserved. It is created with mode 0644 (before umask):
// a log file is plain data and must not carry executable bits. The file is
// intentionally left open for the lifetime of the process because the logger
// keeps writing to it.
//
// It returns the error from os.OpenFile when the file cannot be opened; in
// that case the logger output is left unchanged.
func openLog(name string) error {
	f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR|os.O_APPEND, 0644)
	if err != nil {
		return err
	}
	log.SetOutput(f)
	return nil
}
// getConfig builds a minimal cgroup configuration from the "name" and "parent"
// flags of context.
//
// It returns an error when the "name" flag is empty. The error is returned
// rather than logged fatally here so that the error result is meaningful;
// every caller in this file already terminates via log.Fatal on a non-nil
// error, so the observable outcome for the tool is unchanged.
func getConfig(context *cli.Context) (*cgroups.Cgroup, error) {
	name := context.String("name")
	if name == "" {
		return nil, fmt.Errorf("Missing container name")
	}
	return &cgroups.Cgroup{
		Name:   name,
		Parent: context.String("parent"),
	}, nil
}
// killAll sends SIGKILL to every process in the cgroup described by config,
// retrying up to 10 times with a 100ms pause between attempts, and terminates
// the program via log.Fatal if processes are still present afterwards.
//
// The loop stops as soon as the cgroup is empty OR the retry budget is spent.
// The previous condition joined the two checks with "||", which kept looping
// for the full budget on an already-empty cgroup and never terminated when
// processes survived, leaving the failure check below unreachable.
func killAll(config *cgroups.Cgroup) {
	// We could use freezer here to prevent process spawning while we are trying
	// to kill everything. But going with more portable solution of retrying for
	// now.
	pids := getPids(config)
	for retry := 10; len(pids) != 0 && retry > 0; retry-- {
		killPids(pids)
		// Give the kernel a moment to reap the processes before re-listing.
		time.Sleep(100 * time.Millisecond)
		pids = getPids(config)
	}
	if len(pids) != 0 {
		log.Fatal(fmt.Errorf("Could not kill existing processes in the container."))
	}
}
// getPids returns the pids reported by fs.GetPids for config. Any lookup
// failure is fatal: the error is passed to log.Fatal.
func getPids(config *cgroups.Cgroup) []int {
	found, lookupErr := fs.GetPids(config)
	if lookupErr == nil {
		return found
	}
	log.Fatal(lookupErr)
	return found
}
// killPids delivers SIGKILL to each pid in pids.
func killPids(pids []int) {
	for i := range pids {
		// A pid may already have exited on its own, so the result of Kill is
		// deliberately discarded.
		_ = syscall.Kill(pids[i], syscall.SIGKILL)
	}
}
// setFreezerState applies state to the freezer of the cgroup identified by the
// flags in context. The systemd implementation is used when
// systemd.UseSystemd reports true, the fs implementation otherwise. Any
// failure is fatal via log.Fatal.
func setFreezerState(context *cli.Context, state cgroups.FreezerState) {
	config, err := getConfig(context)
	if err != nil {
		log.Fatal(err)
	}

	switch {
	case systemd.UseSystemd():
		err = systemd.Freeze(config, state)
	default:
		err = fs.Freeze(config, state)
	}
	if err != nil {
		log.Fatal(err)
	}
}
// createAction implements the "create" subcommand: it loads the configuration
// named by the "config" flag, validates the "pid" flag (must be positive) and
// applies the cgroup configuration to that pid through systemd when
// systemd.UseSystemd reports true, or through the fs package otherwise.
// Every failure is fatal via log.Fatal.
func createAction(context *cli.Context) {
	config, err := getConfigFromFile(context)
	if err != nil {
		log.Fatal(err)
	}

	pid := context.Int("pid")
	if pid <= 0 {
		log.Fatal(fmt.Errorf("Invalid pid : %d", pid))
	}

	// The value returned alongside the error is not needed by this tool.
	var applyErr error
	if systemd.UseSystemd() {
		_, applyErr = systemd.Apply(config, pid)
	} else {
		_, applyErr = fs.Apply(config, pid)
	}
	if applyErr != nil {
		log.Fatal(applyErr)
	}
}
// destroyAction implements the "destroy" subcommand: it kills every process in
// the cgroup identified by the flags in context and, when systemd is not in
// use, removes the cgroup state through fs.Cleanup. Failures are fatal via
// log.Fatal.
func destroyAction(context *cli.Context) {
	config, err := getConfig(context)
	if err != nil {
		log.Fatal(err)
	}

	killAll(config)

	if systemd.UseSystemd() {
		// Systemd will clean up cgroup state for empty container.
		return
	}
	if cleanupErr := fs.Cleanup(config); cleanupErr != nil {
		log.Fatal(cleanupErr)
	}
}
// statsAction implements the "stats" subcommand: it fetches the stats for the
// cgroup identified by the flags in context via fs.GetStats and prints them to
// standard output as tab-indented JSON. Failures are fatal via log.Fatal.
func statsAction(context *cli.Context) {
	cfg, cfgErr := getConfig(context)
	if cfgErr != nil {
		log.Fatal(cfgErr)
	}

	usage, statsErr := fs.GetStats(cfg)
	if statsErr != nil {
		log.Fatal(statsErr)
	}

	encoded, encErr := json.MarshalIndent(usage, "", "\t")
	if encErr != nil {
		log.Fatal(encErr)
	}

	fmt.Printf("Usage stats for '%s':\n %v\n", cfg.Name, string(encoded))
}
// pauseAction implements the "pause" subcommand by passing cgroups.Frozen to
// setFreezerState together with the command's context.
func pauseAction(context *cli.Context) {
setFreezerState(context, cgroups.Frozen)
}
// resumeAction implements the "resume" subcommand by passing cgroups.Thawed to
// setFreezerState together with the command's context.
func resumeAction(context *cli.Context) {
setFreezerState(context, cgroups.Thawed)
}
// psAction implements the "ps" subcommand: it prints a header naming the
// cgroup followed by the pids that fs.GetPids reports for it. Failures are
// fatal via log.Fatal.
func psAction(context *cli.Context) {
	cfg, cfgErr := getConfig(context)
	if cfgErr != nil {
		log.Fatal(cfgErr)
	}

	members, listErr := fs.GetPids(cfg)
	if listErr != nil {
		log.Fatal(listErr)
	}

	fmt.Printf("Pids in '%s':\n", cfg.Name)
	fmt.Println(members)
}
// main is the entry point of the cgutil test utility. When the "log"
// environment variable is non-empty the standard logger is redirected to that
// file; the CLI application is then assembled with its six subcommands and run
// against os.Args. Any failure is fatal via log.Fatal.
func main() {
	if logPath := os.Getenv("log"); logPath != "" {
		if err := openLog(logPath); err != nil {
			log.Fatal(err)
		}
	}

	app := cli.NewApp()
	app.Name = "cgutil"
	app.Usage = "Test utility for libcontainer cgroups package"
	app.Version = "0.1"
	app.Commands = []cli.Command{
		createCommand,
		destroyCommand,
		statsCommand,
		pauseCommand,
		resumeCommand,
		psCommand,
	}

	runErr := app.Run(os.Args)
	if runErr != nil {
		log.Fatal(runErr)
	}
}

View File

@ -1,10 +0,0 @@
{
"name": "luke",
"parent": "darth",
"allow_all_devices": true,
"memory": 1073741824,
"memory_swap": -1,
"cpu_shares": 2048,
"cpu_quota": 500000,
"cpu_period": 250000
}

View File

@ -57,20 +57,35 @@ type data struct {
pid int pid int
} }
func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
d, err := getCgroupData(c, pid) d, err := getCgroupData(c, pid)
if err != nil { if err != nil {
return nil, err return nil, err
} }
for _, sys := range subsystems { paths := make(map[string]string)
defer func() {
if err != nil {
cgroups.RemovePaths(paths)
}
}()
for name, sys := range subsystems {
if err := sys.Set(d); err != nil { if err := sys.Set(d); err != nil {
d.Cleanup()
return nil, err return nil, err
} }
// FIXME: Apply should, ideally, be reentrant or be broken up into a separate
// create and join phase so that the cgroup hierarchy for a container can be
// created then join consists of writing the process pids to cgroup.procs
p, err := d.path(name)
if err != nil {
if cgroups.IsNotFound(err) {
continue
} }
return nil, err
return d, nil }
paths[name] = p
}
return paths, nil
} }
// Symmetrical public function to update device based cgroups. Also available // Symmetrical public function to update device based cgroups. Also available
@ -86,33 +101,13 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error {
return devices.Set(d) return devices.Set(d)
} }
func Cleanup(c *cgroups.Cgroup) error { func GetStats(systemPaths map[string]string) (*cgroups.Stats, error) {
d, err := getCgroupData(c, 0)
if err != nil {
return fmt.Errorf("Could not get Cgroup data %s", err)
}
return d.Cleanup()
}
func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
stats := cgroups.NewStats() stats := cgroups.NewStats()
for name, path := range systemPaths {
d, err := getCgroupData(c, 0) sys, ok := subsystems[name]
if err != nil { if !ok {
return nil, fmt.Errorf("getting CgroupData %s", err)
}
for sysname, sys := range subsystems {
path, err := d.path(sysname)
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue continue
} }
return nil, err
}
if err := sys.GetStats(path, stats); err != nil { if err := sys.GetStats(path, stats); err != nil {
return nil, err return nil, err
} }
@ -176,26 +171,6 @@ func (raw *data) parent(subsystem string) (string, error) {
return filepath.Join(raw.root, subsystem, initPath), nil return filepath.Join(raw.root, subsystem, initPath), nil
} }
func (raw *data) Paths() (map[string]string, error) {
paths := make(map[string]string)
for sysname := range subsystems {
path, err := raw.path(sysname)
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return nil, err
}
paths[sysname] = path
}
return paths, nil
}
func (raw *data) path(subsystem string) (string, error) { func (raw *data) path(subsystem string) (string, error) {
// If the cgroup name/path is absolute do not look relative to the cgroup of the init process. // If the cgroup name/path is absolute do not look relative to the cgroup of the init process.
if filepath.IsAbs(raw.cgroup) { if filepath.IsAbs(raw.cgroup) {
@ -234,13 +209,6 @@ func (raw *data) join(subsystem string) (string, error) {
return path, nil return path, nil
} }
func (raw *data) Cleanup() error {
for _, sys := range subsystems {
sys.Remove(raw)
}
return nil
}
func writeFile(dir, file, data string) error { func writeFile(dir, file, data string) error {
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700) return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
} }

View File

@ -14,17 +14,11 @@ type CpusetGroup struct {
} }
func (s *CpusetGroup) Set(d *data) error { func (s *CpusetGroup) Set(d *data) error {
// we don't want to join this cgroup unless it is specified
if d.c.CpusetCpus != "" {
dir, err := d.path("cpuset") dir, err := d.path("cpuset")
if err != nil { if err != nil {
return err return err
} }
return s.SetDir(dir, d.c.CpusetCpus, d.c.CpusetMems, d.pid)
return s.SetDir(dir, d.c.CpusetCpus, d.pid)
}
return nil
} }
func (s *CpusetGroup) Remove(d *data) error { func (s *CpusetGroup) Remove(d *data) error {
@ -35,7 +29,7 @@ func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error {
return nil return nil
} }
func (s *CpusetGroup) SetDir(dir, value string, pid int) error { func (s *CpusetGroup) SetDir(dir, cpus string, mems string, pid int) error {
if err := s.ensureParent(dir); err != nil { if err := s.ensureParent(dir); err != nil {
return err return err
} }
@ -46,9 +40,18 @@ func (s *CpusetGroup) SetDir(dir, value string, pid int) error {
return err return err
} }
if err := writeFile(dir, "cpuset.cpus", value); err != nil { // If we don't use --cpuset-xxx, the default value inherit from parent cgroup
// is set in s.ensureParent, otherwise, use the value we set
if cpus != "" {
if err := writeFile(dir, "cpuset.cpus", cpus); err != nil {
return err return err
} }
}
if mems != "" {
if err := writeFile(dir, "cpuset.mems", mems); err != nil {
return err
}
}
return nil return nil
} }

View File

@ -57,7 +57,7 @@ func TestGetCgroupParamsInt(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} else if value != 0 { } else if value != 0 {
t.Fatalf("Expected %d to equal %f", value, 0) t.Fatalf("Expected %d to equal %d", value, 0)
} }
// Success with negative values lesser than min int64 // Success with negative values lesser than min int64
@ -70,7 +70,7 @@ func TestGetCgroupParamsInt(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} else if value != 0 { } else if value != 0 {
t.Fatalf("Expected %d to equal %f", value, 0) t.Fatalf("Expected %d to equal %d", value, 0)
} }
// Not a float. // Not a float.

View File

@ -27,7 +27,7 @@ type CpuUsage struct {
type CpuStats struct { type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"` CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throlling_data,omitempty"` ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
} }
type MemoryStats struct { type MemoryStats struct {

View File

@ -12,7 +12,7 @@ func UseSystemd() bool {
return false return false
} }
func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
return nil, fmt.Errorf("Systemd not supported") return nil, fmt.Errorf("Systemd not supported")
} }
@ -27,7 +27,3 @@ func ApplyDevices(c *cgroups.Cgroup, pid int) error {
func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error { func Freeze(c *cgroups.Cgroup, state cgroups.FreezerState) error {
return fmt.Errorf("Systemd not supported") return fmt.Errorf("Systemd not supported")
} }
func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
return nil, fmt.Errorf("Systemd not supported")
}

View File

@ -31,18 +31,15 @@ var (
connLock sync.Mutex connLock sync.Mutex
theConn *systemd.Conn theConn *systemd.Conn
hasStartTransientUnit bool hasStartTransientUnit bool
subsystems = map[string]subsystem{
"devices": &fs.DevicesGroup{},
"memory": &fs.MemoryGroup{},
"cpu": &fs.CpuGroup{},
"cpuset": &fs.CpusetGroup{},
"cpuacct": &fs.CpuacctGroup{},
"blkio": &fs.BlkioGroup{},
"perf_event": &fs.PerfEventGroup{},
"freezer": &fs.FreezerGroup{},
}
) )
func newProp(name string, units interface{}) systemd.Property {
return systemd.Property{
Name: name,
Value: dbus.MakeVariant(units),
}
}
func UseSystemd() bool { func UseSystemd() bool {
s, err := os.Stat("/run/systemd/system") s, err := os.Stat("/run/systemd/system")
if err != nil || !s.IsDir() { if err != nil || !s.IsDir() {
@ -84,7 +81,7 @@ func getIfaceForUnit(unitName string) string {
return "Unit" return "Unit"
} }
func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) { func Apply(c *cgroups.Cgroup, pid int) (map[string]string, error) {
var ( var (
unitName = getUnitName(c) unitName = getUnitName(c)
slice = "system.slice" slice = "system.slice"
@ -99,27 +96,27 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
} }
properties = append(properties, properties = append(properties,
systemd.Property{"Slice", dbus.MakeVariant(slice)}, systemd.PropSlice(slice),
systemd.Property{"Description", dbus.MakeVariant("docker container " + c.Name)}, systemd.PropDescription("docker container "+c.Name),
systemd.Property{"PIDs", dbus.MakeVariant([]uint32{uint32(pid)})}, newProp("PIDs", []uint32{uint32(pid)}),
) )
// Always enable accounting, this gets us the same behaviour as the fs implementation, // Always enable accounting, this gets us the same behaviour as the fs implementation,
// plus the kernel has some problems with joining the memory cgroup at a later time. // plus the kernel has some problems with joining the memory cgroup at a later time.
properties = append(properties, properties = append(properties,
systemd.Property{"MemoryAccounting", dbus.MakeVariant(true)}, newProp("MemoryAccounting", true),
systemd.Property{"CPUAccounting", dbus.MakeVariant(true)}, newProp("CPUAccounting", true),
systemd.Property{"BlockIOAccounting", dbus.MakeVariant(true)}) newProp("BlockIOAccounting", true))
if c.Memory != 0 { if c.Memory != 0 {
properties = append(properties, properties = append(properties,
systemd.Property{"MemoryLimit", dbus.MakeVariant(uint64(c.Memory))}) newProp("MemoryLimit", uint64(c.Memory)))
} }
// TODO: MemoryReservation and MemorySwap not available in systemd // TODO: MemoryReservation and MemorySwap not available in systemd
if c.CpuShares != 0 { if c.CpuShares != 0 {
properties = append(properties, properties = append(properties,
systemd.Property{"CPUShares", dbus.MakeVariant(uint64(c.CpuShares))}) newProp("CPUShares", uint64(c.CpuShares)))
} }
if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil { if _, err := theConn.StartTransientUnit(unitName, "replace", properties...); err != nil {
@ -140,57 +137,42 @@ func Apply(c *cgroups.Cgroup, pid int) (cgroups.ActiveCgroup, error) {
} }
// we need to manually join the freezer cgroup in systemd because it does not currently support it // we need to manually join the freezer and cpuset cgroup in systemd
// via the dbus api // because it does not currently support it via the dbus api.
if err := joinFreezer(c, pid); err != nil { if err := joinFreezer(c, pid); err != nil {
return nil, err return nil, err
} }
if c.CpusetCpus != "" {
if err := joinCpuset(c, pid); err != nil { if err := joinCpuset(c, pid); err != nil {
return nil, err return nil, err
} }
}
return res, nil
}
func writeFile(dir, file, data string) error {
return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
}
func (c *systemdCgroup) Paths() (map[string]string, error) {
paths := make(map[string]string) paths := make(map[string]string)
for _, sysname := range []string{
for sysname := range subsystems { "devices",
subsystemPath, err := getSubsystemPath(c.cgroup, sysname) "memory",
"cpu",
"cpuset",
"cpuacct",
"blkio",
"perf_event",
"freezer",
} {
subsystemPath, err := getSubsystemPath(res.cgroup, sysname)
if err != nil { if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem // Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) { if cgroups.IsNotFound(err) {
continue continue
} }
return nil, err return nil, err
} }
paths[sysname] = subsystemPath paths[sysname] = subsystemPath
} }
return paths, nil return paths, nil
} }
func (c *systemdCgroup) Cleanup() error { func writeFile(dir, file, data string) error {
// systemd cleans up, we don't need to do much return ioutil.WriteFile(filepath.Join(dir, file), []byte(data), 0700)
paths, err := c.Paths()
if err != nil {
return err
}
for _, path := range paths {
os.RemoveAll(path)
}
return nil
} }
func joinFreezer(c *cgroups.Cgroup, pid int) error { func joinFreezer(c *cgroups.Cgroup, pid int) error {
@ -260,35 +242,6 @@ func getUnitName(c *cgroups.Cgroup) string {
return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name) return fmt.Sprintf("%s-%s.scope", c.Parent, c.Name)
} }
/*
* This would be nicer to get from the systemd API when accounting
* is enabled, but sadly there is no way to do that yet.
* The lack of this functionality in the API & the approach taken
* is guided by
* http://www.freedesktop.org/wiki/Software/systemd/ControlGroupInterface/#readingaccountinginformation.
*/
func GetStats(c *cgroups.Cgroup) (*cgroups.Stats, error) {
stats := cgroups.NewStats()
for sysname, sys := range subsystems {
subsystemPath, err := getSubsystemPath(c, sysname)
if err != nil {
// Don't fail if a cgroup hierarchy was not found, just skip this subsystem
if cgroups.IsNotFound(err) {
continue
}
return nil, err
}
if err := sys.GetStats(subsystemPath, stats); err != nil {
return nil, err
}
}
return stats, nil
}
// Atm we can't use the systemd device support because of two missing things: // Atm we can't use the systemd device support because of two missing things:
// * Support for wildcards to allow mknod on any device // * Support for wildcards to allow mknod on any device
// * Support for wildcards to allow /dev/pts support // * Support for wildcards to allow /dev/pts support
@ -360,5 +313,5 @@ func joinCpuset(c *cgroups.Cgroup, pid int) error {
s := &fs.CpusetGroup{} s := &fs.CpusetGroup{}
return s.SetDir(path, c.CpusetCpus, pid) return s.SetDir(path, c.CpusetCpus, c.CpusetMems, pid)
} }

View File

@ -189,6 +189,17 @@ func EnterPid(cgroupPaths map[string]string, pid int) error {
} }
} }
} }
return nil return nil
} }
// RemovePaths removes every path held as a value in paths (the keys are
// ignored). A failure on one path does not stop the remaining paths from
// being removed; the first error encountered is remembered and returned
// once all paths have been attempted. Because map iteration order is
// unspecified, "first" means first in iteration order.
func RemovePaths(paths map[string]string) (err error) {
	for _, dir := range paths {
		removeErr := os.RemoveAll(dir)
		if err != nil {
			// An earlier failure is already recorded; keep it.
			continue
		}
		err = removeErr
	}
	return err
}

View File

@ -10,6 +10,13 @@ type MountConfig mount.MountConfig
type Network network.Network type Network network.Network
// Namespace is the configuration for a single namespace of a container.
// Besides naming the namespace it may carry the path of an existing
// namespace that can be joined via setns.
type Namespace struct {
	// Name identifies the namespace, for example "NEWIPC".
	Name string `json:"name"`

	// Path optionally points at an existing namespace to join, for example
	// "/proc/1/ns/ipc". It is omitted from the JSON form when empty.
	Path string `json:"path,omitempty"`
}
// Config defines configuration options for executing a process inside a contained environment. // Config defines configuration options for executing a process inside a contained environment.
type Config struct { type Config struct {
// Mount specific options. // Mount specific options.
@ -38,7 +45,7 @@ type Config struct {
// Namespaces specifies the container's namespaces that it should setup when cloning the init process // Namespaces specifies the container's namespaces that it should setup when cloning the init process
// If a namespace is not provided that namespace is shared from the container's parent process // If a namespace is not provided that namespace is shared from the container's parent process
Namespaces map[string]bool `json:"namespaces,omitempty"` Namespaces []Namespace `json:"namespaces,omitempty"`
// Capabilities specify the capabilities to keep when executing the process inside the container // Capabilities specify the capabilities to keep when executing the process inside the container
// All capbilities not specified will be dropped from the processes capability mask // All capbilities not specified will be dropped from the processes capability mask
@ -65,6 +72,10 @@ type Config struct {
// RestrictSys will remount /proc/sys, /sys, and mask over sysrq-trigger as well as /proc/irq and // RestrictSys will remount /proc/sys, /sys, and mask over sysrq-trigger as well as /proc/irq and
// /proc/bus // /proc/bus
RestrictSys bool `json:"restrict_sys,omitempty"` RestrictSys bool `json:"restrict_sys,omitempty"`
// Rlimits specifies the resource limits, such as max open files, to set in the container
// If Rlimits are not set, the container will inherit rlimits from the parent process
Rlimits []Rlimit `json:"rlimits,omitempty"`
} }
// Routes can be specified to create entries in the route table as the container is started // Routes can be specified to create entries in the route table as the container is started
@ -87,3 +98,9 @@ type Route struct {
// The device to set this route up for, for example: eth0 // The device to set this route up for, for example: eth0
InterfaceName string `json:"interface_name,omitempty"` InterfaceName string `json:"interface_name,omitempty"`
} }
// Rlimit describes one resource limit to apply inside the container.
// All fields are omitted from the JSON form when they hold the zero value.
type Rlimit struct {
	// Type selects the resource being limited, for example
	// syscall.RLIMIT_NOFILE.
	Type int `json:"type,omitempty"`

	// Hard is the hard limit for the resource.
	Hard uint64 `json:"hard,omitempty"`

	// Soft is the soft limit for the resource.
	Soft uint64 `json:"soft,omitempty"`
}

View File

@ -64,12 +64,12 @@ func TestConfigJsonFormat(t *testing.T) {
t.Fail() t.Fail()
} }
if !container.Namespaces["NEWNET"] { if getNamespaceIndex(container, "NEWNET") == -1 {
t.Log("namespaces should contain NEWNET") t.Log("namespaces should contain NEWNET")
t.Fail() t.Fail()
} }
if container.Namespaces["NEWUSER"] { if getNamespaceIndex(container, "NEWUSER") != -1 {
t.Log("namespaces should not contain NEWUSER") t.Log("namespaces should not contain NEWUSER")
t.Fail() t.Fail()
} }
@ -158,3 +158,12 @@ func TestSelinuxLabels(t *testing.T) {
t.Fatalf("expected mount label %q but received %q", label, container.MountConfig.MountLabel) t.Fatalf("expected mount label %q but received %q", label, container.MountConfig.MountLabel)
} }
} }
// getNamespaceIndex reports the position of the namespace called name in
// config.Namespaces, or -1 when no entry has that name.
func getNamespaceIndex(config *Config, name string) int {
	for idx := range config.Namespaces {
		if config.Namespaces[idx].Name == name {
			return idx
		}
	}
	return -1
}

View File

@ -103,7 +103,7 @@ func getDeviceNodes(path string) ([]*Device, error) {
switch { switch {
case f.IsDir(): case f.IsDir():
switch f.Name() { switch f.Name() {
case "pts", "shm", "fd": case "pts", "shm", "fd", "mqueue":
continue continue
default: default:
sub, err := getDeviceNodes(filepath.Join(path, f.Name())) sub, err := getDeviceNodes(filepath.Join(path, f.Name()))

View File

@ -1,8 +1,11 @@
package integration package integration
import ( import (
"os"
"strings" "strings"
"testing" "testing"
"github.com/docker/libcontainer"
) )
func TestExecPS(t *testing.T) { func TestExecPS(t *testing.T) {
@ -36,3 +39,152 @@ func TestExecPS(t *testing.T) {
t.Fatalf("expected output %q but received %q", expected, actual) t.Fatalf("expected output %q but received %q", expected, actual)
} }
} }
// TestIPCPrivate runs a container from the template configuration and checks
// that the IPC namespace link it sees differs from the one of PID 1 on the
// host. The test is skipped silently in short mode.
func TestIPCPrivate(t *testing.T) {
	if testing.Short() {
		return
	}

	rootfs, err := newRootFs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	hostLink, err := os.Readlink("/proc/1/ns/ipc")
	if err != nil {
		t.Fatal(err)
	}

	buffers, exitCode, err := runContainer(newTemplateConfig(rootfs), "", "readlink", "/proc/self/ns/ipc")
	if err != nil {
		t.Fatal(err)
	}
	if exitCode != 0 {
		t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
	}

	containerLink := strings.Trim(buffers.Stdout.String(), "\n")
	if containerLink == hostLink {
		t.Fatalf("ipc link should be private to the conatiner but equals host %q %q", containerLink, hostLink)
	}
}
// TestIPCHost removes the NEWIPC entry from the template configuration, runs
// a container, and checks that the IPC namespace link it sees equals the one
// of PID 1 on the host. The test is skipped silently in short mode.
func TestIPCHost(t *testing.T) {
	if testing.Short() {
		return
	}

	rootfs, err := newRootFs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	l, err := os.Readlink("/proc/1/ns/ipc")
	if err != nil {
		t.Fatal(err)
	}

	config := newTemplateConfig(rootfs)
	i := getNamespaceIndex(config, "NEWIPC")
	if i == -1 {
		// Fail with a clear message instead of panicking on the slice
		// expression below.
		t.Fatal("NEWIPC namespace not found in template config")
	}
	// Drop the NEWIPC entry so the container shares the parent's IPC namespace.
	config.Namespaces = append(config.Namespaces[:i], config.Namespaces[i+1:]...)

	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
	if err != nil {
		t.Fatal(err)
	}
	if exitCode != 0 {
		t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
	}

	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
		t.Fatalf("ipc link not equal to host link %q %q", actual, l)
	}
}
// TestIPCJoinPath points the NEWIPC entry of the template configuration at
// "/proc/1/ns/ipc", runs a container, and checks that the IPC namespace link
// it sees equals the one of PID 1 on the host. The test is skipped silently
// in short mode.
func TestIPCJoinPath(t *testing.T) {
	if testing.Short() {
		return
	}

	rootfs, err := newRootFs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	l, err := os.Readlink("/proc/1/ns/ipc")
	if err != nil {
		t.Fatal(err)
	}

	config := newTemplateConfig(rootfs)
	i := getNamespaceIndex(config, "NEWIPC")
	if i == -1 {
		// Fail with a clear message instead of panicking on the index
		// expression below.
		t.Fatal("NEWIPC namespace not found in template config")
	}
	config.Namespaces[i].Path = "/proc/1/ns/ipc"

	buffers, exitCode, err := runContainer(config, "", "readlink", "/proc/self/ns/ipc")
	if err != nil {
		t.Fatal(err)
	}
	if exitCode != 0 {
		t.Fatalf("exit code not 0. code %d stderr %q", exitCode, buffers.Stderr)
	}

	if actual := strings.Trim(buffers.Stdout.String(), "\n"); actual != l {
		t.Fatalf("ipc link not equal to host link %q %q", actual, l)
	}
}
// TestIPCBadPath points the NEWIPC entry of the template configuration at
// the path "/proc/1/ns/ipcc" and expects running a container to fail. The
// test is skipped silently in short mode.
func TestIPCBadPath(t *testing.T) {
	if testing.Short() {
		return
	}

	rootfs, err := newRootFs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	config := newTemplateConfig(rootfs)
	i := getNamespaceIndex(config, "NEWIPC")
	if i == -1 {
		// Fail with a clear message instead of panicking on the index
		// expression below.
		t.Fatal("NEWIPC namespace not found in template config")
	}
	config.Namespaces[i].Path = "/proc/1/ns/ipcc"

	_, _, err = runContainer(config, "", "true")
	if err == nil {
		t.Fatal("container succeded with bad ipc path")
	}
}
// TestRlimit runs "ulimit -n" in a container built from the template
// configuration and expects it to print 1024. The test is skipped silently
// in short mode.
func TestRlimit(t *testing.T) {
	if testing.Short() {
		return
	}

	rootfs, err := newRootFs()
	if err != nil {
		t.Fatal(err)
	}
	defer remove(rootfs)

	out, _, err := runContainer(newTemplateConfig(rootfs), "", "/bin/sh", "-c", "ulimit -n")
	if err != nil {
		t.Fatal(err)
	}

	limit := strings.TrimSpace(out.Stdout.String())
	if limit != "1024" {
		t.Fatalf("expected rlimit to be 1024, got %s", limit)
	}
}
// getNamespaceIndex reports the position of the namespace called name in
// config.Namespaces, or -1 when no entry has that name.
func getNamespaceIndex(config *libcontainer.Config, name string) int {
	for idx := range config.Namespaces {
		if config.Namespaces[idx].Name == name {
			return idx
		}
	}
	return -1
}

View File

@ -6,7 +6,6 @@ import (
"runtime" "runtime"
"github.com/docker/libcontainer/namespaces" "github.com/docker/libcontainer/namespaces"
"github.com/docker/libcontainer/syncpipe"
) )
// init runs the libcontainer initialization code because of the busybox style needs // init runs the libcontainer initialization code because of the busybox style needs
@ -27,12 +26,7 @@ func init() {
log.Fatal(err) log.Fatal(err)
} }
syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3) if err := namespaces.Init(container, rootfs, "", os.NewFile(3, "pipe"), os.Args[3:]); err != nil {
if err != nil {
log.Fatalf("unable to create sync pipe: %s", err)
}
if err := namespaces.Init(container, rootfs, "", syncPipe, os.Args[3:]); err != nil {
log.Fatalf("unable to initialize for container: %s", err) log.Fatalf("unable to initialize for container: %s", err)
} }
os.Exit(1) os.Exit(1)

View File

@ -1,6 +1,8 @@
package integration package integration
import ( import (
"syscall"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/devices" "github.com/docker/libcontainer/devices"
@ -30,12 +32,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config {
"KILL", "KILL",
"AUDIT_WRITE", "AUDIT_WRITE",
}, },
Namespaces: map[string]bool{ Namespaces: []libcontainer.Namespace{
"NEWNS": true, {Name: "NEWNS"},
"NEWUTS": true, {Name: "NEWUTS"},
"NEWIPC": true, {Name: "NEWIPC"},
"NEWPID": true, {Name: "NEWPID"},
"NEWNET": true, {Name: "NEWNET"},
}, },
Cgroups: &cgroups.Cgroup{ Cgroups: &cgroups.Cgroup{
Parent: "integration", Parent: "integration",
@ -60,5 +62,12 @@ func newTemplateConfig(rootfs string) *libcontainer.Config {
Gateway: "localhost", Gateway: "localhost",
}, },
}, },
Rlimits: []libcontainer.Rlimit{
{
Type: syscall.RLIMIT_NOFILE,
Hard: uint64(1024),
Soft: uint64(1024),
},
},
} }
} }

View File

@ -25,6 +25,10 @@ func SetFileLabel(path string, fileLabel string) error {
return nil return nil
} }
// SetFileCreateLabel is a no-op in this implementation: fileLabel is ignored
// and nil is always returned.
func SetFileCreateLabel(fileLabel string) error {
	return nil
}
func Relabel(path string, fileLabel string, relabel string) error { func Relabel(path string, fileLabel string, relabel string) error {
return nil return nil
} }
@ -43,3 +47,15 @@ func ReserveLabel(label string) error {
func UnreserveLabel(label string) error { func UnreserveLabel(label string) error {
return nil return nil
} }
// DupSecOpt takes a process label and returns the security options that
// would duplicate it on future container processes. This implementation
// ignores src and always returns nil.
func DupSecOpt(src string) []string {
	return nil
}
// DisableSecOpt returns the security option that disables labeling support
// for future container processes. This implementation always returns nil.
func DisableSecOpt() []string {
	return nil
}

View File

@ -17,7 +17,6 @@ func InitLabels(options []string) (string, string, error) {
if !selinux.SelinuxEnabled() { if !selinux.SelinuxEnabled() {
return "", "", nil return "", "", nil
} }
var err error
processLabel, mountLabel := selinux.GetLxcContexts() processLabel, mountLabel := selinux.GetLxcContexts()
if processLabel != "" { if processLabel != "" {
pcon := selinux.NewContext(processLabel) pcon := selinux.NewContext(processLabel)
@ -38,7 +37,7 @@ func InitLabels(options []string) (string, string, error) {
processLabel = pcon.Get() processLabel = pcon.Get()
mountLabel = mcon.Get() mountLabel = mcon.Get()
} }
return processLabel, mountLabel, err return processLabel, mountLabel, nil
} }
// DEPRECATED: The GenLabels function is only to be used during the transition to the official API. // DEPRECATED: The GenLabels function is only to be used during the transition to the official API.
@ -88,6 +87,14 @@ func SetFileLabel(path string, fileLabel string) error {
return nil return nil
} }
// SetFileCreateLabel tells the kernel which label to apply to all files
// created from now on. When SELinux is not enabled it does nothing and
// returns nil.
func SetFileCreateLabel(fileLabel string) error {
	if !selinux.SelinuxEnabled() {
		return nil
	}
	return selinux.Setfscreatecon(fileLabel)
}
// Change the label of path to the filelabel string. If the relabel string // Change the label of path to the filelabel string. If the relabel string
// is "z", relabel will change the MCS label to s0. This will allow all // is "z", relabel will change the MCS label to s0. This will allow all
// containers to share the content. If the relabel string is a "Z" then // containers to share the content. If the relabel string is a "Z" then
@ -130,3 +137,15 @@ func UnreserveLabel(label string) error {
selinux.FreeLxcContexts(label) selinux.FreeLxcContexts(label)
return nil return nil
} }
// DupSecOpt takes a process label and returns the security options that can
// be used to set duplicate labels on future container processes. The work is
// delegated to selinux.DupSecOpt.
func DupSecOpt(src string) []string {
	return selinux.DupSecOpt(src)
}
// DisableSecOpt returns the security option that disables labeling support
// for future container processes. The work is delegated to
// selinux.DisableSecOpt.
func DisableSecOpt() []string {
	return selinux.DisableSecOpt()
}

View File

@ -3,6 +3,7 @@
package label package label
import ( import (
"strings"
"testing" "testing"
"github.com/docker/libcontainer/selinux" "github.com/docker/libcontainer/selinux"
@ -33,7 +34,7 @@ func TestInit(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
if plabel != "user_u:user_r:user_t:s0:c1,c15" || mlabel != "user_u:object_r:svirt_sandbox_file_t:s0:c1,c15" { if plabel != "user_u:user_r:user_t:s0:c1,c15" || mlabel != "user_u:object_r:svirt_sandbox_file_t:s0:c1,c15" {
t.Log("InitLabels User Failed") t.Log("InitLabels User Match Failed")
t.Log(plabel, mlabel) t.Log(plabel, mlabel)
t.Fatal(err) t.Fatal(err)
} }
@ -46,3 +47,43 @@ func TestInit(t *testing.T) {
} }
} }
} }
// TestDuplicateLabel validates every option DupSecOpt returns for a fixed
// process label: each must have the form "label:<field>:<value>" with the
// value matching the corresponding part of the label. It then checks that
// the first option returned by DisableSecOpt is "label:disable".
func TestDuplicateLabel(t *testing.T) {
	secopt := DupSecOpt("system_u:system_r:svirt_lxc_net_t:s0:c1,c2")
	t.Log(secopt)
	for _, opt := range secopt {
		// Split into at most three parts so the level, which itself
		// contains ':', stays in one piece.
		con := strings.SplitN(opt, ":", 3)
		if len(con) != 3 || con[0] != "label" {
			t.Errorf("Invalid DupSecOpt return value")
			continue
		}
		switch field, value := con[1], con[2]; field {
		case "user":
			if value != "system_u" {
				t.Errorf("DupSecOpt Failed user incorrect")
			}
		case "role":
			if value != "system_r" {
				t.Errorf("DupSecOpt Failed role incorrect")
			}
		case "type":
			if value != "svirt_lxc_net_t" {
				t.Errorf("DupSecOpt Failed type incorrect")
			}
		case "level":
			if value != "s0:c1,c2" {
				t.Errorf("DupSecOpt Failed level incorrect")
			}
		default:
			t.Errorf("DupSecOpt Failed invalid field %q", field)
		}
	}

	secopt = DisableSecOpt()
	// Guard the index: an empty result should fail the test, not panic.
	if len(secopt) == 0 || secopt[0] != "label:disable" {
		t.Errorf("DisableSecOpt Failed level incorrect")
	}
}

View File

@ -97,7 +97,7 @@ func InitializeMountNamespace(rootfs, console string, sysReadonly bool, mountCon
return nil return nil
} }
// mountSystem sets up linux specific system mounts like sys, proc, shm, and devpts // mountSystem sets up linux specific system mounts like mqueue, sys, proc, shm, and devpts
// inside the mount namespace // inside the mount namespace
func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) error { func mountSystem(rootfs string, sysReadonly bool, mountConfig *MountConfig) error {
for _, m := range newSystemMounts(rootfs, mountConfig.MountLabel, sysReadonly) { for _, m := range newSystemMounts(rootfs, mountConfig.MountLabel, sysReadonly) {
@ -168,6 +168,7 @@ func newSystemMounts(rootfs, mountLabel string, sysReadonly bool) []mount {
{source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags}, {source: "proc", path: filepath.Join(rootfs, "proc"), device: "proc", flags: defaultMountFlags},
{source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)}, {source: "tmpfs", path: filepath.Join(rootfs, "dev"), device: "tmpfs", flags: syscall.MS_NOSUID | syscall.MS_STRICTATIME, data: label.FormatMountLabel("mode=755", mountLabel)},
{source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)}, {source: "shm", path: filepath.Join(rootfs, "dev", "shm"), device: "tmpfs", flags: defaultMountFlags, data: label.FormatMountLabel("mode=1777,size=65536k", mountLabel)},
{source: "mqueue", path: filepath.Join(rootfs, "dev", "mqueue"), device: "mqueue", flags: defaultMountFlags},
{source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)}, {source: "devpts", path: filepath.Join(rootfs, "dev", "pts"), device: "devpts", flags: syscall.MS_NOSUID | syscall.MS_NOEXEC, data: label.FormatMountLabel("newinstance,ptmxmode=0666,mode=620,gid=5", mountLabel)},
} }

View File

@ -3,6 +3,7 @@
package namespaces package namespaces
import ( import (
"encoding/json"
"io" "io"
"os" "os"
"os/exec" "os/exec"
@ -13,7 +14,6 @@ import (
"github.com/docker/libcontainer/cgroups/fs" "github.com/docker/libcontainer/cgroups/fs"
"github.com/docker/libcontainer/cgroups/systemd" "github.com/docker/libcontainer/cgroups/systemd"
"github.com/docker/libcontainer/network" "github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/syncpipe"
"github.com/docker/libcontainer/system" "github.com/docker/libcontainer/system"
) )
@ -22,19 +22,17 @@ import (
// Exec performs setup outside of a namespace so that a container can be // Exec performs setup outside of a namespace so that a container can be
// executed. Exec is a high level function for working with container namespaces. // executed. Exec is a high level function for working with container namespaces.
func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) { func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Writer, console, dataPath string, args []string, createCommand CreateCommand, startCallback func()) (int, error) {
var ( var err error
err error
)
// create a pipe so that we can syncronize with the namespaced process and // create a pipe so that we can syncronize with the namespaced process and
// pass the veth name to the child // pass the state and configuration to the child process
syncPipe, err := syncpipe.NewSyncPipe() parent, child, err := newInitPipe()
if err != nil { if err != nil {
return -1, err return -1, err
} }
defer syncPipe.Close() defer parent.Close()
command := createCommand(container, console, dataPath, os.Args[0], syncPipe.Child(), args) command := createCommand(container, console, dataPath, os.Args[0], child, args)
// Note: these are only used in non-tty mode // Note: these are only used in non-tty mode
// if there is a tty for the container it will be opened within the namespace and the // if there is a tty for the container it will be opened within the namespace and the
// fds will be duped to stdin, stdiout, and stderr // fds will be duped to stdin, stdiout, and stderr
@ -43,39 +41,42 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
command.Stderr = stderr command.Stderr = stderr
if err := command.Start(); err != nil { if err := command.Start(); err != nil {
child.Close()
return -1, err return -1, err
} }
child.Close()
// Now we passed the pipe to the child, close our side terminate := func(terr error) (int, error) {
syncPipe.CloseChild() // TODO: log the errors for kill and wait
command.Process.Kill()
command.Wait()
return -1, terr
}
started, err := system.GetProcessStartTime(command.Process.Pid) started, err := system.GetProcessStartTime(command.Process.Pid)
if err != nil { if err != nil {
return -1, err return terminate(err)
} }
// Do this before syncing with child so that no children // Do this before syncing with child so that no children
// can escape the cgroup // can escape the cgroup
cgroupRef, err := SetupCgroups(container, command.Process.Pid) cgroupPaths, err := SetupCgroups(container, command.Process.Pid)
if err != nil { if err != nil {
command.Process.Kill() return terminate(err)
command.Wait()
return -1, err
}
defer cgroupRef.Cleanup()
cgroupPaths, err := cgroupRef.Paths()
if err != nil {
command.Process.Kill()
command.Wait()
return -1, err
} }
defer cgroups.RemovePaths(cgroupPaths)
var networkState network.NetworkState var networkState network.NetworkState
if err := InitializeNetworking(container, command.Process.Pid, syncPipe, &networkState); err != nil { if err := InitializeNetworking(container, command.Process.Pid, &networkState); err != nil {
command.Process.Kill() return terminate(err)
command.Wait() }
return -1, err // send the state to the container's init process then shutdown writes for the parent
if err := json.NewEncoder(parent).Encode(networkState); err != nil {
return terminate(err)
}
// shutdown writes for the parent side of the pipe
if err := syscall.Shutdown(int(parent.Fd()), syscall.SHUT_WR); err != nil {
return terminate(err)
} }
state := &libcontainer.State{ state := &libcontainer.State{
@ -86,17 +87,18 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
} }
if err := libcontainer.SaveState(dataPath, state); err != nil { if err := libcontainer.SaveState(dataPath, state); err != nil {
command.Process.Kill() return terminate(err)
command.Wait()
return -1, err
} }
defer libcontainer.DeleteState(dataPath) defer libcontainer.DeleteState(dataPath)
// Sync with child // wait for the child process to fully complete and receive an error message
if err := syncPipe.ReadFromChild(); err != nil { // if one was encoutered
command.Process.Kill() var ierr *initError
command.Wait() if err := json.NewDecoder(parent).Decode(&ierr); err != nil && err != io.EOF {
return -1, err return terminate(err)
}
if ierr != nil {
return terminate(ierr)
} }
if startCallback != nil { if startCallback != nil {
@ -108,7 +110,6 @@ func Exec(container *libcontainer.Config, stdin io.Reader, stdout, stderr io.Wri
return -1, err return -1, err
} }
} }
return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil return command.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
} }
@ -129,16 +130,6 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini
"data_path=" + dataPath, "data_path=" + dataPath,
} }
/*
TODO: move user and wd into env
if user != "" {
env = append(env, "user="+user)
}
if workingDir != "" {
env = append(env, "wd="+workingDir)
}
*/
command := exec.Command(init, append([]string{"init", "--"}, args...)...) command := exec.Command(init, append([]string{"init", "--"}, args...)...)
// make sure the process is executed inside the context of the rootfs // make sure the process is executed inside the context of the rootfs
command.Dir = container.RootFs command.Dir = container.RootFs
@ -157,23 +148,20 @@ func DefaultCreateCommand(container *libcontainer.Config, console, dataPath, ini
// SetupCgroups applies the cgroup restrictions to the process running in the container based // SetupCgroups applies the cgroup restrictions to the process running in the container based
// on the container's configuration // on the container's configuration
func SetupCgroups(container *libcontainer.Config, nspid int) (cgroups.ActiveCgroup, error) { func SetupCgroups(container *libcontainer.Config, nspid int) (map[string]string, error) {
if container.Cgroups != nil { if container.Cgroups != nil {
c := container.Cgroups c := container.Cgroups
if systemd.UseSystemd() { if systemd.UseSystemd() {
return systemd.Apply(c, nspid) return systemd.Apply(c, nspid)
} }
return fs.Apply(c, nspid) return fs.Apply(c, nspid)
} }
return map[string]string{}, nil
return nil, nil
} }
// InitializeNetworking creates the container's network stack outside of the namespace and moves // InitializeNetworking creates the container's network stack outside of the namespace and moves
// interfaces into the container's net namespaces if necessary // interfaces into the container's net namespaces if necessary
func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *syncpipe.SyncPipe, networkState *network.NetworkState) error { func InitializeNetworking(container *libcontainer.Config, nspid int, networkState *network.NetworkState) error {
for _, config := range container.Networks { for _, config := range container.Networks {
strategy, err := network.GetStrategy(config.Type) strategy, err := network.GetStrategy(config.Type)
if err != nil { if err != nil {
@ -183,18 +171,5 @@ func InitializeNetworking(container *libcontainer.Config, nspid int, pipe *syncp
return err return err
} }
} }
return pipe.SendToChild(networkState) return nil
}
// GetNamespaceFlags parses the container's Namespaces options to set the correct
// flags on clone, unshare, and setns
func GetNamespaceFlags(namespaces map[string]bool) (flag int) {
for key, enabled := range namespaces {
if enabled {
if ns := GetNamespace(key); ns != nil {
flag |= ns.Value
}
}
}
return flag
} }

View File

@ -3,6 +3,7 @@
package namespaces package namespaces
import ( import (
"encoding/json"
"fmt" "fmt"
"io" "io"
"os" "os"
@ -15,7 +16,6 @@ import (
"github.com/docker/libcontainer/apparmor" "github.com/docker/libcontainer/apparmor"
"github.com/docker/libcontainer/cgroups" "github.com/docker/libcontainer/cgroups"
"github.com/docker/libcontainer/label" "github.com/docker/libcontainer/label"
"github.com/docker/libcontainer/syncpipe"
"github.com/docker/libcontainer/system" "github.com/docker/libcontainer/system"
) )
@ -41,11 +41,11 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs
} }
} }
pipe, err := syncpipe.NewSyncPipe() parent, child, err := newInitPipe()
if err != nil { if err != nil {
return -1, err return -1, err
} }
defer pipe.Close() defer parent.Close()
// Note: these are only used in non-tty mode // Note: these are only used in non-tty mode
// if there is a tty for the container it will be opened within the namespace and the // if there is a tty for the container it will be opened within the namespace and the
@ -53,23 +53,28 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs
cmd.Stdin = stdin cmd.Stdin = stdin
cmd.Stdout = stdout cmd.Stdout = stdout
cmd.Stderr = stderr cmd.Stderr = stderr
cmd.ExtraFiles = []*os.File{child}
cmd.ExtraFiles = []*os.File{pipe.Child()}
if err := cmd.Start(); err != nil { if err := cmd.Start(); err != nil {
child.Close()
return -1, err return -1, err
} }
pipe.CloseChild() child.Close()
terminate := func(terr error) (int, error) {
// TODO: log the errors for kill and wait
cmd.Process.Kill()
cmd.Wait()
return -1, terr
}
// Enter cgroups. // Enter cgroups.
if err := EnterCgroups(state, cmd.Process.Pid); err != nil { if err := EnterCgroups(state, cmd.Process.Pid); err != nil {
return -1, err return terminate(err)
} }
if err := pipe.SendToChild(container); err != nil { if err := json.NewEncoder(parent).Encode(container); err != nil {
cmd.Process.Kill() return terminate(err)
cmd.Wait()
return -1, err
} }
if startCallback != nil { if startCallback != nil {
@ -81,7 +86,6 @@ func ExecIn(container *libcontainer.Config, state *libcontainer.State, userArgs
return -1, err return -1, err
} }
} }
return cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil return cmd.ProcessState.Sys().(syscall.WaitStatus).ExitStatus(), nil
} }
@ -107,7 +111,7 @@ func FinalizeSetns(container *libcontainer.Config, args []string) error {
} }
} }
if err := system.Execv(args[0], args[0:], container.Env); err != nil { if err := system.Execv(args[0], args[0:], os.Environ()); err != nil {
return err return err
} }

View File

@ -3,7 +3,9 @@
package namespaces package namespaces
import ( import (
"encoding/json"
"fmt" "fmt"
"io/ioutil"
"os" "os"
"strings" "strings"
"syscall" "syscall"
@ -17,7 +19,6 @@ import (
"github.com/docker/libcontainer/network" "github.com/docker/libcontainer/network"
"github.com/docker/libcontainer/security/capabilities" "github.com/docker/libcontainer/security/capabilities"
"github.com/docker/libcontainer/security/restrict" "github.com/docker/libcontainer/security/restrict"
"github.com/docker/libcontainer/syncpipe"
"github.com/docker/libcontainer/system" "github.com/docker/libcontainer/system"
"github.com/docker/libcontainer/user" "github.com/docker/libcontainer/user"
"github.com/docker/libcontainer/utils" "github.com/docker/libcontainer/utils"
@ -29,11 +30,22 @@ import (
// and other options required for the new container. // and other options required for the new container.
// The caller of Init function has to ensure that the go runtime is locked to an OS thread // The caller of Init function has to ensure that the go runtime is locked to an OS thread
// (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended. // (using runtime.LockOSThread) else system calls like setns called within Init may not work as intended.
func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syncPipe *syncpipe.SyncPipe, args []string) (err error) { func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, pipe *os.File, args []string) (err error) {
defer func() { defer func() {
// if we have an error during the initialization of the container's init then send it back to the
// parent process in the form of an initError.
if err != nil { if err != nil {
syncPipe.ReportChildError(err) // ensure that any data sent from the parent is consumed so it doesn't
// receive ECONNRESET when the child writes to the pipe.
ioutil.ReadAll(pipe)
if err := json.NewEncoder(pipe).Encode(initError{
Message: err.Error(),
}); err != nil {
panic(err)
} }
}
// ensure that this pipe is always closed
pipe.Close()
}() }()
rootfs, err := utils.ResolveRootfs(uncleanRootfs) rootfs, err := utils.ResolveRootfs(uncleanRootfs)
@ -49,10 +61,13 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
// We always read this as it is a way to sync with the parent as well // We always read this as it is a way to sync with the parent as well
var networkState *network.NetworkState var networkState *network.NetworkState
if err := syncPipe.ReadFromParent(&networkState); err != nil { if err := json.NewDecoder(pipe).Decode(&networkState); err != nil {
return err
}
// join any namespaces via a path to the namespace fd if provided
if err := joinExistingNamespaces(container.Namespaces); err != nil {
return err return err
} }
if consolePath != "" { if consolePath != "" {
if err := console.OpenAndDup(consolePath); err != nil { if err := console.OpenAndDup(consolePath); err != nil {
return err return err
@ -66,6 +81,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
return fmt.Errorf("setctty %s", err) return fmt.Errorf("setctty %s", err)
} }
} }
if err := setupNetwork(container, networkState); err != nil { if err := setupNetwork(container, networkState); err != nil {
return fmt.Errorf("setup networking %s", err) return fmt.Errorf("setup networking %s", err)
} }
@ -73,6 +89,10 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
return fmt.Errorf("setup route %s", err) return fmt.Errorf("setup route %s", err)
} }
if err := setupRlimits(container); err != nil {
return fmt.Errorf("setup rlimits %s", err)
}
label.Init() label.Init()
if err := mount.InitializeMountNamespace(rootfs, if err := mount.InitializeMountNamespace(rootfs,
@ -84,7 +104,7 @@ func Init(container *libcontainer.Config, uncleanRootfs, consolePath string, syn
if container.Hostname != "" { if container.Hostname != "" {
if err := syscall.Sethostname([]byte(container.Hostname)); err != nil { if err := syscall.Sethostname([]byte(container.Hostname)); err != nil {
return fmt.Errorf("sethostname %s", err) return fmt.Errorf("unable to sethostname %q: %s", container.Hostname, err)
} }
} }
@ -151,26 +171,43 @@ func RestoreParentDeathSignal(old int) error {
// SetupUser changes the groups, gid, and uid for the user inside the container // SetupUser changes the groups, gid, and uid for the user inside the container
func SetupUser(u string) error { func SetupUser(u string) error {
uid, gid, suppGids, home, err := user.GetUserGroupSupplementaryHome(u, syscall.Getuid(), syscall.Getgid(), "/") // Set up defaults.
defaultExecUser := user.ExecUser{
Uid: syscall.Getuid(),
Gid: syscall.Getgid(),
Home: "/",
}
passwdFile, err := user.GetPasswdFile()
if err != nil {
return err
}
groupFile, err := user.GetGroupFile()
if err != nil {
return err
}
execUser, err := user.GetExecUserFile(u, &defaultExecUser, passwdFile, groupFile)
if err != nil { if err != nil {
return fmt.Errorf("get supplementary groups %s", err) return fmt.Errorf("get supplementary groups %s", err)
} }
if err := syscall.Setgroups(suppGids); err != nil { if err := syscall.Setgroups(execUser.Sgids); err != nil {
return fmt.Errorf("setgroups %s", err) return fmt.Errorf("setgroups %s", err)
} }
if err := syscall.Setgid(gid); err != nil { if err := system.Setgid(execUser.Gid); err != nil {
return fmt.Errorf("setgid %s", err) return fmt.Errorf("setgid %s", err)
} }
if err := syscall.Setuid(uid); err != nil { if err := system.Setuid(execUser.Uid); err != nil {
return fmt.Errorf("setuid %s", err) return fmt.Errorf("setuid %s", err)
} }
// if we didn't get HOME already, set it based on the user's HOME // if we didn't get HOME already, set it based on the user's HOME
if envHome := os.Getenv("HOME"); envHome == "" { if envHome := os.Getenv("HOME"); envHome == "" {
if err := os.Setenv("HOME", home); err != nil { if err := os.Setenv("HOME", execUser.Home); err != nil {
return fmt.Errorf("set HOME %s", err) return fmt.Errorf("set HOME %s", err)
} }
} }
@ -205,6 +242,16 @@ func setupRoute(container *libcontainer.Config) error {
return nil return nil
} }
// setupRlimits applies every resource limit listed in container.Rlimits to the
// current process with setrlimit. It stops at, and returns, the first failure.
func setupRlimits(container *libcontainer.Config) error {
	for _, rl := range container.Rlimits {
		limit := syscall.Rlimit{Cur: rl.Soft, Max: rl.Hard}
		err := syscall.Setrlimit(rl.Type, &limit)
		if err == nil {
			continue
		}
		return fmt.Errorf("error setting rlimit type %v: %v", rl.Type, err)
	}
	return nil
}
// FinalizeNamespace drops the caps, sets the correct user // FinalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaky file descriptors // and working dir, and closes any leaky file descriptors
// before execing the command inside the namespace // before execing the command inside the namespace
@ -261,3 +308,22 @@ func LoadContainerEnvironment(container *libcontainer.Config) error {
} }
return nil return nil
} }
// joinExistingNamespaces walks the container's namespace list and, for every
// entry that carries a Path, opens that path and calls setns on the resulting
// fd so that the current process joins that namespace. Entries without a Path
// are skipped. The first open or setns failure is returned unchanged.
func joinExistingNamespaces(namespaces []libcontainer.Namespace) error {
	for _, entry := range namespaces {
		if entry.Path == "" {
			continue
		}
		nsFile, err := os.OpenFile(entry.Path, os.O_RDONLY, 0)
		if err != nil {
			return err
		}
		setnsErr := system.Setns(nsFile.Fd(), uintptr(namespaceInfo[entry.Name]))
		// The descriptor is only needed for the setns call itself.
		nsFile.Close()
		if setnsErr != nil {
			return setnsErr
		}
	}
	return nil
}

View File

@ -10,6 +10,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <sys/prctl.h>
#include <sys/types.h> #include <sys/types.h>
#include <unistd.h> #include <unistd.h>
#include <getopt.h> #include <getopt.h>
@ -88,6 +89,11 @@ void nsenter()
return; return;
} }
if (prctl(PR_SET_CHILD_SUBREAPER, 1, 0, 0, 0) == -1) {
fprintf(stderr, "nsenter: failed to set child subreaper: %s", strerror(errno));
exit(1);
}
static const struct option longopts[] = { static const struct option longopts[] = {
{"nspid", required_argument, NULL, 'n'}, {"nspid", required_argument, NULL, 'n'},
{"console", required_argument, NULL, 't'}, {"console", required_argument, NULL, 't'},

View File

@ -1,50 +0,0 @@
package namespaces
import "errors"
// Namespace describes one namespace kind by a symbolic key, an integer flag
// value and a short file name. Namespaces is a list of Namespace pointers;
// its lookup methods tolerate nil elements.
type (
Namespace struct {
// Key is the symbolic name used for lookups (see GetNamespace).
Key string `json:"key,omitempty"`
// Value is the integer flag associated with this namespace.
Value int `json:"value,omitempty"`
// File is a short file name for this namespace.
// NOTE(review): presumably the entry name under /proc/<pid>/ns — confirm.
File string `json:"file,omitempty"`
}
Namespaces []*Namespace
)
// namespaceList is used to convert the libcontainer types
// into the names of the files located in /proc/<pid>/ns/* for
// each namespace. It is declared empty here.
//
// ErrUnkownNamespace and ErrUnsupported are sentinel errors. The spelling
// "Unkown" is part of the exported identifier and is kept as-is.
var (
namespaceList = Namespaces{}
ErrUnkownNamespace = errors.New("Unknown namespace")
ErrUnsupported = errors.New("Unsupported method")
)
// String returns the namespace's Key.
func (ns *Namespace) String() string {
return ns.Key
}
// GetNamespace looks up key in namespaceList and returns a pointer to a copy
// of the matching entry, so the caller never receives the shared element
// itself. It returns nil when no entry has that key.
func GetNamespace(key string) *Namespace {
	for i := range namespaceList {
		if candidate := namespaceList[i]; candidate.Key == key {
			dup := *candidate
			return &dup
		}
	}
	return nil
}
// Contains reports whether the slice holds a Namespace whose Key equals ns.
func (n Namespaces) Contains(ns string) bool {
	match := n.Get(ns)
	return match != nil
}
// Get returns the first non-nil element whose Key equals ns, or nil when the
// slice has no such element.
func (n Namespaces) Get(ns string) *Namespace {
	for i := range n {
		entry := n[i]
		if entry == nil || entry.Key != ns {
			continue
		}
		return entry
	}
	return nil
}

View File

@ -1,16 +0,0 @@
package namespaces
import (
"syscall"
)
// init fills namespaceList with six namespace kinds, pairing each key with its
// syscall.CLONE_* flag and a short file name.
func init() {
namespaceList = Namespaces{
{Key: "NEWNS", Value: syscall.CLONE_NEWNS, File: "mnt"},
{Key: "NEWUTS", Value: syscall.CLONE_NEWUTS, File: "uts"},
{Key: "NEWIPC", Value: syscall.CLONE_NEWIPC, File: "ipc"},
{Key: "NEWUSER", Value: syscall.CLONE_NEWUSER, File: "user"},
{Key: "NEWPID", Value: syscall.CLONE_NEWPID, File: "pid"},
{Key: "NEWNET", Value: syscall.CLONE_NEWNET, File: "net"},
}
}

View File

@ -1,30 +0,0 @@
package namespaces
import (
"testing"
)
// TestNamespacesContains exercises Namespaces.Contains with a key that is
// absent, a key that is present, and a slice whose first element is nil.
func TestNamespacesContains(t *testing.T) {
	present := Namespaces{
		GetNamespace("NEWPID"),
		GetNamespace("NEWNS"),
		GetNamespace("NEWUTS"),
	}

	if hasNet := present.Contains("NEWNET"); hasNet {
		t.Fatal("namespaces should not contain NEWNET")
	}

	if hasPid := present.Contains("NEWPID"); !hasPid {
		t.Fatal("namespaces should contain NEWPID but does not")
	}

	// The lookup of an undefined key yields nil, so the slice starts with a
	// nil element that Contains has to skip over.
	sparse := Namespaces{
		GetNamespace("UNDEFINED"),
		GetNamespace("NEWPID"),
	}

	if hasPid := sparse.Contains("NEWPID"); !hasPid {
		t.Fatal("namespaces should contain NEWPID but does not")
	}
}

View File

@ -0,0 +1,45 @@
// +build linux
package namespaces
import (
"os"
"syscall"
"github.com/docker/libcontainer"
)
// initError is a JSON-serializable error that carries only a message string.
type initError struct {
Message string `json:"message,omitempty"`
}
// Error implements the error interface by returning the stored message.
func (i initError) Error() string {
return i.Message
}
// namespaceInfo maps a namespace name (for example "NEWNET") to the matching
// syscall.CLONE_* flag. Looking up a name that is not listed yields 0.
var namespaceInfo = map[string]int{
"NEWNET": syscall.CLONE_NEWNET,
"NEWNS": syscall.CLONE_NEWNS,
"NEWUSER": syscall.CLONE_NEWUSER,
"NEWIPC": syscall.CLONE_NEWIPC,
"NEWUTS": syscall.CLONE_NEWUTS,
"NEWPID": syscall.CLONE_NEWPID,
}
// newInitPipe creates a connected pair of AF_LOCAL stream sockets, both marked
// close-on-exec, and wraps each end in an *os.File. The file named "parent"
// wraps the second descriptor of the pair and the file named "child" wraps the
// first. On failure both files are nil and the socketpair error is returned.
func newInitPipe() (parent *os.File, child *os.File, err error) {
	const sockType = syscall.SOCK_STREAM | syscall.SOCK_CLOEXEC

	pair, err := syscall.Socketpair(syscall.AF_LOCAL, sockType, 0)
	if err != nil {
		return nil, nil, err
	}
	parent = os.NewFile(uintptr(pair[1]), "parent")
	child = os.NewFile(uintptr(pair[0]), "child")
	return parent, child, nil
}
// GetNamespaceFlags ORs together the namespaceInfo flag of every namespace in
// the list and returns the result, for use with clone, unshare, and setns.
// A name that is not present in namespaceInfo contributes nothing.
func GetNamespaceFlags(namespaces []libcontainer.Namespace) (flag int) {
	for i := range namespaces {
		flag |= namespaceInfo[namespaces[i].Name]
	}
	return flag
}

View File

@ -7,6 +7,7 @@ import (
"math/rand" "math/rand"
"net" "net"
"os" "os"
"path/filepath"
"sync/atomic" "sync/atomic"
"syscall" "syscall"
"unsafe" "unsafe"
@ -575,6 +576,31 @@ func NetworkSetMTU(iface *net.Interface, mtu int) error {
return s.HandleAck(wb.Seq) return s.HandleAck(wb.Seq)
} }
// NetworkSetTxQueueLen sets the transmit queue length of iface by sending an
// RTM_SETLINK netlink request that carries an IFLA_TXQLEN attribute and then
// waiting for the acknowledgement.
// This is identical to running: ip link set dev $name txqueuelen $QLEN
func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
	sock, err := getNetlinkSocket()
	if err != nil {
		return err
	}
	defer sock.Close()

	req := newNetlinkRequest(syscall.RTM_SETLINK, syscall.NLM_F_ACK)

	ifInfo := newIfInfomsg(syscall.AF_UNSPEC)
	ifInfo.Type = syscall.RTM_SETLINK
	ifInfo.Flags = syscall.NLM_F_REQUEST
	ifInfo.Index = int32(iface.Index)
	ifInfo.Change = DEFAULT_CHANGE
	req.AddData(ifInfo)
	req.AddData(uint32Attr(syscall.IFLA_TXQLEN, uint32(txQueueLen)))

	sendErr := sock.Send(req)
	if sendErr != nil {
		return sendErr
	}
	return sock.HandleAck(req.Seq)
}
func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error { func networkMasterAction(iface *net.Interface, rtattr *RtAttr) error {
s, err := getNetlinkSocket() s, err := getNetlinkSocket()
if err != nil { if err != nil {
@ -768,26 +794,38 @@ func NetworkLinkAddVlan(masterDev, vlanDev string, vlanId uint16) error {
return s.HandleAck(wb.Seq) return s.HandleAck(wb.Seq)
} }
// Add MAC VLAN network interface with masterDev as its upper device // MacVlan link has LowerDev, UpperDev and operates in Mode mode
// This is identical to running: // This simplifies the code when creating MacVlan or MacVtap interface
// ip link add name $name link $masterdev type macvlan mode $mode type MacVlanLink struct {
func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error { MasterDev string
s, err := getNetlinkSocket() SlaveDev string
if err != nil { mode string
return err
} }
defer s.Close()
macVlan := map[string]uint32{ func (m MacVlanLink) Mode() uint32 {
modeMap := map[string]uint32{
"private": MACVLAN_MODE_PRIVATE, "private": MACVLAN_MODE_PRIVATE,
"vepa": MACVLAN_MODE_VEPA, "vepa": MACVLAN_MODE_VEPA,
"bridge": MACVLAN_MODE_BRIDGE, "bridge": MACVLAN_MODE_BRIDGE,
"passthru": MACVLAN_MODE_PASSTHRU, "passthru": MACVLAN_MODE_PASSTHRU,
} }
return modeMap[m.mode]
}
// Add MAC VLAN network interface with masterDev as its upper device
// This is identical to running:
// ip link add name $name link $masterdev type macvlan mode $mode
func networkLinkMacVlan(dev_type string, mcvln *MacVlanLink) error {
s, err := getNetlinkSocket()
if err != nil {
return err
}
defer s.Close()
wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK) wb := newNetlinkRequest(syscall.RTM_NEWLINK, syscall.NLM_F_CREATE|syscall.NLM_F_EXCL|syscall.NLM_F_ACK)
masterDevIfc, err := net.InterfaceByName(masterDev) masterDevIfc, err := net.InterfaceByName(mcvln.MasterDev)
if err != nil { if err != nil {
return err return err
} }
@ -796,16 +834,16 @@ func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
wb.AddData(msg) wb.AddData(msg)
nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil) nest1 := newRtAttr(syscall.IFLA_LINKINFO, nil)
newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated("macvlan")) newRtAttrChild(nest1, IFLA_INFO_KIND, nonZeroTerminated(dev_type))
nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil) nest2 := newRtAttrChild(nest1, IFLA_INFO_DATA, nil)
macVlanData := make([]byte, 4) macVlanData := make([]byte, 4)
native.PutUint32(macVlanData, macVlan[mode]) native.PutUint32(macVlanData, mcvln.Mode())
newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData) newRtAttrChild(nest2, IFLA_MACVLAN_MODE, macVlanData)
wb.AddData(nest1) wb.AddData(nest1)
wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index))) wb.AddData(uint32Attr(syscall.IFLA_LINK, uint32(masterDevIfc.Index)))
wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(macVlanDev))) wb.AddData(newRtAttr(syscall.IFLA_IFNAME, zeroTerminated(mcvln.SlaveDev)))
if err := s.Send(wb); err != nil { if err := s.Send(wb); err != nil {
return err return err
@ -813,6 +851,22 @@ func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
return s.HandleAck(wb.Seq) return s.HandleAck(wb.Seq)
} }
func NetworkLinkAddMacVlan(masterDev, macVlanDev string, mode string) error {
return networkLinkMacVlan("macvlan", &MacVlanLink{
MasterDev: masterDev,
SlaveDev: macVlanDev,
mode: mode,
})
}
func NetworkLinkAddMacVtap(masterDev, macVlanDev string, mode string) error {
return networkLinkMacVlan("macvtap", &MacVlanLink{
MasterDev: masterDev,
SlaveDev: macVlanDev,
mode: mode,
})
}
func networkLinkIpAction(action, flags int, ifa IfAddr) error { func networkLinkIpAction(action, flags int, ifa IfAddr) error {
s, err := getNetlinkSocket() s, err := getNetlinkSocket()
if err != nil { if err != nil {
@ -1002,28 +1056,23 @@ func AddRoute(destination, source, gateway, device string) error {
} }
if source != "" { if source != "" {
srcIP, srcNet, err := net.ParseCIDR(source) srcIP := net.ParseIP(source)
if err != nil { if srcIP == nil {
return fmt.Errorf("source CIDR %s couldn't be parsed", source) return fmt.Errorf("source IP %s couldn't be parsed", source)
} }
srcFamily := getIpFamily(srcIP) srcFamily := getIpFamily(srcIP)
if currentFamily != -1 && currentFamily != srcFamily { if currentFamily != -1 && currentFamily != srcFamily {
return fmt.Errorf("source and destination ip were not the same IP family") return fmt.Errorf("source and destination ip were not the same IP family")
} }
currentFamily = srcFamily currentFamily = srcFamily
srcLen, bits := srcNet.Mask.Size()
if srcLen == 0 && bits == 0 {
return fmt.Errorf("source CIDR %s generated a non-canonical Mask", source)
}
msg.Family = uint8(srcFamily) msg.Family = uint8(srcFamily)
msg.Src_len = uint8(srcLen)
var srcData []byte var srcData []byte
if srcFamily == syscall.AF_INET { if srcFamily == syscall.AF_INET {
srcData = srcIP.To4() srcData = srcIP.To4()
} else { } else {
srcData = srcIP.To16() srcData = srcIP.To16()
} }
rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_SRC, srcData)) rtAttrs = append(rtAttrs, newRtAttr(syscall.RTA_PREFSRC, srcData))
} }
if gateway != "" { if gateway != "" {
@ -1204,6 +1253,28 @@ func SetMacAddress(name, addr string) error {
return nil return nil
} }
// SetHairpinMode writes "1" (enabled) or "0" (disabled) to the file
// /sys/class/net/<iface.Name>/brport/hairpin_mode. It returns the error from
// opening or writing that file, if any.
func SetHairpinMode(iface *net.Interface, enabled bool) error {
	hairpinPath := filepath.Join("/sys/class/net", iface.Name, "brport/hairpin_mode")

	f, err := os.OpenFile(hairpinPath, os.O_WRONLY, 0)
	if err != nil {
		return err
	}
	defer f.Close()

	flag := []byte("0")
	if enabled {
		flag = []byte("1")
	}

	_, err = f.Write(flag)
	return err
}
func ChangeName(iface *net.Interface, newName string) error { func ChangeName(iface *net.Interface, newName string) error {
if len(newName) >= IFNAMSIZ { if len(newName) >= IFNAMSIZ {
return fmt.Errorf("Interface name %s too long", newName) return fmt.Errorf("Interface name %s too long", newName)
@ -1224,5 +1295,6 @@ func ChangeName(iface *net.Interface, newName string) error {
if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 { if _, _, errno := syscall.Syscall(syscall.SYS_IOCTL, uintptr(fd), syscall.SIOCSIFNAME, uintptr(unsafe.Pointer(&data[0]))); errno != 0 {
return errno return errno
} }
return nil return nil
} }

View File

@ -116,7 +116,7 @@ func TestNetworkSetMacAddress(t *testing.T) {
ifcBeforeSet := readLink(t, tl.name) ifcBeforeSet := readLink(t, tl.name)
if err := NetworkSetMacAddress(ifcBeforeSet, macaddr); err != nil { if err := NetworkSetMacAddress(ifcBeforeSet, macaddr); err != nil {
t.Fatalf("Could not set %s MAC address on %#v interface: err", macaddr, tl, err) t.Fatalf("Could not set %s MAC address on %#v interface: %s", macaddr, tl, err)
} }
ifcAfterSet := readLink(t, tl.name) ifcAfterSet := readLink(t, tl.name)
@ -140,7 +140,7 @@ func TestNetworkSetMTU(t *testing.T) {
ifcBeforeSet := readLink(t, tl.name) ifcBeforeSet := readLink(t, tl.name)
if err := NetworkSetMTU(ifcBeforeSet, mtu); err != nil { if err := NetworkSetMTU(ifcBeforeSet, mtu); err != nil {
t.Fatalf("Could not set %d MTU on %#v interface: err", mtu, tl, err) t.Fatalf("Could not set %d MTU on %#v interface: %s", mtu, tl, err)
} }
ifcAfterSet := readLink(t, tl.name) ifcAfterSet := readLink(t, tl.name)
@ -248,6 +248,30 @@ func TestNetworkLinkAddMacVlan(t *testing.T) {
readLink(t, tl.name) readLink(t, tl.name)
} }
// TestNetworkLinkAddMacVtap creates a dummy master link, adds a macvtap
// interface in "private" mode on top of it, and then calls readLink on the new
// interface name. It does nothing in -short mode.
func TestNetworkLinkAddMacVtap(t *testing.T) {
	if testing.Short() {
		return
	}

	tl := struct {
		name string
		mode string
	}{
		name: "tstVtap",
		mode: "private",
	}
	master := testLink{"tstEth", "dummy"}

	addLink(t, master.name, master.linkType)
	defer deleteLink(t, master.name)

	err := NetworkLinkAddMacVtap(master.name, tl.name, tl.mode)
	if err != nil {
		t.Fatalf("Unable to create %#v MAC VTAP interface: %s", tl, err)
	}

	readLink(t, tl.name)
}
func TestAddDelNetworkIp(t *testing.T) { func TestAddDelNetworkIp(t *testing.T) {
if testing.Short() { if testing.Short() {
return return
@ -280,6 +304,34 @@ func TestAddDelNetworkIp(t *testing.T) {
} }
} }
// TestAddRouteSourceSelection checks that AddRoute accepts a plain source IP
// address (no CIDR suffix): it creates a dummy link, assigns 127.1.1.1/32 to
// it, brings the link up, and adds a 127.0.0.0/8 route with that source.
func TestAddRouteSourceSelection(t *testing.T) {
	// This test creates and deletes real links, so honour -short the same way
	// the other link-creating tests in this file do.
	if testing.Short() {
		return
	}

	tstIp := "127.1.1.1"
	tl := testLink{name: "tstEth", linkType: "dummy"}

	addLink(t, tl.name, tl.linkType)
	defer deleteLink(t, tl.name)

	ip := net.ParseIP(tstIp)
	mask := net.IPv4Mask(255, 255, 255, 255)
	ipNet := &net.IPNet{IP: ip, Mask: mask}

	iface, err := net.InterfaceByName(tl.name)
	if err != nil {
		t.Fatalf("Lost created link %#v", tl)
	}

	if err := NetworkLinkAddIp(iface, ip, ipNet); err != nil {
		t.Fatalf("Could not add IP address %s to interface %#v: %s", ip.String(), iface, err)
	}

	upLink(t, tl.name)
	defer downLink(t, tl.name)

	if err := AddRoute("127.0.0.0/8", tstIp, "", tl.name); err != nil {
		// Report the underlying error so a failure can be diagnosed.
		t.Fatalf("Failed to add route with source address: %s", err)
	}
}
func TestCreateVethPair(t *testing.T) { func TestCreateVethPair(t *testing.T) {
if testing.Short() { if testing.Short() {
return return

View File

@ -47,6 +47,10 @@ func NetworkSetMTU(iface *net.Interface, mtu int) error {
return ErrNotImplemented return ErrNotImplemented
} }
// NetworkSetTxQueueLen is a stub that always returns ErrNotImplemented.
func NetworkSetTxQueueLen(iface *net.Interface, txQueueLen int) error {
return ErrNotImplemented
}
func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error { func NetworkCreateVethPair(name1, name2 string, txQueueLen int) error {
return ErrNotImplemented return ErrNotImplemented
} }

View File

@ -1,39 +0,0 @@
// +build linux
package network
import (
"fmt"
"os"
"syscall"
"github.com/docker/libcontainer/system"
)
// NetNS is a network strategy that joins an existing network namespace
// identified by a path (see Create and Initialize). It has no fields.
//
// crosbymichael: could make a network strategy that instead of returning veth pair names it returns a pid to an existing network namespace
type NetNS struct {
}
// Create copies the namespace path from the network config n into
// networkState. nspid is not used, and the method always returns nil.
func (v *NetNS) Create(n *Network, nspid int, networkState *NetworkState) error {
networkState.NsPath = n.NsPath
return nil
}
// Initialize joins the network namespace found at networkState.NsPath: it
// opens the path read-only and calls setns with CLONE_NEWNET on the resulting
// descriptor, which is closed before returning. An empty NsPath, a failed open
// or a failed setns is reported as an error. config is not used.
func (v *NetNS) Initialize(config *Network, networkState *NetworkState) error {
	nsPath := networkState.NsPath
	if nsPath == "" {
		return fmt.Errorf("nspath does is not specified in NetworkState")
	}

	nsFile, err := os.OpenFile(nsPath, os.O_RDONLY, 0)
	if err != nil {
		return fmt.Errorf("failed get network namespace fd: %v", err)
	}
	defer nsFile.Close()

	if err := system.Setns(nsFile.Fd(), syscall.CLONE_NEWNET); err != nil {
		return fmt.Errorf("failed to setns current network namespace: %v", err)
	}
	return nil
}

View File

@ -95,3 +95,11 @@ func SetMtu(name string, mtu int) error {
} }
return netlink.NetworkSetMTU(iface, mtu) return netlink.NetworkSetMTU(iface, mtu)
} }
// SetHairpinMode resolves the interface called name and passes it, together
// with enabled, to netlink.SetHairpinMode. A failed lookup is returned as-is.
func SetHairpinMode(name string, enabled bool) error {
	link, err := net.InterfaceByName(name)
	if err == nil {
		return netlink.SetHairpinMode(link, enabled)
	}
	return err
}

View File

@ -13,7 +13,6 @@ var (
var strategies = map[string]NetworkStrategy{ var strategies = map[string]NetworkStrategy{
"veth": &Veth{}, "veth": &Veth{},
"loopback": &Loopback{}, "loopback": &Loopback{},
"netns": &NetNS{},
} }
// NetworkStrategy represents a specific network configuration for // NetworkStrategy represents a specific network configuration for

View File

@ -8,9 +8,6 @@ type Network struct {
// Type sets the networks type, commonly veth and loopback // Type sets the networks type, commonly veth and loopback
Type string `json:"type,omitempty"` Type string `json:"type,omitempty"`
// Path to network namespace
NsPath string `json:"ns_path,omitempty"`
// The bridge to use. // The bridge to use.
Bridge string `json:"bridge,omitempty"` Bridge string `json:"bridge,omitempty"`
@ -50,6 +47,4 @@ type NetworkState struct {
VethHost string `json:"veth_host,omitempty"` VethHost string `json:"veth_host,omitempty"`
// The name of the veth interface created inside the container for the child. // The name of the veth interface created inside the container for the child.
VethChild string `json:"veth_child,omitempty"` VethChild string `json:"veth_child,omitempty"`
// Net namespace path.
NsPath string `json:"ns_path,omitempty"`
} }

View File

@ -8,7 +8,6 @@ import (
"github.com/codegangsta/cli" "github.com/codegangsta/cli"
"github.com/docker/libcontainer/namespaces" "github.com/docker/libcontainer/namespaces"
"github.com/docker/libcontainer/syncpipe"
) )
var ( var (
@ -41,12 +40,8 @@ func initAction(context *cli.Context) {
log.Fatal(err) log.Fatal(err)
} }
syncPipe, err := syncpipe.NewSyncPipeFromFd(0, uintptr(pipeFd)) pipe := os.NewFile(uintptr(pipeFd), "pipe")
if err != nil { if err := namespaces.Init(container, rootfs, console, pipe, []string(context.Args())); err != nil {
log.Fatalf("unable to create sync pipe: %s", err)
}
if err := namespaces.Init(container, rootfs, console, syncPipe, []string(context.Args())); err != nil {
log.Fatalf("unable to initialize for container: %s", err) log.Fatalf("unable to initialize for container: %s", err)
} }
} }

View File

@ -8,7 +8,6 @@ import (
"github.com/codegangsta/cli" "github.com/codegangsta/cli"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
"github.com/docker/libcontainer/syncpipe"
) )
// rFunc is a function registration for calling after an execin // rFunc is a function registration for calling after an execin
@ -59,16 +58,13 @@ func findUserArgs() []string {
// loadConfigFromFd loads a container's config from the sync pipe that is provided by // loadConfigFromFd loads a container's config from the sync pipe that is provided by
// fd 3 when running a process // fd 3 when running a process
func loadConfigFromFd() (*libcontainer.Config, error) { func loadConfigFromFd() (*libcontainer.Config, error) {
syncPipe, err := syncpipe.NewSyncPipeFromFd(0, 3) pipe := os.NewFile(3, "pipe")
if err != nil { defer pipe.Close()
return nil, err
}
var config *libcontainer.Config var config *libcontainer.Config
if err := syncPipe.ReadFromParent(&config); err != nil { if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return nil, err return nil, err
} }
return config, nil return config, nil
} }

View File

@ -176,13 +176,13 @@
"TERM=xterm" "TERM=xterm"
], ],
"hostname": "koye", "hostname": "koye",
"namespaces": { "namespaces": [
"NEWIPC": true, {"name":"NEWIPC"},
"NEWNET": true, {"name": "NEWNET"},
"NEWNS": true, {"name": "NEWNS"},
"NEWPID": true, {"name": "NEWPID"},
"NEWUTS": true {"name": "NEWUTS"}
}, ],
"networks": [ "networks": [
{ {
"address": "127.0.0.1/0", "address": "127.0.0.1/0",

View File

@ -175,13 +175,13 @@
"TERM=xterm" "TERM=xterm"
], ],
"hostname": "koye", "hostname": "koye",
"namespaces": { "namespaces": [
"NEWIPC": true, {"name": "NEWIPC"},
"NEWNET": true, {"name": "NEWNET"},
"NEWNS": true, {"name": "NEWNS"},
"NEWPID": true, {"name": "NEWPID"},
"NEWUTS": true {"name": "NEWUTS"}
}, ],
"networks": [ "networks": [
{ {
"address": "127.0.0.1/0", "address": "127.0.0.1/0",

View File

@ -181,13 +181,13 @@
"TERM=xterm" "TERM=xterm"
], ],
"hostname": "koye", "hostname": "koye",
"namespaces": { "namespaces": [
"NEWIPC": true, {"name": "NEWIPC"},
"NEWNET": true, {"name": "NEWNET"},
"NEWNS": true, {"name": "NEWNS"},
"NEWPID": true, {"name": "NEWPID"},
"NEWUTS": true {"name": "NEWUTS"}
}, ],
"networks": [ "networks": [
{ {
"address": "127.0.0.1/0", "address": "127.0.0.1/0",

View File

@ -0,0 +1,209 @@
{
"capabilities": [
"CHOWN",
"DAC_OVERRIDE",
"FOWNER",
"MKNOD",
"NET_RAW",
"SETGID",
"SETUID",
"SETFCAP",
"SETPCAP",
"NET_BIND_SERVICE",
"SYS_CHROOT",
"KILL"
],
"cgroups": {
"allowed_devices": [
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "m",
"major_number": -1,
"minor_number": -1,
"type": 98
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 1,
"path": "/dev/console",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"path": "/dev/tty0",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 4,
"minor_number": 1,
"path": "/dev/tty1",
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 136,
"minor_number": -1,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 5,
"minor_number": 2,
"type": 99
},
{
"cgroup_permissions": "rwm",
"major_number": 10,
"minor_number": 200,
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
],
"name": "docker-koye",
"parent": "docker"
},
"restrict_sys": true,
"mount_config": {
"device_nodes": [
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 3,
"path": "/dev/null",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 5,
"path": "/dev/zero",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 7,
"path": "/dev/full",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 5,
"path": "/dev/tty",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 9,
"path": "/dev/urandom",
"type": 99
},
{
"cgroup_permissions": "rwm",
"file_mode": 438,
"major_number": 1,
"minor_number": 8,
"path": "/dev/random",
"type": 99
}
]
},
"environment": [
"HOME=/",
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"HOSTNAME=koye",
"TERM=xterm"
],
"hostname": "koye",
"namespaces": [
{"name": "NEWIPC"},
{"name": "NEWNET"},
{"name": "NEWNS"},
{"name": "NEWPID"},
{"name": "NEWUTS"}
],
"networks": [
{
"address": "127.0.0.1/0",
"gateway": "localhost",
"mtu": 1500,
"type": "loopback"
},
{
"address": "172.17.0.101/16",
"bridge": "docker0",
"veth_prefix": "veth",
"mtu": 1500,
"type": "veth"
}
],
"routes": [
{
"destination": "0.0.0.0/0",
"source": "172.17.0.101",
"gateway": "172.17.42.1",
"interface_name": "eth0"
}
],
"tty": true
}

View File

@ -177,13 +177,13 @@
"TERM=xterm" "TERM=xterm"
], ],
"hostname": "koye", "hostname": "koye",
"namespaces": { "namespaces": [
"NEWIPC": true, {"name": "NEWIPC"},
"NEWNET": true, {"name": "NEWNET"},
"NEWNS": true, {"name": "NEWNS"},
"NEWPID": true, {"name": "NEWPID"},
"NEWUTS": true {"name": "NEWUTS"}
}, ],
"networks": [ "networks": [
{ {
"address": "127.0.0.1/0", "address": "127.0.0.1/0",

View File

@ -434,3 +434,28 @@ func Chcon(fpath string, scon string, recurse bool) error {
return Setfilecon(fpath, scon) return Setfilecon(fpath, scon)
} }
// DupSecOpt takes an SELinux process label and returns the security options
// that will set the same SELinux user, role, type and level for future
// container processes. It returns nil when src is empty or when any of those
// four fields is empty in the context parsed from src.
func DupSecOpt(src string) []string {
	if src == "" {
		return nil
	}

	con := NewContext(src)
	user, role, typ, level := con["user"], con["role"], con["type"], con["level"]

	// A label that is missing any component cannot be duplicated.
	if user == "" || role == "" || typ == "" || level == "" {
		return nil
	}

	opts := make([]string, 0, 4)
	opts = append(opts, "label:user:"+user)
	opts = append(opts, "label:role:"+role)
	opts = append(opts, "label:type:"+typ)
	opts = append(opts, "label:level:"+level)
	return opts
}
// DisableSecOpt returns the security option that disables SELinux labeling
// support for future container processes.
func DisableSecOpt() []string {
	const disable = "label:disable"
	return []string{disable}
}

View File

@ -42,7 +42,7 @@ func TestSELinux(t *testing.T) {
t.Log("getenforce ", selinux.SelinuxGetEnforce()) t.Log("getenforce ", selinux.SelinuxGetEnforce())
t.Log("getenforcemode ", selinux.SelinuxGetEnforceMode()) t.Log("getenforcemode ", selinux.SelinuxGetEnforceMode())
pid := os.Getpid() pid := os.Getpid()
t.Log("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023)) t.Logf("PID:%d MCS:%s\n", pid, selinux.IntToMcs(pid, 1023))
err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0") err = selinux.Setfscreatecon("unconfined_u:unconfined_r:unconfined_t:s0")
if err == nil { if err == nil {
t.Log(selinux.Getfscreatecon()) t.Log(selinux.Getfscreatecon())

View File

@ -1,105 +0,0 @@
package syncpipe
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"syscall"
)
// SyncPipe allows communication between a child process and its parent and
// lets the two independent processes synchronize their state.
//
// It holds up to two file handles: the parent's end and the child's end of
// the pipe. Either one may be nil.
type SyncPipe struct {
	parent, child *os.File
}
// NewSyncPipeFromFd wraps an already-open file descriptor in a SyncPipe.
//
// A descriptor value of 0 is treated as "not provided". If parentFd is
// non-zero it becomes the parent end and childFd is ignored; otherwise a
// non-zero childFd becomes the child end. When both are 0 an error is
// returned.
func NewSyncPipeFromFd(parentFd, childFd uintptr) (*SyncPipe, error) {
	switch {
	case parentFd > 0:
		return &SyncPipe{parent: os.NewFile(parentFd, "parentPipe")}, nil
	case childFd > 0:
		return &SyncPipe{child: os.NewFile(childFd, "childPipe")}, nil
	default:
		return nil, fmt.Errorf("no valid sync pipe fd specified")
	}
}
// Child returns the child end of the pipe. The result is nil when this
// SyncPipe holds no child end.
func (s *SyncPipe) Child() *os.File {
	return s.child
}
// Parent returns the parent end of the pipe. The result is nil when this
// SyncPipe holds no parent end.
func (s *SyncPipe) Parent() *os.File {
	return s.parent
}
// SendToChild JSON-encodes v, writes it to the parent end of the pipe and
// then shuts down the write side of that end so the reader sees
// end-of-stream after the payload.
//
// It returns the marshalling error, the write error, or the shutdown error,
// in that order of precedence. The write side is shut down even when the
// write fails, so a reader draining the pipe is not left waiting.
func (s *SyncPipe) SendToChild(v interface{}) error {
	data, err := json.Marshal(v)
	if err != nil {
		return err
	}

	_, writeErr := s.parent.Write(data)
	shutdownErr := syscall.Shutdown(int(s.parent.Fd()), syscall.SHUT_WR)

	// A failed write means the payload was not delivered; report that
	// rather than silently claiming success.
	if writeErr != nil {
		return writeErr
	}
	return shutdownErr
}
// ReadFromChild reads everything available on the parent end of the pipe.
// Any bytes received are returned as an error whose message is exactly those
// bytes; receiving nothing yields nil. A read failure is returned as is.
func (s *SyncPipe) ReadFromChild() error {
	msg, err := ioutil.ReadAll(s.parent)
	switch {
	case err != nil:
		return err
	case len(msg) > 0:
		return fmt.Errorf("%s", msg)
	}
	return nil
}
// ReadFromParent reads everything available on the child end of the pipe
// and, when any data was received, JSON-decodes it into v. An empty read
// leaves v untouched and returns nil.
func (s *SyncPipe) ReadFromParent(v interface{}) error {
	payload, err := ioutil.ReadAll(s.child)
	if err != nil {
		return fmt.Errorf("error reading from sync pipe %s", err)
	}
	if len(payload) == 0 {
		return nil
	}
	return json.Unmarshal(payload, v)
}
// ReportChildError writes err's message to the child end of the pipe and
// then closes that end. Failures while draining or writing are ignored.
func (s *SyncPipe) ReportChildError(err error) {
	// Consume whatever the parent has sent first so that the parent does
	// not receive ECONNRESET when the child writes to the pipe.
	_, _ = ioutil.ReadAll(s.child)

	msg := []byte(err.Error())
	_, _ = s.child.Write(msg)

	s.CloseChild()
}
// Close closes whichever ends of the pipe are present, parent first and then
// child. Errors from the individual closes are discarded; the result is
// always nil.
func (s *SyncPipe) Close() error {
	for _, end := range []*os.File{s.parent, s.child} {
		if end != nil {
			end.Close()
		}
	}
	return nil
}
// CloseChild closes the child end of the pipe, if present, and clears it so
// that later calls are no-ops.
func (s *SyncPipe) CloseChild() {
	if s.child == nil {
		return
	}
	s.child.Close()
	s.child = nil
}

View File

@ -1,20 +0,0 @@
package syncpipe
import (
"os"
"syscall"
)
// NewSyncPipe creates a connected AF_LOCAL stream socket pair with
// close-on-exec set and wraps it in a SyncPipe: the first descriptor becomes
// the child end, the second the parent end.
func NewSyncPipe() (s *SyncPipe, err error) {
	pair, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_STREAM|syscall.SOCK_CLOEXEC, 0)
	if err != nil {
		return nil, err
	}

	return &SyncPipe{
		child:  os.NewFile(uintptr(pair[0]), "child syncpipe"),
		parent: os.NewFile(uintptr(pair[1]), "parent syncpipe"),
	}, nil
}

View File

@ -1,72 +0,0 @@
package syncpipe
import (
"fmt"
"syscall"
"testing"
)
// testStruct is the payload type sent through the pipe by the tests in this
// file.
type testStruct struct {
	Name string
}
// TestSendErrorFromChild checks that an error reported on the child end of
// the pipe is received, with the same message, by ReadFromChild on the
// parent end.
func TestSendErrorFromChild(t *testing.T) {
	pipe, err := NewSyncPipe()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := pipe.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	// Duplicate the child descriptor so a second SyncPipe can act as the
	// child while the original pipe closes its own child end.
	childfd, err := syscall.Dup(int(pipe.Child().Fd()))
	if err != nil {
		t.Fatal(err)
	}
	childPipe, err := NewSyncPipeFromFd(0, uintptr(childfd))
	if err != nil {
		t.Fatal(err)
	}
	pipe.CloseChild()

	if err := pipe.SendToChild(nil); err != nil {
		t.Fatal(err)
	}

	expected := "something bad happened"
	// Use an explicit verb so the message is never interpreted as a format
	// string.
	childPipe.ReportChildError(fmt.Errorf("%s", expected))

	childError := pipe.ReadFromChild()
	if childError == nil {
		t.Fatal("expected an error to be returned but did not receive anything")
	}
	if childError.Error() != expected {
		t.Fatalf("expected %q but received error message %q", expected, childError.Error())
	}
}
// TestSendPayloadToChild checks that a value sent with SendToChild is
// decoded unchanged by ReadFromParent.
func TestSendPayloadToChild(t *testing.T) {
	pipe, err := NewSyncPipe()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := pipe.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	expected := "libcontainer"
	if err := pipe.SendToChild(testStruct{Name: expected}); err != nil {
		t.Fatal(err)
	}

	var s *testStruct
	if err := pipe.ReadFromParent(&s); err != nil {
		t.Fatal(err)
	}
	// ReadFromParent leaves s untouched when no data arrives; fail cleanly
	// instead of dereferencing a nil pointer.
	if s == nil {
		t.Fatal("expected a payload from the parent but received nothing")
	}
	if s.Name != expected {
		t.Fatalf("expected name %q but received %q", expected, s.Name)
	}
}

View File

@ -14,6 +14,9 @@ var setNsMap = map[string]uintptr{
"linux/386": 346, "linux/386": 346,
"linux/amd64": 308, "linux/amd64": 308,
"linux/arm": 374, "linux/arm": 374,
"linux/ppc64": 350,
"linux/ppc64le": 350,
"linux/s390x": 339,
} }
func Setns(fd uintptr, flags uintptr) error { func Setns(fd uintptr, flags uintptr) error {

View File

@ -1,4 +1,5 @@
// +build linux,amd64 // +build linux,amd64 linux,ppc64 linux,ppc64le linux,s390x
package system package system
import ( import (

View File

@ -7,7 +7,7 @@ import (
// Setuid sets the uid of the calling thread to the specified uid. // Setuid sets the uid of the calling thread to the specified uid.
func Setuid(uid int) (err error) { func Setuid(uid int) (err error) {
_, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID, uintptr(uid), 0, 0) _, _, e1 := syscall.RawSyscall(syscall.SYS_SETUID32, uintptr(uid), 0, 0)
if e1 != 0 { if e1 != 0 {
err = e1 err = e1
} }

View File

@ -0,0 +1,108 @@
package user
import (
"errors"
"fmt"
"syscall"
)
var (
	// ErrUnsupported indicates that the current operating system does not
	// provide the passwd-formatted data required for user lookups.
	ErrUnsupported = errors.New("user lookup: operating system does not provide passwd-formatted data")
)
// lookupUser returns the first entry of the operating system's passwd data
// that is accepted by filter. It fails if the passwd source cannot be
// obtained or parsed, or if no entry matches.
func lookupUser(filter func(u User) bool) (User, error) {
	// Operating system-specific source of passwd data.
	src, err := GetPasswd()
	if err != nil {
		return User{}, err
	}
	defer src.Close()

	matches, err := ParsePasswdFilter(src, filter)
	switch {
	case err != nil:
		return User{}, err
	case len(matches) == 0:
		return User{}, fmt.Errorf("no matching entries in passwd file")
	}

	// The first match is taken to be the "correct" one.
	return matches[0], nil
}
// CurrentUser looks up the user whose id is the calling process's user id
// (syscall.Getuid) by delegating to LookupUid. It returns an error whenever
// LookupUid does, e.g. when no matching entry is found.
func CurrentUser() (User, error) {
	return LookupUid(syscall.Getuid())
}
// LookupUser returns the first passwd entry whose name equals username. An
// error is returned when the passwd data is unavailable or contains no such
// entry.
func LookupUser(username string) (User, error) {
	hasName := func(candidate User) bool {
		return candidate.Name == username
	}
	return lookupUser(hasName)
}
// LookupUid returns the first passwd entry whose user id equals uid. An
// error is returned when the passwd data is unavailable or contains no such
// entry.
func LookupUid(uid int) (User, error) {
	hasUid := func(candidate User) bool {
		return candidate.Uid == uid
	}
	return lookupUser(hasUid)
}
// lookupGroup returns the first entry of the operating system's group data
// that is accepted by filter. It fails if the group source cannot be
// obtained or parsed, or if no entry matches.
func lookupGroup(filter func(g Group) bool) (Group, error) {
	// Operating system-specific source of group data.
	src, err := GetGroup()
	if err != nil {
		return Group{}, err
	}
	defer src.Close()

	matches, err := ParseGroupFilter(src, filter)
	switch {
	case err != nil:
		return Group{}, err
	case len(matches) == 0:
		return Group{}, fmt.Errorf("no matching entries in group file")
	}

	// The first match is taken to be the "correct" one.
	return matches[0], nil
}
// CurrentGroup looks up the group whose id is the calling process's group id
// (syscall.Getgid) by delegating to LookupGid. It returns an error whenever
// LookupGid does, e.g. when no matching entry is found.
func CurrentGroup() (Group, error) {
	return LookupGid(syscall.Getgid())
}
// LookupGroup returns the first group entry whose name equals groupname. An
// error is returned when the group data is unavailable or contains no such
// entry.
func LookupGroup(groupname string) (Group, error) {
	hasName := func(candidate Group) bool {
		return candidate.Name == groupname
	}
	return lookupGroup(hasName)
}
// LookupGid returns the first group entry whose group id equals gid. An
// error is returned when the group data is unavailable or contains no such
// entry.
func LookupGid(gid int) (Group, error) {
	hasGid := func(candidate Group) bool {
		return candidate.Gid == gid
	}
	return lookupGroup(hasGid)
}

View File

@ -0,0 +1,30 @@
// +build darwin dragonfly freebsd linux netbsd openbsd solaris
package user
import (
"io"
"os"
)
// Unix-specific paths to the passwd- and group-formatted files.
const (
	// unixPasswdFile is the passwd-formatted file used for user lookups.
	unixPasswdFile = "/etc/passwd"
	// unixGroupFile is the group-formatted file used for group lookups.
	unixGroupFile = "/etc/group"
)
// GetPasswdFile returns the path of the passwd-formatted file. In this
// implementation the error is always nil.
func GetPasswdFile() (string, error) {
	return unixPasswdFile, nil
}
// GetPasswd opens the passwd-formatted file for reading. Any error comes
// from os.Open.
func GetPasswd() (io.ReadCloser, error) {
	return os.Open(unixPasswdFile)
}
// GetGroupFile returns the path of the group-formatted file. In this
// implementation the error is always nil.
func GetGroupFile() (string, error) {
	return unixGroupFile, nil
}
// GetGroup opens the group-formatted file for reading. Any error comes from
// os.Open.
func GetGroup() (io.ReadCloser, error) {
	return os.Open(unixGroupFile)
}

View File

@ -0,0 +1,21 @@
// +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris
package user
import "io"
// GetPasswdFile is the fallback used when the build constraint of this file
// is satisfied; it always returns an empty path and ErrUnsupported.
func GetPasswdFile() (string, error) {
	return "", ErrUnsupported
}
// GetPasswd is the fallback used when the build constraint of this file is
// satisfied; it always returns a nil reader and ErrUnsupported.
func GetPasswd() (io.ReadCloser, error) {
	return nil, ErrUnsupported
}
// GetGroupFile is the fallback used when the build constraint of this file
// is satisfied; it always returns an empty path and ErrUnsupported.
func GetGroupFile() (string, error) {
	return "", ErrUnsupported
}
// GetGroup is the fallback used when the build constraint of this file is
// satisfied; it always returns a nil reader and ErrUnsupported.
func GetGroup() (io.ReadCloser, error) {
	return nil, ErrUnsupported
}

View File

@ -69,23 +69,36 @@ func parseLine(line string, v ...interface{}) {
} }
} }
func ParsePasswd() ([]*User, error) { func ParsePasswdFile(path string) ([]User, error) {
return ParsePasswdFilter(nil) passwd, err := os.Open(path)
}
func ParsePasswdFilter(filter func(*User) bool) ([]*User, error) {
f, err := os.Open("/etc/passwd")
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer f.Close() defer passwd.Close()
return parsePasswdFile(f, filter) return ParsePasswd(passwd)
}
func ParsePasswd(passwd io.Reader) ([]User, error) {
return ParsePasswdFilter(passwd, nil)
}
func ParsePasswdFileFilter(path string, filter func(User) bool) ([]User, error) {
passwd, err := os.Open(path)
if err != nil {
return nil, err
}
defer passwd.Close()
return ParsePasswdFilter(passwd, filter)
}
func ParsePasswdFilter(r io.Reader, filter func(User) bool) ([]User, error) {
if r == nil {
return nil, fmt.Errorf("nil source for passwd-formatted data")
} }
func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) {
var ( var (
s = bufio.NewScanner(r) s = bufio.NewScanner(r)
out = []*User{} out = []User{}
) )
for s.Scan() { for s.Scan() {
@ -103,7 +116,7 @@ func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) {
// Name:Pass:Uid:Gid:Gecos:Home:Shell // Name:Pass:Uid:Gid:Gecos:Home:Shell
// root:x:0:0:root:/root:/bin/bash // root:x:0:0:root:/root:/bin/bash
// adm:x:3:4:adm:/var/adm:/bin/false // adm:x:3:4:adm:/var/adm:/bin/false
p := &User{} p := User{}
parseLine( parseLine(
text, text,
&p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell, &p.Name, &p.Pass, &p.Uid, &p.Gid, &p.Gecos, &p.Home, &p.Shell,
@ -117,23 +130,36 @@ func parsePasswdFile(r io.Reader, filter func(*User) bool) ([]*User, error) {
return out, nil return out, nil
} }
func ParseGroup() ([]*Group, error) { func ParseGroupFile(path string) ([]Group, error) {
return ParseGroupFilter(nil) group, err := os.Open(path)
}
func ParseGroupFilter(filter func(*Group) bool) ([]*Group, error) {
f, err := os.Open("/etc/group")
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer f.Close() defer group.Close()
return parseGroupFile(f, filter) return ParseGroup(group)
}
func ParseGroup(group io.Reader) ([]Group, error) {
return ParseGroupFilter(group, nil)
}
func ParseGroupFileFilter(path string, filter func(Group) bool) ([]Group, error) {
group, err := os.Open(path)
if err != nil {
return nil, err
}
defer group.Close()
return ParseGroupFilter(group, filter)
}
func ParseGroupFilter(r io.Reader, filter func(Group) bool) ([]Group, error) {
if r == nil {
return nil, fmt.Errorf("nil source for group-formatted data")
} }
func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) {
var ( var (
s = bufio.NewScanner(r) s = bufio.NewScanner(r)
out = []*Group{} out = []Group{}
) )
for s.Scan() { for s.Scan() {
@ -151,7 +177,7 @@ func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) {
// Name:Pass:Gid:List // Name:Pass:Gid:List
// root:x:0:root // root:x:0:root
// adm:x:4:root,adm,daemon // adm:x:4:root,adm,daemon
p := &Group{} p := Group{}
parseLine( parseLine(
text, text,
&p.Name, &p.Pass, &p.Gid, &p.List, &p.Name, &p.Pass, &p.Gid, &p.List,
@ -165,94 +191,160 @@ func parseGroupFile(r io.Reader, filter func(*Group) bool) ([]*Group, error) {
return out, nil return out, nil
} }
// Given a string like "user", "1000", "user:group", "1000:1000", returns the uid, gid, list of supplementary group IDs, and home directory, if available and/or applicable. type ExecUser struct {
func GetUserGroupSupplementaryHome(userSpec string, defaultUid, defaultGid int, defaultHome string) (int, int, []int, string, error) { Uid, Gid int
var ( Sgids []int
uid = defaultUid Home string
gid = defaultGid }
suppGids = []int{}
home = defaultHome
// GetExecUserFile is a wrapper for GetExecUser. It reads data from each of the
// given file paths and uses that data as the arguments to GetExecUser. If the
// files cannot be opened for any reason, the error is ignored and a nil
// io.Reader is passed instead.
func GetExecUserFile(userSpec string, defaults *ExecUser, passwdPath, groupPath string) (*ExecUser, error) {
passwd, err := os.Open(passwdPath)
if err != nil {
passwd = nil
} else {
defer passwd.Close()
}
group, err := os.Open(groupPath)
if err != nil {
group = nil
} else {
defer group.Close()
}
return GetExecUser(userSpec, defaults, passwd, group)
}
// GetExecUser parses a user specification string (using the passwd and group
// readers as sources for /etc/passwd and /etc/group data, respectively). In
// the case of blank fields or missing data from the sources, the values in
// defaults are used.
//
// GetExecUser will return an error if a user or group literal could not be
// found in any entry in passwd and group respectively.
//
// Examples of valid user specifications are:
// * ""
// * "user"
// * "uid"
// * "user:group"
// * "uid:gid"
// * "user:gid"
// * "uid:group"
func GetExecUser(userSpec string, defaults *ExecUser, passwd, group io.Reader) (*ExecUser, error) {
var (
userArg, groupArg string userArg, groupArg string
name string
) )
if defaults == nil {
defaults = new(ExecUser)
}
// Copy over defaults.
user := &ExecUser{
Uid: defaults.Uid,
Gid: defaults.Gid,
Sgids: defaults.Sgids,
Home: defaults.Home,
}
// Sgids slice *cannot* be nil.
if user.Sgids == nil {
user.Sgids = []int{}
}
// allow for userArg to have either "user" syntax, or optionally "user:group" syntax // allow for userArg to have either "user" syntax, or optionally "user:group" syntax
parseLine(userSpec, &userArg, &groupArg) parseLine(userSpec, &userArg, &groupArg)
users, err := ParsePasswdFilter(func(u *User) bool { users, err := ParsePasswdFilter(passwd, func(u User) bool {
if userArg == "" { if userArg == "" {
return u.Uid == uid return u.Uid == user.Uid
} }
return u.Name == userArg || strconv.Itoa(u.Uid) == userArg return u.Name == userArg || strconv.Itoa(u.Uid) == userArg
}) })
if err != nil && !os.IsNotExist(err) { if err != nil && passwd != nil {
if userArg == "" { if userArg == "" {
userArg = strconv.Itoa(uid) userArg = strconv.Itoa(user.Uid)
} }
return 0, 0, nil, "", fmt.Errorf("Unable to find user %v: %v", userArg, err) return nil, fmt.Errorf("Unable to find user %v: %v", userArg, err)
} }
haveUser := users != nil && len(users) > 0 haveUser := users != nil && len(users) > 0
if haveUser { if haveUser {
// if we found any user entries that matched our filter, let's take the first one as "correct" // if we found any user entries that matched our filter, let's take the first one as "correct"
uid = users[0].Uid name = users[0].Name
gid = users[0].Gid user.Uid = users[0].Uid
home = users[0].Home user.Gid = users[0].Gid
user.Home = users[0].Home
} else if userArg != "" { } else if userArg != "" {
// we asked for a user but didn't find them... let's check to see if we wanted a numeric user // we asked for a user but didn't find them... let's check to see if we wanted a numeric user
uid, err = strconv.Atoi(userArg) user.Uid, err = strconv.Atoi(userArg)
if err != nil { if err != nil {
// not numeric - we have to bail // not numeric - we have to bail
return 0, 0, nil, "", fmt.Errorf("Unable to find user %v", userArg) return nil, fmt.Errorf("Unable to find user %v", userArg)
} }
if uid < minId || uid > maxId {
return 0, 0, nil, "", ErrRange // Must be inside valid uid range.
if user.Uid < minId || user.Uid > maxId {
return nil, ErrRange
} }
// if userArg couldn't be found in /etc/passwd but is numeric, just roll with it - this is legit // if userArg couldn't be found in /etc/passwd but is numeric, just roll with it - this is legit
} }
if groupArg != "" || (haveUser && users[0].Name != "") { if groupArg != "" || name != "" {
groups, err := ParseGroupFilter(func(g *Group) bool { groups, err := ParseGroupFilter(group, func(g Group) bool {
// Explicit group format takes precedence.
if groupArg != "" { if groupArg != "" {
return g.Name == groupArg || strconv.Itoa(g.Gid) == groupArg return g.Name == groupArg || strconv.Itoa(g.Gid) == groupArg
} }
// Check if user is a member.
for _, u := range g.List { for _, u := range g.List {
if u == users[0].Name { if u == name {
return true return true
} }
} }
return false return false
}) })
if err != nil && !os.IsNotExist(err) { if err != nil && group != nil {
return 0, 0, nil, "", fmt.Errorf("Unable to find groups for user %v: %v", users[0].Name, err) return nil, fmt.Errorf("Unable to find groups for user %v: %v", users[0].Name, err)
} }
haveGroup := groups != nil && len(groups) > 0 haveGroup := groups != nil && len(groups) > 0
if groupArg != "" { if groupArg != "" {
if haveGroup { if haveGroup {
// if we found any group entries that matched our filter, let's take the first one as "correct" // if we found any group entries that matched our filter, let's take the first one as "correct"
gid = groups[0].Gid user.Gid = groups[0].Gid
} else { } else {
// we asked for a group but didn't find id... let's check to see if we wanted a numeric group // we asked for a group but didn't find id... let's check to see if we wanted a numeric group
gid, err = strconv.Atoi(groupArg) user.Gid, err = strconv.Atoi(groupArg)
if err != nil { if err != nil {
// not numeric - we have to bail // not numeric - we have to bail
return 0, 0, nil, "", fmt.Errorf("Unable to find group %v", groupArg) return nil, fmt.Errorf("Unable to find group %v", groupArg)
} }
if gid < minId || gid > maxId {
return 0, 0, nil, "", ErrRange // Ensure gid is inside gid range.
if user.Gid < minId || user.Gid > maxId {
return nil, ErrRange
} }
// if groupArg couldn't be found in /etc/group but is numeric, just roll with it - this is legit // if groupArg couldn't be found in /etc/group but is numeric, just roll with it - this is legit
} }
} else if haveGroup { } else if haveGroup {
suppGids = make([]int, len(groups)) // If implicit group format, fill supplementary gids.
user.Sgids = make([]int, len(groups))
for i, group := range groups { for i, group := range groups {
suppGids[i] = group.Gid user.Sgids[i] = group.Gid
} }
} }
} }
return uid, gid, suppGids, home, nil return user, nil
} }

View File

@ -1,6 +1,8 @@
package user package user
import ( import (
"io"
"reflect"
"strings" "strings"
"testing" "testing"
) )
@ -54,7 +56,7 @@ func TestUserParseLine(t *testing.T) {
} }
func TestUserParsePasswd(t *testing.T) { func TestUserParsePasswd(t *testing.T) {
users, err := parsePasswdFile(strings.NewReader(` users, err := ParsePasswdFilter(strings.NewReader(`
root:x:0:0:root:/root:/bin/bash root:x:0:0:root:/root:/bin/bash
adm:x:3:4:adm:/var/adm:/bin/false adm:x:3:4:adm:/var/adm:/bin/false
this is just some garbage data this is just some garbage data
@ -74,7 +76,7 @@ this is just some garbage data
} }
func TestUserParseGroup(t *testing.T) { func TestUserParseGroup(t *testing.T) {
groups, err := parseGroupFile(strings.NewReader(` groups, err := ParseGroupFilter(strings.NewReader(`
root:x:0:root root:x:0:root
adm:x:4:root,adm,daemon adm:x:4:root,adm,daemon
this is just some garbage data this is just some garbage data
@ -92,3 +94,259 @@ this is just some garbage data
t.Fatalf("Expected groups[1] to be 4 - adm - 3 members, got %v - %v - %v", groups[1].Gid, groups[1].Name, len(groups[1].List)) t.Fatalf("Expected groups[1] to be 4 - adm - 3 members, got %v - %v - %v", groups[1].Gid, groups[1].Name, len(groups[1].List))
} }
} }
// TestValidGetExecUser feeds GetExecUser a set of well-formed user
// specifications together with fixed passwd and group data and checks the
// resolved ExecUser against the expected value for each case.
func TestValidGetExecUser(t *testing.T) {
	const passwdContent = `
root:x:0:0:root user:/root:/bin/bash
adm:x:42:43:adm:/var/adm:/bin/false
this is just some garbage data
`
	const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
this is just some garbage data
`
	// Values that must survive wherever the specification or the sources
	// do not override them.
	defaultExecUser := ExecUser{
		Uid:   8888,
		Gid:   8888,
		Sgids: []int{8888},
		Home:  "/8888",
	}

	cases := []struct {
		ref      string
		expected ExecUser
	}{
		{
			ref: "root",
			expected: ExecUser{
				Uid:   0,
				Gid:   0,
				Sgids: []int{0, 1234},
				Home:  "/root",
			},
		},
		{
			ref: "adm",
			expected: ExecUser{
				Uid:   42,
				Gid:   43,
				Sgids: []int{1234},
				Home:  "/var/adm",
			},
		},
		{
			ref: "root:adm",
			expected: ExecUser{
				Uid:   0,
				Gid:   43,
				Sgids: defaultExecUser.Sgids,
				Home:  "/root",
			},
		},
		{
			ref: "adm:1234",
			expected: ExecUser{
				Uid:   42,
				Gid:   1234,
				Sgids: defaultExecUser.Sgids,
				Home:  "/var/adm",
			},
		},
		{
			ref: "42:1234",
			expected: ExecUser{
				Uid:   42,
				Gid:   1234,
				Sgids: defaultExecUser.Sgids,
				Home:  "/var/adm",
			},
		},
		{
			ref: "1337:1234",
			expected: ExecUser{
				Uid:   1337,
				Gid:   1234,
				Sgids: defaultExecUser.Sgids,
				Home:  defaultExecUser.Home,
			},
		},
		{
			ref: "1337",
			expected: ExecUser{
				Uid:   1337,
				Gid:   defaultExecUser.Gid,
				Sgids: defaultExecUser.Sgids,
				Home:  defaultExecUser.Home,
			},
		},
		{
			ref: "",
			expected: ExecUser{
				Uid:   defaultExecUser.Uid,
				Gid:   defaultExecUser.Gid,
				Sgids: defaultExecUser.Sgids,
				Home:  defaultExecUser.Home,
			},
		},
	}

	for _, tc := range cases {
		// Fresh readers per case: each call consumes its sources.
		passwd := strings.NewReader(passwdContent)
		group := strings.NewReader(groupContent)

		execUser, err := GetExecUser(tc.ref, &defaultExecUser, passwd, group)
		if err != nil {
			t.Errorf("got unexpected error when parsing '%s': %s", tc.ref, err.Error())
			continue
		}

		if !reflect.DeepEqual(tc.expected, *execUser) {
			t.Logf("got: %#v", execUser)
			t.Errorf("expected: %#v", tc.expected)
		}
	}
}
// TestInvalidGetExecUser checks that GetExecUser rejects specifications that
// name unknown users or groups or that carry out-of-range numeric ids.
func TestInvalidGetExecUser(t *testing.T) {
	const passwdContent = `
root:x:0:0:root user:/root:/bin/bash
adm:x:42:43:adm:/var/adm:/bin/false
this is just some garbage data
`
	const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
this is just some garbage data
`
	specs := []string{
		// No such user/group.
		"notuser",
		"notuser:notgroup",
		"root:notgroup",
		"notuser:adm",
		"8888:notgroup",
		"notuser:8888",
		// Invalid user/group values.
		"-1:0",
		"0:-3",
		"-5:-2",
	}

	for _, spec := range specs {
		// Fresh readers per case: each call consumes its sources.
		passwd := strings.NewReader(passwdContent)
		group := strings.NewReader(groupContent)

		execUser, err := GetExecUser(spec, nil, passwd, group)
		if err != nil {
			continue
		}
		t.Errorf("got unexpected success when parsing '%s': %#v", spec, execUser)
	}
}
// TestGetExecUserNilSources checks how GetExecUser resolves a specification
// when the passwd and/or group reader is nil: fields that cannot be resolved
// from a source must fall back to the supplied defaults.
func TestGetExecUserNilSources(t *testing.T) {
	const passwdContent = `
root:x:0:0:root user:/root:/bin/bash
adm:x:42:43:adm:/var/adm:/bin/false
this is just some garbage data
`
	const groupContent = `
root:x:0:root
adm:x:43:
grp:x:1234:root,adm
this is just some garbage data
`
	defaultExecUser := ExecUser{
		Uid:   8888,
		Gid:   8888,
		Sgids: []int{8888},
		Home:  "/8888",
	}

	cases := []struct {
		ref string
		// passwd and group select whether the corresponding source is
		// supplied (true) or left as a nil reader (false).
		passwd, group bool
		expected      ExecUser
	}{
		{
			ref:    "",
			passwd: false,
			group:  false,
			expected: ExecUser{
				Uid:   8888,
				Gid:   8888,
				Sgids: []int{8888},
				Home:  "/8888",
			},
		},
		{
			ref:    "root",
			passwd: true,
			group:  false,
			expected: ExecUser{
				Uid:   0,
				Gid:   0,
				Sgids: []int{8888},
				Home:  "/root",
			},
		},
		{
			ref:    "0",
			passwd: false,
			group:  false,
			expected: ExecUser{
				Uid:   0,
				Gid:   8888,
				Sgids: []int{8888},
				Home:  "/8888",
			},
		},
		{
			ref:    "0:0",
			passwd: false,
			group:  false,
			expected: ExecUser{
				Uid:   0,
				Gid:   0,
				Sgids: []int{8888},
				Home:  "/8888",
			},
		},
	}

	for _, tc := range cases {
		// Leave the interface values nil unless the case asks for a source.
		var passwd, group io.Reader
		if tc.passwd {
			passwd = strings.NewReader(passwdContent)
		}
		if tc.group {
			group = strings.NewReader(groupContent)
		}

		execUser, err := GetExecUser(tc.ref, &defaultExecUser, passwd, group)
		if err != nil {
			t.Errorf("got unexpected error when parsing '%s': %s", tc.ref, err.Error())
			continue
		}

		if !reflect.DeepEqual(tc.expected, *execUser) {
			t.Logf("got: %#v", execUser)
			t.Errorf("expected: %#v", tc.expected)
		}
	}
}

View File

@ -27,6 +27,7 @@ import (
"github.com/fsouza/go-dockerclient" "github.com/fsouza/go-dockerclient"
"github.com/golang/glog" "github.com/golang/glog"
"github.com/google/cadvisor/container" "github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/info" "github.com/google/cadvisor/info"
"github.com/google/cadvisor/utils" "github.com/google/cadvisor/utils"
) )
@ -68,6 +69,9 @@ type dockerFactory struct {
usesAufsDriver bool usesAufsDriver bool
client *docker.Client client *docker.Client
// Information about the mounted cgroup subsystems.
cgroupSubsystems libcontainer.CgroupSubsystems
} }
func (self *dockerFactory) String() string { func (self *dockerFactory) String() string {
@ -85,6 +89,7 @@ func (self *dockerFactory) NewContainerHandler(name string) (handler container.C
self.machineInfoFactory, self.machineInfoFactory,
*dockerRootDir, *dockerRootDir,
self.usesAufsDriver, self.usesAufsDriver,
&self.cgroupSubsystems,
) )
return return
} }
@ -218,11 +223,17 @@ func Register(factory info.MachineInfoFactory) error {
glog.Infof("System is using systemd") glog.Infof("System is using systemd")
} }
cgroupSubsystems, err := libcontainer.GetCgroupSubsystems()
if err != nil {
return fmt.Errorf("failed to get cgroup subsystems: %v", err)
}
glog.Infof("Registering Docker factory") glog.Infof("Registering Docker factory")
f := &dockerFactory{ f := &dockerFactory{
machineInfoFactory: factory, machineInfoFactory: factory,
client: client, client: client,
usesAufsDriver: usesAufsDriver, usesAufsDriver: usesAufsDriver,
cgroupSubsystems: cgroupSubsystems,
} }
container.RegisterContainerHandlerFactory(f) container.RegisterContainerHandlerFactory(f)
return nil return nil

View File

@ -59,6 +59,10 @@ type dockerContainerHandler struct {
// Path to the libcontainer pid file. // Path to the libcontainer pid file.
libcontainerPidPath string libcontainerPidPath string
// Absolute path to the cgroup hierarchies of this container.
// (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test")
cgroupPaths map[string]string
cgroup cgroups.Cgroup cgroup cgroups.Cgroup
usesAufsDriver bool usesAufsDriver bool
fsInfo fs.FsInfo fsInfo fs.FsInfo
@ -71,11 +75,19 @@ func newDockerContainerHandler(
machineInfoFactory info.MachineInfoFactory, machineInfoFactory info.MachineInfoFactory,
dockerRootDir string, dockerRootDir string,
usesAufsDriver bool, usesAufsDriver bool,
cgroupSubsystems *containerLibcontainer.CgroupSubsystems,
) (container.ContainerHandler, error) { ) (container.ContainerHandler, error) {
fsInfo, err := fs.NewFsInfo() fsInfo, err := fs.NewFsInfo()
if err != nil { if err != nil {
return nil, err return nil, err
} }
// Create the cgroup paths.
cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
for key, val := range cgroupSubsystems.MountPoints {
cgroupPaths[key] = path.Join(val, name)
}
id := ContainerNameToDockerId(name) id := ContainerNameToDockerId(name)
handler := &dockerContainerHandler{ handler := &dockerContainerHandler{
id: id, id: id,
@ -85,6 +97,7 @@ func newDockerContainerHandler(
libcontainerConfigPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "container.json"), libcontainerConfigPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "container.json"),
libcontainerStatePath: path.Join(dockerRootDir, pathToLibcontainerState, id, "state.json"), libcontainerStatePath: path.Join(dockerRootDir, pathToLibcontainerState, id, "state.json"),
libcontainerPidPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "pid"), libcontainerPidPath: path.Join(dockerRootDir, pathToLibcontainerState, id, "pid"),
cgroupPaths: cgroupPaths,
cgroup: cgroups.Cgroup{ cgroup: cgroups.Cgroup{
Parent: "/", Parent: "/",
Name: name, Name: name,
@ -159,6 +172,11 @@ func (self *dockerContainerHandler) readLibcontainerState() (state *libcontainer
} }
state = retState state = retState
// Create cgroup paths if they don't exist. This is since older Docker clients don't write it.
if len(state.CgroupPaths) == 0 {
state.CgroupPaths = self.cgroupPaths
}
return return
} }
@ -259,7 +277,7 @@ func (self *dockerContainerHandler) GetStats() (stats *info.ContainerStats, err
return return
} }
stats, err = containerLibcontainer.GetStats(&self.cgroup, state) stats, err = containerLibcontainer.GetStats(state)
if err != nil { if err != nil {
return return
} }

View File

@ -15,6 +15,7 @@
package libcontainer package libcontainer
import ( import (
"fmt"
"time" "time"
"github.com/docker/libcontainer" "github.com/docker/libcontainer"
@ -24,13 +25,60 @@ import (
"github.com/google/cadvisor/info" "github.com/google/cadvisor/info"
) )
// CgroupSubsystems describes the cgroup subsystem mounts found on the
// machine, both as a list of mounts and as a per-subsystem lookup table.
type CgroupSubsystems struct {
	// Cgroup subsystem mounts.
	// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
	Mounts []cgroups.Mount
	// Cgroup subsystem to their mount location.
	// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
	MountPoints map[string]string
}
// GetCgroupSubsystems returns information about the mounted cgroup
// subsystems, restricted to those listed in supportedSubsystems.
//
// Mounts holds each mount that carries at least one supported subsystem,
// once per mount entry even when several supported subsystems share it
// (e.g. "cpu,cpuacct"). MountPoints maps every supported subsystem found to
// its mount location.
//
// An error is returned when the cgroup mounts cannot be read or when none
// are found.
func GetCgroupSubsystems() (CgroupSubsystems, error) {
	// Get all cgroup mounts.
	allCgroups, err := cgroups.GetCgroupMounts()
	if err != nil {
		return CgroupSubsystems{}, err
	}
	if len(allCgroups) == 0 {
		return CgroupSubsystems{}, fmt.Errorf("failed to find cgroup mounts")
	}

	// Trim the mounts to only the subsystems we care about.
	supportedCgroups := make([]cgroups.Mount, 0, len(allCgroups))
	mountPoints := make(map[string]string, len(allCgroups))
	for _, mount := range allCgroups {
		// Record a mount only once, however many of its subsystems are
		// supported.
		recorded := false
		for _, subsystem := range mount.Subsystems {
			if _, ok := supportedSubsystems[subsystem]; !ok {
				continue
			}
			mountPoints[subsystem] = mount.Mountpoint
			if !recorded {
				supportedCgroups = append(supportedCgroups, mount)
				recorded = true
			}
		}
	}

	return CgroupSubsystems{
		Mounts:      supportedCgroups,
		MountPoints: mountPoints,
	}, nil
}
// supportedSubsystems is the set of cgroup subsystems we support listing
// (should be the minimal set we need stats from). Only the keys matter.
var supportedSubsystems = map[string]struct{}{
	"cpu":     {},
	"cpuacct": {},
	"cpuset":  {},
	"memory":  {},
}
// Get stats of the specified container // Get stats of the specified container
func GetStats(cgroup *cgroups.Cgroup, state *libcontainer.State) (*info.ContainerStats, error) { func GetStats(state *libcontainer.State) (*info.ContainerStats, error) {
// TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready. // TODO(vmarmol): Use libcontainer's Stats() in the new API when that is ready.
stats := &libcontainer.ContainerStats{} stats := &libcontainer.ContainerStats{}
var err error var err error
stats.CgroupStats, err = cgroupfs.GetStats(cgroup) stats.CgroupStats, err = cgroupfs.GetStats(state.CgroupPaths)
if err != nil { if err != nil {
return &info.ContainerStats{}, err return &info.ContainerStats{}, err
} }
@ -43,14 +91,6 @@ func GetStats(cgroup *cgroups.Cgroup, state *libcontainer.State) (*info.Containe
return toContainerStats(stats), nil return toContainerStats(stats), nil
} }
// GetStatsCgroupOnly reads the stats of the given cgroup through
// cgroupfs.GetStats and converts them with toContainerStats. Only the
// CgroupStats field of the intermediate libcontainer stats is populated.
// On failure it returns nil and the error from cgroupfs.GetStats.
func GetStatsCgroupOnly(cgroup *cgroups.Cgroup) (*info.ContainerStats, error) {
	cgroupStats, err := cgroupfs.GetStats(cgroup)
	if err == nil {
		wrapped := &libcontainer.ContainerStats{CgroupStats: cgroupStats}
		return toContainerStats(wrapped), nil
	}
	return nil, err
}
func DiskStatsCopy(blkio_stats []cgroups.BlkioStatEntry) (stat []info.PerDiskStats) { func DiskStatsCopy(blkio_stats []cgroups.BlkioStatEntry) (stat []info.PerDiskStats) {
if len(blkio_stats) == 0 { if len(blkio_stats) == 0 {
return return

View File

@ -17,26 +17,18 @@ package raw
import ( import (
"fmt" "fmt"
"github.com/docker/libcontainer/cgroups"
"github.com/golang/glog" "github.com/golang/glog"
"github.com/google/cadvisor/container" "github.com/google/cadvisor/container"
"github.com/google/cadvisor/container/libcontainer"
"github.com/google/cadvisor/info" "github.com/google/cadvisor/info"
) )
// cgroupSubsystems describes the cgroup subsystem mounts used by the raw
// factory, both as a list of mounts and as a per-subsystem lookup table.
type cgroupSubsystems struct {
	// Cgroup subsystem mounts. Each entry pairs a mount point with the
	// subsystems attached to it.
	// e.g.: "/sys/fs/cgroup/cpu" -> ["cpu", "cpuacct"]
	mounts []cgroups.Mount
	// Cgroup subsystem to their mount location, keyed by subsystem name.
	// e.g.: "cpu" -> "/sys/fs/cgroup/cpu"
	mountPoints map[string]string
}
type rawFactory struct { type rawFactory struct {
// Factory for machine information. // Factory for machine information.
machineInfoFactory info.MachineInfoFactory machineInfoFactory info.MachineInfoFactory
cgroupSubsystems *cgroupSubsystems
// Information about the cgroup subsystems.
cgroupSubsystems *libcontainer.CgroupSubsystems
} }
func (self *rawFactory) String() string { func (self *rawFactory) String() string {
@ -53,46 +45,19 @@ func (self *rawFactory) CanHandle(name string) (bool, error) {
} }
func Register(machineInfoFactory info.MachineInfoFactory) error { func Register(machineInfoFactory info.MachineInfoFactory) error {
// Get all cgroup mounts. cgroupSubsystems, err := libcontainer.GetCgroupSubsystems()
allCgroups, err := cgroups.GetCgroupMounts()
if err != nil { if err != nil {
return err return fmt.Errorf("failed to get cgroup subsystems: %v", err)
} }
if len(allCgroups) == 0 { if len(cgroupSubsystems.Mounts) == 0 {
return fmt.Errorf("failed to find cgroup mounts for the raw factory")
}
// Trim the mounts to only the subsystems we care about.
supportedCgroups := make([]cgroups.Mount, 0, len(allCgroups))
mountPoints := make(map[string]string, len(allCgroups))
for _, mount := range allCgroups {
for _, subsystem := range mount.Subsystems {
if _, ok := supportedSubsystems[subsystem]; ok {
supportedCgroups = append(supportedCgroups, mount)
mountPoints[subsystem] = mount.Mountpoint
}
}
}
if len(supportedCgroups) == 0 {
return fmt.Errorf("failed to find supported cgroup mounts for the raw factory") return fmt.Errorf("failed to find supported cgroup mounts for the raw factory")
} }
glog.Infof("Registering Raw factory") glog.Infof("Registering Raw factory")
factory := &rawFactory{ factory := &rawFactory{
machineInfoFactory: machineInfoFactory, machineInfoFactory: machineInfoFactory,
cgroupSubsystems: &cgroupSubsystems{ cgroupSubsystems: &cgroupSubsystems,
mounts: supportedCgroups,
mountPoints: mountPoints,
},
} }
container.RegisterContainerHandlerFactory(factory) container.RegisterContainerHandlerFactory(factory)
return nil return nil
} }
// supportedSubsystems is the set of cgroup subsystems we support listing.
// It should remain the minimal set we need stats from.
var supportedSubsystems = map[string]struct{}{
	"cpu":     {},
	"cpuacct": {},
	"cpuset":  {},
	"memory":  {},
}

View File

@ -39,7 +39,7 @@ type rawContainerHandler struct {
// Name of the container for this handler. // Name of the container for this handler.
name string name string
cgroup *cgroups.Cgroup cgroup *cgroups.Cgroup
cgroupSubsystems *cgroupSubsystems cgroupSubsystems *libcontainer.CgroupSubsystems
machineInfoFactory info.MachineInfoFactory machineInfoFactory info.MachineInfoFactory
// Inotify event watcher. // Inotify event watcher.
@ -54,12 +54,16 @@ type rawContainerHandler struct {
// Cgroup paths being watched for new subcontainers // Cgroup paths being watched for new subcontainers
cgroupWatches map[string]struct{} cgroupWatches map[string]struct{}
// Absolute path to the cgroup hierarchies of this container.
// (e.g.: "cpu" -> "/sys/fs/cgroup/cpu/test")
cgroupPaths map[string]string
fsInfo fs.FsInfo fsInfo fs.FsInfo
networkInterface *networkInterface networkInterface *networkInterface
externalMounts []mount externalMounts []mount
} }
func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) { func newRawContainerHandler(name string, cgroupSubsystems *libcontainer.CgroupSubsystems, machineInfoFactory info.MachineInfoFactory) (container.ContainerHandler, error) {
fsInfo, err := fs.NewFsInfo() fsInfo, err := fs.NewFsInfo()
if err != nil { if err != nil {
return nil, err return nil, err
@ -77,6 +81,13 @@ func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, mac
break break
} }
} }
// Create the cgroup paths.
cgroupPaths := make(map[string]string, len(cgroupSubsystems.MountPoints))
for key, val := range cgroupSubsystems.MountPoints {
cgroupPaths[key] = path.Join(val, name)
}
return &rawContainerHandler{ return &rawContainerHandler{
name: name, name: name,
cgroup: &cgroups.Cgroup{ cgroup: &cgroups.Cgroup{
@ -88,6 +99,7 @@ func newRawContainerHandler(name string, cgroupSubsystems *cgroupSubsystems, mac
stopWatcher: make(chan error), stopWatcher: make(chan error),
watches: make(map[string]struct{}), watches: make(map[string]struct{}),
cgroupWatches: make(map[string]struct{}), cgroupWatches: make(map[string]struct{}),
cgroupPaths: cgroupPaths,
fsInfo: fsInfo, fsInfo: fsInfo,
networkInterface: networkInterface, networkInterface: networkInterface,
externalMounts: externalMounts, externalMounts: externalMounts,
@ -145,9 +157,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
} }
// CPU. // CPU.
cpuRoot, ok := self.cgroupSubsystems.mountPoints["cpu"] cpuRoot, ok := self.cgroupPaths["cpu"]
if ok { if ok {
cpuRoot = path.Join(cpuRoot, self.name)
if utils.FileExists(cpuRoot) { if utils.FileExists(cpuRoot) {
spec.HasCpu = true spec.HasCpu = true
spec.Cpu.Limit = readInt64(cpuRoot, "cpu.shares") spec.Cpu.Limit = readInt64(cpuRoot, "cpu.shares")
@ -156,9 +167,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
// Cpu Mask. // Cpu Mask.
// This will fail for non-unified hierarchies. We'll return the whole machine mask in that case. // This will fail for non-unified hierarchies. We'll return the whole machine mask in that case.
cpusetRoot, ok := self.cgroupSubsystems.mountPoints["cpuset"] cpusetRoot, ok := self.cgroupPaths["cpuset"]
if ok { if ok {
cpusetRoot = path.Join(cpusetRoot, self.name)
if utils.FileExists(cpusetRoot) { if utils.FileExists(cpusetRoot) {
spec.HasCpu = true spec.HasCpu = true
spec.Cpu.Mask = readString(cpusetRoot, "cpuset.cpus") spec.Cpu.Mask = readString(cpusetRoot, "cpuset.cpus")
@ -169,9 +179,8 @@ func (self *rawContainerHandler) GetSpec() (info.ContainerSpec, error) {
} }
// Memory. // Memory.
memoryRoot, ok := self.cgroupSubsystems.mountPoints["memory"] memoryRoot, ok := self.cgroupPaths["memory"]
if ok { if ok {
memoryRoot = path.Join(memoryRoot, self.name)
if utils.FileExists(memoryRoot) { if utils.FileExists(memoryRoot) {
spec.HasMemory = true spec.HasMemory = true
spec.Memory.Limit = readInt64(memoryRoot, "memory.limit_in_bytes") spec.Memory.Limit = readInt64(memoryRoot, "memory.limit_in_bytes")
@ -227,7 +236,10 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
} }
for _, fs := range filesystems { for _, fs := range filesystems {
stats.Filesystem = append(stats.Filesystem, stats.Filesystem = append(stats.Filesystem,
info.FsStats{fs.Device, fs.Capacity, fs.Capacity - fs.Free, info.FsStats{
fs.Device,
fs.Capacity,
fs.Capacity - fs.Free,
fs.DiskStats.ReadsCompleted, fs.DiskStats.ReadsCompleted,
fs.DiskStats.ReadsMerged, fs.DiskStats.ReadsMerged,
fs.DiskStats.SectorsRead, fs.DiskStats.SectorsRead,
@ -246,18 +258,20 @@ func (self *rawContainerHandler) getFsStats(stats *info.ContainerStats) error {
} }
func (self *rawContainerHandler) GetStats() (*info.ContainerStats, error) { func (self *rawContainerHandler) GetStats() (*info.ContainerStats, error) {
state := dockerlibcontainer.State{} // TODO(vmarmol): Don't re-create this every time.
state := dockerlibcontainer.State{
CgroupPaths: self.cgroupPaths,
}
if self.networkInterface != nil { if self.networkInterface != nil {
state = dockerlibcontainer.State{ state = dockerlibcontainer.State{
NetworkState: network.NetworkState{ NetworkState: network.NetworkState{
VethHost: self.networkInterface.VethHost, VethHost: self.networkInterface.VethHost,
VethChild: self.networkInterface.VethChild, VethChild: self.networkInterface.VethChild,
NsPath: "unknown",
}, },
} }
} }
stats, err := libcontainer.GetStats(self.cgroup, &state) stats, err := libcontainer.GetStats(&state)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -301,8 +315,8 @@ func listDirectories(dirpath string, parent string, recursive bool, output map[s
func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) { func (self *rawContainerHandler) ListContainers(listType container.ListType) ([]info.ContainerReference, error) {
containers := make(map[string]struct{}) containers := make(map[string]struct{})
for _, subsystem := range self.cgroupSubsystems.mounts { for _, cgroupPath := range self.cgroupPaths {
err := listDirectories(path.Join(subsystem.Mountpoint, self.name), self.name, listType == container.ListRecursive, containers) err := listDirectories(cgroupPath, self.name, listType == container.ListRecursive, containers)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -372,7 +386,7 @@ func (self *rawContainerHandler) processEvent(event *inotify.Event, events chan
// Derive the container name from the path name. // Derive the container name from the path name.
var containerName string var containerName string
for _, mount := range self.cgroupSubsystems.mounts { for _, mount := range self.cgroupSubsystems.Mounts {
mountLocation := path.Clean(mount.Mountpoint) + "/" mountLocation := path.Clean(mount.Mountpoint) + "/"
if strings.HasPrefix(event.Name, mountLocation) { if strings.HasPrefix(event.Name, mountLocation) {
containerName = event.Name[len(mountLocation)-1:] containerName = event.Name[len(mountLocation)-1:]
@ -437,8 +451,8 @@ func (self *rawContainerHandler) WatchSubcontainers(events chan container.Subcon
} }
// Watch this container (all its cgroups) and all subdirectories. // Watch this container (all its cgroups) and all subdirectories.
for _, mnt := range self.cgroupSubsystems.mounts { for _, cgroupPath := range self.cgroupPaths {
err := self.watchDirectory(path.Join(mnt.Mountpoint, self.name), self.name) err := self.watchDirectory(cgroupPath, self.name)
if err != nil { if err != nil {
return err return err
} }
@ -481,8 +495,8 @@ func (self *rawContainerHandler) StopWatchingSubcontainers() error {
func (self *rawContainerHandler) Exists() bool { func (self *rawContainerHandler) Exists() bool {
// If any cgroup exists, the container is still alive. // If any cgroup exists, the container is still alive.
for _, subsystem := range self.cgroupSubsystems.mounts { for _, cgroupPath := range self.cgroupPaths {
if utils.FileExists(path.Join(subsystem.Mountpoint, self.name)) { if utils.FileExists(cgroupPath) {
return true return true
} }
} }