Compare commits

...

7 Commits

Author SHA1 Message Date
Brian Goff 56f2c07af8 Set init process as non-dumpable
Backports fix from 2f7393a473 to 1.10.x
Resolves CVE-2016-9962 for Docker 1.10.x.

Signed-off-by: Brian Goff <cpuguy83@gmail.com>
2017-01-11 09:14:52 -05:00
Michael Crosby 27dd48f691 Move the process outside of the systemd cgroup
If you don't move the process out of the named cgroup for systemd then
systemd will try to delete all the cgroups that the process is currently
in.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
2016-02-19 11:23:30 -08:00
Michael Crosby 3d8a20bb77 Do not use stream encoders
Marshall the raw objects for the sync pipes so that no new line chars
are left behind in the pipe causing errors.

Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
2016-01-26 14:52:00 -08:00
Aleksa Sarai f36b00aa12 cgroups: fs: fix cgroup.Parent path sanitisation
Properly sanitise the --cgroup-parent path, to avoid potential issues
(as it starts creating directories and writing to files as root). In
addition, fix an infinite recursion due to incomplete base cases.

It might be a good idea to move pathClean to a separate library (which
deals with path safety concerns, so all of runC and Docker can take
advantage of it).

Signed-off-by: Aleksa Sarai <asarai@suse.com>
2016-01-21 16:53:06 -08:00
Michael Crosby 47e3f834d7 Handle seccomp proc parsing errors
Signed-off-by: Michael Crosby <crosbymichael@gmail.com>
2016-01-19 15:27:54 -08:00
Jessica Frazelle eab6cdf77b add seccomp.IsEnabled() function
This is much like apparmor.IsEnabled() function and a nice helper.

Signed-off-by: Jessica Frazelle <acidburn@docker.com>
2016-01-19 15:27:45 -08:00
Qiang Huang bef07f4078 Embed Resources for backward compatibility
Fixes: docker/docker#19329

Signed-off-by: Qiang Huang <h.huangqiang@huawei.com>
2016-01-19 15:27:26 -08:00
14 changed files with 190 additions and 12 deletions

View File

@ -29,6 +29,7 @@ var (
&NetPrioGroup{},
&PerfEventGroup{},
&FreezerGroup{},
&NameGroup{GroupName: "name=systemd", Join: true},
}
CgroupProcesses = "cgroup.procs"
HugePageSizes, _ = cgroups.GetHugePageSize()
@ -230,12 +231,39 @@ func (m *Manager) GetPids() ([]int, error) {
return cgroups.GetPids(dir)
}
// pathClean makes a path safe for use with filepath.Join. This is done by not
// only cleaning the path, but also (if the path is relative) adding a leading
// '/' and cleaning it (then removing the leading '/'). This ensures that a
// path resulting from prepending another path will always resolve to lexically
// be a subdirectory of the prefixed path. This is all done lexically, so paths
// that include symlinks won't be safe as a result of using pathClean.
func pathClean(path string) string {
// Ensure that all paths are cleaned (especially problematic ones like
// "/../../../../../" which can cause lots of issues).
path = filepath.Clean(path)
// If the path isn't absolute, we need to do more processing to fix paths
// such as "../../../../<etc>/some/path". We also shouldn't convert absolute
// paths to relative ones.
if !filepath.IsAbs(path) {
path = filepath.Clean(string(os.PathSeparator) + path)
// This can't fail, as (by definition) all paths are relative to root.
path, _ = filepath.Rel(string(os.PathSeparator), path)
}
// Clean the path again for good measure.
return filepath.Clean(path)
}
func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) {
root, err := getCgroupRoot()
if err != nil {
return nil, err
}
// Clean the parent slice path.
c.Parent = pathClean(c.Parent)
return &cgroupData{
root: root,
parent: c.Parent,

View File

@ -4,6 +4,7 @@ package fs
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"path/filepath"
@ -95,6 +96,10 @@ func (s *CpusetGroup) ensureParent(current, root string) error {
if filepath.Clean(parent) == root {
return nil
}
// Avoid infinite recursion.
if parent == current {
return fmt.Errorf("cpuset: cgroup parent path outside cgroup root")
}
if err := s.ensureParent(parent, root); err != nil {
return err
}

View File

@ -9,6 +9,7 @@ import (
type NameGroup struct {
GroupName string
Join bool
}
func (s *NameGroup) Name() string {
@ -16,6 +17,10 @@ func (s *NameGroup) Name() string {
}
func (s *NameGroup) Apply(d *cgroupData) error {
if s.Join {
// ignore errors if the named cgroup does not exist
d.join(s.GroupName)
}
return nil
}
@ -24,6 +29,9 @@ func (s *NameGroup) Set(path string, cgroup *configs.Cgroup) error {
}
func (s *NameGroup) Remove(d *cgroupData) error {
if s.Join {
removePath(d.path(s.GroupName))
}
return nil
}

View File

@ -20,7 +20,7 @@ type Cgroup struct {
ScopePrefix string `json:"scope_prefix"`
// Resources contains various cgroups settings to apply
Resources *Resources `json:"resources"`
*Resources
}
type Resources struct {

View File

@ -21,6 +21,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/criurpc"
"github.com/opencontainers/runc/libcontainer/utils"
"github.com/vishvananda/netlink/nl"
)
@ -968,7 +969,7 @@ func (c *linuxContainer) updateState(process parentProcess) error {
}
defer f.Close()
os.Remove(filepath.Join(c.root, "checkpoint"))
return json.NewEncoder(f).Encode(state)
return utils.WriteJSON(f, state)
}
func (c *linuxContainer) currentStatus() (Status, error) {

View File

@ -5,7 +5,6 @@ package libcontainer
import (
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
@ -19,6 +18,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups/systemd"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/configs/validate"
"github.com/opencontainers/runc/libcontainer/utils"
)
const (
@ -226,10 +226,7 @@ func (l *LinuxFactory) StartInitialization() (err error) {
// if we have an error during the initialization of the container's init then send it back to the
// parent process in the form of an initError.
if err != nil {
// ensure that any data sent from the parent is consumed so it doesn't
// receive ECONNRESET when the child writes to the pipe.
ioutil.ReadAll(pipe)
if err := json.NewEncoder(pipe).Encode(newSystemError(err)); err != nil {
if err := utils.WriteJSON(pipe, newSystemError(err)); err != nil {
panic(err)
}
}

View File

@ -3,7 +3,6 @@
package libcontainer
import (
"encoding/json"
"io/ioutil"
"os"
"path/filepath"
@ -12,6 +11,7 @@ import (
"github.com/docker/docker/pkg/mount"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/utils"
)
func newTestRoot() (string, error) {
@ -179,5 +179,5 @@ func marshal(path string, v interface{}) error {
return err
}
defer f.Close()
return json.NewEncoder(f).Encode(v)
return utils.WriteJSON(f, v)
}

View File

@ -11,6 +11,7 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/prctl.h>
#include <fcntl.h>
#include <signal.h>
#include <setjmp.h>
@ -110,6 +111,12 @@ void nsexec()
exit(1);
}
/* make the process non-dumpable */
if (prctl(PR_SET_DUMPABLE, 0, 0, 0, 0) != 0) {
pr_perror("failed to set process as non-dumpable");
exit(1);
}
char nlbuf[NLMSG_HDRLEN];
struct nlmsghdr *nh;
if ((n = read(pipenum, nlbuf, NLMSG_HDRLEN)) != NLMSG_HDRLEN) {

View File

@ -15,6 +15,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
)
type parentProcess interface {
@ -83,7 +84,7 @@ func (p *setnsProcess) start() (err error) {
return newSystemError(err)
}
}
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
return newSystemError(err)
}
if err := syscall.Shutdown(int(p.parentPipe.Fd()), syscall.SHUT_WR); err != nil {
@ -270,7 +271,7 @@ func (p *initProcess) startTime() (string, error) {
func (p *initProcess) sendConfig() error {
// send the state to the container's init process then shutdown writes for the parent
if err := json.NewEncoder(p.parentPipe).Encode(p.config); err != nil {
if err := utils.WriteJSON(p.parentPipe, p.config); err != nil {
return err
}
// shutdown writes for the parent side of the pipe

View File

@ -0,0 +1,47 @@
Name: cat
State: R (running)
Tgid: 19383
Ngid: 0
Pid: 19383
PPid: 19275
TracerPid: 0
Uid: 1000 1000 1000 1000
Gid: 1000 1000 1000 1000
FDSize: 256
Groups: 24 25 27 29 30 44 46 102 104 108 111 1000 1001
NStgid: 19383
NSpid: 19383
NSpgid: 19383
NSsid: 19275
VmPeak: 5944 kB
VmSize: 5944 kB
VmLck: 0 kB
VmPin: 0 kB
VmHWM: 744 kB
VmRSS: 744 kB
VmData: 324 kB
VmStk: 136 kB
VmExe: 48 kB
VmLib: 1776 kB
VmPTE: 32 kB
VmPMD: 12 kB
VmSwap: 0 kB
Threads: 1
SigQ: 0/30067
SigPnd: 0000000000000000
ShdPnd: 0000000000000000
SigBlk: 0000000000000000
SigIgn: 0000000000000080
SigCgt: 0000000000000000
CapInh: 0000000000000000
CapPrm: 0000000000000000
CapEff: 0000000000000000
CapBnd: 0000003fffffffff
CapAmb: 0000000000000000
Seccomp: 0
Cpus_allowed: f
Cpus_allowed_list: 0-3
Mems_allowed: 00000000,00000001
Mems_allowed_list: 0
voluntary_ctxt_switches: 0
nonvoluntary_ctxt_switches: 1

View File

@ -3,8 +3,11 @@
package seccomp
import (
"bufio"
"fmt"
"log"
"os"
"strings"
"syscall"
"github.com/opencontainers/runc/libcontainer/configs"
@ -17,6 +20,9 @@ var (
actKill = libseccomp.ActKill
actTrace = libseccomp.ActTrace.SetReturnCode(int16(syscall.EPERM))
actErrno = libseccomp.ActErrno.SetReturnCode(int16(syscall.EPERM))
// SeccompModeFilter refers to the syscall argument SECCOMP_MODE_FILTER.
SeccompModeFilter = uintptr(2)
)
// Filters given syscalls in a container, preventing them from being used
@ -73,6 +79,24 @@ func InitSeccomp(config *configs.Seccomp) error {
return nil
}
// IsEnabled returns if the kernel has been configured to support seccomp.
func IsEnabled() bool {
// Try to read from /proc/self/status for kernels > 3.8
s, err := parseStatusFile("/proc/self/status")
if err != nil {
// Check if Seccomp is supported, via CONFIG_SECCOMP.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_GET_SECCOMP, 0, 0); err != syscall.EINVAL {
// Make sure the kernel has CONFIG_SECCOMP_FILTER.
if _, _, err := syscall.RawSyscall(syscall.SYS_PRCTL, syscall.PR_SET_SECCOMP, SeccompModeFilter, 0); err != syscall.EINVAL {
return true
}
}
return false
}
_, ok := s["Seccomp"]
return ok
}
// Convert Libcontainer Action to Libseccomp ScmpAction
func getAction(act configs.Action) (libseccomp.ScmpAction, error) {
switch act {
@ -178,3 +202,30 @@ func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error {
return nil
}
func parseStatusFile(path string) (map[string]string, error) {
f, err := os.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
s := bufio.NewScanner(f)
status := make(map[string]string)
for s.Scan() {
if err := s.Err(); err != nil {
return nil, err
}
text := s.Text()
parts := strings.Split(text, ":")
if len(parts) <= 1 {
continue
}
status[parts[0]] = parts[1]
}
return status, nil
}

View File

@ -0,0 +1,17 @@
// +build linux,cgo,seccomp
package seccomp
import "testing"
func TestParseStatusFile(t *testing.T) {
s, err := parseStatusFile("fixtures/proc_self_status")
if err != nil {
t.Fatal(err)
}
if _, ok := s["Seccomp"]; !ok {
t.Fatal("expected to find 'Seccomp' in the map but did not.")
}
}

View File

@ -17,3 +17,8 @@ func InitSeccomp(config *configs.Seccomp) error {
}
return nil
}
// IsEnabled returns false, because it is not supported.
func IsEnabled() bool {
return false
}

View File

@ -3,6 +3,7 @@ package utils
import (
"crypto/rand"
"encoding/hex"
"encoding/json"
"io"
"path/filepath"
"syscall"
@ -36,10 +37,20 @@ func ResolveRootfs(uncleanRootfs string) (string, error) {
}
// ExitStatus returns the correct exit status for a process based on if it
// was signaled or exited cleanly.
// was signaled or exited cleanly
func ExitStatus(status syscall.WaitStatus) int {
if status.Signaled() {
return exitSignalOffset + int(status.Signal())
}
return status.ExitStatus()
}
// WriteJSON writes the provided struct v to w using standard json marshaling
func WriteJSON(w io.Writer, v interface{}) error {
data, err := json.Marshal(v)
if err != nil {
return err
}
_, err = w.Write(data)
return err
}