Fix autostart for swarm scope connected containers

The swarm scope network connected containers with autostart enabled
there was a dependency problem with the cluster to be initialized before
we can autostart them. With the current container restart code happening
before cluster init, these containers were not getting autostarted
properly. Added a fix to delay the container start of those containers
which has atleast one swarm scope endpoint to until after the cluster is
initialized.

Signed-off-by: Jana Radhakrishnan <mrjana@docker.com>
This commit is contained in:
Jana Radhakrishnan 2016-09-09 09:55:57 -07:00
parent cf58eb437c
commit c9fb551d60
6 changed files with 92 additions and 10 deletions

View File

@ -262,6 +262,11 @@ func (cli *DaemonCli) start(opts daemonOptions) (err error) {
logrus.Fatalf("Error creating cluster component: %v", err)
}
// Restart all autostart containers which has a swarm endpoint
// and is not yet running now that we have successfully
// initialized the cluster.
d.RestartSwarmContainers()
logrus.Info("Daemon has completed initialization")
logrus.WithFields(logrus.Fields{

View File

@ -135,10 +135,11 @@ type Cluster struct {
// helps in identifying the attachment ID via the taskID and the
// corresponding attachment configuration obtained from the manager.
type attacher struct {
taskID string
config *network.NetworkingConfig
attachWaitCh chan *network.NetworkingConfig
detachWaitCh chan struct{}
taskID string
config *network.NetworkingConfig
attachWaitCh chan *network.NetworkingConfig
attachCompleteCh chan struct{}
detachWaitCh chan struct{}
}
type node struct {
@ -1262,12 +1263,24 @@ func (c *Cluster) WaitForDetachment(ctx context.Context, networkName, networkID,
agent := c.node.Agent()
c.RUnlock()
if ok && attacher != nil && attacher.detachWaitCh != nil {
if ok && attacher != nil &&
attacher.detachWaitCh != nil &&
attacher.attachCompleteCh != nil {
// Attachment may be in progress still so wait for
// attachment to complete.
select {
case <-attacher.detachWaitCh:
case <-attacher.attachCompleteCh:
case <-ctx.Done():
return ctx.Err()
}
if attacher.taskID == taskID {
select {
case <-attacher.detachWaitCh:
case <-ctx.Done():
return ctx.Err()
}
}
}
return agent.ResourceAllocator().DetachNetwork(ctx, taskID)
@ -1289,9 +1302,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
agent := c.node.Agent()
attachWaitCh := make(chan *network.NetworkingConfig)
detachWaitCh := make(chan struct{})
attachCompleteCh := make(chan struct{})
c.attachers[aKey] = &attacher{
attachWaitCh: attachWaitCh,
detachWaitCh: detachWaitCh,
attachWaitCh: attachWaitCh,
attachCompleteCh: attachCompleteCh,
detachWaitCh: detachWaitCh,
}
c.Unlock()
@ -1306,6 +1321,11 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
return nil, fmt.Errorf("Could not attach to network %s: %v", target, err)
}
c.Lock()
c.attachers[aKey].taskID = taskID
close(attachCompleteCh)
c.Unlock()
logrus.Debugf("Successfully attached to network %s with tid %s", target, taskID)
var config *network.NetworkingConfig
@ -1316,7 +1336,6 @@ func (c *Cluster) AttachNetwork(target string, containerID string, addresses []s
}
c.Lock()
c.attachers[aKey].taskID = taskID
c.attachers[aKey].config = config
c.Unlock()
return config, nil

View File

@ -384,6 +384,9 @@ func (daemon *Daemon) findAndAttachNetwork(container *container.Container, idOrN
return nil, nil, err
}
// This container has attachment to a swarm scope
// network. Update the container network settings accordingly.
container.NetworkSettings.HasSwarmEndpoint = true
return n, config, nil
}
@ -492,6 +495,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
// on first network connecting.
defaultNetName := runconfig.DefaultDaemonNetworkMode().NetworkName()
if nConf, ok := container.NetworkSettings.Networks[defaultNetName]; ok {
cleanOperationalData(nConf)
if err := daemon.connectToNetwork(container, defaultNetName, nConf.EndpointSettings, updateSettings); err != nil {
return err
}
@ -512,6 +516,7 @@ func (daemon *Daemon) allocateNetwork(container *container.Container) error {
}
for i, epConf := range epConfigs {
cleanOperationalData(epConf)
if err := daemon.connectToNetwork(container, networks[i], epConf.EndpointSettings, updateSettings); err != nil {
return err
}

View File

@ -202,7 +202,13 @@ func (daemon *Daemon) restore() error {
// fixme: only if not running
// get list of containers we need to restart
if !c.IsRunning() && !c.IsPaused() {
if daemon.configStore.AutoRestart && c.ShouldRestart() {
// Do not autostart containers which
// has endpoints in a swarm scope
// network yet since the cluster is
// not initialized yet. We will start
// it after the cluster is
// initialized.
if daemon.configStore.AutoRestart && c.ShouldRestart() && !c.NetworkSettings.HasSwarmEndpoint {
mapLock.Lock()
restartContainers[c] = make(chan struct{})
mapLock.Unlock()
@ -346,6 +352,30 @@ func (daemon *Daemon) restore() error {
return nil
}
// RestartSwarmContainers restarts any autostart container which has a
// swarm endpoint.
func (daemon *Daemon) RestartSwarmContainers() {
group := sync.WaitGroup{}
for _, c := range daemon.List() {
if !c.IsRunning() && !c.IsPaused() {
// Autostart all the containers which has a
// swarm endpoint now that the cluster is
// initialized.
if daemon.configStore.AutoRestart && c.ShouldRestart() && c.NetworkSettings.HasSwarmEndpoint {
group.Add(1)
go func(c *container.Container) {
defer group.Done()
if err := daemon.containerStart(c, ""); err != nil {
logrus.Error(err)
}
}(c)
}
}
}
group.Wait()
}
// waitForNetworks is used during daemon initialization when starting up containers
// It ensures that all of a container's networks are available before the daemon tries to start the container.
// In practice it just makes sure the discovery service is available for containers which use a network that require discovery.

View File

@ -21,6 +21,7 @@ type Settings struct {
SecondaryIPAddresses []networktypes.Address
SecondaryIPv6Addresses []networktypes.Address
IsAnonymousEndpoint bool
HasSwarmEndpoint bool
}
// EndpointSettings is a package local wrapper for

View File

@ -242,3 +242,25 @@ func (s *DockerSwarmSuite) TestSwarmServiceWithGroup(c *check.C) {
c.Assert(err, checker.IsNil)
c.Assert(strings.TrimSpace(out), checker.Equals, "uid=0(root) gid=0(root) groups=10(wheel),29(audio),50(staff),777")
}
func (s *DockerSwarmSuite) TestSwarmContainerAutoStart(c *check.C) {
d := s.AddDaemon(c, true, true)
out, err := d.Cmd("network", "create", "--attachable", "-d", "overlay", "foo")
c.Assert(err, checker.IsNil)
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
out, err = d.Cmd("run", "-id", "--restart=always", "--net=foo", "--name=test", "busybox", "top")
c.Assert(err, checker.IsNil)
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
out, err = d.Cmd("ps", "-q")
c.Assert(err, checker.IsNil)
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
d.Restart()
out, err = d.Cmd("ps", "-q")
c.Assert(err, checker.IsNil)
c.Assert(strings.TrimSpace(out), checker.Not(checker.Equals), "")
}