Merge pull request #5 from jyrmyx/master

Fix infinite retry loop for slaves that have already been deleted.
This commit is contained in:
Suresh Nallamilli 2015-01-21 14:30:36 -08:00
commit e171f8ecf5
7 changed files with 29 additions and 15 deletions

View File

@ -213,13 +213,13 @@ public class AzureCloud extends Cloud {
AzureComputer azureComputer = (AzureComputer)slaveComputer;
AzureSlave slaveNode = azureComputer.getNode();
LOGGER.info("Azure Cloud: provision: slave node"+slaveNode.getLabelString());
LOGGER.info("Azure Cloud: provision: slave template"+slaveTemplate.getLabels());
LOGGER.info("Azure Cloud: provision: slave node "+slaveNode.getLabelString());
LOGGER.info("Azure Cloud: provision: slave template "+slaveTemplate.getLabels());
if (!slaveNode.isDeleteSlave() && slaveNode.getLabelString().equalsIgnoreCase(slaveTemplate.getLabels())) {
try {
if(AzureManagementServiceDelegate.isVirtualMachineExists(slaveNode)) {
LOGGER.info("Found existing node , starting VM "+slaveNode.getNodeName());
LOGGER.info("Found existing node, starting VM "+slaveNode.getNodeName());
AzureManagementServiceDelegate.startVirtualMachine(slaveNode);
// set virtual machine details again
Thread.sleep(30 * 1000); // wait for 30 seconds

View File

@ -67,6 +67,7 @@ public class AzureCloudRetensionStrategy extends RetentionStrategy<AzureComputer
java.util.concurrent.Callable<Void> task = new java.util.concurrent.Callable<Void>() {
/**
 * Logs the name of the slave that is about to be idle-timed-out, then
 * invokes {@code idleTimeout()} on the node obtained from it.
 *
 * @return always {@code null}; {@code Callable<Void>} carries no result
 * @throws Exception any exception raised while logging or calling idleTimeout()
 */
public Void call() throws Exception {
LOGGER.info("AzureCloudRetensionStrategy: going to idleTimeout slave: "+slaveNode.getName());
// NOTE(review): the result of getNode() is dereferenced without a null check —
// confirm it cannot be null at this point.
slaveNode.getNode().idleTimeout();
// Nothing to report back; null is the only value a Void-returning call can return.
return null;
}
@ -74,9 +75,17 @@ public class AzureCloudRetensionStrategy extends RetentionStrategy<AzureComputer
try {
ExecutionEngine.executeWithRetry(task, new LinearRetryForAllExceptions(30 /*maxRetries*/, 30/*waitinterval*/, 30 * 60/*timeout*/));
} catch (AzureCloudException e) {
} catch (AzureCloudException ae) {
LOGGER.info("AzureCloudRetensionStrategy: check: could not terminate or shutdown "+slaveNode.getName());
}
} catch (Exception e) {
LOGGER.info("AzureCloudRetensionStrategy: execute: Exception occured while calling timeout on node, \n"
+ "Error code "+e.getMessage());
// We won't get exception for RNF , so for other exception types we can retry
if (e.getMessage().contains("not found in the currently deployed service")) {
LOGGER.info("AzureCloudRetensionStrategy: execute: Slave does not exist in the subscription anymore, setting shutdownOnIdle to True");
slaveNode.getNode().setShutdownOnIdle(true);
}
}
}
}
}

View File

@ -263,7 +263,7 @@ public class AzureManagementServiceDelegate {
// Throw exception so that in retry this will go through
throw new AzureCloudException("Provisioning Failure: Exception occured while creating virtual machine. Root cause: "+ex.getMessage());
} else {
LOGGER.info("AzureManagementServiceDelegate: handleProvisioningServiceException: conflict error: waiting for a minute ad will try again");
LOGGER.info("AzureManagementServiceDelegate: handleProvisioningServiceException: conflict error: waiting for a minute and will try again");
try {
Thread.sleep(60 * 1000);
} catch (InterruptedException e) {
@ -1341,7 +1341,7 @@ public class AzureManagementServiceDelegate {
client.getVirtualMachinesOperations().start(slave.getCloudServiceName(), slave.getDeploymentName(), slave.getNodeName());
successful = true; // may be we can just return
} catch (Exception e) {
LOGGER.info("AzureManagementServiceDelegate: startVirtualMachine: got exception while starting VM "+ slave.getNodeName()+ " will be retryig again");
LOGGER.info("AzureManagementServiceDelegate: startVirtualMachine: got exception while starting VM "+ slave.getNodeName()+ ". Will retry again after 30 seconds. Current retry count "+retryCount + " / " + Constants.MAX_PROV_RETRIES + "\n");
if (retryCount > Constants.MAX_PROV_RETRIES) {
throw e;
} else {

View File

@ -41,7 +41,7 @@ public final class AzureSlaveCleanUpTask extends AsyncPeriodicWork {
try {
if (azureComputer.isOffline()) {
if (!slaveNode.isDeleteSlave()) {
// Find out if node exists in azure , if not continue with delete else do not delete node
// Find out if node exists in azure, if not continue with delete else do not delete node
// although it is offline. May be JNLP or SSH launch is in progress
if(AzureManagementServiceDelegate.isVirtualMachineExists(slaveNode)) {
LOGGER.info("AzureSlaveCleanUpTask: execute: VM "+slaveNode.getDisplayName()+" exists in cloud");
@ -59,10 +59,15 @@ public final class AzureSlaveCleanUpTask extends AsyncPeriodicWork {
successful = true;
} catch (Exception e) {
retryCount++;
LOGGER.info("AzureSlaveCleanUpTask: execute: Exception occured while calling timeout on node , \n"
+ "Will retry again after 30 seconds. Current retry count "+retryCount + "\n"
LOGGER.info("AzureSlaveCleanUpTask: execute: Exception occured while calling timeout on node, \n"
+ "Will retry again after 30 seconds. Current retry count "+retryCount + " / 30\n"
+ "Error code "+e.getMessage());
// We won't get exception for RNF , so for other exception types we can retry
if (e.getMessage().contains("not found in the currently deployed service")) {
LOGGER.info("AzureSlaveCleanUpTask: execute: Slave does not exist in the subscription anymore, setting shutdownOnIdle to True");
slaveNode.setShutdownOnIdle(true);
break;
}
try {
Thread.sleep(30 * 1000);
} catch (InterruptedException e1) {

View File

@ -329,10 +329,10 @@ public class AzureSlaveTemplate implements Describable<AzureSlaveTemplate> {
Thread.sleep(5 * 60 * 1000);
} catch (InterruptedException e) {}
} else {
// Failure might be during Provisioning or post provisioning. back off for 10 minutes before retry.
LOGGER.info("AzureSlaveTemplate: handleTemplateStatus: Got "+failureStep+" error, waiting for 10 minutes before retry");
// Failure might be during Provisioning or post provisioning. back off for 5 minutes before retry.
LOGGER.info("AzureSlaveTemplate: handleTemplateStatus: Got "+failureStep+" error, waiting for 5 minutes before retry");
try {
Thread.sleep(10 * 60 * 1000);
Thread.sleep(5 * 60 * 1000);
} catch (InterruptedException e) {}
}

View File

@ -31,7 +31,7 @@ public class AzureUtil {
public static final String VAL_SPECIAL_CHAR_REGEX = "(?=.*[!@#$%^&*.]).{1,}";
public static final String VAL_PASSWORD_REGEX = "([0-9a-zA-Z!@#\\$%\\^&\\*\\.]*{8,123})";
public static final String VAL_ADMIN_USERNAME = "([a-zA-Z0-9_-]{6,15})";
public static final String VAL_ADMIN_USERNAME = "([a-zA-Z0-9_-]{3,15})";
// Although ugly to maintain this is best way for now.

View File

@ -5,7 +5,7 @@ Azure_GC_InitScript_Warn_Msg=Ensure image is pre-configured with a Java runtime
Azure_GC_LaunchMethod_Warn_Msg=Make sure the Azure slave can reach the master via the Jenkins URL. Refer to the help for details.
Azure_GC_TemplateStatus_Warn_Msg=The template is marked as disabled. Check the template status details in the Advanced section.
Azure_GC_UserName_Err=Not a valid user name. The user name must contain between 6 and 15 characters: alphanumerics, the underscore or the hyphen.
Azure_GC_UserName_Err=Not a valid user name. The user name must contain between 3 and 15 characters: alphanumerics, the underscore or the hyphen.
Azure_GC_Password_Err=Required: Not a valid password. Refer to the password rules in the help.
Azure_GC_JVM_Option_Err=Error: Not a valid JVM Option. JVM options should start with a hyphen(-). e.g. -Xmx1500m
Azure_GC_Template_Error_List=The following errors occurred while validating the template.