diff --git a/src/azure-cli-core/azure/cli/core/tests/test_util.py b/src/azure-cli-core/azure/cli/core/tests/test_util.py
index ae329ed65b7..a2c4de303e8 100644
--- a/src/azure-cli-core/azure/cli/core/tests/test_util.py
+++ b/src/azure-cli-core/azure/cli/core/tests/test_util.py
@@ -17,7 +17,7 @@
     (get_file_json, truncate_text, shell_safe_json_parse, b64_to_hex, hash_string, random_string,
      open_page_in_browser, can_launch_browser, handle_exception, ConfiguredDefaultSetter, send_raw_request,
      should_disable_connection_verify, parse_proxy_resource_id, get_az_user_agent, get_az_rest_user_agent,
-     _get_parent_proc_name, is_wsl, run_cmd, run_az_cmd, roughly_parse_command)
+     _get_parent_proc_name, is_wsl, run_cmd, run_az_cmd, roughly_parse_command, sdk_no_wait)
 from azure.cli.core.mock import DummyCli
 
 
@@ -463,6 +463,61 @@ def test_run_az_cmd(self):
         self.assertIsInstance(output.result, dict, "unexpected cmd execution result")
         self.assertIn("azure-cli-core", output.result, "unexpected cmd execution result")
 
+    @mock.patch('time.sleep', autospec=True)
+    def test_sdk_no_wait_retries_on_provisioning_bad_request(self, sleep_mock):
+        class MockHttpError(Exception):
+            status_code = 400
+            message = 'Resource cannot be updated during provisioning'
+
+        operation = mock.Mock(side_effect=[MockHttpError(), MockHttpError(), 'ok'])
+        result = sdk_no_wait(False, operation)
+
+        self.assertEqual(result, 'ok')
+        self.assertEqual(operation.call_count, 3)
+        self.assertEqual(sleep_mock.call_count, 2)
+
+    @mock.patch('time.sleep', autospec=True)
+    def test_sdk_no_wait_no_wait_does_not_retry(self, sleep_mock):
+        class MockHttpError(Exception):
+            status_code = 400
+            message = 'Resource cannot be updated during provisioning'
+
+        operation = mock.Mock(side_effect=MockHttpError())
+
+        with self.assertRaises(MockHttpError):
+            sdk_no_wait(True, operation)
+
+        self.assertEqual(operation.call_count, 1)
+        self.assertEqual(sleep_mock.call_count, 0)
+
+    @mock.patch('time.sleep', autospec=True)
+    def test_sdk_no_wait_non_matching_error_no_retry(self, sleep_mock):
+        class MockHttpError(Exception):
+            status_code = 400
+            message = 'A different bad request'
+
+        operation = mock.Mock(side_effect=MockHttpError())
+
+        with self.assertRaises(MockHttpError):
+            sdk_no_wait(False, operation)
+
+        self.assertEqual(operation.call_count, 1)
+        self.assertEqual(sleep_mock.call_count, 0)
+
+    @mock.patch('time.sleep', autospec=True)
+    def test_sdk_no_wait_non_string_message_no_retry(self, sleep_mock):
+        class MockHttpError(Exception):
+            status_code = 400
+            message = None
+
+        operation = mock.Mock(side_effect=MockHttpError())
+
+        with self.assertRaises(MockHttpError):
+            sdk_no_wait(False, operation)
+
+        self.assertEqual(operation.call_count, 1)
+        self.assertEqual(sleep_mock.call_count, 0)
+
 
 class TestBase64ToHex(unittest.TestCase):
 
diff --git a/src/azure-cli-core/azure/cli/core/util.py b/src/azure-cli-core/azure/cli/core/util.py
index bc572f3efe6..67a814d7b1b 100644
--- a/src/azure-cli-core/azure/cli/core/util.py
+++ b/src/azure-cli-core/azure/cli/core/util.py
@@ -10,6 +10,7 @@
 import platform
 import re
 import sys
+import time
 
 from knack.log import get_logger
 from knack.util import CLIError, to_snake_case, to_camel_case
@@ -52,6 +53,22 @@
 # - https://azcliprod.blob.core.windows.net/cli/vm/aliases.json (VM image aliases)
 AME_STORAGE_BASE_URL = "https://azcliprod.blob.core.windows.net/cli"
 
+_PROVISIONING_RETRY_ATTEMPTS = 6
+_PROVISIONING_RETRY_INTERVAL_SECONDS = 10
+_PROVISIONING_RETRYABLE_ERROR = 'resource cannot be updated during provisioning'
+
+
+def _is_provisioning_retryable_error(ex):
+    """Return True when ``ex`` is the HTTP 400 raised while a resource is still provisioning."""
+    if getattr(ex, 'status_code', None) != 400:
+        return False
+    # ``message`` can be absent, None or a non-string object; calling ``.lower()`` on it would raise
+    # inside the caller's ``except`` block and mask the real error, so fall back to ``str(ex)``.
+    error_msg = getattr(ex, 'message', None)
+    if not isinstance(error_msg, str):
+        error_msg = str(ex)
+    return _PROVISIONING_RETRYABLE_ERROR in error_msg.lower()
+
 
 def handle_exception(ex):  # pylint: disable=too-many-locals, too-many-statements, too-many-branches
     # For error code, follow guidelines at https://docs.python.org/2/library/sys.html#sys.exit,
@@ -790,7 +807,28 @@ def augment_no_wait_handler_args(no_wait_enabled, handler, handler_args):
 def sdk_no_wait(no_wait, func, *args, **kwargs):
+    """Invoke an SDK operation, optionally without polling for completion.
+
+    With ``no_wait`` the call is issued exactly once with ``polling=False``. Otherwise an HTTP 400
+    "resource cannot be updated during provisioning" failure is retried after a fixed delay, up to
+    ``_PROVISIONING_RETRY_ATTEMPTS`` calls in total; any other error, and the error raised by the
+    last attempt, is re-raised unchanged.
+    """
     if no_wait:
         kwargs.update({'polling': False})
-    return func(*args, **kwargs)
+        # The caller asked not to block, so never sleep and retry on their behalf.
+        return func(*args, **kwargs)
+
+    attempt_number = 1
+    while True:
+        try:
+            return func(*args, **kwargs)
+        except Exception as ex:  # pylint: disable=broad-except
+            # Always re-raise once the budget is spent so a failure never becomes a silent None.
+            if attempt_number >= _PROVISIONING_RETRY_ATTEMPTS or not _is_provisioning_retryable_error(ex):
+                raise
+        logger.warning("Resource is still provisioning. Retrying in %s seconds...",
+                       _PROVISIONING_RETRY_INTERVAL_SECONDS)
+        time.sleep(_PROVISIONING_RETRY_INTERVAL_SECONDS)
+        attempt_number += 1
 
 
 def open_page_in_browser(url):