Pipelines: retry service job on system errors (#26508)

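Retry the rebuild-index, cleanup, and no-op service jobs automatically (up to two retries) if they fail due to infrastructure-related problems, i.e. GitLab's `runner_system_failure` or `stuck_or_timeout_failure` conditions.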
2021-10-07 08:59:51 -06:00
parent b33a0923e1
commit 0561af1975
1 changed file with 12 additions and 0 deletions
--- a/lib/spack/spack/ci.py
+++ b/lib/spack/spack/ci.py
@@ -1004,6 +1004,14 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file,
        'after_script',
    ]

+    service_job_retries = {
+        'max': 2,
+        'when': [
+            'runner_system_failure',
+            'stuck_or_timeout_failure'
+        ]
+    }
+
    if job_id > 0:
        if temp_storage_url_prefix:
            # There were some rebuild jobs scheduled, so we will need to
@@ -1023,6 +1031,7 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file,
                    temp_storage_url_prefix)
            ]
            cleanup_job['when'] = 'always'
+            cleanup_job['retry'] = service_job_retries

            output_object['cleanup'] = cleanup_job

@@ -1046,6 +1055,7 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file,
                    index_target_mirror)
            ]
            final_job['when'] = 'always'
+            final_job['retry'] = service_job_retries

            output_object['rebuild-index'] = final_job

@@ -1109,6 +1119,8 @@ def generate_gitlab_ci_yaml(env, print_summary, output_file,
                'echo "All specs already up to date, nothing to rebuild."',
            ]

+        noop_job['retry'] = service_job_retries
+
        sorted_output = {'no-specs-to-rebuild': noop_job}

    if known_broken_specs_encountered: