Skip to content

Commit c62528e

Browse files
committed
WIP - verify GH action tag/SHA combinations
This change introduces a new function `verify_actions` to validate the contents of `actions.yml` against GitHub. TL;DR: The function verifies that the SHAs specified in `actions.yml` exist in the GH repo. It also ensures that the SHA exists on the Git tag, if the `tag` attribute is specified. The rest of the (currently spaghetti code) function is a lot of output and error (failure) and warning collection. Although it issues quite a few GH API requests, the rate limiter should not kick in (with an authenticated GH token). I opted to rely on the HTTP/1.1 `urllib.request` stuff, which has no connection reuse. The alternative would have been to add a dependency. The algorithm roughly works like this, for each action specified in `actions.yml`: * Issue a warning and stop, if the name is like `OWNER/*` ("wildcard" repository). Can't verify Git SHAs in this case. * Issue a warning and stop, if the name is like `docker://*` (not implemented). * Issue an error and stop, if the name doesn't start with an `OWNER/REPO` pattern. * Each expired entry is just skipped. * If there is a wildcard reference and a SHA reference, issue a warning. Then, for each reference for an action: * If no `tag` is specified, let GH resolve the commit SHA. Emit a warning to add the value of the `tag` attribute, if the SHA can be resolved. Otherwise, emit an error. * If `tag` is specified: * Add the SHA to the set of requested-shas-by-tag. * Call GH's "matching-refs" endpoint for the 'tag' value. * Emit an error, if the object type is not a tag or commit. * Also resolve 'tag' object types to 'commit' object types. * Add each returned SHA to the set of valid-shas-by-tag. * For each "requested tag" verify that the sets of valid and requested SHAs intersect. If not, emit an error.
1 parent ba8f12a commit c62528e

File tree

6 files changed

+501
-1
lines changed

6 files changed

+501
-1
lines changed

.github/workflows/update_actions.yml

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ jobs:
3232
- run: pip install ruyaml
3333

3434
- name: Update actions.yml
35-
shell: python
35+
shell: python
36+
env:
37+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3638
run: |
3739
import sys
3840
sys.path.append("./gateway/")
@@ -41,6 +43,11 @@ jobs:
4143
g.update_actions(".github/workflows/dummy.yml", "actions.yml")
4244
g.update_patterns("approved_patterns.yml", "actions.yml")
4345
46+
import action_tags as at
47+
result = at.verify_actions("actions.yml")
48+
if result.has_failures():
49+
raise Exception(f"Verify actions result summary:\n{result}")
50+
4451
- name: Commit and push changes
4552
if: ${{ github.event_name != 'pull_request' }}
4653
run: |

CONTRIBUTING.md

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
2+
# Updating code in `gateway/`
3+
4+
## Prerequisites
5+
6+
1. Python 3.13+
7+
2. `pipx install uv`
8+
9+
## Running tests
10+
11+
`uvx --with ruyaml pytest`
12+
13+
To print stdout/stderr to the console when running pytest:
14+
15+
`uvx --with ruyaml pytest -s`

actions.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,9 @@ scacap/action-surefire-report:
627627
keep: true
628628
5609ce4db72c09db044803b344a8968fd1f315da:
629629
tag: v1.9.1
630+
# GH API requests from GH hosted runners fail with 403 and the following error message:
631+
# 'Although you appear to have the correct authorization credentials, the `ScaCap` organization has an IP allow list enabled, and your IP address is not permitted to access this resource.'
632+
ignore_gh_api_errors: true
630633
scala-steward-org/scala-steward-action:
631634
53d486a68877f4a6d1e110e8058fe21e593db356:
632635
tag: v2.77.0

gateway/action_tags.py

Lines changed: 304 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
# /// script
2+
# requires-python = ">=3.13"
3+
# dependencies = [
4+
# "ruyaml",
5+
# ]
6+
# ///
7+
8+
import os
9+
import re
10+
from urllib.error import HTTPError
11+
12+
import ruyaml
13+
14+
from datetime import date
15+
from urllib.request import Request, urlopen
16+
from pathlib import Path
17+
from ruyaml import CommentedMap, CommentedSeq
18+
from gateway import ActionsYAML, load_yaml, on_gha
19+
20+
# Matches an 'OWNER/*' entry, i.e. a "wildcard" repository reference.
re_github_actions_repo_wildcard = r"^[A-Za-z0-9-_.]+/[*]$"
# Matches 'OWNER/REPO' with an optional trailing sub-path; group 1 captures 'OWNER/REPO'.
# Something like 'pytooling/actions/with-post-step' or 'readthedocs/actions/preview'.
re_github_actions_repo = r"^([A-Za-z0-9-_.]+/[A-Za-z0-9-_.]+)(/.+)?$"
# Matches a Docker image reference of the form 'docker://...'.
re_docker_image = r"^docker://.+"
# Matches a lowercase hexadecimal Git SHA of at least 7 characters.
re_git_sha = r"^[a-f0-9]{7,}$"
25+
26+
class ActionTagsCheckResult:
    """Accumulates log lines, failures and warnings of one verification run."""

    def __init__(self, log_to_console: bool = True):
        self.log_to_console = log_to_console
        # Every logged line, including the indented failure/warning lines.
        self.logs = []
        # Failure and warning messages, stored without their log indent.
        self.failures = []
        self.warnings = []

    def log(self, message: str) -> None:
        """Record ``message``; echo it to stdout first when console logging is on."""
        if self.log_to_console:
            print(message)
        self.logs.append(message)

    def failure(self, message: str, indent: str) -> None:
        """Log ``message`` prefixed with ``indent`` and register it as a failure."""
        self.log(f"{indent}{message}")
        self.failures.append(message)

    def warning(self, message: str, indent: str) -> None:
        """Log ``message`` prefixed with ``indent`` and register it as a warning."""
        self.log(f"{indent}{message}")
        self.warnings.append(message)

    def has_failures(self) -> bool:
        """Return True if at least one failure was registered."""
        return bool(self.failures)

    def has_warnings(self) -> bool:
        """Return True if at least one warning was registered."""
        return bool(self.warnings)

    def __str__(self):
        """Render all failures, then all warnings, one per line."""
        lines = [f"FAILURE: {entry}\n" for entry in self.failures]
        lines.extend(f"WARNING: {entry}\n" for entry in self.warnings)
        return "".join(lines)
56+
57+
58+
class ApiResponse:
    """Plain holder for the outcome of a single GitHub API request."""

    def __init__(self, req_url: str, status: int, reason: str, headers: dict[str, str], body: str):
        # Each constructor argument is stored under the attribute of the same name.
        self.req_url, self.status, self.reason = req_url, status, reason
        self.headers, self.body = headers, body
65+
66+
67+
def _gh_api_get(url_abspath: str) -> ApiResponse:
    """
    Issue a GET request against the GitHub REST API.

    HTTP error statuses are not raised: they are returned as an ``ApiResponse``
    so callers can branch on ``status``.

    Args:
        url_abspath: Absolute URL path (starting with ``/``) appended to ``https://api.github.com``.

    Returns:
        The response (or HTTP error response) with status, reason, headers and the UTF-8 decoded body.

    Raises:
        Exception: Any non-HTTP error (e.g. connection failures) is re-raised after printing a hint.
    """
    headers: dict[str, str] = {
        'Accept': 'application/vnd.github.v3+json',
    }
    # Use GH_TOKEN, if available.
    # Unauthorized GH API requests are quite rate-limited.
    # Tip: add an extra space before 'export' to prevent adding the line to the shell history.
    #    export GH_TOKEN=$(gh auth token)
    # `.get()` keeps the token optional; indexing `os.environ` raises KeyError when it is unset.
    gh_token = os.environ.get('GH_TOKEN')
    if gh_token:
        headers['Authorization'] = f"Bearer {gh_token}"
    req_url = f"https://api.github.com{url_abspath}"
    request = Request(url=req_url, headers=headers)
    try:
        with urlopen(request) as response:
            return ApiResponse(req_url, response.status, response.reason, dict(response.headers), response.read().decode('utf-8'))
    except HTTPError as e:
        # An HTTPError wraps the error response; read it, then close the underlying connection.
        try:
            body = e.read().decode('utf-8')
        finally:
            e.close()
        return ApiResponse(req_url, e.code, e.reason, dict(e.headers), body)
    except Exception:
        print(f"Failed to fetch '{req_url}' from GitHub API")
        # Bare `raise` preserves the original traceback.
        raise
88+
89+
def _gh_get_commit_object(owner_repo: str, sha: str) -> ApiResponse:
    """Request the Git commit object ``sha`` of repository ``owner_repo`` from the GitHub API."""
    endpoint = f"/repos/{owner_repo}/git/commits/{sha}"
    return _gh_api_get(endpoint)
91+
92+
def _gh_get_tag(owner_repo: str, tag_sha: str) -> ApiResponse:
    """Request the Git tag object ``tag_sha`` of repository ``owner_repo`` from the GitHub API."""
    endpoint = f"/repos/{owner_repo}/git/tags/{tag_sha}"
    return _gh_api_get(endpoint)
94+
95+
def _gh_matching_tags(owner_repo: str, tag: str) -> ApiResponse:
    """Request the tag references of repository ``owner_repo`` matching ``tag`` from the GitHub API."""
    endpoint = f"/repos/{owner_repo}/git/matching-refs/tags/{tag}"
    return _gh_api_get(endpoint)
97+
98+
def verify_actions(actions: Path | ActionsYAML | str, log_to_console: bool = True, today: date = date.today()) -> ActionTagsCheckResult:
99+
"""
100+
Validates the contents of the actions file against GitHub.
101+
102+
The function verifies that the SHAs specified in `actions.yml` exist in the GH repo.
103+
Also ensures that the SHA exists on the Git tag if the `tag` attribute is specified.
104+
105+
The algorithm roughly works like this, for each action specified in `actions.yml`:
106+
* Issue a warning and stop if the name is like `OWNER/*` ("wildcard" repository).
107+
Can't verify Git SHAs in this case.
108+
* Issue a warning and stop if the name is like `docker:*` (not implemented)
109+
* Issue an error and stop if the name doesn't start with an `OWNER/REPO` pattern.
110+
* Each expired entry is just skipped
111+
* If there is a wildcard reference and an SHA reference, issue an error.
112+
113+
Then, for each reference for an action:
114+
* If no `tag` is specified, let GH resolve the commit SHA.
115+
Emit a warning to add the value of the `tag` attribute if the SHA can be resolved.
116+
Otherwise, emit an error.
117+
* If `tag` is specified:
118+
* Add the SHA to the set of requested-shas-by-tag
119+
* Call GitHub's "matching-refs" endpoint for the 'tag' value
120+
* Emit en error if the object type is not a tag or commit.
121+
* Also resolve 'tag' object types to 'commit' object types.
122+
* Add each returned SHA to the set of valid-shas-by-tag.
123+
* For each "requested tag" verify that the sets of valid and requested shas intersect. If not, emit an error.
124+
125+
Args:
126+
actions: Path to the actions list file (mandatory)
127+
log_to_console: Whether to log messages immediately to the console (default: True)
128+
today: The current date (default: today)
129+
"""
130+
if on_gha():
131+
print(f"::group::Verify GitHub Actions")
132+
gh_token = os.environ['GH_TOKEN']
133+
if not gh_token or len(gh_token) == 0:
134+
raise Exception("GH_TOKEN environment variable is not set or empty")
135+
136+
if isinstance(actions, Path) or isinstance(actions, str):
137+
actions = load_yaml(actions)
138+
actions_yaml: ActionsYAML = actions
139+
140+
result = ActionTagsCheckResult(log_to_console=log_to_console or on_gha())
141+
142+
for name, action in actions_yaml.items():
143+
gh_repo_matcher = re.match(re_github_actions_repo, name)
144+
if gh_repo_matcher is not None:
145+
owner_repo = gh_repo_matcher.group(1)
146+
result.log(f"Checking GitHub action {name} in GH repo 'https://github.com/{owner_repo}'...")
147+
valid_shas_by_tag: dict[str, set[str]] = {}
148+
requested_shas_by_tag: dict[str, set[str]] = {}
149+
has_wildcard = False
150+
has_wildcard_msg_emitted = False
151+
# Flag whether to not error out on tag/SHA mismatches due to explicitly ignored GH API errors.
152+
has_ignored_api_errors = False
153+
for ref, details in action.items():
154+
if details and 'expires_at' in details:
155+
expires_at: date = details.get('expires_at')
156+
if expires_at < today:
157+
# skip expired entries
158+
result.log(f" .. ref '{ref}' is expired, skipping")
159+
continue
160+
161+
# noinspection PyTypedDict
162+
ignore_gh_api_errors = details and 'ignore_gh_api_errors' in details and details['ignore_gh_api_errors'] == True
163+
if ignore_gh_api_errors:
164+
result.warning(f"ignore_gh_api_errors is set to true: will ignore GH API errors for action {name} ref '{ref}'", " ..")
165+
166+
if ref == '*':
167+
# "wildcard" SHA - what would we...
168+
result.log(f" .. detected wildcard ref")
169+
if len(requested_shas_by_tag) > 0 and not has_wildcard_msg_emitted:
170+
result.warning(f"GitHub action {name} references a wildcard SHA but also has specific SHAs", " ..")
171+
has_wildcard_msg_emitted = True
172+
has_wildcard = True
173+
continue
174+
elif re.match(re_git_sha, ref):
175+
result.log(f" .. detected entry with Git SHA '{ref}'")
176+
if has_wildcard and not has_wildcard_msg_emitted:
177+
result.warning(f"GitHub action {name} references a wildcard SHA but also has specific SHAs", " ..")
178+
has_wildcard_msg_emitted = True
179+
180+
if not details or not 'tag' in details:
181+
result.log(f" .. no Git tag")
182+
# https://docs.github.com/en/rest/git/commits?apiVersion=2022-11-28#get-a-commit-object
183+
response = _gh_get_commit_object(owner_repo, ref)
184+
match response.status:
185+
case 200:
186+
result.warning(f"GitHub action {name} references existing commit SHA '{ref}' but does not specify the tag name for it.", " ..")
187+
case 404:
188+
result.failure(f"GitHub action {name} references non existing commit SHA '{ref}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}", " ..")
189+
case _:
190+
m = f"Failed to fetch Git SHA '{ref}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}"
191+
if ignore_gh_api_errors:
192+
has_ignored_api_errors = True
193+
result.warning(m, " ..")
194+
else:
195+
result.failure(m, " ..")
196+
else:
197+
tag: str = details.get('tag')
198+
result.log(f" .. collecting Git SHAs for tag {tag}")
199+
200+
if not tag in requested_shas_by_tag:
201+
requested_shas_by_tag[tag] = set()
202+
requested_shas_by_tag[tag].add(ref)
203+
204+
if not tag in valid_shas_by_tag:
205+
valid_shas_by_tag[tag] = set()
206+
valid_shas_for_tag = valid_shas_by_tag[tag]
207+
208+
# https://docs.github.com/en/rest/git/refs?apiVersion=2022-11-28#list-matching-references
209+
response = _gh_matching_tags(owner_repo, tag)
210+
match response.status:
211+
case 200:
212+
response_json: CommentedSeq = ruyaml.YAML().load(response.body)
213+
for msg in response_json:
214+
tag_ref_map: CommentedMap = msg
215+
tag_object: CommentedMap = tag_ref_map["object"]
216+
tab_object_type: str = tag_object["type"]
217+
tag_object_sha: str = tag_object["sha"]
218+
result.log(f" .. GH yields {tab_object_type} SHA '{tag_object_sha}' for '{tag_ref_map['ref']}'")
219+
match tab_object_type:
220+
case "tag":
221+
valid_shas_for_tag.add(tag_object_sha)
222+
# https://docs.github.com/en/rest/git/tags?apiVersion=2022-11-28#get-a-tag
223+
response2 = _gh_get_tag(owner_repo, tag_object_sha)
224+
match response2.status:
225+
case 200:
226+
tag_object_sha = ruyaml.YAML().load(response2.body)["object"]["sha"]
227+
valid_shas_for_tag.add(tag_object_sha)
228+
result.log(f" .. GH returns commit SHA '{tag_object_sha}' for previous tag SHA")
229+
case 404:
230+
result.log(f" .. commit SHA '{tag_object_sha}' does not exist")
231+
case _:
232+
m = f"Failed to fetch details for Git tag '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response2.status}: {response2.reason}, API URL: {response2.req_url}\n{response2.body}"
233+
if ignore_gh_api_errors:
234+
has_ignored_api_errors = True
235+
result.warning(m, " ..")
236+
else:
237+
result.failure(m, " ..")
238+
case "commit":
239+
valid_shas_for_tag.add(tag_object_sha)
240+
case "branch":
241+
result.failure(f"Branch references mentioned for Git tag '{tag}' for GitHub action {name}", " ..")
242+
case _:
243+
result.failure(f"Invalid Git object type '{tag_object['type']}' for Git tag '{tag}' in GitHub repo 'https://github.com/{owner_repo}'", " ..")
244+
case _:
245+
m = f"Failed to fetch matching Git tags for '{tag}' from GitHub repo 'https://github.com/{owner_repo}': HTTP/{response.status}: {response.reason}, API URL: {response.req_url}\n{response.body}"
246+
if ignore_gh_api_errors:
247+
result.warning(m, " ..")
248+
has_ignored_api_errors = True
249+
else:
250+
result.failure(m, " ..")
251+
else:
252+
result.failure(f"GitHub action {name} references an invalid Git SHA '{ref}'", " ..")
253+
254+
for req_tag, req_shas in requested_shas_by_tag.items():
255+
result.log(f" .. checking tag '{req_tag}'")
256+
result.log(f" .. referenced SHAs: {req_shas}")
257+
valid_shas = valid_shas_by_tag.get(req_tag)
258+
result.log(f" .. verified SHAs: {valid_shas if len(valid_shas)>0 else '(none)'}")
259+
if not valid_shas:
260+
m = f"GitHub action {name} references Git tag '{req_tag}' via SHAs '{req_shas}' but no SHAs for tag could be found - does the Git tag exist?"
261+
if has_ignored_api_errors:
262+
result.warning(m, "")
263+
else:
264+
result.failure(m, "")
265+
elif req_shas.isdisjoint(valid_shas):
266+
m = f"GitHub action {name} references Git tag '{req_tag}' via SHAs '{req_shas}' but none of those matches the valid SHAs '{valid_shas}'"
267+
result.failure(m, "")
268+
else:
269+
result.log(f" ✅ GitHub action {name} definition for tag '{req_tag}' is good!")
270+
271+
elif re.match(re_github_actions_repo_wildcard, name):
272+
result.warning(f"Ignoring '{name}' because it uses a GitHub repository wildcard ...", "")
273+
274+
elif re.match(re_docker_image, name):
275+
result.warning(f"Ignoring '{name}' because it references a Docker image ...", "")
276+
277+
else:
278+
m = f"Cannot determine action kind for '{name}'"
279+
result.failure(m, "")
280+
281+
if on_gha():
282+
if result.has_failures() or result.has_warnings():
283+
with open(os.environ["GITHUB_STEP_SUMMARY"], "a") as f:
284+
f.write(f"# GitHub Actions verification result\n")
285+
if len(result.failures) > 0:
286+
f.write(f"## Failures ({len(result.failures)})\n")
287+
f.write('```\n')
288+
for msg in result.failures:
289+
f.write(f"{msg}\n\n")
290+
f.write('```\n')
291+
if len(result.warnings) > 0:
292+
f.write(f"## Warnings ({len(result.warnings)})\n")
293+
f.write('```\n')
294+
for msg in result.warnings:
295+
f.write(f"{msg}\n\n")
296+
f.write('```\n')
297+
f.write(f"## Log\n")
298+
f.write('```\n')
299+
for msg in result.logs:
300+
f.write(f"{msg}\n")
301+
f.write('```\n')
302+
print("::endgroup::")
303+
304+
return result

gateway/gateway.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class RefDetails(TypedDict):
2424

2525
expires_at: date
2626
keep: NotRequired[bool]
27+
tag: NotRequired[str]
2728

2829

2930
ActionRefs = Dict[str, RefDetails]

0 commit comments

Comments
 (0)