|
| 1 | +#!/bin/python3 |
| 2 | +import argparse |
| 3 | +import requests |
| 4 | +import re |
| 5 | +import sys |
| 6 | +from pathlib import Path |
| 7 | + |
# Section names are taken from the marker comments found in parser.y.
_RESERVED = "ReservedKeyword"
_NON_RESERVED = ("UnReservedKeyword", "TiDBKeyword")
_IGNORED = "NotKeywordToken"

# A marker line contains this prefix immediately followed by a section name.
_MARKER_PREFIX = "The following tokens belong to "
_SECTION_MARKERS = tuple(
    (_MARKER_PREFIX + name, name) for name in (_RESERVED, *_NON_RESERVED, _IGNORED)
)

# Token declaration: a tab, the token identifier, whitespace, then the quoted
# keyword, with nothing else on the line.
_TOKEN_RE = re.compile(r'^\t\w+\s+"(\w+)"$')

# Entries of the keyword document: "- KW (R)" / "- KW (R-Window)" are labeled
# as reserved, a bare "- KW" is not.
_RESERVED_DOC_RE = re.compile(r"^- (\w+) \((?:R|R-Window)\)$", re.MULTILINE)
_PLAIN_DOC_RE = re.compile(r"^- (\w+)$", re.MULTILINE)


def parse_args(argv=None):
    """Parse the command line.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Namespace with ``parser_file``, ``parser_url`` and
        ``download_from_url``.
    """
    aparser = argparse.ArgumentParser()
    aparser.add_argument(
        "--parser_file", default="../tidb/pkg/parser/parser.y", help="Path to parser.y"
    )
    aparser.add_argument(
        "--parser_url",
        default="https://github.com/pingcap/tidb/raw/refs/heads/master/pkg/parser/parser.y",
        help="URL to parser.y",
    )
    aparser.add_argument("--download_from_url", action="store_true")
    return aparser.parse_args(argv)


def load_parser_lines(args):
    """Return the lines of parser.y, from the URL or from the local file.

    Exits the process with a message when the download fails or the local
    file does not exist.
    """
    if args.download_from_url:
        print(f"Fetching {args.parser_url}")
        try:
            response = requests.get(args.parser_url, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            sys.exit(f"Failed to download parser file: {e}")
        return response.text.splitlines()

    parser = Path(args.parser_file)
    if not parser.exists():
        sys.exit(f"{parser} doesn't exist")
    return parser.read_text(encoding="utf-8").splitlines()


def find_keyword_problems(lines, keywords):
    """Compare the keyword tokens in parser.y with the keyword document.

    Args:
        lines: Lines of parser.y, without line terminators.
        keywords: Full text of the keyword document.

    Returns:
        One message per inconsistency, in the order the tokens appear in
        ``lines``.
    """
    # Index the document once instead of regex-searching it for every token.
    documented_reserved = set(_RESERVED_DOC_RE.findall(keywords))
    documented_plain = set(_PLAIN_DOC_RE.findall(keywords))

    problems = []
    section = "Unknown"
    for line in lines:
        if line == "":
            # A blank line ends the current keyword section.
            section = _IGNORED
        else:
            for marker, name in _SECTION_MARKERS:
                if marker in line:
                    section = name
                    break

        if section != _RESERVED and section not in _NON_RESERVED:
            continue
        token = _TOKEN_RE.match(line)
        if token is None:
            continue
        kw = token.group(1)

        if section == _RESERVED:
            if kw in documented_reserved:
                continue
            if kw in documented_plain:
                problems.append(f"Reserved keyword not labeled as reserved: {kw}")
            else:
                problems.append(f"Missing docs for reserved keyword: {kw}")
        else:
            if kw in documented_plain:
                continue
            if kw in documented_reserved:
                problems.append(
                    f"Non-reserved keyword from {section} labeled as reserved: {kw}"
                )
            else:
                problems.append(
                    f"Missing docs for non-reserved keyword from {section}: {kw}"
                )
    return problems


def exit_status(errors):
    """Map an error count to a process exit status.

    The count itself is used while it fits. Larger counts are clamped to 255,
    because an exit status is truncated to 8 bits on POSIX and a count of 256
    would otherwise be reported as success.
    """
    return min(errors, 255)


def main(argv=None):
    """Run the check, print every problem, and return the exit status."""
    args = parse_args(argv)
    lines = load_parser_lines(args)

    kwdocs = Path("keywords.md")
    if not kwdocs.exists():
        sys.exit(f"{kwdocs} doesn't exist")
    # Read with the same explicit encoding as parser.y rather than the
    # platform default.
    keywords = kwdocs.read_text(encoding="utf-8")

    problems = find_keyword_problems(lines, keywords)
    for problem in problems:
        print(problem)
    return exit_status(len(problems))


if __name__ == "__main__":
    sys.exit(main())
0 commit comments