Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions catalog/management/commands/download_program_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,16 @@


class Command(BaseCommand):
help = ""
help = "Fetch course content for each program from ULB API and save to csv/courses.json"

def handle(self, *args: Any, **options: Any) -> None:
"""
For each program in csv/programs.json, fetches its course content from the ULB API.

Programs that are "options" (specializations) have a "parent" field and require
both the parent slug and option slug in the API call. If that fails, we retry
with just the option slug.
"""
with open("csv/programs.json") as f:
programs: list[dict] = json.load(f)
print("\n[bold blue]Listing the course content of all programs...[/]\n")
Expand All @@ -38,27 +45,42 @@ def handle(self, *args: Any, **options: Any) -> None:
advance=1,
description=f"Listing the course content of {progam['slug'].upper()}...",
)
if "parent" in progam:

# Build API query string
# If this is an option/specialization (has a parent), include both parent and option
# Otherwise, just query by the program slug
is_option = "parent" in progam

if is_option:
# Format: ?anet=PARENT_SLUG&option=OPTION_SLUG
qs = f"/ksup/programme?gen=prod&anet={progam['parent'].upper()}&option={progam['slug'].upper()}&lang=fr"
else:
# Format: ?anet=PROGRAM_SLUG
qs = f"/ksup/programme?gen=prod&anet={progam['slug'].upper()}&lang=fr"

URL = f"https://www.ulb.be/api/formation?path={quote(qs)}"
try:
response = requests.get(URL)
if not response.ok:
if "parent" in progam:
# If this is an option and the parent+option query failed,
# retry with just the option slug (parent might be invalid)
if is_option:
print(
f"[yellow]Skip:[/] [magenta]{progam['slug'].upper()}[/] with bogus parent {progam['parent'].upper()}"
f"[yellow]Failed to fetch option with parent:[/] "
f"[magenta]{progam['slug'].upper()}[/] (parent: {progam['parent'].upper()})"
)

print("Retry")
print("[yellow]Retrying without parent parameter...[/] ")
qs = f"/ksup/programme?gen=prod&anet={progam['slug'].upper()}&lang=fr"
URL = f"https://www.ulb.be/api/formation?path={quote(qs)}"
response = requests.get(URL)
if not response.ok:
print("Retry failed")
print(
f"[red]Failed with status {response.status_code}[/]"
)
continue
else:
print("[green]Success[/]")

else:
print(
Expand All @@ -80,7 +102,12 @@ def handle(self, *args: Any, **options: Any) -> None:
if len(programme_json["blocs"]) == 0:
continue

for course in programme_json["blocs"][-1]["progCourses"]:
# Extract courses from the last bloc (final year)
# Each bloc represents a year in the program
last_bloc = programme_json["blocs"][-1]

for course in last_bloc["progCourses"]:
# Skip placeholder courses (TEMP-0000, HULB-0000)
if course["id"] not in ["TEMP-0000", "HULB-0000"]:
program_content[progam["slug"]][course["id"]] = {
"id": course["id"],
Expand Down
42 changes: 33 additions & 9 deletions catalog/management/commands/download_programs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,25 @@


class Command(BaseCommand):
help = ""
help = "Scrape all academic programs from ULB website and save to csv/programs.json"

PAGE_SIZE = 20

URL = f"https://www.ulb.be/servlet/search?beanKey=beanKeyRechercheFormation&types=formation&natureFormation=ulb&s=FACULTE_ASC&limit={PAGE_SIZE}"

def handle(self, *args: Any, **options: Any) -> None:
"""
Scrapes ULB search results to build a list of academic programs.

Some programs are "parent programs" that exist only to group related options.
For example, a Master's program might have multiple specializations (options).
We filter out these parent programs and only keep the actual options that students
can enroll in.
"""
programs: list[dict] = []

parent_programs: set[str] = set()
# Track programs that are containers for options (these will be filtered out)
parent_program_slugs: set[str] = set()
print("[bold blue]Gathering the list of available programs...[/]\n")

with Progress(
Expand Down Expand Up @@ -90,17 +99,25 @@ def handle(self, *args: Any, **options: Any) -> None:
"faculty": faculties,
}

# Check if this is an "option" (specialization) of a parent program
# The HTML structure uses "resultat--fille" (child result) to indicate options
if option_div := mnemonic_span.find_previous(
"div", {"class": "search-result__resultat--fille"}
):
# Find the parent program that contains this option
parent_program_div = option_div.find_previous(
"div", {"class": "search-result__result-item"}
)
parent_mnemonic_span = parent_program_div.find(
"span", {"class": "search-result__mnemonique"}
)
p["parent"] = parent_mnemonic_span.text
parent_programs.add(parent_mnemonic_span.text)
parent_slug = parent_mnemonic_span.text

# Store the parent reference for API calls later
p["parent"] = parent_slug

# Mark this parent as a container (to be filtered out later)
parent_program_slugs.add(parent_slug)

programs.append(p)
else:
Expand All @@ -110,12 +127,19 @@ def handle(self, *args: Any, **options: Any) -> None:
progress.update(task1, completed=self.PAGE_SIZE * page)
page += 1

# Filter out parent programs that are just containers for options
# We only want the actual enrollable programs (the options themselves)
print(
f"Found {len(parent_programs)} programs containing options, ignoring those..."
f"Found {len(parent_program_slugs)} parent programs that contain options, filtering them out..."
)
print(parent_programs)
programs = [p for p in programs if p["slug"] not in parent_programs]
print(f"Parent programs to exclude: {parent_program_slugs}")

enrollable_programs = [
p for p in programs if p["slug"] not in parent_program_slugs
]

print(f"Found {len(programs)} distinct programs, dumping to json...")
print(
f"Found {len(enrollable_programs)} enrollable programs, saving to csv/programs.json..."
)
with open("csv/programs.json", "w") as f:
json.dump(programs, f, indent=4)
json.dump(enrollable_programs, f, indent=4)
8 changes: 7 additions & 1 deletion catalog/management/commands/load_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,13 @@ def handle(self, *args: Any, **options: Any) -> None: # noqa: PLR0912
elif "master" in program["name"].lower() or program["slug"].startswith(
"MA"
):
program_type = Category.CategoryType.MASTER
if (
"enseignement" in program["name"].lower()
or "-ES" in program["slug"]
):
program_type = Category.CategoryType.AGGREGATION
else:
program_type = Category.CategoryType.MASTER
elif "certificat" in program["name"].lower():
program_type = Category.CategoryType.CERTIFICATE
elif "agrégation" in program["name"].lower():
Expand Down
9 changes: 9 additions & 0 deletions catalog/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def better_name(self):
"Certificat d'aptitude pédagogique approprié à l'enseignement supérieur - ",
"",
)
name = name.replace("Master en enseignement section ", "Enseignement ")

name = name.removeprefix("Bachelier en ").removeprefix("Bachelier : ")
name = (
name.removeprefix("Master en ")
Expand Down Expand Up @@ -89,8 +91,15 @@ def better_name(self):

name = name.replace(", option Bruxelles", "")
name = name.replace("(Site de Charleroi)", "(Charleroi)")
name = name.replace("(Site de Mons)", "(Mons)")
name = name.replace("(Général à finalité ", "(")

name = name.replace("option SOIR", "(soir)")
name = name.replace("option JOUR", "")

name = name.replace("), (", ", ")
name = name.removesuffix(", ")

return name


Expand Down
9 changes: 0 additions & 9 deletions catalog/templates/catalog/course.html
Original file line number Diff line number Diff line change
Expand Up @@ -232,15 +232,6 @@ <h1 class="d-flex align-items-center gap-2">
</li>
{% empty %}
<div class="alert alert-success" role="alert">
<h4 class="alert-heading">

Rentrée 2023-2024: Nouveaux cours !
<span style="font-size: .5rem;transform: translate(0, -5px);"
class="badge rounded-pill bg-danger">
NEW
</span>

</h4>
Le programme de cours de l'ULB a parfois beaucoup changé au cours des années.
DocHub s'est adapté en renommant des cours et en déplaçant les documents
mais il se pourrait que certains documents soient mal rangés.
Expand Down
10 changes: 1 addition & 9 deletions catalog/templates/catalog/finder.html
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,6 @@ <h4 class="alert-heading">
</div>
{% else %}
<div class="alert alert-success" role="alert">
<h4 class="alert-heading">

Rentrée 2023-2024: Nouveaux cours !
<span style="font-size: .5rem;transform: translate(0, -5px);" class="badge rounded-pill bg-danger">
NEW
</span>

</h4>
Le programme de cours de l'ULB a parfois beaucoup changé au cours des années.
Si tu ne trouves plus des documents des années précédentes,
<a href="/catalog/f/archives/">regarde dans les archives</a> !
Expand Down Expand Up @@ -73,7 +65,7 @@ <h5>
{% endif %}
{% for subcat in type.list %}
<li>
<a href="{{ column.category_prefix }}/{{ subcat.slug }}"
<a href="{{ column.category_prefix }}/{{ subcat.slug }}/"
class="text-body text-decoration-none">
<div class="d-flex align-items-center my-1 ">
<div class="flex-shrink-0 px-2 py-1">
Expand Down
6 changes: 5 additions & 1 deletion catalog/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,11 @@ def finder(request, slugs: str = ""):
type=Category.CategoryType.MASTER_SPECIALIZATION,
then=Value(2),
),
default=Value(3),
When(
type=Category.CategoryType.AGGREGATION,
then=Value(3),
),
default=Value(4),
),
"name",
).all()
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ ignore = [
"PLC1901", # if x != "" is not the same as if x
"RUF012", # Mutable class attributes should be annotated with `typing.ClassVar`
"SIM103", # Return the condition directly
"PLR0912", # Too many branches
"PLR0915", # Too many statements
]

[tool.ruff.lint.per-file-ignores]
Expand Down
Loading