-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathweb_scrape.py
More file actions
39 lines (32 loc) · 1.13 KB
/
web_scrape.py
File metadata and controls
39 lines (32 loc) · 1.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import requests as rq
from bs4 import BeautifulSoup as bs
def get_album_info(album_name: str, artist_name: str = None) -> dict:
    """Fetch and parse the English Wikipedia article for an album.

    Args:
        album_name: Wikipedia page title of the album, e.g.
            ``"Good_Kid,_M.A.A.D_City"``. Already percent-encoded titles
            are passed through unchanged.
        artist_name: Optional artist name. It is not used for the lookup;
            it is only echoed back in the result.

    Returns:
        A dict with the keys:
            ``status_code``: HTTP status code of the response.
            ``content``: the page parsed with ``html.parser`` when the
                status code is 200, otherwise ``None``.
            ``album_name``: the ``album_name`` argument, unchanged.
            ``artist_name``: the ``artist_name`` argument, unchanged.

    Raises:
        Whatever the HTTP client raises for connection failures or when
        the request exceeds the timeout; nothing is caught here.
    """
    # Function-scope import so the block stays self-contained.
    from urllib.parse import quote

    # Escape characters that would otherwise change the URL's structure:
    # a '?' or '#' inside a title would be parsed as the start of a query
    # string or fragment and silently fetch a different page. '%' is kept
    # safe so callers that pass pre-encoded titles keep working.
    page_title = quote(album_name, safe="/%:,;@!$&'()*+=")
    url = f"https://en.wikipedia.org/wiki/{page_title}"

    # Without a timeout a stalled connection would block forever.
    # NOTE(review): Wikimedia asks clients to send a descriptive
    # User-Agent header; consider adding one with contact details.
    response = rq.get(url, timeout=10)

    parsed_html = None
    if response.status_code == 200:
        parsed_html = bs(response.content, 'html.parser')

    # TODO: attempt to extract the following details from parsed_html:
    # 1. Release Year
    # 2. Genre Tags (if possible)
    # 3. Label
    # 4. Producer(s)
    # 5. Track number
    # 6. Length (if version is available)
    return {
        "status_code": response.status_code,
        "content": parsed_html,
        "album_name": album_name,
        "artist_name": artist_name,
    }
def main():
    """Fetch one hard-coded album page and print its prettified HTML.

    If the page could not be fetched (non-200 response), prints the HTTP
    status code instead of the page.
    """
    album_name = "Good_Kid,_M.A.A.D_City"
    print(f"Fetching information for album: {album_name}")

    album_info = get_album_info(album_name)
    page = album_info['content']
    if page is None:
        # get_album_info sets content to None for any non-200 response;
        # calling .prettify() on it would raise AttributeError.
        print(f"Request failed with HTTP status {album_info['status_code']}")
        return
    print(page.prettify())


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()