Easily Make Your New Favourite Playlist
Scraping the Billboard website and making Spotify playlists.
Functionality
This was a really fun project using APIs and web scraping to create Spotify playlists. The program asks the user for a date and uses it to retrieve Billboard's Hot 100 chart for that date. It then sends the user to Spotify to authorise the app, after which a new playlist is automatically made with the 100 tracks.
Beautiful Soup
Beautiful Soup is a Python library that simplifies web scraping by helping you extract data from HTML pages. It lets you navigate, search, and modify elements in a webpage’s structure with ease. When combined with libraries like requests, it allows you to download and parse web content efficiently. Beautiful Soup supports tag, attribute, and text-based searches, making it ideal for gathering specific information from websites. It’s also resilient to messy or inconsistent HTML, which is common when scraping real-world pages.
Code
Step-by-Step
- Ask the user for a date to travel to
- Use Python's `requests` library to fetch the relevant Billboard Hot 100 page
- Use Beautiful Soup to parse the HTML, extracting song names and artists from the page
- Initialise Spotify instance
- Define scope for authorisation, i.e. allowing playlist creation and modification
- Redirect user to auth page for Spotify login
- Spotify redirects back with an authorisation code, which is exchanged for an access token; the token is then used to look up the user's ID and create a playlist on the user's account
- The earlier list of song names and artists can then be searched for on Spotify and added to a playlist
Code Files
import requests
from bs4 import BeautifulSoup
from spotify import Spotify
# Driver script: scrape the Billboard Hot 100 for a user-supplied date and
# add every track that Spotify can find to a newly created playlist.

BILLBOARD_ENDPOINT = "https://www.billboard.com/charts/hot-100/"
# A browser-like User-Agent is sent with the chart request.
header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:137.0) Gecko/20100101 Firefox/137.0"}

# The date is appended to the endpoint as a path segment, so stray whitespace
# from the prompt is stripped before building the URL.
date = input("What year do you want to travel to? (YYYY-MM-DD format): ").strip()

response = requests.get(url=f"{BILLBOARD_ENDPOINT}{date}/", headers=header, timeout=10)
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")
songs = soup.select(selector='li.o-chart-results-list__item > h3#title-of-a-story')
artists = soup.select(selector='li.o-chart-results-list__item > h3#title-of-a-story + span.c-label')

song_list = [song.text.strip() for song in songs]
artist_list = [artist.text.strip().replace('Featuring', '') for artist in artists]

# Keep (artist, song) pairs in a list. A dict keyed by artist would silently
# drop every song but one for any artist who appears on the chart more than once.
tracks = list(zip(artist_list, song_list))

# Constructing the client prints the authorization URL the user must visit.
spotify = Spotify()

redirect = input("Paste URL here: ")
# Extract only the value of the `code` query parameter: anything after the
# next '&' (e.g. a `state` parameter) or '#' is not part of the code.
_, marker, remainder = redirect.partition('code=')
if not marker:
    raise SystemExit("The pasted URL does not contain a 'code=' parameter.")
code = remainder.split('&', 1)[0].split('#', 1)[0].strip()

spotify.get_bearer(code)
spotify.get_user_id()

playlist_name = input("What would you like to call your playlist? ")
spotify.create_playlist(playlist_name)

# Search Spotify for each chart entry; get_song adds any match to the playlist.
for artist, song in tracks:
    spotify.get_song(artist, song)
import urllib.parse
import requests
import os
class Spotify:
    """Small Spotify Web API client used to build a playlist.

    Credentials are read from the ``CLIENT_ID`` and ``CLIENT_SECRET``
    environment variables. Creating an instance immediately prints the
    authorization URL; the ``code`` from the resulting redirect is then
    passed to :meth:`get_bearer`, after which :meth:`get_user_id`,
    :meth:`create_playlist` and :meth:`get_song` can be called in that order.
    """

    # Seconds to wait on each HTTP request so a stalled connection cannot
    # hang the program indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Load credentials from the environment and print the authorization URL."""
        self.CLIENT_ID = os.environ.get('CLIENT_ID')
        self.CLIENT_SECRET = os.environ.get('CLIENT_SECRET')
        self.REDIRECT_URI = 'https://www.example.com/'
        self.code = ''
        self.access_token = ''
        self.refresh_token = ''
        self.user_id = ''
        self.playlist_id = ''
        self.authorize()

    def _authorization_url(self):
        """Return the authorization URL for this client's id, redirect URI and scopes."""
        scopes = 'playlist-modify-public playlist-modify-private user-read-private user-read-email'  # Add more scopes as needed
        auth_url = 'https://accounts.spotify.com/authorize'
        params = {
            "client_id": self.CLIENT_ID,
            "response_type": "code",
            "redirect_uri": self.REDIRECT_URI,
            "scope": scopes,
        }
        return f"{auth_url}?{urllib.parse.urlencode(params)}"

    def authorize(self):
        """Print the URL the user must visit to authorize the app."""
        print("Visit this URL to authorize the app:", self._authorization_url())

    def get_bearer(self, code):
        """Exchange an authorization code for access and refresh tokens.

        Args:
            code: The ``code`` value taken from the redirect URL.

        Raises:
            Exception: If the token response contains no ``access_token``.
        """
        # NOTE(review): the authorization code is written to disk in plain
        # text and both tokens are echoed to stdout below. Kept for
        # compatibility with the existing workflow, but worth removing.
        with open("code.txt", 'w') as code_file:
            code_file.write(code)
        token_url = 'https://accounts.spotify.com/api/token'
        data = {
            'grant_type': 'authorization_code',
            'code': code,
            'redirect_uri': self.REDIRECT_URI,
            'client_id': self.CLIENT_ID,
            'client_secret': self.CLIENT_SECRET,
        }
        response = requests.post(token_url, data=data, timeout=self.REQUEST_TIMEOUT)
        tokens = response.json()
        # Fail here, as refresh_access_token does, instead of storing None
        # and crashing later in an unrelated call.
        if "access_token" not in tokens:
            raise Exception(f"Failed to obtain token: {tokens}")
        self.access_token = tokens["access_token"]
        self.refresh_token = tokens.get("refresh_token")  # Save this for future use
        print("Access Token:", self.access_token)
        print("Refresh Token:", self.refresh_token)

    def refresh_access_token(self):
        """Use the stored refresh token to obtain a new access token.

        Raises:
            Exception: If the token response contains no ``access_token``.
        """
        token_url = 'https://accounts.spotify.com/api/token'
        data = {
            'grant_type': 'refresh_token',
            'refresh_token': self.refresh_token,
            'client_id': self.CLIENT_ID,
            'client_secret': self.CLIENT_SECRET,
        }
        response = requests.post(token_url, data=data, timeout=self.REQUEST_TIMEOUT)
        tokens = response.json()
        if "access_token" in tokens:
            self.access_token = tokens["access_token"]
        else:
            raise Exception(f"Failed to refresh token: {tokens}")

    def get_user_id(self):
        """Fetch the current user's profile and store its ``id`` in ``self.user_id``.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        header = {
            'Authorization': f"Bearer {self.access_token}",
        }
        response = requests.get(
            url="https://api.spotify.com/v1/me",
            headers=header,
            timeout=self.REQUEST_TIMEOUT,
        )
        payload = response.json()
        print(payload)
        # Surface API errors as an HTTP error rather than a KeyError on 'id'.
        response.raise_for_status()
        self.user_id = payload['id']

    def create_playlist(self, name):
        """Create a playlist for the stored user and remember its id.

        Args:
            name: Name of the new playlist.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        header = {
            'Authorization': f"Bearer {self.access_token}",
            'Content-Type': 'application/json',
        }
        body = {
            "name": name,
        }
        response = requests.post(
            url=f"https://api.spotify.com/v1/users/{self.user_id}/playlists",
            headers=header,
            json=body,
            timeout=self.REQUEST_TIMEOUT,
        )
        payload = response.json()
        print(payload)
        # Surface API errors as an HTTP error rather than a KeyError on 'id'.
        response.raise_for_status()
        self.playlist_id = payload['id']

    @staticmethod
    def _first_track_uri(payload):
        """Return the URI of the first track in a search payload, or None if there is none."""
        if not isinstance(payload, dict):
            return None
        tracks = payload.get('tracks')
        if not isinstance(tracks, dict):
            return None
        items = tracks.get('items') or []
        if not items or not isinstance(items[0], dict):
            return None
        return items[0].get('uri')

    def get_song(self, artist, song):
        """Search for a track and add the first match to the playlist.

        Args:
            artist: Artist name, used in the ``artist:`` part of the query.
            song: Track title, used in the ``track:`` part of the query.

        Raises:
            requests.HTTPError: If the search request returns an error status.
        """
        endpoint = 'https://api.spotify.com/v1/search'
        header = {
            'Authorization': f"Bearer {self.access_token}",
        }
        query = f'artist:{artist} track:{song} '
        params = {
            'q': query,
            'type': 'track',
            'limit': 1,
            # 'market': 'GB'
        }
        response = requests.get(
            url=f"{endpoint}",
            params=params,
            headers=header,
            timeout=self.REQUEST_TIMEOUT,
        )
        payload = response.json()
        print(payload)
        # An error payload has no 'tracks' key; report it as an HTTP error
        # instead of letting it look like "no search results".
        response.raise_for_status()
        uri = self._first_track_uri(payload)
        if uri is None:
            print(f"No Spotify match found for {song!r} by {artist!r}")
            return
        self.add_to_playlist(uri)

    def add_to_playlist(self, uri):
        """Add a single track URI to the stored playlist and print the raw response.

        Args:
            uri: Track URI to add.
        """
        header = {
            'Authorization': f"Bearer {self.access_token}",
            'Content-Type': 'application/json',
        }
        body = {
            "uris": [uri],
        }
        response = requests.post(
            url=f"https://api.spotify.com/v1/playlists/{self.playlist_id}/tracks",
            json=body,
            headers=header,
            timeout=self.REQUEST_TIMEOUT,
        )
        print(response.text)