Spotify

Scraping the Billboard website and making Spotify playlists.


Functionality

This was a really fun project using APIs and web scraping to create Spotify playlists. The program asks the user for a date and uses it to retrieve Billboard's Hot 100 chart for that date. It then sends the user to Spotify to authorise the app, after which a new playlist is automatically created with the 100 tracks.

Beautiful Soup

Beautiful Soup is a Python library that simplifies web scraping by helping you extract data from HTML pages. It lets you navigate, search, and modify elements in a webpage’s structure with ease. When combined with libraries like requests, it allows you to download and parse web content efficiently. Beautiful Soup supports tag, attribute, and text-based searches, making it ideal for gathering specific information from websites. It’s also resilient to messy or inconsistent HTML, which is common when scraping real-world pages.


Code


Step-by-Step

  1. Ask the user for a date to travel to
  2. Use Python's requests library to fetch the relevant Billboard Hot 100 page
  3. Use Beautiful Soup to parse the HTML, extracting song names and artists from the page
  4. Initialise Spotify instance
    1. Define scope for authorisation, i.e. allowing playlist creation and modification
    2. Redirect user to auth page for Spotify login
    3. Spotify redirects back with an authorisation code, which is exchanged for an access token; the token is then used to look up the user's id and create a playlist on the user's account
    4. The earlier list of song names and artists can then be searched for on Spotify and added to a playlist

Code Files

                            
# Driver script: scrape the Billboard Hot 100 for a user-supplied date and
# copy the chart into a new Spotify playlist on the user's account.
from urllib.parse import parse_qs, urlparse

import requests
from bs4 import BeautifulSoup
from spotify import Spotify

BILLBOARD_ENDPOINT = "https://www.billboard.com/charts/hot-100/"

# Browser-like User-Agent sent with the Billboard request.
# NOTE(review): presumably needed because the site rejects the default
# python-requests agent -- confirm before removing.
header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:137.0) Gecko/20100101 Firefox/137.0"}

date = input("What year do you want to travel to? (YYYY-MM-DD format): ")
# A timeout stops the script from hanging forever if Billboard never answers.
response = requests.get(url=f"{BILLBOARD_ENDPOINT}{date}/", headers=header, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.text, "html.parser")

# Each chart row is an <li>; the title is its <h3> and the artist is the
# <span> immediately following that <h3>.
songs = soup.select(selector='li.o-chart-results-list__item > h3#title-of-a-story')
artists = soup.select(selector='li.o-chart-results-list__item > h3#title-of-a-story + span.c-label')

song_list = [song.text.strip() for song in songs]
artist_list = [artist.text.strip().replace('Featuring', '') for artist in artists]

# Keep the chart as ordered (artist, song) pairs. A dict keyed by artist would
# silently drop every song but the last for artists with several chart entries.
chart_entries = list(zip(artist_list, song_list))

# Constructing the client prints the authorisation URL the user must visit.
spotify = Spotify()

redirect = input("Paste URL here: ")
# Parse the query string properly so that extra parameters (e.g. "&state=...")
# or stray whitespace do not end up inside the authorisation code.
codes = parse_qs(urlparse(redirect.strip()).query).get("code")
if not codes:
    raise SystemExit("The pasted URL does not contain a 'code' query parameter.")
code = codes[0]

spotify.get_bearer(code)
spotify.get_user_id()
playlist_name = input("What would you like to call your playlist? ")
spotify.create_playlist(playlist_name)

# Search for every chart entry and add the first match to the new playlist.
for artist, song in chart_entries:
    spotify.get_song(artist, song)
                            
                        
                            
import urllib.parse
import requests
import os

class Spotify:
    """Small Spotify Web API client built around the authorization-code flow.

    Typical use: construct the object (which prints the authorisation URL),
    pass the code from the redirect to ``get_bearer``, then call
    ``get_user_id``, ``create_playlist`` and ``get_song`` for each track.
    """

    # Seconds to wait for any Spotify HTTP call; without a timeout ``requests``
    # can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Read credentials from the environment and print the auth URL."""
        # Application credentials come from the CLIENT_ID / CLIENT_SECRET
        # environment variables (``None`` when unset).
        self.CLIENT_ID = os.environ.get('CLIENT_ID')
        self.CLIENT_SECRET = os.environ.get('CLIENT_SECRET')
        # Sent with both the authorize request and the token exchange.
        self.REDIRECT_URI = 'https://www.example.com/'
        self.code = ''
        self.access_token = ''
        self.refresh_token = ''
        self.user_id = ''
        self.playlist_id = ''
        self.authorize()

    @staticmethod
    def _parse_response(response, action):
        """Return the JSON object in *response* or raise a descriptive error.

        Raises:
            RuntimeError: if the HTTP status signals failure or the body is
                not a JSON object.
        """
        try:
            payload = response.json()
        except ValueError:
            payload = None
        if not response.ok or not isinstance(payload, dict):
            detail = payload if payload is not None else response.text
            raise RuntimeError(
                f"Spotify {action} failed (HTTP {response.status_code}): {detail}"
            )
        return payload

    def authorize(self):
        """Print and return the URL the user must visit to grant access."""
        scopes = 'playlist-modify-public playlist-modify-private user-read-private user-read-email'  # Add more scopes as needed
        auth_url = 'https://accounts.spotify.com/authorize'
        params = {
            "client_id": self.CLIENT_ID,
            "response_type": "code",
            "redirect_uri": self.REDIRECT_URI,
            "scope": scopes
        }

        url = f"{auth_url}?{urllib.parse.urlencode(params)}"
        print("Visit this URL to authorize the app:", url)
        return url

    def get_bearer(self, code):
        """Exchange an authorisation *code* for access and refresh tokens.

        The code is also written to ``code.txt`` in the working directory.

        Raises:
            RuntimeError: if the token endpoint does not return an access
                token (for example when the code is invalid or expired).
        """
        with open("code.txt", 'w') as code_file:
            code_file.write(code)

        token_url = 'https://accounts.spotify.com/api/token'
        data = {
            'grant_type': 'authorization_code',
            'code': code,
            'redirect_uri': self.REDIRECT_URI,
            'client_id': self.CLIENT_ID,
            'client_secret': self.CLIENT_SECRET
        }

        response = requests.post(token_url, data=data, timeout=self.REQUEST_TIMEOUT)
        tokens = response.json()

        # Fail here rather than storing ``None`` and crashing later with an
        # unrelated KeyError in the first authenticated call.
        if "access_token" not in tokens:
            raise RuntimeError(f"Failed to obtain token: {tokens}")

        self.access_token = tokens["access_token"]
        self.refresh_token = tokens.get("refresh_token")  # Save this for future use

        # Tokens are credentials, so they are deliberately not echoed to stdout.
        print("Spotify access token obtained.")

    def refresh_access_token(self):
        """Use the stored refresh token to obtain a new access token.

        Raises:
            RuntimeError: if the response does not contain an access token.
        """
        token_url = 'https://accounts.spotify.com/api/token'
        data = {
            'grant_type': 'refresh_token',
            'refresh_token': self.refresh_token,
            'client_id': self.CLIENT_ID,
            'client_secret': self.CLIENT_SECRET
        }

        response = requests.post(token_url, data=data, timeout=self.REQUEST_TIMEOUT)
        tokens = response.json()

        if "access_token" not in tokens:
            raise RuntimeError(f"Failed to refresh token: {tokens}")
        self.access_token = tokens["access_token"]

    def get_user_id(self):
        """Fetch the current user's profile and store its id in ``user_id``.

        Raises:
            RuntimeError: if the profile request fails.
        """
        header = {
            'Authorization': f"Bearer {self.access_token}",
        }
        response = requests.get(
            url="https://api.spotify.com/v1/me",
            headers=header,
            timeout=self.REQUEST_TIMEOUT,
        )
        profile = self._parse_response(response, "profile lookup")
        print(profile)

        self.user_id = profile['id']

    def create_playlist(self, name):
        """Create a playlist called *name* and store its id in ``playlist_id``.

        Raises:
            RuntimeError: if the playlist could not be created.
        """
        header = {
            'Authorization': f"Bearer {self.access_token}",
            'Content-Type': 'application/json',
        }

        body = {
            "name": name
        }

        response = requests.post(
            url=f"https://api.spotify.com/v1/users/{self.user_id}/playlists",
            headers=header,
            json=body,
            timeout=self.REQUEST_TIMEOUT,
        )
        playlist = self._parse_response(response, "playlist creation")
        print(playlist)
        self.playlist_id = playlist['id']

    def get_song(self, artist, song):
        """Search for *song* by *artist* and add the first hit to the playlist.

        A failed search or a search with no results is reported and skipped
        so that one bad track does not abort a whole batch.
        """
        endpoint = 'https://api.spotify.com/v1/search'

        header = {
            'Authorization': f"Bearer {self.access_token}",
        }

        query = f'artist:{artist} track:{song} '

        params = {
            'q': query,
            'type': 'track',
            'limit': 1,
            # 'market': 'GB'
        }

        response = requests.get(
            url=endpoint,
            params=params,
            headers=header,
            timeout=self.REQUEST_TIMEOUT,
        )
        if not response.ok:
            # Error payloads have no 'tracks' key; report instead of raising KeyError.
            print(f"Search failed for '{song}' by '{artist}' "
                  f"(HTTP {response.status_code}): {response.text}")
            return

        results = response.json()
        print(results)

        items = results.get('tracks', {}).get('items', [])
        if not items:
            print(f"No Spotify match found for '{song}' by '{artist}'; skipping.")
            return

        self.add_to_playlist(items[0]['uri'])

    def add_to_playlist(self, uri):
        """Append the track identified by *uri* to the current playlist."""
        header = {
            'Authorization': f"Bearer {self.access_token}",
            'Content-Type': 'application/json',
        }

        body = {
            "uris": [uri]
        }

        response = requests.post(
            url=f"https://api.spotify.com/v1/playlists/{self.playlist_id}/tracks",
            json=body,
            headers=header,
            timeout=self.REQUEST_TIMEOUT,
        )
        print(response.text)
                            
                        
Project image