Converts the songs in a given Spotify playlist to MP3 files by searching YouTube for each track and downloading the audio of the top matching result.
Code:
from bs4 import BeautifulSoup
import requests
from time import sleep
import json
import os
import threading

import pytube
from youtubesearchpython import VideosSearch
from moviepy.editor import AudioFileClip
# Spotify API credentials (fill these in)
client_id = "[insert-client_id]"
client_secret = "[insert-client_secret]"
auth_url = "https://accounts.spotify.com/api/token"

# Request an access token using the client-credentials flow
auth_response = requests.post(auth_url, {
    'grant_type': 'client_credentials',
    'client_id': client_id,
    'client_secret': client_secret
})
# Spotify playlist to download
url = "[insert-spotify-playlist-link]"

# Convert the response to JSON and save the access token
auth_response_data = auth_response.json()
access_token = auth_response_data['access_token']

# Authorization header used for all Spotify API requests
headers = {
    'Authorization': 'Bearer {token}'.format(token=access_token)
}

# Base URL for all Spotify API endpoints
base_url = 'https://api.spotify.com/v1/'

# Playlist URLs to scrape (add more links here if needed)
url_list = [url]
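# Note: base_url and the Bearer header above are only exercised when song_scrape()
# follows the paginated "next" URLs Spotify returns for playlists longer than 100
# tracks. For reference only (playlist_id is a placeholder, not defined in this
# script), an equivalent direct API call would look roughly like:
#   requests.get(base_url + "playlists/" + playlist_id + "/tracks", headers=headers)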
def song_scrape(tracks, songs=None):
    """Collect track metadata from a Spotify tracks object, following pagination."""
    global headers
    # Avoid the mutable-default-argument pitfall so repeated calls start fresh
    if songs is None:
        songs = []
    for track in tracks["items"]:
        artists = [i["name"] for i in track["track"]["artists"]]
        if "Various Artists" in artists:
            artists.remove("Various Artists")
        songs.append({
            "date": track["added_at"],
            "url": track["track"]["external_urls"]["spotify"],
            "album_url": track["track"]["album"]["external_urls"]["spotify"],
            "name": track["track"]["name"],
            "album": track["track"]["album"]["name"],
            "release": track["track"]["album"]["release_date"],
            "image": track["track"]["album"]["images"][0]["url"],
            "artists": artists
        })
    # Spotify pages are capped at 100 tracks; follow the "next" URL if there is one
    if tracks.get("next"):
        r = requests.get(tracks["next"], headers=headers)
        sleep(2)
        return song_scrape(json.loads(r.text), songs)
    return songs
# Loop through the playlist URLs and build a list with the info for every song
all_songs = []
for u in url_list:
    read_pg = requests.get(u)
    sleep(1)
    soup = BeautifulSoup(read_pg.text, "html.parser")
    # The public playlist page embeds its data as JSON in a script tag with id "initial-state"
    songs = soup.find(id="initial-state")
    songs = json.loads(songs.string)
    # Find the entity key that belongs to the playlist itself
    key = None
    for key1 in songs["entities"]["items"].keys():
        if "playlist" in key1:
            key = key1
            break
    if key is None:
        continue
    tracks = songs["entities"]["items"][key]["tracks"]
    all_songs += song_scrape(tracks)
def time_s(duration):
    """Convert a "h:mm:ss" or "mm:ss" duration string to seconds."""
    duration = duration.split(":")
    total = 0
    for i in range(1, len(duration) + 1):
        total += int(duration[-i]) * 60 ** (i - 1)
    return total
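# Example values (assuming the "mm:ss" / "h:mm:ss" strings that youtubesearchpython
# returns): time_s("3:45") == 225, time_s("1:02:03") == 3723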
def srch_result(artist, title, parent_dir=r"music_files/"):
    """Search YouTube for the track and return (best_result_info, output_file_name)."""
    file_name = filename("{}-{}".format(artist, title)) + ".mp4"
    # Skip songs that have already been converted
    if os.path.isfile(parent_dir + file_name[:-3] + "mp3"):
        return 0, 0
    info_scraped = dict()
    videosSearch = VideosSearch('{} {} audio'.format(artist, title), limit=4)
    info = videosSearch.result()
    for result in info["result"]:
        view_count = result["viewCount"]["text"].split(" ")[0].replace(",", "")
        view_count = int(view_count) if view_count.isnumeric() else 0
        time = time_s(result["duration"])
        # Ignore anything longer than 20 minutes (full albums, mixes, etc.)
        if time > 1200:
            continue
        info_scraped[result["id"]] = {
            "title": result["title"],
            "views": view_count,
            "channel": result["channel"]["name"],
            "link": result["link"],
            "duration": time
        }
    # Pick the candidate with the most views
    top_search = [0]
    for vid_id in info_scraped:
        if top_search[0] < info_scraped[vid_id]["views"]:
            top_search = [info_scraped[vid_id]["views"], vid_id]
    if top_search == [0]:
        return 0, 0
    return info_scraped[top_search[1]], file_name
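# srch_result() therefore returns either a dict describing the chosen video plus the
# target file name, or (0, 0) when the MP3 already exists or no suitable video is found.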
def mp4_to_mp3(mp4):
    """Fallback conversion with moviepy: strip the video track and write an MP3."""
    mp3 = mp4[:-3] + "mp3"
    audio = AudioFileClip(mp4)
    audio.write_audiofile(mp3)
    audio.close()
    os.remove(mp4)
    return
def ffmpeg_conv(mp4, duration):
    """Convert the downloaded mp4 to mp3 with a local ffmpeg binary, if one is present."""
    path_ffmpeg = os.getcwd() + "/ffmpeg"
    if not os.path.isfile(path_ffmpeg):
        print("Error: Missing ffmpeg, falling back to moviepy")
        mp4_to_mp3(mp4)
        return
    mp3 = mp4[:-3] + "mp3"
    # Re-encode explicitly at 192 kbit/s / 48 kHz; otherwise some players misreport the
    # duration because it is estimated from a bitrate that is not set correctly
    cmd = '{} -i "{}" -b:a 192k -ar 48000 "{}"'.format(path_ffmpeg, mp4, mp3)
    os.system(cmd)
    # os.system() blocks until ffmpeg exits, so a single check is enough
    if os.path.isfile(mp3):
        os.remove(mp4)
    return
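# For a track saved as music_files/Artist-Title.mp4 the generated command is roughly:
#   <cwd>/ffmpeg -i "music_files/Artist-Title.mp4" -b:a 192k -ar 48000 "music_files/Artist-Title.mp3"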
def filename(name):
    """Sanitise a string so it can safely be used as a file name."""
    illegal = "#%&{}\\<>*?/$!\":@ |"
    illegal_start = "- ._"
    name = list(name)
    if name[0] in illegal_start:
        name = name[1:]
    if len(name) > 254:
        name = name[:254]
    for i in range(len(name)):
        if name[i] in illegal:
            name[i] = "_"
    name = "".join(name).replace("(", "[").replace(")", "]")
    name = name.replace("'", "")
    return name
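# Example (behaviour of the function above): filename("AC/DC-Back In Black")
# returns "AC_DC-Back_In_Black", since "/" and spaces are in the illegal set.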
def download(top_search, file_name, parent_dir=r"music_files/"):
    """Download the audio-only stream of the chosen video and convert it to mp3."""
    # srch_result() returns 0 when there is nothing to download
    if top_search == 0:
        return
    yt = pytube.YouTube(top_search["link"])
    # Download the last audio-only mp4 stream that pytube lists
    yt.streams.filter(only_audio=True, file_extension="mp4")[-1].download(parent_dir, file_name)
    # Convert the downloaded mp4 to mp3
    ffmpeg_conv(parent_dir + file_name, top_search["duration"])
# Lock protecting the shared all_songs list, since several threads pop from it
song_lock = threading.Lock()

def main():
    global all_songs
    while True:
        # Take the next song off the shared list (thread-safe)
        with song_lock:
            if not all_songs:
                return
            song = all_songs.pop(0)
        try:
            yt_info, file_name = srch_result(song["artists"][0], song["name"])
            download(yt_info, file_name)
            print(len(all_songs), "songs remaining")
        except Exception as e:
            # Skip songs whose search or download fails instead of retrying forever
            print("Failed to download {}: {}".format(song["name"], e))

# Download with a pool of 30 worker threads
for _ in range(30):
    t = threading.Thread(target=main)
    t.start()
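# Optional (not part of the original script): if the process should block until every
# worker has finished, keep references to the threads and join them, e.g.:
#
#   threads = [threading.Thread(target=main) for _ in range(30)]
#   for t in threads:
#       t.start()
#   for t in threads:
#       t.join()
#   print("All downloads finished")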