Thread Rating:
  • 2 Vote(s) - 2 Average
  • 1
  • 2
  • 3
  • 4
  • 5
Spotify playlist to mp3 (youtube-result)
#1
Converts the songs in a given Spotify playlist to MP3 files, using the audio of matching YouTube search results.
Code:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from time import sleep
from datetime import date, timedelta
import json
import sys
import os
import subprocess
import pytube
from youtubesearchpython import VideosSearch
from moviepy.editor import *
import threading

# Spotify API credentials.
# NOTE(review): client_id / client_secret are assigned here but never read in
# the visible code; the POST below sends literal placeholder strings instead.
client_id = ""
client_secret = ""

# Token endpoint used for the client-credentials request below.
auth_url = "https://accounts.spotify.com/api/token"

# Request an access token. This runs at import time and performs a network
# call; the dict is passed as the second positional argument of requests.post.
auth_response = requests.post(auth_url, {
    'grant_type'    : 'client_credentials',
    'client_id'     : "[insert-client_id]",
    'client_secret' : "[insert-client_secret]"
    })

# Playlist page to scrape (placeholder; must be replaced before running).

url = "[insert-spotify-playlist-link]"

# Decode the token response body as JSON.
auth_response_data = auth_response.json()

# Keep the access token; a KeyError is raised here if the response has no
# 'access_token' entry.
access_token = auth_response_data['access_token']

# Authorization header reused by song_scrape() when it requests further pages.
headers = {
    'Authorization': 'Bearer {token}'.format(token=access_token)
    }

# Base URL of the Spotify Web API.
# NOTE(review): not referenced anywhere in the visible code.
base_url = 'https://api.spotify.com/v1/'

# Containers for collected data.
# NOTE(review): dates and final are never used in the visible code, and
# url_list is rebound a few lines below.
dates=[]
url_list=[]
final = []


# add_url() -- presumably a former helper for collecting URLs; it is not
# defined in the visible code. The list of pages to scrape is just [url].
url_list=[url]


def song_scrape(tracks, songs=None):
    """Collect song metadata from a playlist "tracks" page and its follow-up pages.

    Args:
        tracks: Paging dict with "items", "total", "offset" and "next" keys.
            Each item carries "added_at" and a "track" dict.
        songs: Optional list to append the results to. A fresh list is
            created when omitted, so separate calls never share results.

    Returns:
        The list of song dicts, each with the keys "date", "url",
        "album_url", "name", "album", "release", "image" and "artists".
    """
    # A mutable default ([]) would be shared between calls and accumulate
    # songs from earlier playlists, so the list is created per call.
    if songs is None:
        songs = []

    while True:
        for item in tracks["items"]:
            track = item["track"]
            if not track:
                # Skip entries without track data instead of crashing.
                continue
            album = track["album"]
            images = album["images"]
            artists = [
                artist["name"]
                for artist in track["artists"]
                if artist["name"] != "Various Artists"
            ]
            songs.append({
                "date":     item["added_at"],
                "url":      track["external_urls"]["spotify"],
                "album_url": album["external_urls"]["spotify"],
                "name":     track["name"],
                "album":    album["name"],
                "release":  album["release_date"],
                # Albums without artwork get None rather than an IndexError.
                "image":    images[0]["url"] if images else None,
                "artists":  artists,
            })

        # Keep the original paging rule (more than 100 entries remaining),
        # but never request a missing "next" URL.
        next_url = tracks.get("next")
        if tracks["total"] - tracks["offset"] <= 100 or not next_url:
            return songs

        r = requests.get(next_url, headers=headers)
        sleep(2)  # pause between page requests
        tracks = json.loads(r.text)
    
# Scrape every playlist page in url_list and accumulate its songs in all_songs.
all_songs = []
for u in url_list:
    read_pg = requests.get(u)
    sleep(1)
    soup = BeautifulSoup(read_pg.text, "html.parser")
    # The playlist data is read from the element with id "initial-state",
    # whose text content is parsed as JSON.
    songs = soup.find(id="initial-state")
    print(songs)  # debug output kept from the original script
    if songs is None or not songs.string:
        raise RuntimeError(
            "No 'initial-state' data found on page: {}".format(u)
        )
    songs = json.loads(songs.string)
    items = songs["entities"]["items"]
    # Use the first entry whose key mentions "playlist". Resolving it per
    # page avoids reusing a key left over from a previous iteration.
    key = next((k for k in items if "playlist" in k), None)
    if key is None:
        raise RuntimeError(
            "No playlist entry found on page: {}".format(u)
        )
    tracks = items[key]["tracks"]
    # Pass an explicit fresh list so results from one page are never
    # carried over into the next call.
    all_songs += song_scrape(tracks, [])

def time_s(duration):
    """Convert a colon-separated duration string into a number of seconds.

    The rightmost field counts seconds, the next one minutes, the next one
    hours, and so on: each further field is weighted by another factor of 60.
    """
    fields = duration.split(":")
    return sum(
        int(field) * 60 ** power
        for power, field in enumerate(reversed(fields))
    )

def srch_result(artist, title, parent_dir=r"music_files/"):
    """Pick the most-viewed YouTube search hit for a song.

    Searches for "<artist> <title> audio" (up to 4 results), ignores hits
    longer than 1200 seconds and returns the remaining hit with the highest
    view count.

    Returns:
        (info, file_name), where info has the keys "title", "views",
        "channel", "link" and "duration" and file_name is the sanitized
        "<artist>-<title>.mp4" name. Returns (0, 0) when the matching mp3
        already exists in parent_dir or when no hit has a view count above 0.
    """
    file_name = filename("{}-{}".format(artist, title)) + ".mp4"

    # Nothing to do if the converted file is already on disk.
    mp3_path = parent_dir + file_name[:-3] + "mp3"
    if os.path.isfile(mp3_path):
        return 0, 0

    search = VideosSearch('{} {} audio'.format(artist, title), limit = 4)
    candidates = {}
    for entry in search.result()["result"]:
        # The leading token of the view-count text, with thousands separators
        # removed; anything non-numeric counts as zero views.
        raw_views = entry["viewCount"]["text"].split(" ")[0].replace(",", "")
        views = int(raw_views) if raw_views.isnumeric() else 0

        seconds = time_s(entry["duration"])
        if seconds <= 1200:
            candidates[entry["id"]] = {
                "title"     : entry["title"],
                "views"     : views,
                "channel"   : entry["channel"]["name"],
                "link"      : entry["link"],
                "duration"  : seconds
                }

    # max() keeps the first of several equally viewed candidates.
    best = max(candidates.values(), key=lambda hit: hit["views"], default=None)
    if best is None or best["views"] <= 0:
        return 0, 0
    return best, file_name

def mp4_to_mp3(mp4):
    """Convert an mp4 audio file to mp3 with moviepy and delete the source.

    Args:
        mp4: Path of the source file. The output path is the same path with
            its last three characters replaced by "mp3".

    The source file is removed only after the mp3 was written successfully.
    """
    mp3 = mp4[:-3] + "mp3"
    clip = AudioFileClip(mp4)
    try:
        clip.write_audiofile(mp3)
    finally:
        # Always release the clip, even when writing fails.
        clip.close()
    os.remove(mp4)

def ffmpeg_conv(mp4, duration):
    """Convert an mp4 audio file to mp3 using an ffmpeg binary in the cwd.

    Falls back to mp4_to_mp3() when no file named "ffmpeg" exists in the
    current working directory.

    Args:
        mp4: Path of the source file. The output path is the same path with
            its last three characters replaced by "mp3".
        duration: Unused; kept so existing callers keep working.

    Raises:
        OSError: If the ffmpeg file exists but cannot be executed.
    """
    path_ffmpeg = os.path.join(os.getcwd(), "ffmpeg")
    if not os.path.isfile(path_ffmpeg):
        print("Error: Missing ffmpeg")
        mp4_to_mp3(mp4)
        return
    mp3 = mp4[:-3] + "mp3"

    # Bitrate and sample rate are set explicitly. Original author's note: the
    # duration is estimated from the bitrate, which is not set correctly
    # automatically.
    # An argument list (no shell) keeps unusual characters in the paths from
    # being interpreted by a shell. stdin is detached so ffmpeg can never
    # block waiting for interactive input.
    completed = subprocess.run(
        [path_ffmpeg, "-i", mp4, "-b:a", "192k", "-ar", "48000", mp3],
        stdin=subprocess.DEVNULL,
    )

    # subprocess.run() only returns after ffmpeg has exited, so there is no
    # need to poll for the output file. Polling would hang forever on failure.
    if completed.returncode == 0 and os.path.isfile(mp3):
        os.remove(mp4)
    else:
        print("Error: ffmpeg failed to convert {}".format(mp4))
    return
   
def filename(name):
    """Return a file-system-friendly version of name.

    Rules, applied in this order:
      * one leading character out of "- ._" is dropped,
      * the result is cut to at most 254 characters,
      * each of the characters #%&{}\\<>*?/$!":@| and the space becomes "_",
      * "(" and ")" become "[" and "]",
      * single quotes are removed.

    An empty name yields an empty string.
    """
    illegal = "#%&{}\\<>*?/$!\":@ |"
    illegal_start = "- ._"

    # Guard against indexing into an empty string.
    if not name:
        return ""
    if name[0] in illegal_start:
        name = name[1:]
    name = name[:254]

    # One translation pass replaces the per-character loop and the chained
    # replace() calls; the character sets do not overlap, so order is moot.
    mapping = dict.fromkeys(illegal, "_")
    mapping.update({"(": "[", ")": "]", "'": None})
    return name.translate(str.maketrans(mapping))

def download(top_search, file_name, parent_dir=r"music_files/"):
    """Download the audio stream of a search hit and convert it to mp3.

    Args:
        top_search: Info dict with at least "link" and "duration", as
            returned by srch_result(). Any falsy value (such as the 0 that
            srch_result() returns for "nothing to do") makes this a no-op.
        file_name: Name of the mp4 file to write inside parent_dir.
        parent_dir: Target directory.

    Returns:
        None.
    """
    # The original compared against dir(), which is a list of local names and
    # never equal to a result dict. A plain falsiness check covers the
    # intended "no result" cases.
    if not top_search:
        return
    yt = pytube.YouTube(top_search["link"])

    # Take the last audio-only mp4 stream of the filtered list.
    audio_streams = yt.streams.filter(only_audio=True, file_extension="mp4")
    audio_streams[-1].download(parent_dir, file_name)

    # Convert the mp4 to mp3. os.path.join also works when parent_dir has no
    # trailing separator.
    ffmpeg_conv(os.path.join(parent_dir, file_name), top_search["duration"])
   
def main():
    """Worker loop: take songs off all_songs and download them until empty.

    Each song is claimed by popping it from the shared list, so a song is
    handled by exactly one worker. A failing song is retried a bounded number
    of times and then dropped. In the original a permanently failing song
    stayed in the list forever and the loop never ended; one example is a
    song whose artist list is empty.
    """
    max_attempts = 3
    while True:
        try:
            # list.pop() is a single atomic operation in CPython, so two
            # workers cannot claim the same song.
            song = all_songs.pop(0)
        except IndexError:
            # Nothing left to do.
            return

        for attempt in range(1, max_attempts + 1):
            try:
                yt_info, file_name = srch_result(song["artists"][0], song["name"])
                download(yt_info, file_name)
                break
            except Exception as e:
                # Report instead of silently swallowing the error.
                print("Failed ({}/{}) {}: {!r}".format(
                    attempt, max_attempts, song.get("name"), e))

        # Progress indicator: number of songs still waiting.
        print(len(all_songs))
           

# Start 30 threads that all run main(). This executes at import time.
# The threads are not joined here.
for i in range(30):
    t=threading.Thread(target=main)
    t.start()
Reply
#2
(05-02-2022, 01:57 PM)eso Wrote: Converts the songs in a given Spotify playlist to MP3 files, using the audio of matching YouTube search results.
Code:
from bs4 import BeautifulSoup
import pandas as pd
import requests
from time import sleep
from datetime import date, timedelta
import json
import sys
import os
import subprocess
import pytube
from youtubesearchpython import VideosSearch
from moviepy.editor import *
import threading

# Spotify API credentials.
# NOTE(review): client_id / client_secret are assigned here but never read in
# the visible code; the POST below sends literal placeholder strings instead.
client_id = ""
client_secret = ""

# Token endpoint used for the client-credentials request below.
auth_url = "https://accounts.spotify.com/api/token"

# Request an access token. This runs at import time and performs a network
# call; the dict is passed as the second positional argument of requests.post.
auth_response = requests.post(auth_url, {
    'grant_type'    : 'client_credentials',
    'client_id'     : "[insert-client_id]",
    'client_secret' : "[insert-client_secret]"
    })

# Playlist page to scrape (placeholder; must be replaced before running).

url = "[insert-spotify-playlist-link]"

# Decode the token response body as JSON.
auth_response_data = auth_response.json()

# Keep the access token; a KeyError is raised here if the response has no
# 'access_token' entry.
access_token = auth_response_data['access_token']

# Authorization header reused by song_scrape() when it requests further pages.
headers = {
    'Authorization': 'Bearer {token}'.format(token=access_token)
    }

# Base URL of the Spotify Web API.
# NOTE(review): not referenced anywhere in the visible code.
base_url = 'https://api.spotify.com/v1/'

# Containers for collected data.
# NOTE(review): dates and final are never used in the visible code, and
# url_list is rebound a few lines below.
dates=[]
url_list=[]
final = []


# add_url() -- presumably a former helper for collecting URLs; it is not
# defined in the visible code. The list of pages to scrape is just [url].
url_list=[url]


def song_scrape(tracks, songs=None):
    """Collect song metadata from a playlist "tracks" page and its follow-up pages.

    Args:
        tracks: Paging dict with "items", "total", "offset" and "next" keys.
            Each item carries "added_at" and a "track" dict.
        songs: Optional list to append the results to. A fresh list is
            created when omitted, so separate calls never share results.

    Returns:
        The list of song dicts, each with the keys "date", "url",
        "album_url", "name", "album", "release", "image" and "artists".
    """
    # A mutable default ([]) would be shared between calls and accumulate
    # songs from earlier playlists, so the list is created per call.
    if songs is None:
        songs = []

    while True:
        for item in tracks["items"]:
            track = item["track"]
            if not track:
                # Skip entries without track data instead of crashing.
                continue
            album = track["album"]
            images = album["images"]
            artists = [
                artist["name"]
                for artist in track["artists"]
                if artist["name"] != "Various Artists"
            ]
            songs.append({
                "date":     item["added_at"],
                "url":      track["external_urls"]["spotify"],
                "album_url": album["external_urls"]["spotify"],
                "name":     track["name"],
                "album":    album["name"],
                "release":  album["release_date"],
                # Albums without artwork get None rather than an IndexError.
                "image":    images[0]["url"] if images else None,
                "artists":  artists,
            })

        # Keep the original paging rule (more than 100 entries remaining),
        # but never request a missing "next" URL.
        next_url = tracks.get("next")
        if tracks["total"] - tracks["offset"] <= 100 or not next_url:
            return songs

        r = requests.get(next_url, headers=headers)
        sleep(2)  # pause between page requests
        tracks = json.loads(r.text)
    
# Scrape every playlist page in url_list and accumulate its songs in all_songs.
all_songs = []
for u in url_list:
    read_pg = requests.get(u)
    sleep(1)
    soup = BeautifulSoup(read_pg.text, "html.parser")
    # The playlist data is read from the element with id "initial-state",
    # whose text content is parsed as JSON.
    songs = soup.find(id="initial-state")
    print(songs)  # debug output kept from the original script
    if songs is None or not songs.string:
        raise RuntimeError(
            "No 'initial-state' data found on page: {}".format(u)
        )
    songs = json.loads(songs.string)
    items = songs["entities"]["items"]
    # Use the first entry whose key mentions "playlist". Resolving it per
    # page avoids reusing a key left over from a previous iteration.
    key = next((k for k in items if "playlist" in k), None)
    if key is None:
        raise RuntimeError(
            "No playlist entry found on page: {}".format(u)
        )
    tracks = items[key]["tracks"]
    # Pass an explicit fresh list so results from one page are never
    # carried over into the next call.
    all_songs += song_scrape(tracks, [])

def time_s(duration):
    """Convert a colon-separated duration string into a number of seconds.

    The rightmost field counts seconds, the next one minutes, the next one
    hours, and so on: each further field is weighted by another factor of 60.
    """
    fields = duration.split(":")
    return sum(
        int(field) * 60 ** power
        for power, field in enumerate(reversed(fields))
    )

def srch_result(artist, title, parent_dir=r"music_files/"):
    """Pick the most-viewed YouTube search hit for a song.

    Searches for "<artist> <title> audio" (up to 4 results), ignores hits
    longer than 1200 seconds and returns the remaining hit with the highest
    view count.

    Returns:
        (info, file_name), where info has the keys "title", "views",
        "channel", "link" and "duration" and file_name is the sanitized
        "<artist>-<title>.mp4" name. Returns (0, 0) when the matching mp3
        already exists in parent_dir or when no hit has a view count above 0.
    """
    file_name = filename("{}-{}".format(artist, title)) + ".mp4"

    # Nothing to do if the converted file is already on disk.
    mp3_path = parent_dir + file_name[:-3] + "mp3"
    if os.path.isfile(mp3_path):
        return 0, 0

    search = VideosSearch('{} {} audio'.format(artist, title), limit = 4)
    candidates = {}
    for entry in search.result()["result"]:
        # The leading token of the view-count text, with thousands separators
        # removed; anything non-numeric counts as zero views.
        raw_views = entry["viewCount"]["text"].split(" ")[0].replace(",", "")
        views = int(raw_views) if raw_views.isnumeric() else 0

        seconds = time_s(entry["duration"])
        if seconds <= 1200:
            candidates[entry["id"]] = {
                "title"     : entry["title"],
                "views"     : views,
                "channel"   : entry["channel"]["name"],
                "link"      : entry["link"],
                "duration"  : seconds
                }

    # max() keeps the first of several equally viewed candidates.
    best = max(candidates.values(), key=lambda hit: hit["views"], default=None)
    if best is None or best["views"] <= 0:
        return 0, 0
    return best, file_name

def mp4_to_mp3(mp4):
    """Convert an mp4 audio file to mp3 with moviepy and delete the source.

    Args:
        mp4: Path of the source file. The output path is the same path with
            its last three characters replaced by "mp3".

    The source file is removed only after the mp3 was written successfully.
    """
    mp3 = mp4[:-3] + "mp3"
    clip = AudioFileClip(mp4)
    try:
        clip.write_audiofile(mp3)
    finally:
        # Always release the clip, even when writing fails.
        clip.close()
    os.remove(mp4)

def ffmpeg_conv(mp4, duration):
    """Convert an mp4 audio file to mp3 using an ffmpeg binary in the cwd.

    Falls back to mp4_to_mp3() when no file named "ffmpeg" exists in the
    current working directory.

    Args:
        mp4: Path of the source file. The output path is the same path with
            its last three characters replaced by "mp3".
        duration: Unused; kept so existing callers keep working.

    Raises:
        OSError: If the ffmpeg file exists but cannot be executed.
    """
    path_ffmpeg = os.path.join(os.getcwd(), "ffmpeg")
    if not os.path.isfile(path_ffmpeg):
        print("Error: Missing ffmpeg")
        mp4_to_mp3(mp4)
        return
    mp3 = mp4[:-3] + "mp3"

    # Bitrate and sample rate are set explicitly. Original author's note: the
    # duration is estimated from the bitrate, which is not set correctly
    # automatically.
    # An argument list (no shell) keeps unusual characters in the paths from
    # being interpreted by a shell. stdin is detached so ffmpeg can never
    # block waiting for interactive input.
    completed = subprocess.run(
        [path_ffmpeg, "-i", mp4, "-b:a", "192k", "-ar", "48000", mp3],
        stdin=subprocess.DEVNULL,
    )

    # subprocess.run() only returns after ffmpeg has exited, so there is no
    # need to poll for the output file. Polling would hang forever on failure.
    if completed.returncode == 0 and os.path.isfile(mp3):
        os.remove(mp4)
    else:
        print("Error: ffmpeg failed to convert {}".format(mp4))
    return
   
def filename(name):
    """Return a file-system-friendly version of name.

    Rules, applied in this order:
      * one leading character out of "- ._" is dropped,
      * the result is cut to at most 254 characters,
      * each of the characters #%&{}\\<>*?/$!":@| and the space becomes "_",
      * "(" and ")" become "[" and "]",
      * single quotes are removed.

    An empty name yields an empty string.
    """
    illegal = "#%&{}\\<>*?/$!\":@ |"
    illegal_start = "- ._"

    # Guard against indexing into an empty string.
    if not name:
        return ""
    if name[0] in illegal_start:
        name = name[1:]
    name = name[:254]

    # One translation pass replaces the per-character loop and the chained
    # replace() calls; the character sets do not overlap, so order is moot.
    mapping = dict.fromkeys(illegal, "_")
    mapping.update({"(": "[", ")": "]", "'": None})
    return name.translate(str.maketrans(mapping))

def download(top_search, file_name, parent_dir=r"music_files/"):
    """Download the audio stream of a search hit and convert it to mp3.

    Args:
        top_search: Info dict with at least "link" and "duration", as
            returned by srch_result(). Any falsy value (such as the 0 that
            srch_result() returns for "nothing to do") makes this a no-op.
        file_name: Name of the mp4 file to write inside parent_dir.
        parent_dir: Target directory.

    Returns:
        None.
    """
    # The original compared against dir(), which is a list of local names and
    # never equal to a result dict. A plain falsiness check covers the
    # intended "no result" cases.
    if not top_search:
        return
    yt = pytube.YouTube(top_search["link"])

    # Take the last audio-only mp4 stream of the filtered list.
    audio_streams = yt.streams.filter(only_audio=True, file_extension="mp4")
    audio_streams[-1].download(parent_dir, file_name)

    # Convert the mp4 to mp3. os.path.join also works when parent_dir has no
    # trailing separator.
    ffmpeg_conv(os.path.join(parent_dir, file_name), top_search["duration"])
   
def main():
    """Worker loop: take songs off all_songs and download them until empty.

    Each song is claimed by popping it from the shared list, so a song is
    handled by exactly one worker. A failing song is retried a bounded number
    of times and then dropped. In the original a permanently failing song
    stayed in the list forever and the loop never ended; one example is a
    song whose artist list is empty.
    """
    max_attempts = 3
    while True:
        try:
            # list.pop() is a single atomic operation in CPython, so two
            # workers cannot claim the same song.
            song = all_songs.pop(0)
        except IndexError:
            # Nothing left to do.
            return

        for attempt in range(1, max_attempts + 1):
            try:
                yt_info, file_name = srch_result(song["artists"][0], song["name"])
                download(yt_info, file_name)
                break
            except Exception as e:
                # Report instead of silently swallowing the error.
                print("Failed ({}/{}) {}: {!r}".format(
                    attempt, max_attempts, song.get("name"), e))

        # Progress indicator: number of songs still waiting.
        print(len(all_songs))
           

# Start 30 threads that all run main(). This executes at import time.
# The threads are not joined here.
for i in range(30):
    t=threading.Thread(target=main)
    t.start()
this is very based gonna download entire spotify database salem
Reply


Forum Jump:


Users browsing this thread:
1 Guest(s)