## Get genres

Get genres

import os
import spotipy

# Get your auth here: https://developer.spotify.com/web-api/search-item/
# NOTE(review): a bearer token should not live in source control. Prefer
# setting the SPOTIFY_AUTH_TOKEN environment variable; the literal below is
# kept only as a backward-compatible fallback and should be removed/revoked.
AUTH = os.environ.get(
    "SPOTIFY_AUTH_TOKEN",
    "BQB4iYLVA8R2758qiVHtMmYnpiDUs_zc_pWDpHQCOxxQArffRUeqUakqWZXU6BrLg2B3dAGpB7Dy-HxhRjeKUXj9Pd6uSzeCbYGZNKGGsn4vJMDYtiMfX3RZpWARAuziHxCBHo-OZPw",
)

# Directory whose sub-folders are named after artists.
MIDI_DIR = 'clean_midi'

# Get artists from folder names. Hidden entries are skipped, and so are plain
# files (e.g. log files such as err.txt / out.txt) that are not artist folders.
artists = [
    name for name in os.listdir(MIDI_DIR)
    if not name.startswith('.') and os.path.isdir(os.path.join(MIDI_DIR, name))
]

# Spotify Web API client authenticated with the token above.
sp = spotipy.Spotify(auth=AUTH)

# Maps artist name -> list of genre strings; filled in by the lookup loop.
genres = {}

# Look up each artist on Spotify and record the genres of the top search hit.
# Artists with no search hit are recorded with an empty genre list.
for i, artist in enumerate(artists):
    # Reset per iteration so the error report below never shows an unbound
    # name or the previous artist's search results.
    items = []
    try:
        results = sp.search(q=artist, type='artist', limit=1)
        items = results['artists']['items']
        # An empty result list means Spotify found no matching artist.
        genre_list = items[0]['genres'] if items else []
    except Exception as e:
        # Best-effort: report the failure (network/API/response-shape error)
        # and carry on with the remaining artists.
        print(artist, items, e)
        continue

    genres[artist] = genre_list
    # Show a small sample of the results as a sanity check.
    if i < 5:
        print(artist, genre_list[:5])
"Weird Al" Yankovic ['comedy rock', 'comic']
10,000 Maniacs ['dance rock', 'folk', 'lilith', 'mellow gold', 'new wave pop']
101 Strings ['adult standards', 'ballroom', 'christmas', 'easy listening', 'exotica']
10cc ['album rock', 'art rock', 'british blues', 'classic funk rock', 'classic rock']
1910 Fruitgum Company ['brill building pop', 'bubblegum pop', 'classic garage rock', 'folk rock', 'merseybeat']
Allan Theo []
Barry & Eileen []
Bommbastic []
Charlie Makes The Cook []
De Nachroave []
err.txt []
Gabber Piet []
Gaskin Street []
Gebroeders Grimm []
Good Shape []
Hollenboer []
out.txt []
Power Tools []
Roots Syndicate []
Sailor Moon []
Sandro & Gustavo []
Tears n' Joy []
Television Theme Songs []
The Syndicate of Sound []
Tim Immers []
Typically Tropical []
# import numpy as np
# d = dict( A = np.array(["one","two"]), B = np.array(["four","two","three","one"]))
# dftest = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in d.items()]),dtype="category")
# dftest
     A      B
0  one   four
1  two    two
2  NaN  three
3  NaN    one
# Build one dict per artist (in `genres` iteration order) that maps every
# known genre in `genre_set` to True/False depending on whether that artist's
# genre list contains it.
data = [
    {genre: genre in artist_genres for genre in genre_set}
    for artist_genres in genres.values()
]
data
IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_extraction import DictVectorizer

# Every genre string across all artists (with repeats), and the distinct set.
flattened_list = [item for sublist in genres.values() for item in sublist]
genre_set = set(flattened_list)

# DictVectorizer expects an iterable of {feature: value} mappings, one per
# sample. `genres` itself maps artist -> list of genre strings, which is why
# passing it directly raised "could not convert string to float". Convert each
# artist's genre list into an indicator dict {genre: 1}; artists with no
# genres become an all-zero row. Rows follow the iteration order of `genres`.
v = DictVectorizer(sparse=False)
X = v.fit_transform(
    [dict.fromkeys(genre_list, 1) for genre_list in genres.values()]
)
X
# df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in genres.items()]),dtype='category')
# encoded_df = pd.get_dummies(columns=pd.Series(list(genre_set)))
---------------------------------------------------------------------------

ValueError                                Traceback (most recent call last)

<ipython-input-85-1562f19043f2> in <module>()
      7 
      8 v = DictVectorizer(sparse=False)
----> 9 X = v.fit_transform(genres)
     10 X
     11 # df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in genres.items()]),dtype='category')


/Users/justinshenk/anaconda3/envs/cvloop/lib/python3.5/site-packages/sklearn/feature_extraction/dict_vectorizer.py in fit_transform(self, X, y)
    229             Feature vectors; always 2-d.
    230         """
--> 231         return self._transform(X, fitting=True)
    232 
    233     def inverse_transform(self, X, dict_type=dict):


/Users/justinshenk/anaconda3/envs/cvloop/lib/python3.5/site-packages/sklearn/feature_extraction/dict_vectorizer.py in _transform(self, X, fitting)
    177                         vocab[f] = len(vocab)
    178                         indices.append(vocab[f])
--> 179                         values.append(dtype(v))
    180 
    181             indptr.append(len(indices))


ValueError: could not convert string to float: 'dancehall'
# Display `encoded_df` as the cell's output.
# NOTE(review): `encoded_df` is never assigned in the visible cells (its only
# assignment is commented out) — confirm which cell defines it.
encoded_df
"Weird Al" Yankovic_comedy rock"Weird Al" Yankovic_comic10,000 Maniacs_dance rock10,000 Maniacs_folk10,000 Maniacs_lilith10,000 Maniacs_mellow gold10,000 Maniacs_new wave pop10,000 Maniacs_pop rock10,000 Maniacs_singer-songwriter101 Strings_adult standards...the mamas and papas_christmasthe mamas and papas_classic rockthe mamas and papas_folkthe mamas and papas_folk rockthe mamas and papas_mellow goldthe mamas and papas_psychedelic rockthe mamas and papas_rock-and-rollthe mamas and papas_singer-songwriterthe mamas and papas_soft rockthe mamas and papas_traditional folk
01010000001...0000000000
10101000000...0000000000
20000100000...0000000000
30000010000...1000000000
40000001000...0100000000
50000000100...0010000000
60000000010...0001000000
70000000000...0000100000
80000000000...0000010000
90000000000...0000001000
100000000000...0000000100
110000000000...0000000010
120000000000...0000000001
130000000000...0000000000
140000000000...0000000000
150000000000...0000000000
160000000000...0000000000
170000000000...0000000000
180000000000...0000000000
190000000000...0000000000
200000000000...0000000000
210000000000...0000000000
220000000000...0000000000
230000000000...0000000000
240000000000...0000000000
250000000000...0000000000
260000000000...0000000000
270000000000...0000000000
280000000000...0000000000
290000000000...0000000000
300000000000...0000000000
310000000000...0000000000
320000000000...0000000000
330000000000...0000000000
340000000000...0000000000
350000000000...0000000000
360000000000...0000000000
370000000000...0000000000
380000000000...0000000000

39 rows × 13554 columns