Get genres

In [32]:
import os
import spotipy

# Get your auth here: https://developer.spotify.com/web-api/search-item/
AUTH = "BQB4iYLVA8R2758qiVHtMmYnpiDUs_zc_pWDpHQCOxxQArffRUeqUakqWZXU6BrLg2B3dAGpB7Dy-HxhRjeKUXj9Pd6uSzeCbYGZNKGGsn4vJMDYtiMfX3RZpWARAuziHxCBHo-OZPw"

# Get artists from folder names
artists = [item for item in os.listdir('clean_midi') if not item.startswith('.')]

sp = spotipy.Spotify(auth=AUTH)
genres = {}

for i,artist in enumerate(artists):
    try:
        results = sp.search(q=artist, type='artist',limit=1)
        items = results['artists']['items']
        genre_list = items[0]['genres'] if len(items) else items['genres']
        genres[artist] = genre_list
        if i < 5:
            print(artist,genre_list[:5])
    except Exception as e:
        print(artist,items, e)
"Weird Al" Yankovic ['comedy rock', 'comic']
10,000 Maniacs ['dance rock', 'folk', 'lilith', 'mellow gold', 'new wave pop']
101 Strings ['adult standards', 'ballroom', 'christmas', 'easy listening', 'exotica']
10cc ['album rock', 'art rock', 'british blues', 'classic funk rock', 'classic rock']
1910 Fruitgum Company ['brill building pop', 'bubblegum pop', 'classic garage rock', 'folk rock', 'merseybeat']
Allan Theo []
Barry & Eileen []
Bommbastic []
Charlie Makes The Cook []
De Nachroave []
err.txt []
Gabber Piet []
Gaskin Street []
Gebroeders Grimm []
Good Shape []
Hollenboer []
out.txt []
Power Tools []
Roots Syndicate []
Sailor Moon []
Sandro & Gustavo []
Tears n' Joy []
Television Theme Songs []
The Syndicate of Sound []
Tim Immers []
Typically Tropical []
In [70]:
# import numpy as np
# d = dict( A = np.array(["one","two"]), B = np.array(["four","two","three","one"]))
# dftest = pd.DataFrame(dict([ (k,pd.Series(v)) for k,v in d.items()]),dtype="category")
# dftest
Out[70]:
A B
0 one four
1 two two
2 NaN three
3 NaN one
In [97]:
data = []

for k,v in genres.items():
    encoded_dict = dict()
    for g in genre_set:
        if g in v:
            encoded_dict[g] = True
        else:
            encoded_dict[g] = False
    data.append(encoded_dict)
data
IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.
In [85]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.feature_extraction import DictVectorizer

flattened_list = [item for sublist in list(genres.values()) for item in sublist]
genre_set = set(flattened_list)

v = DictVectorizer(sparse=False)
X = v.fit_transform(genres)
X
# df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in genres.items()]),dtype='category')
# encoded_df = pd.get_dummies(columns=pd.Series(list(genre_set)))
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-85-1562f19043f2> in <module>()
      7 
      8 v = DictVectorizer(sparse=False)
----> 9 X = v.fit_transform(genres)
     10 X
     11 # df = pd.DataFrame(dict([(k,pd.Series(v)) for k,v in genres.items()]),dtype='category')

/Users/justinshenk/anaconda3/envs/cvloop/lib/python3.5/site-packages/sklearn/feature_extraction/dict_vectorizer.py in fit_transform(self, X, y)
    229             Feature vectors; always 2-d.
    230         """
--> 231         return self._transform(X, fitting=True)
    232 
    233     def inverse_transform(self, X, dict_type=dict):

/Users/justinshenk/anaconda3/envs/cvloop/lib/python3.5/site-packages/sklearn/feature_extraction/dict_vectorizer.py in _transform(self, X, fitting)
    177                         vocab[f] = len(vocab)
    178                         indices.append(vocab[f])
--> 179                         values.append(dtype(v))
    180 
    181             indptr.append(len(indices))

ValueError: could not convert string to float: 'dancehall'
In [76]:
encoded_df
Out[76]:
"Weird Al" Yankovic_comedy rock "Weird Al" Yankovic_comic 10,000 Maniacs_dance rock 10,000 Maniacs_folk 10,000 Maniacs_lilith 10,000 Maniacs_mellow gold 10,000 Maniacs_new wave pop 10,000 Maniacs_pop rock 10,000 Maniacs_singer-songwriter 101 Strings_adult standards ... the mamas and papas_christmas the mamas and papas_classic rock the mamas and papas_folk the mamas and papas_folk rock the mamas and papas_mellow gold the mamas and papas_psychedelic rock the mamas and papas_rock-and-roll the mamas and papas_singer-songwriter the mamas and papas_soft rock the mamas and papas_traditional folk
0 1 0 1 0 0 0 0 0 0 1 ... 0 0 0 0 0 0 0 0 0 0
1 0 1 0 1 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 1 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 1 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 1 0 0 0 ... 0 1 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 1 0 0 ... 0 0 1 0 0 0 0 0 0 0
6 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 1 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 1 0 0 0 0 0
8 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 1 0 0 0 0
9 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 1 0 0 0
10 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
11 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 1 0
12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 1
13 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
15 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
17 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
18 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
19 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
20 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
21 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
22 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
23 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
24 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
25 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
26 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
27 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
28 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
29 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
30 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
31 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
32 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
33 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
34 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
35 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
36 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
37 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
38 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

39 rows × 13554 columns