# Music Analysis Model

On this page we explain the model we followed to mathematically analyze what makes music popular, using parameters such as danceability, mood (valence), and liveness.

```
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn import datasets, linear_model
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.manifold import TSNE

%matplotlib inline

# Load the song dataset, then discard the "Unnamed: 0" column
# (presumably a row index that was written out with the CSV -- confirm).
raw_frame = pd.read_csv("../input/data.csv")
data_frame = raw_frame.drop(columns=["Unnamed: 0"])
data_frame.head()
```

For our first trick, we'll create a scatterplot of the songs' danceability and valence values. We also fit a linear regression to show how the two are related.

```
# Fit a simple linear regression of valence on danceability and draw the
# fitted line over a scatter plot of all songs.

# Reshape both columns to (n_samples, 1), the 2-D layout the regressor is fed.
x = data_frame["danceability"].values.reshape(-1, 1)
y = data_frame["valence"].values.reshape(-1, 1)

regr = linear_model.LinearRegression()
regr.fit(x, y)

fig = plt.figure(figsize=(6, 6))
fig.suptitle("Correlation between danceability and song mood")

ax = plt.subplot(1, 1, 1)
ax.scatter(x, y, alpha=0.5)
ax.plot(x, regr.predict(x), color="red", linewidth=3)

# Major ticks every 0.1 and minor ticks every 0.02 on both axes.
# The earlier plt.xticks(()) / plt.yticks(()) calls were removed: they cleared
# the ticks only for these locators to re-create them straight away.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_locator(ticker.MultipleLocator(0.1))
    axis.set_minor_locator(ticker.MultipleLocator(0.02))

plt.xlabel("danceability")
plt.ylabel("valence")

plt.show()
```

<figure><img src="https://2256414207-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FXPA2rzfR4ZC98UyQBM0p%2Fuploads%2FydtjTrqqrrxWHvAMHmjb%2F__results___4_0.png?alt=media&#x26;token=e556d09b-eeae-4068-80eb-a58d5f7b33bf" alt=""><figcaption></figcaption></figure>

Now to create some histograms. The plot on the left illustrates the distribution of songs based on their energy levels, whereas the one on the right is a "heatmap" (a histogram in two dimensions) that shows how many songs fall into each bin of danceability and valence.

```
# Two panels side by side: a 1-D histogram of energy (left) and a 2-D
# histogram of danceability vs. valence with its colorbar (right).
x_name, y_name = "danceability", "valence"

fig, axes = plt.subplots(1, 2, sharey=False, sharex=False, figsize=(10, 5))
energy_ax, density_ax = axes
fig.suptitle("Histograms")

# Left panel: distribution of energy.
energy_ax.hist(data_frame["energy"])
energy_ax.set_xlabel("energy")

# Right panel: hist2d returns a 4-tuple whose last item is the drawn image,
# which is what the colorbar needs.
*_, mesh = density_ax.hist2d(data_frame[x_name], data_frame[y_name], bins=20)
density_ax.set_xlabel(x_name)
density_ax.set_ylabel(y_name)

plt.colorbar(mesh, ax=density_ax)

plt.show()
```

<figure><img src="https://2256414207-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FXPA2rzfR4ZC98UyQBM0p%2Fuploads%2FiBLsgIh6XJGVG5WLas9u%2F__results___6_0.png?alt=media&#x26;token=67f64a04-0a22-45e4-953d-9da50cf1a91f" alt=""><figcaption></figcaption></figure>

## This is where the fun begins <a href="#this-is-where-the-fun-begins" id="this-is-where-the-fun-begins"></a>

Next, we generate a list of "chosen" traits and use principal component analysis (PCA) to reduce that list to 3 dimensions, effectively creating a matrix suitable for generating the following 3-dimensional plot. The smaller the distance between any two songs, the more similar their traits.

```
# Project the chosen audio traits onto three principal components and show
# every song as a marker in an interactive 3-D scatter, colored by danceability.
import plotly.offline as py
import plotly.graph_objs as go
import plotly.tools as tls

py.init_notebook_mode(connected=True)

chosen = [
    "energy", "liveness", "tempo", "valence", "loudness",
    "speechiness", "acousticness", "danceability", "instrumentalness",
]

# Hover label for each marker: "<artist> - <song title>".
text1 = data_frame["artist"] + " - " + data_frame["song_title"]
text2 = text1.values

# Marker colors come from the danceability column.
y = data_frame["danceability"].values

# Min-max scale the chosen columns before running PCA on them.
min_max_scaler = MinMaxScaler()
scaled = min_max_scaler.fit_transform(data_frame[chosen].values)

pca = PCA(n_components=3)
X = pca.fit(scaled).transform(scaled)

trace = go.Scatter3d(
    x=X[:, 0],
    y=X[:, 1],
    z=X[:, 2],
    text=text2,
    mode="markers",
    marker={"size": 8, "color": y},
)

fig = go.Figure(data=[trace])
py.iplot(fig, filename="test-graph")
```

The resulting interactive 3D plot is shown below. Each marker is one song, labelled "artist - song title" and colored by its danceability.<br>

<figure><img src="https://2256414207-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FXPA2rzfR4ZC98UyQBM0p%2Fuploads%2FNDZfrf1BLnfs4Vhw0sCq%2FScreenshot_1.png?alt=media&#x26;token=5e65c660-292d-44f3-9835-16de6e570d87" alt=""><figcaption></figcaption></figure>

In another attempt, we use a smaller "chosen" list and PCA to generate a two-dimensional graph. This specific graph was chosen due to its seemingly spot-on axis names.

```
# Project four audio traits onto two principal components and plot every song
# as a marker, colored by loudness.
# Relies on `py` (plotly.offline) having been imported by an earlier cell.
chosen = ["energy", "liveness", "tempo", "valence"]

# Hover label for each marker: "<artist> - <song title>".
text1 = data_frame["artist"] + " - " + data_frame["song_title"]
text2 = text1.values

X = data_frame[chosen].values
y = data_frame["loudness"].values

# Min-max scale the chosen columns before running PCA on them.
min_max_scaler = MinMaxScaler()
X = min_max_scaler.fit_transform(X)

pca = PCA(n_components=2)
pca.fit(X)

X = pca.transform(X)

fig = {
    "data": [
        {
            "x": X[:, 0],
            "y": X[:, 1],
            "text": text2,
            "mode": "markers",
            # Marker size must be numeric (was the string "8"); this also
            # matches the size=8 used by the 3-D plot.
            "marker": {"size": 8, "color": y},
        }
    ],
    "layout": {
        "xaxis": {"title": "How hard is this to dance to?"},
        "yaxis": {"title": "How metal is this?"},
    },
}

py.iplot(fig, filename="test-graph2")
```

<figure><img src="https://2256414207-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FXPA2rzfR4ZC98UyQBM0p%2Fuploads%2F6MOhe6kBw7BEFLrNy1g5%2FScreenshot_2.png?alt=media&#x26;token=d4b0cadb-dad5-4c09-b2df-c751d57e0063" alt=""><figcaption></figcaption></figure>

Last but not least, we generate a similar two-dimensional graph using t-SNE instead of PCA, this time with the full nine-trait "chosen" list again.

```
# Embed the nine chosen audio traits in two dimensions with t-SNE, report how
# long the embedding took, and plot every song as a marker colored by loudness.
# Relies on `py` (plotly.offline) having been imported by an earlier cell.
import time

chosen = ["energy", "liveness", "tempo", "valence", "loudness",
          "speechiness", "acousticness", "danceability", "instrumentalness"]

# Hover label for each marker: "<artist> - <song title>". Recomputed here so
# the cell does not depend on `text2` left over from a previous cell.
text2 = (data_frame["artist"] + " - " + data_frame["song_title"]).values

X = data_frame[chosen].values
y = data_frame["loudness"].values

# Min-max scale the chosen columns before running t-SNE on them.
min_max_scaler = MinMaxScaler()
X = min_max_scaler.fit_transform(X)

time_start = time.time()
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter`;
# switch the keyword if this line warns or raises on your version.
tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
tsne_results = tsne.fit_transform(X)

print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))

fig = {
    "data": [
        {
            "x": tsne_results[:, 0],
            "y": tsne_results[:, 1],
            "text": text2,
            "mode": "markers",
            # Marker size must be numeric (was the string "8"); this also
            # matches the size=8 used by the 3-D plot.
            "marker": {"size": 8, "color": y},
        }
    ],
    "layout": {
        "xaxis": {"title": "x-tsne"},
        "yaxis": {"title": "y-tsne"},
    },
}

py.iplot(fig, filename="test-graph2")
```

```
[t-SNE] Computing 121 nearest neighbors...
[t-SNE] Indexed 2017 samples in 0.002s...
[t-SNE] Computed neighbors for 2017 samples in 0.155s...
[t-SNE] Computed conditional probabilities for sample 1000 / 2017
[t-SNE] Computed conditional probabilities for sample 2000 / 2017
[t-SNE] Computed conditional probabilities for sample 2017 / 2017
[t-SNE] Mean sigma: 0.145413
[t-SNE] KL divergence after 250 iterations with early exaggeration: 71.526894
[t-SNE] Error after 300 iterations: 1.485660
t-SNE done! Time elapsed: 16.126283168792725 seconds
```

<figure><img src="https://2256414207-files.gitbook.io/~/files/v0/b/gitbook-x-prod.appspot.com/o/spaces%2FXPA2rzfR4ZC98UyQBM0p%2Fuploads%2FpgfGFK0T1520SDysYOto%2FScreenshot_3.png?alt=media&#x26;token=49261efb-077f-4e94-b5e3-dbc317d27e64" alt=""><figcaption></figcaption></figure>

\ <br>
