Eurovision 2018 Explorations
In [1]:
from io import StringIO

import altair as alt
import networkx as nx
import numpy as np
import pandas as pd
import requests
Scrape Eurovision vote data¶
In [2]:
# Display names of the 43 countries; later used as the outer index keys when
# the scraped tables are concatenated, and as the node/category labels.
proper_names = [
    "Albania", "Armenia", "Australia", "Austria",
    "Azerbaijan", "Belarus", "Belgium", "Bulgaria",
    "Croatia", "Cyprus", "Czech Republic", "Denmark",
    "Estonia", "F.Y.R. Macedonia", "Finland", "France",
    "Georgia", "Germany", "Greece", "Hungary",
    "Iceland", "Ireland", "Israel", "Italy", "Latvia",
    "Lithuania", "Malta", "Moldova", "Montenegro",
    "Norway", "Poland", "Portugal", "Romania",
    "Russia", "San Marino", "Serbia", "Slovenia",
    "Spain", "Sweden", "Switzerland", "The Netherlands",
    "Ukraine", "United Kingdom",
]

# URL slugs in the same order as `proper_names`: lower-case, dots removed,
# spaces turned into hyphens (e.g. "F.Y.R. Macedonia" -> "fyr-macedonia").
countries = [
    display_name.lower().replace(".", "").replace(" ", "-")
    for display_name in proper_names
]
In [3]:
# Base address of the per-country voting-details pages; the country slug from
# `countries` is appended to it.
url = 'https://eurovision.tv/event/lisbon-2018/grand-final/voting-details/'
# NOTE(review): the browser-like User-Agent and the XMLHttpRequest marker are
# presumably needed for the site to return the voting table -- confirm.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest"
}

# One table per country, in the same order as `countries`. The later
# `pd.concat(dfs, keys=proper_names)` relies on that positional alignment.
dfs = []
for c in countries:
    # A timeout keeps a stalled connection from hanging the notebook forever.
    r = requests.get(url + c, headers=header, timeout=30)
    # Fail loudly on HTTP errors instead of parsing an error page.
    r.raise_for_status()
    # Wrap the markup in StringIO: passing literal HTML to read_html is deprecated.
    tables = pd.read_html(StringIO(r.text), match='Participants')
    if len(tables) != 1:
        # More than one match would shift every following country's key.
        raise ValueError(
            'expected exactly one "Participants" table for %r, found %d'
            % (c, len(tables))
        )
    dfs.extend(tables)
Append to one DataFrame¶
In [4]:
def to_numeric(df):
    '''
    Return a copy of ``df`` whose juror, rank and points columns are numeric.

    The columns 'A'-'E', 'Jury rank', 'Jury points', 'Televoting rank' and
    'Televoting points' are passed through ``pd.to_numeric`` with
    ``errors='coerce'``, so any value that cannot be parsed becomes NaN.
    All other columns are left as they are and the input frame is not
    modified.

    Raises ``KeyError`` if any of the listed columns is missing.
    '''
    cols = ['A', 'B', 'C', 'D', 'E', 'Jury rank', 'Jury points',
            'Televoting rank', 'Televoting points']
    # Work on a copy so the caller's frame stays untouched.
    converted = df.copy()
    converted[cols] = converted[cols].apply(pd.to_numeric, errors='coerce')
    return converted
# Stack the scraped tables into one frame; `proper_names` becomes the outer
# index level (surfacing as 'level_0' after reset_index), then the
# juror/rank/points columns are made numeric.
df = to_numeric(pd.concat(dfs, keys=proper_names))
Calculate "antisemitism"¶
In [5]:
# For each country (outer index level), average the A-E values of its
# 'Israel' row, then rank the averages so the largest average gets rank 1.
# The table is displayed sorted by that rank.
(
    df.loc[df['Participants'] == 'Israel', ['Participants', 'A', 'B', 'C', 'D', 'E']]
    .assign(jury_avg_rank=lambda t: (t['A'] + t['B'] + t['C'] + t['D'] + t['E']) / 5)
    .assign(Antisemitism=lambda t: t['jury_avg_rank'].rank(ascending=False))
    .reset_index()
    .loc[:, ['level_0', 'jury_avg_rank', 'Antisemitism']]
    .rename(columns={'level_0': 'Country', 'jury_avg_rank': 'Jury Average Ranking'})
    .set_index('Country')
    .sort_values(by='Antisemitism')
)
Out[5]:
In [6]:
# Same per-country average for the 'Israel' rows, but ties share the lowest
# rank (method='min') and the individual A-E values are kept in the output.
(
    df.loc[df['Participants'] == 'Israel', ['Participants', 'A', 'B', 'C', 'D', 'E']]
    .assign(jury_avg_rank=lambda t: (t['A'] + t['B'] + t['C'] + t['D'] + t['E']) / 5)
    .assign(Antisemitism=lambda t: t['jury_avg_rank'].rank(ascending=False, method='min'))
    .reset_index()
    .loc[:, ['level_0', 'jury_avg_rank', 'Antisemitism', 'A', 'B', 'C', 'D', 'E']]
    .rename(columns={'level_0': 'Country', 'jury_avg_rank': 'Jury Average Ranking'})
    .set_index('Country')
)
Out[6]:
In [7]:
# Write the per-country average of the A-E values for the 'Israel' rows and
# its descending rank to CSV (unsorted, indexed by country).
(
    df.loc[df['Participants'] == 'Israel', ['Participants', 'A', 'B', 'C', 'D', 'E']]
    .assign(jury_avg_rank=lambda t: (t['A'] + t['B'] + t['C'] + t['D'] + t['E']) / 5)
    .assign(Antisemitism=lambda t: t['jury_avg_rank'].rank(ascending=False))
    .reset_index()
    .loc[:, ['level_0', 'jury_avg_rank', 'Antisemitism']]
    .rename(columns={'level_0': 'Country', 'jury_avg_rank': 'Jury Average Ranking'})
    .set_index('Country')
    .to_csv('eurovision/antisemite.csv')
)
In [8]:
df.to_csv('eurovision/all1.csv')
Create adjacency matrix for graph stuff¶
In [9]:
def get_adj(df, weight_col, reindex=True):
    '''
    Pivot the long vote table into a from/to matrix.

    Rows ('from') come from the outer index level of ``df`` ('level_0' after
    ``reset_index``), columns ('to') from its 'Participants' column, and the
    cells hold the values of ``weight_col``.

    When ``reindex`` is true the columns are reindexed to ``proper_names``,
    so names absent from 'Participants' appear as all-NaN columns.
    '''
    column_map = {'level_0': 'from', 'Participants': 'to', weight_col: 'weight'}
    long_votes = df.reset_index()[['level_0', 'Participants', weight_col]]
    long_votes = long_votes.rename(columns=column_map)
    matrix = long_votes.pivot(index='from', columns='to', values='weight')
    if not reindex:
        return matrix
    return matrix.reindex(columns=proper_names)
In [10]:
# From/to matrices for jury ranks, jury points and televoting points, each
# with columns reindexed to `proper_names` (get_adj's default).
jury_ranks, jury_points, tele = (
    get_adj(df, column)
    for column in ('Jury rank', 'Jury points', 'Televoting points')
)
In [11]:
# Rows of `tele` are the awarding country and columns the receiving country,
# so the matrix is not symmetric. The default undirected Graph would merge
# A->B and B->A into a single edge (one weight overwriting the other); a
# DiGraph keeps both directions.
G = nx.from_pandas_adjacency(tele.fillna(0), create_using=nx.DiGraph())
nx.write_gexf(G, 'eurovision/euro18_tele_points.gexf')
In [12]:
# Map each display name to a variant with spaces replaced by underscores and
# dots removed; used to relabel rows/columns before the tab-separated exports.
clean_names = {
    name: name.replace(' ', '_').replace('.', '')
    for name in proper_names
}
clean_names
Out[12]:
In [13]:
# Tab-separated export of the televoting matrix with cleaned labels and '-'
# for missing cells.
# NOTE: a later cell writes 'eurovision/tele_circ.csv' again from the
# non-reindexed matrix, replacing this file.
(
    tele
    .rename(columns=clean_names, index=clean_names)
    .fillna('-')
    .to_csv('eurovision/tele_circ.csv', sep='\t')
)
In [14]:
# Tab-separated export of the jury-points matrix with cleaned labels and '-'
# for missing cells.
# NOTE: a later cell writes 'eurovision/jury_circ.csv' again from the
# non-reindexed matrix, replacing this file.
(
    jury_points
    .rename(columns=clean_names, index=clean_names)
    .fillna('-')
    .to_csv('eurovision/jury_circ.csv', sep='\t')
)
In [15]:
# Matrices without the reindex step: columns are only the names that occur
# in 'Participants'.
tele_circ = get_adj(df, 'Televoting points', reindex=False)
jury_circ = get_adj(df, 'Jury points', reindex=False)

# Export both with cleaned labels, '-' for missing cells, tab-separated.
for _matrix, _path in (
    (tele_circ, 'eurovision/tele_circ.csv'),
    (jury_circ, 'eurovision/jury_circ.csv'),
):
    (
        _matrix
        .fillna('-')
        .rename(columns=clean_names, index=clean_names)
        .to_csv(_path, sep='\t')
    )
Datashader (at last!)¶
In [19]:
import datashader as ds
import datashader.transfer_functions as tf
from datashader.layout import random_layout, circular_layout, forceatlas2_layout
from datashader.bundling import connect_edges, hammer_bundle
from itertools import chain
In [20]:
# Node table: one row per country, its row position matching the integer
# category codes used for the edge endpoints.
nodes = pd.DataFrame({'name': proper_names})
nodes.tail()
Out[20]:
In [21]:
weight_col = 'Televoting points'

# Edge list: one row per (awarding country, participant) pair that has a
# value in `weight_col`. Endpoints are encoded as positions in
# `proper_names`; a name not in that list gets code -1.
edges = (
    df.reset_index()
    .loc[:, ['level_0', 'Participants', weight_col]]
    .rename(columns={'level_0': 'source', 'Participants': 'target', weight_col: 'weight'})
    .dropna()
    .assign(
        source=lambda e: pd.Categorical(e['source'], categories=proper_names).codes,
        target=lambda e: pd.Categorical(e['target'], categories=proper_names).codes,
    )
)
In [22]:
# Both layouts draw random numbers (initial positions / random angles when
# uniform=False). A fixed seed makes the node positions, and therefore the
# rendered figures, reproducible under Restart & Run All.
forcedirected = forceatlas2_layout(nodes, edges, seed=0)
circular = circular_layout(nodes, uniform=False, seed=0)
In [23]:
# Keyword arguments shared by every ds.Canvas created in the plotting helpers.
cvsopts = {'plot_height': 800, 'plot_width': 800}
def nodesplot(nodes, name=None, canvas=None, cat=None):
    '''
    Rasterise the node positions into a red, spread-out point image.

    nodes  : frame with 'x' and 'y' columns.
    name   : name given to the resulting image.
    canvas : canvas to draw on; a new ds.Canvas(**cvsopts) when None.
    cat    : optional column name; when given, points are aggregated with
             ds.count_cat(cat) instead of the default aggregator.
    '''
    if canvas is None:
        canvas = ds.Canvas(**cvsopts)
    aggregator = ds.count_cat(cat) if cat is not None else None
    point_agg = canvas.points(nodes, 'x', 'y', aggregator)
    shaded = tf.shade(point_agg, cmap=["#FF3333"])
    # Spread by 3 px so single-pixel nodes remain visible.
    return tf.spread(shaded, px=3, name=name)
%time forcedirected = forceatlas2_layout(nodes, edges)
tf.Images(nodesplot(forcedirected, "ForceAtlas2 layout"))
Out[23]:
In [24]:
def edgesplot(edges, name=None, canvas=None):
    '''
    Rasterise edge paths (frame with 'x' and 'y' columns) as a shaded line
    image, counting how many lines cross each pixel.

    A new ds.Canvas(**cvsopts) is created when no canvas is passed.
    '''
    if canvas is None:
        canvas = ds.Canvas(**cvsopts)
    line_agg = canvas.line(edges, 'x', 'y', agg=ds.count())
    return tf.shade(line_agg, name=name)
def graphplot(nodes, edges, name="", canvas=None, cat=None):
    '''
    Draw a graph as one image: the edge layer with the node layer on top.

    When no canvas is given, one is created from ``cvsopts`` whose x/y
    ranges span the min/max of the node coordinates, so both layers share
    the same extent.
    '''
    if canvas is None:
        x_bounds = (nodes.x.min(), nodes.x.max())
        y_bounds = (nodes.y.min(), nodes.y.max())
        canvas = ds.Canvas(**cvsopts, x_range=x_bounds, y_range=y_bounds)

    # Local names chosen so they do not shadow the module-level `np` alias.
    node_layer = nodesplot(nodes, name + " nodes", canvas, cat)
    edge_layer = edgesplot(edges, name + " edges", canvas)
    return tf.stack(edge_layer, node_layer, how="over", name=name)
In [25]:
cd = circular
fd = forcedirected
%time cd_d = graphplot(cd, connect_edges(cd,edges), "Circular layout")
%time fd_d = graphplot(fd, connect_edges(fd,edges), "Force-directed")
%time cd_b = graphplot(cd, hammer_bundle(cd,edges), "Circular layout, bundled")
%time fd_b = graphplot(fd, hammer_bundle(fd,edges), "Force-directed, bundled")
tf.Images(cd_d,fd_d,cd_b,fd_b).cols(2)
Out[25]:
HoloViews¶
In [48]:
import holoviews as hv
# Load the HoloViews notebook extension with the Bokeh backend.
hv.extension('bokeh')
# Default plot options for Nodes and Graph elements: width and height of 500,
# with both axes hidden.
%opts Nodes Graph [width=500 height=500 xaxis=None yaxis=None]