Eurovision 2018 Explorations

In [1]:
import pandas as pd
import numpy as np
import altair as alt
import requests
import networkx as nx

Scrape Eurovision vote data

In [2]:
proper_names = ["Albania", "Armenia", "Australia", "Austria", 
                "Azerbaijan", "Belarus", "Belgium", "Bulgaria", 
                "Croatia", "Cyprus", "Czech Republic", "Denmark", 
                "Estonia", "F.Y.R. Macedonia", "Finland", "France", 
                "Georgia", "Germany", "Greece", "Hungary", 
                "Iceland", "Ireland", "Israel", "Italy", "Latvia", 
                "Lithuania", "Malta", "Moldova", "Montenegro", 
                "Norway", "Poland", "Portugal", "Romania", 
                "Russia", "San Marino", "Serbia", "Slovenia", 
                "Spain", "Sweden", "Switzerland", "The Netherlands", 
                "Ukraine", "United Kingdom"]

countries = ["albania", "armenia", "australia", "austria", 
             "azerbaijan", "belarus", "belgium", "bulgaria", 
             "croatia", "cyprus", "czech-republic", "denmark", 
             "estonia", "fyr-macedonia", "finland", "france", 
             "georgia", "germany", "greece", "hungary", 
             "iceland", "ireland", "israel", "italy", "latvia", 
             "lithuania", "malta", "moldova", "montenegro", 
             "norway", "poland", "portugal", "romania", 
             "russia", "san-marino", "serbia", "slovenia", 
             "spain", "sweden", "switzerland", "the-netherlands", 
             "ukraine", "united-kingdom"]
In [3]:
url = 'https://eurovision.tv/event/lisbon-2018/grand-final/voting-details/'

header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}

dfs = []

for c in countries:
    r = requests.get(url+c, headers=header)
    dfs.extend(pd.read_html(r.text, match='Participants'))

Append to on DataFrame

In [4]:
def to_numeric(df):
    '''
    Turn timedelta columns into numeric dtype
    '''
    cols = ['A', 'B', 'C', 'D', 'E', 'Jury rank', 'Jury points',
           'Televoting rank', 'Televoting points']
    numeric = df[cols].apply(pd.to_numeric, errors='coerce')
    df = df.copy()
    df[cols] = numeric
    return df

df = (pd.concat(dfs, keys=proper_names)
             .pipe(to_numeric))

Calculate "antisemitism"

In [5]:
(df[['Participants', 'A', 'B', 'C', 'D', 'E']][df.Participants=='Israel']
 .assign(jury_avg_rank = lambda x: (x.A+x.B+x.C+x.D+x.E)/5)
 .assign(Antisemitism = lambda x: x.jury_avg_rank.rank(ascending=False))
 .reset_index()[['level_0', 'jury_avg_rank', 'Antisemitism']]
 .rename(columns={'level_0': 'Country', 'jury_avg_rank': 'Jury Average Ranking'}).set_index('Country')
 .sort_values(by='Antisemitism')
)
Out[5]:
Jury Average Ranking Antisemitism
Country
Estonia 20.4 1.0
Poland 20.0 2.0
Belarus 17.2 3.0
Norway 16.0 4.0
Montenegro 15.6 5.0
Slovenia 15.4 6.0
Latvia 14.8 7.0
F.Y.R. Macedonia 13.8 8.0
Romania 13.0 9.0
Switzerland 12.4 10.0
Serbia 11.6 11.0
Azerbaijan 11.4 12.0
Portugal 11.0 13.0
Germany 10.2 14.0
The Netherlands 9.8 15.0
Denmark 9.6 16.0
Cyprus 9.2 17.5
Hungary 9.2 17.5
Georgia 8.6 19.0
Italy 8.2 20.0
Croatia 7.4 21.0
Greece 7.2 22.5
Moldova 7.2 22.5
Australia 7.0 24.5
Bulgaria 7.0 24.5
Belgium 6.8 26.0
Sweden 6.0 27.0
Ireland 5.8 28.5
Malta 5.8 28.5
Albania 5.4 30.0
Iceland 5.0 31.0
Lithuania 4.8 32.0
Russia 4.0 33.0
Ukraine 3.8 34.5
United Kingdom 3.8 34.5
Finland 3.0 36.5
Armenia 3.0 36.5
Spain 2.6 38.0
San Marino 2.4 39.5
France 2.4 39.5
Austria 2.2 41.0
Czech Republic 1.6 42.0
Israel NaN NaN
In [6]:
(df[['Participants', 'A', 'B', 'C', 'D', 'E']][df.Participants=='Israel']
 .assign(jury_avg_rank = lambda x: (x.A+x.B+x.C+x.D+x.E)/5)
 .assign(Antisemitism = lambda x: x.jury_avg_rank.rank(ascending=False, method='min'))
 .reset_index()[['level_0', 'jury_avg_rank', 'Antisemitism', 'A', 'B', 'C', 'D', 'E']]
 .rename(columns={'level_0': 'Country', 'jury_avg_rank': 'Jury Average Ranking'}).set_index('Country'))
Out[6]:
Jury Average Ranking Antisemitism A B C D E
Country
Albania 5.4 30.0 6.0 8.0 4.0 6.0 3.0
Armenia 3.0 36.0 2.0 3.0 3.0 4.0 3.0
Australia 7.0 24.0 9.0 8.0 10.0 4.0 4.0
Austria 2.2 41.0 4.0 1.0 1.0 3.0 2.0
Azerbaijan 11.4 12.0 11.0 10.0 12.0 12.0 12.0
Belarus 17.2 3.0 23.0 16.0 10.0 14.0 23.0
Belgium 6.8 26.0 5.0 8.0 11.0 5.0 5.0
Bulgaria 7.0 24.0 6.0 8.0 10.0 7.0 4.0
Croatia 7.4 21.0 6.0 3.0 3.0 3.0 22.0
Cyprus 9.2 17.0 9.0 7.0 10.0 8.0 12.0
Czech Republic 1.6 42.0 2.0 1.0 1.0 3.0 1.0
Denmark 9.6 16.0 13.0 2.0 4.0 16.0 13.0
Estonia 20.4 1.0 24.0 22.0 7.0 24.0 25.0
F.Y.R. Macedonia 13.8 8.0 7.0 12.0 2.0 24.0 24.0
Finland 3.0 36.0 1.0 1.0 10.0 1.0 2.0
France 2.4 39.0 3.0 1.0 5.0 2.0 1.0
Georgia 8.6 19.0 8.0 7.0 6.0 7.0 15.0
Germany 10.2 14.0 5.0 16.0 8.0 11.0 11.0
Greece 7.2 22.0 2.0 7.0 8.0 11.0 8.0
Hungary 9.2 17.0 14.0 13.0 6.0 6.0 7.0
Iceland 5.0 31.0 5.0 5.0 3.0 8.0 4.0
Ireland 5.8 28.0 9.0 5.0 1.0 10.0 4.0
Israel NaN NaN NaN NaN NaN NaN NaN
Italy 8.2 20.0 3.0 8.0 11.0 9.0 10.0
Latvia 14.8 7.0 2.0 23.0 18.0 21.0 10.0
Lithuania 4.8 32.0 3.0 4.0 10.0 4.0 3.0
Malta 5.8 28.0 7.0 6.0 8.0 4.0 4.0
Moldova 7.2 22.0 24.0 4.0 3.0 2.0 3.0
Montenegro 15.6 5.0 26.0 10.0 9.0 18.0 15.0
Norway 16.0 4.0 23.0 12.0 13.0 12.0 20.0
Poland 20.0 2.0 19.0 12.0 24.0 20.0 25.0
Portugal 11.0 13.0 9.0 10.0 13.0 7.0 16.0
Romania 13.0 9.0 9.0 10.0 13.0 25.0 8.0
Russia 4.0 33.0 3.0 3.0 6.0 3.0 5.0
San Marino 2.4 39.0 6.0 2.0 1.0 1.0 2.0
Serbia 11.6 11.0 1.0 17.0 11.0 8.0 21.0
Slovenia 15.4 6.0 23.0 22.0 2.0 11.0 19.0
Spain 2.6 38.0 2.0 2.0 3.0 1.0 5.0
Sweden 6.0 27.0 3.0 10.0 9.0 3.0 5.0
Switzerland 12.4 10.0 13.0 2.0 7.0 17.0 23.0
The Netherlands 9.8 15.0 5.0 9.0 16.0 1.0 18.0
Ukraine 3.8 34.0 3.0 1.0 3.0 8.0 4.0
United Kingdom 3.8 34.0 5.0 4.0 5.0 1.0 4.0
In [7]:
(df[['Participants', 'A', 'B', 'C', 'D', 'E']][df.Participants=='Israel']
 .assign(jury_avg_rank = lambda x: (x.A+x.B+x.C+x.D+x.E)/5)
 .assign(Antisemitism = lambda x: x.jury_avg_rank.rank(ascending=False))
 .reset_index()[['level_0', 'jury_avg_rank', 'Antisemitism']]
 .rename(columns={'level_0': 'Country', 'jury_avg_rank': 'Jury Average Ranking'}).set_index('Country')
).to_csv('eurovision/antisemite.csv')
In [8]:
df.to_csv('eurovision/all1.csv')

Create adjacency matrix for graph stuff

In [9]:
def get_adj(df, weight_col, reindex=True):
    adj = (df.reset_index()[['level_0', 'Participants', weight_col]]
              .rename(columns={'level_0': 'from', 'Participants': 'to', weight_col: 'weight'})
              .pivot(index='from', columns='to', values='weight')
              )
    if reindex:
        adj = adj.reindex(labels=proper_names, axis=1)
    return adj
In [10]:
jury_ranks = get_adj(df, 'Jury rank')
jury_points = get_adj(df, 'Jury points')
tele = get_adj(df, 'Televoting points')
In [11]:
G = nx.from_pandas_adjacency(tele.fillna(0))
nx.write_gexf(G, 'eurovision/euro18_tele_points.gexf')
In [12]:
clean_names = dict(zip(proper_names, [c.replace(' ', '_').replace('.', '') for c in proper_names]))
clean_names
Out[12]:
{'Albania': 'Albania',
 'Armenia': 'Armenia',
 'Australia': 'Australia',
 'Austria': 'Austria',
 'Azerbaijan': 'Azerbaijan',
 'Belarus': 'Belarus',
 'Belgium': 'Belgium',
 'Bulgaria': 'Bulgaria',
 'Croatia': 'Croatia',
 'Cyprus': 'Cyprus',
 'Czech Republic': 'Czech_Republic',
 'Denmark': 'Denmark',
 'Estonia': 'Estonia',
 'F.Y.R. Macedonia': 'FYR_Macedonia',
 'Finland': 'Finland',
 'France': 'France',
 'Georgia': 'Georgia',
 'Germany': 'Germany',
 'Greece': 'Greece',
 'Hungary': 'Hungary',
 'Iceland': 'Iceland',
 'Ireland': 'Ireland',
 'Israel': 'Israel',
 'Italy': 'Italy',
 'Latvia': 'Latvia',
 'Lithuania': 'Lithuania',
 'Malta': 'Malta',
 'Moldova': 'Moldova',
 'Montenegro': 'Montenegro',
 'Norway': 'Norway',
 'Poland': 'Poland',
 'Portugal': 'Portugal',
 'Romania': 'Romania',
 'Russia': 'Russia',
 'San Marino': 'San_Marino',
 'Serbia': 'Serbia',
 'Slovenia': 'Slovenia',
 'Spain': 'Spain',
 'Sweden': 'Sweden',
 'Switzerland': 'Switzerland',
 'The Netherlands': 'The_Netherlands',
 'Ukraine': 'Ukraine',
 'United Kingdom': 'United_Kingdom'}
In [13]:
tele.fillna('-').rename(columns=clean_names, index=clean_names).to_csv('eurovision/tele_circ.csv', sep='\t')
In [14]:
jury_points.fillna('-').rename(columns=clean_names, index=clean_names).to_csv('eurovision/jury_circ.csv', sep='\t')
In [15]:
tele_circ = get_adj(df, 'Televoting points', reindex=False)
jury_circ = get_adj(df, 'Jury points', reindex=False)
tele_circ.fillna('-').rename(columns=clean_names, index=clean_names).to_csv('eurovision/tele_circ.csv', sep='\t')
jury_circ.fillna('-').rename(columns=clean_names, index=clean_names).to_csv('eurovision/jury_circ.csv', sep='\t')

Datashader (at last!)

In [19]:
import datashader as ds
import datashader.transfer_functions as tf
from datashader.layout import random_layout, circular_layout, forceatlas2_layout
from datashader.bundling import connect_edges, hammer_bundle

from itertools import chain
In [20]:
nodes = pd.DataFrame(proper_names, columns=['name'])
nodes.tail()
Out[20]:
name
38 Sweden
39 Switzerland
40 The Netherlands
41 Ukraine
42 United Kingdom
In [21]:
weight_col = 'Televoting points'
edges = (df.reset_index()[['level_0', 'Participants', weight_col]]
         .rename(columns={'level_0': 'source', 'Participants': 'target', weight_col: 'weight'})
         .dropna()
         .assign(source=lambda x: pd.Categorical(x['source'], categories=proper_names).codes)
         .assign(target=lambda x: pd.Categorical(x['target'], categories=proper_names).codes)
         )
In [22]:
forcedirected = forceatlas2_layout(nodes, edges)
circular  = circular_layout(nodes, uniform=False)
In [23]:
cvsopts = dict(plot_height=800, plot_width=800)

def nodesplot(nodes, name=None, canvas=None, cat=None):
    canvas = ds.Canvas(**cvsopts) if canvas is None else canvas
    aggregator=None if cat is None else ds.count_cat(cat)
    agg=canvas.points(nodes,'x','y',aggregator)
    return tf.spread(tf.shade(agg, cmap=["#FF3333"]), px=3, name=name)

%time forcedirected = forceatlas2_layout(nodes, edges)
tf.Images(nodesplot(forcedirected, "ForceAtlas2 layout"))
Wall time: 117 ms
Out[23]:
ForceAtlas2 layout

In [24]:
def edgesplot(edges, name=None, canvas=None):
    canvas = ds.Canvas(**cvsopts) if canvas is None else canvas
    return tf.shade(canvas.line(edges, 'x','y', agg=ds.count()), name=name)
    
def graphplot(nodes, edges, name="", canvas=None, cat=None):
    if canvas is None:
        xr = nodes.x.min(), nodes.x.max()
        yr = nodes.y.min(), nodes.y.max()
        canvas = ds.Canvas(**cvsopts, x_range=xr, y_range=yr)
        
    np = nodesplot(nodes, name + " nodes", canvas, cat)
    ep = edgesplot(edges, name + " edges", canvas)
    return tf.stack(ep, np, how="over", name=name)
In [25]:
cd = circular
fd = forcedirected

%time cd_d = graphplot(cd, connect_edges(cd,edges), "Circular layout")
%time fd_d = graphplot(fd, connect_edges(fd,edges), "Force-directed") 
%time cd_b = graphplot(cd, hammer_bundle(cd,edges), "Circular layout, bundled")
%time fd_b = graphplot(fd, hammer_bundle(fd,edges), "Force-directed, bundled") 

tf.Images(cd_d,fd_d,cd_b,fd_b).cols(2)
Wall time: 1.8 s
Wall time: 948 ms
Wall time: 4.36 s
Wall time: 1.7 s
Out[25]:
Circular layout

Force-directed

Circular layout, bundled

Force-directed, bundled

HoloViews

In [48]:
import holoviews as hv
hv.extension('bokeh')
%opts Nodes Graph [width=500 height=500 xaxis=None yaxis=None]