In this issue of Beacondigest, the main theme is quantifying the extent of decentralization in the PoS Ethereum system. We begin by introducing three metrics used to measure the level of inequality in a system - the Gini coefficient, the Nakamoto coefficient and the Herfindahl–Hirschman Index. We then use these metrics to calculate the extent of decentralization in PoS Ethereum and build some intuition about what these measures indicate.
Finally, we conclude with the recurring portion of our biweekly notebooks - assessing the health of the network.
As we explored earlier, a significant portion of the ETH staked on the Beacon Chain is concentrated in the hands of a few major entities. These entities include large staking pools, exchanges and whales. Some of the significant ones include:
1) Kraken: They are a United States-based cryptocurrency exchange. One of the services that the exchange offers its users is asset staking, where the exchange uses the coins deposited by its users to stake on the Beacon Chain through a handful of trusted node operators. The staking rewards are then shared between users, node operators and the exchange itself.
2) Binance: They are another centralized cryptocurrency exchange where asset staking works in a manner similar to Kraken. Major differences in the two platforms include differences in platform fees, return rates and lock-in periods.
3) Lido: As we saw in the previous notebook, Lido is a staking protocol that aims to provide liquidity to its users through its novel ERC20 token, stETH, which is pegged 1:1 to the ETH deposited by users who stake through the Lido protocol.
Thus, to get a clear idea of just how much staked ETH these major entities control, we first look at the distribution of deposits among the 14 largest entities, using the data we previously scraped for our Oceanic Games analysis.
import sys
!{sys.executable} -m pip install tabulate
from tabulate import tabulate
from web3 import Web3
import json
import requests
import csv
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from time import sleep
import plotly.express as px
import plotly.io as pio
pd.options.plotting.backend = "plotly"
pio.renderers.default = "plotly_mimetype+notebook_connected"
import plotly.graph_objects as go
import math
import warnings
staking_pools = pd.read_csv('staking_pools.csv')
nan_value = float("NaN")
staking_pools.replace("", nan_value, inplace=True)
staking_pools.dropna(subset = ["Service"], inplace=True)
df = pd.read_csv('staking_pools.csv')
df['Percentage Stake'] = (df['Stake']/df['Stake'].sum())
nan_value = float("NaN")
df.replace("", nan_value, inplace=True)
df.dropna(subset = ["Service"], inplace=True)
#df
df2 = df
lst = df2.index[df2['Stake'] > 0].tolist()
d = {}
# Aggregate the percentage stake per service
for i in lst:
    if d.get(df2['Service'][i]) is not None:
        d[df2['Service'][i]] += df2['Percentage Stake'][i]
    else:
        d[df2['Service'][i]] = df2['Percentage Stake'][i]
df2 = pd.DataFrame(d.items())
df2.rename(columns = {0: "service", 1: "percentage_stake"}, inplace=True)
df2 = df2.sort_values('percentage_stake',ascending=False)
df1 = df2.head(14)
dict_append = {'service': 'Others', 'percentage_stake': 1 - df1['percentage_stake'].sum()}
# adds the "Others" row shown in the table below (note: newer pandas would use pd.concat instead of DataFrame.append)
df1 = df1.append(dict_append, ignore_index=True)
df1
| | service | percentage_stake |
| --- | --- | --- |
| 0 | Kraken | 0.145224 |
| 1 | Binance | 0.057429 |
| 2 | Whale | 0.041999 |
| 3 | Lido | 0.035372 |
| 4 | Bitcoin Suisse | 0.033422 |
| 5 | Staked.us | 0.025786 |
| 6 | Stakefish | 0.021061 |
| 7 | Huobi | 0.016219 |
| 8 | Defi | 0.012037 |
| 9 | Stkr | 0.009317 |
| 10 | Bitfinex | 0.007874 |
| 11 | OKEX | 0.005955 |
| 12 | Piedao | 0.004341 |
| 13 | Cream | 0.003882 |
| 14 | Others | 0.580082 |
fig = px.pie(df1, values='percentage_stake', names='service', labels = {'service': 'Service', 'percentage_stake': 'Percentage'}, title = 'Major staking entities and the % of staked ETH they control')
fig.show()
In traditional economics, the Gini coefficient (or Gini index) is a measure of the wealth inequality that exists within a given population. It is derived from the Lorenz curve.
The Lorenz curve is the graph obtained by plotting the cumulative % of the population against the cumulative % of wealth or participation it holds. The basic concept is illustrated in the diagram below [also explained in more detail with 2 examples later in this section]:
(figure by [Matthew John](https://www.quora.com/What-is-the-Gini-coefficient/answer/Matthew-John-69))
Subsequently, the formula to calculate the Gini coefficient is as follows [Also explained in more detail with 2 examples later in the same section]:
(figure by [Balaji Srinivasan](https://news.earn.com/quantifying-decentralization-e39db233c28e))
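For reference, one common discrete form of this formula (and the one implemented by the gini helper function defined later in this notebook) is, for values $x_{1} \leq x_{2} \leq \ldots \leq x_{n}$ sorted in ascending order:
$$G = \frac{\sum_{i=1}^{n} (2i - n - 1)\, x_{i}}{n \sum_{i=1}^{n} x_{i}}$$
A value of 0 corresponds to perfect equality, while values approaching 1 correspond to all of the wealth (here, all of the block production) being concentrated in a single entity.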
To apply this to blockchain systems, wealth inequality maps to inequality in the distribution of block rewards among block proposers, and the population maps to the active validators staking ETH.
To get a better understanding of this metric let us consider the following 2 examples:
1) CASE 1: Consider a situation where there are 10 staking entities, each controlling an equal amount of the ETH staked in the Beacon Chain. According to the specs, block proposer rewards should then be split equally among them, in expectation.
2) CASE 2: Consider a situation with 10 staking entities in which one entity controls almost all (around 99%) of the ETH staked in the Beacon Chain. Most of the block proposer rewards should then go to validators belonging to that entity, and over a given timeframe it is likely that nearly every block produced comes from proposers belonging to this entity.
The Lorenz curve for these two situations would look something like this:
#In order to run this chunk of code please run the blocks of code containing the function definitions first (later in the notebook)
list_case_1 = np.array([1,1,1,1,1,1,1,1,1])
list_case_2 = np.array([0,0,0,0,0,0,0,0,1])
lorenz_curve_case_1 = lorenz(list_case_1)
lorenz_curve_case_2 = lorenz(list_case_2)
fig = px.line(y = lorenz_curve_case_1, x = np.linspace(0.0, 1.0, lorenz_curve_case_1.size), title = "ETH Staking Decentralization in Case 1 (Lorenz Curve)", labels = {"x": "Cumulative Fraction of ETH Block Proposers", "y": "Cumulative Fraction of Blocks Produced"})
#fig.add_scatter(x = [0,1], y = [0,1], name = "Line of Decentralization")
fig.show()
fig = px.line(y = lorenz_curve_case_2, x = np.linspace(0.0, 1.0, lorenz_curve_case_2.size), title = "ETH Staking Decentralization in Case 2 (Lorenz Curve)", labels = {"x": "Cumulative Fraction of ETH Block Proposers", "y": "Cumulative Fraction of Blocks Produced"})
fig.add_scatter(x = [0,1], y = [0,1], name = "Line of Decentralization")
fig.show()
Thus, in Case 1, which depicts perfect decentralization, the Lorenz curve is essentially a 45-degree line (it coincides with the line of decentralization), while in Case 2, which depicts near-total centralization, the curve hugs the horizontal axis and then rises almost vertically at the right edge. The corresponding Gini coefficients are given below:
table_gini = [['Gini coefficient for Case 1', gini(list_case_1)]]
print(tabulate(table_gini, tablefmt='fancy_grid'))
table_gini = [['Gini coefficient for Case 2', gini(list_case_2)]]
print(tabulate(table_gini, tablefmt='fancy_grid'))
╒═════════════════════════════╤═══╕
│ Gini coefficient for Case 1 │ 0 │
╘═════════════════════════════╧═══╛
╒═════════════════════════════╤══════════╕
│ Gini coefficient for Case 2 │ 0.888889 │
╘═════════════════════════════╧══════════╛
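As a quick sanity check, when a single entity out of $n$ holds everything (as in the Case 2 array with $n = 9$), the discrete Gini formula above reduces to a closed form that matches the printed value:
$$G = \frac{(2n - n - 1) \cdot 1}{n \cdot 1} = \frac{n-1}{n} = \frac{8}{9} \approx 0.889$$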
In order to find the Gini coefficient and plot the Lorenz curve for PoS Ethereum, we start by collecting data on the number of blocks proposed by different validators in the range of epochs 50445 - 50995 (a span of 550 epochs).
We then trace back the execution-layer addresses that activated these validators with their 32 ETH deposits, and check which entity (if any) each address belongs to using the staking-pools mini-database we compiled for our previous analysis.
proposer_list = []
# with open('api_key.txt', 'r') as api_file:
#     api_key = api_file.read()
for epoch in range(50445,50995):
    x = requests.get('https://beaconcha.in/api/v1/epoch/{}/blocks?api_key={}'.format(epoch, "dDQvWWNGZzhxaTlZRC5id01rT1gu"))
    #Add a sleep to stay within the call rate limits
    sleep(6)
    data = x.json()['data']
    for i in data:
        proposer_list.append(i['proposer'])
# with open('proposer_indices.csv', 'w', newline='') as myfile:
#     wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
#     wr.writerow(proposer_list)
with open('proposer_indices.csv', newline='') as f:
    reader = csv.reader(f)
    data = list(reader)
print(len(data[0]))
17505
Thus in this range of epochs, 17505 blocks were produced.
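For reference, 550 epochs contain 550 × 32 = 17,600 slots, so roughly 95 slots (about 0.5% of the window) went by without a block being produced.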
staking_entities = []
indices_list = []
# Build comma-separated batches of (at most) 100 validator indices per API call
for i in range(0,len(data[0]) + 1, 100):
    str1 = ''
    if i <= 17400:
        for j in range(99):
            str1 = str1 + str(data[0][i+j]) + ','
        str1 = str1 + str(data[0][i+99])
        # str1 = str1 + str(i+j) + ','
        # str1 = str1 + str(i+99)
    else:
        # the final batch only contains the remaining 5 indices
        for j in range(4):
            str1 = str1 + str(data[0][i+j]) + ','
        str1 = str1 + str(data[0][i+4])
        # str1 = str1 + str(i+j) + ','
        # str1 = str1 + str(i+4)
    #print(str1)
    indices_list.append(str1)
# x = requests.get('https://beaconcha.in/api/v1/validator/{}/deposits?api_key={}'.format(str1, "dDQvWWNGZzhxaTlZRC5id01rT1gu"))
# #Add a sleep to stay within the call rate limits
# sleep(6)
# data = x.json()['data']
# for t in data:
#     staking_entities.append(t['from_address'])
for i in indices_list:
    x = requests.get('https://beaconcha.in/api/v1/validator/{}/deposits?api_key={}'.format(i, "dDQvWWNGZzhxaTlZRC5id01rT1gu"))
    #Add a sleep to stay within the call rate limits
    sleep(6)
    data = x.json()['data']
    for t in data:
        staking_entities.append(t['from_address'])
# with open('proposer_addresses.csv', 'w', newline='') as myfile:
#     wr = csv.writer(myfile, quoting=csv.QUOTE_ALL)
#     wr.writerow(staking_entities)
with open('proposer_addresses.csv', newline='') as f:
    reader = csv.reader(f)
    data = list(reader)
data = data[0]
#data
df = df[['Address','Service']]
df
address_list = df.values.tolist()
address_list
address_dict = {}
for i in address_list:
    address_dict[i[0]] = i[1]
#address_dict
gini_dict = {}
# Count blocks per entity; addresses not in our database are kept as-is (solo stakers / unknown entities)
for i in data:
    if i in address_dict:
        if address_dict[i] in gini_dict:
            gini_dict[address_dict[i]] += 1
        else:
            gini_dict[address_dict[i]] = 1
    else:
        if i in gini_dict:
            gini_dict[i] += 1
        else:
            gini_dict[i] = 1
gini_dict
def gini(array):
    """Compute the Gini coefficient of a (flattenable) numpy array."""
    array = array.flatten()
    array = np.sort(array)
    index = np.arange(1, array.shape[0] + 1)
    n = array.shape[0]
    return ((np.sum((2 * index - n - 1) * array)) / (n * np.sum(array)))
gini_input = list(gini_dict.values())
gini_input = np.array(gini_input)
table_gini = [['Gini coefficient', gini(gini_input)]]
print(tabulate(table_gini, tablefmt='fancy_grid'))
╒══════════════════╤══════════╕ │ Gini coefficient │ 0.688628 │ ╘══════════════════╧══════════╛
We thus calculate the Gini coefficient of the PoS Ethereum system to be roughly 0.689. This indicates a fairly serious gap between staking entities in terms of the resources they control.
To build a more concrete understanding of this value, a Gini score of 0.689 means that if we pick any two staking entities at random, the difference in the number of blocks they proposed (and hence the block rewards they earned) is expected to be roughly 1.38 times the mean, i.e. twice the Gini coefficient.
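For context, this interpretation follows from the identity relating the Gini coefficient to the mean absolute difference of the underlying values $x_{1}, \ldots, x_{n}$ with mean $\bar{x}$:
$$G = \frac{\sum_{i=1}^{n}\sum_{j=1}^{n} |x_{i} - x_{j}|}{2 n^{2} \bar{x}},$$ so the expected absolute difference between two entities drawn at random is $2 G \bar{x} \approx 1.38\, \bar{x}$ here.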
However, an important point to note is that there are two drivers of wealth inequality - non-utilitarian resource allocation and power concentration. Measuring inequality with the Gini coefficient alone does not tell us which component is making the system more or less equal.
In proof-of-stake systems we are primarily interested in measuring power concentration, for which the HHI (explored later in this notebook) is a much better-suited measure.
When we plot the Lorenz curve, we get the following plot
def lorenz(arr):
    """Return the Lorenz curve (cumulative share of the sorted values), prepended with 0."""
    arr = np.sort(arr)
    scaled_prefix_sum = arr.cumsum() / arr.sum()
    return np.insert(scaled_prefix_sum, 0, 0)
lorenz_curve = lorenz(gini_input)
fig = px.line(x = np.linspace(0.0, 1.0, lorenz_curve.size) , y = lorenz_curve, title = "ETH Staking Decentralization (Lorenz Curve)", labels = {"x": "Cumulative Fraction of ETH Block Proposers", "y": "Cumulative Fraction of Blocks Produced"})
fig.add_scatter(x = [0,1], y = [0,1], name = "Line of Decentralization")
fig.show()
We observe a significant deviation from the 45-degree line, indicating a decently high level of centralization.
The Nakamoto coefficient was presented in the paper Measuring Decentralization in Bitcoin and Ethereum using Multiple Metrics and Granularities. By definition, the Nakamoto coefficient is simply the minimum number of entities that would need to collude to take control of 51% of the ETH staked on the network.
It can be calculated using the formula:
$$N = \min\left\{\, k \in \{1, 2, \ldots, K\} : \sum_{i=1}^{k} p_{i} \geq 0.51 \right\},$$ where $p_{i}$ is the fraction of blocks produced by entity $i$, the entities are sorted in decreasing order of $p_{i}$, and $K$ is the total number of entities in the system.
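As a minimal sketch of this definition (the shares list below is a made-up example rather than data from this notebook), the coefficient can be computed by sorting the shares in descending order and counting how many are needed to cross the threshold:
def nakamoto_coefficient(shares, threshold=0.51):
    # shares: per-entity fractions of blocks produced (or of stake), summing to roughly 1
    sorted_shares = np.sort(np.asarray(shares))[::-1]    # largest first
    cumulative = np.cumsum(sorted_shares)
    return int(np.argmax(cumulative >= threshold) + 1)   # smallest k whose cumulative share crosses the threshold

# hypothetical example: one dominant entity plus a few smaller ones
print(nakamoto_coefficient([0.30, 0.25, 0.15, 0.10, 0.10, 0.10]))  # prints 2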
We use the same data we collected while calculating the Gini coefficient, after selecting only the columns relevant to us ("Proposer" and "Number of Blocks Produced") and adding another column ("Percentage Proposed").
We then plot a pie chart that represents the 10 major entities that proposed the most number of blocks during the given range of epochs.
nakamoto_df = pd.DataFrame(gini_dict.items(), columns = ["Proposer", "Number of Blocks Produced"])
nakamoto_df['Percentage Proposed'] = (nakamoto_df['Number of Blocks Produced']/nakamoto_df['Number of Blocks Produced'].sum())
nakamoto_df = nakamoto_df.sort_values(by='Percentage Proposed', ascending=False)
display_nakamoto = nakamoto_df[['Proposer', 'Percentage Proposed']]
display_nakamoto = display_nakamoto.head(10)
dict_append = {'Proposer': 'Others', 'Percentage Proposed': 1- display_nakamoto['Percentage Proposed'].sum()}
display_nakamoto = display_nakamoto.append(dict_append, ignore_index=True)
fig = px.pie(display_nakamoto, values='Percentage Proposed', names='Proposer', title = '% of proposed blocks by top 10 entities')
fig.show()
In order to measure the extent of decentralization in PoS Ethereum using this metric, we find the minimum number of parties that would need to collude to control > 33%, > 50% and > 66% of the network. As a reminder, an entity owning over 33% of the stake can prevent the system from finalizing until its stake "leaks" away due to penalties, or unless it is forked out of the system.
nakamoto_df['Cumulative Percentage Proposed'] = nakamoto_df['Percentage Proposed'].cumsum()
display_table = nakamoto_df.head(10)
index = [1,2,3,4,5,6,7,8,9,10]
display_table[''] = index
display_table.set_index('')
| | Proposer | Number of Blocks Produced | Percentage Proposed | Cumulative Percentage Proposed |
| --- | --- | --- | --- | --- |
| 1 | Kraken | 2278 | 0.129956 | 0.129956 |
| 2 | Binance | 832 | 0.047464 | 0.177420 |
| 3 | Whale | 628 | 0.035826 | 0.213247 |
| 4 | Lido | 531 | 0.030293 | 0.243539 |
| 5 | Bitcoin Suisse | 466 | 0.026585 | 0.270124 |
| 6 | 0xa76a7d0d06754e4fc4941519d1f9d56fd9f8d53b | 446 | 0.025444 | 0.295567 |
| 7 | Stakefish | 428 | 0.024417 | 0.319984 |
| 8 | Staked.us | 365 | 0.020823 | 0.340807 |
| 9 | 0x00444797ba158a7bdb8302e72da98dcbccef0fbc | 252 | 0.014376 | 0.355183 |
| 10 | Huobi | 225 | 0.012836 | 0.368019 |
table_nakamoto = [['Number of parties needed to collude to control > 33%', len(nakamoto_df[nakamoto_df['Cumulative Percentage Proposed'] <= 0.33]) + 1], ['Number of parties needed to collude to control > 50%', len(nakamoto_df[nakamoto_df['Cumulative Percentage Proposed'] <= 0.50]) + 1], ['Number of parties needed to collude to control > 66%', len(nakamoto_df[nakamoto_df['Cumulative Percentage Proposed'] <= 0.66]) + 1]]
print(tabulate(table_nakamoto, tablefmt='fancy_grid'))
╒═══════════════════════════════════════════════════════╤═════╕
│ Number of parties needed to collude to control > 33%  │   8 │
├───────────────────────────────────────────────────────┼─────┤
│ Number of parties needed to collude to control > 50%  │  31 │
├───────────────────────────────────────────────────────┼─────┤
│ Number of parties needed to collude to control > 66%  │ 309 │
╘═══════════════════════════════════════════════════════╧═════╛
Thus, the Nakamoto coefficient of the system is 31, meaning that at least 31 parties would need to come together to take control of 51% of the network.
However, the minimum number of parties needed to collude to control more than 33% of the network is just 8, meaning that with only 8 entities, finality can be delayed (until the attackers' stake leaks away due to inactivity penalties).
To cause extreme damage to the system, an attacker would need at least 66% control of the network, which can only be achieved if at least 309 entities come together and collude.
The final metric we look at is the Herfindahl–Hirschman Index (HHI). This metric measures the size of each staking pool relative to the ETH staking industry as a whole, and is an indicator of the amount of competition among staking entities.
The formula to calculate this score is as follows:
$$H = \sum_{i=1}^{N} s_{i}^{2},$$ where $s_{i}$ is the share (as a fraction) of ETH staked by entity $i$ - in our calculation the fraction of blocks it proposed serves as a proxy - and $N$ is the total number of staking entities.
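As a quick point of reference (a made-up example rather than data from this notebook), a perfectly even market of $K$ identical entities gives
$$H = K \cdot \left(\frac{1}{K}\right)^{2} = \frac{1}{K},$$ so 10 equally sized entities score 0.1 and 100 equally sized entities score 0.01.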
In order to make sense of the different values for this index, we can use the table given below as a reference
table_hhi_reference = [['H below 0.01:', 'highly competitive industry'], ['H between 0.01 and 0.15:', 'unconcentrated industry'], ['H between 0.15 and 0.25:', 'moderate concentration'], ['H above 0.25:', 'high concentration']]
print(tabulate(table_hhi_reference, tablefmt='fancy_grid'))
╒══════════════════════════╤═════════════════════════════╕
│ H below 0.01:            │ highly competitive industry │
├──────────────────────────┼─────────────────────────────┤
│ H between 0.01 and 0.15: │ unconcentrated industry     │
├──────────────────────────┼─────────────────────────────┤
│ H between 0.15 and 0.25: │ moderate concentration      │
├──────────────────────────┼─────────────────────────────┤
│ H above 0.25:            │ high concentration          │
╘══════════════════════════╧═════════════════════════════╛
We continue to use the data that we initially collected for measuring the Gini coefficient but this time keeping only the 'Proposer' and 'Percentage Proposed' columns.
When we calculate the HHI index using the above formula this is what we get:
hhi_df = nakamoto_df[['Proposer', 'Percentage Proposed']]
temp = 0
# Sum of squared shares, using each entity's fraction of proposed blocks as its share
for i in hhi_df['Percentage Proposed']:
    temp += i**2
hhi_score = temp
table_gini = [['Herfindahl–Hirschman Index', hhi_score]]
print(tabulate(table_gini, tablefmt='fancy_grid'))
╒════════════════════════════╤═══════════╕
│ Herfindahl–Hirschman Index │ 0.0252203 │
╘════════════════════════════╧═══════════╛
Going by the reference table, this is indicative of a very competitive, highly unconcentrated industry. This is likely because our dataset includes a large "ocean" of validators that produced only one or two blocks during the range of epochs we considered.
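As a quick sanity check on that explanation (reusing the gini_dict mapping built earlier), one could count how many entities proposed only a handful of blocks in this window:
# Count entities that proposed at most 2 blocks during the 550-epoch window
small_entities = sum(1 for blocks in gini_dict.values() if blocks <= 2)
print(small_entities, "of", len(gini_dict), "entities proposed at most 2 blocks")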
It is also useful to measure the extent of competition among only the 10 largest staking entities, to see how competitive each of them is and, more importantly, how collusion-proof the group is - having wealth distributed among many "atomic players" rather than a single entity is itself a safeguard for the security of the protocol. A good parallel is an oligarchy versus an autocracy.
hhi_df_further = nakamoto_df[['Proposer', 'Number of Blocks Produced']].head(10)
hhi_df_further['Percentage Proposed'] = (hhi_df_further['Number of Blocks Produced']/hhi_df_further['Number of Blocks Produced'].sum())
temp = 0
for i in hhi_df_further['Percentage Proposed']:
    # print(i)
    temp += i**2
hhi_score = temp
table_gini = [['Herfindahl–Hirschman Index for the 10 largest staking entities', hhi_score]]
print(tabulate(table_gini, tablefmt='fancy_grid'))
╒═════════════════════════════════════════════════════════════════╤══════════╕
│ Herfindahl–Hirschman Index for the 10 largest staking entities  │ 0.177926 │
╘═════════════════════════════════════════════════════════════════╧══════════╛
Mathematically, the lowest possible HHI over 10 entities is 0.1, attained when all 10 have equal shares (see the worked example above). The score we get for the 10 largest PoS Ethereum staking entities is around 0.178, which falls in the "moderate concentration" band of the reference table.
The major shortcoming of our analysis is that a window of 550 epochs may be too small to accurately estimate the distribution of block proposals among entities.
To overcome this, we propose collecting data over a longer range of epochs and also tracking these metrics over time (daily, weekly and monthly), as sketched below.
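As a rough sketch of what that could look like (assuming a hypothetical dataframe proposals_df with one row per proposed block and columns 'epoch' and 'entity', which is not built in this notebook), the per-day Gini coefficient could be computed like this:
# Hypothetical sketch: daily Gini of block production, reusing the gini() helper above
proposals_df['day'] = proposals_df['epoch'] // 225          # 225 epochs ~ 1 day
daily_gini = (
    proposals_df
    .groupby(['day', 'entity']).size()                       # blocks per entity per day
    .groupby(level='day')
    .apply(lambda counts: gini(np.array(counts)))
)
fig = px.line(x=daily_gini.index, y=daily_gini.values,
              labels={'x': 'Day', 'y': 'Gini coefficient'},
              title='Daily Gini coefficient of block production (sketch)')
fig.show()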
In this fortnight's version of the recurring Beacon Chain analysis, the format we follow is a little different from previous editions. Instead of analysing the data from the past 14 days, we:
1) Visualize how many deposits have been made to the Beacon Chain over time since May 24th
2) View how the validators have performed from May 24th till July 19th in terms of participation rates and the number of blocks produced
In order to explore the trend in deposits over the past month or so, we use data from Epoch 39121 (May 24th, 8:54AM GMT) to Epoch 51630 (Jul 19th, 7:12AM GMT) and plot our usual graph to get a sense of how many deposits were made during this period.
As usual, we start by processing our data:
li = []
all_files = ["deposits 1.csv", "deposits 2.csv", "deposits 3.csv", "deposits 4.csv"]
use_cols = [0,1]
names = ["Epoch", "Deposits"]
for file in all_files:
    df = pd.read_csv(file, header = None, names = names, usecols = use_cols)
    li.append(df)
df_deposits = pd.concat(li)
# Random vertical jitter so overlapping deposits are easier to distinguish; bubble size encodes deposit size
rng = np.random.default_rng(42)
df_deposits['temp'] = rng.uniform(0, 10, len(df_deposits["Epoch"]))
fig = px.scatter(
df_deposits[df_deposits["Deposits"] > 0], x = 'Epoch', y = 'temp', size = 'Deposits',
size_max = 20, labels = {"Epoch": "Epoch"})
fig.update_yaxes(visible=False)
We find that large deposits, made at discrete points in time, are somewhat more frequent during the first stretch of epochs in this window.
Another interesting point to note is that, judging by the size of the deposits, it is fair to assume that, at least over this time range, far more new validators are staking through 3rd-party staking services than are solo-staking.
In order to analyze how the network has been performing in terms of participation rate over time, we split the epochs into intervals of 225 epochs (1 day) and plot the average participation rate for each interval.
li = []
all_files = ["GPR1.csv", "GPR2.csv", "GPR3.csv", "GPR4.csv"]
use_cols = [0,1]
names = ["Epoch", "Participation Rate"]
for file in all_files:
    df = pd.read_csv(file, header = None, names = names, usecols = use_cols)
    li.append(df)
df_participation_rate = pd.concat(li)
nan_value = float("NaN")
df_participation_rate.replace("", nan_value, inplace=True)
df_participation_rate.dropna(subset = ["Participation Rate"], inplace=True)
average_participation_list = []
df_list = []
x_axis_list = []
count = 0
for i in range(225,12364,225):
    df_list.append(df_participation_rate[i - 225:i])
    x_axis_list.append(count)
    count += 1
for i in df_list:
    average_participation_list.append(i['Participation Rate'].mean())
fig = px.line(x= x_axis_list, y= average_participation_list, title='Average participation rate in intervals of 225 Epochs (1 Day)', labels = {'x': 'Day', 'y': 'Average participation rate'})
fig.show()
We notice that the average participation rate per day in fact varies quite stochastically, without any visible pattern. However, this metric never dropped below 0.98, which is a sign of a healthy network.
Finally, to assess the validators' performance over this time period, we plot a histogram that captures the number of times 'X' blocks were produced in an epoch.
li = []
all_files = ["BC1.csv", "BC2.csv", "BC3.csv", "BC4.csv"]
use_cols = [0,1]
names = ["Epoch", "Number of Blocks Produced"]
for file in all_files:
    df = pd.read_csv(file, header = None, names = names, usecols = use_cols)
    li.append(df)
df_blocks_produced = pd.concat(li)
nan_value = float("NaN")
df_blocks_produced.replace("", nan_value, inplace=True)
df_blocks_produced.dropna(subset = ["Number of Blocks Produced"], inplace=True)
x = df_blocks_produced["Number of Blocks Produced"]
trace = go.Histogram(x=x)
layout = go.Layout(
    title="Frequency of Blocks Produced"
)
fig = go.Figure(data=[trace], layout=layout)
fig.show()
As expected, the number of blocks produced per epoch was almost always in the range of 31-32. There was, however, one epoch during which this number fell all the way down to 20, the reason for which is worth exploring. A quick way to locate that epoch is sketched below.
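As a minimal pointer for that follow-up (reusing the df_blocks_produced dataframe built above), we can list the epochs with the fewest blocks produced as candidates for further investigation:
# Epochs with the fewest blocks produced during the window
worst_epochs = df_blocks_produced.sort_values("Number of Blocks Produced").head(5)
print(worst_epochs)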