#! /usr/bin/python -t 
# Last edited on 2016-02-10 18:53:56 by stolfilocal

# This python program simulates the evolution of the bitcoin blockchain
# after the miners have split into two sets with different validity
# rules, /restrictive/ (R) and /permissive/ (P); such that every block
# that is valid to the R miners is also valid for the P miners, but not
# the other way around. 

# This situation can arise in many situations, such as: after a typical
# soft or hard fork, with blockchain voting and all; if a bug affects
# the validity rules of only some miners; or when a subset of the miners
# decides to change their validiy rules, for any reason and by any
# procedure.

# For a qualitative discussion of this situation, see
# https://np.reddit.com/r/bitcoin_uncensored/comments/3hj2sl/soft_fork_hard_fork/

# In this situation, there are often two branches of the chain, forking
# from a common node: an R branch, consisting only of R-blocks (blocks
# mined by R), that is valid for ball miners; and a P branch, consisting
# of P-blocks, that is valid only for P miners. Either branch may be empty
# (that is, its tip may be the last common node).  

# Occasionally, the P miners may abandon a non-empty P branch and 
# begin to mine on top of the R branch.  This event is a /chain reorganization/
# or /reorg/. The number of P-blocks lost at once is the /size/ of the reorg.
# In such an event, the P miners will lose the rewards that they collected
# in the abandoned P blocks.  Moreover, transactions confirmed in them
# may be undone, cretaing a risk of double-spend for users who relied on
# those transactions.  

# This program estimates the expected number of reorgs of each size that
# may occur after such a fork, as a function of the hashpower 
# fractions {hP} and {hR = 1-hP} in the P and R sets, respectively.

# Basically, if {hP > 1/2}, there will be a few reorgs initially, but eventually
# the P branch will grow so far ahead of the R branch that the probability 
# of further reorgs is negligible. The R branch will continue to grow
# but at slower pace.  The coin will effectively split. 

# If {hP < 1/2}, on the other hand, the P branch will never get permanently 
# ahead of the R branch, and every P block will eventually be discarded.
# by all miners. The P miners will then starve.

# In a near-tie situation with {hP} close to {hR}, the evolution will be
# similar to that of {hP < 1/2} (the P branch will apparently never get
# a secure win), but the sizes of the reorgs will have a very broad
# distribution.

# This simulation does not apply to /clean forks/, when the blocks
# created by either set are invalid for the other set. (In that case
# there are no reorgs, and the coin is split from the start.)  It also
# does not apply to forks with three or more branches, or cases
# when only /some/ of the P-blocks are invalid to the R miners.

# Some simplifying assumptions made in this simulation:

#   (1) every block mined by P is invalid for R;
#
#   (2) every block has the same work in it, so the amount of work 
#   in a branch is proportional to the number of blocks in it.
#
#   (3) propagation delays are assumed to be negligible, so 
#   there are no reorgs due to them
#
#   (4) if a miner sees two valid branches with same length,
#   he will choose either one as the parent of his candidate 
#   block, with equal probability.
#
#   (5) neither side will try to sabotage the other side's work 
#   by mining on their branch.

import sys
import os
import random
import copy

# State of the blockchain:

# The state of the blockchain at some moment can be described by a pair
# {(r,p)}, signifying that the R branch has {r} blocks and the P branch
# has {p} blocks after the last common block.  Right after the fork,
# the chain is in the {(0,0)} state.

state = [0, 0];  

# The majority-of-work rule implies {r <= p} always.

# If {r < p}, the state can evolve to {(r+1,p)} with probability
# {hR}, and {(r,p+1)} with probability {hP}, both without reorgs.

# A reorg can occur only when the chain is in a tie state {(r,p)} with
# {r == p} and {p > 0}. Then, if the R set solves the next block (probability {hR}),
# there will be a reorg of size {p}, and all miners will opt for the R
# branch, bringing the state back to {(0,0)}. If the P miners win
# instead, then, with probability {hP/2} they will extend the P branch,
# yielding state {(r,p+1)}; with probability {hP/2} they will extend the
# R branch, causing a reorg of size {p} and leaving the chain in the
# state {(0,1)}.

# Statistics:

nruns = 160000;    # Number of forks to simulate.
maxblocks = 5000;  # Max number of blocks to simulate per fork.
nsplits = None;    # Number of runs that ended with a permanent coin split.
nundefs = None;    # Number of runs that failed to reach a permanent split.
maxorg = None;     # Size of largest reorg seen.
norgs = None;      # Total number of reorgs by size.

# This program repeats the simulation of a fork event {nruns} times.
# The simulation of one fork event goes though the above steps 
# until the difference {p-r} is so large that further reorgs are 
# unlikely, or {maxblocks} blocks have been generated.

# During the simulations, the program accumulates the count {norgs[k]}
# of reorgs of size {k}, for {k} in {1..maxblocks}.

# Debugging:

nbpath = None; # Number of blocks in debug path.
path = None;   # {path[i]} is state after {i} blocks since last reorg, {i} in {0..nbpath-1}.

def definite_split(hP, state) :
  "Returns {True} if further reorgs are practically impossible in the given state."
  r = state[0];
  p = state[1];
  assert r <= p;
  # Shoudl do something smarter:
  if hP < 0.55 :
    return False
  else :
    return (p - r > 50)
  # ----------------------------------------------------------------------
    
def count_reorg(sz) :
  "Counts one more P-branch reorg of size {sz},updates {maxorg}, resets {nbpath}."
  global state, maxblocks, maxorg, norgs, nbpath;
  assert sz <= maxblocks;
  if sz > maxorg : maxorg = sz;
  norgs[sz] += 1;
  # Debug long reorgs: */
  if sz > 30 :
    sys.stderr.write("    long %d-reorg:" % sz);
    for k in range(nbpath) :
      sys.stderr.write(" (%d,%d)" % (path[k][0], path[k][1]));
    sys.stderr.write("\n");
   # Reset debug path:
  nbpath = 0;
  # ----------------------------------------------------------------------

def simulate_block(hP) :
  " Simulates the mining of the next block, updating statistics in case of reorg."
  global state, maxorg, norgs, nbpath, path;
  r = state[0];
  p = state[1];
  assert r <= p;
  if random.random() > hP :
    # Next block mined by R:
    if r == p :
      # R branch overtakes P branch; reorg and reset to single branch:
      if p > 0 : count_reorg(p);
      r = 0;
      p = 0;
    else :
      # R grows by one, P doesn't reorg yet.
      r += 1;
  else :
    # Next block mined by P:
    if r == p :
      # P sees two tied branches:
      if random.random() > 0.5 :
        # P mines on top of R: reorg, and reset to P one block ahead of R:
        if p > 0 : count_reorg(p);
        r = 0;
        p = 1;
      else :
        # P mines on top of P, no reorg:
        p += 1;
    else :
      # P mines on top of P, no reorg:
      p += 1;
  state = [r, p];
  # Append state to path for debugging:
  path[nbpath] = copy.copy(state); 
  nbpath += 1;
  
  # ----------------------------------------------------------------------

def simulate_run(hP) :
  "Simulates one run for {maxblocks} blocks."
  global state, maxblocks, nsplits, nundefs, nbpath, path;
  state = copy.copy([0,0]);
  nbpath = 0;
  path = copy.copy(maxblocks*[None]);
  k = 0;
  while (k < maxblocks) and (not definite_split(hP, state)) :
    simulate_block(hP);
    k += 1;
  if k < maxblocks :
    nsplits += 1;
  else :
    nundefs += 1;
  # ----------------------------------------------------------------------

def get_reorg_stats(hP) :
  " Simulates {nruns} runs with P hash fractions {hP}"
  global nruns, maxblocks, nsplits, nundefs, maxorg, norgs;
  # Reset stats and simulate:
  nsplits = 0;
  nundefs = 0;
  maxorg = 0;
  norgs = copy.copy((maxblocks+1)*[0]); 
  for i in range(nruns) :
    simulate_run(hP);
  # Print results:
  sys.stderr.write("\n");
  sys.stderr.write("=== P fraction = %.3f ===================================\n" % hP);
  sys.stderr.write("\n");
  sys.stderr.write("%6d runs simulated\n" % nruns);
  sys.stderr.write("%6d runs ended in permament coin splits\n" % nsplits);
  sys.stderr.write("%6d runs were not resolved after %d blocks\n" % (nundefs, maxblocks));
  sys.stderr.write("%6d blocks in largest reorg seen\n" % maxorg);
  sys.stderr.write("\n");
  sys.stderr.write("Expected number of reorgs by size\n");
  reorgs = 0.0;  # Average number of reorg events per fork.
  orphans = 0.0; # Average number of orphaned blocks per fork.
  reorgs6 = 0.0; # Expected number of reorgs with size 6 or more.
  reorgs12 = 0.0; # Expected number of reorgs with size 12 or more.
  for k in range(maxorg+1) :
    if norgs[k] > 0 :
      ro = (norgs[k]+0.0)/nruns;
      ob = (k*norgs[k]+0.0)/nruns;
      sys.stderr.write("  %6d blocks  exp reorgs = %13.7f exp orphans = %13.7f\n" % (k, ro, ob));
      reorgs += ro;
      orphans += ob;
      if k >= 6 :
        reorgs6 += ro;
      if k >= 12 :
        reorgs12 += ro;
  sys.stderr.write("\n");
  sys.stderr.write("expected reorgs per fork = %10.4f\n" % reorgs);
  sys.stderr.write("expected big reorgs (6 blocks or more) per fork = %10.4f\n" % reorgs6);
  sys.stderr.write("expected extra-big reorgs (12 blocks or more) per fork = %10.4f\n" % reorgs12);
  sys.stderr.write("expected orphaned blocks per fork = %10.4f\n" % orphans);
  
  # Write summary file:
  sys.stdout.write("    %4.2f   %8.2f   %8.2f   %10.4f   %10.4f\n" % (hP, reorgs, orphans, reorgs6, reorgs12));
  
  # ----------------------------------------------------------------------
  
def get_all_reorg_stats() :
  "Gets reorg statistics for several hashrate fractions"
  sys.stdout.write("\n");
  sys.stdout.write("    %4s   %8s   %8s   %10s   %10s\n" % ("hP", "Reorgs", "Orphans", "Reorgs6+", "Reorgs12+"));
  sys.stdout.write("    %4s   %8s   %8s   %10s   %10s\n" % (4*"-", 8*"-", 8*"-", 10*"-", 10*"-"));
  
  get_reorg_stats(0.95);
  get_reorg_stats(0.90);
  get_reorg_stats(0.85);
  get_reorg_stats(0.80);
  get_reorg_stats(0.75);
  get_reorg_stats(0.70);
  get_reorg_stats(0.65);
  get_reorg_stats(0.60);
  get_reorg_stats(0.55);

  sys.stdout.write("\n");
  sys.stdout.close();
  # ----------------------------------------------------------------------

# Main:
get_all_reorg_stats();