""" Methods to build the main tables that represent the tree.
    - Nodes DataFrame
    - Transitions DataFrame


    === Example of node's initial DataFrame.
    node_idx    node   freq  active  depth  parent_idx   likelihood
           8    1010   5900       0      4           9 -3596.440982
           9     010   8405       0      3          10 -5121.113204
          10      10   8405       0      2          11 -5121.113204
          11       0  11589       0      1          52 -6814.372550


    === Example of transition probabilities DataFrame ===

    idx next_symbol  freq      prob   likelihood
      8           0  1761  0.298475 -2129.173188
      8           1  4139  0.701525 -1467.267794
      9           0  2506  0.298156 -3032.608109
      9           1  5899  0.701844 -2088.505095
     10           0  2506  0.298156 -3032.608109

"""

import numpy as np
import pandas as pd
#from operator import add


def nodes_and_transitions(sample):
    """Creates tables with contexts and transition probabilites

    For a given sample, this method returns two Pandas dataframes.
    The first one contains the node counts and likelihoods,
    while the second one contains the transition probabilities.

    Arguments:
        sample {Sample} -- Sample object

    Returns:
        tuple(DataFrame, DataFrame) -- Counts and transitions tables
    """

    # creates the main DataFrames (nodes and transitions)
    df = pd.DataFrame()
    df = create_nodes_table(sample)
    transition_probs = create_transitions_table(df, sample)
    df = calc_node_likelihoods(df, transition_probs)

    # remove unuseful data
    #df.reset_index(inplace=True)
    return df, transition_probs


def remove_last_level(df, max_depth):
    """ Removes max_depth + 1 nodes

    Node frequencies are initially calculated for nodes
    with length max_depth + 1 in order to efficiently extract
    the transition. Since transitions were already calculated
    we can drop the nodes with length == (max_depth + 1)
    """

    df = df[df.depth <= max_depth]


def create_nodes_table(sample):
    """Creates the nodes table given a sample

    Builds the node table. The node counts were already precomputed
    in the sample object (see sample.py#_calculate_substr_frequencies)

    Arguments:
        sample {Sample} -- A Sample object
    """
    cols = {'index': 'node', 'N': 'freq'}
    df = sample.F['N'].to_frame().reset_index().rename(columns=cols).copy()
    df['active'] = 0
    create_indexes(df)
    remove_last_level(df, sample.max_depth)
    bind_parent_nodes(df)

    return df


def create_transitions_table(df, sample):
    """Creates the transitions table

    This method creates the following transition table:

    idx next_symbol  freq      prob
      8           0  1761  0.298475
      8           1  4139  0.701525

    - where idx is the node index (in the nodes table)
    - next symbol is a possible transition from this node
    - freq is the number of occurrences of this node in the sample
    - prob is the probability of transition from this node to next_symbol

    Arguments:
        df {[type]} -- [description]
        sample {[type]} -- [description]

    Returns:
        DataFrame -- the transitions table
    """

    tbl = sample.F[[i for i in range(len(sample.A))]].astype(int)

    tbl['idx'] = df.reset_index().set_index('node')['node_idx']
    tbl = tbl.melt(id_vars=['idx'], var_name='next_symbol', value_name='freq')
    tbl = tbl.sort_values(['idx', 'next_symbol']).reset_index(drop=True).set_index('idx')
    tbl.freq = tbl.freq.astype(int)
    tbl['prob'] = tbl.freq / df.freq
    transition_probs = tbl.reset_index()
    return transition_probs


def create_indexes(df):
    # creates a column with the node length
    df['depth'] = df.node.str.len()
    df.reset_index(inplace=True, drop=True)
    # sets node_idx column as the index field
    df.index.name = 'node_idx'


def calc_node_likelihoods(df, transition_probs):
    """Calculates node log-likelihoods

    Given the node and transition tables, calculates the
    log-likelihood of each node.

    Arguments:
        df {DataFrame} -- node table
        transition_probs {DataFrame} -- transitions table

    Returns:
        DataFrame -- returns the (modified) node table
    """

    # For each row with freq > 0, compute N * log(p)
    # in a column named likelihood
    probs = transition_probs
    L = probs.freq[probs.freq > 0] * np.log(probs.prob[probs.freq > 0])
    transition_probs['likelihood'] = L

    # then we sum all the log-likelihoods grouped by each node.
    # the results are stored in the node table, associated to
    # their respective nodes.
    fn = lambda s: s.likelihood.sum()
    df['likelihood'] = transition_probs.groupby(['idx']).apply(fn)
    return df


def calculate_num_child_nodes(df):
    """ Calculates and stores the number of child nodes of all nodes

    Given a context tree dataframe, this method creates a column in
    the node table counting the number of child nodes

    Arguments:
        df {DataFrame} -- the node table

    Returns:
        DataFrame -- the (modified) node table
    """

    num_child_nodes = (df[df.depth >= 1]
                       .reset_index(drop=False)
                       .groupby(['parent_idx'])
                       .count()).node_idx

    if df.index.name != 'node_idx':
        df.set_index('node_idx', inplace=True)
    df['num_child_nodes'] = num_child_nodes
    df.reset_index(inplace=True, drop=False)
    return df


def bind_parent_nodes(df):
    """
#    Adds a column with reference to the parent node index for each node
    """

    # gets the parent node string
    pa = df.node.str.slice(start=1)

    # set the table index to the node column
    node_df = df.reset_index().set_index('node', drop=False)

#    # adds a column with the parent_idxs related to their child nodes
    parent_idxs = node_df.loc[pa.values].node_idx
    df['parent_idx'] = parent_idxs.values
    df.loc[df.node == '', 'parent_idx'] = -1