import os
import sys
import math
import functions

# Task: Obtain stack probabilities from RNAfold -p
# Create list of stack probabilities from PostScript file,
# threshold E-11 set by hand in RNAfold.c
def postscript(seq, ps_path="dot2.ps"):
    """Collect stack probabilities from a PostScript dot plot.

    The file is scanned from its first "drawgrid" line onwards. Every line
    containing "lbox" is split on whitespace and read as "i j value ...";
    the stored probability is value * value (presumably because the dot
    plot holds square roots of probabilities -- confirm against the
    RNAfold version in use).

    Parameters:
        seq:     the sequence; entries whose first index is >= len(seq)
                 are skipped.
        ps_path: PostScript file to read. Defaults to "dot2.ps" in the
                 current working directory.

    Returns:
        Dictionary mapping (i, j) to the squared value.

    Raises:
        IOError/OSError if the file cannot be opened, ValueError if it
        has no "drawgrid" line or an "lbox" line is malformed.
    """
    # The context manager closes the file even if reading fails.
    with open(ps_path, 'r') as ps_file:
        lines = ps_file.readlines()
    first_entry = lines.index("drawgrid\n")
    bp_stack_dic = {}
    for line in lines[first_entry:]:
        if "lbox" not in line:
            continue
        info = line.split()
        bp_i = int(info[0])
        if bp_i < len(seq):
            root = float(info[2])
            bp_stack_dic[(bp_i, int(info[1]))] = root * root
    return bp_stack_dic

# Task: Function for constructing stems from the stack probability dictionary
# Find stacked base pairs (i+1,j-1), (i+2,j-2), ... where absolute
# percentage increase/decrease between stacked base pairs is < 66.7%.
def find_stems(bp_stack_dic):
    """Build stems with at least three entries from the stack probabilities.

    Parameters:
        bp_stack_dic: dictionary mapping a base pair (i, j) to its stack
                      probability.

    Returns:
        Dictionary mapping the outermost base pair (i, j) of each stem to
        (length, confidence). length counts the entries of the stem trace
        including its terminating base pair; confidence is the sum of the
        stack probabilities divided by (length - 1). A stem nested exactly
        one base pair inside a kept stem that is one longer is removed.
    """
    return _collect_stems(bp_stack_dic, 3)


# Task: Same stem construction as find_stems, but stems with only two
# entries (one stacked base pair plus its terminating base pair) are kept
# as well. Every trace has at least two entries, so no start pair is dropped
# before the duplicate filtering.
def find_stems_two(bp_stack_dic):
    """Build stems with at least two entries from the stack probabilities.

    Takes the same argument and returns the same structure as find_stems;
    only the minimum number of entries per stem differs (2 instead of 3).
    """
    return _collect_stems(bp_stack_dic, 2)


# Shared helpers for find_stems and find_stems_two.

# A stem is extended only while the relative difference between two
# consecutive stack probabilities stays below this percentage.
_MAX_STACK_PROB_CHANGE = 66.7


def _trace_stem(bp_stack_dic, bp):
    """Follow stacked base pairs (i+1,j-1), (i+2,j-2), ... starting at bp.

    Returns a list such as
    [((75, 96), 0.00111), ((76, 95), 0.00089), ((77, 94), 0.0)]
    whose last entry is always the terminating base pair with weight 0.0.
    """
    bp_i, bp_j = bp[0], bp[1]
    stack_prob_ij = functions.find_in_dic(bp_stack_dic, bp)
    stem = [(bp, stack_prob_ij)]
    count = 1
    while True:
        bp_next = (bp_i + count, bp_j - count)
        stack_prob_next = functions.find_in_dic(bp_stack_dic, bp_next)
        if stack_prob_next > 0.0:
            minimum = min(stack_prob_ij, stack_prob_next)
            maximum = max(stack_prob_ij, stack_prob_next)
            # Absolute percentage increase/decrease between the two stacks
            if abs((maximum - minimum) / maximum) * 100 < _MAX_STACK_PROB_CHANGE:
                stem.append((bp_next, stack_prob_next))
                count = count + 1
                stack_prob_ij = stack_prob_next
                continue
        # Either no stack is recorded for bp_next or its probability differs
        # too much: bp_next closes the stem with stack probability 0.0.
        stem.append((bp_next, 0.0))
        return stem


def _collect_stems(bp_stack_dic, min_entries):
    """Trace a stem from every base pair and keep those with >= min_entries."""
    matrix_stems = {}
    # sorted() works on Python 2 and 3, unlike keys() followed by sort().
    for bp in sorted(bp_stack_dic):
        stem = _trace_stem(bp_stack_dic, bp)
        if len(stem) < min_entries:
            continue
        prob = sum((item[1] for item in stem), 0.0)
        confidence = prob / (len(stem) - 1)  # Confidence indicator
        matrix_stems[(bp[0], bp[1])] = len(stem), confidence
    return _drop_nested_duplicates(matrix_stems)


def _drop_nested_duplicates(matrix_stems):
    """Keep only maximal stems: delete (57,65): 3 if (56,66): 4 exists."""
    matrix_stems_filtered = matrix_stems.copy()
    for stem in matrix_stems:
        length = matrix_stems[stem][0]
        duplicate = stem[0] + 1, stem[1] - 1
        if duplicate in matrix_stems and matrix_stems[duplicate][0] == length - 1:
            del matrix_stems_filtered[duplicate]
    return matrix_stems_filtered

# Task: Function for evaluation of local stem energies
# Store stems with structure in file for RNAeval
# Update stem dictionary with both stacking and free energy weights
def evaluation(matrix_stems, seq):
    """Evaluate the local energy of every stem with RNAeval.

    One record per stem (header, local sequence, structure) is written to
    stem_structure.txt in the current working directory. That file is piped
    through ViennaRNA-1.8.4/Progs/RNAeval twice, once with
    "-P stacking_only.par" and once without; the raw outputs are stored in
    stacking_energy.txt and loops_energy.txt.

    Parameters:
        matrix_stems: dictionary (start, end) -> (length, confidence, ...).
        seq:          the full sequence (1-based stem positions index it).

    Returns:
        The same matrix_stems object, in which every stem reported in the
        RNAeval output now maps to
        (length, confidence, energy_stack, energy_loops).
        If the commands produce no output the dictionary is unchanged.

    Raises:
        IndexError/ValueError if the RNAeval output is incomplete or does
        not have the expected three lines per stem.
    """
    records = []
    for key, entry in sorted(matrix_stems.items()):
        start, end = key[0], key[1]
        length_stem, confidence = entry[0], entry[1]
        header = "> %s %s %s %s\n" % (start, end, length_stem, confidence)
        records.append(_stem_record(header, start, end, length_stem, seq))
    with open("stem_structure.txt", 'w') as stem_structure:
        stem_structure.writelines(records)

    # Evaluate with stacking only
    energy_stacking = _run_and_store(
        "cat stem_structure.txt | ViennaRNA-1.8.4/Progs/RNAeval -d2 -P stacking_only.par",
        "stacking_energy.txt")
    # Evaluate with stacking and loop entropies
    energy_loops = _run_and_store(
        "cat stem_structure.txt | ViennaRNA-1.8.4/Progs/RNAeval -d2",
        "loops_energy.txt")

    for info, energy_stack, energy_loop in _energy_records(energy_stacking, energy_loops):
        key = int(info[1]), int(info[2])
        length = int(info[3])
        confidence = float(info[4])
        matrix_stems[key] = length, confidence, energy_stack, energy_loop
    return matrix_stems


# Task: Function for evaluation of local stem energies for
# stems shortened during pseudoknot construction
# Store stems with structure in file for RNAeval
# Build a separate dictionary for the shortened stems with both stacking
# and free energy weights
def re_evaluation_dangling(stems_shortened, matrix_stems, seq):
    """Evaluate the local energy of shortened stems with RNAeval.

    Works like evaluation, but uses the files
    stem_structure_reevaluation.txt, stacking_energy_reevaluation.txt and
    loops_energy_reevaluation.txt, and does not modify matrix_stems.

    Parameters:
        stems_shortened: iterable of (start, end, length) tuples;
                         duplicates are evaluated once.
        matrix_stems:    stem dictionary; the confidence of each shortened
                         stem is looked up here under (start, end).
        seq:             the full sequence.

    Returns:
        (matrix_stems, stems_shortened_dic) where stems_shortened_dic maps
        (start, end, length) to
        (length, confidence, energy_stack, energy_loops).
    """
    records = []
    for stem in sorted(set(stems_shortened)):
        start, end, length_stem = stem[0], stem[1], stem[2]
        header = "> %s %s %s\n" % (start, end, length_stem)
        records.append(_stem_record(header, start, end, length_stem, seq))
    with open("stem_structure_reevaluation.txt", 'w') as stem_structure:
        stem_structure.writelines(records)

    # Evaluate with stacking only
    energy_stacking = _run_and_store(
        "cat stem_structure_reevaluation.txt | ViennaRNA-1.8.4/Progs/RNAeval -d2 -P stacking_only.par",
        "stacking_energy_reevaluation.txt")
    # Evaluate with stacking and loop entropy
    energy_loops = _run_and_store(
        "cat stem_structure_reevaluation.txt | ViennaRNA-1.8.4/Progs/RNAeval -d2",
        "loops_energy_reevaluation.txt")

    stems_shortened_dic = {}
    for info, energy_stack, energy_loop in _energy_records(energy_stacking, energy_loops):
        key = int(info[1]), int(info[2])
        length = int(info[3])
        confidence = functions.find_in_dic(matrix_stems, key)[1]
        stems_shortened_dic[key + (length,)] = length, confidence, energy_stack, energy_loop
    return matrix_stems, stems_shortened_dic


# Shared helpers for evaluation and re_evaluation_dangling.

def _stem_record(header, start, end, length_stem, seq):
    """Return the three-line RNAeval input record for one stem.

    The structure is length_stem opening brackets, ":" for every position
    in between, and length_stem closing brackets. Unless the stem starts at
    position 1 / ends at len(seq), one extra neighbouring base is included
    on that side as a dangling end, marked with ":".
    """
    inner = end - start + 1 - 2 * length_stem
    structure = "(" * length_stem + ":" * inner + ")" * length_stem
    if start == 1:              # No dangling end on the left
        first = start - 1
    else:
        first = start - 2
        structure = ":" + structure
    if end == len(seq):         # No dangling end on the right
        last = end
    else:
        last = end + 1
        structure = structure + ":"
    return header + seq[first:last] + "\n" + structure + "\n"


def _run_and_store(command, output_name):
    """Run a shell command, save its standard output to a file, return it."""
    pipe = os.popen(command)
    try:
        result = pipe.read()
    finally:
        pipe.close()            # The pipe was previously left open
    with open(output_name, 'w') as output:
        output.write(result)
    return result


def _parse_energy(line):
    """Extract the energy from a "structure (energy)" output line.

    With more than two whitespace-separated fields, the third field minus
    its last character is used (presumably "( -3.40)"); otherwise the
    second field minus its first and last character ("(-13.40)").
    """
    fields = line.split()
    if len(fields) > 2:
        return float(fields[2][:-1])
    return float(fields[1][1:-1])


def _energy_records(stacking_output, loops_output):
    """Pair up both RNAeval outputs, three lines per stem.

    Returns a list of (header_fields, energy_stack, energy_loops), where
    header_fields is the split header line taken from the stacking output.
    """
    stack_lines = stacking_output.splitlines()
    loops_lines = loops_output.splitlines()
    parsed = []
    for i in range(0, len(stack_lines), 3):
        energy_stack = _parse_energy(stack_lines[i + 2])
        energy_loops = _parse_energy(loops_lines[i + 2])
        parsed.append((stack_lines[i].split(), energy_stack, energy_loops))
    return parsed

# Task: Filtering of stems with high free energy
def filter_stems(matrix_stems, cutoff_stack, cutoff_loop):
    """Return a copy of matrix_stems without the high-energy stems.

    A stem is removed when the value at index 2 of its entry is
    >= cutoff_stack or the value at index 3 is >= cutoff_loop (the
    stacking and loop energies stored by evaluation). The input
    dictionary itself is left untouched.
    """
    # First decide which stems fail a cutoff, then remove them from a copy.
    rejected = []
    for key in sorted(set(matrix_stems)):
        entry = matrix_stems[key]
        if entry[2] >= cutoff_stack or entry[3] >= cutoff_loop:
            rejected.append(key)
    survivors = matrix_stems.copy()
    for key in rejected:
        del survivors[key]
    return survivors

# Task: Filtering of stems with low probability
def filter_stems_prob(matrix_stems, cutoff_prob):
    """Return a copy of matrix_stems without the low-probability stems.

    A stem is removed when the value at index 1 of its entry (the
    confidence) is strictly below cutoff_prob. The input dictionary
    itself is left untouched.
    """
    too_weak = [key for key in sorted(set(matrix_stems))
                if matrix_stems[key][1] < cutoff_prob]
    remaining = matrix_stems.copy()
    for key in too_weak:
        del remaining[key]
    return remaining
