import os
import sys
import getopt
import stems
import bulges_internal
import pk_construction
import cc06
import cc09
import longPK
import pk_interrupted
import pk_energy
import mwis
import kissing_hairpins
import pk_tools

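""" Main Program
    Usage: dotknot.py input_file [output_file] [-k][-l][-g]
    -k: include kissing hairpins
    -l: show best local pseudoknots
    -g: show predicted global structure
    -h: show usage
    Results are written to output_file (default: pseudoknots.txt)
"""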

# Command-line switches; each stays False until its option is parsed
khp = False                 # -k: include kissing hairpins
local = False               # -l: show best local pseudoknots
global_structure = False    # -g: show predicted global structure

def Usage ():
    print "Usage"
    print "dotknot.py <infile> <outfile> [-k][-l][-g]"
    print "[-k] include kissing hairpins"
    print "[-l] show best local pseudoknots"
    print "[-g] show predicted global structure"
    sys.exit(0)

#---------------------------------------------------------------------------------------------------------   
    
# Read output file from user input in command line 
if len(sys.argv) > 2 and not '-' in sys.argv[2][0]:
    output_file_name = sys.argv[2]
    # Read optional arguments    
    try:
        optlist, arguments = getopt.getopt(sys.argv[3:], 'hklg')
        for opt in optlist:
            if opt[0] == '-h':
                print 
                print "Usage for DotKnot: ", 
                print "dotknot.py input_file [-k][-l][-g]"
                print "[-k] include kissing hairpins"
                print "[-l] show best local pseudoknots"
                print "[-g] show predicted global structure"
                sys.exit(0)            
            elif opt[0] == '-k':            
                khp = True
                print "Include kissing hairpins."
            elif opt[0] == '-l':
                local = True
                print "Show best local pseudoknots."              
            elif opt[0] == '-g':
                global_structure = True
                print "Show predicted global structure."
            else:
                print "Error while parsing options."
                sys.exit(0)
    except:
        print "Error while parsing options."
        sys.exit(0)
    
else:
    output_file_name = "pseudoknots.txt"
    # Read optional arguments    
    try:
        optlist, arguments = getopt.getopt(sys.argv[2:], 'hklg')
        for opt in optlist:
            if opt[0] == '-h':
                print 
                print "Usage for DotKnot: ", 
                print "dotknot.py input_file [-k][-l][-g]"
                print "[-k] include kissing hairpins"
                print "[-l] show best local pseudoknots"
                print "[-g] show predicted global structure"
                sys.exit(0)            
            elif opt[0] == '-k':            
                khp = True
                print "Include kissing hairpins."
            elif opt[0] == '-l':
                local = True
                print "Show best local pseudoknots."              
            elif opt[0] == '-g':
                global_structure = True
                print "Show predicted global structure."
            else:
                print "Error while parsing options."
                sys.exit(0)
    except:
        print "Error while parsing options."
        sys.exit(0)
    
# Read fasta file from user input in command line
try:
    if sys.argv[1] == '-h':
        print 
        print "Usage for DotKnot: ", 
        print "dotknot.py input_file [-k][-l][-g]"
        print "[-k] include kissing hairpins"
        print "[-l] show best local pseudoknots"
        print "[-g] show predicted global structure"

    # Read fasta file with multiple sequences from user input in command line
    try:
        fasta_file = sys.argv[1]
        f = open(fasta_file,'U')        
        list_of_ids_sequences = []              # Store ids and sequences in list
        for line in f:
            line = line.rstrip('\n')
            if line:
                list_of_ids_sequences.append(line)
    except IOError:
        print "Specify input file!"
        print "dotknot.py input_file [-k][-l][-g]"
        sys.exit(0)            
except IOError:
    print
    print "Specify input file!"
    print "dotknot.py input_file [-k][-l][-g]"
    sys.exit(0)
print

pk_file = open(output_file_name, "w")

if list_of_ids_sequences:    
    for i in xrange(0,len(list_of_ids_sequences),2):
        identifier = list_of_ids_sequences[i]
        seq = list_of_ids_sequences[i + 1]        
        seq = seq.upper()
        seq, identifier = seq.strip(), identifier.strip()
        seq = seq.replace('T','U')
        for base in seq:            
            if base in 'ACGU':
                pass
            else:
                print
                print "Sequence", identifier, "contains invalid characters."
                sys.exit(0)
        #--------------------------------------------------------------------------------------------------------- 
        infile = open("input.fasta","w")
        print >> infile, seq
        infile.close()
        #--------------------------------------------------------------------------------------------------------- 
        print identifier.strip()
        print seq
        print "Sequence length: ", len(seq)
        print
        print "DotKnot is running..."
        #--------------------------------------------------------------------------------------------------------- 
        try:            
            print >> pk_file, str(identifier)
            
            print "Predicting pseudoknots..."        
            mfe = os.popen('ViennaRNA-1.8.4/Progs/RNAfold -p2 -d2 -noLP < input.fasta')    
            result = mfe.read()
            result = result.split()
            mfestructure = result[1]
            mfenergy = result[2]
            mfenergy = mfenergy.replace('(','')
            mfenergy = mfenergy.replace(')','')
            #---------------------------------------------------------------------------------------------------------                
            # Create list of stack probabilities from PostScript file, threshold E-11
            bp_stack_dic = stems.postscript(seq)
            #---------------------------------------------------------------------------------------------------------            
            # Construct stems from stack probabilities and store with mean probability in stem dictionary
            matrix_stems = stems.find_stems(bp_stack_dic)
            #---------------------------------------------------------------------------------------------------------
            # Assign a local weight to the stems using enf energy evaluation
            # Format is the following (start, end): (length, confidence, stack_energy, loop_energy)
            matrix_stems = stems.evaluation(matrix_stems,seq)    
            # All stems are stored in the dictionary, now filter those with high free energy
            cutoff_stack = 0.0 
            cutoff_loop = 4.0
            matrix_stems = stems.filter_stems(matrix_stems, cutoff_stack, cutoff_loop)
            #---------------------------------------------------------------------------------------------------------
            # Construct stems with bulge loops, internal loops and multiloops 
            cutoff_prob =  0.001
            matrix_stems_ib = stems.filter_stems_prob(matrix_stems, cutoff_prob)
            structures_dic = bulges_internal.internal_mwis(matrix_stems_ib)
            bulge_internal_dic, multiloops = bulges_internal.evaluation_bulge_internal(structures_dic, seq)
            cutoff_loops = 0.0
            bulge_internal_dic = bulges_internal.filter_stems(bulge_internal_dic,cutoff_loops)
            multiloops = bulges_internal.filter_stems(multiloops,cutoff_loops) 
            #---------------------------------------------------------------------------------------------------------
            # Construct H-type core pseudoknots
            stems_shortened, pseudoknot_dic = pk_construction.build_pseudoknots(matrix_stems)
            # Construct pseudoknots with stems s_ib
            stems_shortened_ib, pk_with_IB = pk_construction.pseudoknots_with_IB(bulge_internal_dic, matrix_stems)      
            #---------------------------------------------------------------------------------------------------------
            # Re-evaluate stem energies to account for shortended stems
            stems_shortened.update(stems_shortened_ib)
            if stems_shortened:
                matrix_stems, stems_shortened_dic = stems.re_evaluation_dangling(stems_shortened,matrix_stems,seq)
            else:
                stems_shortened_dic = {}
            stems_shortened_dic = stems.filter_stems(stems_shortened_dic, cutoff_stack, cutoff_loop)
            #---------------------------------------------------------------------------------------------------------
            # Construct three different pseudoknot dictionaries
            pseudoknot_dic_cc06, pseudoknot_dic_cc09, pseudoknot_dic_longpk = pk_construction.pk_dic_scan(pseudoknot_dic, matrix_stems, stems_shortened_dic)
            #---------------------------------------------------------------------------------------------------------
            # Heuristic energy parameters
            init = 7.0 
            penalty = 0.1
            #---------------------------------------------------------------------------------------------------------
            # Evaluate energies for pseudoknots with loop L2 <= 1nt
            pseudoknot_dic_cc06_result = {}
            pseudoknot_dic_cc06_result = cc06.dic_caochen06(pseudoknot_dic_cc06,matrix_stems,stems_shortened_dic,seq)
            # Evaluate energies for pseudoknots with loop L2 >= 2nt and L2 <= 6nt
            pseudoknot_dic_cc09_result = {}
            pseudoknot_dic_cc09_result, entropies_dic, entropies_dic_L1, entropies_dic_L3 = cc09.dic_caochen09(pseudoknot_dic_cc09,matrix_stems)
            # Evaluate energies for pseudoknots with loop L2 >= 7nt
            pseudoknot_dic_longpk_result = {}
            pseudoknot_dic_longpk_result = longPK.dic_longpks(pseudoknot_dic_longpk,matrix_stems,init,penalty)
            # Now we form a pseudoknot dictionary out of the three results
            pk_core_dic = {}
            pk_core_dic.update(pseudoknot_dic_cc06_result)
            pk_core_dic.update(pseudoknot_dic_cc09_result)
            pk_core_dic.update(pseudoknot_dic_longpk_result)
            #---------------------------------------------------------------------------------------------------------
            # Now, look for recursive elements in the pseudoknot loops (MWIS) and add them to dictionary
            pk_core_dic = pk_energy.recursive_pk(matrix_stems, bulge_internal_dic, multiloops, pk_core_dic)
            # Combine recursive energies to overall pseudoknot energy
            # Construct three different pseudoknot dictionaries for energy re-evaluation because of the loop entropies
            pseudoknot_dic_cc06,pseudoknot_dic_cc09,pseudoknot_dic_longpk = pk_energy.pk_dic_scan_recursive(pk_core_dic,matrix_stems)  
            pseudoknot_dic_cc06_result = cc06.pk_energy_reevaluation_06(pseudoknot_dic_cc06,matrix_stems)
            pseudoknot_dic_cc09_result = cc09.pk_energy_reevaluation_09(pseudoknot_dic_cc09, matrix_stems, entropies_dic, entropies_dic_L1, entropies_dic_L3)
            pseudoknot_dic_longpk_result = longPK.pk_energy_reevaluation_long(pseudoknot_dic_longpk,matrix_stems,init,penalty) 
            pk_recursive_dic = {}
            pk_recursive_dic.update(pseudoknot_dic_cc06_result)
            pk_recursive_dic.update(pseudoknot_dic_cc09_result)
            pk_recursive_dic.update(pseudoknot_dic_longpk_result)
            #---------------------------------------------------------------------------------------------------------
            # Evaluate energies for pseudoknots with stems s_ib
            pk_dic_ib = pk_interrupted.evaluate_pk_with_IB(pk_with_IB, matrix_stems, stems_shortened_dic, init, penalty, seq)
            pk_dic_ib = pk_interrupted.recursive_pk(matrix_stems,bulge_internal_dic,multiloops,pk_dic_ib)
            pk_dic_ib = pk_interrupted.re_evaluate_pk_with_IB(pk_dic_ib, init, penalty)
            pk_dic_ib = pk_energy.pk_filter(pk_dic_ib)
            # Pseudoknots constructed with stems s_ib are stored in the pseudoknot dictionary
            pk_recursive_dic.update(pk_dic_ib)
            #---------------------------------------------------------------------------------------------------------
            # Here, return the best five near-optimal pseudoknots
            number_of_best_pks = 5
            best_pks_ratio = pk_tools.output_best_ratio(pk_recursive_dic, number_of_best_pks)
            best_pks_ratio = pk_tools.pk_structures(seq, best_pks_ratio, matrix_stems, stems_shortened_dic, bulge_internal_dic, multiloops, pk_recursive_dic, pk_dic_ib)
            best_pks_energy = pk_tools.output_best_energy(pk_recursive_dic, number_of_best_pks)
            best_pks_energy = pk_tools.pk_structures(seq, best_pks_energy, matrix_stems, stems_shortened_dic, bulge_internal_dic, multiloops, pk_recursive_dic, pk_dic_ib)       
            # Filter all pseudoknots with energy < 5.25 and normalized energy < -0.25
            pk_not_filtered = {}
            pk_not_filtered = pk_recursive_dic.copy()
            pk_recursive_dic = pk_energy.pk_filter(pk_recursive_dic)
            #---------------------------------------------------------------------------------------------------------
            # The user wants to predict only pseudoknots, no kissing hairpins
            if not khp:
                cutoff_prob =  0.001
                matrix_stems_mwis = stems.filter_stems_prob(matrix_stems, cutoff_prob)

                if matrix_stems_mwis or pk_recursive_dic or pk_dic_ib or bulge_internal_dic or multiloops:
                    mwis_dic, detected_pks, secondary_structures = mwis.method(matrix_stems_mwis, pk_recursive_dic, bulge_internal_dic, multiloops)
                    pseudoknot_list = mwis.pk_structures(seq, detected_pks, matrix_stems, stems_shortened_dic, bulge_internal_dic, multiloops, pk_recursive_dic, pk_dic_ib)
                else:
                    pseudoknot_list = []
                    secondary_structures = {}

                if pseudoknot_list:
                    print 
                    print "Detected pseudoknots:"
                    for pk in pseudoknot_list:
                        print pk[0], pk[1], pk[2]
                        print pk[4]
                        print pk[5]
                        print
                if not pseudoknot_list:
                    print "No pseudoknots were detected."
                                
                if pseudoknot_list:
                        print >> pk_file, "Detected pseudoknots:"
                        for pk in pseudoknot_list:
                                print >> pk_file, str(pk[0]),str(pk[1]), str(pk[2])
                                print >> pk_file, str(pk[4])
                                print >> pk_file, str(pk[5])		
                if not pseudoknot_list:
                    print >> pk_file, "No pseudoknots were detected."    

                # The user wants to see near-optimal pseudoknots 
                if local:
                    print
                    print "Best", number_of_best_pks, "pseudoknots in terms of energy to length ratio:"
                    for item in best_pks_ratio:
                        print item[0], item[1], item[2]
                        print item[3]
                        print item[4]
                    print
                    print "Best", number_of_best_pks, "pseudoknots in terms of free energy:"
                    for item in best_pks_energy:
                        print item[0], item[1], item[2]
                        print item[3]
                        print item[4]
                    print       
                                           
                # Assemble global structure if desired by the user
                if global_structure:
                    predicted_global_structure = mwis.assemble_global_structure(seq, secondary_structures, pseudoknot_list)            
                    print "Predicted global structure"
                    print seq
                    print predicted_global_structure        
            #---------------------------------------------------------------------------------------------------------    
            # If user wants to predict kissing hairpins
            if khp:
                print "Predicting kissing hairpins..."      
                kissing_hairpin_dic = {}          
                #---------------------------------------------------------------------------------------------------------
                # Kissing hairpin parameters
                cutoff_stack = -5.0 
                cutoff_loop = 2.0       
                init = 9.0 
                unpaired_nt = 0.5
                unpaired_nt_l3 = 0.0 
                #---------------------------------------------------------------------------------------------------------
                matrix_stems_kissing = stems.filter_stems(matrix_stems, cutoff_stack, cutoff_loop)       
                #---------------------------------------------------------------------------------------------------------
                pseudoknot_second, pseudoknot_first, stems_shortened = kissing_hairpins.build_pseudoknots(matrix_stems_kissing)        
                # Re-evaluate stem energies to account for shortended stems
                if stems_shortened:
                    matrix_stems_kissing, stems_shortened_dic = stems.re_evaluation_dangling(stems_shortened, matrix_stems_kissing, seq)
                else:
                    stems_shortened_dic = {}
                stems_shortened_dic = stems.filter_stems(stems_shortened_dic, cutoff_stack, cutoff_loop)
                #---------------------------------------------------------------------------------------------------------               
                # Stems can be filtered because only consecutive items are considered in recursive element search
                matrix_stems_mwis = kissing_hairpins.filter_kissing_stems_mwis(matrix_stems_kissing, bulge_internal_dic, multiloops)        
                # Use only stems with high probability
                cutoff_prob =  0.001
                matrix_stems_mwis = stems.filter_stems_prob(matrix_stems_mwis, cutoff_prob)
                best_khps = kissing_hairpins.kissing_hairpins(pseudoknot_second, pseudoknot_first, matrix_stems_kissing, stems_shortened_dic, matrix_stems_mwis, bulge_internal_dic, multiloops, init, unpaired_nt, unpaired_nt_l3, pk_core_dic)                        
                matrix_stems_mwis.clear()        
                #---------------------------------------------------------------------------------------------------------
                # Here, return the best five near-optimal pseudoknots (before filtering) and kissing hairpins
                number_of_best_pks = 5    
                best_pks_ratio = pk_tools.output_best_ratio_khp(pk_not_filtered, best_khps, number_of_best_pks)
                best_pks_ratio = pk_tools.pk_khp_structures(seq, best_pks_ratio, matrix_stems, stems_shortened_dic, bulge_internal_dic, multiloops, pk_not_filtered, best_khps)
                best_pks_energy = pk_tools.output_best_energy_khp(pk_not_filtered, best_khps, number_of_best_pks)
                best_pks_energy = pk_tools.pk_khp_structures(seq, best_pks_energy, matrix_stems, stems_shortened_dic, bulge_internal_dic, multiloops, pk_not_filtered, best_khps)       
                #---------------------------------------------------------------------------------------------------------
                # Kissing hairpin candidates are found        
                if best_khps:
                    cutoff_prob =  0.001
                    matrix_stems_mwis = stems.filter_stems_prob(matrix_stems, cutoff_prob)

                    if matrix_stems_mwis or pk_recursive_dic or best_khps or bulge_internal_dic or multiloops:
                        mwis_dic, crossing_structures, secondary_structures = kissing_hairpins.method(matrix_stems_mwis, pk_recursive_dic, bulge_internal_dic, multiloops, best_khps)
                        if crossing_structures:
                            pseudoknot_list = kissing_hairpins.khp_structures(seq, crossing_structures, best_khps, matrix_stems, stems_shortened_dic, bulge_internal_dic, multiloops, pk_recursive_dic, pk_dic_ib)
                        else:
                            pseudoknot_list = []
                    else:
                        pseudoknot_list = []
                        secondary_structures = {}

                # No kissing hairpin candidates are found
                if not best_khps:
                    cutoff_prob =  0.001
                    matrix_stems_mwis = stems.filter_stems_prob(matrix_stems, cutoff_prob)
                    
                    if matrix_stems_mwis or pk_recursive_dic or best_khps or bulge_internal_dic or multiloops:
                        mwis_dic, detected_pks, secondary_structures = mwis.method(matrix_stems_mwis, pk_recursive_dic, bulge_internal_dic, multiloops)
                        pseudoknot_list = mwis.pk_structures(seq, detected_pks, matrix_stems, stems_shortened_dic, bulge_internal_dic, multiloops, pk_recursive_dic, pk_dic_ib)
                    else:
                        pseudoknot_list = []
                        secondary_structures = {}

                if pseudoknot_list:
                    print
                    print "Detected pseudoknots and kissing hairpins:"            
                    for pk in pseudoknot_list:
                        print pk[0], pk[1], pk[2]
                        print pk[4]
                        print pk[5]
                        print
                if not pseudoknot_list:
                    print "No pseudoknots or kissing hairpins were detected."
                
                if pseudoknot_list:
                        print >> pk_file, "Detected pseudoknots and kissing hairpins:"
                        for pk in pseudoknot_list:
                                print >> pk_file, str(pk[0]),str(pk[1]), str(pk[2])
                                print >> pk_file, str(pk[4])
                                print >> pk_file, str(pk[5])		
                if not pseudoknot_list:
                    print >> pk_file, "No pseudoknots or kissing hairpins were detected."                           

                # The user wants to see near-optimal pseudoknots 
                if local:
                    print
                    print "Best", number_of_best_pks, "pseudoknots and kissing hairpins in terms of energy to length ratio:"
                    for item in best_pks_ratio:
                        if item[3] == 'khp':
                            print item[0], item[1], item[2]
                            print item[4]
                            print item[5]
                        else:                
                            print item[0], item[1], item[2]
                            print item[3]
                            print item[4]
                    print
                    print "Best", number_of_best_pks, "pseudoknots and kissing hairpins in terms of free energy:"
                    for item in best_pks_energy:
                        if item[3] == 'khp':
                            print item[0], item[1], item[2]
                            print item[4]
                            print item[5]
                        else:                  
                            print item[0], item[1], item[2]
                            print item[3]
                            print item[4]
                    print 

                # Assemble global structure if desired by the user
                if global_structure:
                    predicted_global_structure = mwis.assemble_global_structure(seq, secondary_structures, pseudoknot_list)
                    print "Predicted global structure"
                    print seq
                    print predicted_global_structure
            print
            print >> pk_file, str('\n')
        except IOError:
            print "Error while executing DotKnot."
            sys.exit(0)

pk_file.close()                                

