Last active
October 25, 2016 00:58
-
-
Save pratikone/8ff6b84319fc9d390a2748542ffea531 to your computer and use it in GitHub Desktop.
Create blosum matrix with 5 alignments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################## | |
# Author : Pratik Anand # | |
########################## | |
from sets import Set | |
from math import log | |
from decimal import * | |
from tabulate import tabulate | |
import copy | |
import global_alignment_DP as align | |
import affinity_global_align_DP as affine_align | |
getcontext().prec = 2 | |
#sequences = [ "SALAK", "EHMFK", "QYCQS", "IRFLQ", "CDAIR" ] | |
#sequences = [ "KRAAE", "WNAQS", "RDAIE", "RDAIE", "RDAIE" ] | |
#sequences = [ "LRLLQ", "MSVVE", "NRDRV", "GRDRV", "IGAFY" ] | |
sequences = [ "AAPMG", "ATSMG", "AAPMG", "AAPMG", "AAPMG" ] | |
SIZE = 5 | |
PRINT_TABLE = True | |
charSet = Set() | |
BLANK = "," | |
columns = [] | |
def printTable( headerList, table ) : | |
new_table = copy.deepcopy(table) | |
for indx,innerRow in enumerate(new_table): | |
innerRow.insert( 0, headerList[indx]) | |
print tabulate( new_table, headers=headerList, tablefmt="grid" ) | |
for i in xrange(SIZE) : | |
innerCol = {} | |
for j in xrange(SIZE) : | |
char = sequences[j][i] | |
charSet.add(char) #add to set for making table | |
if char in innerCol : | |
innerCol[char] = innerCol[char] + 1 | |
else : | |
innerCol[char] = 1 | |
columns.append( innerCol ) | |
print columns | |
combinations = {} | |
for col_id in xrange(SIZE): | |
dict_key_size = len(columns[col_id]) | |
for i in xrange(dict_key_size) : | |
for j in xrange(i, dict_key_size) : | |
string = columns[col_id].keys()[i] + columns[col_id].keys()[j] | |
if string[0] == string[1] : | |
val = columns[col_id][string[0]] | |
c = (val * (val-1))/2 | |
else : | |
val1 = columns[col_id][string[0]] | |
val2 = columns[col_id][string[1]] | |
c = val1 * val2 | |
if string in combinations : | |
combinations[string] = combinations[string] + c | |
else : | |
combinations[string] = c | |
print combinations | |
print sorted(charSet) | |
c_i_j = [] | |
for row in sorted(charSet) : | |
innerList = [] | |
for col in sorted(charSet) : | |
stringCombo = row + col | |
rev_string_combo = col + row | |
if stringCombo in combinations or rev_string_combo in combinations : #last bug hopefully, | |
if stringCombo in combinations : #it wasn't accounting for AB and BA together | |
key = stringCombo | |
else : | |
key = rev_string_combo | |
val = combinations[ key ] | |
innerList.append(val) | |
print str(val) + BLANK, | |
else : | |
innerList.append(0) #won't be computed against | |
print "?" + BLANK, | |
c_i_j.append( innerList ) | |
print "\n" | |
if PRINT_TABLE is True : | |
printTable( list(charSet), c_i_j ) | |
print "=============================================================" | |
q_i_j = [] | |
for i,row in enumerate(sorted(charSet)) : | |
innerList = [] | |
for j,col in enumerate(sorted(charSet)) : | |
stringCombo = row + col | |
val = c_i_j[i][j] | |
val = Decimal(val)/Decimal(50.0) | |
innerList.append(val) | |
print str(val) + BLANK, | |
q_i_j.append( innerList ) | |
print "\n" | |
if PRINT_TABLE is True : | |
printTable( list(charSet), q_i_j ) | |
print "=============================================================" | |
p_i_j = {} | |
tot = 0 | |
for i,row in enumerate(sorted(charSet)) : | |
sum = 0 | |
for j,col in enumerate(sorted(charSet)) : | |
if row == col : | |
val = q_i_j[i][j] | |
else: | |
val = q_i_j[i][j]/2 | |
sum = sum + val | |
#print row + " index : " + str(i) + " " + str(sum) + " " + str(val) | |
p_i_j[row] = sum | |
tot = tot+sum | |
print p_i_j | |
print tot | |
print "=============================================================" | |
e_i_j = [] | |
for i,row in enumerate(sorted(charSet)) : | |
innerList = [] | |
for j,col in enumerate(sorted(charSet)) : | |
if row == col : | |
val = p_i_j[row]**2 | |
else: | |
val = 2*p_i_j[row]*p_i_j[col] | |
print str(val) + BLANK, | |
innerList.append(val) | |
#print row + " index : " + str(i) + " " + str(sum) + " " + str(val) | |
e_i_j.append(innerList) | |
print "\n" | |
if PRINT_TABLE is True : | |
printTable( list(charSet), e_i_j ) | |
print "=============================================================" | |
blosum = [] | |
for i,row in enumerate(sorted(charSet)) : | |
innerList = [] | |
for j,col in enumerate(sorted(charSet)) : | |
if Decimal( e_i_j[i][j]) != Decimal(0.0) and Decimal( q_i_j[i][j]) != Decimal(0.0) : | |
val = int(round(2*log( Decimal( q_i_j[i][j] )/ Decimal( e_i_j[i][j] ), 2))) | |
else : val = "?" | |
print str(val) + BLANK, | |
innerList.append(val) | |
#print row + " index : " + str(i) + " " + str(sum) + " " + str(val) | |
blosum.append(innerList) | |
print "\n" | |
print "=============================================================" | |
if PRINT_TABLE is True : | |
printTable( list(charSet), blosum ) | |
print "=============================================================" | |
affine_align.USE_BLOSUM = True | |
if True : | |
for firstSeq in xrange(len(sequences) - 1) : | |
for secondSeq in xrange(firstSeq+1, len(sequences)) : | |
affine_align.sequence_align(sequences[firstSeq], sequences[secondSeq], charSet, blosum) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment