## Name of the package that the functions in this file belong to.
.packageName <- "bgafun"
"Calculate_Row_Weights" <-
function(my_amino,label){

## Calculate the sequence weights for all the rows in my_amino,
## using label as the grouping.
##
## my_amino: amino acid matrix with one row per sequence.
## label:    group label for every row of my_amino (coerced to a factor).
##
## Returns a numeric vector with one weight per row. The weights are
## computed by Henikoff_weights() separately inside each group. Rows
## whose label is NA keep an NA weight.

label <- as.factor(label)

## Numeric accumulator. The weights must not be stored in a character
## vector and parsed back, because that rounds every value to 15
## significant digits.
weight <- rep(NA_real_, length(label))

for (group in levels(label)) {
    in_group <- which(label == group)
    ## drop = FALSE keeps a one-sequence group as a one-row matrix
    ## instead of collapsing it to a plain vector.
    weight[in_group] <- Henikoff_weights(my_amino[in_group, , drop = FALSE])
}

return(weight)

}

"Henikoff_weights" <-
function(my_amino){ 
## Henikoff (position-based) sequence weights.
## For each alignment position, count the number of residue types R;
## a residue that occurs n times at that position contributes 1/(R*n)
## to every sequence carrying it.
##                     AAABBC = Three residue types....
##                              A -> 1/(3*3), B -> 1/(3*2), C -> 1/(3*1)
##
## my_amino: amino acid binary matrix (20 columns per alignment position),
##           as generated by convert_aln_amino().
##
## Only positions holding more than three residues are summed over.
## Returns one weight per row of my_amino, normalised to sum to 1. When no
## position qualifies (the raw weights sum to zero) every sequence gets the
## same weight 1/(number of sequences) instead of NaN.

my_profile <- create_profile(my_amino)
seq_number <- dim(my_amino)[1]
seq_length <- dim(my_amino)[2]

## TRUE for positions with more than three residues, repeated for each of
## the 20 residue columns of that position.
column_used <- rep(apply(my_profile, 2, sum) > 3, each = 20)

## Number of distinct residue types per position, repeated the same way.
residue_types <- rep(apply(my_profile > 0, 2, sum), each = 20)

weight <- ((1 / my_profile) * (1 / residue_types)) * as.numeric(column_used)

## Residues that never occur give 1/0 = Inf (or NaN once multiplied by 0);
## they carry no weight.
weight[is.infinite(weight) | is.nan(weight)] <- 0

## Repeat the per-column weights on every row so they line up with my_amino.
weights_array <- rep(as.vector(weight), each = seq_number)
dim(weights_array) <- c(seq_number, seq_length)
seq_weights <- apply(weights_array * my_amino, 1, sum)
total <- sum(seq_weights)

## Normalize the weights
if (!is.na(total) && total == 0) {
    ## Nothing to discriminate on: fall back to uniform weights rather
    ## than dividing by zero.
    seq_weights[] <- 1 / seq_number
    return(seq_weights)
}

seq_weights <- seq_weights / total
return(seq_weights)
}

"Weight_Amino" <-
function(my_amino,label){
## Scale each row of an amino acid matrix by its sequence weight.
##
## my_amino: amino acid matrix, one row per sequence.
## label:    group label for every row, passed to Calculate_Row_Weights().
##
## Returns my_amino with every row multiplied by that row's weight.

row_weights <- Calculate_Row_Weights(my_amino, label)
n_columns <- ncol(my_amino)

## Repeating the weight vector once per column lines it up with the
## column-major layout of the matrix, so row i is scaled by weight i.
my_amino * rep(row_weights, times = n_columns)
}

"add_pseudo_counts" <-
function(amino,groups){
## Add pseudo counts to a binary matrix, defined separately for each group.
## The values to add are computed per group by calculate_pseudo().
##
## amino:  amino acid matrix, one row per sequence.
## groups: group label for every row of amino (coerced to a factor).
##
## Returns a copy of amino in which the rows of each group have been
## incremented by that group's pseudo counts.

new_amino <- amino
grouping <- as.factor(groups)

for (i in seq_along(levels(grouping))) {
    index <- grouping == levels(grouping)[i]
    ## Explicit row indexing ("index, "): a bare new_amino[index] only hits
    ## the right cells of a matrix through logical recycling, and selects
    ## columns instead of rows when amino is a data frame.
    new_amino[index, ] <- amino[index, , drop = FALSE] +
        calculate_pseudo(amino, grouping, i)
}

return(new_amino)

}

"amino_counts" <-
function(x,k){
## Calculate the counts of amino acid types at a position.
##
## x is the amino acid matrix (20 columns per alignment position)
## k is the zero-based position: columns (k*20)+1 .. (k*20)+20 are used
##
## Returns a vector of 20 column sums, one per residue column.

first <- (k * 20) + 1
last <- first + 19

## drop = FALSE keeps the block two-dimensional even when x has a single
## row; otherwise apply() fails on the resulting plain vector.
block <- x[, first:last, drop = FALSE]
z <- apply(block, 2, sum)
return(z)
}

"average_cols_aap" <-
function(x,y){

## Average out the gaps in an Amino Acid Properties matrix
## with the average of the group, and remove remaining gaps.
##
## x: property matrix, one row per sequence; a value of 0 marks a gap.
## y: group label for every row of x (coerced to a factor).
##
## Within each group and column, if more than 80% of the entries are
## non-zero, the zero entries are replaced by the mean of the non-zero
## ones. Afterwards every column that still contains a zero is dropped.
## Returns the remaining columns of x.

gap_fraction <- 0.8

y <- as.factor(y)

for (group in levels(y)) {
    ## Exact label comparison. A pattern match (grep) would also select
    ## rows of any group whose name merely contains this one, e.g. "g1"
    ## would pick up "g10".
    rows <- which(y == group)
    if (length(rows) == 0) {
        next
    }
    for (j in seq_len(dim(x)[2])) {
        col_list <- x[rows, j]
        is_gap <- col_list == 0
        if (sum(!is_gap) / length(col_list) > gap_fraction) {
            col_list[is_gap] <- sum(col_list[!is_gap]) / sum(!is_gap)
            x[rows, j] <- col_list
        }
    }
}

no_gaps <- x[, !(apply(x == 0, 2, sum) > 0)]
return(no_gaps)

}

"calculate_pseudo" <-
function(amino,groups,i){
## Calculate the fraction to add onto each residue in the amino matrix.
##
## amino:  amino acid matrix, one row per sequence.
## groups: group label for every row of amino (coerced to a factor).
## i:      index into levels(groups) of the group to work on.
##
## Returns a matrix with one row per member of the group and the same
## columns as amino; every row holds the same values, taken from
## pseudo_counts() applied to the group's profile.

groups <- as.factor(groups)
in_group <- groups == levels(groups)[i]
n_members <- sum(in_group)

## drop = FALSE keeps a one-sequence group as a one-row matrix.
profile <- create_profile(amino[in_group, , drop = FALSE])
pseudo <- pseudo_counts(profile)

## Repeat each profile value once per group member, then lay the values
## out as members x (20 * positions).
to_add <- rep(pseudo, each = n_members)
dim(to_add) <- c(n_members, dim(profile)[1] * dim(profile)[2])
rownames(to_add) <- rownames(amino)[in_group]
colnames(to_add) <- colnames(amino)
return(to_add)
}

"convert_aln_AAP" <-
function(Alignment){
## Convert Alignment into matrix using Amino Acid properties encoding
## There are five AAP factors:
## Factor A: Polarity Index
## Factor B: Secondary Structural Configurations
## Factor C: Molecular Size
## Factor D: "Amino Acid Composition, number of codons etc"
## Factor E: Charge
##
## Alignment: list with the sequences in $seq and their names in $nam.
## Returns a matrix with one row per sequence (named from $nam) and the
## columns named by create_colnames_AAP().

## Split one sequence into single characters and encode it.
encode_sequence <- function(idx) {
    residues <- unlist(strsplit(unlist(Alignment$seq[idx]), split = NULL))
    convert_seq_AAP(residues)
}

encoded <- lapply(1:length(Alignment$seq), encode_sequence)

## One data frame column per sequence, transposed to one row per sequence.
aap_matrix <- t(as.data.frame(encoded))
rownames(aap_matrix) <- Alignment$nam
colnames(aap_matrix) <- create_colnames_AAP(Alignment)

aap_matrix
}

"convert_aln_amino" <-
function(Alignment){

## Convert an alignment object into a binary matrix, representing the
## presence or absence of each amino acid at each position in the alignment.
##
## Alignment: list with the sequences in $seq and their names in $nam.
## Returns a matrix with one row per sequence (named from $nam) and the
## columns named by create_colnames_amino().

## Split one sequence into single characters and encode it.
encode_sequence <- function(idx) {
    residues <- unlist(strsplit(unlist(Alignment$seq[idx]), split = NULL))
    convert_seq_amino(residues)
}

encoded <- lapply(1:length(Alignment$seq), encode_sequence)

## One data frame column per sequence, transposed to one row per sequence.
amino_matrix <- t(as.data.frame(encoded))
rownames(amino_matrix) <- Alignment$nam
colnames(amino_matrix) <- create_colnames_amino(Alignment)
amino_matrix
}

"convert_seq_AAP" <-
function(x){

## Convert sequence into vector using the property encoding scheme.
##
## x: character vector with one alignment symbol per element. Residue
##    letters are accepted in either case; "-", "." and "X" (either case)
##    are gaps and encode as five zeros.
##
## Returns a numeric vector of length 5 * length(x) holding the five
## property values of each symbol in order (numeric(0) for empty input).
## If a symbol is not recognised, a message naming the first offending
## position is printed and the string "ERROR" is returned.

## Property values per residue, one row per symbol. Kept in a lookup
## table so that no local variables named T or F are needed.
aap_table <- rbind(
    D = c( 1.050, 0.302,-3.656,-0.259,-3.242),
    T = c(-0.032, 0.326, 2.213, 0.908, 1.313),
    E = c( 1.357,-1.453, 1.477, 0.113,-0.837),
    C = c(-1.343, 0.465,-0.862,-1.020,-0.255),
    M = c(-0.663,-1.524, 2.219,-1.005, 1.212),
    Y = c( 0.260, 0.830, 3.097,-0.838, 1.512),
    K = c( 1.831,-0.561, 0.533,-0.277, 1.648),
    R = c( 1.538,-0.055, 1.502, 0.440, 2.897),
    S = c(-0.228, 1.399,-4.760, 0.670,-2.647),
    Q = c( 0.931,-0.179,-3.005,-0.503,-1.853),
    F = c(-1.006,-0.590, 1.891,-0.397, 0.412),
    P = c( 0.189, 2.081,-1.628, 0.421,-1.392),
    W = c(-0.595, 0.009, 0.672,-2.128,-0.184),
    N = c( 0.945, 0.828, 1.299,-0.169, 0.933),
    G = c(-0.384, 1.652, 1.330, 1.045, 2.064),
    V = c(-1.337,-0.279,-0.544, 1.242,-1.262),
    I = c(-1.239,-0.547, 2.131, 0.393, 0.816),
    L = c(-1.019,-0.987,-1.505, 1.266,-0.912),
    A = c(-0.591,-1.302,-0.733, 1.570,-0.146),
    H = c( 0.336,-0.417,-1.673,-1.474,-0.078),
    X = c( 0, 0, 0, 0, 0)
)

if (length(x) == 0) {
    return(numeric(0))
}

## Case-insensitive lookup; all gap symbols share the zero row "X".
symbols <- toupper(x)
symbols[symbols %in% c("-", ".")] <- "X"
row_index <- match(symbols, rownames(aap_table))

unknown <- which(is.na(row_index))
if (length(unknown) > 0) {
    print(c("Error for position ", unknown[1], "in ", x))
    return("ERROR")
}

## Transposing puts the five values of each symbol next to each other.
ans <- as.vector(t(aap_table[row_index, , drop = FALSE]))
return(ans)

}

"convert_seq_amino" <-
function(x){

## Convert sequence into binary string.
##
## x: character vector with one alignment symbol per element. Residue
##    letters are accepted in either case; "-", "." and "X" (either case)
##    are gaps and encode as twenty zeros.
##
## Returns a numeric 0/1 vector of length 20 * length(x): for every symbol
## a block of 20 values with a single 1 in the slot of its residue, in the
## order A R N D C Q E G H I L K M F P S T W Y V (numeric(0) for empty
## input). If a symbol is not recognised, a message naming the first
## offending position is printed and the string "ERROR" is returned.

residue_order <- c("A","R","N","D","C","Q","E","G","H","I",
                   "L","K","M","F","P","S","T","W","Y","V")
gap_symbols <- c("-", ".", "X")

n_symbols <- length(x)
if (n_symbols == 0) {
    return(numeric(0))
}

## Case-insensitive lookup of each symbol's slot (NA for gaps/unknowns).
symbols <- toupper(x)
slot <- match(symbols, residue_order)

unknown <- which(is.na(slot) & !(symbols %in% gap_symbols))
if (length(unknown) > 0) {
    print(c("Error for position ", unknown[1], "in ", x))
    return("ERROR")
}

## One column of 20 slots per symbol; gaps stay all zero.
encoded <- matrix(0, nrow = 20, ncol = n_symbols)
is_residue <- which(!is.na(slot))
encoded[cbind(slot[is_residue], is_residue)] <- 1

ans <- as.vector(encoded)
return(ans)

}

"create_colnames_AAP" <-
function(Alignment){
## Create column names for the Amino Acid Properties Matrix.
##
## Alignment: list whose $seq holds the aligned sequences; the length of
##            the first sequence gives the number of positions.
##
## Returns a character vector with five names per position, the position
## number followed by the factor letter: "1A","1B",...,"1E","2A",...

factor_letters <- c("A","B","C","D","E")

## unlist() so that $seq may be either a list or a character vector,
## matching create_colnames_amino(); strsplit() rejects a list.
first_sequence <- unlist(Alignment$seq[1])
Aln_length <- length(unlist(strsplit(first_sequence, split = NULL)))

if (Aln_length == 0) {
    return(character(0))
}

col_names <- paste0(rep(seq_len(Aln_length), each = length(factor_letters)),
                    factor_letters)
return(col_names)
}

"create_colnames_amino" <-
function(Alignment){

## Build the column labels of the amino acid matrix: for every alignment
## position, the position number followed by each residue letter.
## The residue order matches the vectors used in convert_seq_amino().
##
## Alignment: list whose $seq holds the aligned sequences; the length of
##            the first sequence gives the number of positions.

residue_order <- c("A","R","N","D","C","Q","E","G","H","I",
                   "L","K","M","F","P","S","T","W","Y","V")

## "x" is the placeholder that gets replaced by the position number.
template <- paste0("x", residue_order)

first_sequence <- unlist(Alignment$seq[1])
n_positions <- length(unlist(strsplit(first_sequence, split = NULL)))

labels_by_position <- lapply(1:n_positions,
                             function(pos) sub("x", pos, template))
unlist(labels_by_position)
}

"create_probab" <-
function(){
  ## Return the 20 x 20 probability table used to calculate the pseudocounts.
  ## The result is a data frame whose rows and columns are both labelled
  ## A R N D C Q E G H I L K M F P S T W Y V; each vector below is one column.

probab <- cbind(
A=c(0.0215,0.0023,0.0019,0.0022,0.0016,0.0019,0.003,0.0058,0.0011,0.0032,0.0044,0.0033,0.0013,0.0016,0.0022,0.0063,0.0037,0.0004,0.0013,0.0051),
R=c(0.0023,0.0178,0.002,0.0016,0.0004,0.0025,0.0027,0.0017,0.0012,0.0012,0.0024,0.0062,0.0008,0.0009,0.001,0.0023,0.0018,0.0003,0.0009,0.0016),
N=c(0.0019,0.002,0.0141,0.0037,0.0004,0.0015,0.0022,0.0029,0.0014,0.001,0.0014,0.0024,0.0005,0.0008,0.0009,0.0031,0.0022,0.0002,0.0007,0.0012),
D=c(0.0022,0.0016,0.0037,0.0213,0.0004,0.0016,0.0049,0.0025,0.001,0.0012,0.0015,0.0024,0.0005,0.0008,0.0012,0.0028,0.0019,0.0002,0.0006,0.0013),
C=c(0.0016,0.0004,0.0004,0.0004,0.0119,0.0003,0.0004,0.0008,0.0002,0.0011,0.0016,0.0005,0.0004,0.0005,0.0004,0.001,0.0009,0.0001,0.0003,0.0014),
Q=c(0.0019,0.0025,0.0015,0.0016,0.0003,0.0073,0.0035,0.0014,0.001,0.0009,0.0016,0.0031,0.0007,0.0005,0.0008,0.0019,0.0014,0.0002,0.0007,0.0012),
E=c(0.003,0.0027,0.0022,0.0049,0.0004,0.0035,0.0161,0.0019,0.0014,0.0012,0.002,0.0041,0.0007,0.0009,0.0014,0.003,0.002,0.0003,0.0009,0.0017),
G=c(0.0058,0.0017,0.0029,0.0025,0.0008,0.0014,0.0019,0.0378,0.001,0.0014,0.0021,0.0025,0.0007,0.0012,0.0014,0.0038,0.0022,0.0004,0.0008,0.0018),
H=c(0.0011,0.0012,0.0014,0.001,0.0002,0.001,0.0014,0.001,0.0093,0.0006,0.001,0.0012,0.0004,0.0008,0.0005,0.0011,0.0007,0.0002,0.0015,0.0006),
I=c(0.0032,0.0012,0.001,0.0012,0.0011,0.0009,0.0012,0.0014,0.0006,0.0184,0.0114,0.0016,0.0025,0.003,0.001,0.0017,0.0027,0.0004,0.0014,0.012),
L=c(0.0044,0.0024,0.0014,0.0015,0.0016,0.0016,0.002,0.0021,0.001,0.0114,0.0371,0.0025,0.0049,0.0054,0.0014,0.0024,0.0033,0.0007,0.0022,0.0095),
K=c(0.0033,0.0062,0.0024,0.0024,0.0005,0.0031,0.0041,0.0025,0.0012,0.0016,0.0025,0.0161,0.0009,0.0009,0.0016,0.0031,0.0023,0.0003,0.001,0.0019),
M=c(0.0013,0.0008,0.0005,0.0005,0.0004,0.0007,0.0007,0.0007,0.0004,0.0025,0.0049,0.0009,0.004,0.0012,0.0004,0.0009,0.001,0.0002,0.0006,0.0023),
F=c(0.0016,0.0009,0.0008,0.0008,0.0005,0.0005,0.0009,0.0012,0.0008,0.003,0.0054,0.0009,0.0012,0.0183,0.0005,0.0012,0.0012,0.0008,0.0042,0.0026),
P=c(0.0022,0.001,0.0009,0.0012,0.0004,0.0008,0.0014,0.0014,0.0005,0.001,0.0014,0.0016,0.0004,0.0005,0.0191,0.0017,0.0014,0.0001,0.0005,0.0012),
S=c(0.0063,0.0023,0.0031,0.0028,0.001,0.0019,0.003,0.0038,0.0011,0.0017,0.0024,0.0031,0.0009,0.0012,0.0017,0.0126,0.0047,0.0003,0.001,0.0024),
T=c(0.0037,0.0018,0.0022,0.0019,0.0009,0.0014,0.002,0.0022,0.0007,0.0027,0.0033,0.0023,0.001,0.0012,0.0014,0.0047,0.0125,0.0003,0.0009,0.0036),
W=c(0.0004,0.0003,0.0002,0.0002,0.0001,0.0002,0.0003,0.0004,0.0002,0.0004,0.0007,0.0003,0.0002,0.0008,0.0001,0.0003,0.0003,0.0065,0.0009,0.0004),
Y=c(0.0013,0.0009,0.0007,0.0006,0.0003,0.0007,0.0009,0.0008,0.0015,0.0014,0.0022,0.001,0.0006,0.0042,0.0005,0.001,0.0009,0.0009,0.0102,0.0015),
V=c(0.0051,0.0016,0.0012,0.0013,0.0014,0.0012,0.0017,0.0018,0.0006,0.012,0.0095,0.0019,0.0023,0.0026,0.0012,0.0024,0.0036,0.0004,0.0015,0.0196)
)

## Rows use the same residue order as the columns.
rownames(probab) <- colnames(probab)

 as.data.frame(probab)

 }

"create_profile" <-
function(x){
## Create profile, number of amino acids in each position,
## for amino acid matrix x.
##
## x: amino acid matrix with 20 columns per alignment position and column
##    names made of the position number followed by the residue letter.
##
## Returns a matrix with 20 rows (one per residue column, labelled with
## the residue part of the first 20 column names) and one column per
## alignment position, holding the counts from amino_counts().

## Strip the whole leading position number. Removing only a literal "1"
## gives wrong labels as soon as the first position is not number 1
## (for example "12A" would become "2A" and "5A" would stay "5A").
amino_labels <- sub("^[0-9]+", "", colnames(x)[1:20])

n_positions <- dim(x)[2] / 20

## amino_counts() takes a zero-based position index.
counts <- lapply(seq_len(n_positions) - 1, function(k) amino_counts(x, k))
ans <- unlist(counts, use.names = FALSE)
if (is.null(ans)) {
    ans <- numeric(0)
}

dim(ans) <- c(20, n_positions)
rownames(ans) <- amino_labels
return(ans)

}

"create_profile_strings" <-
function(x,y){

## Return the profile strings from x based on groups in y.
##
## x: amino acid matrix, one row per sequence.
## y: group label for every row of x (coerced to a factor).
##
## Returns a matrix with one row per group (named after the group) and the
## same columns as x; each row is that group's profile from
## create_profile(), flattened position by position.

## Coerce so that plain character/numeric labels work; levels() of a
## non-factor is NULL.
y <- as.factor(y)
groups <- levels(y)

if (length(groups) == 0) {
    result <- matrix(0, nrow = 0, ncol = dim(x)[2])
} else {
    ## One flattened profile per group. The rows are collected directly
    ## rather than appended below a placeholder row that is later removed
    ## by name, so a group may carry any name and a single group still
    ## yields a one-row matrix.
    profile_rows <- lapply(groups, function(group) {
        as.vector(create_profile(x[which(y == group), , drop = FALSE]))
    })
    result <- do.call(rbind, profile_rows)
    rownames(result) <- groups
}

colnames(result) <- colnames(x)
return(result)
}

"pseudo_counts" <-
function(profile1){

## Calculate pseudocount-adjusted residue frequencies, column by column:
##
## f(a,i) = n(a,i) + B(i) * Sumj( n(j,i)/n(i) * Matrix(j,a)/Sum(Matrix(,j)) )
##          -------------------------------------------------------------------
##                                  n(i) + B(i)
##
## where n(a,i) is the count of residue a at position i, n(i) is the number
## of residues at position i, and B(i) = 5 * (number of different residue
## types at position i) is the number of pseudocounts to add.
## This is the Henikoff method.
##
## profile1 is generated by create_profile(): 20 rows (residues, in the
## same order as create_probab()) by one column per alignment position.
##
## Returns a matrix shaped like profile1 holding f(a,i). Positions that
## contain no residues at all are returned as zeros (they would otherwise
## evaluate to 0/0 = NaN).

## Create the probability matrix
probab <- as.matrix(create_probab())

## conditional[i, j] = probab[i, j] / sum(probab[, i]). Dividing a matrix
## by a vector of length nrow divides row i by element i. Computed once
## instead of inside the innermost loop.
conditional <- probab / colSums(probab)

profile_result <- profile1

for (k in seq_len(dim(profile1)[2])) {
    observed <- profile1[, k]
    sequence_count <- sum(observed)

    if (sequence_count > 0) {
        ## Bc=m*Rc where Rc is the number of different residues
        counts <- 5 * sum(observed > 0)

        ## expected[j] = Sum over i of (observed[i]/n) * conditional[i, j]
        expected <- colSums((observed / sequence_count) * conditional)

        profile_result[, k] <-
            (observed + counts * expected) / (sequence_count + counts)
    } else {
        profile_result[, k] <- 0
    }
}

return(profile_result)


}

"remove_gaps" <-
function(x,gap_fraction=0.6){
# Remove gapped positions from an Amino Acid matrix created by
# convert_aln_amino().
#
# x:            amino acid matrix, 20 columns per alignment position.
# gap_fraction: value in [0, 1). A position is kept only when its residue
#               total, as given by sum_20_aln(), is greater than
#               gap_fraction * number of rows.
#
# Returns x restricted to the columns of the kept positions. For an
# out-of-range gap_fraction the error text is printed and returned.

if(gap_fraction>=1||gap_fraction<0){
    message_text <- "Error::Gap_fraction should be between 0 and 1"
    print(message_text)
    return(message_text)
}

min_residues <- gap_fraction * nrow(x)
keep_position <- sum_20_aln(x) > min_residues

# Expand the per-position decision to the 20 columns of each position.
x[, rep(keep_position, each = 20)]

}

"remove_gaps_groups" <-
function(x,z,gap_fraction=0.6){
## Remove gapped positions from an Amino Acid matrix created by
## convert_aln_amino(), judging each group separately.
##
## x:            amino acid matrix, 20 columns per alignment position.
## z:            group label for every row of x (coerced to a factor).
## gap_fraction: value in [0, 1). A position is kept only when, in every
##               group, its residue total from sum_20_aln() is greater
##               than gap_fraction * number of rows in that group.
##
## Returns x restricted to the columns of the kept positions. For an
## out-of-range gap_fraction the error text is printed and returned.

if(gap_fraction>=1 || gap_fraction<0){
    y<-"Error::Gap_fraction should be between 0 and 1"
    print(y)
    return(y)
}

z <- as.factor(z)

## One flag per alignment position (20 matrix columns each).
include <- rep(TRUE, dim(x)[2] / 20)

for (group in levels(z)) {
    ## Exact label comparison. A pattern match (grep) would also select
    ## rows of any group whose name merely contains this one, e.g. "1"
    ## would pick up "10". drop = FALSE keeps one-row groups as a matrix.
    sub_matrix <- x[which(z == group), , drop = FALSE]
    ungapped <- gap_fraction * dim(sub_matrix)[1]
    total_residues <- sum_20_aln(sub_matrix)
    include <- include & (total_residues > ungapped)
}

y <- x[, rep(include, each = 20)]
return(y)
}

"run_between_pca" <-
function(x,z,y){
## Cover function to run the between group analysis.
##
## x is the amino acid matrix, used to calculate weights
## z is the matrix to be analysed
## y is the group labels
##
## Returns a list of class c("pca", "bga") with elements
##   ord: result of dudi.pca() on z, using the sequence weights as row weights
##   bet: result of between() on that PCA, grouped by y
##   fac: the group labels as a factor

group_factor <- as.factor(y)
row_weights <- Calculate_Row_Weights(x, group_factor)

## Both inputs are coerced to matrices, as before; only z is used below.
x <- as.matrix(x)
analysed <- as.matrix(z)

pca_result <- dudi.pca(as.data.frame(analysed), row.w = row_weights,
                       scannf = FALSE, nf = 20)
between_result <- between(pca_result, group_factor, scannf = FALSE,
                          nf = length(levels(group_factor)))

structure(list(ord = pca_result, bet = between_result, fac = group_factor),
          class = c("pca", "bga"))
}

"sum_20_aln" <-
function(x){
## Total of each consecutive group of 20 columns of x, i.e. the number of
## amino acids at each alignment position (one position = 20 columns).
##
## x: amino acid matrix, 20 columns per alignment position.
## Returns a vector with one total per position, from sum_20_cols().

## sum_20_cols() takes a zero-based position index.
last_position <- (dim(x)[2] / 20) - 1
position_totals <- lapply(0:last_position,
                          function(position) sum_20_cols(x, position))

unlist(position_totals)

}

"sum_20_cols" <-
function(x,k){
## Calculate the number of amino acids at one position in the alignment.
##
## x is the amino acid matrix (20 columns per alignment position)
## k is the zero-based position: columns (k*20)+1 .. (k*20)+20 are summed
##
## Returns a single number, the total over all rows of those 20 columns.

first <- (k * 20) + 1
last <- first + 19

## drop = FALSE keeps the block two-dimensional even when x has a single
## row; otherwise apply() fails on the resulting plain vector.
block <- x[, first:last, drop = FALSE]
z <- sum(apply(block, 1, sum))
return(z)

}

"sum_aln" <-
function(x){
## Calculate the number of amino acids in each position in amino acid matrix.
##
## x: amino acid matrix, 20 columns per alignment position.
##
## Returns a numeric vector with one total per alignment position, computed
## by sum_20_cols(). Every position of x is covered: the loop is not fixed
## at blocks 1..20 (which skipped the first position and ran past the end
## of shorter alignments), and the result is not seeded with an empty
## string (which turned the totals into text).

n_positions <- dim(x)[2] %/% 20

## sum_20_cols() takes a zero-based position index.
ans <- vapply(seq_len(n_positions) - 1,
              function(position) sum_20_cols(x, position),
              numeric(1))

return(ans)
}

"top_residues_2_groups" <-
function(bga_results,residue_number=20){
## Return the columns at both extremes of the first between-group axis.
##
## bga_results:    list with $fac (group factor) and $bet$co (column
##                 coordinates, one row per column of the analysed matrix).
## residue_number: how many entries to take from each end.
##
## The first column of $bet$co is sorted in decreasing (signed) order.
## Returns a named numeric vector: the residue_number largest coordinates
## followed by the residue_number smallest, named by the row names of
## $bet$co. If fewer rows are available, residue_number is reduced to fit.
## When the number of groups is not 2, a message string is returned instead.

    if(length(levels(bga_results$fac))==2){
        coordinates <- bga_results$bet$co

        ## Keep the coordinates numeric throughout; no round trip through
        ## character, which would round them to 15 significant digits.
        scores <- as.numeric(coordinates[,1])
        names(scores) <- rownames(coordinates)
        ranked <- scores[rev(order(scores))]

        n_total <- length(ranked)
        n_take <- min(residue_number, n_total)

        ## Exactly n_take entries from each end (a range of
        ## (n - residue_number):n would hold one entry too many).
        top <- ranked[seq_len(n_take)]
        bottom <- ranked[seq(n_total - n_take + 1, length.out = n_take)]
        x <- c(top, bottom)
        }else{
           x<-("Only use this function if the number of groups is 2")
    } 

return(x)
}

