LONG SCRIPTS

PYTHON

Simple Error function

Source

from math import exp

def err_f(x):
    #print "x in error function1",x
 
    # save the sign of x
    sign = 1 if x >= 0 else -1
    x = abs(x)
 
    # constants
    a1 =  0.254829592
    a2 = -0.284496736
    a3 =  1.421413741
    a4 = -1.453152027
    a5 =  1.061405429
    p  =  0.3275911
 
    # A&S formula 7.1.26
    t = 1.0/(1.0 + p*x)
    y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*exp(-x*x)
    #print "x in error function2",x
    return sign*y # erf(-x) = -erf(x)
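A quick sanity check (a minimal sketch, assuming the function above is in scope): compare it against the library erf, available in Python 2.7+. The A&S 7.1.26 fit is accurate to roughly 1.5e-7.

from math import erf

for x in (-2.0, -0.5, 0.0, 0.5, 1.0, 2.0):
    # err_f should agree with math.erf to about 1e-7
    print('%5.2f  %.8f  %.8f  %.1e' % (x, err_f(x), erf(x), abs(err_f(x) - erf(x))))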

Simple Histogram function

from numpy import zeros, shape

def hist(hist_input,num_bins):
  #hist_input should be a one-dimensional array
  #this function produces a num_bins x 3 array containing, for each bin,
  #the bin midpoint, the raw count, and the count normalized by the number of samples
  min_data=min(hist_input[:])
  max_data=max(hist_input[:])
  hist_width=max_data-min_data
  bin_width=(hist_width)/float(num_bins)
  hist_out=zeros((num_bins,3))
  hist_inp_shape=shape(hist_input)
  hist_inp_length=int(hist_inp_shape[0])
  print("length",str(shape(hist_input[:])),"hist_width",hist_width,"bin_width",bin_width)
 
  for i in range(num_bins):
   hist_out[i,0]=(float(i)/float(num_bins)*(hist_width-bin_width))+min_data+bin_width/2
   #this is regular histogram
  for i in range(hist_inp_length):
        temp=hist_input[i]
        #map the value to a bin index; the second term shifts the maximum value into the last bin
        bin=int((temp-min_data)/(max_data-min_data)*num_bins)-int((temp-min_data)/(max_data-min_data))
        hist_out[bin,1]=hist_out[bin,1]+1.0
 
  hist_out[:,2]=hist_out[:,1]/float(hist_inp_length)
  return hist_out
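A minimal usage sketch (assuming the function above; numpy.random.normal is used only to fabricate test data): histogram 10,000 samples from a standard normal into 50 bins and print the first few rows.

from numpy.random import normal

data = normal(0.0, 1.0, 10000)   # 10,000 samples from N(0,1)
h = hist(data, 50)               # columns: bin midpoint, raw count, normalized count
for row in h[:5]:
    print('%8.3f  %6d  %8.4f' % (row[0], int(row[1]), row[2]))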

pH_ex_overlap_V1.0

#!/usr/bin/env python
from numpy import *
import math,sys,pdb,time
#####defaults
method='e'
delta_N_threshold=3
total_time=0.0
######end of defaults
def printusage():
 #import sys
 print ' pka list: one pKa per titratable residue \n'
 print 'USAGE: -pkal list of pkas of titratable residues  -o output -pHi pH of replica i -pHj pH of replica j -m method -dNt delta_N threshold'
 sys.exit()
################################functions
def frange(start,stop,step):
    f=start
    while f<stop:
        yield f
        f+=step
#######
def pro_dep_prob(pstat,pka,pH):
    #pstat: protonation state (1 = protonated, 0 = deprotonated)
    prot_prob=1/(1+10**(pH-pka))
    return pstat*prot_prob+(1.0-pstat)*(1.0-prot_prob)
######
def GetAllZeroOneCombByRow(num_tit):
    #num_tit = len(pka_array) = number of titratable residues
    global all_comb
    all_comb=zeros((2**num_tit,num_tit))
    for i in range(0,2**num_tit):
        comb=map(int,list(bin(i))[2:])
        comb_len=len(comb)
        all_comb[i,num_tit-comb_len:]=comb
    #print (all_comb,num_tit)
    return all_comb
#####
def prob_dens_acc(delta_N,delta_pH):
    #Metropolis-style factor: min(1, exp(ln(10)*delta_N*delta_pH))
    prob_power=log(10)*delta_N*delta_pH
    if prob_power>=0:
        return 1.0
    else:
        return exp(prob_power)
######
def tot_acc_ratio(pH_i,pH_j):
    delta_pH=pH_i-pH_j
    global pka_array
    pka_array_len=len(pka_array)
    all_possible_delta_N=2*pka_array_len+1
    joint_prob_delta_N=zeros(all_possible_delta_N)
    pH_i_array=zeros((2,pka_array_len))
    pH_j_array=zeros((2,pka_array_len))
    for k in range(0,pka_array_len):
        pH_i_array[0,k]=pro_dep_prob(0,pka_array[k],pH_i)
        pH_i_array[1,k]=pro_dep_prob(1,pka_array[k],pH_i)
        pH_j_array[0,k]=pro_dep_prob(0,pka_array[k],pH_j)
        pH_j_array[1,k]=pro_dep_prob(1,pka_array[k],pH_j)
 
    for i in range(0,2**pka_array_len):
        for j in range(0,2**pka_array_len):
            temp_single_prob=1.0
            for k in range(0,pka_array_len):
                comp_ik=int(all_comb[i,k])
                comp_jk=int(all_comb[j,k])
 
                temp_single_prob=temp_single_prob*\
                pH_i_array[comp_ik,k]*pH_j_array[comp_jk,k]
 
            delta_N=int(sum(all_comb[i,:])-sum(all_comb[j,:]))
            joint_prob_delta_N[pka_array_len-delta_N]=\
            joint_prob_delta_N[pka_array_len-delta_N]+temp_single_prob
    for m in range(0,all_possible_delta_N):
        delta_N=pka_array_len-m
        joint_prob_delta_N[m]=joint_prob_delta_N[m]*\
        prob_dens_acc(delta_N,delta_pH)
    return sum(joint_prob_delta_N)
##########
def approx_tot_acc_ratio(pH_i,pH_j):
    if pH_j>pH_i:#swap pH_i pH_j such  that delta_pH>0
        temp=pH_i
        pH_i=pH_j
        pH_j=temp
 
    delta_pH=pH_i-pH_j
    global pka_array
    global delta_N_threshold
    pka_array_len=len(pka_array)
    all_possible_delta_N=2*pka_array_len+1
    joint_prob_delta_N=zeros(all_possible_delta_N)
    pH_i_array=zeros((2,pka_array_len))
    pH_j_array=zeros((2,pka_array_len))
    for k in range(0,pka_array_len):
        pH_i_array[0,k]=pro_dep_prob(0,pka_array[k],pH_i)
        pH_i_array[1,k]=pro_dep_prob(1,pka_array[k],pH_i)
        pH_j_array[0,k]=pro_dep_prob(0,pka_array[k],pH_j)
        pH_j_array[1,k]=pro_dep_prob(1,pka_array[k],pH_j)
 
    for i in range(0,2**pka_array_len):
        for j in range(0,2**pka_array_len):
            temp_single_prob=1.0
            delta_N=int(sum(all_comb[i,:])-sum(all_comb[j,:]))
            if delta_N>-(delta_N_threshold):
                for k in range(0,pka_array_len):
                    comp_ik=int(all_comb[i,k])
                    comp_jk=int(all_comb[j,k])
 
                    temp_single_prob=temp_single_prob*\
                    pH_i_array[comp_ik,k]*pH_j_array[comp_jk,k]
 
                joint_prob_delta_N[pka_array_len-delta_N]=\
                joint_prob_delta_N[pka_array_len-delta_N]+temp_single_prob
    for m in range(0,all_possible_delta_N):
        delta_N=pka_array_len-m
        joint_prob_delta_N[m]=joint_prob_delta_N[m]*\
        prob_dens_acc(delta_N,delta_pH)
    return sum(joint_prob_delta_N)
 
#################################end of functions
if len(sys.argv)==1 or sys.argv[1].startswith('-h') or  sys.argv[1].startswith('--h'):
 printusage()
for x in range (len(sys.argv)):
 if sys.argv[x] == '-pkal':
      i_list=sys.argv[x+1]
 if sys.argv[x] == '-o':
      output=sys.argv[x+1]
 if sys.argv[x]=='-pHi':
      pH_i=float(sys.argv[x+1])
 if sys.argv[x]=='-pHj':
      pH_j=float(sys.argv[x+1])
 if sys.argv[x]=='-m':
      method=sys.argv[x+1]
 if sys.argv[x]=='-dNt':
      delta_N_threshold=int(sys.argv[x+1])
tic=time.clock()
outf=open(output,"w")
 
pka_list=open(i_list,'r')
pka_array=[]
for line in pka_list:
    pka_data=map(float,line.split())
    pka_array.append(pka_data)
pka_array=array(pka_array,dtype=float64).flatten()
num_tit=len(pka_array)
all_comb=GetAllZeroOneCombByRow(num_tit)
 
if method=='e':
        tic=time.clock()
        acceptance_ratio=tot_acc_ratio(pH_i,pH_j)
        toc=time.clock()
        total_time+=toc-tic
else:
        tic=time.clock()
        acceptance_ratio=approx_tot_acc_ratio(pH_i,pH_j)
        toc=time.clock()
        total_time+=toc-tic
outf.write(str(pH_i)+'\t'+str(pH_j)+'\t'+str(acceptance_ratio)+'\n')
print 'acceptance_ratio= ',acceptance_ratio
 
print('elapsed time= '+str(total_time)+' s')
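An example invocation (a sketch: the script and file names are hypothetical, the flags follow the parser above, and 'e' selects the exact enumeration over all 2^N protonation states):

python pH_ex_overlap_V1.0.py -pkal pka_list.dat -o overlap.out -pHi 7.0 -pHj 7.5 -m e -dNt 3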

pH_ex_overlap_optimizer_GA_V1.0

#!/usr/bin/env python
from numpy import *
import math,sys,pdb,time
 
from pyevolve import G1DList
from pyevolve import GSimpleGA
from pyevolve import Selectors, Initializators, Mutators
from pyevolve import Consts
from pyevolve import Crossovers
from pyevolve import Scaling
from pyevolve import GAllele
#####defaults
method='e'
delta_N_threshold=3
num_gens=10
######end of defaults
 
####################################functions
def printusage():
 #import sys
 print ' pka list: one pka per titratable residue \n'
 print 'USAGE: -pkal list of pkas of titratable residues -rep_num number of replicas  -o output -pH_min min of pH range' 
 print '-pH_max max of pH range -num_gens number of generations in GA -m method "e" for exact -dNt threshold for delta_N'
 sys.exit()
 ##############################
def pro_dep_prob(pstat,pka,pH):
    #pstat: protonation state (1 = protonated, 0 = deprotonated)
    prot_prob=1/(1+10**(pH-pka))
    return pstat*prot_prob+(1.0-pstat)*(1.0-prot_prob)
######
def GetAllZeroOneCombByRow(num_tit):
    #num_tit = len(pka_array) = number of titratable residues
    global all_comb
    all_comb=zeros((2**num_tit,num_tit))
    for i in range(0,2**num_tit):
        comb=map(int,list(bin(i))[2:])
        comb_len=len(comb)
        all_comb[i,num_tit-comb_len:]=comb
    #print (all_comb,num_tit)
    return #all_comb
#####
def prob_dens_acc(delta_N,delta_pH):
    #Metropolis-style factor: min(1, exp(ln(10)*delta_N*delta_pH))
    prob_power=log(10)*delta_N*delta_pH
    if prob_power>=0:
        return 1.0
    else:
        return exp(prob_power)
######
def tot_acc_ratio(pH_i,pH_j):
    delta_pH=pH_i-pH_j
    global pka_array
    pka_array_len=len(pka_array)
    all_possible_delta_N=2*pka_array_len+1
    joint_prob_delta_N=zeros(all_possible_delta_N)
    pH_i_array=zeros((2,pka_array_len))
    pH_j_array=zeros((2,pka_array_len))
    for k in range(0,pka_array_len):
        pH_i_array[0,k]=pro_dep_prob(0,pka_array[k],pH_i)
        pH_i_array[1,k]=pro_dep_prob(1,pka_array[k],pH_i)
        pH_j_array[0,k]=pro_dep_prob(0,pka_array[k],pH_j)
        pH_j_array[1,k]=pro_dep_prob(1,pka_array[k],pH_j)
 
    for i in range(0,2**pka_array_len):
        for j in range(0,2**pka_array_len):
            temp_single_prob=1.0
            for k in range(0,pka_array_len):
                comp_ik=int(all_comb[i,k])
                comp_jk=int(all_comb[j,k])
 
                temp_single_prob=temp_single_prob*\
                pH_i_array[comp_ik,k]*pH_j_array[comp_jk,k]
 
            delta_N=int(sum(all_comb[i,:])-sum(all_comb[j,:]))
            joint_prob_delta_N[pka_array_len-delta_N]=\
            joint_prob_delta_N[pka_array_len-delta_N]+temp_single_prob
    for m in range(0,all_possible_delta_N):
        delta_N=pka_array_len-m
        joint_prob_delta_N[m]=joint_prob_delta_N[m]*\
        prob_dens_acc(delta_N,delta_pH)
    return sum(joint_prob_delta_N)
######
def approx_tot_acc_ratio(pH_i,pH_j):
    if pH_j>pH_i:#swap pH_i pH_j such  that delta_pH>0
        temp=pH_i
        pH_i=pH_j
        pH_j=temp
 
    delta_pH=pH_i-pH_j
    global pka_array
    global delta_N_threshold
    pka_array_len=len(pka_array)
    all_possible_delta_N=2*pka_array_len+1
    joint_prob_delta_N=zeros(all_possible_delta_N)
    pH_i_array=zeros((2,pka_array_len))
    pH_j_array=zeros((2,pka_array_len))
    for k in range(0,pka_array_len):
        pH_i_array[0,k]=pro_dep_prob(0,pka_array[k],pH_i)
        pH_i_array[1,k]=pro_dep_prob(1,pka_array[k],pH_i)
        pH_j_array[0,k]=pro_dep_prob(0,pka_array[k],pH_j)
        pH_j_array[1,k]=pro_dep_prob(1,pka_array[k],pH_j)
 
    for i in range(0,2**pka_array_len):
        for j in range(0,2**pka_array_len):
            temp_single_prob=1.0
            delta_N=int(sum(all_comb[i,:])-sum(all_comb[j,:]))
            if delta_N>-(delta_N_threshold):
                for k in range(0,pka_array_len):
                    comp_ik=int(all_comb[i,k])
                    comp_jk=int(all_comb[j,k])
 
                    temp_single_prob=temp_single_prob*\
                    pH_i_array[comp_ik,k]*pH_j_array[comp_jk,k]
 
                joint_prob_delta_N[pka_array_len-delta_N]=\
                joint_prob_delta_N[pka_array_len-delta_N]+temp_single_prob
    for m in range(0,all_possible_delta_N):
        delta_N=pka_array_len-m
        joint_prob_delta_N[m]=joint_prob_delta_N[m]*\
        prob_dens_acc(delta_N,delta_pH)
    return sum(joint_prob_delta_N)
######
def eval_func(chromosome):
  score = 0.0
  scale_in_exp=5#
  #sort()
  global pH_max
  global pH_min
  global pH_array
  global rep_num
  #rep_num=len(chromosome)
  pH_array[1:rep_num-1]=array(chromosome[0:rep_num-2])
  pH_array.sort()
  all_ex_acc=zeros(rep_num-1)
 
  print "pH_array_test",pH_array[:]
 
  # Evaluate the exchange acceptance ratio for each pair of neighboring pH replicas
  if method =='e': #'e' selects the exact enumeration (see USAGE); anything else uses the approximation
      for i in range(0,rep_num-1):
          all_ex_acc[i]=tot_acc_ratio(pH_array[i],pH_array[i+1])
  else:
      for i in range(0,rep_num-1):
          all_ex_acc[i]=approx_tot_acc_ratio(pH_array[i],pH_array[i+1])
 
  all_ex_acc_mean=sum(all_ex_acc)/float(rep_num)
  for i in range(0,rep_num-1):
      score +=exp(-(all_ex_acc[i]-all_ex_acc_mean)**2)
 
  print 'score= ',score
  #if (score < 1):
  #print "Individual:"
  #i_str = ""
  #for p in chromosome:
  #  i_str += str(p) + " "
  #print i_str
  #print "Score: " + str(score)
  #print
  return score
############################################ end of functions
if len(sys.argv)==1 or sys.argv[1].startswith('-h') or  sys.argv[1].startswith('--h'):
 printusage()
for x in range (len(sys.argv)):
 if sys.argv[x] == '-pkal':
    pka_list_file=sys.argv[x+1]
 if sys.argv[x] == '-rep_num':
    rep_num =int(sys.argv[x+1])
 if sys.argv[x] == '-o':
    output=sys.argv[x+1]
 if sys.argv[x]=='-pH_min':
    pH_min=float(sys.argv[x+1])
 if sys.argv[x]=='-pH_max':
    pH_max=float(sys.argv[x+1])
 if sys.argv[x]=='-num_gens':
    num_gens=int(sys.argv[x+1])
 if sys.argv[x]=='-m':
      method=sys.argv[x+1]
 if sys.argv[x]=='-dNt':
      delta_N_threshold=int(sys.argv[x+1]) 
 
tic=time.clock()
outf=open(output,'a')
 
pka_list=open(pka_list_file,'r')
pka_array=[]
for line in pka_list:
    pka_data=map(float,line.split())
    pka_array.append(pka_data)
pka_array=array(pka_array,dtype=float64).flatten()
tit_num=len(pka_array)
GetAllZeroOneCombByRow(tit_num)
 
pH_array=zeros(rep_num)
 
pH_array[0]=pH_min
pH_array[rep_num-1]=pH_max
num_params=rep_num-2
genome = G1DList.G1DList(num_params)
setOfAlleles = GAllele.GAlleles()
paramRange = GAllele.GAlleleRange(pH_min,pH_max,True)
for i in xrange(num_params):
  setOfAlleles.add(paramRange)
#genome.setParams(allele=setOfAlleles)
genome.setParams(rangemin=pH_min,rangemax=pH_max)
 
genome.initializator.set(Initializators.G1DListInitializatorReal)
#genome.initializator.set(Initializators.G1DListInitializatorAllele)
 
genome.mutator.set(Mutators.G1DListMutatorRealGaussian)
#genome.mutator.set(Mutators.G1DListMutatorAllele)
 
genome.crossover.set(Crossovers.G1DListCrossoverUniform)
 
genome.evaluator.set(eval_func)
 
ga = GSimpleGA.GSimpleGA(genome)
#ga.selector.set(Selectors.GRouletteWheel)
#ga.setPopulationSize(1000)
ga.setGenerations(num_gens)
ga.setMutationRate(0.27)
ga.setCrossoverRate(0.50)
ga.setMinimax(Consts.minimaxType["maximize"])
ga.setElitism(True)
 
ga.evolve(freq_stats=10) #statistics print frequency; the number of generations comes from setGenerations above
result=zeros(rep_num)
result[1:rep_num-1]=array(ga.bestIndividual()[0:rep_num-2])
result[rep_num-1]=pH_max
result[0]=pH_min
result.sort()
outf.write ('best sets in '+ str(num_gens)+' generations are:'+'\n') 
for i in range(0,rep_num):
 
   outf.write (str(result[i])+'\t')
outf.write('\n'+'##############'+'\n')
for i in range(0,rep_num-1):
    overlap=tot_acc_ratio(result[i],result[i+1])
    outf.write('overlap of pH='+str(result[i])+' and pH='+str(result[i+1])+ ' is ' + str(overlap) +'\n')
toc=time.clock()
total_time=toc-tic
outf.write('elapsed time= '+str(total_time)+' s')
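An example invocation (a sketch; the script and file names are hypothetical, and the flags follow the parser above): optimize the placement of 8 replicas between pH 2 and pH 9.

python pH_ex_overlap_optimizer_GA_V1.0.py -pkal pka_list.dat -rep_num 8 -o ga_opt.out -pH_min 2.0 -pH_max 9.0 -num_gens 10 -m e -dNt 3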

simple free energy reweighting code:

#!/usr/bin/env python
from numpy import *
import math,sys,pdb
def printusage():
 #import sys
 print ' data_file_format: dihedral exact_energy inexact_energy \n'
 print 'USAGE: -i data_file -o output -clb cis_lower_bound -chb cis_higher_bound -tlb trans_lower_bound -thb trans_higher_bound'
 sys.exit()
################ mean
def mean(a):
 ave=0
 i=0
 for row in a:
  ave+=row 
  i+=1
 return ave/float(i)
##################variance
def var(x):
 return mean(x**2)-(mean(x))**2
############### exp_sum
def exp_sum(a,b):
    #this function receives a and b and returns c=log(exp(a)+exp(b)) computed stably (the log-sum-exp trick); it is useful for free energy calculations
    #pdb.set_trace()
    return float64(max(a,b)+log(1+exp(min(a,b)-max(a,b))))
    #c=max(log(a),log(b))+log(1+exp(min(log(a),log(b))-max(log(a),log(b))))
if len(sys.argv)==1 or sys.argv[1].startswith('-h') or  sys.argv[1].startswith('--h'):
 printusage()
for x in range (len(sys.argv)):
 if sys.argv[x] == '-i':
      input=sys.argv[x+1]
 if sys.argv[x] == '-o':
      output=sys.argv[x+1]      
 if sys.argv[x]=='-clb':
      clb=float(sys.argv[x+1])
 if sys.argv[x]=='-chb':
      chb=float(sys.argv[x+1])
 if sys.argv[x]=='-tlb':
      tlb=float(sys.argv[x+1])
 if sys.argv[x]=='-thb':
      thb=float(sys.argv[x+1])               
dih_ene = open(input, "r")
outf=open(output,"w")
dih_ene_arr=[]
delta_t=[]
delta_c=[]
for line in dih_ene:
 dih_ene_data=map(float,line.split())
 dih_ene_arr.append(dih_ene_data)
 
dih_ene_arr=array(dih_ene_arr,dtype=float64)
i=0
beta1=1/0.6 #1/kT, with kT taken as ~0.6 kcal/mol (room temperature)
nc=0
nt=0
ncf=0 #number of times the cis state is visited by the fake (unreweighted) ensemble
ntf=0 #number of times the trans state is visited by the fake (unreweighted) ensemble
#pdb.set_trace()
nt_test=0
nc_test=0
for row in dih_ene_arr:
    i=i+1
    if abs(row[1]) >clb and abs(row[1]) <chb:
        delta_c.append(float(beta1*(row[2]-row[3])))
        ncf=ncf+1
 
        if ncf >1: #not the first cis visit: accumulate in log space
            a=-beta1*(row[2]-row[3])
            nc=exp_sum(nc,a)
            nc_test=nc_test+exp(a)
        else: #first cis visit: initialize the log accumulator
            nc=-beta1*(row[2]-row[3])
            nc_test=exp(nc)
    elif abs(row[1]) >tlb and abs(row[1]) <thb:
        delta_t.append(float(beta1*(row[2]-row[3])))
        ntf=ntf+1
 
        if ntf >1: #not the first trans visit: accumulate in log space
            a=-beta1*(row[2]-row[3])
            nt=exp_sum(nt,a)
            nt_test=nt_test+exp(a)
        else: #first trans visit: initialize the log accumulator
            nt=-beta1*(row[2]-row[3])
            nt_test=exp(nt)
#nc actually holds log of the reweighted count; the same for nt
#pdb.set_trace()
 
dg=-1/beta1*(nc-nt)
dgf=-1/beta1*log(float64(ncf)/float64(ntf))
print ncf, ntf, ncf+ ntf,nc,nt
print "test",-1/beta1*log( nc_test/ nt_test), 'dg=',dg,'; dgf=',dgf
 
delta_t=array(delta_t)
delta_c=array(delta_c)
dg_dc=0
dg_dt=0
dg_dc=exp(-mean(delta_c))/exp(nc)
dg_dt=exp(-mean(delta_t))/exp(nt)
print 'mean(delta_c)=',mean(delta_c) ,';mean(delta_t)=',mean(delta_t),'mean((delta_c)**2)=',mean((delta_c)**2) ,';mean((delta_t)**2)=',mean((delta_t)**2)
var_g2=((dg_dc)**2)*(var(delta_c)) + ((dg_dt)**2)*(var(delta_t))
print 'var(delta_c)=',var(delta_c),';var(delta_t)=',var(delta_t),';var_g2=',var_g2
outf.write(str(dg)+'\t'+str(dgf))
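The exp_sum helper above is the standard log-sum-exp trick, which is why nc and nt are kept in log space: exponentiating the reweighting factors directly can overflow. A minimal sketch of the idea (assuming only the exp_sum definition above):

# two log-weights far too large to exponentiate directly
a = 800.0
b = 801.0
# exp_sum works entirely in log space: max(a,b) + log(1 + exp(min(a,b) - max(a,b)))
print(exp_sum(a, b))              # ~801.3133, i.e. log(exp(800) + exp(801))
# the naive form would overflow:
# print(log(exp(a) + exp(b)))     # exp(800) -> overflow / inf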

multiple_files_reweighter

#!/usr/bin/env python
from numpy import *
import math,sys,pdb,re
def printusage():
 #import sys
 print ' data_file_format: dihedral exact_energy inexact_energy \n'
 print 'USAGE: -il list of data_files -o output' #-lb lower_bound -hb higher_bound' 
 sys.exit()
################ mean
def mean(a):
 ave=0
 i=0
 for row in a:
  ave+=row 
  i+=1
 return ave/float(i)
##################variance
def var(x):
 return mean(x**2)-(mean(x))**2
############### exp_sum
def exp_sum(a,b):
    #this function receives a and b and returns c=log(exp(a)+exp(b)) computed stably (the log-sum-exp trick); it is useful for free energy calculations
    #pdb.set_trace()
    return float64(max(a,b)+log(1+exp(min(a,b)-max(a,b))))
    #c=max(log(a),log(b))+log(1+exp(min(log(a),log(b))-max(log(a),log(b))))
 
def Dg_reweighting(input,beta1):
    dih_ene = open(input, "r")
    dih_ene_arr=[]
    delta_t=[]
    delta_c=[]
    for line in dih_ene:
     dih_ene_data=map(float,line.split())
     dih_ene_arr.append(dih_ene_data)
 
    dih_ene_arr=array(dih_ene_arr,dtype=float64)
    i=0
 
    nc=0
    nt=0
    ncf=0 #number of times the cis state is visited by the fake (unreweighted) ensemble
    ntf=0 #number of times the trans state is visited by the fake (unreweighted) ensemble
    #pdb.set_trace()
    for row in dih_ene_arr:
        i=i+1
        #if abs(row[1]) >lb and abs(row[1]) <hb:
        delta_t.append(float(beta1*(row[2]-row[3])))
        nc=nc+1

        if i >1:
            a=-beta1*(row[2]-row[3])
            nt=exp_sum(nt,a)

        else:
            nt=-beta1*(row[2]-row[3])
 
    #nt actually holds log of the reweighted count; nc here is just a plain visit count
    #pdb.set_trace()
    print 'nt=',nt,' nc=',nc 
    g_t=-1/beta1*(nt)
    g_c=-1/beta1*log(float64(nc))
    print ncf, ntf, ncf+ ntf,nc,nt
    return g_t#-g_c
 
if len(sys.argv)==1 or sys.argv[1].startswith('-h') or  sys.argv[1].startswith('--h'):
 printusage()
for x in range (len(sys.argv)):
 if sys.argv[x] == '-il':
      i_list=sys.argv[x+1]
 if sys.argv[x] == '-o':
      output=sys.argv[x+1]      
# if sys.argv[x]=='-lb':
#      clb=float(sys.argv[x+1])
# if sys.argv[x]=='-hb':
#      chb=float(sys.argv[x+1])
 
outf=open(output,"w")
beta1=1/0.6 #1/kT, with kT taken as ~0.6 kcal/mol (room temperature)
inp_list=open(i_list,'r')
for inp_file in inp_list:
    #inp_name=map(str,inp_file.split())
    print 'input file:',inp_file
    inp_file=re.sub(r'\s+','',inp_file)
    print 'input file:',inp_file
    reweighted_dg=Dg_reweighting(inp_file,beta1)
    print 'reweighted_dg=' ,reweighted_dg
    outf.write(inp_file+'\t'+str(reweighted_dg)+'\n')
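An example invocation (a sketch; file names are hypothetical): the -il list file holds one data-file path per line, and each file's reweighted value is written next to its name in the output.

python multiple_files_reweighter.py -il file_list.txt -o reweighted_dg.out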

R

bootstrap, random error, ergodicity

randomerror_ergodicity_bootstrap.V.3.R:

RanE_Ergo<-function(num_files,in_format,fdir,output="random_error_ergo.dat",max_boots=200,boot_sets_length=num_files,hist_data=TRUE)
{
my_bootstrap<-function(mydata,max_boots,sets_length ){
#library(randtoolbox)
data_num=length(mydata)
#temp_data<-data.frame()
mydata<-as.numeric(mydata)
x<-1:data_num
myboot<-data.frame()
#print("mydata")
#print(mydata)
mean_mydata_boot<-data.frame()
for (i in 1:max_boots){
#mydata_boot<-data.frame()
#mydata_boot <-rbind(mydata_boot,sample(mydata,sets_length,replace=TRUE))
mean_mydata_boot<-rbind(mean_mydata_boot,mean(sample(mydata,sets_length,replace=TRUE)))
print (i)
#print(mean_mydata_boot)
#print(mean(sample(mydata,sets_length,replace=TRUE)))
}
#print("mean_mydata_boot")
#print(mean_mydata_boot)
boot_mean_sd<-cbind(mean(mean_mydata_boot[,1]),sd(mean_mydata_boot[,1]))
#if(hist_data==TRUE){
#y<-hist(mean_mydata_boot,breaks=20)
#output<-cbind(y$mids,y$counts)

#write.table(output,"final_boot_hist_data.out",row.names=F,col.names=F)
#}
#print(boot_mean_sd)
return( boot_mean_sd)
}
################end of my_bootstrap
setwd(fdir)
x<-1:num_files
f_names<-paste(in_format,x,sep="")

line_num_list<- numeric()
random_error<-numeric()
all_data<-numeric()
ergodicity<-numeric()
f_bar_t_temp<-data.frame()
bootstrap<-data.frame()
start=1
for (i in f_names){
y<-read.table(i)

#print(i)
#print(y[1,])
f_line_num<-length(y[,1])
print(i)
#print(f_line_num)
line_num_list<-c(line_num_list,f_line_num)

#print(all_data)
#print(line_num_list)
#dim(all_data) <- c(f_line_num, dim(all_data)[2])
#print(line_num_list)
all_data<-append(all_data,y)
#all_data <- cbind(all_data, y)
#print(y)
#print(all_data)
}

#print( all_data)
min_line_num<-min(line_num_list) #find the lowest number of lines in the input files 
#print(all_data)

all_data<-as.data.frame(sapply(all_data,"[",2:min_line_num))

num_col<-length(all_data[2,])
#print(all_data)
#print(all_data[1:min_line_num,])

#all_data<-all_data[1:min_line_num,]
write.table(all_data,"all_data.out",col.names=FALSE,row.names=FALSE,sep="\t")
min_line_num<-min_line_num-1
for(j in start:min_line_num){
#print (all_data[1:j,])
#print ("ergod")
#print (rowMeans(all_data[1:j,]))
#print (mean(rowMeans(all_data[1:j,])))
#print(mean((rowMeans(all_data[1:j,])-final_mean)**2))
#print("------------------------------------------")
f_bar_t<-mean(colMeans(all_data[start:j,])) #according to "Measures of effective ergodic convergence in liquids" by Raymond D. Mountain and D. Thirumalai,
f_ts<-colMeans(all_data[start:j,])  # J. Phys. Chem., 1989, 93 (19), pp 6975–6979

ergodicity<-rbind(ergodicity,mean((f_ts-f_bar_t)**2))
bootstrap<-rbind(bootstrap,my_bootstrap(all_data[j,],max_boots,boot_sets_length))
#print (bootstrap)
temp<-cbind(rowMeans(all_data[j,]),sqrt((rowMeans((all_data[j,])**2)-(rowMeans(all_data[j,]))**2)))

random_error<-rbind(random_error,temp)
f_bar_t_temp<-rbind(f_bar_t_temp,f_bar_t)

}

random_error_ergodicity_bootstrap<-cbind(random_error,ergodicity,f_bar_t_temp,bootstrap)
#print(random_error_ergodicity_bootstrap)
write.table(random_error_ergodicity_bootstrap,output,col.names=FALSE,row.names=FALSE,sep="\t")

}

USAGE:

source('~/scripts/UMSRE/randomerror_ergodicity_bootstrap.V.3.R')
RanE_Ergo(5,'PMF_diff_vs_time_10exch.out.val','/scratch/hpc/danial/all_UMSRE/Butane/explicit_sol/10_exch_-20_90','rand_ergod_10exch.out')

Kullback-Leibler_summedresidual

RD_KL<-function(test_input,ref_input,output="RD_KL_vst",jump=50,num_bins=100){
###this function calculates the cumulative summed-residual and Kullback–Leibler divergence criteria
###of the test_input data with respect to the ref_input data

data_test<-read.table(test_input)
data_ref<-read.table(ref_input)
########### This part makes sure that the breaks start and end
########### at the min and max values in the reference data
xlim_l_range=min(data_ref[,2])
xlim_h_range=max(data_ref[,2])
bins_width=(xlim_h_range-xlim_l_range)/(num_bins)
bins<-seq(xlim_l_range,xlim_h_range,by=bins_width)

hist_test_final<-hist(data_test[,2],breaks=bins,prob='T',plot = FALSE)
hist_ref<-hist(data_ref[,2],breaks=bins,prob='T', plot = FALSE)

num_breaks=length(hist_ref$breaks[])

num_cum=trunc(length(data_test[,2])/jump)# number of cumulative calculations
header=cbind("#time","residual_diff","Kullback–Leibler_divergence")
write.table(header,output,col.names=FALSE,row.names=FALSE,sep="\t",append=FALSE)
for(i in 1:num_cum)
{
residual_diff=0.0
num_lines=i*jump
hist_test<-hist(data_test[1:num_lines,2],breaks=bins,prob='T',plot = FALSE)

residual_diff=sqrt(sum((hist_test$density-hist_ref$density)**2)/num_breaks)

non_zero_list<-hist_test$density>0
#sum the KL terms only over bins where the test density is non-zero (0*log(0) is treated as 0)
K_L_int<-sum(hist_test$density[non_zero_list]*log(hist_test$density[non_zero_list]/hist_ref$density[non_zero_list]))
print(K_L_int)
#K_L_int=sum(hist_test$density*log(hist_test$density/hist_ref$density))
write_data<-cbind(num_lines,residual_diff,K_L_int)
write.table(write_data,output,col.names=FALSE,row.names=FALSE,sep="\t",append=TRUE)
}
output_test_his<-cbind(hist_test_final$mids,hist_test_final$density)
output_ref_his<-cbind(hist_ref$mids,hist_ref$density)
write.table(output_test_his,paste("Final_hist_",test_input,sep=""),col.names=FALSE,row.names=FALSE,sep="\t",append=FALSE)
write.table(output_ref_his,paste("Final_hist_",ref_input,sep=""),col.names=FALSE,row.names=FALSE,sep="\t",append=FALSE)
warnings()
}
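USAGE (a sketch; the source path and file names are hypothetical): RD_KL histograms the second column of each input file, jump is the cumulative step in lines, and num_bins the number of histogram bins.

source('~/scripts/UMSRE/Kullback-Leibler_summedresidual.R')
RD_KL('test_data.dat','ref_data.dat','RD_KL_vst.out',jump=50,num_bins=100)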