import random
import os
import pandas as pd
[docs]
def write_div_enum_script(script_path,batch_directory, rxn_enum_set_dir,output_directory, modelfile, weightfile,\
reactionFile, prev_sol_dir ='prev_sol_dir/', log_dir='log_dir',env="MANA",dist_anneal=0.9, obj_tol=0.01,\
iters=100,para_batchs=False):
"""write_div_enum_script.
Parameters
----------
script_path : str
path to the diversity_enum.py dexom python script
batch_directory : str
path to the directory were batch files should be written
rxn_enum_set_dir : str
path to the directory of processed reaction-enum results
output_directory : str
path to the directory were diversity-enum modelling results should be written
modelfile : str
path to the model's json file
weightfile : str
path to the csvs file that contains binarized reactions activity (according to transcriptomic data)
reactionFile : str
path to the file that contains the list of reactions in the model
prev_sol_dir : str
path to the directory were reaction-enum solutions used as starting point for the diversity enumeration
process should be saved
log_dir : str
path to the directory were log files should be stored
env : str
name of the anaconda environment to be activated
dist_anneal : float
dexom-python parameter, 0<=a<=1 controls the distance between each successive solution
obj_tol : float
dexom-python parameter, objective value tolerance, as a fraction of the original value
iters : int
dexom-python parameter, maximal number of iterations
para_batchs : boolean
if True, launch each batch file independantly (instead of parallel on conditions, parallel on batch)
Returns
-------
write batch files ready to launch on a adequatly prepared slurm computing platform
"""
#generate as many batch as ranges step for reaction enum
barcode = os.path.basename(weightfile).split('_')[2]
with open(reactionFile, "r") as file:
rxns = file.read().split("\n")
split_val = (len(rxns) // iters) + 1
#load the reaction enum set file for this barcode
enum_set = pd.read_csv(rxn_enum_set_dir+'/'+barcode+'_solutions.csv')
enum_set.index = enum_set.iloc[:,0]
enum_set.index.name = 'ids'
enum_set.drop(enum_set.columns[0],axis=1,inplace=True)
#Generate stratified random sampling
nbatch = 0
prev_lb = 0 #previous number where i % split_val == 0
for i in range(enum_set.shape[0]):
if i % split_val == 0:
#random pick a solution in the range
tmp_sol = enum_set.iloc[random.randint(prev_lb,i),:]
prev_lb = i
prevsol_file = prev_sol_dir+barcode+'_'+str(i)+'.csv'
pd.DataFrame(tmp_sol).transpose().to_csv(prevsol_file)
#Now we generate the batch script:
if para_batchs:
with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "w+") as f:
f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 72:00:00\n#SBATCH -J div_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH '
'-e %s/runerr%s_div.out\nsource activate %s \n'
% (str(log_dir),str(barcode),str(log_dir),str(barcode), str(env)))
with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "a") as f:
f.write('python %s -o %s/%s_div_enum_%i -m %s -r %s -p %s -a %.5f -i %i --obj_tol %.4f'
% (script_path,output_directory, barcode, i, modelfile, weightfile, prevsol_file, dist_anneal, iters, obj_tol))
else:
with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_diversity_enum.sh", "w+") as f:
f.write('python %s -o %s/%s_div_enum_%i -m %s -r %s -p %s -a %.5f -i %i --obj_tol %.4f'
% (script_path,output_directory, barcode, i, modelfile, weightfile, prevsol_file, dist_anneal, iters, obj_tol))
nbatch=nbatch+1
if para_batchs == False:
with open(batch_directory+"/runfiles_"+barcode+"_diversity_enum.sh", "w+") as f:
f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 72:00:00\n#SBATCH -J div_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH '
'-e %s/runerr%s_div.out\nsource activate %s\nls %s/batch/%s_*_diversity_enum.sh|xargs -n 1 -P 1 bash'
% (str(log_dir),str(barcode),str(log_dir),str(barcode),str(env),str(batch_directory),str(barcode)))
[docs]
def write_rxn_enum_script(script_path,batch_directory,output_directory, modelfile, weightfile,\
reactionFile="", log_dir='log_dir',env="MANA",obj_tol=0.001, iters=100,para_batchs=False):
"""write_rxn_enum_script.
Parameters
----------
script_path : str
path to the diversity_enum.py dexom python script
batch_directory : str
path to the directory were batch files should be written
output_directory : str
path to the directory were diversity-enum modelling results should be written
modelfile : str
path to the model's json file
weightfile : str
path to the csvs file that contains binarized reactions activity (according to transcriptomic data)
reactionFile : str
path to the file that contains the list of reactions in the model
log_dir : str
path to the directory were log files should be stored
env : str
name of the anaconda environment to be activated
obj_tol : float
dexom-python parameter, objective value tolerance, as a fraction of the original value
iters : int
dexom-python parameter, maximal number of iterations
para_batchs : boolean
if True, launch each batch file independantly (instead of parallel on conditions, parallel on batch)
Returns
-------
write batch files ready to launch on a adequatly prepared slurm computing platform
"""
barcode = os.path.basename(weightfile).split('_')[2]
with open(reactionFile, "r") as file:
rxns = file.read().split("\n")
rxn_num = (len(rxns) // iters) + 1
if para_batchs:
for i in range(rxn_num):
with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_reaction_enum.sh", "w+") as f:
f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 24:00:00\n#SBATCH -J rxn_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH '
'-e %s/runerr%s_div.out\nsource activate %s \n'
% (str(log_dir),str(barcode),str(log_dir),str(barcode),str(env)))
with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_reaction_enum.sh", "a") as f:
f.write('python %s -o %s/%s_rxn_enum_%i --range %i_%i -m %s -r %s -l %s '
'-t 600 --mipgap %f \n' % (script_path,output_directory,barcode, i, i*iters, i*iters+iters, modelfile, weightfile, reactionFile, obj_tol))
else:
for i in range(rxn_num):
with open(batch_directory+'/batch/'+barcode+ '_' + str(i) + "_reaction_enum.sh", "w+") as f:
f.write('python %s -o %s/%s_rxn_enum_%i --range %i_%i -m %s -r %s -l %s '
'-t 600 --mipgap %f \n' % (script_path,output_directory,barcode, i, i*iters, i*iters+iters, modelfile, weightfile, reactionFile, obj_tol))
with open(batch_directory+"/runfiles_"+barcode+"_reaction_enum.sh", "w+") as f:
f.write('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n#SBATCH -t 24:00:00\n#SBATCH -J rxn_enum\n#SBATCH -o %s/runout%s_div.out\n#SBATCH '
'-e %s/runerr%s_div.out\nsource activate %s\nls %s/batch/%s_{0..%i}_reaction_enum.sh|xargs -n 1 -P 1 bash'
% (str(log_dir),str(barcode),str(log_dir),str(barcode),str(env),str(batch_directory),str(barcode), int(rxn_num-1)))