Source code for manamodeller.batchs


import random
import os
import pandas as pd

def write_div_enum_script(script_path, batch_directory, rxn_enum_set_dir, output_directory, modelfile, weightfile,
                          reactionFile, prev_sol_dir='prev_sol_dir/', log_dir='log_dir', env="MANA",
                          dist_anneal=0.9, obj_tol=0.01, iters=100, para_batchs=False):
    """Write the batch files that launch the dexom-python diversity enumeration.

    The reaction-enum solutions of one barcode are sampled once every
    ``split_val`` rows. Each sampled solution is saved as a one-row csv in
    ``prev_sol_dir`` and a shell script calling ``script_path`` with that
    solution as starting point is written to ``<batch_directory>/batch``.

    Parameters
    ----------
    script_path : str
        path to the diversity_enum.py dexom python script
    batch_directory : str
        path to the directory where batch files should be written
        (scripts go to its ``batch`` sub-directory, created if missing)
    rxn_enum_set_dir : str
        path to the directory of processed reaction-enum results; the file
        ``<barcode>_solutions.csv`` is read from it
    output_directory : str
        path to the directory where diversity-enum modelling results should be written
    modelfile : str
        path to the model's json file
    weightfile : str
        path to the csv file that contains binarized reactions activity
        (according to transcriptomic data). The barcode is the third
        ``_``-separated field of its base name.
    reactionFile : str
        path to the file that contains the list of reactions in the model
    prev_sol_dir : str
        path to the directory where reaction-enum solutions used as starting
        point for the diversity enumeration process should be saved
        (created if missing; a trailing slash is optional)
    log_dir : str
        path to the directory where log files should be stored
    env : str
        name of the anaconda environment to be activated
    dist_anneal : float
        dexom-python parameter, 0<=a<=1 controls the distance between each successive solution
    obj_tol : float
        dexom-python parameter, objective value tolerance, as a fraction of the original value
    iters : int
        dexom-python parameter, maximal number of iterations
    para_batchs : boolean
        if True, every batch script gets its own SBATCH header so it can be
        launched independently (parallel on batch); if False, the batch
        scripts only contain the python command and a single
        ``runfiles_<barcode>_diversity_enum.sh`` launcher is written
        (parallel on conditions)

    Returns
    -------
    None
        batch files ready to launch on an adequately prepared slurm
        computing platform are written to disk

    Raises
    ------
    IndexError
        if the base name of ``weightfile`` has fewer than three
        ``_``-separated fields
    """
    barcode = os.path.basename(weightfile).split('_')[2]

    # One batch is generated per range step of the reaction enumeration.
    # split("\n") is kept on purpose: a trailing newline counts as an entry,
    # exactly as in write_rxn_enum_script.
    with open(reactionFile, "r") as file:
        rxns = file.read().split("\n")
    split_val = (len(rxns) // iters) + 1

    # Load the reaction-enum solutions of this barcode; the first csv column
    # holds the solution ids and becomes the index.
    enum_set = pd.read_csv(rxn_enum_set_dir + '/' + barcode + '_solutions.csv')
    enum_set = enum_set.set_index(enum_set.columns[0])
    enum_set.index.name = 'ids'

    # Plain concatenation (not os.path.join) so the script location always
    # matches the "<batch_directory>/batch/..." path written in the launcher.
    batch_dir = batch_directory + '/batch'
    os.makedirs(batch_dir, exist_ok=True)
    if prev_sol_dir:
        os.makedirs(prev_sol_dir, exist_ok=True)

    slurm_header = ('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n'
                    '#SBATCH -t 72:00:00\n#SBATCH -J div_enum\n#SBATCH -o %s/runout%s_div.out\n'
                    '#SBATCH -e %s/runerr%s_div.out\nsource activate %s'
                    % (log_dir, barcode, log_dir, barcode, env))

    # Stratified random sampling: at every multiple of split_val, pick one
    # solution between the previous multiple and the current one (randint
    # includes both bounds, so the first pick is always row 0).
    prev_lb = 0
    for i in range(0, enum_set.shape[0], split_val):
        tmp_sol = enum_set.iloc[random.randint(prev_lb, i), :]
        prev_lb = i

        # os.path.join accepts prev_sol_dir with or without a trailing slash.
        prevsol_file = os.path.join(prev_sol_dir, barcode + '_' + str(i) + '.csv')
        pd.DataFrame(tmp_sol).transpose().to_csv(prevsol_file)

        command = ('python %s -o %s/%s_div_enum_%i -m %s -r %s -p %s -a %.5f -i %i --obj_tol %.4f'
                   % (script_path, output_directory, barcode, i, modelfile, weightfile,
                      prevsol_file, dist_anneal, iters, obj_tol))
        with open(batch_dir + '/' + barcode + '_' + str(i) + "_diversity_enum.sh", "w") as f:
            if para_batchs:
                f.write(slurm_header + ' \n')
            f.write(command)

    if not para_batchs:
        # Single launcher that runs the batch scripts one after the other.
        launcher = ('\nls %s/batch/%s_*_diversity_enum.sh|xargs -n 1 -P 1 bash'
                    % (batch_directory, barcode))
        with open(batch_directory + "/runfiles_" + barcode + "_diversity_enum.sh", "w") as f:
            f.write(slurm_header + launcher)
def write_rxn_enum_script(script_path, batch_directory, output_directory, modelfile, weightfile,
                          reactionFile="", log_dir='log_dir', env="MANA", obj_tol=0.001, iters=100,
                          para_batchs=False):
    """Write the batch files that launch the dexom-python reaction enumeration.

    The reaction list is cut into consecutive ranges of ``iters`` reactions
    and one shell script per range is written to ``<batch_directory>/batch``.

    Parameters
    ----------
    script_path : str
        path to the dexom python script to run (it is called with the
        ``--range``, ``-m``, ``-r``, ``-l``, ``-t`` and ``--mipgap`` options)
    batch_directory : str
        path to the directory where batch files should be written
        (scripts go to its ``batch`` sub-directory, created if missing)
    output_directory : str
        path to the directory where reaction-enum modelling results should be written
    modelfile : str
        path to the model's json file
    weightfile : str
        path to the csv file that contains binarized reactions activity
        (according to transcriptomic data). The barcode is the third
        ``_``-separated field of its base name.
    reactionFile : str
        path to the file that contains the list of reactions in the model.
        The file is always read, so the empty default is not usable and a
        real path must be given.
    log_dir : str
        path to the directory where log files should be stored
    env : str
        name of the anaconda environment to be activated
    obj_tol : float
        value passed to the script as ``--mipgap``
    iters : int
        number of reactions covered by each batch script
    para_batchs : boolean
        if True, every batch script gets its own SBATCH header so it can be
        launched independently (parallel on batch); if False, the batch
        scripts only contain the python command and a single
        ``runfiles_<barcode>_reaction_enum.sh`` launcher is written
        (parallel on conditions)

    Returns
    -------
    None
        batch files ready to launch on an adequately prepared slurm
        computing platform are written to disk

    Raises
    ------
    IndexError
        if the base name of ``weightfile`` has fewer than three
        ``_``-separated fields
    FileNotFoundError
        if ``reactionFile`` cannot be opened (including the empty default)
    """
    barcode = os.path.basename(weightfile).split('_')[2]

    # split("\n") is kept on purpose: a trailing newline counts as an entry,
    # exactly as in write_div_enum_script.
    with open(reactionFile, "r") as file:
        rxns = file.read().split("\n")
    rxn_num = (len(rxns) // iters) + 1

    # Plain concatenation (not os.path.join) so the script location always
    # matches the "<batch_directory>/batch/..." path written in the launcher.
    batch_dir = batch_directory + '/batch'
    os.makedirs(batch_dir, exist_ok=True)

    # NOTE(review): the log names reuse the "_div" suffix of the diversity
    # enumeration; kept unchanged so existing log paths stay the same.
    slurm_header = ('#!/bin/bash\n#SBATCH -p workq\n#SBATCH --mem=12G\n#SBATCH --cpus-per-task=12\n'
                    '#SBATCH -t 24:00:00\n#SBATCH -J rxn_enum\n#SBATCH -o %s/runout%s_div.out\n'
                    '#SBATCH -e %s/runerr%s_div.out\nsource activate %s'
                    % (log_dir, barcode, log_dir, barcode, env))

    for i in range(rxn_num):
        range_start = i * iters
        command = ('python %s -o %s/%s_rxn_enum_%i --range %i_%i -m %s -r %s -l %s '
                   '-t 600 --mipgap %f \n'
                   % (script_path, output_directory, barcode, i, range_start, range_start + iters,
                      modelfile, weightfile, reactionFile, obj_tol))
        with open(batch_dir + '/' + barcode + '_' + str(i) + "_reaction_enum.sh", "w") as f:
            if para_batchs:
                f.write(slurm_header + ' \n')
            f.write(command)

    if not para_batchs:
        # Single launcher that runs the batch scripts one after the other.
        launcher = ('\nls %s/batch/%s_{0..%i}_reaction_enum.sh|xargs -n 1 -P 1 bash'
                    % (batch_directory, barcode, rxn_num - 1))
        with open(batch_directory + "/runfiles_" + barcode + "_reaction_enum.sh", "w") as f:
            f.write(slurm_header + launcher)