Skip to content
Snippets Groups Projects
Commit 2d022643 authored by Patrick Hajek's avatar Patrick Hajek
Browse files

Testing to make sure it works with new data.

parent ba6a518d
Branches
Tags
No related merge requests found
......@@ -6,7 +6,7 @@ data = {}
parser = argparse.ArgumentParser(description='Create a json for the inhomo part of the Isolates Pipeline.')
parser.add_argument('--newFaaDir', '-n', type=str, nargs="?", dest='new_faa',
parser.add_argument('--newFaaDir', '-f', type=str, nargs="?", dest='new_faa',
default="/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new",
help="Directory that holds the new faa info")
......@@ -21,6 +21,13 @@ parser.add_argument('--seedfile', '-s', type=str, dest='seedfile',
parser.add_argument('--faaDb', '-d', type=str, nargs="?", dest='faaDb',
default="", help="Directory that holds the db files")
parser.add_argument('--json', '-j', type=str, nargs="?", dest='jsonName',
default="./inhomo.json",
help="Location you want the json file.")
parser.add_argument('-n', type=int, nargs="?", dest='n',
help="Total number of faa files you want in the json")
args = parser.parse_args()
data["inhomo.new_faa"] = []
......@@ -28,22 +35,29 @@ data["inhomo.new_faa"] = []
new_faa = os.listdir(args.new_faa)
new_faa.sort()
maximum = args.n
total_files = 0
for f in new_faa:
data["inhomo.new_faa"] += [os.path.join(args.new_faa, f)]
if args.faaDb:
data["inhomo.faaDb"] = []
faaDb = os.listdir(args.faaDb)
faaDb.sort()
for f in faaDb:
dbfiles = os.listdir(os.path.join(args.faaDb, f))
dbfiles.sort()
for g in dbfiles:
data["inhomo.faaDb"] += [os.path.join(args.faaDb, f, g)]
if maximum:
total_files += 1
if total_files >= maximum:
break
#if args.faaDb:
# data["inhomo.faaDb"] = []
# faaDb = os.listdir(args.faaDb)
# faaDb.sort()
#
# for f in faaDb:
# dbfiles = os.listdir(os.path.join(args.faaDb, f))
# dbfiles.sort()
# for g in dbfiles:
# data["inhomo.faaDb"] += [os.path.join(args.faaDb, f, g)]
data["inhomo.seedfile"] = args.seedfile
data["inhomo.geneName"] = args.geneName
with open("inhomo.json", "w") as json_file:
with open(args.jsonName, "w") as json_file:
json.dump(data,json_file, indent=8, sort_keys=True)
import os
import subprocess
import argparse
import glob
# Fetch the output of a finished JAWS run and install it:
#   1. optionally run `jaws get` into a temporary directory and open up its
#      permissions,
#   2. copy the two paralog tab files into <finalLoc>/tab.files,
#   3. copy every "<taxon_oid>.faa.*" file into
#      <taxonLastDbs>/<taxon_oid>.faa.lastdb/,
#   4. remove the temporary directory unless -k was given.
parser = argparse.ArgumentParser(description="Necessary info to grab all of the data")
parser.add_argument('-i', type=int, nargs="?", dest="runid")
parser.add_argument('-l', type=str, nargs="?", dest="tempLoc",
                    default="/global/cfs/cdirs/img/img/dataLoad10/data/inhomo3/tmp",
                    help="location to put files between jaws get and placing them in the appropriate space")
parser.add_argument('-f', type=str, nargs="?", dest="finalLoc",
                    default="/global/cfs/cdirs/img/img/dataLoad10/data/inhomo3",
                    help="Location to put the final data")
parser.add_argument('-t', type=str, nargs="?", dest="taxonLastDbs",
                    default="/global/cfs/cdirs/m342/img/web-data/taxon.faa/",
                    help="Location where the taxonLastDbs information is going to end up.")
parser.add_argument('-s', action='store_true', dest='skip',
                    help='Use if want to skip the jaws get and chmod steps')
parser.add_argument('-k', action='store_true', dest='keep',
                    help="Use if you don't want to delete the tmp directory at the end.")
args = parser.parse_args()

# Both destinations must already exist; this script never creates them.
if not os.path.exists(args.finalLoc) or not os.path.exists(args.taxonLastDbs):
    raise ValueError("One or more directories provided does not exist.")

# A missing run id (and, because of the truthiness test, a run id of 0) is
# rejected before any file-system work is attempted.
if not args.runid:
    raise ValueError("Invalid runid")

# Create the staging directory if needed; unlike a bare `mkdir` whose exit
# status is ignored, this raises immediately when creation fails.
os.makedirs(args.tempLoc, exist_ok=True)

if not args.skip:
    #source = subprocess.run(["./source.sh"])
    subprocess.run(["jaws", "get", str(args.runid), args.tempLoc], check=True)
    subprocess.run(["chmod", "-R", "775", args.tempLoc], check=True)

# Copy the two tab files produced by the genTabFiles call.
tab_dir = os.path.join(args.tempLoc, "call-genTabFiles/execution")
subprocess.run(
    [
        "cp",
        os.path.join(tab_dir, "paralog_group.tab.txt"),
        os.path.join(tab_dir, "paralog_group_genes.tab.txt"),
        os.path.join(args.finalLoc, "tab.files"),
    ],
    check=True,
)

# Organize the files into appropriate subdirectories.
exec_dir = os.path.join(args.tempLoc, "call-processTaxon/execution")
exec_entries = os.listdir(exec_dir)
if not exec_entries:
    raise ValueError("No output found in " + exec_dir)
# NOTE(review): os.listdir order is arbitrary, so this presumably relies on
# the execution directory holding exactly one entry -- confirm.
faa_loc = os.path.join(exec_dir, exec_entries[0])

# The taxon oid is everything before the first "." of each file name.
taxon_oid_list = {name.split(".")[0] for name in os.listdir(faa_loc)}

for t in sorted(taxon_oid_list):
    taxonDir = os.path.join(args.taxonLastDbs, t + ".faa.lastdb")
    os.makedirs(taxonDir, exist_ok=True)
    # glob already returns paths prefixed with faa_loc, so they are passed to
    # cp as-is; joining them onto faa_loc again would double the prefix
    # whenever tempLoc is a relative path.
    for faa in glob.glob(os.path.join(faa_loc, t + ".faa.*")):
        subprocess.run(["cp", faa, taxonDir], check=True)

if not args.keep:
    subprocess.run(["rm", '-r', '-f', args.tempLoc], check=True)
This diff is collapsed.
{
"inhomo.geneName": "/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/geneName.tab.txt",
"inhomo.new_faa": [
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954849510.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954862917.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954869437.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954878483.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954891162.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954903902.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954909933.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954919250.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954933032.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954953494.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954963050.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954969401.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954978324.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954988299.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954991861.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2954998041.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955012907.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955020369.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955027585.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955035786.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955044034.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955054558.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955060881.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955066657.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955082314.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955088637.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955099264.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955107952.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955115411.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955120292.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955135195.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955152515.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955166661.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955175229.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955184960.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955188963.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955194527.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955200152.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955210285.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955237411.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955247257.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955251134.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955276307.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955319156.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955322921.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955363617.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955404044.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955428234.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955462201.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955482464.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955516475.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955525255.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955538263.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955566430.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955595458.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955629426.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955664053.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955698539.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955719969.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955745467.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955790241.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955839052.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955844479.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955855147.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955866175.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955877240.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955889134.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955898901.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955909675.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955917133.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955925447.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955933707.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955939424.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955948729.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955952073.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955963837.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955976453.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2955989171.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956002012.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956013887.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956019868.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956025942.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956032204.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956038407.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956044798.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956051605.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956055925.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956063521.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956067641.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956072671.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956078790.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956085143.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956091181.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956097856.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956103440.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956109334.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956115456.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956122060.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956128196.faa",
"/global/cfs/cdirs/m342/img/iso_pipeline_data/inhomo3/taxon.faa.new/2956150717.faa"
],
"inhomo.seedfile": "/global/cfs/cdirs/img/img/dataLoad10/steps/test_scripts_cody/inhomo/bin/ublast.seed"
}
\ No newline at end of file
......@@ -60,7 +60,7 @@ task processTaxon {
runtime {
poolname: "inhomo"
time: "08:00:00"
time: "16:00:00"
memory: "115G"
docker: "doejgi/img-isolate-homologs:1.0.1"
node: 1
......@@ -95,7 +95,7 @@ task genTabFiles {
runtime {
poolname: "inhomo"
time: "08:00:00"
time: "16:00:00"
memory: "115G"
docker: "doejgi/img-isolate-homologs:1.0.1"
node: 1
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment