HTCondor submission setup

Felix Hoffstaedter, commit 06f63a2c83
3 changed files with 3364 additions and 0 deletions
  1. .gitignore (+ 3 - 0)
  2. code/process.condor_dag (+ 3309 - 0)
  3. code/process.condor_submit (+ 52 - 0)

+ 3 - 0
.gitignore

@@ -0,0 +1,3 @@
+logs
+dag_tmp
+.condor_datalad_lock

File diff suppressed because it is too large
+ 3309 - 0
code/process.condor_dag
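
The diff of the 3309-line DAG file is suppressed above. For orientation, a DAGMan file driving this kind of per-subject fan-out typically contains one JOB/VARS pair per subject, with every node reusing the submit description below; the node names and subject IDs in this minimal sketch are illustrative assumptions, not an excerpt of the actual file:

    # one DAG node per subject, all sharing the same submit description
    JOB sub-01 code/process.condor_submit
    VARS sub-01 subject="sub-01"
    JOB sub-02 code/process.condor_submit
    VARS sub-02 subject="sub-02"
    # ... one JOB/VARS pair for every further subject

The $(subject) macro referenced in process.condor_submit is filled in per node from these VARS lines.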


+ 52 - 0
code/process.condor_submit

@@ -0,0 +1,52 @@
+universe       = vanilla
+# resource requirements for each job
+request_cpus   = 1
+request_memory = 3G
+request_disk   = 4G
+
+# be nice and only use free resources
+# nice_user = true
+
+# tell condor that a job is self-contained and the executable
+# is enough to bootstrap the computation on the execute node
+should_transfer_files = yes
+# explicitly do not transfer anything back
+# we are using datalad for everything that matters
+transfer_output_files = ""
+
+# the actual job script, nothing condor-specific in it
+executable     = $ENV(PWD)/code/participant_job
+
+# the job expects these environment variables for labeling and synchronization
+# - JOBID: subject- AND process-specific ID to make a branch name from
+#     (must be unique across all (even multiple) submissions);
+#     including the cluster ID makes it possible to sort multiple computation attempts
+# - DSLOCKFILE: lock (must be accessible from all compute jobs) to synchronize
+#     write access to the output dataset
+# - DATALAD_GET_SUBDATASET__SOURCE__CANDIDATE__...:
+#     (additional) locations from which datalad can retrieve relevant
+#     subdatasets, in case a configured URL is outdated
+# - GIT_AUTHOR_...: Identity information used to save dataset changes in compute
+#     jobs
+environment = "\
+  JOBID=$(subject).$(Cluster) \
+  DSLOCKFILE=$ENV(PWD)/.condor_datalad_lock \
+  GIT_AUTHOR_NAME='Felix Hoffstaedter' \
+  GIT_AUTHOR_EMAIL='f.hoffstaedter@fz-juelich.de' \
+  "
+
+# place the job logs into PWD/logs, using the same name as for the result branches
+# (JOBID)
+log    = $ENV(PWD)/logs/$(subject).$(Cluster).log
+output = $ENV(PWD)/logs/$(subject).$(Cluster).out
+error  = $ENV(PWD)/logs/$(subject).$(Cluster).err
+# essential args for "participant_job"
+# 1: where to clone the analysis dataset
+# 2: location to push the result git branch to. The "ria+" prefix is stripped.
+# 3: ID of the subject to process
+arguments = "\
+  ria+file:///data/project/cat_preprocessed/inputstore#6c5791d8-1803-48a1-bbaa-2b5e23b5f707 \
+  /data/project/cat_preprocessed/dataladstore/6c5/791d8-1803-48a1-bbaa-2b5e23b5f707 \
+  $(subject) \
+  "
+queue
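
For context: the submit description above only defines the contract, while the participant_job script it points at is not part of this commit. The following is a minimal sketch of how such a script might honor that contract, assuming datalad, git, and flock are available on the execute node; the branch naming scheme, the clone layout, and the computation step are placeholder assumptions, not the actual script:

    #!/bin/bash
    set -e -u
    # positional arguments as documented in process.condor_submit
    dssource="$1"       # RIA URL to clone the analysis dataset from
    pushgitremote="$2"  # path in the store to push the result branch to
    subject="$3"

    # clone into a job-local working directory and create a branch
    # named after JOBID, so every job pushes a unique branch
    datalad clone "$dssource" ds
    cd ds
    git checkout -b "job-$JOBID"

    # ... run the actual per-subject computation for $subject here ...

    # DSLOCKFILE serializes write access to the output dataset across
    # concurrently finishing jobs
    flock "$DSLOCKFILE" git push "$pushgitremote" "job-$JOBID"

With DAG and submit description in place, the whole computation would be queued through HTCondor's standard DAGMan frontend, e.g. "condor_submit_dag code/process.condor_dag". This also explains the .gitignore entries: logs matches the log/output/error paths configured above, .condor_datalad_lock is the DSLOCKFILE, and dag_tmp is presumably scratch space for the DAG run.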