# GNU Parallel tutorial


Prerequisites

# mkdir parallel_tutorial
# cd parallel_tutorial
# GET=$(echo wget -qO -)
# LATEST=$($GET http://ftpmirror.gnu.org/parallel | perl -ne '/(parallel-\d{8})/ and print $1."\n"' | sort | tail -n1)
# $GET http://ftpmirror.gnu.org/parallel/$LATEST.tar.bz2 > $LATEST.tar.bz2
# $GET http://ftpmirror.gnu.org/parallel/$LATEST.tar.bz2.sig > $LATEST.tar.bz2.sig
# gpg --keyserver keys.gnupg.net --recv-key 88888888
# gpg --with-fingerprint $LATEST.tar.bz2.sig 2>&1 | grep -e RSA -e 'CDA0 1A42 08C4 F745 0610  7E7B D1AB 4516 8888 8888'
# bzip2 -dc $LATEST.tar.bz2 | tar xf -
# cd $LATEST
# ./configure
# make
# make install
# echo -e '--gnu' > /etc/parallel/config
# cd ..
# rm -rf $LATEST*
# which parallel
/usr/local/bin/parallel
# file /usr/local/bin/parallel
/usr/local/bin/parallel: Perl script, ASCII text executable, with escape sequences
# parallel --bibtex
> will cite
# parallel -k echo ::: A B C > abc-file
# parallel -k echo ::: D E F > def-file
# perl -e 'printf "A\0B\0C\0"' > abc0-file
# perl -e 'printf "A_B_C_"' > abc_-file
# perl -e 'printf "f1\tf2\nA\tB\nC\tD\n"' > tsv-file.tsv
# perl -e 'for(1..30000){print "$_\n"}' > num30000
# perl -e 'for(1..1000000){print "$_\n"}' > num1000000
# (echo %head1; echo %head2; perl -e 'for(1..10){print "$_\n"}') > num_%header
# SERVER1=192.168.1.1
# SERVER2=192.168.1.2
# ssh-keygen
# ssh-copy-id -i /root/.ssh/id_rsa.pub $SERVER1
# ssh-copy-id -i /root/.ssh/id_rsa.pub $SERVER2
# ssh $SERVER1 echo works
# ssh $SERVER2 echo works

A single input source

# parallel echo ::: A B C
# parallel -a abc-file echo
# cat abc-file | parallel echo
A
B
C

Multiple input sources

# parallel echo ::: A B C ::: D E F
# parallel -a abc-file -a def-file echo
# cat abc-file | parallel -a - -a def-file echo
# cat abc-file | parallel echo :::: - def-file
# parallel echo ::: A B C :::: def-file
A D
A E
A F
B D
B E
B F
C D
C E
C F
# parallel --xapply echo ::: A B C D E ::: F G
A F
B G
C
D
E
# parallel --xapply echo ::: A B C D E ::: F G
A F
B G
C F
D G
E F

Changing the argument separator/delimiter

# parallel --arg-sep ,, --arg-file-sep // echo ,, A B C // def-file
A D
A E
A F
B D
B E
B F
C D
C E
C F
# parallel -d _ echo :::: abc_-file
# parallel -d '\0' echo :::: abc0-file
# parallel -0 echo :::: abc0-file
A
B
C

End-of-file value for input source

# parallel -E stop echo ::: A B stop C D
A
B

Skipping empty lines

# echo -e "1\n\n\n\n\n2\n" | parallel --no-run-if-empty echo
1
2

No command means arguments are commands

# parallel ::: ls 'echo foo' pwd
abc0-file
abc_-file
abc-file
def-file
num1000000
num30000
num_%header
tsv-file.tsv
foo
/parallel_tutorial

The 5 replacement strings

# parallel echo ::: A/B.C
# parallel echo {} ::: A/B.C
A/B.C
# parallel echo {.} ::: A/B.C
A/B
# parallel echo {/} ::: A/B.C
B.C
# parallel echo {//} ::: A/B.C
A
# parallel echo {/.} ::: A/B.C
B
# parallel echo {#} ::: A B C # gives the job number
1
2
3

Changing the replacement strings

# parallel -I ,, echo ,, ::: A/B.C
A/B.C
# parallel --extensionreplace ,, echo ,, ::: A/B.C
A/B
# parallel --basenamereplace ,, echo ,, ::: A/B.C
B.C
# parallel --dirnamereplace ,, echo ,, ::: A/B.C
A
# parallel --basenameextensionreplace ,, echo ,, ::: A/B.C
B
# parallel --seqreplace ,, echo ,, ::: A B C
1
2
3

Positional replacement strings

# parallel echo {1} and {2} ::: A B ::: C D
A and C
A and D
B and C
B and D
# parallel echo /={1/} //={1//} /.={1/.} .={1.} ::: A/B.C D/E.F
/=B.C //=A /.=B .=A/B
/=E.F //=D /.=E .=D/E
# parallel echo 1={1} 2={2} 3={3} -1={-1} -2={-2} -3={-3} ::: A B ::: C D ::: E F
1=A 2=C 3=E -1=E -2=C -3=A
1=A 2=C 3=F -1=F -2=C -3=A
1=A 2=D 3=E -1=E -2=D -3=A
1=A 2=D 3=F -1=F -2=D -3=A
1=B 2=C 3=E -1=E -2=C -3=B
1=B 2=C 3=F -1=F -2=C -3=B
1=B 2=D 3=E -1=E -2=D -3=B
1=B 2=D 3=F -1=F -2=D -3=B

Input from columns

# parallel --colsep '\t' echo 1={1} 2={2} :::: tsv-file.tsv
1=f1 2=f2
1=A 2=B
1=C 2=D

Header defined replacement strings

# parallel --header : --colsep '\t' echo f1={f1} f2={f2} :::: tsv-file.tsv
f1=A f2=B
f1=C f2=D

More than one argument

# cat num30000 | parallel --xargs echo | wc -l
2
# cat num30000 | parallel --xargs -s 10000 echo | wc -l
17
# cat num30000 | parallel --jobs 4 -m echo | wc -l
5
# parallel --jobs 4 -m echo pre-{}-post ::: A B C D E F G
pre-A B-post
pre-C D-post
pre-E F-post
pre-G-post
# parallel --jobs 4 -X echo pre-{}-post ::: A B C D E F G
pre-A-post pre-B-post
pre-C-post pre-D-post
pre-E-post pre-F-post
pre-G-post
# parallel -N3 echo ::: A B C D E F G H
A B C
D E F
G H

Quoting

# parallel -q perl -e 'print "@ARGV\n"' ::: This works
# parallel perl -e \''print "@ARGV\n"'\' ::: This works
This
works

Trimming space

# parallel --trim r echo pre-{}-post ::: ' A '
pre- A-post
# parallel --trim l echo pre-{}-post ::: ' A '
pre-A -post
# parallel --trim lr echo pre-{}-post ::: ' A '
pre-A-post

Controlling the output

# parallel --tag echo foo-{} ::: A B C
A       foo-A
B       foo-B
C       foo-C
# parallel --tagstring {}-bar echo foo-{} ::: A B C
A-bar   foo-A
B-bar   foo-B
C-bar   foo-C
# parallel --dryrun echo {} ::: A B C
echo A
echo B
echo C
# parallel --verbose echo {} ::: A B C
echo A
echo B
A
echo C
B
C
# parallel -j2 'printf "%s-start\n%s" {} {};sleep {};printf "%s\n" -middle;echo {}-end' ::: 4 2 1
2-start
2-middle
2-end
1-start
1-middle
1-end
4-start
4-middle
4-end
# parallel -j2 --ungroup 'printf "%s-start\n%s" {} {};sleep {};printf "%s\n" -middle;echo {}-end' ::: 4 2 1
4-start
42-start
2-middle
2-end
1-start
1-middle
1-end
-middle
4-end
# parallel -j2 --linebuffer 'printf "%s-start\n%s" {} {};sleep {};printf "%s\n" -middle;echo {}-end' ::: 4 2 1
4-start
2-start
2-middle
2-end
1-start
1-middle
1-end
4-middle
4-end
# parallel -j2 -k 'printf "%s-start\n%s" {} {};sleep {};printf "%s\n" -middle;echo {}-end' ::: 4 2 1
4-start
4-middle
4-end
2-start
2-middle
2-end
1-start
1-middle
1-end

Saving output into files

# parallel --files ::: A B C
/tmp/kDpvB7d8aA.par
/tmp/1WyUUJEhht.par
/tmp/PgWjftjche.par
# parallel --tmpdir /var/tmp --files ::: A B C
# TMPDIR=/var/tmp parallel --files ::: A B C
/var/tmp/nvpct2t9WP.par
/var/tmp/nufRxiok6Y.par
/var/tmp/_2gy6BRhiC.par
# parallel --results outdir echo ::: A B C
A
B
C
# find outdir | grep std
outdir/1/B/stderr
outdir/1/B/stdout
outdir/1/C/stderr
outdir/1/C/stdout
outdir/1/A/stderr
outdir/1/A/stdout

Number of simultaneous jobs

# # 64 jobs in total (not per core)
# /usr/bin/time parallel -N0 -j64 sleep 1 ::: {1..128}
# # 1 job/#core
# /usr/bin/time parallel -N0 sleep 1 ::: {1..128}
# #2 jobs for each core
# /usr/bin/time parallel -N0 --jobs 200% sleep 1 ::: {1..128}
# #Run as many jobs in parallel as possible
# /usr/bin/time parallel -N0 --jobs 0 sleep 1 ::: {1..128}
# parallel --use-cpus-instead-of-cores -N0 sleep 1 ::: {1..128}

Interactiveness

# parallel --interactive echo ::: 1 2 3
echo 1 ?...n
echo 2 ?...n
echo 3 ?...n
# parallel -X --dry-run --tty vi ::: 1 2 3
vi 1 2 3
# parallel --dry-run --tty vi ::: 1 2 3
vi 1
vi 2
vi 3

Timing

# parallel --delay 2.5 echo Starting {}\;date ::: 1 2 3
Starting 1
Sat Jan  4 13:37:00 CET 2014
Starting 2
Sat Jan  4 13:37:02 CET 2014
Starting 3
Sat Jan  4 13:37:05 CET 2014
# parallel --timeout 2.1 sleep {}\; echo {} ::: 1 2 3 4
1
2
# parallel --timeout 200% sleep {}\; echo {} ::: 2.1 2.2 3 7 2.3
2.1
2.2
3
2.3
# parallel --eta sleep ::: 1 3 2 2 1 3 3 2 1

Computers / CPU cores / Max jobs to run
1:local / 2 / 2

Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete
ETA: 2s 0left 1.00avg  local:0/9/100%/1.0s

Progress

# parallel --progress sleep ::: 1 3 2 2 1 3 3 2 1

Computers / CPU cores / Max jobs to run
1:local / 2 / 2

Computer:jobs running/jobs completed/%of started jobs/Average seconds to complete
local:0/9/100%/1.0s
# parallel --joblog /tmp/log exit  ::: 1 2 3 0
# cat /tmp/log
Seq     Host    Starttime       Runtime Send    Receive Exitval Signal  Command
1       :       1388839020.557  0.004   0       0       1       0       exit 1
2       :       1388839020.560  0.006   0       0       2       0       exit 2
3       :       1388839020.566  0.005   0       0       3       0       exit 3
4       :       1388839020.570  0.003   0       0       0       0       exit 0

Termination

# parallel -j2 --halt 1 echo {}\; exit {} ::: 0 0 1 2 3
0
0
1
parallel: Starting no more jobs. Waiting for 2 jobs to finish. This job failed:
echo 1; exit 1
2
parallel: Starting no more jobs. Waiting for 1 jobs to finish. This job failed:
echo 2; exit 2
# parallel -j2 --halt 2 echo {}\; exit {} ::: 0 0 1 2 3
0
0
1
parallel: This job failed:
echo 1; exit 1
# parallel -k --retries 3 'echo tried {} >>/tmp/runs; echo completed {}; exit {}' ::: 1 2 0
completed 1
completed 2
completed 0
# cat /tmp/runs
tried 1
tried 2
tried 1
tried 2
tried 1
tried 2
tried 0

Limiting the resources

# parallel --load 100% echo load is less than {} job per cpu ::: 1
load is less than 1 job per cpu
# parallel --noswap echo the system is not swapping ::: now
the system is not swapping now
# parallel --nice 17 echo this is being run with nice -n ::: 17
this is being run with nice -n 17

Sshlogin

# parallel -S 1/$SERVER1 'echo running on {} "(1 CPU)"' ::: $SERVER1
running on 192.168.1.1 (1 CPU)
# parallel -S $SERVER1,$SERVER2 'hostname; echo {}' ::: running on more hosts
server2
running
server1
on
server2
more
server1
hosts

Transferring files

# echo This is input_file > input_file
# parallel -S $SERVER1 --transfer --return {}.out --cleanup cat {} ">" {}.out ::: input_file
# cat input_file.out
This is input_file
# echo common data > common_file
# parallel --basefile common_file -S $SERVER1 --cleanup cat common_file\; echo {} ::: foo
common data
foo

Running the same commands on all hosts

# parallel --onall -S $SERVER1,$SERVER2 echo ::: foo bar
foo
bar
foo
bar
# parallel --nonall -S $SERVER1,$SERVER2 echo foo bar
foo bar
foo bar

References

http://www.gnu.org/software/parallel/parallel_tutorial.html

No comments: