Notes from edX: Unix Tools: Data, Software and Production Engineering
grep
Number of repetitions
egrep 's{3}' words # Words with three consecutive s characters
egrep '[^aeiouy]{7}' words # Words with seven consecutive consonants
egrep '^.{,15}$' words | wc -l # Words with a length up to 15
egrep '^.{15,}$' words | wc -l # Words with a length of at least 15
egrep '^.{14}.+$' words | wc -l # Same using + (one or more)
egrep '^.{15,16}$' words | wc -l # Words with a length between 15 and 16
egrep '^.{15}.?$' words | wc -l # Same using ? (one or zero)
Back-references
egrep '^(.).*\1$' words | head # Words beginning and ending with same letter
egrep '^(.)(.)((.)\4)?\2\1$' words | head # Find 4- or 6-letter palindromes
Alternative matches
egrep '^(aba|ono).*(ly|ne)$' words # Words with alternate start/end parts
Path
echo $PATH |
> egrep '(^\.:)|(:\.:)|(:\.$)' >/dev/null && # Does the path contain .?
> echo Current directory in PATH
Complement matches
egrep '^[ ]*(/\*|\*)' *.c | head -5 # List comment lines
egrep -v '^[ ]*(/\*|\*)' *.c | head -5 # List non-comment lines
Search for fixed strings
cd /usr/src/linux/fs # Linux filesystem source code directory
fgrep ... *.c | head -5
grep -o 'st_[a-z]*' /usr/include/sys/stat.h | # Obtain status fields
> sort -u >/tmp/statfields
head /tmp/statfields # List status fields
fgrep -f /tmp/statfields *.c | head -5 # List status field matches
cut awk sed
head -5 /etc/passwd
cut -d: -f 1 /etc/passwd | head -5 # Output field 1
cut -d: -f 3-4 /etc/passwd | head -5 # Output fields 3-4
awk '/bash/' /etc/passwd # Output lines containing "bash"
awk -F: '$3 > 1000' /etc/passwd # Lines where field 3 > 1000
awk -F: '{print $1}' /etc/passwd | head -5 # Output field 1
awk '!/^#/ {print $1}' /etc/services | head # Combine predicate and action
cd /usr/src/linux/kernel # Linux kernel source code directory
sed -n 's/#include *["<]\([^">]*\).*/\1/p' *.c | # Output included file names
> head
cd /usr/share/dict # Output lines from lines 1000 to 1005
sed -n 1000,1005p words
cd /usr/src/linux/kernel/printk
sed -n '/^enum log_flags/,/^};$/p' printk.c # Output log_flags definition
curl -q 'http://api.geonames.org/citiesJSON?north=37&south=38&east=24&west=23&lang=en&username=demo&maxRows=1' >result.json
jq -r '.geonames[0].name,.geonames[0].countrycode' result.json
curl -q 'http://api.geonames.org/cities?north=51&south=52&east=0&west=1&lang=en&username=demo&maxRows=1' >result.xml
xmlstarlet sel -t -c /geonames/geoname/name result.xml
sort
sort -k 2 dates | head -5 # Sort by second and subsequent fields
sort -k 2,2 -k 1,1 dates | head -5 # Sort by second, then first field
sort -k 5.5,5.6 dates | head -5 # Sort by time minutes
sort -k 4r dates | head -5 # Reverse sort
sort -t : -k 4n /etc/passwd | head -8 # Sort by numeric group-id
logs wrangling
logresolve /var/log/access.log >resolved
head resolved
cut -d ' ' -f 1 resolved | # Obtain domain name
awk -F. '{print $NF}' | # Obtain top-level domain
> head
cut -d ' ' -f 1 resolved | # Obtain domain name
awk -F. '{print $NF}' | # Obtain top-level domain
grep -v '[0-9]' | # Remove numeric IP addresses
sort | # Order by TLD
uniq -c | # Count duplicates
sort -rn | # Order by number, descending
> head
compare
ls /bin >linux.bin
ssh freefall.freebsd.org ls /bin >freebsd.bin
comm linux.bin freebsd.bin | head -20
relational
find . -type f -print0 | # Output all files
> xargs -0 md5sum >md5-sum.out # Run md5-sum on each of them
cut -d ' ' -f 1 md5-sum.out | # Obtain first field
> sort | # Sort
> uniq -d >duplicates
tower of hanoi with sed
$ cat hanoi.sed
# Towers of Hanoi in sed.
#
# @(#)hanoi.sed 5.1 (Berkeley) 10/10/90
#
#
# Ex:
# Run "sed -f hanoi.sed", and enter:
#
# :abcd: : :<CR><CR>
#
# (note -- TWO carriage returns, a peculiarity of sed), this will output the
# sequence of states involved in moving 4 rings, the largest called "a" and
# the smallest called "d", from the first to the second of three towers, so
# that the rings on any tower at any time are in descending order of size.
# You can start with a different arrangement and a different number of rings,
# say :ce:b:ax: and it will give the shortest procedure for moving them all
# to the middle tower. The rules are: the names of the rings must all be
# lower-case letters, they must be input within 3 fields (representing the
# towers) and delimited by 4 colons, such that the letters within each field
# are in alphabetical order (i.e. rings are in descending order of size).
#
# For the benefit of anyone who wants to figure out the script, an "internal"
# line of the form
# b:0abx:1a2b3 :2 :3x2
# has the following meaning: the material after the three markers :1, :2,
# and :3 represents the three towers; in this case the current set-up is
# ":ab : :x :". The numbers after a, b and x in these fields indicate
# that the next time it gets a chance, it will move a to tower 2, move b
# to tower 3, and move x to tower 2. The string after :0 just keeps track
# of the alphabetical order of the names of the rings. The b at the
# beginning means that it is now dealing with ring b (either about to move
# it, or re-evaluating where it should next be moved to).
#
# Although this version is "limited" to 26 rings because of the size of the
# alphabet, one could write a script using the same idea in which the rings
# were represented by arbitrary [strings][within][brackets], and in place of
# the built-in line of the script giving the order of the letters of the
# alphabet, it would accept from the user a line giving the ordering to be
# assumed, e.g. [ucbvax][decvax][hplabs][foo][bar].
#
# George Bergman
# Math, UC Berkeley 94720 USA
# cleaning, diagnostics
# Remove every space so the towers may be typed with or without padding.
s/ *//g
# Skip lines that are empty once the spaces are gone.
/^$/d
# Reject input containing anything other than lower-case letters and colons:
# queue the message for output and start the next cycle.
/[^a-z:]/{a\
Illegal characters: use only a-z and ":". Try again.
d
}
# Reject input that is not exactly three (possibly empty) letter fields
# enclosed and separated by four colons.
/^:[a-z]*:[a-z]*:[a-z]*:$/!{a\
Incorrect format: use\
\ : string1 : string2 : string3 :<CR><CR>\
Try again.
d
}
# Reject input in which any letter occurs more than once.
/\([a-z]\).*\1/{a\
Repeated letters not allowed. Try again.
d
}
# initial formatting
# Save the cleaned input in the hold space, blank out every letter in the
# pattern space, then append the saved copy after a newline. The pattern space
# now holds a spaces-only version of the towers followed by the real one.
h
s/[a-z]/ /g
G
# Build ":1<tower1>:2<tower2>:3<tower3>:0". Each tower field gets its own
# letters followed by as many spaces as there are rings on the other two
# towers, i.e. room for every ring that could later be moved onto it.
s/^:\( *\):\( *\):\( *\):\n:\([a-z]*\):\([a-z]*\):\([a-z]*\):$/:1\4\2\3:2\5\1\3:3\6\1\2:0/
# Tag every ring with the digit 2 (the header comment above describes these
# digits as the tower the ring will move to next).
s/[a-z]/&2/g
# Prepend the alphabet, then consume it one letter at a time (loop :a):
# a letter that also occurs later in the line is appended at the end, i.e.
# after ":0". When the line starts with ":" again the alphabet is used up and
# the rings in use are listed after ":0" in alphabetical order.
s/^/abcdefghijklmnopqrstuvwxyz/
:a
s/^\(.\).*\1.*/&\1/
s/.//
/^[^:]/ba
# Move the ":0<ordering>" part to the front and terminate the line with ":".
s/\([^0]*\)\(:0.*\)/\2\1:/
# Prepend the first ring of the ordering as the "current ring" marker.
s/^[^0]*0\(.\)/\1&/
:b
# outputting current state without markers
# Work on a copy: keep the full internal line in the hold space, drop
# everything up to the first tower, delete the digits 1-3 and print, then
# restore the internal line.
h
s/.*:1/:/
s/[123]//gp
g
:c
# establishing destinations
# When the current ring is the last one in the ":0" ordering (it is directly
# followed by ":1"), go on to :d.
# NOTE(review): `t` only branches if a substitution has succeeded since the
# last input line was read or the last taken `t`; the flow here relies on
# earlier s commands having set that flag -- verify before reordering.
/^\(.\).*\1:1/td
# Each of the nine lines below handles one combination of (tower holding the
# current ring, digit after the current ring). It makes the ring that follows
# the current one in the ":0" ordering the new current ring and rewrites the
# character after that ring's last occurrence (its digit in the tower fields).
# If the current ring's digit equals its own tower, the next ring gets that
# same digit; otherwise the next ring gets the remaining third tower.
/^\(.\).*:1[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/
/^\(.\).*:1[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/
/^\(.\).*:1[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/
/^\(.\).*:2[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/
/^\(.\).*:2[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/
/^\(.\).*:2[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/
/^\(.\).*:3[^:]*\11/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\32/
/^\(.\).*:3[^:]*\12/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\31/
/^\(.\).*:3[^:]*\13/s/^\(.\)\(.*\1\([a-z]\).*\)\3./\3\2\33/
bc
# iterate back to find smallest out-of-place ring
# While the current ring sits on the tower named by its own digit, replace the
# current-ring marker with the ring preceding it in the ":0" ordering.
:d
s/^\(.\)\(:0[^:]*\([^:]\)\1.*:\([123]\)[^:]*\1\)\4/\3\2\4/
td
# move said ring (right, resp. left)
# First form: the target tower (2 or 3) lies to the right of the ring; the
# ring and its digit are replaced by a space and written over the first space
# of the target field. Second form: the same for a target tower (1 or 2) that
# lies to the left of the ring.
s/^\(.\)\(.*\)\1\([23]\)\(.*:\3[^ ]*\) /\1\2 \4\1\3/
s/^\(.\)\(.*:\([12]\)[^ ]*\) \(.*\)\1\3/\1\2\1\3\4 /
# If a substitution succeeded, loop back to print the new state; otherwise
# report completion and discard the pattern space.
tb
s/.*/Done! Try another, or end with ^D./p
d
$ sed -f hanoi.sed
diff
diff -c file1 file2 # List file differences in context
diff -u file1 file2 # List file differences in unified context
diff -W 40 -y file1 file2 # List differences in two 40 character columns
diff -u mary.c mary2.c >mary.patch # Generate patch as a unified diff
patch john.c <mary.patch # Patch John's copy with Mary's patch
diff -b john.c mary.c # Ignore changes in number of blanks
diff -w john.c mary.c # Ignore all whitespace changes
diff -r a b # Recursive diff
diff -q file1 file3 >/dev/null && echo Same
diff -q file1 file2 >/dev/null || echo Different
test and eval
test -d / && echo Directory # Test if directory
test -f / && echo File # Test if file
test hi = there && echo Same # Test if strings equal
test -z "" && echo Empty # Test if string empty
test . -nt / && echo . is newer than / # Test if file newer than other
test -w / && echo Writable # Test if writable
if [ -d /etc/bash_completion.d ] ; then # Script use
echo $(ls /etc/bash_completion.d | wc -l) completion scripts installed
fi
expr 1 + 2 # Add
expr 12 \% 5 # Remainder
expr John \> Mary # Compare strings
expr length 'To be or not to be' # String length
tr
curl -s --compressed https://www.gutenberg.org/cache/epub/1342/pg1342.txt >pride-and-prejudice.txt
tr a-z l-za-k <pride-and-prejudice.txt >secret
openssl enc -e -aes-256-cbc -pbkdf2 <pride-and-prejudice.txt >real-secret
openssl enc -d -aes-256-cbc -pbkdf2 <real-secret | head
find and paste
find . | # List current directory entries
> paste - /usr/share/dict/words | # Pair entries with words
> awk 'NF == 2 && $1 != "."' | # List pairs apart from the current directory
> tac |
> sed 's/^/mv /' | # Convert pairs to rename commands
> sh # Have the shell execute the commands
sound
sox sox-orig.wav sox-orig.mp3 # Convert between file formats
sox sox-orig.wav sox-low.wav pitch -600 # Lower pitch by 600 cents
play -q sox-low.wav
sox sox-orig.wav sox-fast.wav tempo 1.5 # Increase tempo by 50%
sox sox-orig.wav sox-chorus.wav chorus 0.5 0.9 50 0.4 0.25 2 -t \
> 60 0.32 0.4 2.3 -t 40 0.3 0.3 1.3 -s # Apply chorus effect
format and email
openssl ciphers |
> sed 's/:/ /g' | # Separate words with space
> fmt | # Format words in lines
> head
sendmail john.smith@example.com <<\EOF
> From: Alice Jones <alice.jones@example.com>
> To: John Smith <john.smith@example.com>
> Subject: Hi there
>
> I'm learning how to send email from the command line.
> EOF
$ cat send-connections.sh
#!/bin/sh
# Mail the output of netstat to john.smith@example.com through sendmail.
# The here-document delimiter is unquoted, so $(date -R) and $(netstat) are
# expanded by the shell before the message is handed to sendmail.
sendmail john.smith@example.com <<EOF
From: Diomidis Spinellis <dds@aueb.gr>
To: John Smith <john.smith@example.com>
Date: $(date -R)
Subject: Current network connections

These are the currently active network connections.
$(netstat)
EOF
sh send-connections.sh
##### convert attachments into text
dd if=/dev/random of=data count=32 bs=1
more data
base64 data >data.base64
base64 -d <data.base64 >data.decoded
cmp data data.decoded && echo Files are the same
Prefer redirection to pipes
cat file | command # Wasteful execution of cat
command <file # A redirection is all that's needed
Test command, not its exit code
command
if [ $? -ne 0 ] ; then # Verbose exit variable test
> echo Error >&2
> fi
if ! command ; then # A simple negation will do
> echo Error >&2
> fi
Use the sed and awk predicates
grep pattern | awk '{ ... }' # Unneeded use of grep
awk '/pattern/ { ... }' # Simply prepend pattern
grep pattern | sed '...' # Unneeded use of grep
sed '/pattern/ { ... }' # Simply prepend pattern
Grep can recurse directories
grep pattern afile | wc -l # Count matches
grep -c pattern afile # Modern count matches
find . -type f | xargs grep pattern # Recursive search
grep -r pattern . # Modern recursive search
Prefer wildcards to ls
echo $(ls) # This is the same
ls # As a simple invocation
for i in $(ls) ; do # The ls here
> ...
> done
for i in * ; do # can be replaced by a wildcard
> ...
> done
Replace awk with cut
head -2 /etc/passwd |
> awk -F: '{print $1, $7}' # Print fields 1 and 7
head -2 /etc/passwd |
> cut -d : -f 1,7 # More efficient way to print fields 1 and 7
Replace sed with expr
echo $LANG
echo $LANG |
> sed 's/.*\.\(.*\)/\1/' # Isolate encoding
expr "$LANG" : '.*\.\(.*\)' # More efficient way to isolate encoding
Process find’s output
ls -ld **/core # Find files named core; the expansion might not fit in the argument list
find . -name core | # Find files named core
> while read filename ; do
> ls -ld "$filename"
> done
find . -name core \
> -exec ls -ld '{}' \; # Execute ls for each found file
find . -name core -print0 |
> xargs -0 ls -ld # Execute ls in batches
Pipe through ssh
tar -czf - work-directory | # Pack directory to standard output
> ssh backup-server dd of=/dev/st0 bs=1M # Send data to a remote tape
ssh backup-server dd if=/dev/st0 bs=1M | # Obtain data from a remote tape
> tar -xzf - # Unpack files from standard input
tar -czf - work-directory | # Pack directory to standard output
> ssh otherhost tar -xzf - # Unpack files from standard input
bypass firewall
ssh -f -L 8389:ldap.example.com:389 shell.example.com sleep 9999
local port 8389 goes to shell.example.com, which goes to ldap.example.com:389