Linux-快捷命令整理

Introduction

快捷命令整理

方便自己,快捷他人

更是对自己学习技能的实战检验。

注意: <abspath> 指代的是需要提供对应执行脚本/命令的绝对路径;当然可以使用相对路径,或者当前执行环境中含有该命令,前提是需要提前配置好相关环境。

Getting Started

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32

## 1. sizeg, sizem, sizek
# size{g,m,k} BYTES: divide BYTES by 1024^3 / 1024^2 / 1024 with bc at
# scale=4 and print the quotient followed by the suffix Gb / Mb / Kb.
alias sizeg='sizeg(){ echo $(bc <<< "scale=4; $1/(1024^3)") Gb; }; sizeg'
alias sizem='sizem(){ echo $(bc <<< "scale=4; $1/(1024^2)") Mb; }; sizem'
alias sizek='sizek(){ echo $(bc <<< "scale=4; $1/1024") Kb; }; sizek'
## 2. headget
# headget COL FILE    : FILE is tab-separated with column names on line 1.
#                       Prints "num<TAB>col=<index>;<COL>" and then, for every
#                       following line, its line number and the value of COL.
# headgetcol COL FILE : prints only the 1-based index of COL and stops reading.
# Both report an unknown column on stderr and return 1; previously an unknown
# column made $head[name] evaluate to $0, so whole rows were printed silently.
alias headget='headget(){ awk -v FS="\t" -v OFS="\t" -v name="$1" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; if(!(name in head)){print "headget: column not found: " name > "/dev/stderr"; exit 1}; print "num","col="head[name]";"name}NR>1{print NR,$head[name]}'\'' "$2"; };headget'
alias headgetcol='headgetcol(){ awk -v FS="\t" -v OFS="\t" -v name="$1" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; if(!(name in head)){print "headgetcol: column not found: " name > "/dev/stderr"; exit 1}; print head[name]; exit}'\'' "$2"; };headgetcol' ## print the column index only
## 3. qxml
# qxml: print a fixed tab-separated header and a rule of 100 dashes, then the
# output of `qstat -xml` joined into one line, split again at every <job_list>
# element, stripped of all XML tags and aligned with `column -t`.
# The final sed rewrites timestamps YYYY-MM-DDTHH:MM:SS as "YYYY-MM-DD HH:MM:SS".
alias qxml='echo -e "job-ID\tprior\tname\tuser\tstate\tsubmit/start\tat\tqueue\tslots ja-task-ID" && printf %.s"-" {1..100};echo "" && qstat -xml| tr "\n" " " | sed "s#<job_list[^>]*>#\n#g"| sed "s#<[^>]*>##g" | column -t | sed "s/\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\)T\([0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}\)/\1 \2/g"'
## 4. dfcol
# dfcol COL FILE1 FILE2: run `headget COL FILE` on both files, keep the second
# output field, sort it, and diff the two results (-B ignores blank-line
# changes). Calls headget, so that command must be available.
alias dfcol='dfcol(){ diff -B <(headget $1 $2 | cut -f2 | sort) <(headget $1 $3 | cut -f2 | sort);};dfcol'
## 5. gsed
# gsed PATTERN FILE: for every line of FILE matching PATTERN as a whole word
# (grep -w), print the line and ask for confirmation on stdin.
#   y / Y  -> delete the line in place (sed -i)
#   n / N  -> keep the line and print "break off"
#   empty  -> keep the line and print "???"
#   other  -> keep the line silently
# Line numbers are collected once up front; re_num counts the deletions so far
# so later numbers can be shifted to the current position in the file.
# FILE is quoted so paths with spaces work, the offsets use shell arithmetic
# instead of bc, and the loop variables are local to the function.
alias gsed='gsed(){ local re_num=0 num cur input; for num in $(grep -n -w -- "$1" "$2" | cut -d ":" -f1); do cur=$((num - re_num)); sed -n "${cur}p" "$2"; printf "\033[34;1mAre You Sure? [Y/n] \033[0m" && read -r input; if [[ -z $input ]]; then echo -e "\033[31m???\033[0m"; elif [[ $input == "y" || $input == "Y" ]]; then echo -e "sed -i \"${num}d\" $2"; sed -i "${cur}d" "$2"; re_num=$((re_num + 1)); elif [[ $input == "n" || $input == "N" ]]; then echo -e "\033[31;43mbreak off\033[0m"; fi; done; }; gsed'
## 6. mutalyzer
# mutalyzer DESC: GET https://v3.mutalyzer.nl/api/normalize/DESC with curl and
# let jq build a labelled text block from input_description,
# corrected_description, protein.description, protein.reference and
# protein.predicted. Each label/value pair is printed under a rule of 79
# dashes; for "Protein Reference" / "Protein Predicted" the value is split
# into 10-character groups, six groups per output line.
# NOTE(review): the inner script runs under `sh -c` but uses [[ ]] and brace
# expansion - confirm sh is bash on the target systems.
alias mutalyzer='mutalyzer(){ curl -X "GET" "https://v3.mutalyzer.nl/api/normalize/$1?only_variants=false" -H "accept: application/json" | jq '\''"Input Description\n\(.input_description)\nCorrected Description\n\(.corrected_description)\nProtein Description\n\(.protein.description)\nProtein Reference\n\(.protein.reference)\nProtein Predicted\n\(.protein.predicted)"'\'' | xargs echo -e | xargs -L 2 sh -c "ref=\"Protein Reference\";alt=\"Protein Predicted\";printf %.s"-" {1..79};echo -e \"\n\$0 \$1\";if [[ \"\$0 \$1\" == \$ref || \"\$0 \$1\" == \$alt ]];then echo \"\$2\"| tr \"\n\" \" \" |sed \"s/.\{10\}/& /g\" | xargs -d \" \" -n 6 echo -e;else echo -e \"\$2\"; fi";}; mutalyzer'
## 7. csed
# csed PATTERN: stdin filter. sed wraps every match of PATTERN in the ANSI
# sequences \033[1;31m ... \033[0m; each line is then re-emitted through
# `echo -e` and its spaces are converted to tabs.
# NOTE(review): the opening escape is written with 7 backslashes and the
# closing one with 8 - confirm both survive the sed/xargs/sh unescaping.
alias csed='csed(){ sed "s/$1/\\\\\\\033[1;31m$1\\\\\\\\033[0m/g" | xargs -L 1 sh -c "echo -e \"\$@\"|tr \" \" \"\t\"" sh;};csed'
## 8. colm
# colm FILE PATTERN COL: FILE is tab-separated with column names on line 1.
# Prints the value of column COL for every data line that matches PATTERN
# (grep basic regex).
# Only lines after the header are searched, so a PATTERN that also matches the
# header no longer yields a bogus row; an unknown COL is reported on stderr
# instead of printing whole lines; FILE is quoted so paths with spaces work.
alias colm='colm() { (head -n1 -- "$1" && tail -n +2 -- "$1" | grep -- "$2") | awk -v FS="\t" -v name="$3" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; if(!(name in head)){print "colm: column not found: " name > "/dev/stderr"; exit 1}}NR>1{print $head[name]}'\''; };colm'
## 9. format
# format FILE: align tab-separated FILE with `column -t` using "|" as output
# separator, measure the widest resulting line with `wc -L`, append a rule of
# that many dashes after every line and pipe the result to `le`.
# <abspath> is a placeholder for the directory that holds the column binary.
# NOTE(review): `le` is not defined in this file - presumably a pager alias;
# confirm it exists in the target environment.
alias format='format() { num=$(<abspath>/column -s$'\''\t'\'' -t -o "|" -x $1| wc -L); (<abspath>/column -s$'\''\t'\'' -t -o "|" -x $1|sed "s/$/\n`eval $(echo "printf %.s"-" {1..$num}")`/g")|le; };format'
## 10. wcl
# wcl STR: print the length of the longest line of STR as reported by `wc -L`.
# printf is used instead of echo so that strings such as "-n" or "-e" are
# measured rather than being consumed as echo options.
alias wcl='wcl() { printf "%s\n" "$1" | wc -L; };wcl'
## 11. ublat
# ublat SEQ: run the command printed by <abspath>/ucscBlat.sh SEQ through sh,
# join the response into a single line, isolate the PRE section, strip the
# HTML tags and re-break the text after "SPAN " and before each " browser".
# <abspath> is a placeholder for the directory that holds ucscBlat.sh.
alias ublat='ublat() { (<abspath>/ucscBlat.sh $1 | sh) | sed ":a;N;s/\n/ /g;ta" | sed "s#<PRE>#\n<PRE>#;s#</PRE>#</PRE>\n#" | grep "<PRE>" | sed "s#<A[^>]*>##g;s#<[^>]*>##g;s/SPAN /SPAN\n/g;s/ browser/\nbrowser/g"; };ublat'
## 12. xfm
# xfm FILE: render tab-separated FILE as an ASCII table and pipe it to `le`.
# For every input line the first action prints a "+----+" rule, the second
# prints the cells as "| value |", and END prints the closing rule.
# Column widths come from `cut -f <i> FILE | wc -L`, which is re-run for every
# field of every line; the dash runs are produced by a shell printf with brace
# expansion.
# NOTE(review): brace expansion inside awk's command pipe needs a shell that
# supports it; `le` is not defined in this file - confirm both.
alias xfm='xfm() { awk -F'\''\t'\'' '\''{for(i=1;i<=NF;i++){cmd="cut -f "i" "FILENAME"|wc -L";cmd|getline len;colnum[i]=len;cmdl="printf %.s\"-\" {1.."colnum[i]+2"}";cmdl|getline line;lines[i]=line;printf "+%-"colnum[i]"s",lines[i];close(cmd);close(cmdl)}; printf "+%s\n",""}{for(i=1;i<=NF;i++){printf "| %-"colnum[i]"s ",$i};printf "|%s\n",""}END{for(i=1;i<=NF;i++){printf "+%-"colnum[i]"s",lines[i]};printf "+%s\n",""}'\'' $1 | le; };xfm'
## 13. xfm (upgraded version)
# Defines the alias name xfm again, so when both definitions are sourced this
# one replaces the previous one. Accepts several files ($*): at the first line
# of each file it prints a ">> FILE <<" title and a rule, then every row is
# printed as "| value |" followed by a rule. Output is piped to `le`.
# NOTE(review): the awk variables `title` and `cmdt` are never assigned in
# this program - they look like leftovers; confirm.
alias xfm='xfm() { awk -F'\''\t'\'' '\''{ for(i=1;i<=NF;i++){cmd="cut -f "i" "FILENAME"|wc -L";cmd|getline len;colnum[i]=len;cmdl="printf %.s\"-\" {1.."colnum[i]+2"}";cmdl|getline line;lines[i]=line;close(cmd);close(cmdl)};}{if(FNR==1){printf ">> %-"title"s ",FILENAME;printf "<<%s\n","";close(cmdt);for(i=1;i<=NF;i++){printf "+%-"colnum[i]"s",lines[i]};printf "+%s\n","";}{for(i=1;i<=NF;i++){printf "| %-"colnum[i]"s ",$i};printf "|%s\n",""}{for(i=1;i<=NF;i++){printf "+%-"colnum[i]"s",lines[i];}printf "+%s\n","";}}'\'' $* | le; };xfm'
## 14. gcrev
# gcrev SEQ: print the reverse complement of SEQ in upper case
# (A<->T, G<->C, N->N; lower-case input is accepted).
# SEQ is walked from its last character to its first in pure bash. Characters
# other than A/T/G/C/N are skipped one by one; the earlier rev+sed pair
# splitting also discarded the valid bases next to such a character.
alias gcrev='gcrev() { local seq=$1 out="" i; for ((i=${#seq}-1; i>=0; i--)); do case ${seq:i:1} in A|a) out+=T ;; T|t) out+=A ;; G|g) out+=C ;; C|c) out+=G ;; N|n) out+=N ;; esac; done; printf "%s\n" "$out"; }; gcrev'

1. headget:文件列数80+;想要cut截取某一列,还得打印表头数一数哪一列???headget col_name file_name;可直接返回该列信息,并告知第几列

怎么用:$ headget col_name file_name

~/.bashrc 配置文件添加:

1
2
# headget COL FILE    : FILE is tab-separated with column names on line 1.
#                       Prints "num<TAB>col=<index>;<COL>" and then, for every
#                       following line, its line number and the value of COL.
# headgetcol COL FILE : prints only the "num<TAB>col=<index>;<COL>" line and
#                       stops reading FILE.
# Both report an unknown column on stderr and return 1; previously an unknown
# column made $head[name] evaluate to $0, so whole rows were printed silently.
alias headget='headget(){ awk -v FS="\t" -v OFS="\t" -v name="$1" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; if(!(name in head)){print "headget: column not found: " name > "/dev/stderr"; exit 1}; print "num","col="head[name]";"name}NR>1{print NR,$head[name]}'\'' "$2"; };headget'
alias headgetcol='headgetcol(){ awk -v FS="\t" -v OFS="\t" -v name="$1" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; if(!(name in head)){print "headgetcol: column not found: " name > "/dev/stderr"; exit 1}; print "num","col="head[name]";"name; exit}'\'' "$2"; };headgetcol'

用法:headget/headgetcol $col_name $file

headgetcol 可返回查询列的列数而不打印该列信息;headget 打印所查询列的整列信息

2. dfcol: 当我们只需要比较两个文件中某一列的差异时(必须是两个文件的共有列),还在cut出来另存文件来diff???这里搭配前面提到的 headget 快捷命令使用,提取对应的列,再diff

怎么用:$ dfcol col_name file1 file2

~/.bashrc 配置文件添加:

1
# dfcol COL FILE1 FILE2: run `headget COL FILE` on both files, keep the second
# output field, sort it, and diff the two results (-B ignores blank-line
# changes). Calls headget, so that command must be available.
alias dfcol='dfcol(){ diff -B <(headget $1 $2 | cut -f2 | sort) <(headget $1 $3 | cut -f2 | sort);};dfcol'

用法:dfcol $col_name $file1 $file2

3. qxml: 还在qstat查询SGE任务???name只能显示前10个字符,都不知道哪个任务是哪个样本的分析任务(当然,可以qstat -j job_id 可查询对应分析任务的详细信息,但当只需要确认某个样本是否分析,就可能需要每个qstat -j,不便捷)

怎么用:$ qxml

~/.bashrc 配置文件添加:

1
2
# qxml: print a fixed tab-separated header and a rule of 100 dashes, then the
# output of `qstat -xml` joined into one line, split again at every <job_list>
# element, stripped of all XML tags and aligned with `column -t`.
# The final sed rewrites timestamps YYYY-MM-DDTHH:MM:SS as "YYYY-MM-DD HH:MM:SS";
# the stray spaces that had crept into the month part of that regex are removed
# so the expression matches again.
alias qxml='echo -e "job-ID\tprior\tname\tuser\tstate\tsubmit/start\tat\tqueue\tslots ja-task-ID" && printf %.s"-" {1..100};echo "" && qstat -xml| tr "\n" " " | sed "s#<job_list[^>]*>#\n#g"| sed "s#<[^>]*>##g" | column -t | sed "s/\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\)T\([0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}\)/\1 \2/g"'
# qxmlp: same pipeline as qxml with an extra "path" header column. The
# trailing awk runs `qstat -j <first field> | grep cwd` for every row and
# appends the second field of that output to the row.
# NOTE(review): the rule is drawn with {3..100} here versus {1..100} in qxml,
# and cmd is never close()d inside awk - confirm both are intended.
alias qxmlp='echo -e "job-ID\tprior\tname\tuser\tstate\tsubmit/start\tat\tqueue\tslots ja-task-ID\tpath" && printf %.s"-" {3..100};echo "" && qstat -xml| tr "\n" " " | sed "s#<job_list[^>]*>#\n#g"| sed "s#<[^>]*>##g" | column -t | sed "s/\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\)T\([0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}\)/\1 \2/g" | awk -vOFS="\t" '\''{cmd="qstat -j "$1" | grep cwd | awk '\''\'\'''\''{print $2}'\''\'\'''\''";cmd|getline path;print $0,path}'\'''

qxmlp 比 qxml 多输出了分析任务的分析路径;直接运行 qxml/qxmlp 将会返回当前账号的任务情况

4. sizeg|sizem|sizek:就简单的字节换算

怎么用:$ sizeg|sizem|sizek b_size ## 注意:输入的是字节数,返回的是Gb|Mb|Kb

~/.bashrc 配置文件添加:

1
2
3
4
5
# size{g,m,k} BYTES: divide BYTES by 1024^3 / 1024^2 / 1024 with bc at
# scale=4 and print the quotient followed by the suffix Gb / Mb / Kb.
alias sizeg='sizeg(){ echo $(bc <<< "scale=4; $1/(1024^3)") Gb; }; sizeg'

alias sizem='sizem(){ echo $(bc <<< "scale=4; $1/(1024^2)") Mb; }; sizem'

alias sizek='sizek(){ echo $(bc <<< "scale=4; $1/1024") Kb; }; sizek'

记得source

用法:sizeg/sizem/sizek $bytes(字节数)

5. gsed: 交互确认是否删除所匹配的行

怎么用:$ gsed [匹配值] file ## 循环确认是否删除所打印出的行;Y/y删除,N/n保留,其他值跳过,空值亦跳过

~/.bashrc 配置文件添加:

1
# gsed PATTERN FILE: for every line of FILE matching PATTERN as a whole word
# (grep -w), print the line and ask for confirmation on stdin.
#   y / Y  -> delete the line in place (sed -i)
#   n / N  -> keep the line and print "break off"
#   empty  -> keep the line and print "???"
#   other  -> keep the line silently
# Line numbers are collected once up front; re_num counts the deletions so far
# so later numbers can be shifted to the current position in the file.
# FILE is quoted so paths with spaces work, the offsets use shell arithmetic
# instead of bc, and the loop variables are local to the function.
alias gsed='gsed(){ local re_num=0 num cur input; for num in $(grep -n -w -- "$1" "$2" | cut -d ":" -f1); do cur=$((num - re_num)); sed -n "${cur}p" "$2"; printf "\033[34;1mAre You Sure? [Y/n] \033[0m" && read -r input; if [[ -z $input ]]; then echo -e "\033[31m???\033[0m"; elif [[ $input == "y" || $input == "Y" ]]; then echo -e "sed -i \"${num}d\" $2"; sed -i "${cur}d" "$2"; re_num=$((re_num + 1)); elif [[ $input == "n" || $input == "N" ]]; then echo -e "\033[31;43mbreak off\033[0m"; fi; done; }; gsed'

用法:gsed $match_str $file

6. mutalyzer: 返回 Mutalyzer3 数据库所查询到的信息,版本更新,提供了 API

怎么用:$ mutalyzer "trans:c_site"

~/.bashrc 配置文件添加:

1
# mutalyzer DESC: GET https://v3.mutalyzer.nl/api/normalize/DESC with curl and
# let jq build a labelled text block from input_description,
# corrected_description, protein.description, protein.reference and
# protein.predicted. Each label/value pair is printed under a rule of 79
# dashes; for "Protein Reference" / "Protein Predicted" the value is split
# into 10-character groups, six groups per output line.
# <abspath> is a placeholder for the directory that holds the jq binary.
# NOTE(review): the inner script runs under `sh -c` but uses [[ ]] and brace
# expansion - confirm sh is bash on the target systems.
alias mutalyzer='mutalyzer(){ curl -X "GET" "https://v3.mutalyzer.nl/api/normalize/$1?only_variants=false" -H "accept: application/json" | <abspath>/jq '\''"Input Description\n\(.input_description)\nCorrected Description\n\(.corrected_description)\nProtein Description\n\(.protein.description)\nProtein Reference\n\(.protein.reference)\nProtein Predicted\n\(.protein.predicted)"'\'' | xargs echo -e | xargs -L 2 sh -c "ref=\"Protein Reference\";alt=\"Protein Predicted\";printf %.s"-" {1..79};echo -e \"\n\$0 \$1\";if [[ \"\$0 \$1\" == \$ref || \"\$0 \$1\" == \$alt ]];then echo \"\$2\"| tr \"\n\" \" \" |sed \"s/.\{10\}/& /g\" | xargs -d \" \" -n 6 echo -e;else echo -e \"\$2\"; fi";}; mutalyzer'

用法:mutalyzer "$trans:$c_site"

7. colm: 获取匹配行的某一列信息

1
# colm FILE PATTERN COL: FILE is tab-separated with column names on line 1.
# Prints the value of column COL for every data line that matches PATTERN
# (grep basic regex).
# Only lines after the header are searched, so a PATTERN that also matches the
# header no longer yields a bogus row; an unknown COL is reported on stderr
# instead of printing whole lines; FILE is quoted so paths with spaces work.
# The leading "$ " prompt was dropped so the line can be pasted into ~/.bashrc.
alias colm='colm() { (head -n1 -- "$1" && tail -n +2 -- "$1" | grep -- "$2") | awk -v FS="\t" -v name="$3" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; if(!(name in head)){print "colm: column not found: " name > "/dev/stderr"; exit 1}}NR>1{print $head[name]}'\''; };colm'

第一个参数:文件名 第二个参数:匹配存在该字符串的行 第三个参数:返回匹配行的某一列信息

用法:colm $file $match_str $col

8. format:对文本文件进行格式化输出

1
# format FILE: align tab-separated FILE with `column -t` using "|" as output
# separator, measure the widest resulting line with `wc -L`, append a rule of
# that many dashes after every line and pipe the result to `le`.
# <abspath> is a placeholder for the directory that holds the column binary.
# NOTE(review): `le` is not defined in this file - presumably a pager alias;
# confirm it exists in the target environment.
alias format='format() { num=$(<abspath>/column -s$'\''\t'\'' -t -o "|" -x $1| wc -L); (<abspath>/column -s$'\''\t'\'' -t -o "|" -x $1|sed "s/$/\n`eval $(echo "printf %.s"-" {1..$num}")`/g")|le; };format'

用法:format $file

9. wcl: 判断字符串长度

1
2
3
# wcl STR: print the length of the longest line of STR as reported by `wc -L`.
# printf is used instead of echo so that strings such as "-n" or "-e" are
# measured rather than being consumed as echo options.
alias wcl='wcl() { printf "%s\n" "$1" | wc -L; };wcl'
## usage:
$ wcl "str"

10. Glow:终端上的markdown查看器

1
$ conda create -n Glow -c dnachun glow-md -y

11. ublat

ucsc blat 本地化

1
2
3
4
5
6
7
# ublat SEQ: run the command printed by <abspath>/ucscBlat.sh SEQ through sh,
# join the response into a single line, isolate the PRE section, strip the
# HTML tags and re-break the text after "SPAN " and before each " browser".
# <abspath> is a placeholder for the directory that holds ucscBlat.sh.
alias ublat='ublat() { (<abspath>/ucscBlat.sh $1 | sh) | sed ":a;N;s/\n/ /g;ta" | sed "s#<PRE>#\n<PRE>#;s#</PRE>#</PRE>\n#" | grep "<PRE>" | sed "s#<A[^>]*>##g;s#<[^>]*>##g;s/SPAN /SPAN\n/g;s/ browser/\nbrowser/g"; };ublat'
$ ublat GCACAAGCAGTGCTATGAGTGGCAGTCATCAGGACCTCAGTGTGATACAGCCAATTGTAAAAGACTGCAAAGAGGAACCAACCGGTGAGCCCTCTCCTAA
## output
ACTIONS QUERY SCORE START END QSIZE IDENTITY CHROM STRAND START END SPAN
-----------------------------------------------------------------------------------------------
browser details YourSeq 75 1 75 100 100.0% chr12 + 70189051 70189125 75
browser details YourSeq 28 73 100 100 100.0% chr12 + 66221779 66221806 28
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
## ucscBlat.sh
# Usage: ucscBlat.sh SEQ
# Prints (does not run) a curl command that posts SEQ as the userSeq form
# field to https://genome.ucsc.edu/cgi-bin/hgBlat; the ublat alias pipes this
# output to sh.
# NOTE(review): SEQ is spliced unquoted into the generated command, and the
# Cookie / hgsid values below are hard-coded - confirm they are still valid
# and that SEQ only ever contains sequence letters.
seq=$1
echo 'curl '\''https://genome.ucsc.edu/cgi-bin/hgBlat'\'' \
-H '\''Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'\'' \
-H '\''Accept-Language: zh-CN,zh;q=0.9,en;q=0.8'\'' \
-H '\''Cache-Control: max-age=0'\'' \
-H '\''Connection: keep-alive'\'' \
-H '\''Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryN2EGAypLqBwsLg9v'\'' \
-H '\''Cookie: hguid.genome-japan=723668884_OY1HNADbTpsXcALt7NvYvTdG79Qp; _ga=GA1.2.471529949.1663819139; _ga_PWFD1NPDNM=GS1.1.1664180680.2.1.1664181280.0.0.0; hguid=1400796865_VQ96g5e3qnvra4gymb0CdJgpTiDf; _gid=GA1.2.2126454113.1676274745; _gat=1'\'' \
-H '\''Origin: https://genome.ucsc.edu'\'' \
-H '\''Referer: https://genome.ucsc.edu/cgi-bin/hgBlat'\'' \
-H '\''Sec-Fetch-Dest: document'\'' \
-H '\''Sec-Fetch-Mode: navigate'\'' \
-H '\''Sec-Fetch-Site: same-origin'\'' \
-H '\''Sec-Fetch-User: ?1'\'' \
-H '\''Upgrade-Insecure-Requests: 1'\'' \
-H '\''User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'\'' \
-H '\''sec-ch-ua: "Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"'\'' \
-H '\''sec-ch-ua-mobile: ?0'\'' \
-H '\''sec-ch-ua-platform: "Windows"'\'' \
--data $'\''------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="hgsid"\r\n\r\n1569233731_63ApO5jAfET6mwe2b4l7U8yamDV1\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="changeInfo"\r\n\r\n\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="org"\r\n\r\nHuman\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="db"\r\n\r\nhg19\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="type"\r\n\r\nBLATs guess\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="sort"\r\n\r\nquery,score\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="output"\r\n\r\nhyperlink\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="userSeq"\r\n\r\n'$seq'\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="boolshad.allResults"\r\n\r\n0\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="Submit"\r\n\r\nSubmit\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="seqFile"; filename=""\r\nContent-Type: application/octet-stream\r\n\r\n\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v--\r\n'\'' \
--compressed' #| sed ":a;N;s/\n/ /g;ta" | sed "s#<PRE>#\n<PRE>#;s#</PRE>#</PRE>\n#" | grep "<PRE>" | sed "s#<A[^>]*>##g;s#<[^>]*>##g;s/SPAN /SPAN\n/g;s/ browser/\nbrowser/g"

12. xfm

格式化输出表格型数据;即给文本文件添加类表格显示

1
2
3
4
5
6
7
8
9
10
# xfm FILE: render tab-separated FILE as an ASCII table and pipe it to `le`.
# For every input line the first action prints a "+----+" rule, the second
# prints the cells as "| value |", and END prints the closing rule.
# Column widths come from `cut -f <i> FILE | wc -L`, which is re-run for every
# field of every line; the dash runs are produced by a shell printf with brace
# expansion.
# NOTE(review): brace expansion inside awk's command pipe needs a shell that
# supports it; `le` is not defined in this file - confirm both.
alias xfm='xfm() { awk -F'\''\t'\'' '\''{for(i=1;i<=NF;i++){cmd="cut -f "i" "FILENAME"|wc -L";cmd|getline len;colnum[i]=len;cmdl="printf %.s\"-\" {1.."colnum[i]+2"}";cmdl|getline line;lines[i]=line;printf "+%-"colnum[i]"s",lines[i];close(cmd);close(cmdl)}; printf "+%s\n",""}{for(i=1;i<=NF;i++){printf "| %-"colnum[i]"s ",$i};printf "|%s\n",""}END{for(i=1;i<=NF;i++){printf "+%-"colnum[i]"s",lines[i]};printf "+%s\n",""}'\'' $1 | le; };xfm'
$ xfm test.xls
+----------+----------+-----------+-----------+-----------+------------+
| Chr_Left | Pos_Left | Gene_Left | Chr_Right | Pos_Right | Gene_Right |
+----------+----------+-----------+-----------+-----------+------------+
| chr2 | 42526812 | EML4 | chr2 | 29448241 | ALK |
+----------+----------+-----------+-----------+-----------+------------+
$ cat test.xls
Chr_Left Pos_Left Gene_Left Chr_Right Pos_Right Gene_Right
chr2 42526812 EML4 chr2 29448241 ALK

13. gcrev

返回序列的反向互补序列

1
2
3
# gcrev SEQ: print the reverse complement of SEQ in upper case
# (A<->T, G<->C, N->N; lower-case input is accepted).
# SEQ is walked from its last character to its first in pure bash. Characters
# other than A/T/G/C/N are skipped one by one; the earlier rev+sed pair
# splitting also discarded the valid bases next to such a character.
# The leading "$ " prompt was dropped so the line can be pasted into ~/.bashrc.
alias gcrev='gcrev() { local seq=$1 out="" i; for ((i=${#seq}-1; i>=0; i--)); do case ${seq:i:1} in A|a) out+=T ;; T|t) out+=A ;; G|g) out+=C ;; C|c) out+=G ;; N|n) out+=N ;; esac; done; printf "%s\n" "$out"; }; gcrev'
$ gcrev AGGCAGGAAAATCACTTGAACCCAGGAGGCAGAGATTGCAGTGAGCCGAGATCACACCACTGCACTCCAGCCAGGGCGACAGAGTGAGACTC
GAGTCTCACTCTGTCGCCCTGGCTGGAGTGCAGTGGTGTGATCTCGGCTCACTGCAATCTCTGCCTCCTGGGTTCAAGTGATTTTCCTGCCT

References

。。。 。。。