Introduction
快捷命令整理
方便自己,快捷他人
更是对自己学习技能的实战检验。
注意: <abspath>
指代的是需要提供对应执行脚本/命令的绝对路径;当然可以使用相对路径,或者当前执行环境中含有该命令,前提是需要提前配置好相关环境。
Getting Started
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
| # alias sizeg='sizeg(){ echo `echo "scale=4; $1/(1024^3)" | bc` Gb;};sizeg' alias sizem='sizem(){ echo `echo "scale=4; $1/(1024^2)" | bc` Mb;};sizem' alias sizek='sizek(){ echo `echo "scale=4; $1/1024" | bc` Kb;};sizek' # alias headget='headget(){ awk -vFS="\t" -vOFS="\t" -v name=$1 '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; print "num","col="head[name]";"name""}NR>1{print NR,$head[name]}'\'' $2; };headget' alias headgetcol='headgetcol(){ awk -vFS="\t" -vOFS="\t" -v name=$1 '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; print head[name]}'\'' $2; };headgetcol' ## 获取列d # alias qxml='echo -e "job-ID\tprior\tname\tuser\tstate\tsubmit/start\tat\tqueue\tslots ja-task-ID" && printf %.s"-" {1..100};echo "" && qstat -xml| tr "\n" " " | sed "s#<job_list[^>]*>#\n#g"| sed "s#<[^>]*>##g" | column -t | sed "s/\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\)T\([0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}\)/\1 \2/g"' # alias dfcol='dfcol(){ diff -B <(headget $1 $2 | cut -f2 | sort) <(headget $1 $3 | cut -f2 | sort);};dfcol' # alias gsed='gsed(){ re_num=0;for num in `grep -n -w "$1" $2 | cut -d ":" -f1`; do sed -n "`echo "${num}-$re_num"|bc`p" $2;printf "\033[34;1mAre You Sure? 
[Y/n] \033[0m" && read -r input;if [[ -z $input ]];then echo -e "\033[31m???\033[0m"; elif [[ $input == "y" || $input == "Y" ]];then echo -e "sed -i \"${num}d\" $2"; sed -i "`echo "${num}-$re_num"|bc`d" $2; re_num=`echo "$re_num+1"|bc`;elif [[ $input == "n" || $input == "N" ]];then echo -e "\033[31;43mbreak off\033[0m";fi ;done;}; gsed' # alias mutalyzer='mutalyzer(){ curl -X "GET" "https://v3.mutalyzer.nl/api/normalize/$1?only_variants=false" -H "accept: application/json" | jq '\''"Input Description\n\(.input_description)\nCorrected Description\n\(.corrected_description)\nProtein Description\n\(.protein.description)\nProtein Reference\n\(.protein.reference)\nProtein Predicted\n\(.protein.predicted)"'\'' | xargs echo -e | xargs -L 2 sh -c "ref=\"Protein Reference\";alt=\"Protein Predicted\";printf %.s"-" {1..79};echo -e \"\n\$0 \$1\";if [[ \"\$0 \$1\" == \$ref || \"\$0 \$1\" == \$alt ]];then echo \"\$2\"| tr \"\n\" \" \" |sed \"s/.\{10\}/& /g\" | xargs -d \" \" -n 6 echo -e;else echo -e \"\$2\"; fi";}; mutalyzer' # alias csed='csed(){ sed "s/$1/\\\\\\\033[1;31m$1\\\\\\\\033[0m/g" | xargs -L 1 sh -c "echo -e \"\$@\"|tr \" \" \"\t\"" sh;};csed' # alias colm='colm() { (head -n1 $1 && grep "$2" $1) | awk -vFS="\t" -v name="$3" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i;}NR>1{print $head[name]}'\''; };colm' # alias format='format() { num=$(<abspath>/column -s$'\''\t'\'' -t -o "|" -x $1| wc -L); (<abspath>/column -s$'\''\t'\'' -t -o "|" -x $1|sed "s/$/\n`eval $(echo "printf %.s"-" {1..$num}")`/g")|le; };format' # alias wcl='wcl() { echo "$1" | wc -L; };wcl' # alias ublat='ublat() { (<abspath>/ucscBlat.sh $1 | sh) | sed ":a;N;s/\n/ /g;ta" | sed "s#<PRE>#\n<PRE>#;s#</PRE>#</PRE>\n#" | grep "<PRE>" | sed "s#<A[^>]*>##g;s#<[^>]*>##g;s/SPAN /SPAN\n/g;s/ browser/\nbrowser/g"; };ublat' # alias xfm='xfm() { awk -F'\''\t'\'' '\''{for(i=1;i<=NF;i++){cmd="cut -f "i" "FILENAME"|wc -L";cmd|getline len;colnum[i]=len;cmdl="printf %.s\"-\" {1.."colnum[i]+2"}";cmdl|getline 
line;lines[i]=line;printf "+%-"colnum[i]"s",lines[i];close(cmd);close(cmdl)}; printf "+%s\n",""}{for(i=1;i<=NF;i++){printf "| %-"colnum[i]"s ",$i};printf "|%s\n",""}END{for(i=1;i<=NF;i++){printf "+%-"colnum[i]"s",lines[i]};printf "+%s\n",""}'\'' $1 | le; };xfm' # alias xfm='xfm() { awk -F'\''\t'\'' '\''{ for(i=1;i<=NF;i++){cmd="cut -f "i" "FILENAME"|wc -L";cmd|getline len;colnum[i]=len;cmdl="printf %.s\"-\" {1.."colnum[i]+2"}";cmdl|getline line;lines[i]=line;close(cmd);close(cmdl)};}{if(FNR==1){printf ">> %-"title"s ",FILENAME;printf "<<%s\n","";close(cmdt);for(i=1;i<=NF;i++){printf "+%-"colnum[i]"s",lines[i]};printf "+%s\n","";}{for(i=1;i<=NF;i++){printf "| %-"colnum[i]"s ",$i};printf "|%s\n",""}{for(i=1;i<=NF;i++){printf "+%-"colnum[i]"s",lines[i];}printf "+%s\n","";}}'\'' $* | le; };xfm' # alias gcrev='gcrev() { for i in `echo "$1" | rev | sed "s/\([ATGCatgcnN]\)\([ATGCatgcnN]\)/\1 \2 /g"`; do case $i in A|a) printf "%s" T ;; T|t) printf "%s" A;; G|g) printf "%s" C;; C|c) printf "%s" G;; N|n) printf "%s" N;; esac; done; echo ""; }; gcrev'
|
1.
headget:文件列数80+;想要cut截取某一列,还得打印表头数一数哪一列???headget
col_name file_name;可直接返回该列信息,并告知第几列
怎么用:$ headget col_name file_name
~/.bashrc 配置文件添加:
1 2
# headget COL FILE    - look up column COL by its header name in the
#                       tab-separated FILE; print "num<TAB>col=<index>;<name>"
#                       followed by "<line number><TAB><value>" for every row.
# headgetcol COL FILE - print only the "num<TAB>col=<index>;<name>" line and
#                       stop reading FILE right after the header.
# Both arguments are quoted so that column names and paths containing spaces
# work. An unknown column is reported on stderr with exit status 1 instead of
# silently printing whole lines.
alias headget='headget(){ awk -v FS="\t" -v OFS="\t" -v name="$1" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; if(!(name in head)){print "headget: no such column: " name > "/dev/stderr"; exit 1} print "num","col="head[name]";"name""}NR>1{print NR,$head[name]}'\'' "$2"; };headget'
alias headgetcol='headgetcol(){ awk -v FS="\t" -v OFS="\t" -v name="$1" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; if(!(name in head)){print "headgetcol: no such column: " name > "/dev/stderr"; exit 1} print "num","col="head[name]";"name""; exit}'\'' "$2"; };headgetcol'
用法:headget/headgetcol $col_name $file
headgetcol
只返回所查询列的列号(即第几列),不打印该列内容;headget
打印所查询列的整列信息
2.
dfcol:
当我们只需要比较两个文件中某一列的差异时(必须是两个文件的共有列),还在cut出来另存文件来diff???这里搭配前面提到的
headget
快捷命令使用,提取对应的列,再diff
怎么用:$ dfcol col_name file1 file2
~/.bashrc 配置文件添加:
1
# dfcol COL FILE1 FILE2 - diff (with -B, ignoring blank-line changes) the
# sorted values of column COL taken from two tab-separated files.
# Each side is produced by the `headget` alias, which therefore has to be
# defined before this line in ~/.bashrc. headget's "col=<index>;<name>" line
# is part of the compared text, so a differing column position is reported too.
# Arguments are quoted so names and paths with spaces or glob characters are
# passed through intact.
alias dfcol='dfcol(){ diff -B <(headget "$1" "$2" | cut -f2 | sort) <(headget "$1" "$3" | cut -f2 | sort);};dfcol'
用法:dfcol $col_name $file1 $file2
3.
qxml:
还在用 qstat 查询 SGE 任务???name 只能显示前10个字符,根本分不清哪个任务对应哪个样本(当然,也可以用 qstat
-j job_id
查询对应分析任务的详细信息;但当只需要确认某个样本是否已在分析时,就得对每个任务逐一执行 qstat
-j,不便捷)
怎么用:$ qxml
~/.bashrc 配置文件添加:
1 2
# qxml  - list SGE jobs from `qstat -xml`: the XML is flattened to one row per
#         <job_list> element, tags are stripped, columns are aligned with
#         `column -t`, and the ISO timestamp "YYYY-MM-DDThh:mm:ss" is split
#         into "YYYY-MM-DD hh:mm:ss".
# qxmlp - same listing, with the second field of the `cwd` line of
#         `qstat -j <job-ID>` appended to every row as an extra column.
# The timestamp pattern must not contain whitespace between "[0-9]" and
# "\{2\}"; with a space there the date never matches and the "T" stays.
alias qxml='echo -e "job-ID\tprior\tname\tuser\tstate\tsubmit/start\tat\tqueue\tslots ja-task-ID" && printf %.s"-" {1..100};echo "" && qstat -xml| tr "\n" " " | sed "s#<job_list[^>]*>#\n#g"| sed "s#<[^>]*>##g" | column -t | sed "s/\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\)T\([0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}\)/\1 \2/g"'
alias qxmlp='echo -e "job-ID\tprior\tname\tuser\tstate\tsubmit/start\tat\tqueue\tslots ja-task-ID\tpath" && printf %.s"-" {3..100};echo "" && qstat -xml| tr "\n" " " | sed "s#<job_list[^>]*>#\n#g"| sed "s#<[^>]*>##g" | column -t | sed "s/\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\)T\([0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}\)/\1 \2/g" | awk -vOFS="\t" '\''{cmd="qstat -j "$1" | grep cwd | awk '\''\'\'''\''{print $2}'\''\'\'''\''";cmd|getline path;print $0,path}'\'''
qxmlp
较 qxml
多了分析任务的分析路径;直接运行 qxml/qxmlp
将会返回当前账号的任务情况
4.
sizeg|sizem|sizek:就简单的字节换算
怎么用:$ sizeg|sizem|sizek b_size
##
注意:输入的是字节数,返回的是Gb|Mb|Kb
~/.bashrc 配置文件添加:
1 2 3 4 5
# sizeg / sizem / sizek BYTES - convert a byte count to Gb / Mb / Kb with bc,
# truncated to 4 decimal places. BYTES may be any bc expression; it is
# parenthesised so that e.g. "512+512" is divided as a whole.
# A missing argument prints a usage line on stderr and returns 1 instead of
# handing bc an incomplete expression.
alias sizeg='sizeg(){ [[ -n "$1" ]] || { echo "usage: sizeg <bytes>" >&2; return 1; }; echo "$(echo "scale=4; ($1)/(1024^3)" | bc) Gb"; };sizeg'
alias sizem='sizem(){ [[ -n "$1" ]] || { echo "usage: sizem <bytes>" >&2; return 1; }; echo "$(echo "scale=4; ($1)/(1024^2)" | bc) Mb"; };sizem'
alias sizek='sizek(){ [[ -n "$1" ]] || { echo "usage: sizek <bytes>" >&2; return 1; }; echo "$(echo "scale=4; ($1)/1024" | bc) Kb"; };sizek'
添加后记得执行 source ~/.bashrc 使其生效
用法:sizeg/sizem/sizek $b_size
5. gsed:
交互确认是否删除所匹配的行
怎么用:$ gsed [匹配值] file
##
循环确认是否删除所打印出的行;Y/y删除,N/n保留,其他值跳过,空值亦跳过
~/.bashrc 配置文件添加:
1
# gsed PATTERN FILE - interactively delete the lines of FILE that contain
# PATTERN as a whole word (grep -w). Each matching line is printed and one
# reply is read from stdin:
#   y/Y   delete the line in place (sed -i)
#   n/N   keep it (prints "break off")
#   empty keep it (prints "???")
#   other keep it silently
# re_num counts the lines already deleted, so the line numbers grep reported
# up front can be shifted to the file's current numbering.
# FILE is quoted everywhere so paths with spaces work, the offset uses shell
# arithmetic instead of bc, and the working variables are local.
alias gsed='gsed(){ local re_num=0 num input line; for num in $(grep -n -w -- "$1" "$2" | cut -d ":" -f1); do line=$((num-re_num)); sed -n "${line}p" "$2"; printf "\033[34;1mAre You Sure? [Y/n] \033[0m" && read -r input; if [[ -z $input ]]; then echo -e "\033[31m???\033[0m"; elif [[ $input == "y" || $input == "Y" ]]; then echo -e "sed -i \"${num}d\" $2"; sed -i "${line}d" "$2"; re_num=$((re_num+1)); elif [[ $input == "n" || $input == "N" ]]; then echo -e "\033[31;43mbreak off\033[0m"; fi; done; }; gsed'
用法:gsed $match_str $file
6.mutalyzer:
返回 Mutalyzer3 数据库所查询到的信息,版本更新,提供了 API
怎么用:$ mutalyzer "trans:c_site"
~/.bashrc 配置文件添加:
1
# mutalyzer "<transcript>:<variant>" - query the Mutalyzer 3 normalize API
# (https://v3.mutalyzer.nl/api/normalize/<arg>?only_variants=false) with curl
# and pretty-print selected fields of the JSON reply.
# jq builds one string with a heading line followed by a value line for:
# input_description, corrected_description, protein.description,
# protein.reference and protein.predicted. `<abspath>` is the placeholder for
# the directory that holds jq.
# `xargs -L 2 sh -c` then handles heading+value pairs: it prints a rule of 79
# dashes and the heading ($0 $1); for "Protein Reference" / "Protein Predicted"
# the value is split into groups of 10 characters, 6 groups per output line;
# any other value is echoed as is.
# NOTE(review): the inner `sh -c` script uses `[[ ]]` and brace expansion, so
# it presumably needs sh to be bash - confirm on the target system.
| alias mutalyzer='mutalyzer(){ curl -X "GET" "https://v3.mutalyzer.nl/api/normalize/$1?only_variants=false" -H "accept: application/json" | <abspath>/jq '\''"Input Description\n\(.input_description)\nCorrected Description\n\(.corrected_description)\nProtein Description\n\(.protein.description)\nProtein Reference\n\(.protein.reference)\nProtein Predicted\n\(.protein.predicted)"'\'' | xargs echo -e | xargs -L 2 sh -c "ref=\"Protein Reference\";alt=\"Protein Predicted\";printf %.s"-" {1..79};echo -e \"\n\$0 \$1\";if [[ \"\$0 \$1\" == \$ref || \"\$0 \$1\" == \$alt ]];then echo \"\$2\"| tr \"\n\" \" \" |sed \"s/.\{10\}/& /g\" | xargs -d \" \" -n 6 echo -e;else echo -e \"\$2\"; fi";}; mutalyzer'
|
用法:mutalyzer "$trans:$c_site"
7. colm:
获取匹配行的某一列信息
1
# colm FILE PATTERN COL - from the tab-separated FILE, print the value of
# column COL (looked up by header name) for every data row matching the grep
# PATTERN.
# The header (line 1) is fed to awk separately from the rows that are grepped
# (line 2 onwards), so a PATTERN that also matches the header does not turn
# the header into a data row. An unknown COL is reported on stderr with exit
# status 1. FILE is quoted so paths with spaces work.
alias colm='colm() { { head -n1 -- "$1" && tail -n +2 -- "$1" | grep -- "$2"; } | awk -v FS="\t" -v name="$3" '\''NR==1{for(i=1;i<=NF;i++) head[$i]=i; if(!(name in head)){print "colm: no such column: " name > "/dev/stderr"; exit 1}}NR>1{print $head[name]}'\''; };colm'
第一个参数:文件名 第二个参数:匹配存在该字符串的行
第三个参数:返回匹配行的某一列信息
用法:colm $file $match_str $col
8. format:
将制表符分隔的文件按列对齐成表格显示
1
# format FILE - show the tab-separated FILE as an aligned table.
# `column -s <TAB> -t -o "|" -x` renders the table with "|" as the output
# separator; the first run is only measured with `wc -L` to get the widest
# rendered line (num). The second run is piped through sed, which appends a
# rule of num "-" characters after every row, and the result goes to `le`.
# `<abspath>` is the placeholder for the directory that holds `column`.
# NOTE(review): `le` is not defined in this snippet - presumably a pager
# alias such as less; confirm it exists before using this alias.
| alias format='format() { num=$(<abspath>/column -s$'\''\t'\'' -t -o "|" -x $1| wc -L); (<abspath>/column -s$'\''\t'\'' -t -o "|" -x $1|sed "s/$/\n`eval $(echo "printf %.s"-" {1..$num}")`/g")|le; };format'
|
用法:format $file
9. wcl: 判断字符串长度
1 2 3
# wcl STRING - print the length of STRING, measured with `wc -L` (the display
# width of the longest line).
# printf is used instead of echo so that strings such as "-n" or "-e" are
# measured rather than being consumed as echo options.
# $ wcl "str"
alias wcl='wcl() { printf "%s\n" "$1" | wc -L; };wcl'
10.
Glow:终端上的markdown查看器
1
| $ conda create -n Glow -c dnachun glow-md -y
|
11. ublat
ucsc blat 本地化
1 2 3 4 5 6 7
# ublat SEQ - run a UCSC BLAT search for SEQ from the terminal.
# `<abspath>/ucscBlat.sh SEQ` prints a command, which is executed by piping it
# to sh. The reply is joined into a single line, the <PRE>...</PRE> section is
# isolated on its own line and kept, all HTML tags are stripped, and line
# breaks are re-inserted after "SPAN " and before each " browser" hit.
# `<abspath>` is the placeholder for the directory that holds ucscBlat.sh.
# The text after the alias definition is a sample invocation and its output.
| alias ublat='ublat() { (<abspath>/ucscBlat.sh $1 | sh) | sed ":a;N;s/\n/ /g;ta" | sed "s#<PRE>#\n<PRE>#;s#</PRE>#</PRE>\n#" | grep "<PRE>" | sed "s#<A[^>]*>##g;s#<[^>]*>##g;s/SPAN /SPAN\n/g;s/ browser/\nbrowser/g"; };ublat' $ ublat GCACAAGCAGTGCTATGAGTGGCAGTCATCAGGACCTCAGTGTGATACAGCCAATTGTAAAAGACTGCAAAGAGGAACCAACCGGTGAGCCCTCTCCTAA # ACTIONS QUERY SCORE START END QSIZE IDENTITY CHROM STRAND START END SPAN ----------------------------------------------------------------------------------------------- browser details YourSeq 75 1 75 100 100.0% chr12 + 70189051 70189125 75 browser details YourSeq 28 73 100 100 100.0% chr12 + 66221779 66221806 28
|
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
| # seq=$1 echo 'curl '\''https://genome.ucsc.edu/cgi-bin/hgBlat'\'' \ -H '\''Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'\'' \ -H '\''Accept-Language: zh-CN,zh;q=0.9,en;q=0.8'\'' \ -H '\''Cache-Control: max-age=0'\'' \ -H '\''Connection: keep-alive'\'' \ -H '\''Content-Type: multipart/form-data; boundary=----WebKitFormBoundaryN2EGAypLqBwsLg9v'\'' \ -H '\''Cookie: hguid.genome-japan=723668884_OY1HNADbTpsXcALt7NvYvTdG79Qp; _ga=GA1.2.471529949.1663819139; _ga_PWFD1NPDNM=GS1.1.1664180680.2.1.1664181280.0.0.0; hguid=1400796865_VQ96g5e3qnvra4gymb0CdJgpTiDf; _gid=GA1.2.2126454113.1676274745; _gat=1'\'' \ -H '\''Origin: https://genome.ucsc.edu'\'' \ -H '\''Referer: https://genome.ucsc.edu/cgi-bin/hgBlat'\'' \ -H '\''Sec-Fetch-Dest: document'\'' \ -H '\''Sec-Fetch-Mode: navigate'\'' \ -H '\''Sec-Fetch-Site: same-origin'\'' \ -H '\''Sec-Fetch-User: ?1'\'' \ -H '\''Upgrade-Insecure-Requests: 1'\'' \ -H '\''User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36'\'' \ -H '\''sec-ch-ua: "Chromium";v="110", "Not A(Brand";v="24", "Google Chrome";v="110"'\'' \ -H '\''sec-ch-ua-mobile: ?0'\'' \ -H '\''sec-ch-ua-platform: "Windows"'\'' \ --data $'\''------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="hgsid"\r\n\r\n1569233731_63ApO5jAfET6mwe2b4l7U8yamDV1\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="changeInfo"\r\n\r\n\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="org"\r\n\r\nHuman\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="db"\r\n\r\nhg19\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="type"\r\n\r\nBLATs guess\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; 
name="sort"\r\n\r\nquery,score\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="output"\r\n\r\nhyperlink\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="userSeq"\r\n\r\n'$seq'\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="boolshad.allResults"\r\n\r\n0\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="Submit"\r\n\r\nSubmit\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v\r\nContent-Disposition: form-data; name="seqFile"; filename=""\r\nContent-Type: application/octet-stream\r\n\r\n\r\n------WebKitFormBoundaryN2EGAypLqBwsLg9v--\r\n'\'' \ --compressed' #| sed ":a;N;s/\n/ /g;ta" | sed "s#<PRE>#\n<PRE>#;s#</PRE>#</PRE>\n#" | grep "<PRE>" | sed "s#<A[^>]*>##g;s#<[^>]*>##g;s/SPAN /SPAN\n/g;s/ browser/\nbrowser/g"
|
12. xfm
格式化输出表格型数据;即给文本文件添加类表格显示
1 2 3 4 5 6 7 8 9 10
# xfm FILE - print the tab-separated FILE as an ASCII grid.
# For every row and every field i, awk shells out to
# `cut -f <i> FILE | wc -L` to get the widest cell of that column, then to
# `printf %.s"-" {1..<width+2>}` to build a dash rule of that width plus 2.
# It prints the "+-----" separator line, then the row itself as "| cell "
# segments padded to the column width; the END rule prints the closing
# separator. The result is piped to `le`.
# NOTE(review): the brace expansion in the printf command presumably requires
# awk's sub-shell to be bash; `le` is not defined in this snippet (presumably
# a pager alias) - confirm both.
# The text after the alias definition is a sample run and the input file.
| alias xfm='xfm() { awk -F'\''\t'\'' '\''{for(i=1;i<=NF;i++){cmd="cut -f "i" "FILENAME"|wc -L";cmd|getline len;colnum[i]=len;cmdl="printf %.s\"-\" {1.."colnum[i]+2"}";cmdl|getline line;lines[i]=line;printf "+%-"colnum[i]"s",lines[i];close(cmd);close(cmdl)}; printf "+%s\n",""}{for(i=1;i<=NF;i++){printf "| %-"colnum[i]"s ",$i};printf "|%s\n",""}END{for(i=1;i<=NF;i++){printf "+%-"colnum[i]"s",lines[i]};printf "+%s\n",""}'\'' $1 | le; };xfm' $ xfm test.xls +----------+----------+-----------+-----------+-----------+------------+ | Chr_Left | Pos_Left | Gene_Left | Chr_Right | Pos_Right | Gene_Right | +----------+----------+-----------+-----------+-----------+------------+ | chr2 | 42526812 | EML4 | chr2 | 29448241 | ALK | +----------+----------+-----------+-----------+-----------+------------+ $ cat test.xls Chr_Left Pos_Left Gene_Left Chr_Right Pos_Right Gene_Right chr2 42526812 EML4 chr2 29448241 ALK
|
13. gcrev
返回序列的反向互补序列
1 2 3
# gcrev SEQ - print the reverse complement of the DNA sequence SEQ in upper
# case (A<->T, G<->C, N->N; lower-case input is accepted).
# SEQ is reversed with rev and complemented with one tr call. Characters
# outside ATGCN are passed through unchanged, so no base is lost when SEQ
# contains other symbols.
# $ gcrev AGGCAGGAAAATCACTTGAACCCAGGAGGCAGAGATTGCAGTGAGCCGAGATCACACCACTGCACTCCAGCCAGGGCGACAGAGTGAGACTC
# GAGTCTCACTCTGTCGCCCTGGCTGGAGTGCAGTGGTGTGATCTCGGCTCACTGCAATCTCTGCCTCCTGGGTTCAAGTGATTTTCCTGCCT
alias gcrev='gcrev() { printf "%s\n" "$1" | rev | tr "ATGCNatgcn" "TACGNTACGN"; }; gcrev'
References
。。。 。。。