Python3 代码编写习惯 - 2

发表于 2023-07-30 分类于 Learn ， Python3 阅读次数：本文字数： 11k 阅读时长 ≈ 10 分钟

Introduction

记录个人 Python 编程的一些小习惯。

Getting Started

主要记录关于编程语言 python 的一些快捷的代码编写习惯，方便后期查询，这里做个记录

关于缓存的理解：

通过缓存机制，可以减少磁盘读写的次数，提高并发处理程序的效率
缓存是一种提高任务存储和处理效率的有效方法
变量定义可以认为是一种临时缓存的方式，调用变量便可在缓存空间中找到对应的变量值；而无需重新计算，省去了交换的过程。

1. lambda --- 匿名函数

场景：现在需要生成一个字典，以某个 list 中的各个元素为键，值均为空 list（即 []）

方案：

for 循环

lambda 匿名函数

### Using a for loop (written as a comprehension)
## Each key gets its OWN empty list. dict.fromkeys(list_test, []) would not do:
## it stores the same list object under every key.
list_test = ['a', 'b', 'c']
dict_list = {key: [] for key in list_test}

### Using a lambda function with map()
dict_list = dict(map(lambda key: (key, []), list_test))

2. 判断字典是否含有某个键

## The `in` operator is the idiomatic membership test; it calls
## dic.__contains__(key) under the hood: True if the key exists, else False.
dic = {'a': 1}
'a' in dic              ## True
dic.__contains__('b')   ## False

3. 判断某个列表是否为另一个列表的子列表

## Set-based containment test, returns a bool.
## Converting to a set drops order and duplicates, so this answers
## "is every element of ls1 also present in ls2?" rather than
## "is ls1 a contiguous slice of ls2?".
ls1 = [1, 2]
ls2 = [1, 2, 3]
set(ls1).issubset(ls2)  ## True; issubset() accepts any iterable

4. 合并 list

## Plain concatenation: builds a new list, ls1 and ls2 stay untouched
ls1 = [1, 2, 3]
ls2 = [4, 5, 6]
ls3 = ls1 + ls2
## extend(): modifies the list in place and returns None, so its result must
## never be assigned; copy first when the original list has to be kept
ls3 = list(ls1)
ls3.extend(ls2)
## Slice assignment: inserts ls1 at the end of ls2 (ls2 is modified in place)
ls2[len(ls2):len(ls2)] = ls1

5. 字典初始化，或追加

dict().setdefault(name,default).append(which)

>>> dic = {}
>>> dic.setdefault('0',[]).append(1)
>>> dic
{'0': [1]}
>>> dic.setdefault('0',[]).append(2)
>>> dic
{'0': [1, 2]}

6. 一个读取文件的类

将类进行实例化，便能获得对应文件的字典，但是这个字典是“字典嵌套字典”；并设置获取文件行数、迭代器、原始字典、某行某列的值、某一行的整行信息。

且可以为无表头的文件指定list作为key

class Read_table():
    '''
    Column-oriented view of a tab-separated text file.

    ``self.dic`` maps every column name to a dict ``{row_index: cell}``;
    row_index counts the data lines from 0 and cell is the raw text.

    Input: path of a text file.
    '''
    def __init__(self, file, title=None):
        '''
        Read the file into a dict of dicts.

        file:  path of the tab-separated text file.
        title: list of column names for a file without a header line.
               When omitted, the first line of the file is the header.
        '''
        with open(file, 'r') as infile:
            # Only the newline is stripped: a bare strip() would also eat
            # leading/trailing tabs, i.e. empty first/last cells, and shift
            # the remaining cells into the wrong columns.
            intitle = infile.readline().strip('\n').split('\t') if title is None else title
            dic_list = [{} for _ in intitle]
            for count, line in enumerate(infile):
                cells = line.strip('\n').split('\t')
                # zip() stops at the shorter side, so cells beyond the last
                # known column are ignored instead of raising IndexError.
                for column, cell in zip(dic_list, cells):
                    column[count] = cell
        self.dic = dict(zip(intitle, dic_list))

    def getdict(self):
        '''Return the underlying dict of dicts.'''
        return self.dic

    def getlen(self):
        '''Return the number of rows, measured on the first column (0 if there is no column).'''
        for column in self.dic.values():
            return len(column)
        return 0

    def getcol(self, str, match):
        '''
        Fuzzy-match one column and return the indices of the matching rows.

        str:   column name (the parameter name shadows the builtin and is
               kept only for backward compatibility).
        match: regular expression, searched anywhere in the cell text.
        '''
        import re
        pattern = re.compile('%s' % match)
        return [i for i in self.getiter() if pattern.search(self.getvalue(str, i))]

    def getiter(self):
        '''Return an iterator over the row indices 0 .. getlen()-1.'''
        return iter(range(self.getlen()))

    def getvalue(self, key, line):
        '''Return the cell of column ``key`` at row ``line`` as str, or '-' for an unknown column.'''
        if key not in self.dic:
            return '-'
        return str(self.dic[key][line])

    def getline(self, line):
        '''Return all cells of row ``line`` as a list of str, in column order.'''
        return [str(column[line]) for column in self.dic.values()]
## 上面的文件读取用到了两个for循环，这是一个很不好的习惯
## 改进如下：
class Read_table():
    '''
    Column-oriented view of a tab-separated text file.

    ``self.dic`` maps every column name to a dict ``{row_index: cell}``;
    row_index counts the data lines from 0 and cell is the raw text.

    Input: path of a text file.
    '''
    def __init__(self, file, title=None):
        '''
        Read the file into a dict of dicts.

        file:  path of the tab-separated text file.
        title: list of column names for a file without a header line.
               When omitted, the first line of the file is the header.
        '''
        with open(file, 'r') as infile:
            # Only the newline is stripped: a bare strip() would also eat
            # leading/trailing tabs, i.e. empty first/last cells.
            intitle = infile.readline().strip('\n').split('\t') if title is None else title
            rows = [line.strip('\n').split('\t') for line in infile]
        # zip(*rows) transposes rows into columns and truncates every row to
        # the shortest one. With no data rows the transpose is empty, so fall
        # back to one empty column per title instead of indexing lines[0].
        lines = list(zip(*rows)) or [()] * len(intitle)
        self.dic = dict(zip(intitle, [dict(enumerate(column)) for column in lines]))

    def getdict(self):
        '''Return the underlying dict of dicts.'''
        return self.dic

    def getlen(self):
        '''Return the number of rows, measured on the first column (0 if there is no column).'''
        for column in self.dic.values():
            return len(column)
        return 0

    def getcol(self, str, match):
        '''
        Fuzzy-match one column and return the indices of the matching rows.

        str:   column name (the parameter name shadows the builtin and is
               kept only for backward compatibility).
        match: regular expression, searched anywhere in the cell text.
        '''
        import re
        pattern = re.compile('%s' % match)
        return [i for i in self.getiter() if pattern.search(self.getvalue(str, i))]

    def getiter(self):
        '''Return an iterator over the row indices 0 .. getlen()-1.'''
        return iter(range(self.getlen()))

    def getvalue(self, key, line):
        '''Return the cell of column ``key`` at row ``line`` as str, or '-' for an unknown column.'''
        if key not in self.dic:
            return '-'
        return str(self.dic[key][line])

    def getline(self, line):
        '''Return all cells of row ``line`` as a list of str, in column order.'''
        return [str(column[line]) for column in self.dic.values()]
## 其实，仔细一看，发现其中还是利用了两个for循环，只不过是在两个不同的列表推导式中
## 那么，for既然这么碍眼，那我们就让它消失；于是 map() 来了
## 如下：
class Read_table():
    '''
    Column-oriented view of a tab-separated text file.

    ``self.dic`` maps every column name to a dict ``{row_index: cell}``;
    row_index counts the data lines from 0 and cell is the raw text.

    Input: path of a text file.
    '''
    def __init__(self, file, title=None):
        '''
        Read the file into a dict of dicts.

        file:  path of the tab-separated text file.
        title: list of column names for a file without a header line.
               When omitted, the first line of the file is the header.
        '''
        with open(file, 'r') as infile:
            # Only the newline is stripped: a bare strip() would also eat
            # leading/trailing tabs, i.e. empty first/last cells.
            intitle = infile.readline().strip('\n').split('\t') if title is None else title
            # map() is lazy, so the transpose must be materialised while the
            # file is still open. zip() truncates rows to the shortest one.
            lines = list(zip(*map(lambda line: line.strip('\n').split('\t'), infile)))
        # With no data rows the transpose is empty: fall back to one empty
        # column per title instead of indexing lines[0].
        lines = lines or [()] * len(intitle)
        self.dic = dict(zip(intitle, map(lambda column: dict(enumerate(column)), lines)))

    def getdict(self):
        '''Return the underlying dict of dicts.'''
        return self.dic

    def getlen(self):
        '''Return the number of rows, measured on the first column (0 if there is no column).'''
        return len(next(iter(self.dic.values()), ()))

    def getcol(self, str, match):
        '''
        Fuzzy-match one column and return the indices of the matching rows.

        str:   column name (the parameter name shadows the builtin and is
               kept only for backward compatibility).
        match: regular expression, searched anywhere in the cell text.
        '''
        import re
        pattern = re.compile('%s' % match)
        return list(filter(lambda i: pattern.search(self.getvalue(str, i)), self.getiter()))

    def getiter(self):
        '''Return an iterator over the row indices 0 .. getlen()-1.'''
        return iter(range(self.getlen()))

    def getvalue(self, key, line):
        '''Return the cell of column ``key`` at row ``line`` as str, or '-' for an unknown column.'''
        if key not in self.dic:
            return '-'
        return str(self.dic[key][line])

    def getline(self, line):
        '''Return all cells of row ``line`` as a list of str, in column order.'''
        return [str(column[line]) for column in self.dic.values()]

## 添加 gz 文件支持
class Read_table():
    '''
    Column-oriented view of a tab-separated text file, plain or gzipped.

    ``self.dic`` maps every column name to a dict ``{row_index: cell}``;
    row_index counts the kept data lines from 0 and cell is the raw text.
    ``self.title`` holds the list of column names.

    Input: path of a text file; a path ending in ``.gz`` is read with gzip.
    '''
    def __init__(self, file, title=None, skip=None):
        '''
        Read the file into a dict of dicts.

        file:  path of the tab-separated file ('.gz' suffix selects gzip).
        title: list of column names for a file without a header line.
               When omitted, the first line of the file is the header.
        skip:  optional prefix (str or tuple of str); every data line that
               starts with it is dropped. The header line is never skipped.
        '''
        import os
        if os.path.splitext(file)[-1] == '.gz':
            import gzip
            # Text mode with utf-8 mirrors the default of bytes.decode();
            # it also applies universal newlines, like the plain branch.
            infile = gzip.open(file, 'rt', encoding='utf-8')
        else:
            infile = open(file, 'r')
        # One code path for both file kinds; `with` closes the handle even
        # when parsing raises.
        with infile:
            intitle = infile.readline().strip('\n').split('\t') if title is None else title
            if skip is None:
                body = infile
            else:
                body = filter(lambda line: not line.startswith(skip), infile)
            # map()/filter() are lazy, so the transpose must be materialised
            # while the file is open. zip() truncates rows to the shortest one.
            lines = list(zip(*map(lambda line: line.strip('\n').split('\t'), body)))
        # With no data rows the transpose is empty: fall back to one empty
        # column per title instead of indexing lines[0].
        lines = lines or [()] * len(intitle)
        self.dic = dict(zip(intitle, map(lambda column: dict(enumerate(column)), lines)))
        self.title = intitle

    def getdict(self):
        '''Return the underlying dict of dicts.'''
        return self.dic

    def getTitleList(self):
        '''Return the list of column names.'''
        return self.title

    def getlen(self):
        '''Return the number of rows, measured on the first column (0 if there is no column).'''
        return len(next(iter(self.dic.values()), ()))

    def getcol(self, str, match):
        '''
        Fuzzy-match one column and return the indices of the matching rows.

        str:   column name (the parameter name shadows the builtin and is
               kept only for backward compatibility).
        match: regular expression, searched anywhere in the cell text.
        '''
        import re
        pattern = re.compile('%s' % match)
        return list(filter(lambda i: pattern.search(self.getvalue(str, i)), self.getiter()))

    def getiter(self):
        '''Return an iterator over the row indices 0 .. getlen()-1.'''
        return iter(range(self.getlen()))

    def getvalue(self, key, line):
        '''Return the cell of column ``key`` at row ``line`` as str, or '-' for an unknown column.'''
        if key not in self.dic:
            return '-'
        return str(self.dic[key][line])

    def getValueList(self, keyList, line):
        '''Return the cells of row ``line`` for the columns in ``keyList``; unknown columns give '-'.'''
        return [self.getvalue(key, line) for key in keyList]

    def getline(self, line):
        '''Return all cells of row ``line`` as a list of str, in column order.'''
        return [str(column[line]) for column in self.dic.values()]

7. 几种 for 循环的替代

这些内置函数虽然简化了代码结构，但同时也降低了代码的可读性；况且，Python 的开发者也提倡减少 map()、filter()、reduce() 这类函数的使用（见 7.5 参考链接）；需要注意的是，有些针对 list 的内置函数完全可以采用列表推导式代替。

7.1 lambda + map

利用“映射”(mapping)将提前设置好的匿名函数覆盖至列表中的所有元素；即循环

>>> func = lambda x: x*2
>>> list(map(func,range(10)))
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
## or
>>> list(map(lambda x: x*2, range(10)))
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]
## or 列表推导式
>>> [x*2 for x in range(10)]
[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

需要强调一点的是：map() 返回的是迭代器（map 对象），所以若想获得具体的值，需要通过 list() 进行列表化

列表推导式：map(F, S) = [F(x) for x in S]

7.2 filter()

对列表进行过滤

## 返回的同样是 迭代器
>>> filter(lambda x: x%2==0, range(10))
<filter object at 0x7fd3952ace10>
## 转换成 list
>>> list(filter(lambda x: x%2==0, range(10)))
[0, 2, 4, 6, 8]

列表推导式：filter(P, S) = [x for x in S if P(x)]

7.3 reduce()

将函数function应用于迭代对象的前两个元素，得到的结果再与下一个元素进行function运算

## 需要注意的是，reduce() 在 python3 中不再是内置函数，被移到了 functools 模块里
>>> from functools import reduce
>>> reduce(lambda x,y: x+y,range(10))
45

7.4 any() & all()

用于对list进行条件判断，并返回布尔值

any(): 只要有一个元素满足条件即 True

all(): 只要有一个元素不满足条件即 False

>>> a
[0, 1, 2]
## any(P(x) for x in S)
>>> any(x!=0 for x in a)
True
## all(P(y) for y in S)
>>> all(x!=0 for x in a)
False

7.5 参考链接

[1] https://www.artima.com/weblogs/viewpost.jsp?thread=98196

8. 有趣的一行代码

8.1 变量赋值

当等号左边的变量个数少于右边待解包的值的个数时，会报错；但是给某个变量添加一个 * ，便能将多余的值以 list 的形式赋值给该变量（下例中为最后一个变量）

## ERROR
>>> a,b,c = range(10)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: too many values to unpack (expected 3)
## PASS
>>> a,b,*c = range(10)
>>> a,b,c
(0, 1, [2, 3, 4, 5, 6, 7, 8, 9])
## 即： c = [2, 3, 4, 5, 6, 7, 8, 9]

8.2 for & if ... else

对某个list中满足某些条件的元素进行某种操作；即列表推导式

## 偶数
>>> [x for x in range(10) if x%2==0]
[0, 2, 4, 6, 8]
## 当然可以对获得的元素进行一定的计算
>>> [x*2 for x in range(10) if x%2==0]
[0, 4, 8, 12, 16]
## 打印从 1 到 19 的数（range(1,20) 不包含 20）；3 的倍数打印 'Fizz'；5 的倍数打印 'Buzz'；同时是 3 和 5 的倍数，打印 'FizzBuzz'；否则打印数字
>>> ['FizzBuzz' if i%3==0 and i%5==0 else 'Fizz' if i%3==0 else 'Buzz' if i%5==0 else i for i in range(1,20)]
[1, 2, 'Fizz', 4, 'Buzz', 'Fizz', 7, 8, 'Fizz', 'Buzz', 11, 'Fizz', 13, 14, 'FizzBuzz', 16, 17, 'Fizz', 19]

8.3 list 反向

>>> list(range(10))
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
>>> list(range(10))[::-1]
[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

9. 生成器(`generator`)与迭代器(`iterator`)的使用

待定...

10. 笛卡尔积

集合 X 与集合 Y 的笛卡尔积等于所有可能的有序对组成的集合；

实现方式：双for循环、列表推导式、内置模块itertools

## 初始化两个 list
>>> a=list(range(3))
>>> a
[0, 1, 2]
>>> b=list(range(3,6))
>>> b
[3, 4, 5]
## 双 for 循环
>>> p1=[]
>>> for i in a:
...     for j in b:
...             p1.append([i,j])
... 
>>> p1
[[0, 3], [0, 4], [0, 5], [1, 3], [1, 4], [1, 5], [2, 3], [2, 4], [2, 5]]
>>> len(p1)
9
## 列表推导式
>>> p2=[[i,j] for i in a for j in b]
>>> p2
[[0, 3], [0, 4], [0, 5], [1, 3], [1, 4], [1, 5], [2, 3], [2, 4], [2, 5]]
## 内置模块
>>> from itertools import product
>>> product(a,b)
<itertools.product object at 0x7f93fc15f580>
>>> list(product(a,b))
[(0, 3), (0, 4), (0, 5), (1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5)]

虽然三种方法都可以实现两个 list 的循环，并输出对应有序对的组合；但就效率而言，显式的双 for 循环通常是最慢的，因为每次迭代都要查找并调用 append 方法；而列表推导式由解释器用专门的字节码构建列表，省去了这部分开销；最简洁的还属标准库模块 itertools 的 product()，但如不知道存在该模块该方法，将很难知道还能这么用，所以这也将缺少一定的可读性。整体而言，各有利弊，但推荐列表推导式。

其中 itertools 模块是 python 的内置模块，主要是用来操作迭代器的一个模块，包含的函数都是能够创建迭代器来用于for循环或者next()

11. Glob

glob 模块用来查找文件目录和文件，并将搜索到的结果返回到一个列表中

常用方法：

glob.glob(): 支持通配符 *?[]，搜索满足条件的所有文件或目录，并返回一个列表
glob.iglob(): 获取一个可遍历的对象，为一迭代器，一次只能返回一个匹配路径

==注意：==

glob 默认不匹配以点符号开始的文件。

References

。。。。。。