0%

python:处理excel

安装

pip install openpyxl
需要在文件中包含图片:pip install pillow

打开保存

创建工作簿并保存

1
2
3
4
5
6
7
8
9
from openpyxl import Workbook
# 创建工作簿,包含一张默认工作表
wb = Workbook()
# active默认值为0,默认的工作表
ws = wb.active
print(ws.title) #...Sheet
# 保存工作簿,参数:路径
wb.save("test.xlsx")

加载已有的工作簿

1
2
3
4
5
from openpyxl import Workbook, load_workbook
# load_workbook加载工作簿
wb = load_workbook("test.xlsx")
ws = wb.active
print(ws.title)

操作工作表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from openpyxl import Workbook, load_workbook

wb = Workbook()
ws = wb.active

# Workbook.create_sheet创建新的工作簿
ws1 = wb.create_sheet("test1") # 默认在最后生成
ws2 = wb.create_sheet("test2", 0) # 在第一个位置生成
ws3 = wb.create_sheet("test3", -1) # 在倒数第二个生成
# 工作簿自动生成名字Sheeti,还可以通过title属性命名
ws.title = "New Title"
# 标题选项卡背景默认白色,可更改颜色
ws.sheet_properties.tabColor = "1072BA"
# 名字可以作为工作表的键,获取已有的表
ws4 = wb["test1"]
# wb.sheetnames获取所有表格
print(wb.sheetnames)
# 遍历工作表
for sheet in wb:
print(sheet.title)
# 改变位置
# 参数:工作表,偏移量(原地为0,向前为负,向后为正)
wb.move_sheet(ws3, -1)
# 删除
del wb["New Title"]
# 复制工作表
cp_sheet = wb.copy_worksheet(ws1)
print(cp_sheet.title) # ...test1 Copy

访问单元格

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from openpyxl import Workbook

wb = Workbook()
ws = wb.active
# 单元格赋值
ws["a1"] = "test"
cell = ws.cell(6, 1, "test") # cell(行,列)获取对应单元格、cell(行,列,值)修改对应单元格的值
cell.value = "test1" # 单元格的value属性——>访问/修改

print(cell.value)
print(cell.coordinate) # 单元格坐标
print(cell.row) # 行
print(cell.column) # 列
print(cell.col_idx)
print(cell.column_letter)# 字母形式的列

x = 1
for i in range(1, 11): # range左闭右开,1~10
for j in range(1, 6):# 1~5
ws.cell(i, j, x) # 单元格赋值
x += 1
# a到c列
print(ws["a:c"])
'''元组套元组的形式
((<Cell 'Sheet'.A1>,....<Cell 'Sheet'.A10>),
(<Cell 'Sheet'.B1>,....<Cell 'Sheet'.B10>),
(<Cell 'Sheet'.C1>,....<Cell 'Sheet'.C10>))
'''
#1~5行
print(ws[1:5])
'''
((<Cell 'Sheet'.A1>, <Cell 'Sheet'.B1>, <Cell 'Sheet'.C1>, <Cell 'Sheet'.D1>, <Cell 'Sheet'.E1>),
......
(<Cell 'Sheet'.A5>, <Cell 'Sheet'.B5>, <Cell 'Sheet'.C5>, <Cell 'Sheet'.D5>, <Cell 'Sheet'.E5>))
'''
print(ws["a1:c4"])
'''
((<Cell 'Sheet'.A1>, <Cell 'Sheet'.B1>, <Cell 'Sheet'.C1>),
(<Cell 'Sheet'.A2>, <Cell 'Sheet'.B2>, <Cell 'Sheet'.C2>),
(<Cell 'Sheet'.A3>, <Cell 'Sheet'.B3>, <Cell 'Sheet'.C3>),
(<Cell 'Sheet'.A4>, <Cell 'Sheet'.B4>, <Cell 'Sheet'.C4>))
'''
print(ws["b"]) # 第b列
# 遍历:ws["a1:c4"]结果是元组的元组
for cells in ws["a1:c4"]:
for cell in cells:
print(cell.value)

操作单元格

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from openpyxl import Workbook

wb = Workbook()
ws = wb.active

x = 1
for i in range(1, 11): # 1~10
for j in range(1, 11):
ws.cell(i, j, x)
x += 1

# 合并:只保留左上角的值
ws.merge_cells("b2:d4")
# 取消合并
ws.unmerge_cells("b2:d4")

# 插入
ws.insert_cols(2, 3) # 从第二列开始插入三列
ws.insert_rows(1, 2) # 从第一行开始插入两行
# 删除
ws.delete_cols(2, 3) # 从第二列开始删除三列
ws.delete_rows(2, 3) # 从第二行开始删除三行

# 移动
ws.move_range("c4:d5", 2, -2) # 向下移动两行,向左移动两列,下、右:正数,上、左:负数
wb.save("test5.xlsx")

使用公式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from openpyxl import Workbook
from openpyxl.utils import FORMULAE
from openpyxl.formula.translate import Translator

wb = Workbook()
ws = wb.active

# 添加一行,参数:可迭代的对象
ws.append(["price1", "price2", "sum", "average"])
ws.append([54, 21])
ws.append([12, 45])
ws.append([45, 78])
# 直接使用公式
ws["c2"] = "=SUM(a2:b2)"
ws["d2"] = "=AVERAGE(a2:b2)"

# 翻译源单元格的行为,参数:Translator(公式,复制源).(目标单元格)
ws["c3"] = Translator(formula="=SUM(a2:b2)", origin="c2").translate_formula("c3")
ws["c4"] = Translator(formula="=SUM(a2:b2)", origin="c2").translate_formula("c4")
ws["d3"] = Translator(formula="=AVERAGE(a2:b2)", origin="d2").translate_formula("d3")
ws["d4"] = Translator(formula="=AVERAGE(a2:b2)", origin="d2").translate_formula("d4")

# 通过遍历翻译单元格的行为
for cell in ws["c3:d4"]:
cell[0].value = Translator(formula="=SUM(a2:b2)", origin="c2").translate_formula(cell[0].coordinate)

for cell in ws["d3:d4"]:
cell[0].value = Translator(formula="=AVERAGE(a2:b2)", origin="d2").translate_formula(cell[0].coordinate)

wb.save("test6.xlsx")

排序

使用pandas
pip install pandas

1
2
3
4
5
6
7
8
9
10
import pandas as pd

# 获取工作簿的工作表
df = pd.read_excel("test7.xlsx", sheet_name="Sheet")
# 参数:排序依据(先按桃子排,桃子一样再按西瓜排),升序/降序
df_value = df.sort_values(by=["桃子", "西瓜"], ascending=False)
# 保存
writer = pd.ExcelWriter("test7_sort.xlsx")
df_value.to_excel(writer, sheet_name="Sheet_sort", index=False)
writer._save()

插入图表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
from openpyxl import Workbook
from openpyxl.chart import Reference, LineChart

wb = Workbook()
ws = wb.active

rows = [
['月份', '桃子', '西瓜', '龙眼'],
[1, 23, 23, 56],
[2, 56, 56, 28],
[3, 58, 45, 74],
[4, 23, 23, 56],
[5, 56, 56, 28],
[6, 58, 45, 74],
]

for row in rows:
ws.append(row)

c1 = LineChart() # 实例化
c1.title = "水果销量折线图"
c1.style = 13
c1.y_axis.title = "销量"
c1.x_axis.title = "月份"

data = Reference(ws, min_row=1, max_row=7, min_col=2, max_col=4) # data范围
c1.add_data(data, titles_from_data=True) # true:范围表示包括title

s0 = c1.series[0]
# 样式
s0.marker.symbol = "triangle"
s0.marker.graphicalProperties.solidFill = "FF0000"
s0.marker.graphicalProperties.line.solidFill = "0000FF"

s1 = c1.series[1]
s2 = c1.series[2]
s2.smooth = True # 线条平滑
ws.add_chart(c1, "A8")
wb.save("test8.xlsx")

只读只写

把excel读到内存当中,大小是原来的50倍左右

只读模式

只读模式,如果需要读取很大的Excel文件,但是又不改变和保存,例如只读取数值用于其他数据分析,可以使用只读模式提供性能

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from openpyxl import load_workbook

# 加载Excel文件时使用read_only指定只读模式
'''
load_workbook参数说明:
定义:
def load_workbook(filename, read_only=False, keep_vba=KEEP_VBA, data_only=False, keep_links=True)
参数:
read_only:是否只读,默认False
keep_vba:是否使用VBA编程,默认False
data_only:是否只加载数据值,即丢弃公式、排序等操作,默认False
keep_links:是否保留超链接,默认True
'''
wb = load_workbook(filename='large_file.xlsx', read_only=True)
ws = wb['big_data']

# 可以正常读取值
for row in ws.rows:
for cell in row:
print(cell.value)

# 注意:读取完之后需要手动关闭避免内存泄露
wb.close()

只写模式

如果文件是以写为主,可以在创建工作簿的时候指定为只写模式以便提高性能,不管文件有多大,都可以把内存保持在10M以下

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
from openpyxl import Workbook
from openpyxl.cell import WriteOnlyCell
from openpyxl.comments import Comment
from openpyxl.styles import Font

wb = Workbook(write_only=True) # 创建工作簿时指定只写模式
ws = wb.create_sheet() # 需要通过create_sheet创建一个sheet

# 可以正常保存数据
for _ in range(100):
ws.append([i for i in range(200)]) # 只能通过append写

# 如果需要保留公式、注释等操作,可以使用WriteOnlyCell
cell = WriteOnlyCell(ws, value="test")
cell.font = Font(name='黑体', size=15)
cell.comment = Comment(text="这是注释", author="pan")

ws.append([cell])

wb.save('openpyxl/test.xlsx')

只写模式注意点:

  1. 需要通过create_sheet()创建表
  2. 只能通过append()增加数据,不能通过cell或iter_rows()
  3. wb.save()之后不能再修改,否则抛出WorkbookAlreadySaved异常