Python|实现招行回单自动下载

关于selenium库的应用我在《Python|selenium实现自动点赞收藏(进阶版)》中有写过,因为selenium对XPATH有很好的支持,所以对自动化运维/爬虫的场景支持比较稳定。
根据工作需要,我需要每天下载招行的回单,以便进行后续处理,所以这次就用selenium实现招行回单的自动下载。

总结

通过代码先遍历下载过去一年的清单,之后只需要通过定时任务每天晚上执行一次获取当日清单即可。下载完PDF之后,每月统一进行一次PDF的处理即可(PDF的处理参见:《Python|PDF内容识别并按规则拆分》)
回单下载部分实测发现:

  • 招行生成PDF较慢,处理过程需要进行较长时间的time.sleep()。
  • 因为账户较多,一次性生成一定区间的回单容易报错,只能逐日生成PDF。
  • 在处理历史回单下载时,因为回单下载界面的日期区间不便进行XPATH定位(其实可以定位的,在后面的招行对账单下载部分实现了自动输入👻👻👻),所以通过os.system修改计算机时间,从而实现回单下载界面自动填充为目标时间。但这样又有几个小问题:
    • 下载一次就要重新登录一次,比较繁琐。
    • 浏览器容易提示时钟错误,从而打不开登录页(偶发问题,出现过几次)。

下载招行回单代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
'''
下载招行回单(按日下载),每天执行一次,下载当天的回单。
'''
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
import time
import datetime
import os
from loguru import logger

# 日志记录
def fileLog():
    """Register the ``cmb.log`` file sink on the shared loguru ``logger``.

    The sink is added with ``rotation='500MB'``, UTF-8 encoding and
    ``enqueue=True``; see the loguru ``logger.add`` documentation for the
    exact semantics of those options.
    """
    sink_options = {
        'rotation': '500MB',
        'encoding': 'utf-8',
        'enqueue': True,
    }
    logger.add('cmb.log', **sink_options)

# 执行下载
def queryData2():
    """Generate and download the batch bank-receipt PDF for all projects.

    Drives the module-level ``driver`` through the "银行回执批量导出" dialog:
    select every project, delete the previously generated PDF, generate a new
    one and download it. Each click is followed by a fixed pause; the long
    pause after "generate" is there because the bank builds the PDF slowly.
    """

    def _click(xpath, pause):
        """Click the element located by *xpath*, then sleep *pause* seconds."""
        driver.find_element(By.XPATH, xpath).click()
        time.sleep(pause)

    logger.info('点击银行回执批量导出')
    _click('//div[text()="银行回执批量导出"]', 2)

    # Open the project drop-down. An alternative locator kept for reference:
    #   //div[@class="ccspSInput el-input el-input--small noBorder"]
    _click('//div[@class="yhhzDia"]/div/div/div[2]/div/div[2]/div[2]/div/div/div/div/input', 2)
    _click('//div[@class="checkAllBox"]/span[1]/label/span[1]', 5)  # tick "select all"
    _click('//span[text()="确定"]', 2)  # confirm the selection
    logger.info('全选所有项目成功')

    # The date-range calendar icon is deliberately not clicked here; its
    # locator, kept for reference, is:
    #   //div[@class="yhhzDia"]/div/div/div[2]/div/div[3]/div[2]/div/i[@class="el-input__icon el-range__icon el-icon-date"]
    _click('//span[text()="删除"]', 2)  # remove the PDF left over from the last run
    logger.info('删除上次已生成PDF成功')

    # Generate the PDF and give the bank two minutes to finish building it.
    _click('//button[@class="el-button btn-css el-button--default el-button--small"]', 120)
    logger.info('生成PDF成功')

    _click('//span[text()="下载"]', 10)  # download the freshly generated PDF
    logger.info('下载成功')

# 登录网页
def login():
    """Open the custody portal, log in with the UKey and enter "回单打印".

    Works on the module-level ``driver``. Page loads and transitions are
    waited out with fixed sleeps rather than explicit waits.
    """
    entrance_url = r'https://custody.ebank.cmbchina.com/ccsp/Web/index.html#/login/entrance'
    password_box = '//div[@class="pw_ukey"]/div/div'

    def _click(xpath, pause):
        """Click the element located by *xpath*, then sleep *pause* seconds."""
        driver.find_element(By.XPATH, xpath).click()
        time.sleep(pause)

    logger.info('打开网页')
    time.sleep(5)
    driver.maximize_window()
    driver.get(entrance_url)
    time.sleep(5)

    logger.info('UKey登录')
    _click('//i[text()="UKey登录"]', 2)
    _click(password_box, 1)  # focus the UKey password box
    # The box is looked up a second time before typing, exactly as the
    # click step did, instead of reusing the earlier element handle.
    # NOTE(review): '***' is the redacted placeholder from the published
    # script; the real UKey password has to be supplied here.
    driver.find_element(By.XPATH, password_box).send_keys('***')
    _click('//div[@class="bottom_btn ready"]/span', 20)  # submit, wait for login
    logger.info('网页登录成功')

    _click('//span[text()="回单打印"][1]', 1)
    logger.info('进入回单打印')

if __name__ == "__main__":
    # Entry point: download the receipts for the current system date.
    fileLog()
    logger.info('***********招行回单下载开始*************')
    # NOTE(review): a raw string with doubled backslashes keeps both
    # backslashes in the path; presumably Windows tolerates the repeated
    # separators - confirm before changing.
    s = Service(r'F:\\workspace\\msedgedriver.exe')

    # Process the current date.
    day = datetime.date.today()
    logger.info('-------开始处理{}日-------'.format(str(day)))
    # `driver` must stay a module-level name: login() and queryData2() read
    # it as a global.
    driver = webdriver.Edge(service=s)
    try:
        login()
        queryData2()
    except Exception:
        # The script is meant to run unattended, so record the failure in
        # the log file before letting it propagate.
        logger.exception('-------{}日处理失败-------'.format(str(day)))
        raise
    finally:
        # Always close the browser and its driver process, even when a page
        # step raised; otherwise every failed run leaves them behind.
        driver.quit()
    logger.info('-------{}日处理完成-------'.format(str(day)))

    # # Alternative: walk a date range and download each day in turn. The
    # # system clock is changed so that the download page pre-fills the
    # # target date, which requires a fresh login for every day.
    # begin = datetime.date(2023,4,1)
    # end = datetime.date(2023,4,12)
    # for i in range((end-begin).days+1):
    #     day = begin + datetime.timedelta(days=i)
    #     logger.info('-------开始处理{}日-------'.format(str(day)))
    #     os.system('date {}'.format(str(day)))
    #     logger.info('修改系统日期成功')
    #     time.sleep(8)
    #     driver = webdriver.Edge(service=s)
    #     try:
    #         login()
    #         queryData2()
    #     finally:
    #         driver.quit()
    #     logger.info('-------{}日处理完成-------'.format(str(day)))

下载招行对账单代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
'''
下载招行对账单(按月下载),每月执行一次,下载上个月的对账单。
'''
from selenium import webdriver
from selenium.webdriver.edge.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import time
import datetime
from loguru import logger

# 日志记录
def fileLog():
    """Register the ``cmb.log`` file sink on the shared loguru ``logger``.

    The sink is added with ``rotation='500MB'``, UTF-8 encoding and
    ``enqueue=True``; see the loguru ``logger.add`` documentation for the
    exact semantics of those options.
    """
    sink_options = {
        'rotation': '500MB',
        'encoding': 'utf-8',
        'enqueue': True,
    }
    logger.add('cmb.log', **sink_options)

# 获取上月第一天和最后一天
def getLastMonthStartAndEnd(today=None):
    """Return the first and last day of the month before *today*.

    Args:
        today: Reference date (``datetime.date`` or ``datetime.datetime``).
            Defaults to the current local date, which is the original
            behaviour; passing an explicit date allows earlier months to be
            processed.

    Returns:
        A ``(lastMonthStart, lastMonthEnd)`` tuple of ``datetime.date``
        objects.
    """
    if today is None:
        today = datetime.date.today()
    # One day before the 1st of the reference month is the last day of the
    # previous month, whatever that month's length (and across year ends).
    firstOfThisMonth = datetime.date(today.year, today.month, 1)
    lastMonthEnd = firstOfThisMonth - datetime.timedelta(days=1)
    lastMonthStart = datetime.date(lastMonthEnd.year, lastMonthEnd.month, 1)
    return lastMonthStart, lastMonthEnd

# 执行下载
def queryData2(bdate,edate):
    """Generate and download the account-statement PDF for a date range.

    Drives the module-level ``driver`` through the "对账单打印" panel: select
    every project, type the start and end dates, delete the previously
    generated PDF, generate a new one and download it.

    Args:
        bdate: Start date text typed into the first date input (the caller
            passes ``str(date)``).
        edate: End date text typed into the second date input.
    """
    # All panel-local locators share this prefix.
    panel = '//*[@id="lyContentIn"]/div/div[8]/div/div'
    body = panel + '[2]'
    result_actions = body + '/div[3]/div[2]/div/div/div/div[2]/div[2]'

    def _click(xpath, pause):
        """Click the element located by *xpath*, then sleep *pause* seconds."""
        driver.find_element(By.XPATH, xpath).click()
        time.sleep(pause)

    def _overwrite(xpath, text):
        """Select the input's current content with Ctrl+A and type *text*."""
        field = driver.find_element(By.XPATH, xpath)
        field.send_keys(Keys.CONTROL, 'a')
        field.send_keys(text)

    logger.info('点击对账单打印')
    _click('//div[text()="对账单打印"]', 2)

    # Open the project drop-down. An alternative locator kept for reference:
    #   //div[@class="ccspSInput el-input el-input--small noBorder"]
    _click('/html/body/div[1]/div/div[1]/div[2]/div[2]/div/div[8]/div/div[2]/div[1]/div[2]/div/div/div/div/div/input', 2)
    _click('//*[@id="zxc"]/div/div[2]/div/div[1]/div/span[1]/label/span[1]/span', 5)  # tick "select all"
    _click('//span[text()="确定"]', 2)  # confirm the selection
    logger.info('全选所有项目成功')
    time.sleep(2)

    logger.info('填写开始日期')
    _overwrite(body + '/div[1]/div[4]/div/div/input[1]', bdate)
    time.sleep(2)

    logger.info('填写结束日期')
    _overwrite(body + '/div[1]/div[4]/div/div/input[2]', edate)
    # NOTE(review): presumably clicking the panel header commits the typed
    # dates / closes the date picker - confirm against the page.
    _click(panel + '[1]/span', 2)

    # The calendar icon is not clicked; its locator, kept for reference, is:
    #   //div[@class="yhhzDia"]/div/div/div[2]/div/div[3]/div[2]/div/i[@class="el-input__icon el-range__icon el-icon-date"]
    _click(result_actions + '/span[2]', 2)  # remove the PDF left over from the last run
    logger.info('删除上次已生成PDF成功')

    _click(body + '/div[2]/button', 300)  # generate the PDF, then wait five minutes
    logger.info('生成PDF成功')

    _click(result_actions + '/span[1]', 80)  # download the freshly generated PDF
    logger.info('下载成功')

# 登录网页
def login():
    """Open the custody portal, log in with the UKey and enter "回单打印".

    Works on the module-level ``driver``. Page loads and transitions are
    waited out with fixed sleeps rather than explicit waits.
    """
    entrance_url = r'https://custody.ebank.cmbchina.com/ccsp/Web/index.html#/login/entrance'
    password_box = '//div[@class="pw_ukey"]/div/div'

    def _click(xpath, pause):
        """Click the element located by *xpath*, then sleep *pause* seconds."""
        driver.find_element(By.XPATH, xpath).click()
        time.sleep(pause)

    logger.info('打开网页')
    time.sleep(5)
    driver.maximize_window()
    driver.get(entrance_url)
    time.sleep(5)

    logger.info('UKey登录')
    _click('//i[text()="UKey登录"]', 2)
    _click(password_box, 1)  # focus the UKey password box
    # The box is looked up a second time before typing, exactly as the
    # click step did, instead of reusing the earlier element handle.
    # NOTE(review): '******' is the redacted placeholder from the published
    # script; the real UKey password has to be supplied here.
    driver.find_element(By.XPATH, password_box).send_keys('******')
    _click('//div[@class="bottom_btn ready"]/span', 20)  # submit, wait for login
    logger.info('网页登录成功')

    _click('//span[text()="回单打印"][1]', 1)
    logger.info('进入回单打印')

if __name__ == "__main__":
    # Entry point: download last month's account statements.
    fileLog()
    logger.info('***********招行对账单下载开始*************')
    # NOTE(review): a raw string with doubled backslashes keeps both
    # backslashes in the path; presumably Windows tolerates the repeated
    # separators - confirm before changing.
    s = Service(r'F:\\workspace\\msedgedriver.exe')
    first,end = getLastMonthStartAndEnd()
    # str() of a date is its ISO form (YYYY-MM-DD); these strings are what
    # gets typed into the page's date inputs.
    first = str(first)
    end = str(end)
    logger.info('-------开始处理{}招行对账单-------'.format(first+'至'+end))
    # `driver` must stay a module-level name: login() and queryData2() read
    # it as a global.
    driver = webdriver.Edge(service=s)
    try:
        login()
        queryData2(first,end)
    except Exception:
        # The script is meant to run unattended, so record the failure in
        # the log file before letting it propagate.
        logger.exception('-------{}招行对账单处理失败-------'.format(first+'至'+end))
        raise
    finally:
        # Always close the browser and its driver process, even when a page
        # step raised; otherwise every failed run leaves them behind.
        driver.quit()
    logger.info('-------{}招行对账单处理完成-------'.format(first+'至'+end))


商业转载请联系作者获得授权,非商业转载请注明出处。

支付宝打赏 微信打赏

如果文章对你有帮助,欢迎点击上方按钮打赏作者

Python|实现招行回单自动下载
http://hncd1024.github.io/2023/04/13/Python_CMBdownload/
作者
CHEN DI
发布于
2023-04-13
许可协议