python爬虫正则匹配获得邮箱,写邮件把获得邮箱列表循环发送邮件。----根据网上的视频教程介绍,编写的
# -*- coding: UTF-8 -*-
__author__ = 'Intgrp'
# Password passed to smtp.login() in mailpost(); the value here looks like a
# placeholder and must be replaced with the real credential before running.
pwd = 'xxxxxxxxx'
import urllib # 网页操作模块,获取网页数据
import re # 正则表达式
from email.mime.text import MIMEText
from email.header import Header
import smtplib
def page():
    """Fetch the first page of the Tieba thread and find its "last page" links.

    Returns:
        A list of ``(thread_id, last_page_number)`` string tuples, one per
        matching link; an empty list when the page contains no such link.
    """
    response = urllib.urlopen('http://tieba.baidu.com/p/2339649637')
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even when read() fails.
        response.close()
    # "?" is a regex metacharacter, so the literal one in the link is escaped.
    # The page carries the "last page" link twice, so two tuples are expected.
    last_page_link = re.compile(r'<a href="/p/(\d+)\?pn=(\d+)">尾页</a>')
    return last_page_link.findall(html)
def get(url):
    """Download *url* and return every e-mail address found in its content.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of the matched address strings, in the order they appear
        (duplicates are kept).
    """
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the HTTP connection even when read() fails.
        response.close()
    # Local part: one or more letters, digits, "_", "-" or "."; then "@";
    # then a domain of letters, digits, "-" or "."; then a final "." followed
    # by a 2-4 letter suffix.
    email_pattern = re.compile(r'[0-9a-zA-Z_\-\.]+@[0-9a-zA-Z\-\.]+\.[a-zA-Z]{2,4}')
    return email_pattern.findall(html)
def mailpost(email):
    """Send the fixed HTML promotion message to *email* through smtp.163.com.

    A message needs a sender, a recipient, a subject, an SMTP server and the
    account name/password used to log in; all but the recipient are fixed
    here (the password comes from the module-level ``pwd``).

    Args:
        email: Recipient address.

    Raises:
        smtplib.SMTPException: If connecting, logging in or sending fails.
    """
    print('%s正在发送!' % email)
    sender = 'xxx@163.com'
    title = '测试'
    smtpserver = 'smtp.163.com'
    name = 'xxx@163.com'

    msg = MIMEText('点击链接直接进入阿里天池:<a href="https://tianchi.aliyun.com/" >https://tianchi.aliyun.com/</a>', 'html', 'utf-8')
    msg['Subject'] = Header(title, 'utf-8')
    msg['From'] = sender
    msg['To'] = email

    smtp = smtplib.SMTP()
    smtp.connect(smtpserver)
    try:
        smtp.login(name, pwd)
        smtp.sendmail(sender, email, msg.as_string())
        smtp.quit()
    finally:
        # quit() already closes the socket on success; close() is a no-op then
        # and guarantees the socket is released when login/sendmail raise.
        smtp.close()
    print('%s已发送完毕' % email)
# Script body: walk every page of the thread, harvest the e-mail addresses on
# it, and send the message to each address in turn.
pa = page()  # e.g. [('2339649637', '59'), ('2339649637', '59')]
from time import sleep
if not pa:
    # Without a "last page" link the page count is unknown; stop with a clear
    # message instead of an IndexError on pa[0].
    raise SystemExit('no "last page" link found; cannot determine page count')
for i in range(int(pa[0][1])):
    # Page numbers in the URL are 1-based, hence i + 1.
    mail = get("http://tieba.baidu.com/p/%s?pn=%s" % (pa[0][0], i + 1))
    for n in mail:
        # Send to the single address, not to the whole list of matches.
        mailpost(n)
        # Pause between messages. NOTE(review): the original indentation was
        # lost, so the intended nesting level of this sleep should be confirmed.
        sleep(2)