1:
# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
# Sample document: one heading that carries an id, and two anchors.
html_sample = (
    ' '
    '<html> '
    '<body> '
    '<h1 id = "title">Hello World</h1> '
    '<a href ="#" class="link">this is link1</a> '
    '<a href = "# link2" Class = "link">This is link2</a> '
    '</body> '
    '</html>'
)
soup = BeautifulSoup(html_sample, features='html.parser')
# Use select() to find every element whose id is "title". A CSS id selector
# is written with a leading '#'. An id is normally unique within a document.
title_matches = soup.select('#title')
print(title_matches)
运行结果为:
[<h1 id="title">Hello World</h1>]
2:
# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
# Sample document: one heading that carries an id, and two anchors.
html_sample = ' \
<html> \
<body> \
<h1 id = "title">Hello World</h1> \
<a href ="#" class="link">this is link1</a> \
<a href = "# link2" Class = "link">This is link2</a> \
</body> \
</html>'
soup = BeautifulSoup(html_sample,'html.parser')
# Use select() to find every element whose class is "link". A CSS class
# selector is written with a leading '.'. Unlike an id, a class is usually
# shared by several elements, so the matches are iterated one by one.
for link in soup.select('.link'):
    # The loop body must be indented; an unindented print raises
    # IndentationError before anything runs.
    print(link)
运行结果为:
<a class="link" href="#">this is link1</a>
<a class="link" href="# link2">This is link2</a>
3:
# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
# Sample document: one heading that carries an id, and two anchors.
html_sample = ' \
<html> \
<body> \
<h1 id = "title">Hello World</h1> \
<a href ="#" class="link">this is link1</a> \
<a href = "# link2" Class = "link">This is link2</a> \
</body> \
</html>'
soup = BeautifulSoup(html_sample,'html.parser')
# Use select() with a bare tag name (no '#' or '.' prefix) to find every
# <a> element in the document.
alinks = soup.select('a')
for link in alinks:
    # The loop body must be indented; an unindented print raises
    # IndentationError before anything runs.
    print(link)
运行结果为:
<a class="link" href="#">this is link1</a>
<a class="link" href="# link2">This is link2</a>
4:
# -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
# Sample document: one heading that carries an id, and two anchors.
html_sample = ' \
<html> \
<body> \
<h1 id = "title">Hello World</h1> \
<a href ="#" class="link">this is link1</a> \
<a href = "# link2" Class = "link">This is link2</a> \
</body> \
</html>'
soup = BeautifulSoup(html_sample,'html.parser')
# Use select() to find every <a> tag, then print the href of each one.
alinks = soup.select('a')
for link in alinks:
    # Subscripting a tag reads one of its attributes. Both anchors in the
    # sample define href, so the lookup succeeds for each of them.
    # The loop body must be indented; an unindented print raises
    # IndentationError before anything runs.
    print(link['href'])
运行结果为:
#
# link2