beautifulSoup(1)

时间:2023-03-08 17:41:32
beautifulSoup(1)

import re
from bs4 import BeautifulSoup
doc = ['<html><head><title>Page title</title></head>',
       '<body><p id="firstpara" align="center">This is paragraph <b>one</b>.',
       '<p id="secondpara" align="blah">This is paragraph <b>two</b>.',
       '</html>']  
soup = BeautifulSoup(''.join(doc))
print(soup.prettify())
title=soup.html.head.title
print(title)
print(title.string)
print(len(soup('p')))
print(soup.findAll('p',align='center'))
print(soup.find('p',align='center'))
print(soup('p',align='center')[0]['id'])
print(soup.find('p',align=re.compile('^b.*'))['id'])
print(soup.find('p').b.string)
print(soup('p')[1].b.string)
-----------------------------------------------------------------------------------

<html>
 <head>
  <title>
   Page title
  </title>
 </head>
 <body>
  <p align="center" id="firstpara">
   This is paragraph
   <b>
    one
   </b>
   .
   <p align="blah" id="secondpara">
    This is paragraph
    <b>
     two
    </b>
    .
   </p>
  </p>
 </body>
</html>
<title>Page title</title>
Page title
2
[<p align="center" id="firstpara">This is paragraph <b>one</b>.<p align="blah" id="secondpara">This is paragraph <b>two</b>.</p></p>]
<p align="center" id="firstpara">This is paragraph <b>one</b>.<p align="blah" id="secondpara">This is paragraph <b>two</b>.</p></p>
firstpara
secondpara
one
two
[Finished in 0.5s]