首页 > 代码库 > bs4取数

bs4取数

# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup

def file2soup(ffile):
    """Parse the HTML file at *ffile* and return the BeautifulSoup tree.

    Args:
        ffile: Path of the saved HTML page to read.

    Returns:
        A ``BeautifulSoup`` object built with the ``lxml`` parser.

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    # Read-only binary mode: the file is never written, so 'r+b' only added a
    # needless write-permission requirement.
    with open(ffile, 'rb') as f:
        # Setting ``f.encoding`` on a binary file object does not influence
        # decoding; the UTF-8 hint has to be handed to BeautifulSoup itself.
        return BeautifulSoup(f, 'lxml', from_encoding='utf-8')

# def soup2list(soup,ulist):
#     # print (soup.div)
#     for i in soup.find('tbody').children:
#         if isinstance(i,bs4.element.Tag):
#             o=i('td')
#             ulist.append([o[0].string,o[1].string,o[3].string])
#     return ulist

# def soup2list(soup,ulist):
#     # print (soup.div)
#     for i in soup.find('tbody').children:
#         if len(i)>1:  # skip blank lines
#             o=i('td')  # collect the td tags ("td" must be quoted); o is a <class 'bs4.element.ResultSet'> !!!
#             a,b,c=0,1,3
#             ulist.append([o[a].string,o[b].string,o[c].string])
#     return ulist

def soup2list(soup, ulist):
    """Append the rows of the ``hidden_zhpm`` table body to *ulist*.

    Looks up ``<tbody id="hidden_zhpm">`` in *soup* and, for every ``<tr>``
    directly inside it that has at least four ``<td>`` cells, appends
    ``[cell0, cell1, cell3]`` (whitespace-stripped cell text) to *ulist*.

    Args:
        soup: Parsed document (a BeautifulSoup tree).
        ulist: List that receives one three-item list per table row; it is
            extended in place.

    Returns:
        The same *ulist* object, so callers may rebind the result. It is
        returned unchanged when the table body is not present.
    """
    tbody = soup.find('tbody', id="hidden_zhpm")
    if tbody is None:
        # Page layout differs or the file is not the expected page.
        return ulist

    # NOTE(review): columns 0, 1 and 3 are presumably rank, school name and
    # total score -- confirm against the saved page.
    wanted = (0, 1, 3)
    for row in tbody.find_all('tr', recursive=False):
        cells = row.find_all('td')
        if len(cells) <= max(wanted):
            # Blank or malformed row: not enough cells to index safely.
            continue
        ulist.append([cells[i].get_text(strip=True) for i in wanted])
    return ulist



# URL template of a listing page; nothing in the visible script reads it.
url = 'http://bj.58.com/pinpaigongyu/pn/{ppp}/?minprice=2000_4000'
# Locally saved HTML page that the script parses.
ffile = 'd://best.txt'
ulist = []

# Guarded so that importing this module does not touch the file system.
if __name__ == '__main__':
    soup = file2soup(ffile)

    # Rebind only when soup2list actually returns something, so the list is
    # never replaced by None.
    rows = soup2list(soup, ulist)
    if rows is not None:
        ulist = rows

    # Format spec layout: {:<fill char><alignment><width>}
    row_format = '{:0>3}\t {:+<15}\t {: >5}\t'
    # Header columns: rank, school name, total score.
    print(row_format.format('排名', '校名', '总分'))
    # Slicing (instead of indexing range(11)) tolerates fewer than 11 rows.
    for u in ulist[:11]:
        print(row_format.format(u[0], u[1], u[2]))

bs4取数