1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 | import requests import bs4 from bs4 import BeautifulSoup
def GetHTMLText(url): try: r = requests.get(url) r.raise_for_status() r.encoding = r.apparent_encoding return r.text except: return ""
def fillUnivList(ulist, html): soup = BeautifulSoup(html,"html.parser") '''一个tr标签存放一所大学的信息''' for tr in soup.find("tbody").children: if isinstance(tr,bs4.element.Tag): tds = tr('td') '''由于进行了遍历,使用print打印tds会得到多个列表''' ulist.append([tds[0].string, tds[1].string, tds[3].string])
'''def printUnivlist(ulist, num): print("{:^10}\t{:^6}\t{:^10}".format("排名","学校","总分")) for i in range(num): u = ulist[i] print("{:^10}\t{:^6}\t{:^10}".format(u[0],u[1],u[2])) ''' '''优化输出格式,中文对齐问题,使用chr(12288)表示一个中文空格,utf-8编码''' def printUnivlist(ulist, num): tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}" print(tplt.format("排名","学校","总分",chr(12288))) for i in range(num): u = ulist[i] print(tplt.format(u[0],u[1],u[2],chr(12288)))
def main(): uinfo = [] url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2018.html" html = GetHTMLText(url) fillUnivList(uinfo,html) printUnivlist(uinfo,10)
main()
|