# Download the landing page once and parse it with the lxml backend.
landing_markup = requests.get(url).content.decode('utf-8')
soup1 = BS(landing_markup, 'lxml')

# The links of interest sit under the first and second child <div> of the
# "bus-layer" container; both groups are handled the same way, in this order.
link_selectors = (
    'div > div.bus-layer.depth.w120 > div:nth-child(1) a',
    'div > div.bus-layer.depth.w120 > div:nth-child(2) a',
)
for selector in link_selectors:
    for anchor in soup1.select(selector):
        # hrefs are appended to the base URL as-is (plain string concatenation).
        href_list.append(url + anchor['href'])
# URLs of the pages that will be visited later.
href_list = []

# Download the landing page once and parse it with the lxml backend.
landing_markup = requests.get(url).content.decode('utf-8')
soup1 = BS(landing_markup, 'lxml')

# The links of interest sit under the first and second child <div> of the
# "bus-layer" container; both groups are handled the same way, in this order.
link_selectors = (
    'div > div.bus-layer.depth.w120 > div:nth-child(1) a',
    'div > div.bus-layer.depth.w120 > div:nth-child(2) a',
)
for selector in link_selectors:
    for anchor in soup1.select(selector):
        # hrefs are appended to the base URL as-is (plain string concatenation).
        href_list.append(url + anchor['href'])
def _summarise_stops(anchors):
    """Return ``(labels, count)`` for the anchors that carry an ``aria-label``.

    ``labels`` is every ``aria-label`` value concatenated in document order
    with no separator; ``count`` is the number of anchors that contributed
    one. Anchors without the attribute are ignored.
    """
    labels = [a['aria-label'] for a in anchors if 'aria-label' in a.attrs]
    return "".join(labels), len(labels)


# Visit every collected listing page, then every route page linked from it,
# and pull the route details out of the route page's markup.
# NOTE(review): indentation was reconstructed from a collapsed line; the stop
# counter is assumed to advance only for anchors that have an aria-label.
# NOTE(review): requests.get() is called without a timeout, so a stalled
# server blocks the crawl indefinitely - consider passing timeout=...
for web1 in href_list:
    soup2 = BS(requests.get(web1).content.decode('utf-8'), 'lxml')
    number = soup2.select('div > div.list.clearfix a')
    for element in number:
        website = url + element['href']
        print(website + " ", cnt)
        soup3 = BS(requests.get(website).content.decode('utf-8'), 'lxml')

        # Route name
        name = soup3.select('div > div.info > h1 > span')[0].text

        # Operating hours: the first five characters of the item are dropped
        # (presumably the field label - confirm against the page).
        time = soup3.select('div > div.info > ul > li:nth-child(1)')[0].text[5:]

        # Reference fare: same five-character prefix removal as above.
        price = soup3.select('div > div.info > ul > li:nth-child(2)')[0].text[5:]

        # Outbound / return line descriptions. The return line is only taken
        # when exactly two "trip" elements exist; otherwise it is "Null".
        come_back = soup3.find_all(class_='trip')
        come_line = come_back[0].text
        back_line = come_back[1].text if len(come_back) == 2 else "Null"

        # Outbound stops
        come = soup3.select('div.service-area > div:nth-child(2) > ol> li >a')
        str_come, sum_come = _summarise_stops(come)
        sum_come = f"共{sum_come}站"

        # Return stops: a route with no labelled return stops reports "Null"
        # instead of a zero count.
        back = soup3.select('div.service-area > div:nth-child(4) > ol> li >a')
        str_back, sum_back = _summarise_stops(back)
        sum_back = f"共{sum_back}站" if sum_back else "Null"