"""Scrape sales-status tables from a housing registry site into an Excel file.

For each URL in ``sell_status_url`` the script downloads the page, copies the
table found under the element with id ``fwxx`` into its own worksheet, and
collects an identifier for every cell the page marks with a red background.
The collected identifiers are then listed on the summary sheet and the
workbook is saved to ``resfile``.
"""
import string
import time

import requests
from lxml import etree
from openpyxl import styles, Workbook

proj_info = "http://119.97.201.22:8083/spfxmcx/spfcx_mx.aspx?DengJh=%BA%FE2100999"
lpb = "http://119.97.201.22:8083/spfxmcx/spfcx_lpb.aspx?DengJh=%BA%FE2100999"
sell_url_header = "http://119.97.201.22:8083/spfxmcx/spfcx_fang.aspx?dengJH=%BA%FE2100999&houseDengJh=%BA%FE001105"
# One status page per suffix 1..9 appended to the header URL.
sell_status_url = [sell_url_header + str(i) for i in range(1, 10)]
useragent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
headers = {"User-Agent": useragent}
# Output file name carries the local date and hour of the run.
resfile = "花山印象销售情况_" + time.strftime("%y-%m-%d_%H", time.localtime()) + ".xlsx"
# Fill applied to worksheet cells that correspond to red cells on the page.
sold_cell_style = styles.PatternFill("solid", fgColor='FF00B050')

# Inline ``style`` values the source page puts on a <td>.
_SOLD_STYLE = "background-color:#FF0000"
_MORTGAGED_STYLE = "background-color:#FFFF00"
# Our table starts at A4: <th> cells go on row 4, each <tr> on the rows below.
_TABLE_FIRST_ROW = 4


def _fetch_sell_table(url):
    """Download *url* and return the first table under the ``fwxx`` element.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        IndexError: if the page contains no matching table.
    """
    resp = requests.get(url, headers=headers, timeout=300)
    # Fail on HTTP errors here instead of trying to parse an error page.
    resp.raise_for_status()
    # NOTE(review): resp.text uses whatever encoding requests infers; the
    # query strings above are percent-encoded in a non-UTF-8 charset, so
    # confirm the page text decodes correctly.
    tables = etree.HTML(resp.text).xpath('//*[@id="fwxx"]/table')
    if not tables:
        # Same exception type as indexing an empty result, but with context.
        raise IndexError("no sales table found at " + url)
    return tables[0]


def _fill_sheet(ws, sell_table):
    """Copy *sell_table* into worksheet *ws* and return the sold identifiers.

    ``<th>`` children of the table are written left to right on the current
    row; every ``<tr>`` child starts a new row. A ``<td>`` without a ``style``
    attribute contributes its own text; a styled ``<td>`` contributes the text
    of its first child element. Red cells get " 网签" appended and the sold
    fill, and an identifier built from columns A, B and C of the same row plus
    the cell text is recorded; yellow cells get " 抵押" appended.
    """
    sold = []
    table_col = 1
    table_row = _TABLE_FIRST_ROW
    # Iterating an lxml element yields its children (getchildren() is
    # deprecated).
    for element in sell_table:
        if element.tag == "th":
            # table header
            ws.cell(row=table_row, column=table_col).value = element.text
            table_col += 1
        elif element.tag == "tr":
            table_row += 1
            for col, td in enumerate(element, start=1):
                cell = ws.cell(row=table_row, column=col)
                style = td.attrib.get('style')
                if style is None:
                    cell.value = td.text
                    continue
                cell.value = td[0].text
                if style == _SOLD_STYLE:
                    col_a, col_b, col_c = (
                        ws.cell(row=table_row, column=c).value for c in (1, 2, 3)
                    )
                    sold_str = (
                        col_a + "-" + col_b + "-" + col_c + "0"
                        + cell.value.strip(string.punctuation)
                    )
                    print(sold_str, " sold out")
                    sold.append(sold_str)
                    cell.value += " 网签"
                    cell.fill = sold_cell_style
                elif style == _MORTGAGED_STYLE:
                    cell.value += " 抵押"
    return sold


wb = Workbook()
# Drop the default sheet so the summary sheet is the first one in the file.
wb.remove(wb.active)
wb.create_sheet("总表")
sold_strs = []
for sheet_no, url in enumerate(sell_status_url, start=1):
    ws = wb.active = wb.create_sheet(str(sheet_no) + "栋")
    print(sheet_no, url)
    ws['A1'].value = "数据来源"
    ws['B1'].value = url
    sold_strs.extend(_fill_sheet(ws, _fetch_sell_table(url)))

# List every collected identifier in column A of the summary sheet, which is
# also left as the active sheet in the saved workbook.
ws = wb.active = wb['总表']
for row_no, sold_str in enumerate(sold_strs, start=1):
    ws.cell(row=row_no, column=1).value = sold_str
wb.save(resfile)
wb.close()