import requests
from lxml import etree

# Film titles collected by download(); filled in place.
filmNameList = []


def download(url: str, timeout: float = 10.0) -> None:
    """Fetch one listing page and add its film titles to ``filmNameList``.

    The page is decoded as gb2312 and every text node matched by the
    title XPath is appended to the module-level ``filmNameList``.

    Args:
        url: Address of the listing page to fetch.
        timeout: Value passed to ``requests.get`` as ``timeout`` so a
            stalled server cannot block the caller indefinitely.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # The context manager releases the connection even if decoding fails.
    with requests.get(url, timeout=timeout) as resp:
        resp.encoding = "gb2312"
        page_text = resp.text

    html = etree.HTML(page_text)
    titles = html.xpath('//table[@class="tbspan"]/tr[2]/td[2]/b/a/text()')
    # The list is mutated in place, so no ``global`` declaration is needed.
    filmNameList.extend(titles)
if __name__ == "__main__":
    # Scrape the first listing page, then print one title per line.
    download("https://www.dydytt.net/html/gndy/dyzz/list_23_1.html")
    if filmNameList:
        # Guarded so that an empty result prints nothing at all.
        print("\n".join(filmNameList))
import threading
from concurrent.futures import ThreadPoolExecutor

import requests
from lxml import etree

# Film titles collected by download(); shared between worker threads.
filmNameList = []
# Guards every write to filmNameList.
lock = threading.Lock()


def download(url: str, timeout: float = 10.0) -> None:
    """Fetch one listing page and add its film titles to ``filmNameList``.

    The page is decoded as gb2312 and every text node matched by the
    title XPath is appended to the module-level ``filmNameList`` while
    holding ``lock``.

    Args:
        url: Address of the listing page to fetch.
        timeout: Value passed to ``requests.get`` as ``timeout`` so a
            stalled server cannot pin a worker thread indefinitely.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # The context manager closes the response even when an error occurs,
    # which the trailing resp.close() call could not guarantee.
    with requests.get(url, timeout=timeout) as resp:
        resp.encoding = "gb2312"
        page_text = resp.text

    html = etree.HTML(page_text)
    titles = html.xpath('//table[@class="tbspan"]/tr[2]/td[2]/b/a/text()')

    # One locked extend instead of acquire/release per item: the lock is
    # always released, and a page's titles stay together in the list.
    with lock:
        filmNameList.extend(titles)
if __name__ == "__main__":
    import sys  # needed only here, to report failed pages on stderr

    # Remember which URL each future belongs to so failures can be named.
    pending = {}
    with ThreadPoolExecutor(5) as t:
        for i in range(1, 11):
            url = f"https://www.dydytt.net/html/gndy/dyzz/list_23_{i}.html"
            pending[t.submit(download, url=url)] = url

    # Leaving the ``with`` block waits for every task, so exception()
    # returns immediately. Without this check, errors raised inside
    # download() would be discarded along with the futures.
    for future, url in pending.items():
        error = future.exception()
        if error is not None:
            print(f"failed to fetch {url}: {error!r}", file=sys.stderr)

    for i in filmNameList:
        print(i)