1. 使用`threading`模块:
import threading

import requests


def fetch_page(url):
    """Fetch the page at *url* and print its body."""
    # Fetch the page and process the data.
    response = requests.get(url)
    print(response.text)


def main():
    """Spawn one thread per URL and wait for all of them to finish."""
    urls = ['http://example.com', 'http://example.org']
    threads = []
    for url in urls:
        thread = threading.Thread(target=fetch_page, args=(url,))
        threads.append(thread)
        thread.start()
    # Block until every worker thread has completed.
    for thread in threads:
        thread.join()


if __name__ == '__main__':
    main()
2. 使用`concurrent.futures`模块中的`ThreadPoolExecutor`:
import concurrent.futures

import requests


def fetch_page(url):
    """Fetch the page at *url* and print its body."""
    # Fetch the page and process the data.
    response = requests.get(url)
    print(response.text)


def main():
    """Fetch all URLs concurrently via a bounded thread pool."""
    urls = ['http://example.com', 'http://example.org']
    # The `with` block waits for all submitted work before exiting.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        executor.map(fetch_page, urls)


if __name__ == '__main__':
    main()
3. 使用`asyncio`和`aiohttp`库实现异步爬虫,虽然这不是传统意义上的多线程,但是可以实现并发:
import asyncio

import aiohttp


async def fetch_page(session, url):
    """Fetch the page at *url* using *session* and return its body text."""
    async with session.get(url) as response:
        return await response.text()


async def main():
    """Fetch all URLs concurrently on the event loop and print each body."""
    urls = ['http://example.com', 'http://example.org']
    # One shared session reuses connections across all requests.
    async with aiohttp.ClientSession() as session:
        tasks = [fetch_page(session, url) for url in urls]
        # gather() runs all fetches concurrently and preserves input order.
        responses = await asyncio.gather(*tasks)
        for response in responses:
            print(response)


if __name__ == '__main__':
    asyncio.run(main())
以上示例展示了如何使用Python实现多线程爬虫。请根据实际需求选择合适的方法。

