使用socket和asyncio库爬取数据

### 使用socket和asyncio库爬取数据

# 使用socket和asynico库爬取数据
#asyncio 没有提供http协议的接口 aiohttp
import asyncio
import socket
from urllib.parse import urlparse


async def get_url(url):
    """Fetch *url* with a hand-rolled HTTP/1.1 GET over a raw TCP stream.

    asyncio provides no HTTP client of its own (use aiohttp for real work),
    so this issues the request manually via asyncio streams.

    Returns the response text split on the blank line between the headers
    and the body (a list of strings).
    """
    # Break the URL into host and path for the request line.
    parsed = urlparse(url)
    host = parsed.netloc
    path = parsed.path or "/"  # an empty path means the site root

    # Open a plain-HTTP TCP connection (port 80) through the event loop.
    reader, writer = await asyncio.open_connection(host, 80)
    try:
        writer.write(
            "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n"
            .format(path, host).encode("utf8")
        )
        # Flush the transport buffer so the request actually goes out.
        await writer.drain()

        all_lines = []
        async for raw_line in reader:
            all_lines.append(raw_line.decode("utf8"))
    finally:
        # The original leaked the connection; close it deterministically.
        writer.close()
        await writer.wait_closed()

    html = "\n".join(all_lines)
    # Headers and body are separated by the first empty CRLF line.
    html_data = html.split("\r\n\r\n")
    print(len(html_data))
    return html_data

async def main():
    """Crawl pages 2-19 concurrently and wait for every fetch to finish."""
    url = "xxx/?page={}"
    # The original awaited each task inside the loop, which serialized the
    # requests and defeated the point of asyncio. Create all tasks first,
    # then gather them so the fetches run concurrently.
    tasks = [asyncio.create_task(get_url(url.format(i))) for i in range(2, 20)]
    await asyncio.gather(*tasks)

if __name__ == "__main__":
    import time

    # Time the whole crawl; debug=True turns on asyncio's debug checks
    # (slow-callback warnings, un-awaited coroutine reports, etc.).
    start_time = time.time()
    asyncio.run(main(), debug=True)
    print('last time:{}'.format(time.time() - start_time))

https://docs.python.org/zh-cn/3/library/asyncio.html

坚持原创技术分享,您的支持将鼓励我继续创作!