selenium如何打开网页并下载网页的所有静态资源文件(js、css、image等)?
网友回复
思路:在本地启动一个HTTP代理,让selenium启动的浏览器通过该代理访问网页;代理在转发每个请求的同时,把对应的资源文件保存到本地。附上示例代码:
"""Minimal forwarding HTTP proxy that also archives every fetched resource.

Point a browser (for example one driven by selenium) at ``host:port``.  Each
plain-HTTP request is relayed to the origin server, the response is returned
to the browser, and for GET requests the same URL is downloaded once more
into ``download_folder`` so that pages and their static assets end up on disk.

Limitations: HTTPS (``CONNECT``) tunnelling is not implemented, only one
request is served per client connection, and request bodies that do not
arrive together with the request head are not forwarded.
"""
import os
import select
import socket
import threading
from urllib.parse import urlparse, urljoin

# Address and port the proxy server listens on.
host = "127.0.0.1"
port = 8081
# Backlog handed to listen().
max_connections = 100
# Number of bytes requested per recv() call.
buffer_size = 4096
# Upper bound on how much of a request head the proxy is willing to buffer.
max_request_head = 64 * 1024
# Seconds allowed for connecting to the origin server.
connect_timeout = 10
# Seconds allowed for the separate archive download.
download_timeout = 10
# Seconds of upstream silence after which a response is treated as complete.
response_idle_timeout = 3
# Folder the downloaded resources are written to.
download_folder = "test"

# Request headers that must not be replayed on the archive download:
# hop-by-hop / connection-specific headers, headers that requests computes
# itself, content codings requests may be unable to decode, and conditional
# or range headers that would yield 304 / 206 instead of the full body.
_download_skipped_headers = frozenset({
    "host",
    "connection",
    "proxy-connection",
    "keep-alive",
    "content-length",
    "transfer-encoding",
    "accept-encoding",
    "if-none-match",
    "if-modified-since",
    "if-range",
    "range",
})


def local_path_for_url(url, folder):
    """Return the file path under *folder* that mirrors the path of *url*.

    A path ending in ``/`` (or an empty path) maps to ``index.html``.  Empty,
    ``.`` and ``..`` segments are dropped so that a crafted URL cannot place
    the file outside *folder*.

    Raises:
        ValueError: if the resulting path would still escape *folder*
            (e.g. through platform-specific separators inside a segment).
    """
    ignored = ("", ".", "..")
    segments = urlparse(url).path.split("/")
    filename = segments[-1] if segments[-1] not in ignored else "index.html"
    directories = [part for part in segments[:-1] if part not in ignored]
    candidate = os.path.join(folder, *directories, filename)

    # Defence in depth: verify containment on the normalised absolute paths.
    root = os.path.abspath(folder)
    if os.path.commonpath([root, os.path.abspath(candidate)]) != root:
        raise ValueError(f"URL path escapes the download folder: {url!r}")
    return candidate


def download_file(url, folder, headers):
    """Download *url* and store the body under *folder*.

    Args:
        url: absolute URL of the resource.
        folder: root folder of the local mirror.
        headers: mapping of header names to values sent with the download
            request, or ``None`` for no extra headers.

    The file is only written when the server answers with status 200.
    """
    # Imported here because this is the only function that needs the
    # third-party package.
    import requests

    local_file = local_path_for_url(url, folder)
    response = requests.get(url, headers=headers, timeout=download_timeout)
    if response.status_code != 200:
        return

    # exist_ok avoids a race between handler threads creating the same folder.
    os.makedirs(os.path.dirname(local_file) or ".", exist_ok=True)
    with open(local_file, "wb") as f:
        f.write(response.content)


def parse_request_head(raw):
    """Parse the head of a raw HTTP request.

    Args:
        raw: request bytes as received from the client.

    Returns:
        ``(method, url, protocol, fields)`` where *fields* maps header names
        (original spelling) to their stripped values.

    Raises:
        ValueError: if the request line is not ``METHOD TARGET HTTP/x.y``.
    """
    # ISO-8859-1 decodes every byte value, so odd header bytes cannot raise.
    head = raw.split(b"\r\n\r\n", 1)[0].decode("iso-8859-1")
    lines = head.split("\r\n")
    parts = lines[0].split(" ")
    if len(parts) != 3 or not all(parts) or not parts[2].startswith("HTTP/"):
        raise ValueError(f"malformed request line: {lines[0]!r}")
    method, url, protocol = parts

    fields = {}
    for line in lines[1:]:
        name, separator, value = line.partition(":")
        if separator and name.strip():
            fields[name.strip()] = value.strip()
    return method, url, protocol, fields


def resolve_target(url, fields):
    """Work out which origin server a request is meant for.

    Args:
        url: request target from the request line (absolute or origin form).
        fields: header mapping as returned by :func:`parse_request_head`.

    Returns:
        ``(hostname, port, absolute_url)``.  An absolute request target wins
        over the Host header; the port defaults to 80.

    Raises:
        ValueError: if no plain-HTTP target can be determined.
    """
    host_value = next(
        (value for name, value in fields.items() if name.lower() == "host"),
        "",
    )
    # urljoin leaves an already absolute URL untouched.
    absolute_url = urljoin(f"http://{host_value}/", url) if host_value else url
    parsed = urlparse(absolute_url)
    if parsed.scheme != "http" or not parsed.hostname:
        raise ValueError(f"cannot determine an http target for {url!r}")
    return parsed.hostname, parsed.port or 80, absolute_url


def _read_request(client_socket):
    """Read from the client until the end of the request head (or EOF)."""
    data = bytearray()
    while b"\r\n\r\n" not in data and len(data) < max_request_head:
        chunk = client_socket.recv(buffer_size)
        if not chunk:
            break
        data += chunk
    return bytes(data)


def _read_response(remote_socket):
    """Collect the upstream response until EOF or the idle timeout expires."""
    chunks = []
    while True:
        readable, _, _ = select.select(
            [remote_socket], [], [], response_idle_timeout
        )
        if not readable:
            break
        data = remote_socket.recv(buffer_size)
        if not data:
            break
        chunks.append(data)
    return b"".join(chunks)


def _send_status(client_socket, status):
    """Send a body-less HTTP response with the given status to the client."""
    reply = f"HTTP/1.1 {status}\r\nContent-Length: 0\r\nConnection: close\r\n\r\n"
    client_socket.sendall(reply.encode("ascii"))


# Handle one HTTP request.
def handle_request(client_socket, client_address):
    """Serve a single proxied request on *client_socket*, then close it.

    The request is forwarded byte-for-byte to the origin server and the
    response is relayed back.  For GET requests the resource is additionally
    downloaded into ``download_folder``.  Malformed requests are answered
    with 400, ``CONNECT`` with 501 and unreachable origins with 502.

    Args:
        client_socket: connected socket of the client.
        client_address: address of the client, used for log output only.
    """
    remote_socket = None
    try:
        request = _read_request(client_socket)
        if not request:
            return

        try:
            method, url, protocol, fields = parse_request_head(request)
            if method.upper() == "CONNECT":
                # HTTPS tunnelling would need a bidirectional relay.
                _send_status(client_socket, "501 Not Implemented")
                return
            target_host, target_port, absolute_url = resolve_target(url, fields)
        except ValueError as exc:
            print(f"bad request from {client_address}: {exc}")
            _send_status(client_socket, "400 Bad Request")
            return

        # Log the request.
        print(f"get http req:{method} {url} {protocol} {target_host}")
        preview = [
            line.decode("iso-8859-1") for line in request.split(b"\r\n")[:2]
        ]
        print(f"请求头部:{preview}")

        # Connect to the origin server.
        try:
            remote_socket = socket.create_connection(
                (target_host, target_port), timeout=connect_timeout
            )
        except OSError as exc:
            print(f"cannot connect to {target_host}:{target_port}: {exc}")
            _send_status(client_socket, "502 Bad Gateway")
            return

        # Forward the original bytes untouched and collect the answer.
        remote_socket.sendall(request)
        response = _read_response(remote_socket)

        # Log the response; the preview may cut a multi-byte character or
        # contain binary data, hence errors="replace".
        print(f"收到HTTP响应:{len(response)} bytes")
        print(f"响应头部:{response[:100].decode('utf-8', errors='replace')}")

        # Relay the response to the client.
        client_socket.sendall(response)

        # Archive the resource.  Only GET is replayed: re-issuing any other
        # method as a GET would fetch something different.
        if method.upper() == "GET":
            download_headers = {
                name: value
                for name, value in fields.items()
                if name.lower() not in _download_skipped_headers
            }
            try:
                download_file(absolute_url, download_folder, download_headers)
            except (OSError, ValueError, ImportError) as exc:
                # Best effort: the client already has its response.
                print(f"download of {absolute_url} failed: {exc}")
    finally:
        # Close both connections whatever happened above.
        if remote_socket is not None:
            remote_socket.close()
        client_socket.close()


def main():
    """Start the proxy server and handle every client in its own thread."""
    proxy_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    with proxy_server:
        proxy_server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        proxy_server.bind((host, port))
        proxy_server.listen(max_connections)
        print(f"代理服务器已启动,监听地址:{host}:{port}")

        # Accept client connections and hand each one to a worker thread.
        while True:
            client_socket, client_address = proxy_server.accept()
            print(f"收到来自 {client_address} 的连接")
            threading.Thread(
                target=handle_request, args=(client_socket, client_address)
            ).start()


if __name__ == "__main__":
    main()