socket.makefile 未收到回复 Python 3.6
socket.makefile not receiving response Python 3.6
我在python写了一个很简单的http代理脚本,但是不知为什么,每次有请求的时候总是挂掉。
如果您 运行 使用 python 3.6,并尝试通过在您选择的浏览器中浏览 localhost:8080/www.google.com
或您想要的任何网站来使用它,您会发现脚本在其本地文件缓存未命中后挂起,并尝试从 www.google.com
请求文件
from socket import *
def main():
# Create a server socket, bind it to a port and start listening
tcpSerSock = socket(AF_INET, SOCK_STREAM)
tcpSerSock.bind(('localhost', 8080))
tcpSerSock.listen(5)
while 1:
# Start receiving data from the client
print('Ready to serve...')
tcpCliSock, addr = tcpSerSock.accept()
print('Received a connection from:', addr)
message = tcpCliSock.recv(1024)
print (message)
# Extract the filename from the given message
filename = message.split()[1].partition(b"/")[2]
print ("Full File Name: ", filename)
fileExist = False
filetouse = b"/" + filename
print ("File to use:", filetouse)
try:
# Check wether the file exist in the cache
f = open(filetouse[1:], "r")
outputdata = f.readlines()
fileExist = True
# ProxyServer finds a cache hit and generates a response message
tcpCliSock.send(b"HTTP/1.0 200 OK\r\n")
tcpCliSock.send(b"Content-Type:text/html\r\n")
for line in outputdata:
tcpCliSock.send(bytes(line, 'utf-8'))
print("Read From Cache")
# Error handling for file not found in cache
except IOError:
if fileExist is False:
# Create a socket on the proxyserver
c = socket(AF_INET, SOCK_DGRAM)
hostn = filename.split(b'/')[0].replace(b"www.",b"",1)
print("Host Name: ", hostn)
try:
# Connect to the socket to port 80
c.connect((hostn, 80))
# Create a temporary file on this socket and ask port 80
# for the file requested by the client
fileobj = c.makefile(mode='rwb')
fileobj.write(b"GET http://" + filename + b"\r\nHost: "+hostn+b"\r\n HTTP/1.1\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\nDNT: 1\r\nAccept-Encoding: gzip, deflate, br\r\nAccept-Language: en-US,en;q=0.9,es-US;q=0.8,es;q=0.7\r\n\r\n")
fileobj.flush()
# Read the response into buffer
print("Waiting for response...")
buffer = fileobj.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket and the corresponding file in the cache
tmpFile = open(b"./" + filename,"w+b")
for line in buffer:
tmpFile.write(line)
tcpCliSock.send(bytes(line, "utf-8"))
# tmpFile.close()
except error as err:
print(err)
print("Illegal request")
else:
# HTTP response message for file not found
tcpCliSock.send(b"HTTP/1.0 404 NOT FOUND\r\n")
tcpCliSock.send(b"Content-Type:text/html\r\n")
tcpCliSock.send(b"<html><header><title>404: Page Not Found</title></header><body>ERROR 404: PAGE NOT FOUND</body></html>")
# Close the client and the server sockets
tcpCliSock.close()
tcpSerSock.close()
if __name__ == "__main__":
main()
运行 我机器上的这个脚本并在 Chrome 中导航到我的服务器,我得到以下输出,表明我的本地服务器正在接收来自浏览器的连接,但我未收到来自 Google.com 的回复。
Ready to serve...
Received a connection from: ('127.0.0.1', 51909)
b'GET /www.google.com HTTP/1.1\r\nHost: localhost:8080\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\nDNT: 1\r\nAccept-Encoding: gzip, deflate, br\r\nAccept-Language: en-US,en;q=0.9,es-US;q=0.8,es;q=0.7\r\n\r\n'
Full File Name: b'www.google.com'
File to use: b'/www.google.com'
Host Name: b'google.com'
Waiting for response...
我不知道我的错误在哪里,或者我是否不符合某些协议(Google 不是唯一不发送响应的协议)。
有什么想法吗?
fileobj.write(b"GET http://" + filename + b"\r\nHost: "+hostn+b"\r\n HTTP/1.1\r\n...
这看起来不像是正确的 HTTP 请求。它会导致这样的结果:
GET http://www.google.com
Host: google.com
HTTP/1.1
...
由于多种原因而被破坏:没有正确的 HTTP 并在访问服务器(而不是代理)时尝试使用完整的 URL 而不是相对路径。除此之外:
buffer = fileobj.readlines()
在这里你读取了所有你能得到的数据,因此隐含地期望服务器在请求完成后关闭连接。但是由于您还使用 HTTP keep-alive 服务器可能只是保持连接打开以等待更多请求。您需要正确解析响应 header,以便了解响应的大小,即检查 Content-length,处理 Transfer-Encoding 分块(如果 HTTP/1 .1) 等等
我在python写了一个很简单的http代理脚本,但是不知为什么,每次有请求的时候总是挂掉。
如果您 运行 使用 python 3.6,并尝试通过在您选择的浏览器中浏览 localhost:8080/www.google.com
或您想要的任何网站来使用它,您会发现脚本在其本地文件缓存未命中后挂起,并尝试从 www.google.com
from socket import *
def main():
# Create a server socket, bind it to a port and start listening
tcpSerSock = socket(AF_INET, SOCK_STREAM)
tcpSerSock.bind(('localhost', 8080))
tcpSerSock.listen(5)
while 1:
# Start receiving data from the client
print('Ready to serve...')
tcpCliSock, addr = tcpSerSock.accept()
print('Received a connection from:', addr)
message = tcpCliSock.recv(1024)
print (message)
# Extract the filename from the given message
filename = message.split()[1].partition(b"/")[2]
print ("Full File Name: ", filename)
fileExist = False
filetouse = b"/" + filename
print ("File to use:", filetouse)
try:
# Check wether the file exist in the cache
f = open(filetouse[1:], "r")
outputdata = f.readlines()
fileExist = True
# ProxyServer finds a cache hit and generates a response message
tcpCliSock.send(b"HTTP/1.0 200 OK\r\n")
tcpCliSock.send(b"Content-Type:text/html\r\n")
for line in outputdata:
tcpCliSock.send(bytes(line, 'utf-8'))
print("Read From Cache")
# Error handling for file not found in cache
except IOError:
if fileExist is False:
# Create a socket on the proxyserver
c = socket(AF_INET, SOCK_DGRAM)
hostn = filename.split(b'/')[0].replace(b"www.",b"",1)
print("Host Name: ", hostn)
try:
# Connect to the socket to port 80
c.connect((hostn, 80))
# Create a temporary file on this socket and ask port 80
# for the file requested by the client
fileobj = c.makefile(mode='rwb')
fileobj.write(b"GET http://" + filename + b"\r\nHost: "+hostn+b"\r\n HTTP/1.1\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\nDNT: 1\r\nAccept-Encoding: gzip, deflate, br\r\nAccept-Language: en-US,en;q=0.9,es-US;q=0.8,es;q=0.7\r\n\r\n")
fileobj.flush()
# Read the response into buffer
print("Waiting for response...")
buffer = fileobj.readlines()
# Create a new file in the cache for the requested file.
# Also send the response in the buffer to client socket and the corresponding file in the cache
tmpFile = open(b"./" + filename,"w+b")
for line in buffer:
tmpFile.write(line)
tcpCliSock.send(bytes(line, "utf-8"))
# tmpFile.close()
except error as err:
print(err)
print("Illegal request")
else:
# HTTP response message for file not found
tcpCliSock.send(b"HTTP/1.0 404 NOT FOUND\r\n")
tcpCliSock.send(b"Content-Type:text/html\r\n")
tcpCliSock.send(b"<html><header><title>404: Page Not Found</title></header><body>ERROR 404: PAGE NOT FOUND</body></html>")
# Close the client and the server sockets
tcpCliSock.close()
tcpSerSock.close()
if __name__ == "__main__":
main()
运行 我机器上的这个脚本并在 Chrome 中导航到我的服务器,我得到以下输出,表明我的本地服务器正在接收来自浏览器的连接,但我未收到来自 Google.com 的回复。
Ready to serve...
Received a connection from: ('127.0.0.1', 51909)
b'GET /www.google.com HTTP/1.1\r\nHost: localhost:8080\r\nConnection: keep-alive\r\nCache-Control: max-age=0\r\nUpgrade-Insecure-Requests: 1\r\nUser-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36\r\nAccept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8\r\nDNT: 1\r\nAccept-Encoding: gzip, deflate, br\r\nAccept-Language: en-US,en;q=0.9,es-US;q=0.8,es;q=0.7\r\n\r\n'
Full File Name: b'www.google.com'
File to use: b'/www.google.com'
Host Name: b'google.com'
Waiting for response...
我不知道我的错误在哪里,或者我是否不符合某些协议(Google 不是唯一不发送响应的协议)。 有什么想法吗?
fileobj.write(b"GET http://" + filename + b"\r\nHost: "+hostn+b"\r\n HTTP/1.1\r\n...
这看起来不像是正确的 HTTP 请求。它会导致这样的结果:
GET http://www.google.com
Host: google.com
HTTP/1.1
...
由于多种原因而被破坏:没有正确的 HTTP 并在访问服务器(而不是代理)时尝试使用完整的 URL 而不是相对路径。除此之外:
buffer = fileobj.readlines()
在这里你读取了所有你能得到的数据,因此隐含地期望服务器在请求完成后关闭连接。但是由于您还使用 HTTP keep-alive 服务器可能只是保持连接打开以等待更多请求。您需要正确解析响应 header,以便了解响应的大小,即检查 Content-length,处理 Transfer-Encoding 分块(如果 HTTP/1 .1) 等等