如何在不读取 HTTP 资源其余部分的情况下只读取 header?
How do I read the header without reading the rest of the HTTP resource?
我确信当我这样做时:
http = Net::HTTP.start uri.host, uri.port
request = Net::HTTP::Get.new uri
response = http.request request
我只是与远程 HTTP 资源建立了某种连接,从而可以查看例如它的 Content-Type;而要真正加载整个资源,需要再调用 response.body。
但是,要么是我一直都理解错了,要么是我现在访问的服务器有问题:http.request 会加载整个远程文件,而这是我无法接受的:
[Net::HTTP debug] opening connection to v.redd.it:80...
[Net::HTTP debug] opened
[Net::HTTP debug] <- "GET /6otzwem1c7721/DASH_9_6_M?source=fallback HTTP/1.1\r\nAccept-Encoding: gzip;q=1.0,deflate;q=0.6,identity;q=0.3\r\nAccept: */*\r\nUser-Agent: Ruby\r\nHost: v.redd.it\r\nConnection: close\r\n\r\n"
[Net::HTTP debug] -> "HTTP/1.1 200 OK\r\n"
[Net::HTTP debug] -> "Last-Modified: Sat, 29 Dec 2018 11:25:57 GMT\r\n"
[Net::HTTP debug] -> "ETag: \"662291aec20b252aaebcf54c3b1827af-42\"\r\n"
[Net::HTTP debug] -> "Content-Type: video/mp4\r\n"
[Net::HTTP debug] -> "Cache-Control: public, max-age=604800, s-maxage=86400, must-revalidate\r\n"
[Net::HTTP debug] -> "Accept-Ranges: bytes\r\n"
[Net::HTTP debug] -> "Content-Length: 218756120\r\n"
[Net::HTTP debug] -> "Accept-Ranges: bytes\r\n"
[Net::HTTP debug] -> "Date: Sun, 30 Dec 2018 13:44:21 GMT\r\n"
[Net::HTTP debug] -> "Via: 1.1 varnish\r\n"
[Net::HTTP debug] -> "Connection: close\r\n"
[Net::HTTP debug] -> "X-Served-By: cache-fra19120-FRA\r\n"
[Net::HTTP debug] -> "X-Cache: HIT\r\n"
[Net::HTTP debug] -> "X-Cache-Hits: 0\r\n"
[Net::HTTP debug] -> "X-Timer: S1546177461.284280,VS0,VE0\r\n"
[Net::HTTP debug] -> "Server: snooserv\r\n"
[Net::HTTP debug] -> "Vary: Origin\r\n"
[Net::HTTP debug] -> "\r\n"
[Net::HTTP debug] reading 218756120 bytes...
我用 byebug 单步跟进,直到找到发生这种情况的位置:
[159, 168] in /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http/response.rb
159: def reading_body(sock, reqmethodallowbody) #:nodoc: internal use only
160: @socket = sock
161: @body_exist = reqmethodallowbody && self.class.body_permitted?
162: begin
163: yield
=> 164: self.body # ensure to read body
165: ensure
166: @socket = nil
167: end
168: end
(byebug) where
--> #0 Net::HTTPResponse.reading_body(sock#Net::BufferedIO, reqmethodallowbody#TrueClass) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http/response.rb:164
#1 Net::HTTP.transport_request(req#Net::HTTP::Get) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:1445
#2 Net::HTTP.request(req#Net::HTTP::Get, body#NilClass, &block#NilClass) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:1407
#3 block in Net::HTTP.block in request(req#Net::HTTP::Get, body#NilClass, &block#NilClass) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:1400
#4 Net::HTTP.start at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:853
#5 Net::HTTP.request(req#Net::HTTP::Get, body#NilClass, &block#NilClass) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:1398
是服务器的问题吗?还是 Ruby 的问题?或者,如果我想在不加载整个资源的情况下获取 header 数据,是否应该使用其他方法?
P.S.:我不想引入花哨的第三方(third-party)依赖,只想使用 Net::HTTP。
这取决于您实际想要完成什么:
完全不关心响应正文(body):
使用 HEAD 请求代替 GET 请求:
uri = URI('http://example.com')
http = Net::HTTP.start uri.host, uri.port
request = Net::HTTP::Head.new uri
response = http.request request
response.body
# => nil
有条件地加载正文:
在 net/http 中使用块(block),可以让你在 body 实际加载之前介入处理:
uri = URI('http://example.com')
res = nil
Net::HTTP.start(uri.host, uri.port) do |http|
request = Net::HTTP::Get.new uri
http.request request do |response|
res = response
break
end
end
res
# => #<Net::HTTPOK 200 OK readbody=false>
res['Content-Type']
# => "text/html; charset=UTF-8"
我确信当我这样做时:
http = Net::HTTP.start uri.host, uri.port
request = Net::HTTP::Get.new uri
response = http.request request
我只是与远程 HTTP 资源建立了某种连接,从而可以查看例如它的 Content-Type;而要真正加载整个资源,需要再调用 response.body。
但是,要么是我一直都理解错了,要么是我现在访问的服务器有问题:http.request 会加载整个远程文件,而这是我无法接受的:
[Net::HTTP debug] opening connection to v.redd.it:80...
[Net::HTTP debug] opened
[Net::HTTP debug] <- "GET /6otzwem1c7721/DASH_9_6_M?source=fallback HTTP/1.1\r\nAccept-Encoding: gzip;q=1.0,deflate;q=0.6,identity;q=0.3\r\nAccept: */*\r\nUser-Agent: Ruby\r\nHost: v.redd.it\r\nConnection: close\r\n\r\n"
[Net::HTTP debug] -> "HTTP/1.1 200 OK\r\n"
[Net::HTTP debug] -> "Last-Modified: Sat, 29 Dec 2018 11:25:57 GMT\r\n"
[Net::HTTP debug] -> "ETag: \"662291aec20b252aaebcf54c3b1827af-42\"\r\n"
[Net::HTTP debug] -> "Content-Type: video/mp4\r\n"
[Net::HTTP debug] -> "Cache-Control: public, max-age=604800, s-maxage=86400, must-revalidate\r\n"
[Net::HTTP debug] -> "Accept-Ranges: bytes\r\n"
[Net::HTTP debug] -> "Content-Length: 218756120\r\n"
[Net::HTTP debug] -> "Accept-Ranges: bytes\r\n"
[Net::HTTP debug] -> "Date: Sun, 30 Dec 2018 13:44:21 GMT\r\n"
[Net::HTTP debug] -> "Via: 1.1 varnish\r\n"
[Net::HTTP debug] -> "Connection: close\r\n"
[Net::HTTP debug] -> "X-Served-By: cache-fra19120-FRA\r\n"
[Net::HTTP debug] -> "X-Cache: HIT\r\n"
[Net::HTTP debug] -> "X-Cache-Hits: 0\r\n"
[Net::HTTP debug] -> "X-Timer: S1546177461.284280,VS0,VE0\r\n"
[Net::HTTP debug] -> "Server: snooserv\r\n"
[Net::HTTP debug] -> "Vary: Origin\r\n"
[Net::HTTP debug] -> "\r\n"
[Net::HTTP debug] reading 218756120 bytes...
我用 byebug 单步跟进,直到找到发生这种情况的位置:
[159, 168] in /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http/response.rb
159: def reading_body(sock, reqmethodallowbody) #:nodoc: internal use only
160: @socket = sock
161: @body_exist = reqmethodallowbody && self.class.body_permitted?
162: begin
163: yield
=> 164: self.body # ensure to read body
165: ensure
166: @socket = nil
167: end
168: end
(byebug) where
--> #0 Net::HTTPResponse.reading_body(sock#Net::BufferedIO, reqmethodallowbody#TrueClass) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http/response.rb:164
#1 Net::HTTP.transport_request(req#Net::HTTP::Get) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:1445
#2 Net::HTTP.request(req#Net::HTTP::Get, body#NilClass, &block#NilClass) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:1407
#3 block in Net::HTTP.block in request(req#Net::HTTP::Get, body#NilClass, &block#NilClass) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:1400
#4 Net::HTTP.start at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:853
#5 Net::HTTP.request(req#Net::HTTP::Get, body#NilClass, &block#NilClass) at /System/Library/Frameworks/Ruby.framework/Versions/2.3/usr/lib/ruby/2.3.0/net/http.rb:1398
是服务器的问题吗?还是 Ruby 的问题?或者,如果我想在不加载整个资源的情况下获取 header 数据,是否应该使用其他方法?
P.S.:我不想引入花哨的第三方(third-party)依赖,只想使用 Net::HTTP。
这取决于您实际想要完成什么:
完全不关心响应正文(body):
使用 HEAD 请求代替 GET 请求:
uri = URI('http://example.com')
http = Net::HTTP.start uri.host, uri.port
request = Net::HTTP::Head.new uri
response = http.request request
response.body
# => nil
有条件地加载正文:
在 net/http 中使用块(block),可以让你在 body 实际加载之前介入处理:
uri = URI('http://example.com')
res = nil
Net::HTTP.start(uri.host, uri.port) do |http|
request = Net::HTTP::Get.new uri
http.request request do |response|
res = response
break
end
end
res
# => #<Net::HTTPOK 200 OK readbody=false>
res['Content-Type']
# => "text/html; charset=UTF-8"