How to "continue" after timeout (deadline) in concurrent goroutines?
I am making concurrent GET requests to different URLs (1,000 in this case). For the requests I followed a producer-consumer design: there are 50 workers (goroutines, the crawlers) and 1 producer (which fills the channel with URLs).

The problem: I set the timeout to 15 seconds in the client (I don't want to wait more than 15 seconds for any single request). But when a URL makes a goroutine wait for more than 15 seconds, my code exits with:

context deadline exceeded (Client.Timeout or context cancellation while reading body)

Wanted behavior: when a server takes more than 15 seconds, I want the related goroutine to simply continue with the next URL.

Here is the code:
package main

import (
	"bufio"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"sync"
	"time"
)

func crawler(wg *sync.WaitGroup, urlChannel <-chan string) {
	defer wg.Done()
	client := &http.Client{Timeout: 15 * time.Second} // single client is sufficient for multiple requests

	for urlItem := range urlChannel {
		req1, _ := http.NewRequest("GET", "http://"+urlItem, nil)                                           // generating the request
		req1.Header.Add("User-agent", "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/74.0") // changing user-agent
		resp1, respErr1 := client.Do(req1)                                                                  // sending the prepared request and getting the response
		if respErr1 != nil {
			fmt.Println("server error", urlItem)
			continue
		}

		if resp1.StatusCode/100 == 2 { // means server responded with 2xx code
			f1, fileErr1 := os.Create("200/" + urlItem + "_original.txt") // creating the relative file
			if fileErr1 != nil {
				fmt.Println("file error", urlItem)
				log.Fatal(fileErr1)
			}

			_, writeErr1 := io.Copy(f1, resp1.Body) // writing the sourcecode into our file
			if writeErr1 != nil {
				fmt.Println("file error", urlItem)
				log.Fatal(writeErr1)
			}

			f1.Close()
			resp1.Body.Close()
			fmt.Println("success:", urlItem)
		}
	}
}

func main() {
	var wg sync.WaitGroup // synchronization to wait for all the goroutines

	file, err := os.Open("urls.txt") // the file containing the url's
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close() // don't forget to close the file

	urlChannel := make(chan string) // create a channel to store all the url's

	_ = os.Mkdir("200", 0755) // if it's there, it will create an error, and we will simply ignore it

	for i := 0; i < 50; i++ {
		wg.Add(1)
		go crawler(&wg, urlChannel)
	}

	scanner := bufio.NewScanner(file) // each line has another url
	for scanner.Scan() {
		urlChannel <- scanner.Text()
	}

	close(urlChannel)
	wg.Wait()
}
Specifically, I thought I was handling the problem here (but apparently I am not):

	resp1, respErr1 := client.Do(req1) // sending the prepared request and getting the response
	if respErr1 != nil {
		fmt.Println("server error", urlItem)
		continue
	}

How can I achieve the wanted behavior (skip the URL if the timeout is reached)?
Most likely the problem is here:

	_, writeErr1 := io.Copy(f1, resp1.Body) // writing the sourcecode into our file
	if writeErr1 != nil {
		fmt.Println("file error", urlItem)
		log.Fatal(writeErr1)
	}

The error returned by this operation is not necessarily a write error; it can also be a read error, and in this case it most likely is: a timeout while reading the response body. Don't call log.Fatal in that case.
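Not part of the original answer, but here is a minimal sketch of how the crawler loop could look under that approach: every error path logs the URL and continues to the next one instead of calling log.Fatal. The extra resp.Body.Close() calls, the check on http.NewRequest, and the error messages are my additions, not the original code.

	func crawler(wg *sync.WaitGroup, urlChannel <-chan string) {
		defer wg.Done()
		client := &http.Client{Timeout: 15 * time.Second}

		for urlItem := range urlChannel {
			req, err := http.NewRequest("GET", "http://"+urlItem, nil)
			if err != nil {
				fmt.Println("request error", urlItem, err)
				continue
			}
			req.Header.Add("User-agent", "Mozilla/5.0 (X11; Linux i586; rv:31.0) Gecko/20100101 Firefox/74.0")

			resp, err := client.Do(req)
			if err != nil {
				// covers timeouts that fire before or while receiving the headers
				fmt.Println("server error", urlItem, err)
				continue
			}

			if resp.StatusCode/100 == 2 {
				f, err := os.Create("200/" + urlItem + "_original.txt")
				if err != nil {
					fmt.Println("file error", urlItem, err)
					resp.Body.Close()
					continue // skip this URL instead of log.Fatal
				}

				_, err = io.Copy(f, resp.Body) // a timeout while reading the body surfaces here
				f.Close()
				if err != nil {
					fmt.Println("read/write error", urlItem, err)
					resp.Body.Close()
					continue // skip this URL instead of log.Fatal
				}
				fmt.Println("success:", urlItem)
			}
			resp.Body.Close() // always release the body so the connection can be reused
		}
	}

With this shape, a slow server only costs the worker the 15-second timeout for that one URL; the goroutine then moves on to the next item from the channel, and the rest of main can stay unchanged.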