为什么 header 不是预期的?
Why the header is not expected?
如您所见,我正在尝试制作一个多进程下载器。
它运行良好,直到我打开最终文件:它坏了。
我检查了一下,但找不到任何错误。但可能是header下载header错误,Range值错误。
这是代码
class MultiprocessDownload:
def __init__(self, url, path, filename, thread_num):
self.url = url
self.path = path
self.filename = filename
self.thread_num = thread_num
self.threads = []
self.head = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}).headers
self.length = int(self.head.get('Content-Length', False))
print(self.length)
self.proc = []
self.lock = []
for i in range(self.thread_num):
self.proc.append(None)
self.lock.append(None)
if self.length == False: raise Exception('Thik file does not support multiprocess download')
num = self.length // self.thread_num
last = -1
for i in range(1, thread_num + 1):
self.threads.append([last + 1, num * i])
last = num * i
self.threads[-1:][0][1] += self.length % self.thread_num
print(self.threads)
def thread(self, num):
self.lock[num] = _thread.allocate_lock()
with self.lock[num]:
header = {'Range': f'bytes=' + str(self.threads[num][0]) + '-' + str(self.threads[num][1])}
print(header)
self.proc[num] = 0
req = requests.get(self.url, headers=header, stream = True)
blk_size = self.threads[num][1] - self.threads[num][0]
file = open('dl_block_' + str(num), 'wb')
i = 0
for chunk in req.iter_content(chunk_size=512):
if chunk:
file.write(chunk)
i += 1
self.proc[num] = i * 512 / blk_size
file.close()
return 0
def getDownloadInfo(self):
info = []
total = 0
for i in range(self.thread_num):
if None in self.proc: return 1
info.append(str(self.proc[i] * 100) + '%')
total += self.proc[i]
info.append(str(total / self.thread_num * 100) + '%') #[*threads_info, total_info]
return info
def run(self):
for i in range(self.thread_num):
print(i)
_thread.start_new_thread(self.thread, (i,))
time.sleep(10)
locked = 1
print(self.lock)
while locked:
print(self.getDownloadInfo())
locked = 0
for n in range(self.thread_num):
if None in self.lock:
locked = 1
continue
if self.lock[n].locked():
locked += 1
print(locked)
time.sleep(1)
for num in range(self.thread_num):
blk = open('dl_block_' + str(num), 'rb')
target = open(self.path + self.filename, 'ab')
target.write(blk.read())
target.close()
blk.close()
os.remove('dl_block_' + str(num))
print('file downloaded as', self.path + self.filename)
它的一些输出:
[[0, 48967091], [48967092, 97934182], [97934183, 146901273], [146901274, 195868364], [195868365, 244835455], [244835456, 293802546], [293802547, 342769637], [342769638, 391736728], [ 391736729, 440703819], [440703820, 489670910], [489670911, 538638001], [538638002, 587605092], [587605093, 636572183], [636572184, 685539274], [685539275, 734506365], [734506366, 783473471]]
0
1个
2个
3个
4个
5个
6个
7
8个
9
10
11
12
13
14
15
{'Range': 'bytes=48967092-97934182'}
{'Range': 'bytes=244835456-293802546'}{'Range': 'bytes=195868365-244835455'}
{'Range': 'bytes=489670911-538638001'}{'Range': 'bytes=391736729-440703819'}{'Range': 'bytes=342769638-391736728'}{'Range': 'bytes=0-48967091'}{'Range': 'bytes=97934183-146901273'}
{'Range': 'bytes=146901274-195868364'}{'Range': 'bytes=440703820-489670910'}{'Range': 'bytes=293802547-342769637'}
{'Range': 'bytes=538638002-587605092'}{'Range': 'bytes=587605093-636572183'}
{'Range': 'bytes=685539275-734506365'}
{'Range': 'bytes=636572184-685539274'}{'Range': 'bytes=734506366-783473471'}
线程的范围很奇怪,但我找不到哪里不对。
预计程序打印出的范围不按顺序,这取决于哪个线程首先 started 而不是取决于哪个线程 create第一。
您应该像这样更改您的代码:
target = open(self.path + self.filename, 'wb')
for num in range(self.thread_num):
blk = open('dl_block_' + str(num), 'rb')
target.write(blk.read())
blk.close()
os.remove('dl_block_' + str(num))
target.close()
此更改将在您第一次 运行 或从第二次 运行 重写目标文件时创建目标文件。
代码虽然有效,但看起来很丑陋。更多建议:
- 使用 threading 库来实现这个下载器
- 使用 barrier 等待所有工作完成而不是休眠 while-loop
blk_size = self.threads[num][1] - self.threads[num][0] + 1
- 使用
ls -l
检查文件大小
如您所见,我正在尝试制作一个多进程下载器。 它运行良好,直到我打开最终文件:它坏了。 我检查了一下,但找不到任何错误。但可能是header下载header错误,Range值错误。 这是代码
class MultiprocessDownload:
def __init__(self, url, path, filename, thread_num):
self.url = url
self.path = path
self.filename = filename
self.thread_num = thread_num
self.threads = []
self.head = requests.head(url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko'}).headers
self.length = int(self.head.get('Content-Length', False))
print(self.length)
self.proc = []
self.lock = []
for i in range(self.thread_num):
self.proc.append(None)
self.lock.append(None)
if self.length == False: raise Exception('Thik file does not support multiprocess download')
num = self.length // self.thread_num
last = -1
for i in range(1, thread_num + 1):
self.threads.append([last + 1, num * i])
last = num * i
self.threads[-1:][0][1] += self.length % self.thread_num
print(self.threads)
def thread(self, num):
self.lock[num] = _thread.allocate_lock()
with self.lock[num]:
header = {'Range': f'bytes=' + str(self.threads[num][0]) + '-' + str(self.threads[num][1])}
print(header)
self.proc[num] = 0
req = requests.get(self.url, headers=header, stream = True)
blk_size = self.threads[num][1] - self.threads[num][0]
file = open('dl_block_' + str(num), 'wb')
i = 0
for chunk in req.iter_content(chunk_size=512):
if chunk:
file.write(chunk)
i += 1
self.proc[num] = i * 512 / blk_size
file.close()
return 0
def getDownloadInfo(self):
info = []
total = 0
for i in range(self.thread_num):
if None in self.proc: return 1
info.append(str(self.proc[i] * 100) + '%')
total += self.proc[i]
info.append(str(total / self.thread_num * 100) + '%') #[*threads_info, total_info]
return info
def run(self):
for i in range(self.thread_num):
print(i)
_thread.start_new_thread(self.thread, (i,))
time.sleep(10)
locked = 1
print(self.lock)
while locked:
print(self.getDownloadInfo())
locked = 0
for n in range(self.thread_num):
if None in self.lock:
locked = 1
continue
if self.lock[n].locked():
locked += 1
print(locked)
time.sleep(1)
for num in range(self.thread_num):
blk = open('dl_block_' + str(num), 'rb')
target = open(self.path + self.filename, 'ab')
target.write(blk.read())
target.close()
blk.close()
os.remove('dl_block_' + str(num))
print('file downloaded as', self.path + self.filename)
它的一些输出: [[0, 48967091], [48967092, 97934182], [97934183, 146901273], [146901274, 195868364], [195868365, 244835455], [244835456, 293802546], [293802547, 342769637], [342769638, 391736728], [ 391736729, 440703819], [440703820, 489670910], [489670911, 538638001], [538638002, 587605092], [587605093, 636572183], [636572184, 685539274], [685539275, 734506365], [734506366, 783473471]] 0 1个 2个 3个 4个 5个 6个 7 8个 9 10 11 12 13 14 15 {'Range': 'bytes=48967092-97934182'} {'Range': 'bytes=244835456-293802546'}{'Range': 'bytes=195868365-244835455'} {'Range': 'bytes=489670911-538638001'}{'Range': 'bytes=391736729-440703819'}{'Range': 'bytes=342769638-391736728'}{'Range': 'bytes=0-48967091'}{'Range': 'bytes=97934183-146901273'} {'Range': 'bytes=146901274-195868364'}{'Range': 'bytes=440703820-489670910'}{'Range': 'bytes=293802547-342769637'}
{'Range': 'bytes=538638002-587605092'}{'Range': 'bytes=587605093-636572183'} {'Range': 'bytes=685539275-734506365'}
{'Range': 'bytes=636572184-685539274'}{'Range': 'bytes=734506366-783473471'}
线程的范围很奇怪,但我找不到哪里不对。
预计程序打印出的范围不按顺序,这取决于哪个线程首先 started 而不是取决于哪个线程 create第一。
您应该像这样更改您的代码:
target = open(self.path + self.filename, 'wb')
for num in range(self.thread_num):
blk = open('dl_block_' + str(num), 'rb')
target.write(blk.read())
blk.close()
os.remove('dl_block_' + str(num))
target.close()
此更改将在您第一次 运行 或从第二次 运行 重写目标文件时创建目标文件。
代码虽然有效,但看起来很丑陋。更多建议:
- 使用 threading 库来实现这个下载器
- 使用 barrier 等待所有工作完成而不是休眠 while-loop
blk_size = self.threads[num][1] - self.threads[num][0] + 1
- 使用
ls -l
检查文件大小