对 pandas 数据框中的主机执行 Ping

Ping hosts in pandas dataframes

我有一个包含 15,000 个主机 IP 地址(IPv4 和 IPv6)的数据框,我正在尝试通过对主机执行 ping 操作来检查其中哪些主机处于启动(运行)状态。

我编写了以下代码

def ping_host(hostname):
    """Ping ``hostname`` once via the shell and classify the result.

    ``hostname`` is a dataframe cell such as "10.0.0.10". The value is echoed
    with print() before anything else happens.

    Returns:
        "No Hostname" for a NaN cell, "Up" when the ping command exits with
        status 0, and "Down" for any other exit status.
    """
    print(hostname)

    # A NaN compares unequal to itself, so this detects an empty cell.
    is_missing = hostname != hostname
    if is_missing:
        return "No Hostname"

    command = "ping -c 1 " + hostname + " > /dev/null 2>&1"
    exit_status = os.system(command)

    # Exit status 0 is the only outcome treated as a successful ping.
    reachable = exit_status == 0
    print(hostname, 'is up!' if reachable else 'is down!')
    return "Up" if reachable else "Down"
# Calls ping_host once per address, one after another; each call blocks
# until its ping command has exited.
df['Host_Status'] = df['IP Addresses'].apply(ping_host)

这需要很长时间才能完成。有更好/更快的方法吗?

我试过了 -

# Same per-address call as before, routed through the swifter accessor.
df['Host_Status'] = df['IP Addresses'].swifter.apply(ping_host)

但即便如此,速度也没有提高多少。

编辑 1-

我用多线程(分别尝试了 32/64/256 个线程)让它运行了 5 小时(我认为在任何给定时刻只有 32 个线程在工作),但是在脚本结束时

from multiprocessing.pool import ThreadPool

def ping_host(hostname):
    """Send a single ping to ``hostname`` and return its status label.

    ``hostname`` is one cell of the address column, e.g. "10.0.0.10"; it is
    printed first so progress is visible.

    Returns:
        "No Hostname" when the cell is NaN, "Up" when the ping command exits
        with status 0, otherwise "Down".
    """
    print(hostname)
    if hostname != hostname:  # NaN never equals itself -> empty cell
        return "No Hostname"

    status = os.system("ping -c 1 " + hostname + " > /dev/null 2>&1")

    # Handle the failure case first, then fall through to success.
    if status != 0:
        print(hostname, 'is down!')
        return "Down"

    print(hostname, 'is up!')
    return "Up"

# The number of worker threads is a tuning knob; adjust it to taste.
with ThreadPool(processes=32) as pool:
    df['Host_Status'] = pool.map(
        ping_host,
        df['IP Addresses'],
    )

# Write the dataframe, including the new status column, to a workbook.
df.to_excel('output.xlsx')

当我尝试导出它时出现以下错误 -

Exception in thread Thread-1:
Traceback (most recent call last):
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 64, in _read_forever
    self._read_and_dispatch_next_frame()
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 37, in _read_and_dispatch_next_frame
    direction, frame = self._read_frame()
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 45, in _read_frame
    buff = readall(self._socket.recv, 4)
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_io.py", line 110, in readall
    chunk = read_fn(sz - have)
TimeoutError: [Errno 60] Operation timed out
Traceback (most recent call last):
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_code_executor.py", line 112, in add_exec
    self.finish_exec(more)
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_bundle/pydev_console_utils.py", line 210, in finish_exec
    return server.notifyFinished(more)
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/pydev/_pydev_comm/pydev_transport.py", line 226, in _req
    return super(TSyncClient, self)._req(_api, *args, **kwargs)
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/thrift.py", line 160, in _req
    return self._recv(_api)
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/thrift.py", line 172, in _recv
    fname, mtype, rseqid = self._iprot.read_message_begin()
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/protocol/binary.py", line 372, in read_message_begin
    self.trans, strict=self.strict_read)
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/protocol/binary.py", line 164, in read_message_begin
    sz = unpack_i32(inbuf.read(4))
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/transport/__init__.py", line 32, in read
    return readall(self._read, sz)
  File "/Applications/PyCharm.app/Contents/plugins/python/helpers/third_party/thriftpy/_shaded_thriftpy/transport/__init__.py", line 20, in readall
    "End of file reading from transport")
_shaded_thriftpy.transport.TTransportException: TTransportException(type=4, message='End of file reading from transport')

尝试 2 Asyncio-

读取数据框后,我尝试了 -

async def async_ping(host):
    """Ping ``host`` once through a shell subprocess and classify the result.

    Returns 'Alive' when the command exits with status 0 and 'Timeout' for
    any other exit status.
    """
    proc = await asyncio.create_subprocess_shell(
        f"/sbin/ping -c 1 {host} > /dev/null 2>&1",
        # NOTE(review): the command string already redirects both streams to
        # /dev/null, so these pipes presumably never carry any data.
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE
    )
    # Wait for the ping process to exit; only its exit status is used.
    status = await proc.wait()

    if status == 0:
        return 'Alive'
    else:
        return 'Timeout'


async def async_main(hosts):
    """Start one ping task per host and return a generator over the results."""
    tasks1 = deque()
    # Every host gets its own task right away; nothing in this function
    # limits how many ping subprocesses are started at the same time.
    for host in hosts:
        tasks1.append(asyncio.create_task(async_ping(host)))

    # gather() returns results in the order the tasks were passed in.
    return (t1 for t1 in await asyncio.gather(*tasks1))

# Time the whole ping sweep.
start = time.perf_counter()

loop = asyncio.get_event_loop()
asyncio.set_event_loop(loop)
# Blocks here until async_main (and every ping task it started) finishes.
resp = loop.run_until_complete(async_main(df['IP Addresses'].to_list()))
loop.close()

finish = time.perf_counter()

# async_main returns a generator, so materialise it before assigning.
df['Status'] = list(resp)
print(df)
print(f'Runtime: {round(finish-start,4)} seconds')

我遇到了阻塞错误(BlockingIOError)-

Traceback (most recent call last):
  File "~path/venv/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-2646fe9bd357>", line 26, in <module>
    resp = loop.run_until_complete(async_main(df['IP Addresses'].to_list()))
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 587, in run_until_complete
    return future.result()
  File "<ipython-input-6-2646fe9bd357>", line 20, in async_main
    return (t1 for t1 in await asyncio.gather(*tasks1))
  File "<ipython-input-6-2646fe9bd357>", line 5, in async_ping
    stderr=asyncio.subprocess.PIPE
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/subprocess.py", line 202, in create_subprocess_shell
    stderr=stderr, **kwds)
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_events.py", line 1514, in subprocess_shell
    protocol, cmd, True, stdin, stdout, stderr, bufsize, **kwargs)
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/unix_events.py", line 190, in _make_subprocess_transport
    **kwargs)
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/base_subprocess.py", line 37, in __init__
    stderr=stderr, bufsize=bufsize, **kwargs)
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/asyncio/unix_events.py", line 775, in _start
    universal_newlines=False, bufsize=bufsize, **kwargs)
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/subprocess.py", line 800, in __init__
    restore_signals, start_new_session)
  File "/usr/local/Cellar/python/3.7.7/Frameworks/Python.framework/Versions/3.7/lib/python3.7/subprocess.py", line 1482, in _execute_child
    restore_signals, start_new_session, preexec_fn)
BlockingIOError: [Errno 35] Resource temporarily unavailable

你可以用线程池来做。

已更新:调整了参数,使 ping 在 1 秒后超时,块大小(chunksize)设为 1,使用更多线程,并跳过中间的 shell。我在本地机器上把少量 IP 地址重复到总共 14000 个进行了测试,运行正常。等待时间和块大小可能是最重要的两项改动:默认情况下,ping 在 10 秒后才超时,而线程池中的每个线程要执行完 14000/线程数 次 ping 才会把结果返回主线程。考虑到测试这段代码时出现了超时,这些更新应该会带来明显的改进。

from multiprocessing.pool import ThreadPool
import subprocess as subp

def ping_host(hostname):
    """Ping ``hostname`` once, without a shell, and return its status label.

    Args:
        hostname: One cell of the address column, e.g. "10.0.0.10". Missing
            cells may arrive as NaN, ``None`` or a blank string.

    Returns:
        "No Hostname" for a missing or blank cell, "Up" when ping exits with
        status 0, and "Down" for any other exit status.
    """
    print(hostname)

    # NaN compares unequal to itself. None and blank strings are rejected
    # here as well: passing None to subprocess raises TypeError, and a blank
    # argument only makes ping fail after a process has been spawned.
    if hostname is None or hostname != hostname:
        return "No Hostname"
    target = str(hostname).strip()
    if not target:
        return "No Hostname"

    # The argument list is executed directly, so no shell parses the address.
    # NOTE: "-W 1" is a 1-second reply wait for Linux (iputils) ping; BSD and
    # macOS ping read -W in milliseconds, so adjust the value there.
    response = subp.run(
        ["ping", "-c", "1", "-W", "1", target],
        stdout=subp.DEVNULL,
        stderr=subp.DEVNULL,
    ).returncode

    if response == 0:
        print(hostname, 'is up!')
        return "Up"

    print(hostname, 'is down!')
    return "Down"

# Worker count is a tuning knob; chunksize=1 hands addresses to the worker
# threads one at a time instead of in large batches.
with ThreadPool(processes=128) as pool:
    df['Host_Status'] = pool.map(
        ping_host,
        df['IP Addresses'],
        chunksize=1,
    )

当我看到这个问题时,我想尝试使用 asyncio 模块来并发运行 ping。下面的脚本在大约 7 秒内跑完了我的测试 IP 地址列表;而同步地测试同一份 IP 地址列表大约需要 127 秒。我使用的是 Python 3.8.2 和 Windows 10 操作系统。也许它对你有用。

import asyncio
import time
from collections import deque

import pandas as pd


async def async_ping(host, semaphore):
    """Ping ``host`` up to five times and report whether any attempt succeeded.

    Args:
        host: Address or name to ping; converted with ``str()`` and passed to
            ping as a single argument.
        semaphore: ``asyncio.Semaphore`` shared by all callers; it is held for
            the whole retry loop, which bounds how many ping processes run at
            the same time.

    Returns:
        'Alive' as soon as one ping exits with status 0, otherwise 'Timeout'.
    """
    async with semaphore:
        for _ in range(5):
            try:
                # Run ping directly from an argument list: no shell parses
                # ``host``, so a value such as "|| true" cannot change the
                # command. The raw string keeps the backslashes literal.
                proc = await asyncio.create_subprocess_exec(
                    r'C:\Windows\System32\ping',
                    str(host), '-n', '1', '-w', '1', '-l', '1',
                    # The output is never read, so discard it instead of
                    # opening pipes: wait() on an unread PIPE can deadlock
                    # and every pipe costs a handle per process.
                    stdout=asyncio.subprocess.DEVNULL,
                    stderr=asyncio.subprocess.DEVNULL,
                )
            except FileNotFoundError:
                # The shell-based version reported a missing ping binary as a
                # failed ping; keep that result rather than letting one
                # exception abort the caller's gather(). Retrying is pointless.
                return 'Timeout'

            if await proc.wait() == 0:
                return 'Alive'

        return 'Timeout'


async def async_main(hosts, limit):
    """Ping every host concurrently, with at most ``limit`` pings in flight.

    One task is created per entry in ``hosts``; all tasks share a single
    semaphore initialised to ``limit``.

    Returns:
        A generator over the per-host results, in the same order as ``hosts``.
    """
    gate = asyncio.Semaphore(limit)
    pending = [
        asyncio.create_task(async_ping(address, gate))
        for address in hosts
    ]
    # gather() keeps the results in task order, i.e. in host order.
    outcomes = await asyncio.gather(*pending)
    return (outcome for outcome in outcomes)


# Load the hosts to test; the addresses are read from the 'IP' column.
host_df = pd.read_csv('ping_ip_dest.csv')

# set concurrent task limit
limit = 256

start = time.perf_counter()

# asyncio.run() creates a fresh event loop, runs the coroutine to completion
# and closes the loop afterwards. It replaces the manual
# get_event_loop() / set_event_loop() / run_until_complete() / close()
# sequence, which relies on get_event_loop() being called with no running
# loop -- a usage that is deprecated in newer Python versions.
resp = asyncio.run(async_main(host_df['IP'].to_list(), limit))

finish = time.perf_counter()

# async_main returns a generator, so materialise it before assigning.
host_df['Status'] = list(resp)
print(host_df)
print(f'Runtime: {round(finish-start,4)} seconds')

OUTPUT:
0       N. Virginia (Virginia, USA)     23.235.60.92    Alive
1               Dallas (Texas, USA)    69.162.81.155    Alive
2            Denver (Colorado, USA)   192.199.248.75    Alive
3              Miami (Florida, USA)  162.254.206.227    Alive
4      Minneapolis (Minnesota, USA)  207.250.234.100    Alive
5                 Montreal (Canada)  184.107.126.165    Alive
6          New York (New York, USA)    206.71.50.230    Alive
7   San Francisco (California, USA)      65.49.22.66    Alive
8         Seattle (Washington, USA)       23.81.0.59    Alive
9     Washington DC (Virginia, USA)    207.228.238.7    Alive
10         Buenos Aires (Argentina)     131.255.7.26    Alive
11          Amsterdam (Netherlands)   95.142.107.181    Alive
12             Copenhagen (Denmark)   185.206.224.67    Alive
13              Frankfurt (Germany)  195.201.213.247    Alive
14          London (United Kingdom)    5.152.197.179    Alive
15                   Madrid (Spain)    195.12.50.155    Alive
16                   Paris (France)    51.158.22.211    Alive
17                  Warsaw (Poland)   46.248.187.100    Alive
18      Johannesburg (South Africa)   197.221.23.194    Alive
19                  Beijing (China)    47.94.129.116    Alive
20                Hong Kong (China)     103.1.14.238    Alive
21                   Mumbai (India)   103.120.178.71    Alive
22                 Shanghai (China)   106.14.156.213    Alive
23                    Tokyo (Japan)     110.50.243.6    Alive
24                         Brisbane   223.252.19.130    Alive
25                           Sydney      101.0.86.43    Alive
26                Tel-Aviv (Israel)   185.229.226.83    Alive
27                             Test    47.94.129.115  Timeout
Runtime: 3.1945 seconds