这个 python pip install script 是正版还是木马?

Is this python pip install script legitmate or a trojan horse?

我正在按照此处的教程安装 python:http://docs.python-guide.org/en/latest/starting/install/win/

当涉及到安装设置工具的部分时,将其 links 到我下载的 ez_setup.py 脚本和 运行。它似乎安装了 setup_tools。然后我点击 link 到 pip-get.py 并只注意到它 包含以下内容:

#!/usr/bin/env python
import sys
def main():
    sys.exit(
        "You're using an outdated location for the get-pip.py script, please "
        "use the one available from https://bootstrap.pypa.io/get-pip.py"
    )
if __name__ == "__main__":
    main()

引用的 url 包含以下看似可疑的代码。这是恶意代码还是安装 pip 的合法方式?

注意:我已经删除了 "binary blob" 的大部分行(例如 18000 行),因为如果它是恶意的,我不希望这个 post 成为感染媒介。

#!/usr/bin/env python
#
# Hi There!
# You may be wondering what this giant blob of binary data here is, you might
# even be worried that we're up to something nefarious (good for you for being
# paranoid!). This is a base85 encoding of a zip file, this zip file contains
# an entire copy of pip.
#
# Pip is a thing that installs packages, pip itself is a package that someone
# might want to install, especially if they're looking to run this get-pip.py
# script. Pip has a lot of code to deal with the security of installing
# packages, various edge cases on various platforms, and other such sort of
# "tribal knowledge" that has been encoded in its code base. Because of this
# we basically include an entire copy of pip inside this blob. We do this
# because the alternatives are attempt to implement a "minipip" that probably
# doesn't do things correctly and has weird edge cases, or compress pip itself
# down into a single file.
#
# If you're wondering how this is created, it is using an invoke task located
# in tasks/generate.py called "installer". It can be invoked by using
# ``invoke generate.installer``.

import os.path
import pkgutil
import shutil
import sys
import struct
import tempfile

# Useful for very coarse version differentiation.
PY2 = sys.version_info[0] == 2
PY3 = sys.version_info[0] == 3

if PY3:
    iterbytes = iter
else:
    def iterbytes(buf):
        return (ord(byte) for byte in buf)

try:
    from base64 import b85decode
except ImportError:
    _b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                    b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")

    def b85decode(b):
        _b85dec = [None] * 256
        for i, c in enumerate(iterbytes(_b85alphabet)):
            _b85dec[c] = i

        padding = (-len(b)) % 5
        b = b + b'~' * padding
        out = []
        packI = struct.Struct('!I').pack
        for i in range(0, len(b), 5):
            chunk = b[i:i + 5]
            acc = 0
            try:
                for c in iterbytes(chunk):
                    acc = acc * 85 + _b85dec[c]
            except TypeError:
                for j, c in enumerate(iterbytes(chunk)):
                    if _b85dec[c] is None:
                        raise ValueError(
                            'bad base85 character at position %d' % (i + j)
                        )
                raise
            try:
                out.append(packI(acc))
            except struct.error:
                raise ValueError('base85 overflow in hunk starting at byte %d'
                                 % i)

        result = b''.join(out)
        if padding:
            result = result[:-padding]
        return result


def bootstrap(tmpdir=None):
    # Import pip so we can use it to install pip and maybe setuptools too
    import pip
    from pip.commands.install import InstallCommand
    from pip.req import InstallRequirement

    # Wrapper to provide default certificate with the lowest priority
    class CertInstallCommand(InstallCommand):
        def parse_args(self, args):
            # If cert isn't specified in config or environment, we provide our
            # own certificate through defaults.
            # This allows user to specify custom cert anywhere one likes:
            # config, environment variable or argv.
            if not self.parser.get_default_values().cert:
                self.parser.defaults["cert"] = cert_path  # calculated below
            return super(CertInstallCommand, self).parse_args(args)

    pip.commands_dict["install"] = CertInstallCommand

    implicit_pip = True
    implicit_setuptools = True
    implicit_wheel = True

    # Check if the user has requested us not to install setuptools
    if "--no-setuptools" in sys.argv or os.environ.get("PIP_NO_SETUPTOOLS"):
        args = [x for x in sys.argv[1:] if x != "--no-setuptools"]
        implicit_setuptools = False
    else:
        args = sys.argv[1:]

    # Check if the user has requested us not to install wheel
    if "--no-wheel" in args or os.environ.get("PIP_NO_WHEEL"):
        args = [x for x in args if x != "--no-wheel"]
        implicit_wheel = False

    # We only want to implicitly install setuptools and wheel if they don't
    # already exist on the target platform.
    if implicit_setuptools:
        try:
            import setuptools  # noqa
            implicit_setuptools = False
        except ImportError:
            pass
    if implicit_wheel:
        try:
            import wheel  # noqa
            implicit_wheel = False
        except ImportError:
            pass

    # We want to support people passing things like 'pip<8' to get-pip.py which
    # will let them install a specific version. However because of the dreaded
    # DoubleRequirement error if any of the args look like they might be a
    # specific for one of our packages, then we'll turn off the implicit
    # install of them.
    for arg in args:
        try:
            req = InstallRequirement.from_line(arg)
        except:
            continue

        if implicit_pip and req.name == "pip":
            implicit_pip = False
        elif implicit_setuptools and req.name == "setuptools":
            implicit_setuptools = False
        elif implicit_wheel and req.name == "wheel":
            implicit_wheel = False

    # Add any implicit installations to the end of our args
    if implicit_pip:
        args += ["pip"]
    if implicit_setuptools:
        args += ["setuptools"]
    if implicit_wheel:
        args += ["wheel"]

    delete_tmpdir = False
    try:
        # Create a temporary directory to act as a working directory if we were
        # not given one.
        if tmpdir is None:
            tmpdir = tempfile.mkdtemp()
            delete_tmpdir = True

        # We need to extract the SSL certificates from requests so that they
        # can be passed to --cert
        cert_path = os.path.join(tmpdir, "cacert.pem")
        with open(cert_path, "wb") as cert:
            cert.write(pkgutil.get_data("pip._vendor.requests", "cacert.pem"))

        # Execute the included pip and use it to install the latest pip and
        # setuptools from PyPI
        sys.exit(pip.main(["install", "--upgrade"] + args))
    finally:
        # Remove our temporary directory
        if delete_tmpdir and tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)


def main():
    tmpdir = None
    try:
        # Create a temporary working directory
        tmpdir = tempfile.mkdtemp()

        # Unpack the zipfile into the temporary directory
        pip_zip = os.path.join(tmpdir, "pip.zip")
        with open(pip_zip, "wb") as fp:
            fp.write(b85decode(DATA.replace(b"\n", b"")))

        # Add the zipfile to sys.path so that we can import it
        sys.path.insert(0, pip_zip)

        # Run the bootstrap
        bootstrap(tmpdir=tmpdir)
    finally:
        # Clean up our temporary working directory
        if tmpdir:
            shutil.rmtree(tmpdir, ignore_errors=True)


DATA = b"""
P)h>@6aWAK2mly|Wk_|XSV~_F001E<000jF003}la4%n9X>MtBUtcb8d7WDSZ`-yK|J{ED>nxD8%G$E
w;SJgIu%S({0^J&<?b`!VLy#@n<|0cPDLHYs{qOJYNQ#stXW2BYmPFnc-j@&WsGL3fqE+&Xr6|AP<(}
1tW?Pk$wXAk5P1kMHN}i@n?CMH3EL*CoXd9mD=gGvpFRIN(lpFh4sqU_B>P#wbpYJnS!bH_kszWzd@`
H<yy>jp@sr7ZwDlGf&a-YEanKd3T&PZSB8xIW}4#deBf88!4R+dd5;5p$#pOUa^5aT0Nk+BIptF)mwO
xjJjFJ?H+W&B2{TtylTo<j0GFr)fNUDQIC5u!#EK|<945pLJg)3w2@^h6n(oAv4gUo>xXfT^H*v|rV`
Qf>&QoJEizcI-q^mp#kf>pM6NX_ZErFw2hh^uk2n|nM{8?fo*PBWjxO8U$uZxv0l}Dh+oJ9x(83VFs)
2<8d2{Jx`7Y(rd<REF6I*yg7!+JBfyDphOs8@@)t|QEJSw*YDBeZyaWmzU6csB~+(~1M_TE}r6vxIiR
?Ik89#CMk(g<2|&Lyn3;TdKH*QW3HbSiA0U_j??u<5;{jUXYm#(!?cp#E2HYewxW0s;v~`5sv}w`DLA
iCeGrx?
)H
"""


if __name__ == "__main__":
    main()

如果您谈论的是来自 https://bootstrap.pypa.io/get-pip.py 的脚本,那是完全安全的*。

其实你可以按照评论中的说明,自己解码编码后的文字,看看完全没问题。

如果你特别偏执,只需将其复制到气隙计算机上并尝试执行它。

*假设您的连接实际上是安全的并且 pypa.io 没有受到损害。但如果是这样的话,那么你可能有更大的问题。