BigTable & gRPC - 内存泄漏,如何解决?

BigTable & gRPC - Memory Leak, How to Resolve?

我在从 Cloud Functions 写入 BigTable 时遇到问题。

一台机器的内存消耗上升,所以我怀疑某处有内存泄漏。

BigTable 设置:

cbt createtable test
cbt createfamily test data

最小代码:

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "google_auth.json"

import datetime
import random
import sys
import os
import string

from google.cloud import bigtable
from google.api_core.retry import Retry

def insert_row(project_id, instance, table, row_key, column_family_id, data):
    client_bigtable = bigtable.Client(project=project_id, admin=True)

    # Generate an instance so that we can put our features into BigTable
    instance = client_bigtable.instance(instance)
    table = instance.table(table)

    # We need to keep a list of table changes into a list, then perform a mutation to update all changes.
    rows = []

    # Create a timestamp, this is required to set a cell
    time = datetime.datetime.utcnow()

    for key, value in data.items():
        row = table.row(row_key)
        # If we cannot convert to string, and this happens with unicode values, we need to encode the string
        # into unicode first
        row.set_cell(
            column_family_id=column_family_id.encode(),
            column=key.encode(),
            value=str(value).encode("unicode-escape"),
            timestamp=time
        )
        rows.append(row)
    table_mutation = table.mutate_rows(rows, retry=Retry(deadline=1200))

    # If the code is not 0, then there is an issue
    if any([True if i.code != 0 else False for i in table_mutation]):
        logging.error(f"Function Name: {insert_row.__qualname__}, Function Locals: {locals()}")
        raise RuntimeError("BigTable table mutation error")`
data = {
    ''.join(random.choices(string.ascii_uppercase + string.digits, k=15)):''.join(random.choices(string.ascii_uppercase + string.digits, k=15)) for _ in range(100)
}

while True:
    insert_row(
        project_id="user-tracking-228117",
        instance="analytics",
        table="test",
        row_key=f"{random.randint(0, sys.maxsize)}_{datetime.datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')}",
        column_family_id="data",
        data=data
    )

内存消耗:

> 34755 ubuntu    20   0 1118552 128968  21952 S   2.3  0.4   0:04.99 python3.6
> 34755 ubuntu    20   0 1118552 129844  21952 S   2.0  0.4   0:05.05 python3.6
> 34755 ubuntu    20   0 1118552 129968  21952 S   1.0  0.4   0:05.08 python3.6
> 34755 ubuntu    20   0 1118552 130780  21952 S   1.6  0.4   0:05.13 python3.6
> 34755 ubuntu    20   0 1119116 132212  21952 S   9.2  0.4   0:05.41 python3.6
> 34755 ubuntu    20   0 1119116 134008  21952 S   4.3  0.4   0:05.54 python3.6
> 34755 ubuntu    20   0 1119116 134944  21952 S   2.6  0.4   0:05.62 python3.6
> 34755 ubuntu    20   0 1119116 137924  21952 S   6.6  0.4   0:05.82 python3.6
> 34755 ubuntu    20   0 1119116 138536  21952 S   1.7  0.4   0:05.87 python3.6
> 34755 ubuntu    20   0 1119116 139988  21952 S   3.0  0.4   0:05.96 python3.6
> 34755 ubuntu    20   0 1119096 141768  21952 S  11.2  0.4   0:06.30 python3.6
> 34755 ubuntu    20   0 1119096 144140  21952 S   4.6  0.4   0:06.44 python3.6
> 34755 ubuntu    20   0 1119096 145140  21952 S   2.6  0.4   0:06.52 python3.6
> 34755 ubuntu    20   0 1119096 148648  21952 S   7.6  0.5   0:06.75 python3.6
> 34755 ubuntu    20   0 1119096 148920  21952 S   0.7  0.5   0:06.77 python3.6
> 34755 ubuntu    20   0 1119096 149828  21952 S   2.3  0.5   0:06.84 python3.6
> 34755 ubuntu    20   0 1119608 152796  21952 S  12.2  0.5   0:07.21 python3.6
> 34755 ubuntu    20   0 1119608 154692  21952 S   4.6  0.5   0:07.35 python3.6
> 34755 ubuntu    20   0 1119608 155448  21952 S   1.6  0.5   0:07.40 python3.6
> 34755 ubuntu    20   0 1119608 156656  21952 S   2.6  0.5   0:07.48 python3.6
> 34755 ubuntu    20   0 1119608 157724  21952 S   2.6  0.5   0:07.56 python3.6
> 34755 ubuntu    20   0 1119608 160796  21952 S   6.9  0.5   0:07.77 python3.6
> 34755 ubuntu    20   0 1119608 161616  21952 S   1.7  0.5   0:07.82 python3.6
> 34755 ubuntu    20   0 1185468 163896  21952 S  12.8  0.5   0:08.21 python3.6
> 34755 ubuntu    20   0 1185476 167612  21952 S   7.6  0.5   0:08.44 python3.6
> 34755 ubuntu    20   0 1185476 170552  21952 S   6.6  0.5   0:08.64 python3.6
> 34755 ubuntu    20   0 1185804 173848  21952 S  13.5  0.5   0:09.05 python3.6
> 34755 ubuntu    20   0 1185804 173848  21952 S   0.3  0.5   0:09.06 python3.6
> 34755 ubuntu    20   0 1185804 176724  21952 S   6.6  0.5   0:09.26 python3.6
> 34755 ubuntu    20   0 1185804 180588  21952 S   8.5  0.6   0:09.52 python3.6
> 34755 ubuntu    20   0 1185804 181524  21952 S   2.3  0.6   0:09.59 python3.6
> 34755 ubuntu    20   0 1186116 183228  21952 S  12.5  0.6   0:09.97 python3.6
> 34755 ubuntu    20   0 1186116 183228  21952 S   0.7  0.6   0:09.99 python3.6
> 34755 ubuntu    20   0 1186116 183276  21952 S   0.3  0.6   0:10.00 python3.6
> 34755 ubuntu    20   0 1186116 183296  21952 S   0.3  0.6   0:10.01 python3.6
> 34755 ubuntu    20   0 1186116 183332  21952 S   0.3  0.6   0:10.02 python3.6
> 34755 ubuntu    20   0 1186116 183436  21952 S   0.7  0.6   0:10.04 python3.6
> 34755 ubuntu    20   0 1186116 183516  21952 S   0.7  0.6   0:10.06 python3.6
> 34755 ubuntu    20   0 1186116 183616  21952 S   0.3  0.6   0:10.07 python3.6
> 34755 ubuntu    20   0 1186116 183656  21952 S   0.3  0.6   0:10.08 python3.6
> 34755 ubuntu    20   0 1186116 183656  21952 S   0.3  0.6   0:10.09 python3.6
> 34755 ubuntu    20   0 1186116 183680  21952 S   0.3  0.6   0:10.10 python3.6
> 34755 ubuntu    20   0 1186116 183708  21952 S   0.3  0.6   0:10.11 python3.6
> 34755 ubuntu    20   0 1186116 183728  21952 S   0.3  0.6   0:10.12 python3.6

内存:128968 ==> 183728

过了 运行 一段时间后,我 运行 遇到了一个错误:

---------------------------------------------------------------------------
_Rendezvous                               Traceback (most recent call last)
~/python3_virtualenv/python3_env/lib/python3.6/site-packages/google/api_core/grpc_helpers.py in next(self)

~/python3_virtualenv/python3_env/lib/python3.6/site-packages/grpc/_channel.py in __next__(self)

~/python3_virtualenv/python3_env/lib/python3.6/site-packages/grpc/_channel.py in _next(self)

_Rendezvous: <_Rendezvous of RPC that terminated with:
    status = StatusCode.UNAVAILABLE
    details = "Getting metadata from plugin failed with error: [('system library', 'fopen', 'Too many open files'), ('BIO routines', 'BIO_new_file', 'system lib'), ('x509 certificate routines', 'X509_load_cert_crl_file', 'system lib')]"
    debug_error_string = "{"created":"@1548443075.831642536","description":"Getting metadata from plugin failed with error: [('system library', 'fopen', 'Too many open files'), ('BIO routines', 'BIO_new_file', 'system lib'), ('x509 certificate routines', 'X509_load_cert_crl_file', 'system lib')]","file":"src/core/lib/security/credentials/plugin/plugin_credentials.cc","file_line":82,"grpc_status":14}"
>

The above exception was the direct cause of the following exception:

ServiceUnavailable                        Traceback (most recent call last)
<ipython-input-3-9f1f73192607> in <module>
     12         row_key=f"{random.randint(0, sys.maxsize)}_{datetime.datetime.utcnow().strftime('%Y_%m_%d_%H_%M_%S_%f')}",
     13         column_family_id="data",
---> 14         data=data
     15     )
     16 

<ipython-input-1-e79f91f922f9> in insert_row(project_id, instance, table, row_key, column_family_id, data)
     34         )
     35         rows.append(row)
---> 36     table_mutation = table.mutate_rows(rows, retry=Retry(deadline=1200))
     37 
     38     # If the code is not 0, then there is an issue

~/python3_virtualenv/python3_env/lib/python3.6/site-packages/google/cloud/bigtable/table.py in mutate_rows(self, rows, retry)

~/python3_virtualenv/python3_env/lib/python3.6/site-packages/google/cloud/bigtable/table.py in __call__(self, retry)

~/python3_virtualenv/python3_env/lib/python3.6/site-packages/google/api_core/retry.py in retry_wrapped_func(*args, **kwargs)

~/python3_virtualenv/python3_env/lib/python3.6/site-packages/google/api_core/retry.py in retry_target(target, predicate, sleep_generator, deadline, on_error)

~/python3_virtualenv/python3_env/lib/python3.6/site-packages/google/cloud/bigtable/table.py in _do_mutate_retryable_rows(self)

~/python3_virtualenv/python3_env/lib/python3.6/site-packages/google/api_core/grpc_helpers.py in next(self)

~/python3_virtualenv/python3_env/lib/python3.6/site-packages/six.py in raise_from(value, from_value)

ServiceUnavailable: 503 Getting metadata from plugin failed with error: [('system library', 'fopen', 'Too many open files'), ('BIO routines', 'BIO_new_file', 'system lib'), ('x509 certificate routines', 'X509_load_cert_crl_file', 'system lib')]

我认为是 gRPC 客户端的问题。

Cloud Functions 的一个问题是所有对象都不是持久化的,所以每次写入请求都会打开客户端,但是由于 gRPC 客户端没有释放,内存消耗仍然在该实例下使用.但是,由于gRPC客户端没有关闭,请问有没有办法强行关闭BigTable下使用的gRPC客户端?

您应该可以使用以下代码关闭客户端。它是在 pull request https://github.com/googleapis/google-cloud-python/pull/6630.

中引入的
client.table_data_client.transport.channel.close()