如何将数据附加到 pandas 数据框?

How to append data to a pandas dataframe?

我有一组相当复杂的函数,它们调用 API 并将结果集追加到一个 pandas 数据框中。问题是:在循环中每次追加之后打印数据框时,我都能看到新的值;但循环结束后,final_df 中却只有一条记录。有什么想法吗?

# Empty module-level frame; it fixes the output column names and their order.
df = pd.DataFrame(
    columns=[
        'repo',
        'number',
        'title',
        'branch',
        'merged_at',
        'created_at',
        'authored_by',
        'merged_by',
        'from_version',
        'to_version',
    ],
)

  
def get_prs(repo, pr_number):
    """Fetch a single pull request and return its decoded JSON body.

    Args:
        repo: Repository identifier; converted with ``str`` and appended to
            ``pgv.github_pr_url``.
        pr_number: Pull request number; converted with ``str`` and placed
            after ``/pulls/`` in the URL.

    Returns:
        Whatever ``response.json()`` yields for the GET request.
    """
    pr_url = pgv.github_pr_url + str(repo) + '/pulls/' + str(pr_number)
    return requests.request("GET", pr_url, headers=pgv.headers).json()
def get_commits(repo, from_version, to_version):
    """Find the first merge-pull-request commit between two versions.

    Sends a GET request to the ``/compare/<from_version>...<to_version>`` URL
    built on ``pgv.github_commits_url`` and scans the returned commits in
    order for a message that starts with
    ``Merge pull request #<number> from/<branch>``.

    Args:
        repo: Repository identifier; converted with ``str`` for the URL.
        from_version: Base of the comparison.
        to_version: Head of the comparison.

    Returns:
        A ``(number, branch)`` tuple of strings taken from the first matching
        commit message (``branch`` is the remainder of that line), or ``None``
        when no commit message matches.

    Raises:
        KeyError: If the decoded response has no ``'commits'`` entry.
    """
    compare_url = (
        pgv.github_commits_url + str(repo)
        + '/compare/' + str(from_version) + '...' + str(to_version)
    )
    response = requests.request("GET", compare_url, headers=pgv.headers).json()

    # Raw string: "\A" and "\d" are regex escapes, not Python string escapes.
    # Without the r-prefix newer Python versions warn about invalid escape
    # sequences. The pattern text itself is unchanged. Compiled once, outside
    # the loop.
    merge_message = re.compile(
        r"\AMerge pull request #(?P<number>\d+) from/(?P<branch>.*)"
    )

    for commit in response['commits']:
        found = merge_message.search(commit.get('commit').get('message'))
        if found is not None:
            # Only the first matching commit is reported.
            return found.group('number'), found.group('branch')

    # Explicit: callers must handle "no merge commit in this range".
    return None
#query GitHub to get all commits between from_version and to_version.
def return_deploy_events():
    """Fetch rows from ``pgv.url`` and print a dataframe of pull-request details.

    POSTs ``pgv.query_params`` to ``pgv.url`` and decodes the JSON reply. When
    the reply contains a ``"jobs"`` key the function only sleeps for five
    seconds. Otherwise it walks ``response['query_result']['data']['rows']``
    and, for each row, looks up the merge commit (``get_commits``) and its
    pull request (``get_prs``) and builds a record from them.

    NOTE(review): ``df.append`` returns a new DataFrame and leaves ``df``
    untouched, and ``df`` is never reassigned here. Every iteration therefore
    overwrites ``final_df`` with a frame holding only that iteration's record,
    which is why the final ``print`` shows a single row.

    NOTE(review): ``final_df`` is bound only inside the loop. If the ``"jobs"``
    branch is taken, or every row raises, the final ``print(final_df)`` fails
    with ``UnboundLocalError``.

    Returns:
        None. The resulting frame is only printed.
    """
    # NOTE(review): never read again in this function.
    final_object = []
    response = requests.request('POST',pgv.url, params = {'api_key' : pgv.key}, json = pgv.query_params)
    response = response.json()
    if "jobs" in response:
        # NOTE(review): nothing is retried after the sleep; confirm the intent.
        time.sleep(5)
    else:
        for i in range(0,len(response['query_result']['data']['rows'])):
            # print(response['query_result']['data']['rows'])
            # get_prs(response['query_result']['data']['rows'][i].get('REPO'),get_commits(response['query_result']['data']['rows'][i].get('REPO'),response['query_result']['data']['rows'][i].get('FROM_VERSION'), response['query_result']['data']['rows'][i].get('TO_VERSION'))).get('merged_at')
            try:
                
                repo = response['query_result']['data']['rows'][i].get('REPO')  
                from_version = response['query_result']['data']['rows'][i].get('FROM_VERSION')
                to_version = response['query_result']['data']['rows'][i].get('TO_VERSION') 
                # print(get_prs(repo,get_commits(repo,from_version, to_version)))
                
                # get_commits returns (number, branch) or None; indexing None
                # raises TypeError, which the except below silently discards.
                pull_requests = get_prs(repo,get_commits(repo,from_version, to_version)[0]) 
                ##pack into all one return
                # Appends to the (always empty) module-level df, so final_df
                # holds exactly one row after this statement.
                final_df = df.append({
                'repo':repo, 
                'title': pull_requests.get('title'), 
                # Repeats the compare request already made above.
                'branch': get_commits(repo,from_version, to_version)[1], 
                'created_at': pull_requests.get('created_at'),
                'merged_at': pull_requests.get('merged_at'),
                'authored_by': pull_requests.get('user').get('login'), 
                'merged_by': pull_requests.get('merged_by').get('login'),
                # Repeats the compare request a third time.
                'number': get_commits(repo,from_version, to_version)[0],
                'from_version': from_version,
                'to_version': to_version,}, ignore_index = True)  
                # print(get_commits(repo,from_version, to_version))
**HERE, WHEN UNCOMMENTED, PRINTS ALL RECORDS I WANT APPENDED **
                # print(final_df.head(10))
            # Any failure for a row (request error, missing key, None result)
            # is dropped without a trace.
            except Exception:
                pass
                
            # 'title':, 'branch', 
            # 'merged_at', 'created_at', 'authored_by', 'merged_by', 
            
            # 'from_version': response['query_result']['data']['rows'][i].get('FROM_VERSION'), 'to_version':response['query_result']['data']['rows'][i].get('TO_VERSION')}, 
                # ignore_index = True)
**BELOW IS WHERE IT PRINTS ONLY 1 RECORD **
    # Prints whatever the last successful iteration assigned to final_df.
    print(final_df)
    # final_df = json.loads(final_df.to_json(orient = 'records'))
    # gec.json_to_s3(final_df, glob_common_vars.s3_resource,glob_common_vars.s3_bucket_name, 'test/test.json.gzip')
    
# Module-level call: runs the collection as soon as this file is executed.
return_deploy_events()

我认为问题在于:每次循环都把新的一行赋给了同一个变量 final_df,而 df 本身从未被更新,所以循环结束后打印出来的只有最后一行。可以尝试把每一行都追加到一个结果列表中。

def return_deploy_events():
    """Collect pull-request details for every queried row into one dataframe.

    POSTs ``pgv.query_params`` to ``pgv.url`` and decodes the JSON reply. When
    the reply contains a ``"jobs"`` key the function sleeps for five seconds
    and produces an empty frame. Otherwise, for each entry of
    ``response['query_result']['data']['rows']`` it looks up the merge commit
    (``get_commits``) and its pull request (``get_prs``) and records one row.

    Rows are gathered in a plain list and turned into a DataFrame once at the
    end. ``df.append`` returns a new frame without modifying ``df`` (and no
    longer exists in pandas 2.x), so assigning its result inside the loop kept
    only the most recent row.

    Returns:
        The resulting ``pandas.DataFrame`` (also printed), with the same
        columns as the module-level ``df``. It is empty when nothing was
        collected.
    """
    response = requests.request('POST', pgv.url, params={'api_key': pgv.key}, json=pgv.query_params)
    response = response.json()

    records = []
    if "jobs" in response:
        # NOTE(review): presumably the query has not finished yet; nothing is
        # retried after the sleep, matching the previous behaviour.
        time.sleep(5)
    else:
        for index, row in enumerate(response['query_result']['data']['rows']):
            try:
                repo = row.get('REPO')
                from_version = row.get('FROM_VERSION')
                to_version = row.get('TO_VERSION')

                # One compare request per row; the result is reused for both
                # the PR number and the branch instead of being re-fetched.
                merge_commit = get_commits(repo, from_version, to_version)
                if merge_commit is None:
                    # No merge-pull-request commit in this version range.
                    continue
                number, branch = merge_commit

                pull_request = get_prs(repo, number)
                records.append({
                    'repo': repo,
                    'title': pull_request.get('title'),
                    'branch': branch,
                    'created_at': pull_request.get('created_at'),
                    'merged_at': pull_request.get('merged_at'),
                    'authored_by': pull_request.get('user').get('login'),
                    'merged_by': pull_request.get('merged_by').get('login'),
                    'number': number,
                    'from_version': from_version,
                    'to_version': to_version,
                })
            except Exception as error:
                # Still best-effort per row, but the failure is now visible
                # instead of being silently discarded.
                print("Skipping row %d: %r" % (index, error))

    # Build the frame once; reusing df.columns keeps the declared column order
    # and yields a well-formed empty frame when nothing was collected.
    final_df = pd.DataFrame(records, columns=df.columns)
    print(final_df)
    return final_df
    

我只添加了两行代码,希望能解决你的问题。