GOOGLE Analytics API 下一个请求需要时间

GOOGLE Analytics API taking time for next request

我正在尝试从 Google analytics Reporting API 获取数据,问题是第一个请求很快就获取了数据,但下一个请求大约需要 5 mins.Does这是因为我的分页代码发生在 while 循环中?

我的页面大小是“300”。我用这个尺寸来检查我的分页。

        from apiclient.discovery import build
        from time import sleep
        from oauth2client.service_account import ServiceAccountCredentials
        import pandas as pd

        SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
        KEY_FILE_LOCATION = 'analytcisproject.json'
        VIEW_ID = '182363141'

        #dd = [] 
        #for pag_index in range(0, 50): 
        #    dd.append(service.data().ga().get( ids='ga:', start_date='2018-04-01', end_date='2018-04-30', dimensions = "ga:date,ga:dimension2", metrics='ga:pageviews', start_index=str(pag_index*10000+1), max_results=str(pag_index*10000+10000)).execute()) 

        #print(dd)    


        def initialize_analyticsreporting():
            credentials = ServiceAccountCredentials.from_json_keyfile_name(KEY_FILE_LOCATION, SCOPES)
            analytics = build('analyticsreporting', 'v4', credentials=credentials)
            return analytics

        def get_report(analytics, pageTokenVariable):
            return analytics.reports().batchGet(
              body={
                'reportRequests': [
                {
                  'viewId': VIEW_ID,
                  'dateRanges': [{'startDate': '50daysAgo', 'endDate': 'today'}],
                  'metrics': [{'expression': 'ga:sessions'},{'expression': 'ga:users'},{'expression': 'ga:pageviews'}
                              ,{'expression': 'ga:sessionDuration'},{'expression': 'ga:timeOnPage'},{'expression': 'ga:pageLoadTime'},{'expression': 'ga:bounceRate'}],
                  'dimensions': [{'name': 'ga:date'},{'name': 'ga:browser'},
                                 {'name':'ga:city'},{'name':'ga:country'},{'name':'ga:medium'},
                                 {'name':'ga:continent'},{'name':'ga:landingScreenName'},{'name':'ga:pagePath'}
                                 ,{'name':'ga:userType'}],
                    "pageToken": pageTokenVariable,

                     "pageSize": "300",


                }]
              }
          ).execute()



        analytics = initialize_analyticsreporting()
        response = get_report(analytics, "0")

        for report in response.get('reports', []):
            j=0
            list = []
            pagetoken = report.get('nextPageToken', None)
            print(pagetoken)
            #------printing the pagetoken here returns `100,000` which is expected

            for report in response.get('reports', []):
                columnHeader = report.get('columnHeader', {})
                dimensionHeaders = columnHeader.get('dimensions', [])
                metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
                rows = report.get('data', {}).get('rows', [])
                for row in rows:
                    dict = {}
                    dimensions = row.get('dimensions', [])
                    dateRangeValues = row.get('metrics', [])
                    for header, dimension in zip(dimensionHeaders, dimensions):
                        dict[header] = dimension
                    for i, values in enumerate(dateRangeValues):
                        for metric, value in zip(metricHeaders, values.get('values')):
                            if ',' in value or '.' in value:
                                dict[metric.get('name')] = float(value)
                            else:
                                dict[metric.get('name')] = int(value)
                    list.append(dict)
                    j=j+1
                    print(j)
                    #print(response)
              # Append that data to a list as a dictionary

        # pagination function
            while   pagetoken != "" and pagetoken != None:  # This says while there is info in the nextPageToken get the data, process it and add to the list
                i=0
                response = get_report(analytics, pagetoken)
                #pagetoken = response['reports'][0]['nextPageToken']
                pagetoken = response['reports'][0].get('nextPageToken') 
                print(pagetoken)
                #------printing the pagetoken here returns `200,000` as is expected but the data being pulled is the same as for the first batch and so on. While in the loop the pagetoken is being incremented but it does not retrieve new data
                for row in rows:
                    dict = {}
                    dimensions = row.get('dimensions', [])
                    dateRangeValues = row.get('metrics', [])
                    for header, dimension in zip(dimensionHeaders, dimensions):
                        dict[header] = dimension
                    for i, values in enumerate(dateRangeValues):
                        for metric, value in zip(metricHeaders, values.get('values')):
                            if ',' in value or '.' in value:
                                dict[metric.get('name')] = float(value)
                            else:
                                dict[metric.get('name')] = int(value)
                    list.append(dict)
                    sleep(1)
                    i=i+1
                    print(i)

                    #df = pd.DataFrame(list)
                    if i==3:
                       break;

            print('afn')


        df = pd.DataFrame(list)
        #df.head()
        #response

抱歉,我不是 python 专家。 您的行循环中有一个 "sleep(1)",因此每行 1 秒 = 每行 300 秒(5 分钟)。这对我来说最有意义。

尝试移除行循环中的睡眠。

希望对您有所帮助。