Flurry CSV 下载 (API) 事件计数不匹配
Flurry CSV Download (API) Event Count Mismatch
我正在使用 API 测试我的应用程序事件的 CSV 下载。
我注意到,针对同一时间段多次调用 API 所得到的 CSV,事件计数并不相同。
所有数据(每次下载)对于我的应用程序和请求的时间段都是正确的。
有谁知道他们是否对数据进行采样以创建下载文件?
已编辑以包含样本调用、提取代码和同一时间段内 2 次调用的结果。
调用
# Start and end of the requested period, written as yyyymmdd strings
str_init = '20191101'
str_end = '20191102'
# Call data extraction for Flurry from IOS app
# ('IOS' selects the iOS API key inside get_csv_from_flurry)
get_csv_from_flurry(str_init, str_end, 'IOS')
提取代码
from datetime import datetime
from dateutil import parser
import requests
import json
import time
from functions.ribon_path import ribon_root_path_join
from functions.ribon_s3_integration import ribon_upload_to_s3
"""
Make CSV extraction from flurry based on initial date (yyyy-mm-dd), end date (yyyy-mm-dd) and platform
Save Uncompressed CSV locally for processing
Save compressed file (parquet) to S3 for backup
"""
def get_csv_from_flurry(str_ini, str_end, str_platform):
    """Request a Flurry raw-data CSV export and save the downloaded file.

    Posts an export request for the given period to the Flurry raw-data
    endpoint, polls once a minute until the response carries a download
    URI (``s3URI``), then writes the downloaded bytes to the path returned
    by ``ribon_root_path_join('data', 'Flurry_Download', <file name>)``,
    where the file name is ``<str_ini>_<str_end>_<str_platform>.csv.gz``.

    Args:
        str_ini: Start of the period, as a date string understood by
            ``dateutil.parser.parse`` (e.g. ``'20191101'``).
        str_end: End of the period, same format as ``str_ini``.
        str_platform: ``'IOS'`` selects the first API key; any other
            value selects the second one.

    Raises:
        requests.HTTPError: If the export request, a status poll or the
            file download returns an HTTP error status.
    """
    # Naive epoch, equal to the deprecated datetime.utcfromtimestamp(0).
    # It has to stay naive because it is subtracted from the parsed dates,
    # which carry no timezone when the input string has none.
    epoch = datetime(1970, 1, 1)

    def unix_time_millis(dt):
        # Milliseconds elapsed between the Unix epoch and dt.
        return (dt - epoch).total_seconds() * 1000.0

    # Convert the period boundaries to Unix epoch milliseconds.
    # NOTE(review): these are sent as floats (e.g. 1572566400000.0);
    # confirm the API accepts that or cast to int.
    epoch_ini = unix_time_millis(parser.parse(str_ini))
    epoch_end = unix_time_millis(parser.parse(str_end))

    if str_platform == 'IOS':
        Flurry_apiKey = 'XXX'
    else:
        Flurry_apiKey = 'YYY'

    # Build the parameters of the post request to the flurry API
    base_url = 'https://rawdata.flurry.com/pulse/v1/rawData'
    payload = {
        "data": {
            "type": "rawData",
            "attributes": {
                "startTime": epoch_ini,
                "endTime": epoch_end,
                "outputFormat": "CSV",
                "apiKey": Flurry_apiKey,
            },
        },
    }
    # The same headers are used for the export request and the status polls.
    headers = {
        "accept": "application/vnd.api+json",
        "authorization": "Bearer ZZZ",
        "cache-control": "no-cache",
        "content-type": "application/vnd.api+json",
    }

    # Make the request
    print('Make Request to Flurry')
    r = requests.post(base_url, data=json.dumps(payload), headers=headers)
    # Fail loudly on an HTTP error instead of on a confusing KeyError below.
    r.raise_for_status()

    # Read the download url (may still be None) and the request id
    test = r.json()
    r_s3URI = test['data']['attributes']['s3URI']
    r_id = test['data']['id']

    # URL used to check whether the download link is ready
    status_url = base_url + '/' + r_id + '?fields[rawData]=requestStatus,s3URI'
    print('Request OK')

    # Check each minute if the download link is ready.
    # NOTE(review): there is no upper bound on the polling; the loop only
    # ends once s3URI is set or an HTTP error is raised.
    print('Start Pooling to Check if the File is Ready for Download')
    while r_s3URI is None:
        time.sleep(60)
        # The empty JSON body is kept from the original request shape.
        r = requests.get(status_url, data=json.dumps({}), headers=headers)
        print(r.content)
        r.raise_for_status()
        r_s3URI = r.json()['data']['attributes']['s3URI']

    # When the download is ready, get the file and save it locally
    flurry_filename = str_ini + '_' + str_end + '_' + str_platform + '.csv.gz'
    flurry_path_gz = ribon_root_path_join('data', 'Flurry_Download', flurry_filename)

    # Download the file
    print('Start Flurry Download')
    myfile = requests.get(r_s3URI)
    # Without this check an error body would be saved as if it were data.
    myfile.raise_for_status()
    # Context manager guarantees the file is flushed and closed.
    with open(flurry_path_gz, 'wb') as out_file:
        out_file.write(myfile.content)
The linked image shows the 2 files I got; they are not the same size and do not contain the same number of records.
在 Flurry Support 的帮助下,我找出了不同之处。
对于距今超过 15 天的日期,每次 API 调用返回的数字都相同。
对于距今 15 天以内的日期,API 调用大多数时候会得到不同的结果(较新的调用包含更多记录)。所请求的日期距离调用时间越久,差异越小,所以我同意支持团队的解释:这是由延迟到达的事件造成的。
Flurry 并非实时上报数据,它的工作方式是先在移动设备上将数据排队,然后再转储到服务器。
我正在使用 API 测试我的应用程序事件的 CSV 下载。 我注意到,针对同一时间段多次调用 API 所得到的 CSV,事件计数并不相同。 所有数据(每次下载)对于我的应用程序和请求的时间段都是正确的。 有谁知道他们是否对数据进行采样以创建下载文件?
已编辑以包含样本调用、提取代码和同一时间段内 2 次调用的结果。
调用
# Start and end of the requested period, written as yyyymmdd strings
str_init = '20191101'
str_end = '20191102'
# Call data extraction for Flurry from IOS app
# ('IOS' selects the iOS API key inside get_csv_from_flurry)
get_csv_from_flurry(str_init, str_end, 'IOS')
提取代码
from datetime import datetime
from dateutil import parser
import requests
import json
import time
from functions.ribon_path import ribon_root_path_join
from functions.ribon_s3_integration import ribon_upload_to_s3
"""
Make CSV extraction from flurry based on initial date (yyyy-mm-dd), end date (yyyy-mm-dd) and platform
Save Uncompressed CSV locally for processing
Save compressed file (parquet) to S3 for backup
"""
def get_csv_from_flurry(str_ini, str_end, str_platform):
    """Request a Flurry raw-data CSV export and save the downloaded file.

    Posts an export request for the given period to the Flurry raw-data
    endpoint, polls once a minute until the response carries a download
    URI (``s3URI``), then writes the downloaded bytes to the path returned
    by ``ribon_root_path_join('data', 'Flurry_Download', <file name>)``,
    where the file name is ``<str_ini>_<str_end>_<str_platform>.csv.gz``.

    Args:
        str_ini: Start of the period, as a date string understood by
            ``dateutil.parser.parse`` (e.g. ``'20191101'``).
        str_end: End of the period, same format as ``str_ini``.
        str_platform: ``'IOS'`` selects the first API key; any other
            value selects the second one.

    Raises:
        requests.HTTPError: If the export request, a status poll or the
            file download returns an HTTP error status.
    """
    # Naive epoch, equal to the deprecated datetime.utcfromtimestamp(0).
    # It has to stay naive because it is subtracted from the parsed dates,
    # which carry no timezone when the input string has none.
    epoch = datetime(1970, 1, 1)

    def unix_time_millis(dt):
        # Milliseconds elapsed between the Unix epoch and dt.
        return (dt - epoch).total_seconds() * 1000.0

    # Convert the period boundaries to Unix epoch milliseconds.
    # NOTE(review): these are sent as floats (e.g. 1572566400000.0);
    # confirm the API accepts that or cast to int.
    epoch_ini = unix_time_millis(parser.parse(str_ini))
    epoch_end = unix_time_millis(parser.parse(str_end))

    if str_platform == 'IOS':
        Flurry_apiKey = 'XXX'
    else:
        Flurry_apiKey = 'YYY'

    # Build the parameters of the post request to the flurry API
    base_url = 'https://rawdata.flurry.com/pulse/v1/rawData'
    payload = {
        "data": {
            "type": "rawData",
            "attributes": {
                "startTime": epoch_ini,
                "endTime": epoch_end,
                "outputFormat": "CSV",
                "apiKey": Flurry_apiKey,
            },
        },
    }
    # The same headers are used for the export request and the status polls.
    headers = {
        "accept": "application/vnd.api+json",
        "authorization": "Bearer ZZZ",
        "cache-control": "no-cache",
        "content-type": "application/vnd.api+json",
    }

    # Make the request
    print('Make Request to Flurry')
    r = requests.post(base_url, data=json.dumps(payload), headers=headers)
    # Fail loudly on an HTTP error instead of on a confusing KeyError below.
    r.raise_for_status()

    # Read the download url (may still be None) and the request id
    test = r.json()
    r_s3URI = test['data']['attributes']['s3URI']
    r_id = test['data']['id']

    # URL used to check whether the download link is ready
    status_url = base_url + '/' + r_id + '?fields[rawData]=requestStatus,s3URI'
    print('Request OK')

    # Check each minute if the download link is ready.
    # NOTE(review): there is no upper bound on the polling; the loop only
    # ends once s3URI is set or an HTTP error is raised.
    print('Start Pooling to Check if the File is Ready for Download')
    while r_s3URI is None:
        time.sleep(60)
        # The empty JSON body is kept from the original request shape.
        r = requests.get(status_url, data=json.dumps({}), headers=headers)
        print(r.content)
        r.raise_for_status()
        r_s3URI = r.json()['data']['attributes']['s3URI']

    # When the download is ready, get the file and save it locally
    flurry_filename = str_ini + '_' + str_end + '_' + str_platform + '.csv.gz'
    flurry_path_gz = ribon_root_path_join('data', 'Flurry_Download', flurry_filename)

    # Download the file
    print('Start Flurry Download')
    myfile = requests.get(r_s3URI)
    # Without this check an error body would be saved as if it were data.
    myfile.raise_for_status()
    # Context manager guarantees the file is flushed and closed.
    with open(flurry_path_gz, 'wb') as out_file:
        out_file.write(myfile.content)
The linked image shows the 2 files I got; they are not the same size and do not contain the same number of records.
在 Flurry Support 的帮助下,我找出了不同之处。 对于距今超过 15 天的日期,每次 API 调用返回的数字都相同。 对于距今 15 天以内的日期,API 调用大多数时候会得到不同的结果(较新的调用包含更多记录)。所请求的日期距离调用时间越久,差异越小,所以我同意支持团队的解释:这是由延迟到达的事件造成的。 Flurry 并非实时上报数据,它的工作方式是先在移动设备上将数据排队,然后再转储到服务器。