使用 Zenpy 从 Zendesk 获取增量数据
getting incremental data from Zendesk using Zenpy
我用下面的代码从 Zendesk 获取数据。问题是每次运行这个脚本,它都会重新获取最近 30 天的数据。有人能告诉我需要做哪些修改,才能让它变成增量获取吗?
理想情况下,此脚本应该每天运行两到三次(当前脚本每次执行都会获取过去 30 天的数据,这是不必要的)。
from zenpy import Zenpy
import time,datetime
import json
import psycopg2
# Load Zendesk ticket events into PostgreSQL, one raw JSON document per row.
# Connecting DB..
DSN = "dbname='postgres' user='postgres' host='localhost' password='postgres' port='5432'"
conn = psycopg2.connect(DSN)
conn.set_client_encoding('utf-8')
cur = conn.cursor()
ins_cur = conn.cursor()
# NOTE: the table is dropped on every run, so rows loaded by earlier runs are
# discarded and each run starts again from an empty table.
script = 'DROP TABLE IF EXISTS ticket_events; CREATE TABLE ticket_events ( ID serial NOT NULL ' \
         'PRIMARY KEY, info json NOT NULL); '
cur.execute(script)
conn.commit()
print('Table dropped and recreated')
# Zenpy accepts an API token
creds = {
    'email': 'xxxxx@xxx.com',
    'token': '*************',
    'subdomain': 'xxxxxx'
}
# Fixed look-back window: every run requests the events of the last 30 days.
rday = datetime.datetime.now() - datetime.timedelta(days=30)
# Default connect
zenpy_client = Zenpy(**creds)
print('Zendesk connected via zenpy')
requests = zenpy_client.tickets.events(start_time=rday, include=None)
# The JSON text is passed as a bound parameter so the driver quotes it.
# Splicing it between $$ ... $$ breaks as soon as the payload itself
# contains "$$" (for example inside a ticket subject).
insert_query = 'INSERT INTO ticket_events(info) VALUES (%s)'
# loop the tickets and insert to dwh
for request in requests:
    req_json = json.dumps(request.to_dict(), sort_keys=False)
    print(req_json)
    cur.execute(insert_query, (req_json,))
    conn.commit()
conn.close()
下面是我为提取这些值而定义的表结构,但我觉得它不够好。我希望这张表能随新数据增量更新,并删除其中的冗余数据(如果有的话)。请就此提出建议。
-- Flatten the raw audit documents in ticket_events into one row per child
-- event. The table is rebuilt from the raw rows on every run.
drop table if exists zendesk_ticket_events;
create table zendesk_ticket_events as
-- DISTINCT collapses identical rows, which appear when the same audit
-- document was loaded into ticket_events more than once.
SELECT DISTINCT
    -- Fields of the parent (audit) event.
    CAST (info ->> 'id' as BIGINT) as parent_id,
    CAST (info ->> 'ticket_id' as BIGINT) as ticket_id,
    CAST (info ->> 'updater_id' as BIGINT) as updater_id,
    CAST (info ->> 'via' as VARCHAR (50)) as via,
    CAST (info ->> 'event_type' as VARCHAR (50)) as parent_event_type,
    -- A trailing 'Z' (as in the sample "2020-03-29T18:41:22Z") is ignored by
    -- this cast, so the column holds the time exactly as written in the JSON.
    CAST (info ->> 'created_at' as timestamp without time zone) as created_at,
    -- Fields of the child event; a key missing from a child yields NULL.
    -- Ids are numeric in the sample data, so they are typed like parent_id.
    CAST (enrolment_info ->> 'via_reference_id' as BIGINT) AS via_reference_id,
    CAST (enrolment_info ->> 'id' as BIGINT) AS child_id,
    CAST (enrolment_info ->> 'assignee_id' as BIGINT) AS assignee_id,
    -- TEXT, because an explicit cast to VARCHAR(50) silently truncates
    -- subjects longer than 50 characters.
    CAST (enrolment_info ->> 'subject' as TEXT) AS subject,
    CAST (enrolment_info ->> 'requester_id' as BIGINT) AS requester_id,
    CAST (enrolment_info ->> 'status' as VARCHAR (50)) AS status,
    CAST (enrolment_info ->> 'priority' as VARCHAR (50)) AS priority,
    -- The comment flags are JSON booleans (true/false) in the sample data.
    CAST (enrolment_info ->> 'comment_public' as BOOLEAN) AS comment_public,
    CAST (enrolment_info ->> 'comment_present' as BOOLEAN) AS comment_present,
    CAST (enrolment_info ->> 'event_type' as VARCHAR (50)) AS child_event_type,
    -- NOTE(review): previous_value is only ever null in the sample; kept as
    -- TEXT because its type presumably depends on the changed field.
    CAST (enrolment_info ->> 'previous_value' as TEXT) AS previous_value,
    CAST (enrolment_info ->> 'group_id' as BIGINT) AS group_id
FROM ticket_events t
CROSS JOIN LATERAL json_array_elements(t.info -> 'child_events') AS enrolment_info;
下面是示例数据。有人能对照这些数据检查一下,告诉我上面的表结构是否正确吗?
{
"child_events": [
{
"id": 54334560,
"via": "Mail",
"via_reference_id": null,
"comment_present": true,
"comment_public": true,
"event_type": "Comment"
},
{
"id": 54334580,
"via": "Mail",
"via_reference_id": null,
"subject": "Order 10056 on 20.03.20",
"event_type": "Create"
},
{
"id": 54334600,
"via": "Mail",
"via_reference_id": null,
"requester_id": 369854,
"event_type": "Create"
},
{
"id": 54334620,
"via": "Mail",
"via_reference_id": null,
"locale_id": "8",
"event_type": "Create"
},
{
"id": 543342310640,
"via": "Mail",
"via_reference_id": null,
"status": "new",
"event_type": "Create"
},
{
"id": 54334660,
"via": "Mail",
"via_reference_id": null,
"priority": null,
"event_type": "Create"
},
{
"id": 54334700,
"via": "Mail",
"via_reference_id": null,
"type": null,
"event_type": "Create"
},
{
"id": 54334740,
"via": "Mail",
"via_reference_id": null,
"tags": [
"bestellung"
],
"added_tags": [
"Orders"
],
"removed_tags": [
],
"event_type": "Create"
},
{
"id": 54334860,
"via": "Rule",
"via_reference_id": 44967,
"group_id": 2117,
"rel": "trigger",
"revision_id": 1,
"event_type": "Change",
"previous_value": null
}
],
"id": 54334540,
"ticket_id": 159978,
"updater_id": 369854,
"via": "Mail",
"created_at": "2020-03-29T18:41:22Z",
"event_type": "Audit",
"timestamp": 1585507282,
"system": {
"client": "Microsoft Outlook 14.0",
"ip_address": null,
"latitude": 48.3074,
"location": "Linz, 4, Austria",
"longitude": 14.285
}
}
从您创建表的位置开始,到创建 rday 变量为止,我已将这部分代码改为:
# Create the raw table only when it is missing, so rows loaded by earlier
# runs are kept and can be used to find where the next run should resume.
create_table_sql = 'CREATE TABLE IF NOT EXISTS ' \
                   'ticket_events ( ID serial NOT NULL ' \
                   'PRIMARY KEY, info json NOT NULL); '
cur.execute(create_table_sql)
conn.commit()
# Zenpy accepts an API token
creds = {
    'email': 'xxxxx@xxx.com',
    'token': '*************',
    'subdomain': 'xxxxxx'
}
# ticket_events only has the columns ID and info, so the event time has to be
# read out of the JSON document rather than from a created_at column.
select_max_created = "SELECT MAX(CAST(info ->> 'created_at' AS timestamp without time zone)) " \
                     "FROM ticket_events;"
cur.execute(select_max_created)
row = cur.fetchone()  # an aggregate without GROUP BY always returns one row
created_at = row[0]  # already a datetime.datetime; None when the table is empty
if created_at is None:
    # First run: nothing stored yet, fall back to the original 30-day window.
    rday = datetime.datetime.now() - datetime.timedelta(days=30)
else:
    # Start one hour BEFORE the newest stored event. Adding the hour instead
    # would skip every event created in that hour and could even yield a
    # start time in the future. The overlap re-fetches some events, so
    # duplicates have to be removed downstream (e.g. by the event id).
    rday = created_at - datetime.timedelta(hours=1)
我用下面的代码从 Zendesk 获取数据。问题是每次运行这个脚本,它都会重新获取最近 30 天的数据。有人能告诉我需要做哪些修改,才能让它变成增量获取吗?理想情况下,此脚本应该每天运行两到三次(当前脚本每次执行都会获取过去 30 天的数据,这是不必要的)。
from zenpy import Zenpy
import time,datetime
import json
import psycopg2
# Load Zendesk ticket events into PostgreSQL, one raw JSON document per row.
# Connecting DB..
DSN = "dbname='postgres' user='postgres' host='localhost' password='postgres' port='5432'"
conn = psycopg2.connect(DSN)
conn.set_client_encoding('utf-8')
cur = conn.cursor()
ins_cur = conn.cursor()
# NOTE: the table is dropped on every run, so rows loaded by earlier runs are
# discarded and each run starts again from an empty table.
script = 'DROP TABLE IF EXISTS ticket_events; CREATE TABLE ticket_events ( ID serial NOT NULL ' \
         'PRIMARY KEY, info json NOT NULL); '
cur.execute(script)
conn.commit()
print('Table dropped and recreated')
# Zenpy accepts an API token
creds = {
    'email': 'xxxxx@xxx.com',
    'token': '*************',
    'subdomain': 'xxxxxx'
}
# Fixed look-back window: every run requests the events of the last 30 days.
rday = datetime.datetime.now() - datetime.timedelta(days=30)
# Default connect
zenpy_client = Zenpy(**creds)
print('Zendesk connected via zenpy')
requests = zenpy_client.tickets.events(start_time=rday, include=None)
# The JSON text is passed as a bound parameter so the driver quotes it.
# Splicing it between $$ ... $$ breaks as soon as the payload itself
# contains "$$" (for example inside a ticket subject).
insert_query = 'INSERT INTO ticket_events(info) VALUES (%s)'
# loop the tickets and insert to dwh
for request in requests:
    req_json = json.dumps(request.to_dict(), sort_keys=False)
    print(req_json)
    cur.execute(insert_query, (req_json,))
    conn.commit()
conn.close()
下面是我为提取这些值而定义的表结构,但我觉得它不够好。我希望这张表能随新数据增量更新,并删除其中的冗余数据(如果有的话)。请就此提出建议。
-- Flatten the raw audit documents in ticket_events into one row per child
-- event. The table is rebuilt from the raw rows on every run.
drop table if exists zendesk_ticket_events;
create table zendesk_ticket_events as
-- DISTINCT collapses identical rows, which appear when the same audit
-- document was loaded into ticket_events more than once.
SELECT DISTINCT
    -- Fields of the parent (audit) event.
    CAST (info ->> 'id' as BIGINT) as parent_id,
    CAST (info ->> 'ticket_id' as BIGINT) as ticket_id,
    CAST (info ->> 'updater_id' as BIGINT) as updater_id,
    CAST (info ->> 'via' as VARCHAR (50)) as via,
    CAST (info ->> 'event_type' as VARCHAR (50)) as parent_event_type,
    -- A trailing 'Z' (as in the sample "2020-03-29T18:41:22Z") is ignored by
    -- this cast, so the column holds the time exactly as written in the JSON.
    CAST (info ->> 'created_at' as timestamp without time zone) as created_at,
    -- Fields of the child event; a key missing from a child yields NULL.
    -- Ids are numeric in the sample data, so they are typed like parent_id.
    CAST (enrolment_info ->> 'via_reference_id' as BIGINT) AS via_reference_id,
    CAST (enrolment_info ->> 'id' as BIGINT) AS child_id,
    CAST (enrolment_info ->> 'assignee_id' as BIGINT) AS assignee_id,
    -- TEXT, because an explicit cast to VARCHAR(50) silently truncates
    -- subjects longer than 50 characters.
    CAST (enrolment_info ->> 'subject' as TEXT) AS subject,
    CAST (enrolment_info ->> 'requester_id' as BIGINT) AS requester_id,
    CAST (enrolment_info ->> 'status' as VARCHAR (50)) AS status,
    CAST (enrolment_info ->> 'priority' as VARCHAR (50)) AS priority,
    -- The comment flags are JSON booleans (true/false) in the sample data.
    CAST (enrolment_info ->> 'comment_public' as BOOLEAN) AS comment_public,
    CAST (enrolment_info ->> 'comment_present' as BOOLEAN) AS comment_present,
    CAST (enrolment_info ->> 'event_type' as VARCHAR (50)) AS child_event_type,
    -- NOTE(review): previous_value is only ever null in the sample; kept as
    -- TEXT because its type presumably depends on the changed field.
    CAST (enrolment_info ->> 'previous_value' as TEXT) AS previous_value,
    CAST (enrolment_info ->> 'group_id' as BIGINT) AS group_id
FROM ticket_events t
CROSS JOIN LATERAL json_array_elements(t.info -> 'child_events') AS enrolment_info;
下面是示例数据。有人能对照这些数据检查一下,告诉我上面的表结构是否正确吗?
{
"child_events": [
{
"id": 54334560,
"via": "Mail",
"via_reference_id": null,
"comment_present": true,
"comment_public": true,
"event_type": "Comment"
},
{
"id": 54334580,
"via": "Mail",
"via_reference_id": null,
"subject": "Order 10056 on 20.03.20",
"event_type": "Create"
},
{
"id": 54334600,
"via": "Mail",
"via_reference_id": null,
"requester_id": 369854,
"event_type": "Create"
},
{
"id": 54334620,
"via": "Mail",
"via_reference_id": null,
"locale_id": "8",
"event_type": "Create"
},
{
"id": 543342310640,
"via": "Mail",
"via_reference_id": null,
"status": "new",
"event_type": "Create"
},
{
"id": 54334660,
"via": "Mail",
"via_reference_id": null,
"priority": null,
"event_type": "Create"
},
{
"id": 54334700,
"via": "Mail",
"via_reference_id": null,
"type": null,
"event_type": "Create"
},
{
"id": 54334740,
"via": "Mail",
"via_reference_id": null,
"tags": [
"bestellung"
],
"added_tags": [
"Orders"
],
"removed_tags": [
],
"event_type": "Create"
},
{
"id": 54334860,
"via": "Rule",
"via_reference_id": 44967,
"group_id": 2117,
"rel": "trigger",
"revision_id": 1,
"event_type": "Change",
"previous_value": null
}
],
"id": 54334540,
"ticket_id": 159978,
"updater_id": 369854,
"via": "Mail",
"created_at": "2020-03-29T18:41:22Z",
"event_type": "Audit",
"timestamp": 1585507282,
"system": {
"client": "Microsoft Outlook 14.0",
"ip_address": null,
"latitude": 48.3074,
"location": "Linz, 4, Austria",
"longitude": 14.285
}
}
从您创建表的位置开始,到创建 rday 变量为止,我已将这部分代码改为:
# Create the raw table only when it is missing, so rows loaded by earlier
# runs are kept and can be used to find where the next run should resume.
create_table_sql = 'CREATE TABLE IF NOT EXISTS ' \
                   'ticket_events ( ID serial NOT NULL ' \
                   'PRIMARY KEY, info json NOT NULL); '
cur.execute(create_table_sql)
conn.commit()
# Zenpy accepts an API token
creds = {
    'email': 'xxxxx@xxx.com',
    'token': '*************',
    'subdomain': 'xxxxxx'
}
# ticket_events only has the columns ID and info, so the event time has to be
# read out of the JSON document rather than from a created_at column.
select_max_created = "SELECT MAX(CAST(info ->> 'created_at' AS timestamp without time zone)) " \
                     "FROM ticket_events;"
cur.execute(select_max_created)
row = cur.fetchone()  # an aggregate without GROUP BY always returns one row
created_at = row[0]  # already a datetime.datetime; None when the table is empty
if created_at is None:
    # First run: nothing stored yet, fall back to the original 30-day window.
    rday = datetime.datetime.now() - datetime.timedelta(days=30)
else:
    # Start one hour BEFORE the newest stored event. Adding the hour instead
    # would skip every event created in that hour and could even yield a
    # start time in the future. The overlap re-fetches some events, so
    # duplicates have to be removed downstream (e.g. by the event id).
    rday = created_at - datetime.timedelta(hours=1)