Propublica API json 数据放入 Google 工作表

Propublica API json data into Google Sheets

这是我的第一个 post。我是一个 Google Sheet 的爱好者,使用 add-ons 来处理任何复杂的事情。 Sans add-on,我潜伏(偷偷摸摸)寻找有时有用的片段。厌倦了非法的盗窃,我无法从中学习,也无法回归基础。我已经研究下面的代码好几天了,在观看视频和阅读教程后,我被卡住了。

访问:Propublica Campaign Finance API in Google Sheets 下载我打算使用的几个表格之一。我的代码中有 2 个函数:getData 和 populateSheet。日志每次都显示数据。 Sheet 仅显示 header 行。

这是我的 public 电子表格:

https://docs.google.com/spreadsheets/d/11imT4T5wrvacZ0dRn-mjYA53EB5zsEvKFw4hcTcfJeg/edit?usp=sharing

Propublica Campaign Finance API 说明:

"Campaign Finance API 使用 RESTful 样式。API 仅接受 GET 请求。所有请求都以:https://api.propublica.org/campaign-finance/v1/ 开头。 API 键必须包含在对服务器的所有 API 请求中,设置为 header: X-API-Key: PROPUBLICA_API_KEY

这是我使用的代码:


var URL = 'https://api.propublica.org/campaign-finance/v1/2018/committees/leadership.json'
var params = {
  headers: {'X-API-Key': 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'}
  }

function getData() {
    var response = UrlFetchApp.fetch(URL,params); //Fetch data from a given API URL
    var json = response.getContentText(); //Get data content responded from API URL. This is returned as text
    var data = JSON.parse(json); //Parse the text data as JSON
    Logger.log(data) //This will log the JSON response from API
    return data //Return JSON object recevied from API

}

function populateSheet() {
    var data = getData(); //Get JSON data from the above function
    var sheet = SpreadsheetApp.getActiveSheet();
    var lastRowCount = sheet.getLastRow();
    var header = ['id','relative_uri','name','address','city','state','zip','treasurer','party','fec_uri','candidate','leadership','super_pac','sponsor_name','designation','filing_frequency','committee_type','interest_group'];

    sheet.clear();
    sheet.getRange(1, 1, 1, header.length).setValues([header]);

    for (var i = 0; i < data.length; i++) {
        var row = [];
        var nextRow = sheet.getLastRow() + 1;
        row.push(data[i].id, data[i].relative_uri, data[i].name, data[i].address, data[i].city, data[i].state, data[i].zip, data[i].treasurer, data[i].party, data[i].fec_uri, data[i].candidate, data[i].leadership, data[i].super_pac, data[i].sponsor_name, data[i].designation, data[i].filing_frequency, data[i].committee_type, data[i].interest_group)
        sheet.getRange(nextRow, 1, 1, row.length).setValues([row]);

    }
}

提前感谢您提供的任何指导(甚至 "go read this and learn")..

这个修改怎么样?

修改点:

  • 您要的数据在data.results
    • 为了了解这一点,我确认了您共享的电子表格。 https://docs.google.com/spreadsheets/d/11imT4T5wrvacZ0dRn-mjYA53EB5zsEvKFw4hcTcfJeg/edit?usp=sharing.
  • 您可以立即使用 setValues() 将数据导入电子表格。

修改后的脚本如下

修改脚本:

function populateSheet() {
  var data = getData(); //Get JSON data from the above function
  var sheet = SpreadsheetApp.getActiveSheet();
  var lastRowCount = sheet.getLastRow();
  var header = ['id','relative_uri','name','address','city','state','zip','treasurer','party','fec_uri','candidate','leadership','super_pac','sponsor_name','designation','filing_frequency','committee_type','interest_group'];
  sheet.clear();
  sheet.getRange(1, 1, 1, header.length).setValues([header]);

  // I added below.
  var res = [];
  data.results.forEach(function(e) {
    var temp = [];
    header.forEach(function(h) {
      temp.push(e[h]);
    });
    res.push(temp);
  });

  var nextRow = sheet.getLastRow() + 1;
  sheet.getRange(nextRow, 1, res.length, res[0].length).setValues(res); // Modified
}

注:

  • 我认为 getData() 有效。

如果我误解了你的问题,我很抱歉。

编辑:

要检索超过20条的数据,请使用查询参数offset

The API provides JSON and XML responses for every type of request, and supports JSONP callbacks. Responses that are not date-based or containing aggregate totals return the first 20 results; pagination is available via an offset query string parameter using multiples of 20.

参考:https://propublica.github.io/campaign-finance-api-docs/#responses

例如,当您要检索接下来的20-40条数据时,请修改端点如下。请再次运行。

发件人:

var URL = 'https://api.propublica.org/campaign-finance/v1/2018/committees/leadership.json'

收件人:

var URL = 'https://api.propublica.org/campaign-finance/v1/2018/committees/leadership.json?offset=20'

为了提取所有记录而不是一次只提取 20 条记录,您需要在记录集末尾使用递归调用再次调用整个过程,并获取下一批。

此代码通过更改 URL 的 "offset" 值来改变您的 API 调用,每次函数重新 运行 时递增 20。

你需要一个"out clause",这样当它到达最后一条记录并且数据流结束时,它就结束了这个过程。可以在此处的这一行中找到:

 if (!res || !res[0]) return

这已经过测试,似乎可以提取 566 条记录,而不仅仅是 20 条。

var params = {
  headers: {'X-API-Key': 'your key'}
  }

var offset = 0;

function getData(offset) {
    var URL = 'https://api.propublica.org/campaign-finance/v1/2018/committees/leadership.json?offset='+offset;
    var response = UrlFetchApp.fetch(URL,params); //Fetch data from a given API URL
    var json = response.getContentText(); //Get data content responded from API URL. This is returned as text
    var data = JSON.parse(json); //Parse the text data as JSON
    Logger.log(data) //This will log the JSON response from API
    populateSheet(data,offset);
//    return data //Return JSON object recevied from API
}

function populateSheet(data,offset) {

 // var data = getData(); //Get JSON data from the above function
  var sheet = SpreadsheetApp.getActiveSheet();
  var lastRowCount = sheet.getLastRow();
  var header = ['id','relative_uri','name','address','city','state','zip','treasurer','party','fec_uri','candidate','leadership','super_pac','sponsor_name','designation','filing_frequency','committee_type','interest_group'];
  sheet.clear();
  sheet.getRange(1, 1, 1, header.length).setValues([header]);


  // I added below.
  var res = [];
  var lenny = data.results.length;
  var counter = 0;
  data.results.forEach(function(e) {
    var temp = [];
    header.forEach(function(h) {
      temp.push(e[h]);
    });
    res.push(temp);
    counter++;
    Logger.log('counter is '+counter);
    if (counter == lenny) {
      Logger.log('GOT LENNY counter is '+counter);
      offset = offset + 20;
      getData(offset);
    }
  });

  var nextRow = sheet.getLastRow() + 1;
  if (!res || !res[0]) return
  sheet.getRange(nextRow, 1, res.length, res[0].length).setValues(res); // Modified

}