使用 Gmail 和 Apps Script 解析 Google 快讯(Google Alerts)邮件
Parsing google alerts using gmail and app script
我设置了 google 警报并收到如下所示的警报电子邮件:
我想解析警报并将其保存在 google sheet 中,格式为:alert_name,publication_date, title,news source
我不知道该从哪里入手,非常感谢任何帮助或指导。
谢谢
您可以将 gmail api 与 appscript 一起使用,GmailApp 的工作方式与 gmail api 的工作方式非常相似。
您应该做的第一件事是在 Gmail 网页版中构造一个搜索条件,使其只返回您要查找的邮件:
// Find every thread sent by Google Alerts (same syntax as the Gmail search box).
const threads = GmailApp.search('from:(googlealerts-noreply@google.com)');
for (const thread of threads) {
  for (const message of thread.getMessages()) {
    const date = message.getDate();
    // Use the HTML body: the plain-text body carries no <table> tags to search for.
    const body = message.getBody();
    // Offsets of the first "<table" and the first "</table>" (-1 when not found).
    const start = body.indexOf('<table');
    const end = body.indexOf('</table>');
  }
}
正文以 HTML 格式返回,因此您必须先对其进行清理,才能找到所需的文本。找到所需的文本后,您可以使用 SpreadsheetApp 将其写入工作表(sheet):
/**
 * Append one row made of `date` and `value` to the active sheet.
 *
 * @param {*} date  Value written to the first cell of the new row.
 * @param {*} value Value written to the second cell of the new row.
 */
function WriteToSheet(date, value) {
  const row = [date, value];
  SpreadsheetApp.getActiveSheet().appendRow(row);
}
此代码来自我用来扫描我自己的一封电子邮件中的一些文本的脚本。
尽管我同意 @Ruben 在评论中提出的观点,但我认为这个话题很有趣,并且可以帮助其他用户保存和管理他们的 Google Alerts。
Code.gs
/* Retrieve all news from googlealert source */
const threads = GmailApp.search('from:(googlealerts-noreply@google.com)')
/* Our sheet for save the news */
const sS = SpreadsheetApp.openById(SS_ID).getSheetByName('Google Alerts')
/* Values of column A, read once so already saved news can be recognised */
let addedNews = []
try {
  const lastRow = sS.getLastRow()
  /* 'A1:A0' is not a valid range, so column A is only read when the sheet has rows */
  if (lastRow > 0) addedNews = sS.getRange('A1:A' + lastRow).getValues().flat()
} catch (err) {
  /* Carry on with an empty list, but leave a trace instead of hiding the failure */
  console.error('Could not read the already saved news ids: ' + err)
}
/**
 * Parse every message of every thread in `threads` and append the news items
 * that are not known yet to the `sS` sheet.
 *
 * Appended row layout: [id, title, url, date, source, filterName], where `id`
 * is hashCode() of the comma-joined values and is what duplicates are
 * detected with (against `addedNews` and against rows added during this run).
 *
 * Messages and entries that do not have the expected layout are reported
 * with console.warn and skipped, so one odd e-mail no longer aborts the run.
 */
function parseContent() {
  const totalNews = []
  /* Maybe add a control system for remove the threads already saved */
  threads.forEach((th) => {
    th.getMessages().forEach((msg) => {
      /* Divide the content in lines and parse them */
      const body = msg.getPlainBody().split('\n')
      /* Extract the filter name eg === News - 2 new results for [python] === */
      const filterMatch = body[0].match(/\[(.*?)\]/)
      if (filterMatch === null) {
        console.warn('Skipping message without an alert name: ' + body[0])
        return
      }
      const filterName = filterMatch[1]
      const date = msg.getDate()
      /* Drop the first line and the last 11 lines, which hold no news */
      const cleanedBody = body.slice(1, -11)
      /* News entries are delimited by empty lines, which read as "\r" */
      const newsIdxs = []
      cleanedBody.forEach((line, idx) => {
        if (line === '\r') newsIdxs.push(idx)
      })
      newsIdxs.forEach((v, idx, arr) => {
        if (idx === arr.length - 1) return
        /* From one empty line to the next */
        const parsedNew = cleanedBody.slice(v + 1, arr[idx + 1])
        /* Two consecutive empty lines enclose nothing */
        if (parsedNew.length === 0) return
        /* Simply extracted from the first line */
        const title = parsedNew[0].split('|')[0]
        /* Last line and between <> */
        const urlMatch = parsedNew[parsedNew.length - 1].match(/<(.*?)>/)
        /* Host of the target, taken from the URL rather than the title due variability */
        const sourceMatch = urlMatch === null ? null : urlMatch[1].match(/url=https?:\/\/(.*?)\//)
        if (sourceMatch === null) {
          console.warn('Skipping entry without a recognisable link: ' + parsedNew[0])
          return
        }
        const url = urlMatch[1]
        const source = sourceMatch[1]
        totalNews.push({ title, url, date, source, filterName })
      })
    })
  })
  /* Set lookup, also fed with the ids appended below so a run cannot add the same news twice */
  const knownIds = new Set(addedNews)
  totalNews.forEach((nw) => {
    /* Hash the object for preventing adding already present */
    const id = hashCode(Object.values(nw).toString())
    if (knownIds.has(id)) return
    sS.appendRow([id, ...Object.values(nw)])
    knownIds.add(id)
    /* Keep the shared list in sync for later calls in the same execution */
    addedNews.push(id)
  })
}
/* Extracted from here */
/**
 * 32-bit hash of a string: for every UTF-16 code unit the running value
 * becomes `hash * 31 + codeUnit`, truncated to a signed 32-bit integer.
 * The empty string hashes to 0.
 */
const hashCode = (s) => {
  let hash = 0
  for (const unit of s.split('')) {
    /* (hash << 5) - hash is hash * 31; "| 0" wraps the sum to 32 bits */
    hash = (((hash << 5) - hash) + unit.charCodeAt(0)) | 0
  }
  return hash
}
Results
注意 1:此脚本是问题的近似值,仅针对与新闻相关的警报进行测试。
注 2:感谢(原文此处的署名已丢失)提供的伪代码(pseudo-code),它帮助我更快地解决了问题。
注 3:hashCode 函数摘自别处(原文 “here” 处为出处链接)。
注 4:由于使用了 getPlainBody(),我决定采用 RegExp;但我认为在这种情况下,借助可以解析 HTML 的库并配合 getBody() 会更容易实现。
我设置了 google 警报并收到如下所示的警报电子邮件:
我想解析警报并将其保存在 google sheet 中,格式为:alert_name,publication_date, title,news source
我不知道该从哪里入手,非常感谢任何帮助或指导。谢谢
您可以将 gmail api 与 appscript 一起使用,GmailApp 的工作方式与 gmail api 的工作方式非常相似。
您应该做的第一件事是在 Gmail 网页版中构造一个搜索条件,使其只返回您要查找的邮件:
// All threads sent by Google Alerts (the query uses the Gmail search-box syntax).
var threads = GmailApp.search('from:(googlealerts-noreply@google.com)');
for (var i = 0; i < threads.length; i++) {
  // Declared explicitly so it no longer leaks as an implicit global.
  var messages = threads[i].getMessages();
  for (var j = 0; j < messages.length; j++) {
    var date = messages[j].getDate();
    // The HTML body is needed here: the plain-text body has no <table> tags.
    var body = messages[j].getBody();
    // Positions of the first "<table" and the first "</table>" (-1 when absent).
    var start = body.indexOf('<table');
    var end = body.indexOf('</table>');
  }
}
正文以 HTML 格式返回,因此您必须先对其进行清理,才能找到所需的文本。找到所需的文本后,您可以使用 SpreadsheetApp 将其写入工作表(sheet):
function WriteToSheet(date, value){
var sheet = SpreadsheetApp.getActiveSheet();
sheet.appendRow([date, value]);
}
此代码来自我用来扫描我自己的一封电子邮件中的一些文本的脚本。
尽管我同意 @Ruben 在评论中提出的观点,但我认为这个话题很有趣,并且可以帮助其他用户保存和管理他们的 Google Alerts。
Code.gs
/* Retrieve all news from googlealert source */
const threads = GmailApp.search('from:(googlealerts-noreply@google.com)')
/* Our sheet for save the news */
const sS = SpreadsheetApp.openById(SS_ID).getSheetByName('Google Alerts')
/* Column A values loaded up front; parseContent() checks new ids against them */
let addedNews = []
try {
  const rowCount = sS.getLastRow()
  /* With no rows the range would be 'A1:A0', which is invalid, so keep the empty list */
  addedNews = rowCount > 0 ? sS.getRange(`A1:A${rowCount}`).getValues().flat() : []
} catch (err) {
  /* Still best-effort, but the reason is logged rather than silently dropped */
  console.error('Could not read the already saved news ids: ' + err)
}
/**
 * Read every message of every thread in `threads`, extract the news entries
 * and append the ones that are not stored yet to the `sS` sheet as
 * [id, title, url, date, source, filterName].
 *
 * `id` is hashCode() of the comma-joined values; an entry is skipped when its
 * id is in `addedNews` or was already appended during this run. Messages and
 * entries without the expected layout are logged with console.warn and
 * skipped instead of throwing.
 */
function parseContent() {
  const totalNews = []
  /* Maybe add a control system for remove the threads already saved */
  for (const th of threads) {
    for (const msg of th.getMessages()) {
      /* Divide the content in lines and parse them */
      const body = msg.getPlainBody().split('\n')
      /* Extract the filter name eg === News - 2 new results for [python] === */
      const filterMatch = body[0].match(/\[(.*?)\]/)
      if (filterMatch === null) {
        console.warn('Skipping message without an alert name: ' + body[0])
        continue
      }
      const filterName = filterMatch[1]
      const date = msg.getDate()
      /* Drop the first line and the last 11 lines, which hold no news */
      const cleanedBody = body.slice(1, -11)
      /* Indexes of the empty lines ("\r") that delimit the entries */
      const newsIdxs = []
      cleanedBody.forEach((line, idx) => {
        if (line === '\r') newsIdxs.push(idx)
      })
      /* Each entry sits between one empty line and the next */
      for (let k = 0; k < newsIdxs.length - 1; k += 1) {
        const entryLines = cleanedBody.slice(newsIdxs[k] + 1, newsIdxs[k + 1])
        const item = parseNewsItem(entryLines)
        if (item === null) {
          if (entryLines.length > 0) console.warn('Skipping entry without a recognisable link: ' + entryLines[0])
          continue
        }
        /* Key order defines both the column order and the hashed string */
        totalNews.push({ title: item.title, url: item.url, date, source: item.source, filterName })
      }
    }
  }
  /* Ids already stored plus the ones appended below, so one run cannot add a news twice */
  const knownIds = new Set(addedNews)
  for (const nw of totalNews) {
    /* Hash the object for preventing adding already present */
    const id = hashCode(Object.values(nw).toString())
    if (knownIds.has(id)) continue
    sS.appendRow([id, ...Object.values(nw)])
    knownIds.add(id)
    /* Keep the shared list in sync for later calls in the same execution */
    addedNews.push(id)
  }
}

/* Turn the lines of one entry into { title, url, source }; null when the lines are empty or the last one holds no "<...url=http(s)://host/...>" link */
function parseNewsItem(lines) {
  if (lines.length === 0) return null
  /* Last line and between <> */
  const urlMatch = lines[lines.length - 1].match(/<(.*?)>/)
  if (urlMatch === null) return null
  const url = urlMatch[1]
  /* Host of the target, taken from the URL rather than the title due variability */
  const sourceMatch = url.match(/url=https?:\/\/(.*?)\//)
  if (sourceMatch === null) return null
  /* Title is simply the first line up to the first "|" */
  return { title: lines[0].split('|')[0], url, source: sourceMatch[1] }
}
/* Extracted from here */
/**
 * Hash a string into a signed 32-bit integer. Starting from 0, each UTF-16
 * code unit updates the value to `value * 31 + codeUnit`, wrapped to 32 bits.
 */
const hashCode = (s) => {
  const units = s.split('')
  let acc = 0
  let pos = 0
  while (pos < units.length) {
    const times31 = (acc << 5) - acc
    /* "| 0" truncates to a signed 32-bit integer */
    acc = (times31 + units[pos].charCodeAt(0)) | 0
    pos += 1
  }
  return acc
}
Results
注意 1:此脚本是问题的近似值,仅针对与新闻相关的警报进行测试。
注 2:感谢(原文此处的署名已丢失)提供的伪代码(pseudo-code),它帮助我更快地解决了问题。
注 3:hashCode 函数摘自别处(原文 “here” 处为出处链接)。
注 4:由于使用了 getPlainBody(),我决定采用 RegExp;但我认为在这种情况下,借助可以解析 HTML 的库并配合 getBody() 会更容易实现。