如何在 Google Apps Script 中将段落形式的 HTML 字符串转换为不含 HTML 标签的纯文本?
How to convert a paragraph HTML string to plain text without HTML tags in Google Apps Script?
这是我上一个问题的后续问题。我想参照上一个问题中的做法,在 Google Apps Script 中将 HTML 字符串转换为不含 HTML 标签的纯文本,但遇到了麻烦:这一次的数据是段落格式。
这是我使用的脚本:
/**
 * Downloads a Workday custom report as CSV, converts the HTML held in
 * columns 2-5 of every data row to plain text via toStringFromHtml, and
 * writes the resulting table into the sheet named "Sheet1".
 *
 * Row 0 (treated as the header) and columns 0-1 are written unchanged.
 * Nothing is written when the HTTP status is outside the 2xx range.
 */
function pullDataFromWorkday() {
  // CSV link of the Workday report. The literal is double-quoted because the
  // placeholder text itself contains an apostrophe.
  const url = "https://services1.myworkday.com/ccx/service/customreport2/[company name]/[owner's email]/[Report Name]?format=csv";
  // Placeholder: this is supposed to be the Workday password in Base64.
  const b64 = 'asdfghjklkjhgfdfghj==';

  const response = UrlFetchApp.fetch(url, {
    headers: {
      Authorization: `Basic ${b64}`,
    },
  });

  // NOTE(review): muteHttpExceptions is not set, so fetch presumably throws on
  // failure statuses before this guard is reached - confirm.
  const status = response.getResponseCode();
  if (status < 200 || status >= 300) {
    return;
  }

  // Parse
  const csvText = response.getBlob().getDataAsString();
  const data = Utilities.parseCsv(csvText, ',');

  // Only these columns carry HTML; row 0 is skipped.
  const htmlColumns = [2, 3, 4, 5];
  for (let row = 1; row < data.length; row++) {
    for (const col of htmlColumns) {
      data[row][col] = toStringFromHtml(data[row][col]);
    }
  }

  // Paste it in
  const sheet = SpreadsheetApp.getActive().getSheetByName('Sheet1');
  sheet.clear();
  if (data.length === 0) {
    // An empty report leaves the sheet cleared instead of failing on data[0].
    return;
  }
  sheet.getRange(1, 1, data.length, data[0].length).setValues(data);
}
/**
 * Turns an HTML fragment into text by wrapping it in a <div>, removing
 * literal <br> tags, pretty-printing it through XmlService and then
 * stripping every remaining tag.
 *
 * @param {string} html HTML fragment to convert.
 * @returns {string} The pretty-printed text with all tags removed, trimmed.
 */
function toStringFromHtml(html) {
  // A single root element is added around the fragment before parsing.
  const wrapped = ('<div>' + html + '</div>').replace(/<br>/g, "");
  const xmlDoc = XmlService.parse(wrapped);
  const pretty = XmlService.getPrettyFormat().format(xmlDoc);
  // Whatever the formatter emitted between tags is kept as-is.
  const withoutTags = pretty.replace(/<[^>]*>/g, "");
  return withoutTags.trim();
}
这是我想要的数据样本:
或者您可以使用此 sample 电子表格。
有没有漏掉或做错的步骤?
谢谢之前的回答
针对您的情况,将 toStringFromHtml 修改如下如何?
修改后的脚本:
function toStringFromHtml(html) {
html = '<div>' + html + '</div>';
html = html.replace(/<br>/g, "").replace(/<p><\/p><p><\/p>/g, "<p></p>").replace(/<span>|<\/span>/g, "");
var document = XmlService.parse(html);
var strText = XmlService.getPrettyFormat().setIndent("").format(document);
strText = strText.replace(/<[^>]*>/g, "");
return strText.trim();
}
在此修改后的脚本中,您的以下示例 HTML 转换如下。
来自
<p><span>Hi Katy</span></p>
<p></p>
<p><span>The illustration (examples) paragraph is useful when we want to explain or clarify something, such as an object, a person, a concept, or a situation. Sample Illustration Topics:</span></p>
<p></p>
<p></p>
<p><span>1. Examples of annoying habits people have on the Skytrain.</span></p>
<p><span>2. Positive habits that you admire in other people. </span></p>
<p><span>3. Endangered animals in Asia. </span></p>
到
<div>
<p>Hi Katy</p>
<p></p>
<p>The illustration (examples) paragraph is useful when we want to explain or clarify something,
such as an object,
a person,
a concept,
or a situation. Sample Illustration Topics:</p>
<p></p>
<p>1. Examples of annoying habits people have on the Skytrain.</p>
<p>2. Positive habits that you admire in other people. </p>
<p>3. Endangered animals in Asia. </p>
</div>
通过这种转换,得到如下结果。
Hi Katy
The illustration (examples) paragraph is useful when we want to explain or clarify something, such as an object, a person, a concept, or a situation. Sample Illustration Topics:
1. Examples of annoying habits people have on the Skytrain.
2. Positive habits that you admire in other people.
3. Endangered animals in Asia.
注:
- 当使用您问题中显示的示例 HTML 时,修改后的脚本可以实现您的目标。但是,我不确定您的其他 HTML 数据。所以我不确定这个修改后的脚本是否可以用于您的实际 HTML 数据。请注意这一点。
我认为你可以使用这个库:[cheerio for Google Apps Script][1]
/**
 * Extracts the text of every non-empty <p> element from an HTML fragment
 * using the Cheerio library and joins the paragraphs with newlines.
 *
 * @param {string} [html] HTML fragment to convert. Defaults to the sample
 *     paragraphs from the question so that calling htmltotext() with no
 *     argument behaves as before.
 * @returns {string} Paragraph texts joined by "\n"; empty paragraphs are skipped.
 */
function htmltotext(
  html = `<p><span>Hi Katy</span></p><p></p><p><span>The illustration (examples) paragraph is useful when we want to explain or clarify something, such as an object, a person, a concept, or a situation. Sample Illustration Topics:</span></p><p></p><p></p><p><span>1. Examples of annoying habits people have on the Skytrain.</span></p><p><span>2. Positive habits that you admire in other people. </span></p><p><span>3. Endangered animals in Asia. </span></p>`
) {
  const $ = Cheerio.load(html);
  const lines = $('p');
  const paragraphs = [];
  for (let i = 0; i < lines.length; i++) {
    const lineText = $(lines.get(i)).text();
    // Empty <p></p> elements only act as spacing and are dropped.
    if (lineText) {
      paragraphs.push(lineText);
    }
  }
  const text = paragraphs.join('\n');
  Logger.log(text);
  return text;
}
这是我上一个问题的后续问题。我想参照上一个问题中的做法,在 Google Apps Script 中将 HTML 字符串转换为不含 HTML 标签的纯文本,但遇到了麻烦:这一次的数据是段落格式。
这是我使用的脚本:
/**
 * Downloads a Workday custom report as CSV, converts the HTML held in
 * columns 2-5 of every data row to plain text via toStringFromHtml, and
 * writes the resulting table into the sheet named "Sheet1".
 *
 * Row 0 (treated as the header) and columns 0-1 are written unchanged.
 * Nothing is written when the HTTP status is outside the 2xx range.
 */
function pullDataFromWorkday() {
  // CSV link of the Workday report. The literal is double-quoted because the
  // placeholder text itself contains an apostrophe.
  const url = "https://services1.myworkday.com/ccx/service/customreport2/[company name]/[owner's email]/[Report Name]?format=csv";
  // Placeholder: this is supposed to be the Workday password in Base64.
  const b64 = 'asdfghjklkjhgfdfghj==';

  const response = UrlFetchApp.fetch(url, {
    headers: {
      Authorization: `Basic ${b64}`,
    },
  });

  // NOTE(review): muteHttpExceptions is not set, so fetch presumably throws on
  // failure statuses before this guard is reached - confirm.
  const status = response.getResponseCode();
  if (status < 200 || status >= 300) {
    return;
  }

  // Parse
  const csvText = response.getBlob().getDataAsString();
  const data = Utilities.parseCsv(csvText, ',');

  // Only these columns carry HTML; row 0 is skipped.
  const htmlColumns = [2, 3, 4, 5];
  for (let row = 1; row < data.length; row++) {
    for (const col of htmlColumns) {
      data[row][col] = toStringFromHtml(data[row][col]);
    }
  }

  // Paste it in
  const sheet = SpreadsheetApp.getActive().getSheetByName('Sheet1');
  sheet.clear();
  if (data.length === 0) {
    // An empty report leaves the sheet cleared instead of failing on data[0].
    return;
  }
  sheet.getRange(1, 1, data.length, data[0].length).setValues(data);
}
/**
 * Turns an HTML fragment into text by wrapping it in a <div>, removing
 * literal <br> tags, pretty-printing it through XmlService and then
 * stripping every remaining tag.
 *
 * @param {string} html HTML fragment to convert.
 * @returns {string} The pretty-printed text with all tags removed, trimmed.
 */
function toStringFromHtml(html) {
  // A single root element is added around the fragment before parsing.
  const wrapped = ('<div>' + html + '</div>').replace(/<br>/g, "");
  const xmlDoc = XmlService.parse(wrapped);
  const pretty = XmlService.getPrettyFormat().format(xmlDoc);
  // Whatever the formatter emitted between tags is kept as-is.
  const withoutTags = pretty.replace(/<[^>]*>/g, "");
  return withoutTags.trim();
}
这是我想要的数据样本:
或者您可以使用此 sample 电子表格。
有没有漏掉或做错的步骤?
谢谢之前的回答
针对您的情况,将 toStringFromHtml 修改如下如何?
修改后的脚本:
function toStringFromHtml(html) {
html = '<div>' + html + '</div>';
html = html.replace(/<br>/g, "").replace(/<p><\/p><p><\/p>/g, "<p></p>").replace(/<span>|<\/span>/g, "");
var document = XmlService.parse(html);
var strText = XmlService.getPrettyFormat().setIndent("").format(document);
strText = strText.replace(/<[^>]*>/g, "");
return strText.trim();
}
在此修改后的脚本中,您的以下示例 HTML 转换如下。
来自
<p><span>Hi Katy</span></p> <p></p> <p><span>The illustration (examples) paragraph is useful when we want to explain or clarify something, such as an object, a person, a concept, or a situation. Sample Illustration Topics:</span></p> <p></p> <p></p> <p><span>1. Examples of annoying habits people have on the Skytrain.</span></p> <p><span>2. Positive habits that you admire in other people. </span></p> <p><span>3. Endangered animals in Asia. </span></p>
到
<div> <p>Hi Katy</p> <p></p> <p>The illustration (examples) paragraph is useful when we want to explain or clarify something, such as an object, a person, a concept, or a situation. Sample Illustration Topics:</p> <p></p> <p>1. Examples of annoying habits people have on the Skytrain.</p> <p>2. Positive habits that you admire in other people. </p> <p>3. Endangered animals in Asia. </p> </div>
通过这种转换,得到如下结果。
Hi Katy
The illustration (examples) paragraph is useful when we want to explain or clarify something, such as an object, a person, a concept, or a situation. Sample Illustration Topics:
1. Examples of annoying habits people have on the Skytrain.
2. Positive habits that you admire in other people.
3. Endangered animals in Asia.
注:
- 当使用您问题中显示的示例 HTML 时,修改后的脚本可以实现您的目标。但是,我不确定您的其他 HTML 数据。所以我不确定这个修改后的脚本是否可以用于您的实际 HTML 数据。请注意这一点。
我认为你可以使用这个库:[cheerio for Google Apps Script][1]
/**
 * Extracts the text of every non-empty <p> element from an HTML fragment
 * using the Cheerio library and joins the paragraphs with newlines.
 *
 * @param {string} [html] HTML fragment to convert. Defaults to the sample
 *     paragraphs from the question so that calling htmltotext() with no
 *     argument behaves as before.
 * @returns {string} Paragraph texts joined by "\n"; empty paragraphs are skipped.
 */
function htmltotext(
  html = `<p><span>Hi Katy</span></p><p></p><p><span>The illustration (examples) paragraph is useful when we want to explain or clarify something, such as an object, a person, a concept, or a situation. Sample Illustration Topics:</span></p><p></p><p></p><p><span>1. Examples of annoying habits people have on the Skytrain.</span></p><p><span>2. Positive habits that you admire in other people. </span></p><p><span>3. Endangered animals in Asia. </span></p>`
) {
  const $ = Cheerio.load(html);
  const lines = $('p');
  const paragraphs = [];
  for (let i = 0; i < lines.length; i++) {
    const lineText = $(lines.get(i)).text();
    // Empty <p></p> elements only act as spacing and are dropped.
    if (lineText) {
      paragraphs.push(lineText);
    }
  }
  const text = paragraphs.join('\n');
  Logger.log(text);
  return text;
}