如何使用纯 JS 从此网页上的 span itemprop 行生成值?
how do I generate the values using plain JS from the span itemprop lines on this webpage?
我正在尝试从 Winmo 个人资料页面(例如 https://open.winmo.com/open/decision_makers/ca/pasadena/jorge/garcia/489325)中解析某些没有 ID 或 class 值的 span 标签,即
<span itemprop="email">j****@***********.com</span>
<div itemscope="" itemprop="address" itemtype="http://schema.org/PostalAddress">
<span itemprop="streetAddress">177 East West Colorado Boulevard</span>
<span itemprop="addressLocality">Pasadena</span>,
<span itemprop="addressRegion">CA</span>
<span itemprop="postalCode">91195</span>
<span itemprop="addressCountry">USA</span>
我发现两个旧的 Stack Overflow 示例很有帮助 (this and this),但使用以下代码时,网页上 9 个匹配的 span itemprop 行中的每一行得到的仍然是 null:
// Collect the node name and nodeValue of every <span> that carries an itemprop attribute.
// NOTE(review): this is the snippet the question asks about. It logs null for every match
// because nodeValue is null on element nodes; the visible text belongs to the span's child
// text node and is readable through textContent instead.
var nodes=[], values=[];
var els = document.getElementsByTagName('span'), i = 0, whatev;
for(i; i < els.length; i++) {
// `prop` is not declared in this snippet, so outside strict mode this assignment creates a global.
prop = els[i].getAttribute('itemprop');
if(prop) {
whatev = els[i];
nodes.push(whatev.nodeName); // provides attribute names, in all CAPS = "SPAN"
values.push(whatev.nodeValue); // for attribute values, why saying null if els[i] is fine?
// Logs the whole accumulated array on every matching span, not just the latest value.
console.log(values); // (whatev) outputs whole thing, but it seems values is what I need
// break; // need this? seems to prevent values after first span from generating
}
}
我如何只返回这类页面中部分隐藏的电子邮件值 (j****@***********.com) 和邮政编码 (91195)?我需要纯 JS 的解决方案,因为我会将它压缩成书签小程序(bookmarklet)供其他人使用。
您可以通过以下选择器获取电子邮件所在的 span
span[itemprop="email"]
和postalCode用同样的方法
span[itemprop="postalCode"]
使用这些选择器,通过 querySelector 获取元素,然后提取其 textContent:
// Read the text of the first <span> whose itemprop attribute equals the given name.
// Throws a TypeError when no such span exists, because querySelector then returns null.
const readItemprop = (name) => {
  const span = document.querySelector(`span[itemprop="${name}"]`);
  return span.textContent;
};

// Look up the two values of interest in the same order as before: email first, then postal code.
const email = readItemprop('email');
const postalCode = readItemprop('postalCode');

console.log(email);
console.log(postalCode);
<span itemprop="email">j****@***********.com</span>
<div itemscope="" itemprop="address" itemtype="http://schema.org/PostalAddress">
<span itemprop="streetAddress">177 East West Colorado Boulevard</span>
<span itemprop="addressLocality">Pasadena</span>,
<span itemprop="addressRegion">CA</span>
<span itemprop="postalCode">91195</span>
<span itemprop="addressCountry">USA</span>
您可以用 itemprop 属性的值作为键,把各个 span 的文本收集到一个对象中。
像这样:
/**
 * Gather the text of every <span> in the document that has an itemprop attribute.
 *
 * Spans with a missing/empty itemprop or empty innerText are skipped. When the
 * same itemprop occurs more than once, the last span in document order wins.
 *
 * @returns {Object} map of itemprop name to the span's innerText
 */
function getItemPropsAsJSON() {
  const collected = {};
  for (const span of Array.from(document.getElementsByTagName('span'))) {
    const name = span.getAttribute('itemprop');
    const text = span.innerText;
    if (!name || !text) {
      continue;
    }
    collected[name] = text;
  }
  return collected;
}
/* expected output:
{
"name": "Jorge Garcia - Co-Founder & Chief Technology Officer, ICONIC | Contact Information, Email Address, Phone Number, Budgets and Responsibilities",
"email": "j****@***********.com",
"telephone": "(347) ***-****",
"streetAddress": "177 East West Colorado Boulevard",
"addressLocality": "Pasadena",
"addressRegion": "CA",
"postalCode": "91195",
"addressCountry": "USA"
}
*/
如果您在其他地方使用它,您可能需要规范化键,因为 itemprop 属性可能并不总是转换为理想的对象表示法格式。为此,请使用以下内容:
/**
 * Convert an itemprop-style name into a lowercase snake_case object key.
 *
 * - A name made only of word characters and containing an uppercase letter is
 *   treated as camelCase and split before each uppercase letter
 *   ("streetAddress" -> "street_address").
 * - Any other name has each run of non-word characters replaced by "_"
 *   ("address country" -> "address_country").
 * - Leading digits are stripped and the result is lowercased.
 *
 * @param {*} key - the raw attribute value
 * @returns {string} the normalized key, or 'failed_object' when `key` is not
 *   a string or is empty/whitespace-only
 */
function normalizeObjectNotation(key){
  if (typeof key !== 'string') {
    return 'failed_object';
  }

  // Trim BEFORE classifying: otherwise surrounding whitespace counts as a
  // non-word character and a padded camelCase name (" postalCode ") is
  // flattened to "postalcode" instead of "postal_code".
  const trimmed = key.trim();
  if (trimmed === '') {
    return 'failed_object';
  }

  const isCamelCase = /[A-Z]/.test(trimmed) && !/\W/.test(trimmed);
  const snake = isCamelCase
    ? trimmed.split(/(?=[A-Z])/).join('_')
    : trimmed.replace(/\W+/g, '_');

  return snake.replace(/^\d+/, '').toLowerCase();
}
/**
 * Gather the text of every <span> in the document that has an itemprop
 * attribute, keyed by the normalized itemprop name.
 *
 * Spans with a missing/empty itemprop or empty innerText are skipped. When two
 * spans normalize to the same key, the last one in document order wins.
 *
 * @returns {Object} map of normalized itemprop name to the span's innerText
 */
function getItemPropsAsJSON() {
  const spans = Array.from(document.getElementsByTagName('span'));
  return spans.reduce((collected, span) => {
    const rawKey = span.getAttribute('itemprop');
    const text = span.innerText;
    if (rawKey && text) {
      collected[normalizeObjectNotation(rawKey)] = text;
    }
    return collected;
  }, {});
}
getItemPropsAsJSON();
/* Expected Output:
{
"name": "Jorge Garcia - Co-Founder & Chief Technology Officer, ICONIC | Contact Information, Email Address, Phone Number, Budgets and Responsibilities",
"email": "j****@***********.com",
"telephone": "(347) ***-****",
"street_address": "177 East West Colorado Boulevard",
"address_locality": "Pasadena",
"address_region": "CA",
"postal_code": "91195",
"address_country": "USA"
}
*/
我正在尝试从 Winmo 个人资料页面(例如 https://open.winmo.com/open/decision_makers/ca/pasadena/jorge/garcia/489325)中解析某些没有 ID 或 class 值的 span 标签,即
<span itemprop="email">j****@***********.com</span>
<div itemscope="" itemprop="address" itemtype="http://schema.org/PostalAddress">
<span itemprop="streetAddress">177 East West Colorado Boulevard</span>
<span itemprop="addressLocality">Pasadena</span>,
<span itemprop="addressRegion">CA</span>
<span itemprop="postalCode">91195</span>
<span itemprop="addressCountry">USA</span>
我发现两个旧的 Stack Overflow 示例很有帮助 (this and this),但使用以下代码时,网页上 9 个匹配的 span itemprop 行中的每一行得到的仍然是 null:
// Collect the node name and nodeValue of every <span> that carries an itemprop attribute.
// NOTE(review): this is the snippet the question asks about. It logs null for every match
// because nodeValue is null on element nodes; the visible text belongs to the span's child
// text node and is readable through textContent instead.
var nodes=[], values=[];
var els = document.getElementsByTagName('span'), i = 0, whatev;
for(i; i < els.length; i++) {
// `prop` is not declared in this snippet, so outside strict mode this assignment creates a global.
prop = els[i].getAttribute('itemprop');
if(prop) {
whatev = els[i];
nodes.push(whatev.nodeName); // provides attribute names, in all CAPS = "SPAN"
values.push(whatev.nodeValue); // for attribute values, why saying null if els[i] is fine?
// Logs the whole accumulated array on every matching span, not just the latest value.
console.log(values); // (whatev) outputs whole thing, but it seems values is what I need
// break; // need this? seems to prevent values after first span from generating
}
}
我如何只返回这类页面中部分隐藏的电子邮件值 (j****@***********.com) 和邮政编码 (91195)?我需要纯 JS 的解决方案,因为我会将它压缩成书签小程序(bookmarklet)供其他人使用。
您可以通过以下选择器获取电子邮件所在的 span
span[itemprop="email"]
和postalCode用同样的方法
span[itemprop="postalCode"]
使用这些选择器,通过 querySelector 获取元素,然后提取其 textContent:
// Read the text of the first <span> whose itemprop attribute equals the given name.
// Throws a TypeError when no such span exists, because querySelector then returns null.
const readItemprop = (name) => {
  const span = document.querySelector(`span[itemprop="${name}"]`);
  return span.textContent;
};

// Look up the two values of interest in the same order as before: email first, then postal code.
const email = readItemprop('email');
const postalCode = readItemprop('postalCode');

console.log(email);
console.log(postalCode);
<span itemprop="email">j****@***********.com</span>
<div itemscope="" itemprop="address" itemtype="http://schema.org/PostalAddress">
<span itemprop="streetAddress">177 East West Colorado Boulevard</span>
<span itemprop="addressLocality">Pasadena</span>,
<span itemprop="addressRegion">CA</span>
<span itemprop="postalCode">91195</span>
<span itemprop="addressCountry">USA</span>
您可以用 itemprop 属性的值作为键,把各个 span 的文本收集到一个对象中。
像这样:
/**
 * Gather the text of every <span> in the document that has an itemprop attribute.
 *
 * Spans with a missing/empty itemprop or empty innerText are skipped. When the
 * same itemprop occurs more than once, the last span in document order wins.
 *
 * @returns {Object} map of itemprop name to the span's innerText
 */
function getItemPropsAsJSON() {
  const collected = {};
  for (const span of Array.from(document.getElementsByTagName('span'))) {
    const name = span.getAttribute('itemprop');
    const text = span.innerText;
    if (!name || !text) {
      continue;
    }
    collected[name] = text;
  }
  return collected;
}
/* expected output:
{
"name": "Jorge Garcia - Co-Founder & Chief Technology Officer, ICONIC | Contact Information, Email Address, Phone Number, Budgets and Responsibilities",
"email": "j****@***********.com",
"telephone": "(347) ***-****",
"streetAddress": "177 East West Colorado Boulevard",
"addressLocality": "Pasadena",
"addressRegion": "CA",
"postalCode": "91195",
"addressCountry": "USA"
}
*/
如果您在其他地方使用它,您可能需要规范化键,因为 itemprop 属性可能并不总是转换为理想的对象表示法格式。为此,请使用以下内容:
/**
 * Convert an itemprop-style name into a lowercase snake_case object key.
 *
 * - A name made only of word characters and containing an uppercase letter is
 *   treated as camelCase and split before each uppercase letter
 *   ("streetAddress" -> "street_address").
 * - Any other name has each run of non-word characters replaced by "_"
 *   ("address country" -> "address_country").
 * - Leading digits are stripped and the result is lowercased.
 *
 * @param {*} key - the raw attribute value
 * @returns {string} the normalized key, or 'failed_object' when `key` is not
 *   a string or is empty/whitespace-only
 */
function normalizeObjectNotation(key){
  if (typeof key !== 'string') {
    return 'failed_object';
  }

  // Trim BEFORE classifying: otherwise surrounding whitespace counts as a
  // non-word character and a padded camelCase name (" postalCode ") is
  // flattened to "postalcode" instead of "postal_code".
  const trimmed = key.trim();
  if (trimmed === '') {
    return 'failed_object';
  }

  const isCamelCase = /[A-Z]/.test(trimmed) && !/\W/.test(trimmed);
  const snake = isCamelCase
    ? trimmed.split(/(?=[A-Z])/).join('_')
    : trimmed.replace(/\W+/g, '_');

  return snake.replace(/^\d+/, '').toLowerCase();
}
/**
 * Gather the text of every <span> in the document that has an itemprop
 * attribute, keyed by the normalized itemprop name.
 *
 * Spans with a missing/empty itemprop or empty innerText are skipped. When two
 * spans normalize to the same key, the last one in document order wins.
 *
 * @returns {Object} map of normalized itemprop name to the span's innerText
 */
function getItemPropsAsJSON() {
  const spans = Array.from(document.getElementsByTagName('span'));
  return spans.reduce((collected, span) => {
    const rawKey = span.getAttribute('itemprop');
    const text = span.innerText;
    if (rawKey && text) {
      collected[normalizeObjectNotation(rawKey)] = text;
    }
    return collected;
  }, {});
}
getItemPropsAsJSON();
/* Expected Output:
{
"name": "Jorge Garcia - Co-Founder & Chief Technology Officer, ICONIC | Contact Information, Email Address, Phone Number, Budgets and Responsibilities",
"email": "j****@***********.com",
"telephone": "(347) ***-****",
"street_address": "177 East West Colorado Boulevard",
"address_locality": "Pasadena",
"address_region": "CA",
"postal_code": "91195",
"address_country": "USA"
}
*/