如何使用纯 JS 从此网页上的 span itemprop 行生成值?

how do I generate the values using plain JS from the span itemprop lines on this webpage?

我正在尝试从 Winmo 的个人资料页面(例如 https://open.winmo.com/open/decision_makers/ca/pasadena/jorge/garcia/489325)中解析某些没有 ID 或 class 值的 span 标签,即:

<span itemprop="email">j****@***********.com</span>
<div itemscope="" itemprop="address" itemtype="http://schema.org/PostalAddress">
<span itemprop="streetAddress">177 East West Colorado Boulevard</span>
<span itemprop="addressLocality">Pasadena</span>,
<span itemprop="addressRegion">CA</span>
<span itemprop="postalCode">91195</span>
<span itemprop="addressCountry">USA</span>

我发现两个旧的 Stack Overflow 示例很有帮助(这个和这个),但使用以下代码时,网页上 9 个带 itemprop 的 span 匹配行中的每一行得到的仍然都是 null:

// Asker's attempt: walk every <span> on the page and, for those carrying an
// itemprop attribute, record the node's name and value.
var nodes=[], values=[];
var els = document.getElementsByTagName('span'), i = 0, whatev;
for(i; i < els.length; i++) {
    // NOTE(review): `prop` is never declared, so in non-strict code this assignment creates an implicit global.
    prop = els[i].getAttribute('itemprop');
    if(prop) {
        whatev = els[i];
        nodes.push(whatev.nodeName); // nodeName is the element's tag name in upper case ("SPAN"), not the itemprop name
        values.push(whatev.nodeValue); // nodeValue is null for element nodes, hence the null entries; the span's text is exposed through textContent instead
        console.log(values); // logs the whole accumulated array each time a matching span is found
       // break; // need this? seems to prevent values after first span from generating
    }
}

我如何才能从这类页面中只返回部分隐藏的电子邮件值 (j****@***********.com) 和邮政编码 (91195)?我需要纯 JS 的解决方案,因为我会将它压缩成书签小工具(bookmarklet)供其他人使用。

您可以通过以下选择器获取电子邮件所在的 span:

span[itemprop="email"]

postalCode 也可以用同样的方法获取:

span[itemprop="postalCode"]

使用这些选择器,使用 querySelector 获取元素,然后提取其 textContent:

// Returns the text of the first <span> whose itemprop attribute equals `prop`.
// Throws if the page has no such span, because querySelector then yields null.
const readItemprop = (prop) =>
  document.querySelector(`span[itemprop="${prop}"]`).textContent;

const email = readItemprop('email');
const postalCode = readItemprop('postalCode');

console.log(email);
console.log(postalCode);
<span itemprop="email">j****@***********.com</span>
<div itemscope="" itemprop="address" itemtype="http://schema.org/PostalAddress">
<span itemprop="streetAddress">177 East West Colorado Boulevard</span>
<span itemprop="addressLocality">Pasadena</span>,
<span itemprop="addressRegion">CA</span>
<span itemprop="postalCode">91195</span>
<span itemprop="addressCountry">USA</span>

您也可以直接把每个 span 的 itemprop 属性值用作键,把它的文本用作值来赋值。

像这样:

/**
 * Builds a plain object from the <span> elements of the current document.
 *
 * Each span that has both a non-empty `itemprop` attribute and non-empty
 * `innerText` contributes one entry: the attribute value is the key and the
 * text is the value. When several spans share an itemprop, the last one wins.
 *
 * @returns {Object<string, string>} itemprop name -> span text.
 */
function getItemPropsAsJSON() {
  const result = {};
  for (const span of Array.from(document.getElementsByTagName('span'))) {
    const name = span.getAttribute('itemprop');
    const text = span.innerText;
    // Skip spans without an itemprop as well as spans without any text.
    if (!name || !text) continue;
    result[name] = text;
  }
  return result;
}
/* expected output: 
    {
      "name": "Jorge Garcia - Co-Founder & Chief Technology Officer, ICONIC | Contact Information, Email Address, Phone Number, Budgets and Responsibilities",
      "email": "j****@***********.com",
      "telephone": "(347) ***-****",
      "streetAddress": "177 East West Colorado Boulevard",
      "addressLocality": "Pasadena",
      "addressRegion": "CA",
      "postalCode": "91195",
      "addressCountry": "USA"
    }
*/

如果您在其他地方使用它,您可能需要规范化键,因为 itemprop 属性可能并不总是转换为理想的对象表示法格式。为此,请使用以下内容:

/**
 * Converts an arbitrary attribute name into a lower-case, underscore-separated
 * key that is convenient to use as an object property.
 *
 * - A key made only of word characters that contains capitals is treated as
 *   camelCase and split before each capital: "streetAddress" -> "street_address".
 * - Any other key has each run of non-word characters replaced by a single
 *   underscore: "postal code" -> "postal_code".
 * - Leading digits are stripped and the result is lower-cased in both cases.
 *
 * The key is trimmed BEFORE the camelCase check, so surrounding whitespace
 * no longer pushes a camelCase key into the non-word branch (where its word
 * boundaries were lost).
 *
 * @param {*} key - The raw attribute name.
 * @returns {string} The normalized key, or 'failed_object' when `key` is not
 *   a string or is empty / whitespace-only.
 */
function normalizeObjectNotation(key) {
  if (typeof key !== 'string') return 'failed_object';

  const trimmed = key.trim();
  if (trimmed === '') return 'failed_object';

  const isCamelCase = /[A-Z]/.test(trimmed) && !/\W/.test(trimmed);
  const underscored = isCamelCase
    ? trimmed.split(/(?=[A-Z])/).join('_') // zero-width split keeps each capital with its word
    : trimmed.replace(/\W+/g, '_');

  return underscored.replace(/^\d+/, '').toLowerCase();
}

/**
 * Builds a plain object from the <span> elements of the current document,
 * passing every key through normalizeObjectNotation.
 *
 * A span contributes an entry only when both its `itemprop` attribute and its
 * `innerText` are non-empty. When two spans map to the same normalized key,
 * the later one overwrites the earlier one.
 *
 * @returns {Object<string, string>} normalized itemprop name -> span text.
 */
function getItemPropsAsJSON() {
  const spans = Array.from(document.getElementsByTagName('span'));
  return spans.reduce((props, span) => {
    const rawKey = span.getAttribute('itemprop');
    const text = span.innerText;
    if (rawKey && text) {
      props[normalizeObjectNotation(rawKey)] = text;
    }
    return props;
  }, {});
}
// Run the collector. NOTE(review): the return value is not stored or logged, so this
// presumably is meant to be evaluated in the browser console, which echoes the result.
getItemPropsAsJSON()

/* Expected Output:

{
  "name": "Jorge Garcia - Co-Founder & Chief Technology Officer, ICONIC | Contact Information, Email Address, Phone Number, Budgets and Responsibilities",
  "email": "j****@***********.com",
  "telephone": "(347) ***-****",
  "street_address": "177 East West Colorado Boulevard",
  "address_locality": "Pasadena",
  "address_region": "CA",
  "postal_code": "91195",
  "address_country": "USA"
}

*/