How to extract / parse HTML using Microdata

I am new to Microdata.

I have an HTML string with Microdata. I am trying to figure out if it is possible to dynamically retrieve the required information using Microdata with JS or jQuery. Has anyone done this before?

HTML string example: I'm trying to get the "content" value of the element with itemprop 'ratingValue' that belongs to the item whose itemprop 'name' is 'Blendmagic'.

<html>
    <!-- Sample input: two schema.org Offer items, each with a name, a price and a nested rating item. -->
    <div itemscope itemtype="http://schema.org/Offer">
        <span itemprop="name">Blendmagic</span>
        <span itemprop="price">$19.95</span>
        <!-- "reviews" carries both itemprop and itemscope, so its value is itself an item. -->
        <div itemprop="reviews" itemscope itemtype="http://schema.org/AggregateRating">
            <img src="four-stars.jpg" />
            <!-- The rating values are stored in the content attribute of meta elements. -->
            <meta itemprop="ratingValue" content="4" />
            <meta itemprop="bestRating" content="5" />
            Based on <span itemprop="ratingCount">25</span> user ratings
        </div>
    </div>
    <div itemscope itemtype="http://schema.org/Offer">
        <span itemprop="name">testMagic</span>
        <span itemprop="price">$10.95</span>
        <div itemprop="reviews" itemscope itemtype="http://schema.org/AggregateRating">
            <img src="four-stars.jpg" />
            <meta itemprop="ratingValue" content="4" />
            <meta itemprop="bestRating" content="5" />
            Based on <span itemprop="ratingCount">25</span> user ratings
        </div>
    </div>
</html>
+4
source share
2 answers

Try starting with the root `itemscope` nodes and filtering the descendants that have an `itemprop` attribute; return an object `result` containing an array `items` that holds the Microdata items.

This solution is based on the algorithm found in the Microdata specification:

7 Convert HTML to other formats

7.1 JSON

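Given a list of nodes `nodes` in a Document, a user agent must run the following algorithm to extract the microdata from those nodes into a JSON form:

1. Let `result` be an empty object.

2. Let `items` be an empty array.

3. For each `node` in `nodes`, check if the element is a top-level microdata item, and if it is then get the object for that element and add it to `items`.

4. Add an entry to `result` called "items" whose value is the array `items`.

5. Return the result of serializing `result` to JSON in the shortest possible way (meaning no whitespace between tokens, no unnecessary zero digits in numbers, and only using Unicode escapes in strings for characters that do not have a dedicated escape sequence), and with a lowercase "e" used, when appropriate, in the representation of any numbers. [JSON]

This algorithm returns an object with a single property that is an array, instead of just returning an array, so that it is possible to extend the algorithm in the future if necessary.

When the user agent is to get the object for an item `item`, optionally with a list of elements `memory`, it must run the following substeps:

1. Let `result` be an empty object.

2. If no `memory` was passed to the algorithm, let `memory` be an empty list.

3. Add `item` to `memory`.

4. If the item has any item types, add an entry to `result` called "type" whose value is an array listing the item types of `item`, in the order they were specified on the itemtype attribute.

5. If the item has a global identifier, add an entry to `result` called "id" whose value is the global identifier of `item`.

6. Let `properties` be an empty object.

7. For each element `element` that has one or more property names and is one of the properties of the item `item`, in the order those elements are given by the algorithm that returns the properties of an item, run the following substeps:

   1. Let `value` be the property value of `element`.

   2. If `value` is an item, then: if `value` is in `memory`, then let `value` be the string "ERROR". Otherwise, get the object for `value`, passing a copy of `memory`, and then replace `value` with the object returned from those steps.

   3. For each name `name` in `element`'s property names, run the following substeps:

      1. If there is no entry named `name` in `properties`, then add an entry named `name` to `properties` whose value is an empty array.

      2. Append `value` to the entry named `name` in `properties`.

8. Add an entry to `result` called "properties" whose value is the object `properties`.

9. Return `result`.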

/**
 * Splits a space-separated attribute value (itemtype / itemprop) into its
 * non-empty tokens. A missing attribute (null) yields an empty array.
 *
 * @param {?string} value - Raw attribute value.
 * @returns {string[]} The tokens in their original order.
 */
function splitTokens(value) {
  return (value || "").split(/\s+/).filter(function(token) {
    return token !== "";
  });
}

/**
 * Returns the textual value of a property element that is not itself an item:
 * the `content` property (meta), else the text content, else `src`.
 *
 * @param {Element} el - Element carrying an itemprop attribute.
 * @returns {(string|undefined)} The property value.
 */
function getPropertyValue(el) {
  return el.content || el.textContent || el.src;
}

/**
 * Collects, in document order, the elements that are properties of `item`.
 * The walk does not descend into a nested itemscope, because the properties
 * found there belong to the nested item and not to `item`.
 * NOTE: itemref is not handled.
 *
 * @param {Element} item - Element carrying an itemscope attribute.
 * @returns {Element[]} The property elements owned directly by `item`.
 */
function collectPropertyElements(item) {
  var found = [];
  (function walk(parent) {
    for (var i = 0; i < parent.children.length; i++) {
      var child = parent.children[i];
      if (child.hasAttribute("itemprop")) {
        found.push(child);
      }
      if (!child.hasAttribute("itemscope")) {
        walk(child);
      }
    }
  })(item);
  return found;
}

/**
 * Builds the JSON-style object for one Microdata item:
 * `{ type: [...], properties: { name: [values...] } }`.
 * "type" is only present when the item declares an itemtype. Nested items are
 * converted recursively, and repeated property names accumulate their values.
 *
 * @param {Element} item - Element carrying an itemscope attribute.
 * @returns {Object} The item object.
 */
function getItemObject(item) {
  var obj = {};
  var types = splitTokens(item.getAttribute("itemtype"));
  if (types.length > 0) {
    obj.type = types;
  }
  var properties = {};
  collectPropertyElements(item).forEach(function(el) {
    var value = el.hasAttribute("itemscope") ?
      getItemObject(el) :
      getPropertyValue(el);
    // An itemprop attribute may list several names; the value is stored under each.
    splitTokens(el.getAttribute("itemprop")).forEach(function(name) {
      if (!Object.prototype.hasOwnProperty.call(properties, name)) {
        properties[name] = [];
      }
      properties[name].push(value);
    });
  });
  obj.properties = properties;
  return obj;
}

/**
 * Extracts every top-level Microdata item found below `root`.
 * An item is top-level when it has itemscope but no itemprop; items that are
 * the value of a property are reported only inside their parent item.
 *
 * @param {(Document|Element)} root - Node whose descendants are scanned.
 * @returns {{items: Object[]}} Object holding the extracted items.
 */
function extractMicrodata(root) {
  var topLevel = [];
  (function walk(parent) {
    for (var i = 0; i < parent.children.length; i++) {
      var child = parent.children[i];
      if (child.hasAttribute("itemscope") && !child.hasAttribute("itemprop")) {
        topLevel.push(getItemObject(child));
      }
      walk(child);
    }
  })(root);
  return {
    items: topLevel
  };
}

/**
 * Looks up property `propName` inside the nested "reviews" item of the first
 * item whose "name" property contains `itemName`.
 *
 * @param {Object[]} items - Items as produced by extractMicrodata().
 * @param {string} itemName - Value of the "name" property to match.
 * @param {string} propName - Property to read from the nested "reviews" item.
 * @returns {(Object|undefined)} `{ [itemName]: { [propName]: values } }`, or
 *   undefined when no item matches or the match has no nested "reviews" item.
 */
function findReviewProperty(items, itemName, propName) {
  var match = items.find(function(item) {
    var names = item.properties.name;
    return Array.isArray(names) && names.indexOf(itemName) !== -1;
  });
  var reviews = match && match.properties.reviews;
  var nested = reviews && reviews[0] && reviews[0].properties;
  if (!nested) {
    return undefined;
  }
  var inner = {};
  inner[propName] = nested[propName];
  var res = {};
  res[itemName] = inner;
  return res;
}

var result = {};
var items = [];
var props = ["Blendmagic", "ratingValue"];
var data;

// The demo below needs a browser DOM; the helpers above work on any node-like tree.
if (typeof document !== "undefined") {
  result = extractMicrodata(document);
  items = result.items;

  console.log(result);

  // textContent is used instead of HTML insertion so that markup characters
  // in the extracted page text are shown literally rather than parsed.
  var resultPre = document.createElement("pre");
  resultPre.textContent = JSON.stringify(result, null, 2);
  document.body.appendChild(resultPre);

  // get the 'content' corresponding to itemprop 'ratingValue'
  // for item prop-name 'Blendmagic'
  data = findReviewProperty(result.items, props[0], props[1]);

  console.log(data);

  // Show the looked-up value above the full extraction result.
  var dataPre = document.createElement("pre");
  dataPre.textContent = String(JSON.stringify(data, null, 2));
  resultPre.parentNode.insertBefore(dataPre, resultPre);
}
<!DOCTYPE html>
<html>

<head>
</head>

<body>
  <!-- Demo markup for the script: two schema.org Offer items. -->
  <div itemscope itemtype="http://schema.org/Offer">
    <span itemprop="name">Blendmagic</span>
    <span itemprop="price">$19.95</span>
    <!-- "reviews" carries both itemprop and itemscope, so its value is a nested item. -->
    <div itemprop="reviews" itemscope itemtype="http://schema.org/AggregateRating">
      <img data-src="four-stars.jpg" />
      <meta itemprop="ratingValue" content="4" />
      <meta itemprop="bestRating" content="5" />Based on <span itemprop="ratingCount">25</span> user ratings
    </div>
  </div>
  <div itemscope itemtype="http://schema.org/Offer">
    <span itemprop="name">testMagic</span>
    <span itemprop="price">$10.95</span>
    <div itemprop="reviews" itemscope itemtype="http://schema.org/AggregateRating">
      <img data-src="four-stars.jpg" />
      <meta itemprop="ratingValue" content="4" />
      <meta itemprop="bestRating" content="5" />Based on <span itemprop="ratingCount">25</span> user ratings
    </div>
  </div>
</body>

</html>
Hide result

. Microdata

+5

Fiddle

// Alert the 'ratingValue' content of the item whose name is 'Blendmagic'.
// The rating <meta> is searched inside the itemscope that encloses the
// matching name <span>, not by its position among all rating <meta> elements
// in the document, so an item without a rating cannot shift the result onto
// another item's rating.
$("span[itemprop='name']").each(function(index, nameEl) {
    var $name = $(nameEl);
    if ($name.text() === 'Blendmagic') {
        // closest() climbs to the enclosing item; find() also reaches the
        // <meta> inside the nested "reviews" item. attr() reads the first match.
        alert($name.closest("[itemscope]").find("meta[itemprop='ratingValue']").attr('content'));
    }
});
0

All Articles