正在尝试检索 Express/NodeJS 中的 url 元数据

Trying to retrieve url metadata in Express/NodeJS

我有一个资源,其中包含用户输入的 URL 字段。我正在尝试使用这个包:https://github.com/mozilla/page-metadata-parser 来检索与 URL 关联的标题和描述,并在创建时将其保存到数据库中。

我已将包文档中建模的代码添加到 Express 中的 post 请求并且没有错误,新书签已创建但元数据值未返回。

这是我的模型:

const mongoose = require('mongoose');

const { Schema } = mongoose;

const BookmarksSchema = new Schema({
  userId: {
    type: Schema.Types.ObjectId,
    required: true
  },
  url: {
    type: String,
    trim: true,
    required: true
  },

...

  title: {
    type: String,
    trim: true,
    required: false
  },
  description: {
    type: String,
    trim: true,
    required: false
  }
});

mongoose.model('Bookmarks', BookmarksSchema);

我的创建方法:

const mongoose = require('mongoose');
const passport = require('passport');
const router = require('express').Router();
const auth = require('../auth');
const Bookmarks = mongoose.model('Bookmarks');

router.post('/', auth.required, (req, res, next) => {
  const userId = req.user.id;
  const bookmark = req.body.bookmark;

  if (!bookmark.url) {
    return res.status(422).json({
      errors: {
        url: 'is required',
      },
    });
  }

  const { getMetadata } = require('page-metadata-parser');
  const domino = require('domino');

  const url = bookmark.url;
  const response = fetch(url);
  const html = response.text();
  const doc = domino.createWindow(html).document;
  const metadata = getMetadata(doc, url);

  bookmark.userId = userId;
  bookmark.title = metadata.title;
  bookmark.description = metadata.description;

  const finalBookmark = new Bookmarks(bookmark);

  return finalBookmark.save()
    .then(() => res.json({ bookmark: finalBookmark }));
});

和package.json:

{
  "name": "server",
  "version": "1.0.0",
  "description": "",
  "main": "app.js",
  "dependencies": {
    "body-parser": "^1.18.3",
    "cors": "^2.8.5",
    "domino": "^2.1.3",
    "errorhandler": "^1.5.0",
    "express": "^4.16.4",
    "express-jwt": "^5.3.1",
    "express-session": "^1.15.6",
    "jsonwebtoken": "^8.5.1",
    "mongoose": "^5.4.20",
    "morgan": "^1.9.1",
    "page-metadata-parser": "^1.1.3",
    "passport": "^0.4.0",
    "passport-local": "^1.0.0",
    "path": "^0.12.7"
  },
  "devDependencies": {},
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1",
    "dev": "nodemon app"
  },
  "author": "",
  "license": "ISC"
}

在此处发布答案以便我们将答案标记为正确。

错误是由于对 fetch() 的调用是异步调用并且未使用关键字 await 而引起的。 NPM 站点上的示例可在此处找到:

https://www.npmjs.com/package/page-metadata-parser

表明他们在 fetch() 调用中使用了 await。为了使用await匿名回调函数,以(req, res, next)开头的函数前面必须有关键字async。调用应该如下所示:

router.post('/', auth.required, async (req, res, next) => {
     // Do your stuff here as before.
     const url = bookmark.url;
     const response = await fetch(url);
     const html = response.text();
     const doc = domino.createWindow(html).document;
     const metadata = getMetadata(doc, url);
     // Finish stuff here.
});

现在正在填充响应,程序会等到 fetch 调用完成后再继续,从而填充剩余的变量并能够获取元数据。