Web Crawler - 返回一个数组以在下一个函数中使用

Question

我可以确认的第一个函数可以正常工作。

我想 return 一个数组到变量 AtoZLinks 以便它可以在后面的函数中使用。我将向数组中的每个 url 发出请求，并从这些链接中提取更多信息。

非常感谢，我已经将这个项目作为一个项目工作了几天，我是 jQuery、网络爬虫、JS、NodeJS 和 expressJS 的初学者。为工作埋头苦干

    var express = require('express');
    var request = require('request');
    var cheerio = require('cheerio');
    var router = express.Router();

    var fullUrl;
    fullUrl = [];

    var AtoZLinks = function(){
        var url = 'http://example1.com';
        request(url, function(error, response, html) {
            if (!error && response.statusCode === 200) {
                var $ = cheerio.load(html);
                var fullUrl = [];
                var places = "Places";
                $('.clear a').each(function() {
                    var link = $(this);
                    link.each(function(index) {
                        var href = link.attr('href');
                        if (href.match(places)) {
                            // The urls from fullUrl here to be returned to parent variable.
                            fullUrl[index] = url + href; // Think something is wrong here... I've also tried "fullUrl.push(url + href);"
                            console.log(fullUrl); // This prints out all urls correctly

                        }
                    });
                });
                for (var i = 0; i < fullUrl.length; i++) {
                    console.log(fullUrl[i];
                } // This code only prints out the last url stored (So I'm thinking the urls are being stored incorrectly...)
            }
        });
    };

    /* GET crawler page. */
    router.get('/crawler', function(req, res, next) {
      AtoZLinks();
      next();
    }, function(req, res) {

    });

    module.exports = router;

    // Feel free to ignore the following work I've done or..
    // Your support with the the following function will be a bonus!
    // I need to use the links in the previous array variable in the following
    // function to extract further urls within those urls that I will work with.

    var url = AtoZLinks;

    request(AtoZLinks, function(error, response, html) {
        if (!error && response.statusCode === 200) {

            var $ = cheerio.load(html);
            // This selector is the code needed to extract the links from within the
            // links in the AtoZLinks array.
            $('div.fclist-section.clear.list-four').each(function() {
                $(this).find('a').each(function() {
                    var link = $(this);
                    var href = link.attr('href');
                    fullUrl = url + href;
                    console.log(fullUrl);
                });
            });

        }
    );

Answer 1

你的意思是这样的吗？

var arrURLs;

arrURLs = [
    'www.ask.com',
    'www.google.com',
    'www.bing.com',
    'www.yahoo.com'
];

AtoZLinks(arrURLs);

  var AtoZLinks = function(theURLs){
      for (var i = 0; i < theURLs.length; i++) {
          var url = theURLs[i];
          request(url, function(error, response, html) {
              if (!error && response.statusCode === 200) {
                  var $ = cheerio.load(html);
                  var fullUrl = [];
                  var places = "Places";
                  $('.clear a').each(function() {
                      var link = $(this);
                      link.each(function(index) {
                          var href = link.attr('href');

                          //absolute match
                          if (href === url) {
                            //true
                          } else {
                            //false
                          }

                          //href contains url
                          if (href.indexOf(url) > -1) {
                            //true
                          } else {
                            //false
                          }                              

                          if (href.match(places)) {
                              // The urls from fullUrl here to be returned to parent variable.
                              fullUrl.push(url + href);
                              console.log(JSON.stringify(fullUrl));

                          }
                      });
                  });
              }
          });
      }
  };

Web Crawler - 返回一个数组以在下一个函数中使用

Web Crawler - returning an array to be used in the next function

javascript

jquery

request

node.js

cheerio