How to get promises from nested arrays?

Can someone help me with this?

I'm trying to scrape a website and store the collected data in a JSON file. I'm using cheerio and request-promise.

The JSON structure is like this: companies > packages > cities

      "companies": [
    {
      "id": 0,
      "name": "companyName",
      "url": "https://www.url-company.net/",
      "packages": [
        {
          "id": 0,
          "name": "general",
          "url": "https://www.url-company.net/package",
          "cities": [
            {
              "id": 0,
              "name": "cityName",
              "url": "https://www.url-company.net/package/city",
            },
            ...]
        }
      ...]
    }
  ..]

I have already extracted the array of companies from the site.

I can populate the companies and packagesByCompany, but I got lost trying to populate citiesByPackage:

const rp = require('request-promise');
const cheerio = require('cheerio');
const jsonfile = require('jsonfile');
const baseUrl = 'https://www.base-url-example.net';

scrapeAll();


function scrapeAll() {
    return scrapeCompanies().then(function (dataCompanys) {
        //Map every endpoint so we can make a request with each URL
        var promises = dataCompanys.map(function (company) {
            return scrapePackagesByCompany(company)  // Populate each company with all the array of packages from this company
        });
        return Promise.all(promises);
    })
    .then(function(promiseArray) { // Need help here!!!!

        var promise4all = Promise.all(
            promiseArray.map(function(company) {

                 return Promise.all(   // This is NOT working, I do not know how to get promises from nested arrays
                    company.packages.map(function(package) {

                         return Promise.all(
                            scrapeCitiesByPackage(package) // Try to populate each package with all the array of cities from this package
                             );
                    })
                     );
            })
         );

        return promise4all;
    })
    .then(function (data) {
        saveScrapedDateIntoJsonFile(data);

        return data;
    })
    .catch(function (err) {
        return Promise.reject(err);
    });
}

function scrapeCompanies() {
    return rp(baseUrl)
      .then(function(html){
        const data = []; 
        let companysImg = '#content section .elementor-container > .elementor-row > .elementor-element.elementor-top-column .elementor-widget-wrap .elementor-widget-image >.elementor-widget-container > .elementor-image';
        let $ = cheerio.load(html); 
        
        $(companysImg).each(function(index, element){
            
            const urlCompany = $(element).find('a').attr('href');
            const imgCompany = $(element).find('img').data('lazy-src');
            
            if (urlCompany && imgCompany) {
                
                const nameCompany = urlCompany;
    
                const company = {
                    id : index,
                    name: nameCompany,
                    url : baseUrl + urlCompany,
                    img: imgCompany,
                };
    
                data.push(company);
            }       
        });     
        
        return data;
      })
      .catch(function(err){
        //handle error
        console.error('errorrr2', err);
      });
}


  function scrapePackagesByCompany(company) {
    return rp(company.url)
        .then(function(html){
            company.packages = []; 
            let packagesImg = '#content section .elementor-container > .elementor-row > .elementor-element.elementor-top-column .elementor-widget-wrap .elementor-widget-image >.elementor-widget-container > .elementor-image';
            let $ = cheerio.load(html); 
            
            $(packagesImg).each(function(index, element){
                
                const urlPackage = $(element).find('a').attr('href');
                const imgPackage = $(element).find('img').data('lazy-src');

                if (urlPackage && imgPackage) {
                    
                    const namePackage = urlPackage; // urlPackage is already a string (the href), so no .text() call

                    const package = {
                        id : index,
                        name: namePackage,
                        url : urlPackage,
                        img: imgPackage,
                    };

                    company.packages.push(package);
                }       
            });

            return company;
        })      
        .catch(function(err){
            //handle error
            console.error('errorrr2', err);
        });
  }


  function scrapeCitiesByPackage(insurancePackage) {
    return rp(insurancePackage.url)
        .then(function(html){
            insurancePackage.cities = []; 
            let citiesLinks = '#content section .elementor-container > .elementor-row > .elementor-element .elementor-widget.elementor-widget-posts .elementor-posts-container article';
            let $ = cheerio.load(html); 
            
            $(citiesLinks).each(function(index, element) {
                
                const $linkCity = $(element).find('a');
                const urlCity = $linkCity.attr('href');
                const nameCity = $linkCity.text();
                
                if (urlCity && nameCity) {
                    
                    const city = {
                        id : index,
                        name: nameCity,
                        url : urlCity,
                    };
                    insurancePackage.cities.push(city);
                }       
            });
            return insurancePackage;

        })
        .catch(function(err){
            //handle error
            console.error('errorrr2', err);
        });
  }


  function saveScrapedDateIntoJsonFile(data) {
    jsonfile.writeFile(
        './data/company.json',
        {companies : data },
        //data,
        {spaces: 2},
        function(err) {
            console.error('errorrr', err);
        });
  }

Thanks in advance :)

What you are attempting may well work, but arguably scrapePackagesByCompany() and scrapeCitiesByPackage() should simply deliver data, with all of the "assembly" work (i.e. bundling the returned arrays into the higher-level objects) performed in scrapeAll().

You could write it like this:

scrapeAll()
.catch(function(err) {
    console.log(err);
});

function scrapeAll() {
    return scrapeCompanies()
    .then(function(companies) {
        return Promise.all(companies.map(function(company) {
            return scrapePackagesByCompany(company)
            .then(function(packages) {
                company.packages = packages; // assembly
                return Promise.all(packages.map(function(package) {
                    return scrapeCitiesByPackage(package)
                    .then(function(cities) {
                        package.cities = cities; // assembly
                    });
                }));
            });
        }))
        .then(function() {
            return saveScrapedDateIntoJsonFile(companies);
        });
    });
}

It should then be fairly simple to reduce scrapePackagesByCompany() and scrapeCitiesByPackage(package) so that they deliver just the packages array and the cities array respectively, as sketched below.
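
As a minimal sketch (reusing the selector and the rp/cheerio requires from the question unchanged), the simplified scrapeCitiesByPackage() could look something like this: it returns a plain cities array instead of mutating the package object, leaving that assembly step to scrapeAll().

function scrapeCitiesByPackage(insurancePackage) {
    return rp(insurancePackage.url)
    .then(function(html) {
        const $ = cheerio.load(html);
        const cities = [];
        // Same selector as in the question; only the return value changes.
        const citiesLinks = '#content section .elementor-container > .elementor-row > .elementor-element .elementor-widget.elementor-widget-posts .elementor-posts-container article';

        $(citiesLinks).each(function(index, element) {
            const $linkCity = $(element).find('a');
            const urlCity = $linkCity.attr('href');
            const nameCity = $linkCity.text();

            if (urlCity && nameCity) {
                cities.push({ id: index, name: nameCity, url: urlCity });
            }
        });

        return cities; // deliver data only; scrapeAll() attaches it to the package
    });
}

scrapePackagesByCompany() can be reduced in exactly the same way, returning just the packages array and letting scrapeAll() attach it to the company.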