Axios.get 数据太慢 return

Axios.get too slow to return data

我有一个 axios get 请求需要很长时间才能解决。这是针对托管在 Heroku 上的站点,该站点的请求超时设置为 30 秒。下面的代码 returns 大约 50 秒后的请求(这段时间出奇地长,因为在 playerLink 中只有 21 个 url 可以循环)。因此,该请求永远不会在实时站点上得到解决。

这是 Promise 代码:

const PORT = 8000

const axios = require('axios')
const cheerio = require('cheerio')
const express = require('express')
const cors = require('cors')

const app = express()
app.use(cors())


app.listen(PORT , () => console.log(`server running on PORT ${PORT}`))

const players = 'https://www.trinethunder.com/sports/sball/2021-22/teams/trine?view=roster'
const playerStats = 'https://www.trinethunder.com'

const playerLink = []

app.get('/players', (req, res) => {
    function getPlayers() {
      return new Promise((resolve, reject) => {
        axios(players)
          .then((response) => {
            const html = response.data;
            const $ = cheerio.load(html);
            
            $("td.text.pinned-col > a", html).each(function () {
              var link = $(this).attr("href");
              //if link not yet in array, push to array
              if (playerLink.indexOf(playerStats + link) === -1) {
                playerLink.push(playerStats + link);
              }
            });
            resolve()
          })
          .catch((err) => {
            console.log(err);
          });
      });
    }
    function getPlayerStats() {
      setTimeout(async () => {
        const statsArray = []
        for (let i = 0; i < playerLink.length; i++) {
          await new Promise((resolve, reject) => {
            axios.get
              (playerLink[i])
              .then((response) => {
                const html = response.data;
                const $ = cheerio.load(html);
                const statName = [];
                const statDesc = [];
                const statNum = [];
  
                $("h2 > span:nth-child(1)", html).each(function () {
                  var name = $(this).text();
                  statName.push(name);
                });
                $(".stat-title", html).each(function () {
                  var stat1 = $(this).text();
                  statDesc.push(stat1);
                });
                $(".stat-value", html).each(function () {
                  var stat2 = $(this).text();
                  statNum.push(stat2);
                });
                //Conditional is here because sometimes statsArray
                //gets filled multiple times
                if (statsArray.length < 63) {
                  statsArray.push(statName, statDesc, statNum);
                }
                resolve();
              })
              .catch((err) => console.log(err));
          });
        }
        res.json(statsArray)
      }, 400);
    }
    
    getPlayers()
      .then(getPlayerStats)
      .catch((err) => console.log(err));
  });

/players 的简化获取语句:

fetch('http://localhost:8000/players')
    .then(response => response.json())
    .then(data => {
        
     console.log(data)
             
}).catch(err=>console.log(err))

如果您发现任何可能减慢请求执行速度的问题,请告诉我。

我清理了代码,删除了 setTimeout(),将其设置为最大并行化并对其进行了检测,使其可以 运行 stand-alone。这样做之后,它产生的日志如下,我看到 getPlayers() 花费了 2413 毫秒,各个玩家请求的同步 cheerio 处理总共花费了 6087 毫秒。从开始到结束,整个过程在我的系统上花费了 9415 毫秒。

这比您报告的要快得多。我所做的最大结构更改是所有单独的 getPlayerStat 请求都是并行发出的,而不是串行发出的,这(如果目标服务器可以处理)将缩短获取玩家统计信息的网络请求的总等待时间。我还删除了 setTimeout(),因为这似乎是解决其他问题的 hack,一旦代码结构正确进行异步处理,就没有必要了。

如果您想查看所有详细时间都花在哪里,这里有详细的日志。您可以 运行 在您自己的系统上使用下面的代码来查看结果:

000000:  begin all
000006:  begin getPlayers()
002419:  end getPlayers()
002419:  begin getPlayerStats
002420:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/makinzeromingersy0k
002423:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/emersynhaneyjnrb
002424:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/amandapratheruluw
002424:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/adrienneroseybff7
002425:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/emmabeyeri6zz
002426:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/aprilsellersi95s
002427:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/annakoeppl38q8
002427:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/annagilli8rl
002428:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/angelenaperry2scn
002429:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/laurenclausenfb4j
002430:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/emilywheaton1jym
002430:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/kaylyncoahranhp6r
002431:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/mercededaughertyiswy
002432:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/taylormurdockgeho
002432:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/lexiclark77gr
002433:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/ainsleyphillipsmfe9
002434:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/ellietrinexhe2
002434:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/ashleyswartouta714
002435:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/gisellerileybdb8
002436:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/elizabethkoch5umu
002436:  begin get https://www.trinethunder.com/sports/sball/2021-22/players/scarlettelliott0bvt
003251:  after get https://www.trinethunder.com/sports/sball/2021-22/players/kaylyncoahranhp6r
003596:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/kaylyncoahranhp6r
003599:  after get https://www.trinethunder.com/sports/sball/2021-22/players/makinzeromingersy0k
003902:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/makinzeromingersy0k
003905:  after get https://www.trinethunder.com/sports/sball/2021-22/players/emersynhaneyjnrb
004200:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/emersynhaneyjnrb
004203:  after get https://www.trinethunder.com/sports/sball/2021-22/players/amandapratheruluw
004489:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/amandapratheruluw
004492:  after get https://www.trinethunder.com/sports/sball/2021-22/players/emmabeyeri6zz
004771:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/emmabeyeri6zz
004773:  after get https://www.trinethunder.com/sports/sball/2021-22/players/aprilsellersi95s
005060:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/aprilsellersi95s
005063:  after get https://www.trinethunder.com/sports/sball/2021-22/players/elizabethkoch5umu
005345:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/elizabethkoch5umu
005348:  after get https://www.trinethunder.com/sports/sball/2021-22/players/emilywheaton1jym
005638:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/emilywheaton1jym
005643:  after get https://www.trinethunder.com/sports/sball/2021-22/players/ashleyswartouta714
005943:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/ashleyswartouta714
005951:  after get https://www.trinethunder.com/sports/sball/2021-22/players/ainsleyphillipsmfe9
006243:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/ainsleyphillipsmfe9
006245:  after get https://www.trinethunder.com/sports/sball/2021-22/players/adrienneroseybff7
006541:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/adrienneroseybff7
006545:  after get https://www.trinethunder.com/sports/sball/2021-22/players/annagilli8rl
006821:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/annagilli8rl
006824:  after get https://www.trinethunder.com/sports/sball/2021-22/players/mercededaughertyiswy
007111:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/mercededaughertyiswy
007118:  after get https://www.trinethunder.com/sports/sball/2021-22/players/lexiclark77gr
007402:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/lexiclark77gr
007411:  after get https://www.trinethunder.com/sports/sball/2021-22/players/angelenaperry2scn
007681:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/angelenaperry2scn
007685:  after get https://www.trinethunder.com/sports/sball/2021-22/players/laurenclausenfb4j
007974:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/laurenclausenfb4j
007976:  after get https://www.trinethunder.com/sports/sball/2021-22/players/scarlettelliott0bvt
008265:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/scarlettelliott0bvt
008267:  after get https://www.trinethunder.com/sports/sball/2021-22/players/ellietrinexhe2
008553:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/ellietrinexhe2
008555:  after get https://www.trinethunder.com/sports/sball/2021-22/players/gisellerileybdb8
008838:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/gisellerileybdb8
008840:  after get https://www.trinethunder.com/sports/sball/2021-22/players/annakoeppl38q8
009129:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/annakoeppl38q8
009131:  after get https://www.trinethunder.com/sports/sball/2021-22/players/taylormurdockgeho
009415:  after cheerio parse https://www.trinethunder.com/sports/sball/2021-22/players/taylormurdockgeho
009415:  end all
 ... data here
getPlayers() took 2413ms
cheerio processing took 6087ms

而且,这是 stand-alone 代码,任何人都可以 运行:

const axios = require('axios');
const cheerio = require('cheerio');

const players = 'https://www.trinethunder.com/sports/sball/2021-22/teams/trine?view=roster'
const playerStats = 'https://www.trinethunder.com'



const zeroes = "000000000000000000000000000000";

function zeroPad(num, padLen) {
    let str = num + "";
    let padNum = padLen - str.length;
    if (padNum > 0) {
        str = zeroes.slice(0, padNum) + str;
    }
    return str;
}

const base = Date.now();

function log(...args) {
    let delta = Date.now() - base;
    let deltaPad = zeroPad(delta, 6);
    console.log(deltaPad + ": ", ...args);
}

let getPlayersT = 0;
let cheerioT = 0;

async function run() {

    async function getPlayers() {
        log("begin getPlayers()");
        let startT = Date.now();
        const playerLink = [];
        const response = await axios(players);
        const html = response.data;
        const $ = cheerio.load(html);

        $("td.text.pinned-col > a", html).each(function() {
            const link = $(this).attr("href");
            //if link not yet in array, push to array
            if (playerLink.indexOf(playerStats + link) === -1) {
                playerLink.push(playerStats + link);
            }
        });
        log("end getPlayers()")
        getPlayersT += Date.now() - startT;
        return playerLink;
    }

    async function getPlayerStats(playerLink) {
        log("begin getPlayerStats");
        const statsArray = [];
        await Promise.all(playerLink.map(async link => {
            log(`begin get ${link}`)
            const response = await axios.get(link);
            log(`after get ${link}`)
            const html = response.data;
            const startT = Date.now();
            const $ = cheerio.load(html);
            const statName = [];
            const statDesc = [];
            const statNum = [];

            $("h2 > span:nth-child(1)", html).each(function() {
                var name = $(this).text();
                statName.push(name);
            });
            $(".stat-title", html).each(function() {
                var stat1 = $(this).text();
                statDesc.push(stat1);
            });
            $(".stat-value", html).each(function() {
                var stat2 = $(this).text();
                statNum.push(stat2);
            });
            //Conditional is here because sometimes statsArray
            //gets filled multiple times
            if (statsArray.length < 63) {
                statsArray.push(statName, statDesc, statNum);
            }
            cheerioT += Date.now() - startT;
            log(`after cheerio parse ${link}`);
        }));
        return statsArray;
    }

    try {
        log("begin all")
        const playerLink = await getPlayers();
        const statsArray = await getPlayerStats(playerLink);
        log("end all")
        return statsArray;
    } catch (e) {
        console.log(e);
    }
}

run().then(result => {
    console.log(result);
    console.log(`getPlayers() took ${getPlayersT}ms`);
    console.log(`cheerio processing took ${cheerioT}ms`);
}).catch(err => {
    console.log("error");
});