我想在浏览器中打印访问次数最多的 sites/urls

I want to print the most visited sites/urls in the browser

下面是我用 C++ 编写的代码,它在第二和第三输出行打印了错误的结果。我无法弄清楚为什么会这样。

下面是我写的完整代码,可以在 Visual Studio 上直接编译运行。此代码需要一个名为 urlMgr.txt 的输入文件,文件内容为 URL(每行一个)。下面是我使用的示例 URL。

  web.whatsapp.com 
  web.whatsapp.com 
  cplusplus.com/reference/algorithm/find_if 
  stackoverflow.com/questions/760221/breaking-in-stdfor-each-loop 
  mail.google.com/mail/u/0/#inbox 
  
  mail.google.com/mail/u/0/#inbox 
  en.cppreference.com/w/cpp/language/lambda 
  https://www.google.co.in/?ion=1&espv=2#q=invariant%20meaning
  mail.google.com/mail/u/0/#inbox
  
  https://www.google.co.in/?ion=1&espv=2#q=array+of+references:quora&start=10
  mail.google.com/mail/u/0/#inbox 
  web.whatsapp.com 
  quora.com/Whats-the-purpose-of-load-factor-in-hash-tables 
  https://www.quora.com/Whats-the-difference-between-the-rehash-and-reserve-methods-of-the-C++-unordered_map
  cplusplus.com/reference/unordered_map/unordered_map/load_factor
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 
  cplusplus.com/max_load_factor 

代码也贴在下面

#include <algorithm>
#include <cstring>
#include <fstream>
#include <functional>
#include <iostream>
#include <queue>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
using namespace std;

/// Bookkeeping record for a single URL: the URL text plus the number of
/// times it has been seen.
class urlInfo
{
public:
    /// Creates a record for @p url with an initial hit count of 1.
    /// The parameter is a const reference so that const strings and
    /// temporaries are accepted as well as named, mutable strings.
    urlInfo(const std::string &url):urlName(url),hitCount(1)
    {
    }

    /// @return the number of hits recorded so far.
    int getHitCount() const
    {
        return hitCount;
    }

    /// @return a copy of the URL text.
    /// One const overload serves both const and non-const objects.
    std::string getURL() const
    {
        return urlName;
    }

    /// Records one more hit.
    void updateHitCount()
    {
        hitCount++;
    }

    /// Overwrites the hit count with @p count.
    void setHitCount(int count)
    {
        hitCount = count;
    }

private:
    std::string urlName;  // URL text, fixed at construction
    int hitCount;         // starts at 1, see constructor
};

/// Ordering predicate over urlInfo pointers, keyed on hit count.
/// Yields true when the first record has strictly fewer hits than the
/// second; used as the comparison object of the standard heap algorithms it
/// keeps the record with the highest hit count at the top.
class urlInfoMaxHeap
{
public:
    bool operator() (urlInfo *url1, urlInfo *url2) const
    {
        const int rhsHits = url2->getHitCount();
        const int lhsHits = url1->getHitCount();
        return lhsHits < rhsHits;
    }
};


/// Two urlInfo records compare equal when their URL text is identical;
/// hit counts take no part in the comparison.
bool operator==(const urlInfo &ui1,const urlInfo& ui2)
{
    const auto lhsUrl = ui1.getURL();
    const auto rhsUrl = ui2.getURL();
    return lhsUrl.compare(rhsUrl) == 0;
}

namespace std
{
    /// std::hash specialization that lets urlInfo be used as a key in the
    /// unordered containers. Only the URL text is hashed, which keeps the
    /// hash consistent with operator== for urlInfo (equal URLs => equal hash).
    template <> struct hash<urlInfo>
    {
        /// @return the hash of the URL string held by @p ui.
        /// The call operator is const because the unordered containers
        /// invoke the hasher through a const object; a non-const operator()
        /// does not satisfy the Hash requirements and fails to compile there.
        size_t operator()(urlInfo const & ui) const
        {
            return hash<string>()(ui.getURL());
        }
    };
}

class urlMgr
{
public:
    urlMgr(string &fileName)
    {
        ifstream rdStr;
        string str;
        rdStr.open(fileName.c_str(),ios::in);
        if(rdStr.is_open())
        {
            int len;
            rdStr.seekg(0,ios::end);
            len = rdStr.tellg();
            rdStr.seekg(0,ios::beg);
            str.reserve(len+1);
            char *buff = new char[len +1];
            memset(buff,0,len+1);
            rdStr.read(buff,len);
            rdStr.close();
            str.assign(buff);
            delete [] buff;
        }
        stringstream ss(str);
        string token;

        while(getline(ss,token,'\n'))
        {
            //cout<<endl<<token;
            addUrl(token);
        }

    }


    void addUrl(string &url)
    {
        unordered_map<string,urlInfo*>::iterator itr;
        itr = urls.find(url);
        if(itr == urls.end())
        {
            urlInfo *u = new urlInfo(url);
            urls[url] = u;
            maxHeap.push_back(u);
        }
        else
        {
            itr->second->updateHitCount();
            urlInfo* u = itr->second;
            vector<urlInfo*>::iterator vItr;
            vItr = find(maxHeap.begin(),maxHeap.end(),u);
            if(vItr!=maxHeap.end())
            {
                maxHeap.erase(vItr);
                maxHeap.push_back(u);
            }
        }

        make_heap(maxHeap.begin(),maxHeap.end(),urlInfoMaxHeap());
    }

    void releaseResources()
    {
        for_each(urls.begin(),urls.end(),[](pair<string,urlInfo*> p){
            urlInfo* u = p.second;
            delete u;
        });
    }

    void printHeap()
    {
        for_each(maxHeap.begin(),maxHeap.end(),[](urlInfo* u){
            cout<<endl<<u->getHitCount()<<"  "<<u->getURL();
        });
    }
private:
    unordered_map<string,urlInfo*> urls;
    vector<urlInfo*> maxHeap;
};


int main()
{
    string fileName("urlMgr.txt");
    urlMgr um(fileName);
    um.printHeap();
    um.releaseResources();
    cout<<endl<<"Successfully inserted the data"<<endl;
}

我得到的输出是

   8 cplusplus.com/max_load_factor
   3 web.whatsapp.com
   4 mail.google.com/mail/u/0/#inbox
   1 en.cppreference.com/w/cpp/language/lambda
   1 other url's and so on. //all other url's show count as 1.

我期待的是

   8 cplusplus.com/max_load_factor   
   4 mail.google.com/mail/u/0/#inbox
   3 web.whatsapp.com
   1 en.cppreference.com/w/cpp/language/lambda
   1 other url's and so on. //all other url's show count as 1.

最后,经过一些调试,我找到了问题所在。问题出在您对 make_heap() 工作方式的理解上。

考虑一下。

url1 occurs 8 times
url2 occurs 4 times
url3 occurs 3 times

调用 make_heap() 之后,您可能得到

maxHeap[0]=8                     8
maxHeap[1]=4                   4   3
maxHeap[2]=3

或者也可能得到

maxHeap[0]=8                     8
maxHeap[1]=3                  3     4
maxHeap[2]=4

上面两个都是合法的最大堆,但你以为只会出现第一种,所以在下面的代码中你直接按存储顺序打印 maxHeap 的内容,而没有意识到打印出来的可能是第二种堆。

  // Prints the hit count and URL of every element of maxHeap in the
  // vector's storage order (begin to end), which is not necessarily
  // descending hit-count order.
  void printHeap()
{
    for_each(maxHeap.begin(),maxHeap.end(),[](urlInfo* u){
        cout<<endl<<u->getHitCount()<<"  "<<u->getURL();
    });
}

解决这个问题的一种方法是:取出 maxHeap[0] 之后删除第一个元素,然后在下一次取 maxHeap[0] 之前再次调用 make_heap()。或者你也可以像下面这样做。

 // Print the element at the top of the heap, then remove it, and repeat
 // until maxHeap is empty. Note that this consumes maxHeap.
 while(!maxHeap.empty())
 {
     urlInfo *top = maxHeap.front();
     cout<<top->getHitCount()<<" "<<top->getURL()<<endl;

     // pop_heap moves the top element to the back and restores the heap
     // property on the remaining range; pop_back then drops it.
     std::pop_heap(maxHeap.begin(),maxHeap.end(),urlInfoMaxHeap());
     maxHeap.pop_back();
 }

在上面的代码中,pop_heap() 会将最顶层的元素(根据您传递给 make_heap() 的比较函数的实现,它具有最高优先级)移动到末尾并再次堆化。然后您可以删除最后一个元素。

另外,我看不出你代码中下面这段的用途

 // Searches maxHeap for u and, if it is present, erases it and appends it
 // again: the set of elements is unchanged, u merely moves to the back.
 vector<urlInfo*>::iterator vItr;
        vItr = find(maxHeap.begin(),maxHeap.end(),u);
        if(vItr!=maxHeap.end())
        {
            maxHeap.erase(vItr);
            maxHeap.push_back(u);
        }