我想在浏览器中打印访问次数最多的 sites/urls
I want to print the most visited sites/urls in the browser
下面是我用 C++ 编写的代码,它在第二和第三输出行打印了错误的结果。我无法弄清楚为什么会这样。
下面是我写的代码,它是 visual studio 上的完整功能代码。此代码需要一个名为 urlMgr.txt 的输入文件,其内容应为 URL。下面是我正在使用的示例 URL。
web.whatsapp.com
web.whatsapp.com
cplusplus.com/reference/algorithm/find_if
stackoverflow.com/questions/760221/breaking-in-stdfor-each-loop
mail.google.com/mail/u/0/#inbox
mail.google.com/mail/u/0/#inbox
en.cppreference.com/w/cpp/language/lambda
https://www.google.co.in/?ion=1&espv=2#q=invariant%20meaning
mail.google.com/mail/u/0/#inbox
https://www.google.co.in/?ion=1&espv=2#q=array+of+references:quora&start=10
mail.google.com/mail/u/0/#inbox
web.whatsapp.com
quora.com/Whats-the-purpose-of-load-factor-in-hash-tables
https://www.quora.com/Whats-the-difference-between-the-rehash-and-reserve-methods-of-the-C++-unordered_map
cplusplus.com/reference/unordered_map/unordered_map/load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
代码也贴在下面
#include <iostream>
#include <string>
#include <unordered_set>
#include <algorithm>
#include <fstream>
#include <sstream>
#include <functional>
#include <unordered_map>
#include <queue>
using namespace std;
// One URL together with the number of times it has been seen.
class urlInfo
{
public:
    // Creates a record for `url` with an initial hit count of 1.
    // The parameter is a const reference so that const strings and
    // temporaries can be passed as well as ordinary lvalues.
    urlInfo(const string &url):urlName(url),hitCount(1)
    {
    }
    // Returns how many times this URL has been seen.
    int getHitCount() const
    {
        return hitCount;
    }
    // Returns the stored URL. A single const accessor serves both const and
    // non-const objects, and returning a reference avoids copying the string
    // on every call (comparison, hashing, printing).
    const string &getURL() const
    {
        return urlName;
    }
    // Records one more visit.
    void updateHitCount()
    {
        ++hitCount;
    }
    // Overwrites the hit count with `count`.
    void setHitCount(int count)
    {
        hitCount = count;
    }
private:
    string urlName;
    int hitCount;
};
// Ordering functor over urlInfo pointers: yields true when the first entry
// has fewer hits than the second. Used with the standard heap algorithms,
// this ordering places the entry with the most hits at the front.
class urlInfoMaxHeap
{
public:
    bool operator() (urlInfo *url1, urlInfo *url2) const
    {
        const int lhsHits = url1->getHitCount();
        const int rhsHits = url2->getHitCount();
        return lhsHits < rhsHits;
    }
};
// Two urlInfo records are equal when their URLs match; hit counts are ignored.
bool operator==(const urlInfo &ui1,const urlInfo& ui2)
{
    const string &lhsUrl = ui1.getURL();
    const string &rhsUrl = ui2.getURL();
    return lhsUrl == rhsUrl;
}
namespace std
{
    // std::hash specialization so that urlInfo can be used as a key in the
    // unordered containers. Only the URL is hashed, which agrees with
    // operator== for urlInfo (it also compares only the URL).
    template <> struct hash<urlInfo>
    {
        // Must be const-qualified: the unordered containers invoke the hasher
        // through a const object, so a non-const call operator is rejected.
        size_t operator()(urlInfo const & ui) const
        {
            return hash<string>()(ui.getURL());
        }
    };
}
class urlMgr
{
public:
urlMgr(string &fileName)
{
ifstream rdStr;
string str;
rdStr.open(fileName.c_str(),ios::in);
if(rdStr.is_open())
{
int len;
rdStr.seekg(0,ios::end);
len = rdStr.tellg();
rdStr.seekg(0,ios::beg);
str.reserve(len+1);
char *buff = new char[len +1];
memset(buff,0,len+1);
rdStr.read(buff,len);
rdStr.close();
str.assign(buff);
delete [] buff;
}
stringstream ss(str);
string token;
while(getline(ss,token,'\n'))
{
//cout<<endl<<token;
addUrl(token);
}
}
void addUrl(string &url)
{
unordered_map<string,urlInfo*>::iterator itr;
itr = urls.find(url);
if(itr == urls.end())
{
urlInfo *u = new urlInfo(url);
urls[url] = u;
maxHeap.push_back(u);
}
else
{
itr->second->updateHitCount();
urlInfo* u = itr->second;
vector<urlInfo*>::iterator vItr;
vItr = find(maxHeap.begin(),maxHeap.end(),u);
if(vItr!=maxHeap.end())
{
maxHeap.erase(vItr);
maxHeap.push_back(u);
}
}
make_heap(maxHeap.begin(),maxHeap.end(),urlInfoMaxHeap());
}
void releaseResources()
{
for_each(urls.begin(),urls.end(),[](pair<string,urlInfo*> p){
urlInfo* u = p.second;
delete u;
});
}
void printHeap()
{
for_each(maxHeap.begin(),maxHeap.end(),[](urlInfo* u){
cout<<endl<<u->getHitCount()<<" "<<u->getURL();
});
}
private:
unordered_map<string,urlInfo*> urls;
vector<urlInfo*> maxHeap;
};
// Entry point: loads urlMgr.txt, prints the hit count of every URL, releases
// the records and reports completion.
int main()
{
    string fileName = "urlMgr.txt";
    urlMgr um(fileName);

    um.printHeap();
    um.releaseResources();

    cout << endl << "Successfully inserted the data" << endl;
    return 0;
}
我得到的输出是
8 cplusplus.com/max_load_factor
3 web.whatsapp.com
4 mail.google.com/mail/u/0/#inbox
1 en.cppreference.com/w/cpp/language/lambda
1 other url's and so on. //all other url's show count as 1.
我期待的是
8 cplusplus.com/max_load_factor
4 mail.google.com/mail/u/0/#inbox
3 web.whatsapp.com
1 en.cppreference.com/w/cpp/language/lambda
1 other url's and so on. //all other url's show count as 1.
最后，经过一些调试，我找到了问题所在。问题出在您对 make_heap()
工作方式的理解上。
考虑一下。
url1 occurs 8 times
url2 occurs 4 times
url3 occurs 3 times
调用 make_heap()
之后，您可能得到
maxHeap[0]=8          8
maxHeap[1]=4        4   3
maxHeap[2]=3
或者，您也可能得到
maxHeap[0]=8          8
maxHeap[1]=3        3   4
maxHeap[2]=4
上面两种排列都是合法的最大堆，但您假设只会出现第一种，所以在下面的代码中您只是按数组顺序打印 maxHeap
的内容，而没有意识到您打印的可能是第二种排列。
// Walks the maxHeap vector from front to back and prints each entry's hit
// count and URL. The output therefore follows the vector's element order.
void printHeap()
{
for_each(maxHeap.begin(),maxHeap.end(),[](urlInfo* u){
cout<<endl<<u->getHitCount()<<" "<<u->getURL();
});
}
解决这个问题的一种方法是：在取出 maxHeap[0]
之后删除第一个元素，然后在下一次取 maxHeap[0]
之前再次调用 make_heap()。或者，您也可以像下面这样做。
// Repeatedly prints the entry at the front of the heap and then removes it,
// until maxHeap is empty. Note that this consumes the contents of maxHeap.
while(maxHeap.size()>0){
cout<<(*maxHeap.begin())->getHitCount()<<" "<<(*maxHeap.begin())->getURL()<<endl;
// pop_heap moves the front element to the back of the vector; pop_back then discards it.
std::pop_heap(maxHeap.begin(),maxHeap.end(),urlInfoMaxHeap());maxHeap.pop_back();}
在上面的代码中,pop_heap() 会将最顶层的元素(根据您传递给 make_heap()
的比较函数的实现,它具有最高优先级)移动到末尾并再次堆化。然后您可以删除最后一个元素。
另外，我没有看出您代码中下面这一段有什么用处
// Looks up the pointer u in maxHeap and, if it is present, removes it from
// its current position and appends it again at the end of the vector.
vector<urlInfo*>::iterator vItr;
vItr = find(maxHeap.begin(),maxHeap.end(),u);
if(vItr!=maxHeap.end())
{
maxHeap.erase(vItr);
maxHeap.push_back(u);
}
下面是我用 C++ 编写的代码,它在第二和第三输出行打印了错误的结果。我无法弄清楚为什么会这样。
下面是我写的代码,它是 visual studio 上的完整功能代码。此代码需要一个名为 urlMgr.txt 的输入文件,其内容应为 URL。下面是我正在使用的示例 URL。
web.whatsapp.com
web.whatsapp.com
cplusplus.com/reference/algorithm/find_if
stackoverflow.com/questions/760221/breaking-in-stdfor-each-loop
mail.google.com/mail/u/0/#inbox
mail.google.com/mail/u/0/#inbox
en.cppreference.com/w/cpp/language/lambda
https://www.google.co.in/?ion=1&espv=2#q=invariant%20meaning
mail.google.com/mail/u/0/#inbox
https://www.google.co.in/?ion=1&espv=2#q=array+of+references:quora&start=10
mail.google.com/mail/u/0/#inbox
web.whatsapp.com
quora.com/Whats-the-purpose-of-load-factor-in-hash-tables
https://www.quora.com/Whats-the-difference-between-the-rehash-and-reserve-methods-of-the-C++-unordered_map
cplusplus.com/reference/unordered_map/unordered_map/load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
cplusplus.com/max_load_factor
代码也贴在下面
#include <iostream>
#include <string>
#include <unordered_set>
#include <algorithm>
#include <fstream>
#include <sstream>
#include <functional>
#include <unordered_map>
#include <queue>
using namespace std;
// One URL together with the number of times it has been seen.
class urlInfo
{
public:
    // Creates a record for `url` with an initial hit count of 1.
    // The parameter is a const reference so that const strings and
    // temporaries can be passed as well as ordinary lvalues.
    urlInfo(const string &url):urlName(url),hitCount(1)
    {
    }
    // Returns how many times this URL has been seen.
    int getHitCount() const
    {
        return hitCount;
    }
    // Returns the stored URL. A single const accessor serves both const and
    // non-const objects, and returning a reference avoids copying the string
    // on every call (comparison, hashing, printing).
    const string &getURL() const
    {
        return urlName;
    }
    // Records one more visit.
    void updateHitCount()
    {
        ++hitCount;
    }
    // Overwrites the hit count with `count`.
    void setHitCount(int count)
    {
        hitCount = count;
    }
private:
    string urlName;
    int hitCount;
};
// Ordering functor over urlInfo pointers: yields true when the first entry
// has fewer hits than the second. Used with the standard heap algorithms,
// this ordering places the entry with the most hits at the front.
class urlInfoMaxHeap
{
public:
    bool operator() (urlInfo *url1, urlInfo *url2) const
    {
        const int lhsHits = url1->getHitCount();
        const int rhsHits = url2->getHitCount();
        return lhsHits < rhsHits;
    }
};
// Two urlInfo records are equal when their URLs match; hit counts are ignored.
bool operator==(const urlInfo &ui1,const urlInfo& ui2)
{
    const string &lhsUrl = ui1.getURL();
    const string &rhsUrl = ui2.getURL();
    return lhsUrl == rhsUrl;
}
namespace std
{
    // std::hash specialization so that urlInfo can be used as a key in the
    // unordered containers. Only the URL is hashed, which agrees with
    // operator== for urlInfo (it also compares only the URL).
    template <> struct hash<urlInfo>
    {
        // Must be const-qualified: the unordered containers invoke the hasher
        // through a const object, so a non-const call operator is rejected.
        size_t operator()(urlInfo const & ui) const
        {
            return hash<string>()(ui.getURL());
        }
    };
}
class urlMgr
{
public:
urlMgr(string &fileName)
{
ifstream rdStr;
string str;
rdStr.open(fileName.c_str(),ios::in);
if(rdStr.is_open())
{
int len;
rdStr.seekg(0,ios::end);
len = rdStr.tellg();
rdStr.seekg(0,ios::beg);
str.reserve(len+1);
char *buff = new char[len +1];
memset(buff,0,len+1);
rdStr.read(buff,len);
rdStr.close();
str.assign(buff);
delete [] buff;
}
stringstream ss(str);
string token;
while(getline(ss,token,'\n'))
{
//cout<<endl<<token;
addUrl(token);
}
}
void addUrl(string &url)
{
unordered_map<string,urlInfo*>::iterator itr;
itr = urls.find(url);
if(itr == urls.end())
{
urlInfo *u = new urlInfo(url);
urls[url] = u;
maxHeap.push_back(u);
}
else
{
itr->second->updateHitCount();
urlInfo* u = itr->second;
vector<urlInfo*>::iterator vItr;
vItr = find(maxHeap.begin(),maxHeap.end(),u);
if(vItr!=maxHeap.end())
{
maxHeap.erase(vItr);
maxHeap.push_back(u);
}
}
make_heap(maxHeap.begin(),maxHeap.end(),urlInfoMaxHeap());
}
void releaseResources()
{
for_each(urls.begin(),urls.end(),[](pair<string,urlInfo*> p){
urlInfo* u = p.second;
delete u;
});
}
void printHeap()
{
for_each(maxHeap.begin(),maxHeap.end(),[](urlInfo* u){
cout<<endl<<u->getHitCount()<<" "<<u->getURL();
});
}
private:
unordered_map<string,urlInfo*> urls;
vector<urlInfo*> maxHeap;
};
// Entry point: loads urlMgr.txt, prints the hit count of every URL, releases
// the records and reports completion.
int main()
{
    string fileName = "urlMgr.txt";
    urlMgr um(fileName);

    um.printHeap();
    um.releaseResources();

    cout << endl << "Successfully inserted the data" << endl;
    return 0;
}
我得到的输出是
8 cplusplus.com/max_load_factor
3 web.whatsapp.com
4 mail.google.com/mail/u/0/#inbox
1 en.cppreference.com/w/cpp/language/lambda
1 other url's and so on. //all other url's show count as 1.
我期待的是
8 cplusplus.com/max_load_factor
4 mail.google.com/mail/u/0/#inbox
3 web.whatsapp.com
1 en.cppreference.com/w/cpp/language/lambda
1 other url's and so on. //all other url's show count as 1.
最后，经过一些调试，我找到了问题所在。问题出在您对 make_heap()
工作方式的理解上。
考虑一下。
url1 occurs 8 times
url2 occurs 4 times
url3 occurs 3 times
调用 make_heap()
之后，您可能得到
maxHeap[0]=8          8
maxHeap[1]=4        4   3
maxHeap[2]=3
或者，您也可能得到
maxHeap[0]=8          8
maxHeap[1]=3        3   4
maxHeap[2]=4
上面两种排列都是合法的最大堆，但您假设只会出现第一种，所以在下面的代码中您只是按数组顺序打印 maxHeap
的内容，而没有意识到您打印的可能是第二种排列。
// Walks the maxHeap vector from front to back and prints each entry's hit
// count and URL. The output therefore follows the vector's element order.
void printHeap()
{
for_each(maxHeap.begin(),maxHeap.end(),[](urlInfo* u){
cout<<endl<<u->getHitCount()<<" "<<u->getURL();
});
}
解决这个问题的一种方法是：在取出 maxHeap[0]
之后删除第一个元素，然后在下一次取 maxHeap[0]
之前再次调用 make_heap()。或者，您也可以像下面这样做。
// Repeatedly prints the entry at the front of the heap and then removes it,
// until maxHeap is empty. Note that this consumes the contents of maxHeap.
while(maxHeap.size()>0){
cout<<(*maxHeap.begin())->getHitCount()<<" "<<(*maxHeap.begin())->getURL()<<endl;
// pop_heap moves the front element to the back of the vector; pop_back then discards it.
std::pop_heap(maxHeap.begin(),maxHeap.end(),urlInfoMaxHeap());maxHeap.pop_back();}
在上面的代码中,pop_heap() 会将最顶层的元素(根据您传递给 make_heap()
的比较函数的实现,它具有最高优先级)移动到末尾并再次堆化。然后您可以删除最后一个元素。
另外，我没有看出您代码中下面这一段有什么用处
// Looks up the pointer u in maxHeap and, if it is present, removes it from
// its current position and appends it again at the end of the vector.
vector<urlInfo*>::iterator vItr;
vItr = find(maxHeap.begin(),maxHeap.end(),u);
if(vItr!=maxHeap.end())
{
maxHeap.erase(vItr);
maxHeap.push_back(u);
}