Ruby Nokogiri 获取全部内容
Ruby Nokogiri take all the content
我正在做一个网页抓取(scraping)项目,但遇到了一个问题:
我想用 nokogiri 获取 https://coinmarketcap.com/all/views/all/ 上的所有数据,
但该页面上有 200 个加密货币名称,
而我用 nokogiri 只获取到了其中的 20 个。
代码:
ruby
require 'nokogiri'
require 'open-uri'
require 'rubygems'
# Downloads the CoinMarketCap "all coins" page and parses it with Nokogiri.
#
# The block form of URI.open is used so the downloaded stream is closed as
# soon as parsing has finished instead of being left open.
#
# @return the parsed Nokogiri HTML document
def scrapper
  URI.open('https://coinmarketcap.com/all/views/all/') { |io| Nokogiri::HTML(io) }
end
# Pairs each element of tab1 with the element at the same index in tab2 and
# returns the pairs as a Hash (tab1 supplies the keys, tab2 the values).
# When tab2 is shorter than tab1 the missing values are nil.
def fusiontab(tab1, tab2)
  pairs = tab1.zip(tab2)
  pairs.to_h
end
# Builds a name => price Hash from the table cells of the parsed page.
#
# Names are taken from the cells matched by '//tr//td[3]' and prices from the
# cells matched by '//tr//td[5]'; the two lists are paired positionally by
# fusiontab.
#
# @param page an object responding to #xpath (e.g. the document returned by scrapper)
# @return [Hash] name cell text mapped to price cell text
def crypto(page)
  names = page.xpath('//tr//td[3]').map(&:text)
  prices = page.xpath('//tr//td[5]').map(&:text)
  fusiontab(names, prices)
end
# Fetch the page, extract the name/price pairs and print the resulting Hash.
puts crypto(scrapper)
你能帮我弄到所有的加密货币吗?
您使用的 URL 返回的 HTML 并不包含全部数据;其中很多内容是在页面加载之后才渲染出来的。
查看页面的源代码,数据似乎是从页面中嵌入的 JSON 脚本呈现的。
为了找出 JSON 数据的哪一部分包含您要处理的内容,我们花了相当长的时间来查找对象:
- 取出 HTML 中以 String 形式存在的 JSON 对象:
page.css('script[type="application/json"]').first.inner_html
- 把 JSON String 转换成真正的 Ruby Hash:
JSON.parse(page.css('script[type="application/json"]').first.inner_html)
- 加密货币数据(由 Hash 组成的 Array)在 JSON 中的位置:
my_json["props"]["initialState"]["cryptocurrency"]["listingLatest"]["data"]
漂亮打印第一个“crypto”
2.7.2 :142 > pp cryptos.first
{"id"=>1,
"name"=>"Bitcoin",
"symbol"=>"BTC",
"slug"=>"bitcoin",
"tags"=>
["mineable",
"pow",
"sha-256",
"store-of-value",
"state-channel",
"coinbase-ventures-portfolio",
"three-arrows-capital-portfolio",
"polychain-capital-portfolio",
"binance-labs-portfolio",
"blockchain-capital-portfolio",
"boostvc-portfolio",
"cms-holdings-portfolio",
"dcg-portfolio",
"dragonfly-capital-portfolio",
"electric-capital-portfolio",
"fabric-ventures-portfolio",
"framework-ventures-portfolio",
"galaxy-digital-portfolio",
"huobi-capital-portfolio",
"alameda-research-portfolio",
"a16z-portfolio",
"1confirmation-portfolio",
"winklevoss-capital-portfolio",
"usv-portfolio",
"placeholder-ventures-portfolio",
"pantera-capital-portfolio",
"multicoin-capital-portfolio",
"paradigm-portfolio"],
"cmcRank"=>1,
"marketPairCount"=>9158,
"circulatingSupply"=>18960043,
"selfReportedCirculatingSupply"=>0,
"totalSupply"=>18960043,
"maxSupply"=>21000000,
"isActive"=>1,
"lastUpdated"=>"2022-02-16T14:26:00.000Z",
"dateAdded"=>"2013-04-28T00:00:00.000Z",
"quotes"=>
[{"name"=>"USD",
"price"=>43646.858047604175,
"volume24h"=>20633664171.70021,
"marketCap"=>827546305397.4712,
"percentChange1h"=>-0.86544168,
"percentChange24h"=>-1.6482985,
"percentChange7d"=>-0.73945082,
"lastUpdated"=>"2022-02-16T14:26:00.000Z",
"percentChange30d"=>2.18336134,
"percentChange60d"=>-6.84146969,
"percentChange90d"=>-26.08073361,
"fullyDilluttedMarketCap"=>916584018999.69,
"marketCapByTotalSupply"=>827546305397.4712,
"dominance"=>42.1276,
"turnover"=>0.02493355,
"ytdPriceChangePercentage"=>-8.4718}],
"isAudited"=>false,
"rank"=>1,
"hasFilters"=>false,
"quote"=>
{"USD"=>
{"name"=>"USD",
"price"=>43646.858047604175,
"volume24h"=>20633664171.70021,
"marketCap"=>827546305397.4712,
"percentChange1h"=>-0.86544168,
"percentChange24h"=>-1.6482985,
"percentChange7d"=>-0.73945082,
"lastUpdated"=>"2022-02-16T14:26:00.000Z",
"percentChange30d"=>2.18336134,
"percentChange60d"=>-6.84146969,
"percentChange90d"=>-26.08073361,
"fullyDilluttedMarketCap"=>916584018999.69,
"marketCapByTotalSupply"=>827546305397.4712,
"dominance"=>42.1276,
"turnover"=>0.02493355,
"ytdPriceChangePercentage"=>-8.4718}}
}
第一个“crypto”的值
cryptos.first["quote"]["USD"]["price"]
您在 Hash
中用于第一个“crypto”的键(key)
cryptos.first["symbol"]
将它们放在一起,您将得到以下代码(使用 each_with_object
遍历每个“加密货币”)
require 'json'
require 'nokogiri'
require 'open-uri'
...
# Extracts a name => USD price Hash from the JSON embedded in the page.
#
# The first <script type="application/json"> element is parsed as JSON, the
# listing stored under props / initialState / cryptocurrency / listingLatest /
# data is looked up, and each entry's "name" is mapped to its USD quote price.
#
# @param page an object responding to #css (e.g. the document returned by scrapper)
# @return [Hash] entry name mapped to its USD price
def crypto(page)
  script_tag = page.css('script[type="application/json"]').first
  payload = JSON.parse(script_tag.inner_html)
  coins = payload["props"]["initialState"]["cryptocurrency"]["listingLatest"]["data"]

  coins.each_with_object({}) do |coin, prices|
    prices[coin["name"]] = coin["quote"]["USD"]["price"]
  end
end
# Fetch the page, extract the name/price pairs and print the resulting Hash.
puts crypto(scrapper)
我正在做一个网页抓取(scraping)项目,但遇到了一个问题:
我想用 nokogiri 获取 https://coinmarketcap.com/all/views/all/ 上的所有数据,但该页面上有 200 个加密货币名称,
而我用 nokogiri 只获取到了其中的 20 个。代码:
ruby
require 'nokogiri'
require 'open-uri'
require 'rubygems'
# Downloads the CoinMarketCap "all coins" page and parses it with Nokogiri.
#
# The block form of URI.open is used so the downloaded stream is closed as
# soon as parsing has finished instead of being left open.
#
# @return the parsed Nokogiri HTML document
def scrapper
  URI.open('https://coinmarketcap.com/all/views/all/') { |io| Nokogiri::HTML(io) }
end
# Pairs each element of tab1 with the element at the same index in tab2 and
# returns the pairs as a Hash (tab1 supplies the keys, tab2 the values).
# When tab2 is shorter than tab1 the missing values are nil.
def fusiontab(tab1, tab2)
  pairs = tab1.zip(tab2)
  pairs.to_h
end
# Builds a name => price Hash from the table cells of the parsed page.
#
# Names are taken from the cells matched by '//tr//td[3]' and prices from the
# cells matched by '//tr//td[5]'; the two lists are paired positionally by
# fusiontab.
#
# @param page an object responding to #xpath (e.g. the document returned by scrapper)
# @return [Hash] name cell text mapped to price cell text
def crypto(page)
  names = page.xpath('//tr//td[3]').map(&:text)
  prices = page.xpath('//tr//td[5]').map(&:text)
  fusiontab(names, prices)
end
# Fetch the page, extract the name/price pairs and print the resulting Hash.
puts crypto(scrapper)
你能帮我弄到所有的加密货币吗?
您使用的 URL 返回的 HTML 并不包含全部数据;其中很多内容是在页面加载之后才渲染出来的。
查看页面的源代码,数据似乎是从页面中嵌入的 JSON 脚本呈现的。
为了找出 JSON 数据的哪一部分包含您要处理的内容,我们花了相当长的时间来查找对象:
- 取出 HTML 中以 String 形式存在的 JSON 对象:
page.css('script[type="application/json"]').first.inner_html
- 把 JSON String 转换成真正的 Ruby Hash:
JSON.parse(page.css('script[type="application/json"]').first.inner_html)
- 加密货币数据(由 Hash 组成的 Array)在 JSON 中的位置:
my_json["props"]["initialState"]["cryptocurrency"]["listingLatest"]["data"]
漂亮打印第一个“crypto”
2.7.2 :142 > pp cryptos.first
{"id"=>1,
"name"=>"Bitcoin",
"symbol"=>"BTC",
"slug"=>"bitcoin",
"tags"=>
["mineable",
"pow",
"sha-256",
"store-of-value",
"state-channel",
"coinbase-ventures-portfolio",
"three-arrows-capital-portfolio",
"polychain-capital-portfolio",
"binance-labs-portfolio",
"blockchain-capital-portfolio",
"boostvc-portfolio",
"cms-holdings-portfolio",
"dcg-portfolio",
"dragonfly-capital-portfolio",
"electric-capital-portfolio",
"fabric-ventures-portfolio",
"framework-ventures-portfolio",
"galaxy-digital-portfolio",
"huobi-capital-portfolio",
"alameda-research-portfolio",
"a16z-portfolio",
"1confirmation-portfolio",
"winklevoss-capital-portfolio",
"usv-portfolio",
"placeholder-ventures-portfolio",
"pantera-capital-portfolio",
"multicoin-capital-portfolio",
"paradigm-portfolio"],
"cmcRank"=>1,
"marketPairCount"=>9158,
"circulatingSupply"=>18960043,
"selfReportedCirculatingSupply"=>0,
"totalSupply"=>18960043,
"maxSupply"=>21000000,
"isActive"=>1,
"lastUpdated"=>"2022-02-16T14:26:00.000Z",
"dateAdded"=>"2013-04-28T00:00:00.000Z",
"quotes"=>
[{"name"=>"USD",
"price"=>43646.858047604175,
"volume24h"=>20633664171.70021,
"marketCap"=>827546305397.4712,
"percentChange1h"=>-0.86544168,
"percentChange24h"=>-1.6482985,
"percentChange7d"=>-0.73945082,
"lastUpdated"=>"2022-02-16T14:26:00.000Z",
"percentChange30d"=>2.18336134,
"percentChange60d"=>-6.84146969,
"percentChange90d"=>-26.08073361,
"fullyDilluttedMarketCap"=>916584018999.69,
"marketCapByTotalSupply"=>827546305397.4712,
"dominance"=>42.1276,
"turnover"=>0.02493355,
"ytdPriceChangePercentage"=>-8.4718}],
"isAudited"=>false,
"rank"=>1,
"hasFilters"=>false,
"quote"=>
{"USD"=>
{"name"=>"USD",
"price"=>43646.858047604175,
"volume24h"=>20633664171.70021,
"marketCap"=>827546305397.4712,
"percentChange1h"=>-0.86544168,
"percentChange24h"=>-1.6482985,
"percentChange7d"=>-0.73945082,
"lastUpdated"=>"2022-02-16T14:26:00.000Z",
"percentChange30d"=>2.18336134,
"percentChange60d"=>-6.84146969,
"percentChange90d"=>-26.08073361,
"fullyDilluttedMarketCap"=>916584018999.69,
"marketCapByTotalSupply"=>827546305397.4712,
"dominance"=>42.1276,
"turnover"=>0.02493355,
"ytdPriceChangePercentage"=>-8.4718}}
}
第一个“crypto”的值
cryptos.first["quote"]["USD"]["price"]
您在 Hash
中用于第一个“crypto”的键(key)
cryptos.first["symbol"]
将它们放在一起,您将得到以下代码(使用 each_with_object
遍历每个“加密货币”)
require 'json'
require 'nokogiri'
require 'open-uri'
...
# Extracts a name => USD price Hash from the JSON embedded in the page.
#
# The first <script type="application/json"> element is parsed as JSON, the
# listing stored under props / initialState / cryptocurrency / listingLatest /
# data is looked up, and each entry's "name" is mapped to its USD quote price.
#
# @param page an object responding to #css (e.g. the document returned by scrapper)
# @return [Hash] entry name mapped to its USD price
def crypto(page)
  script_tag = page.css('script[type="application/json"]').first
  payload = JSON.parse(script_tag.inner_html)
  coins = payload["props"]["initialState"]["cryptocurrency"]["listingLatest"]["data"]

  coins.each_with_object({}) do |coin, prices|
    prices[coin["name"]] = coin["quote"]["USD"]["price"]
  end
end
# Fetch the page, extract the name/price pairs and print the resulting Hash.
puts crypto(scrapper)