在 Ruby 中使用 Nokogiri 抓取
Scraping with Nokogiri in Ruby
如何把名称和描述这两个属性组合在一起,为每个岛屿创建一个岛屿对象?我已经尽力尝试把这两个属性合并到同一个对象中,但始终只能分别得到它们。我需要帮助,因为我必须在两天内提交。这是我的代码:
# Island record from the question: holds a name and a description, and every
# new instance appends itself to the class-level @@all list.
# NOTE(review): this is the asker's original attempt, kept verbatim.
class MostBeautifulIslands::Islands
attr_accessor :name, :description
# Class-wide list that initialize pushes each new instance onto.
@@all = []
# name - value stored in @name.
def initialize(name)
@name = name
# NOTE(review): `description` is not a parameter of this method, so this calls
# the attr_accessor reader and assigns the still-unset value (nil) back.
@description = description
@@all << self
end
# Downloads the planetware page and builds instances from the matched nodes.
def self.scrape_world_best_islands
doc = Nokogiri::HTML(open("http://www.planetware.com/world/most-beautiful-islands-in-the-world-sey-1-2.htm"))
islands_names = doc.search("div h2.sitename")
# Creates one instance per heading, passing only the stripped heading text.
names = islands_names.collect{|island_name| new(island_name.text.strip)}
island_description = doc.search("div.site_desc > p")
# NOTE(review): `d` is one matched node, not a list - presumably
# `d.text.strip` was intended; verify.
descriptions = island_description.collect{|d| d.first.text.strip}
# NOTE(review): this passes the whole array of instances as the name of one
# more instance; `descriptions` is never used afterwards.
new_island = self.new(names)
new_island
# Debugger breakpoint (pry); it is the last expression evaluated in the method.
binding.pry
#end
end
end
首先,在 initialize 中,您使用了一个从未传入的 description 参数。应该是:
# Builds an island from both attributes and records it in the class-wide list.
#
# name        - value stored in @name.
# description - value stored in @description.
def initialize(name, description)
  @name, @description = name, description
  @@all.push(self)
end
其次,您应该先分别收集名称和描述,然后用 zip 把它们按位置一一配对,再用每一对值生成新实例:
# Collect the text of every island heading and every description paragraph,
# in document order. The text is stripped, as in the question's own code, so
# the resulting objects do not carry leading/trailing whitespace.
islands_names = doc.search("div h2.sitename").map { |node| node.text.strip }
islands_descs = doc.search("div.site_desc > p").map { |node| node.text.strip }
# Pair the two lists by position and build one instance per pair. When there
# are fewer descriptions than names, zip fills the missing ones with nil.
islands_names.zip(islands_descs).map { |name, desc| new(name, desc) }
#⇒ Array of newly created objects (15 according to the original answer)
我会把它拆分成两个独立的类:一个负责 Nokogiri 解析,另一个负责 MostBeutifulIslands::Islands 对象。这样在处理数据时会更加灵活。
require 'open-uri'
require 'nokogiri'
module MostBeutifulIslands
  # Plain holder for one scraped island: a name and a description.
  class Islands
    attr_reader :name, :description

    # name        - the island's name.
    # description - the island's description text.
    def initialize(name, description)
      @name = name
      @description = description
    end

    # Returns true only when both fields carry real content.
    #
    # Values built from `text.strip` are never nil, only possibly empty, so a
    # nil-only check would accept every entry; blank strings are rejected too.
    def valid?
      [name, description].none? { |field| field.nil? || field.to_s.strip.empty? }
    end

    # Persists the island through an `Island` model (e.g. a Rails model, which
    # must be defined elsewhere) and prints either the save result or the
    # model's errors.
    #
    # The model's #save is called exactly once; its result is reused for the
    # output so the record is not written a second time.
    def save
      island = Island.new(name: name, description: description)
      saved = island.save
      if saved
        puts saved
      else
        puts island.errors
      end
    end
  end
end
module MostBeutifulIslands
  # Downloads a listing page and turns each ".site" entry into an Islands object.
  class ParseIslands
    # url     - the address given to the constructor.
    # islands - the objects built by the last scrap_world_best_islands call,
    #           or nil when it has not been called yet.
    attr_reader :url, :islands

    # url - address of the page to scrape.
    def initialize(url)
      @url = url
    end

    # Fetches and parses the page at `url`.
    #
    # URI.open is used instead of Kernel#open because open-uri no longer makes
    # Kernel#open accept URLs on Ruby 3.0+. The parsed document is memoized so
    # repeated calls do not download the page again.
    def html
      @html ||= Nokogiri::HTML(URI.open(url))
    end

    # Builds one Islands object per ".site" node found under
    # "div .article_block", using the stripped text of its ".sitename" and
    # ".site_desc" descendants.
    #
    # The result is stored in @islands and returned. It is an empty array when
    # nothing matches, so save_islands can always be called afterwards.
    def scrap_world_best_islands
      @islands = html.css("div .article_block").css('.site').map do |node|
        name = node.css('.sitename').text.strip
        description = node.css('.site_desc').text.strip
        MostBeutifulIslands::Islands.new(name, description)
      end
    end

    # Saves every scraped island that reports itself as valid. Does nothing
    # when no scrape has happened yet (islands is nil).
    def save_islands
      Array(islands).each do |island|
        island.save if island.valid?
      end
    end
  end
end

# Example usage, placed outside the class body: scrape the page and save
# every valid island.
islands = MostBeutifulIslands::ParseIslands.new("http://www.planetware.com/world/most-beautiful-islands-in-the-world-sey-1-2.htm")
islands.scrap_world_best_islands
islands.save_islands
如何把名称和描述这两个属性组合在一起,为每个岛屿创建一个岛屿对象?我已经尽力尝试把这两个属性合并到同一个对象中,但始终只能分别得到它们。我需要帮助,因为我必须在两天内提交。这是我的代码:
# Island record from the question: holds a name and a description, and every
# new instance appends itself to the class-level @@all list.
# NOTE(review): this is the asker's original attempt, kept verbatim.
class MostBeautifulIslands::Islands
attr_accessor :name, :description
# Class-wide list that initialize pushes each new instance onto.
@@all = []
# name - value stored in @name.
def initialize(name)
@name = name
# NOTE(review): `description` is not a parameter of this method, so this calls
# the attr_accessor reader and assigns the still-unset value (nil) back.
@description = description
@@all << self
end
# Downloads the planetware page and builds instances from the matched nodes.
def self.scrape_world_best_islands
doc = Nokogiri::HTML(open("http://www.planetware.com/world/most-beautiful-islands-in-the-world-sey-1-2.htm"))
islands_names = doc.search("div h2.sitename")
# Creates one instance per heading, passing only the stripped heading text.
names = islands_names.collect{|island_name| new(island_name.text.strip)}
island_description = doc.search("div.site_desc > p")
# NOTE(review): `d` is one matched node, not a list - presumably
# `d.text.strip` was intended; verify.
descriptions = island_description.collect{|d| d.first.text.strip}
# NOTE(review): this passes the whole array of instances as the name of one
# more instance; `descriptions` is never used afterwards.
new_island = self.new(names)
new_island
# Debugger breakpoint (pry); it is the last expression evaluated in the method.
binding.pry
#end
end
end
首先,在 initialize 中,您使用了一个从未传入的 description 参数。应该是:
# Builds an island from both attributes and records it in the class-wide list.
#
# name        - value stored in @name.
# description - value stored in @description.
def initialize(name, description)
  @name, @description = name, description
  @@all.push(self)
end
其次,您应该先分别收集名称和描述,然后用 zip 把它们按位置一一配对,再用每一对值生成新实例:
# Collect the text of every island heading and every description paragraph,
# in document order. The text is stripped, as in the question's own code, so
# the resulting objects do not carry leading/trailing whitespace.
islands_names = doc.search("div h2.sitename").map { |node| node.text.strip }
islands_descs = doc.search("div.site_desc > p").map { |node| node.text.strip }
# Pair the two lists by position and build one instance per pair. When there
# are fewer descriptions than names, zip fills the missing ones with nil.
islands_names.zip(islands_descs).map { |name, desc| new(name, desc) }
#⇒ Array of newly created objects (15 according to the original answer)
我会把它拆分成两个独立的类:一个负责 Nokogiri 解析,另一个负责 MostBeutifulIslands::Islands 对象。这样在处理数据时会更加灵活。
require 'open-uri'
require 'nokogiri'
module MostBeutifulIslands
  # Plain holder for one scraped island: a name and a description.
  class Islands
    attr_reader :name, :description

    # name        - the island's name.
    # description - the island's description text.
    def initialize(name, description)
      @name = name
      @description = description
    end

    # Returns true only when both fields carry real content.
    #
    # Values built from `text.strip` are never nil, only possibly empty, so a
    # nil-only check would accept every entry; blank strings are rejected too.
    def valid?
      [name, description].none? { |field| field.nil? || field.to_s.strip.empty? }
    end

    # Persists the island through an `Island` model (e.g. a Rails model, which
    # must be defined elsewhere) and prints either the save result or the
    # model's errors.
    #
    # The model's #save is called exactly once; its result is reused for the
    # output so the record is not written a second time.
    def save
      island = Island.new(name: name, description: description)
      saved = island.save
      if saved
        puts saved
      else
        puts island.errors
      end
    end
  end
end
module MostBeutifulIslands
  # Downloads a listing page and turns each ".site" entry into an Islands object.
  class ParseIslands
    # url     - the address given to the constructor.
    # islands - the objects built by the last scrap_world_best_islands call,
    #           or nil when it has not been called yet.
    attr_reader :url, :islands

    # url - address of the page to scrape.
    def initialize(url)
      @url = url
    end

    # Fetches and parses the page at `url`.
    #
    # URI.open is used instead of Kernel#open because open-uri no longer makes
    # Kernel#open accept URLs on Ruby 3.0+. The parsed document is memoized so
    # repeated calls do not download the page again.
    def html
      @html ||= Nokogiri::HTML(URI.open(url))
    end

    # Builds one Islands object per ".site" node found under
    # "div .article_block", using the stripped text of its ".sitename" and
    # ".site_desc" descendants.
    #
    # The result is stored in @islands and returned. It is an empty array when
    # nothing matches, so save_islands can always be called afterwards.
    def scrap_world_best_islands
      @islands = html.css("div .article_block").css('.site').map do |node|
        name = node.css('.sitename').text.strip
        description = node.css('.site_desc').text.strip
        MostBeutifulIslands::Islands.new(name, description)
      end
    end

    # Saves every scraped island that reports itself as valid. Does nothing
    # when no scrape has happened yet (islands is nil).
    def save_islands
      Array(islands).each do |island|
        island.save if island.valid?
      end
    end
  end
end

# Example usage, placed outside the class body: scrape the page and save
# every valid island.
islands = MostBeutifulIslands::ParseIslands.new("http://www.planetware.com/world/most-beautiful-islands-in-the-world-sey-1-2.htm")
islands.scrap_world_best_islands
islands.save_islands