如何将 URL 数组作为函数的参数传递
How to pass an array of URLs as a parameter for a function
我想将第一个函数返回的一组 URL 传递到第二个函数中,但我不确定如何执行此操作。
require 'open-uri'
require 'nokogiri'
require 'byebug'
# Scrapes the NYT Cooking "easy" search page and returns an Array of recipe
# URL strings. The map result is the last expression evaluated, so it is the
# method's return value.
def fetch_recipe_urls
base_url = 'https://cooking.nytimes.com' # NOTE(review): assigned but never used
easy_recipe_url = 'https://cooking.nytimes.com/search?q=easy'
# Kernel#open accepts a URL only because open-uri patches it;
# NOTE(review): that extension was removed in Ruby 3.0 — URI.open is the modern spelling.
easy_searchpage = Nokogiri::HTML(open(easy_recipe_url))
# XPath: grab the data-url attribute of every recipe card.
recipes = easy_searchpage.search('//article[@class="card recipe-card"]/@data-url')
recipes_url_array = recipes.map do |recipe|
uri = URI.parse(recipe.text)
uri.scheme = "http" # NOTE(review): forces http although the site serves https — confirm intended
uri.host = "cooking.nytimes.com"
uri.query = nil
uri.to_s
end
end
# Scrapes a single recipe page into a Hash and returns it wrapped in a
# one-element Array. The local `recipes` array is rebuilt on every call, so
# nothing accumulates across URLs — the issue this question is about.
def scraper(url)
html_file = open(url).read
html_doc = Nokogiri::HTML(html_file)
recipes = Array.new # NOTE(review): always a fresh array; only ever holds one recipe
recipe = {
title: html_doc.css('h1.recipe-title').text.strip,
# NOTE(review): splitting the yield text on the word "servings" looks fragile — verify
time: html_doc.css('span.recipe-yield-value').text.split("servings")[1],
steps: html_doc.css('ol.recipe-steps').text.split.join(" "),
ingredients: html_doc.css('ul.recipe-ingredients').text.split.join(" ")
}
recipes << recipe
end
因为你在调用 fetch_recipe_urls 之后得到了一个数组,所以你可以迭代它,并为每个 URL 调用 scraper:
# Scrapes one recipe page and returns a Hash of its details.
#
# url - String URL of a single recipe page.
#
# Returns a Hash with :title, :time, :steps and :ingredients keys.
def scraper(url)
  # URI.open instead of Kernel#open: open-uri's Kernel#open extension was
  # deprecated in Ruby 2.7 and removed in Ruby 3.0.
  html_file = URI.open(url).read
  html_doc = Nokogiri::HTML(html_file)
  {
    title: html_doc.css('h1.recipe-title').text.strip,
    # NOTE: assumes the yield text contains the word "servings" — verify
    time: html_doc.css('span.recipe-yield-value').text.split("servings")[1],
    steps: html_doc.css('ol.recipe-steps').text.split.join(" "),
    ingredients: html_doc.css('ul.recipe-ingredients').text.split.join(" ")
  }
end
fetch_recipe_urls.map { |url| scraper(url) }
但实际上我会将代码构造为:
# Base URL for NYT Cooking; used to absolutize the relative data-url paths.
BASE_URL = 'https://cooking.nytimes.com/'

# Fetches the "easy" search page and returns an Array of absolute recipe
# URL strings.
def fetch_recipe_urls
  # URI.open: Kernel#open on URLs was removed in Ruby 3.0.
  page = Nokogiri::HTML(URI.open(BASE_URL + 'search?q=easy'))
  recipes = page.search('//article[@class="card recipe-card"]/@data-url')
  # URI.join avoids the double slash that plain string concatenation produces
  # when the data-url value already starts with "/".
  recipes.map { |recipe_node| URI.join(BASE_URL, recipe_node.text).to_s }
end
# Scrapes a single recipe page into a Hash of its details.
#
# url - String URL of the recipe page.
#
# Returns a Hash with :title, :time, :steps and :ingredients keys.
def scrape(url)
  # URI.open: Kernel#open on URLs was removed in Ruby 3.0.
  html_doc = Nokogiri::HTML(URI.open(url).read)
  {
    title: html_doc.css('h1.recipe-title').text.strip,
    # NOTE: assumes the yield text contains the word "servings" — verify
    time: html_doc.css('span.recipe-yield-value').text.split("servings")[1],
    steps: html_doc.css('ol.recipe-steps').text.split.join(" "),
    ingredients: html_doc.css('ul.recipe-ingredients').text.split.join(" ")
  }
end
fetch_recipe_urls.map { |url| scrape(url) }
您也可以在 fetch_recipe_urls 内部直接调用 scrape/scraper,但我建议保持单一职责。更好的做法是改成面向对象:构建一个 Scraper 类和一个 CookingRecipe 类,使代码更加惯用。
或者如果你想将数组传递给 scraper...
# Sketch (not runnable as-is): after collecting recipes_url_array — the code
# elided by "..." — hand the whole Array to scraper in one call.
def fetch_recipe_urls
...
recipes = scraper(recipes_url_array) # scraper now receives an Array of URLs
end
# Scrapes every URL in the given list and returns the collected recipes.
#
# urls - Enumerable of String recipe-page URLs.
#
# Returns an Array of Hashes, each with :title, :time, :steps and
# :ingredients keys.
def scraper(urls)
  # map replaces the manual each-and-accumulate pattern.
  urls.map do |url|
    # URI.open: Kernel#open on URLs was removed in Ruby 3.0.
    html_doc = Nokogiri::HTML(URI.open(url).read)
    {
      title: html_doc.css('h1.recipe-title').text.strip,
      # NOTE: assumes the yield text contains the word "servings" — verify
      time: html_doc.css('span.recipe-yield-value').text.split("servings")[1],
      steps: html_doc.css('ol.recipe-steps').text.split.join(" "),
      ingredients: html_doc.css('ul.recipe-ingredients').text.split.join(" ")
    }
  end
end
我想将第一个函数返回的一组 URL 传递到第二个函数中,但我不确定如何执行此操作。
require 'open-uri'
require 'nokogiri'
require 'byebug'
# Scrapes the NYT Cooking "easy" search page and returns an Array of recipe
# URL strings. The map result is the last expression evaluated, so it is the
# method's return value.
def fetch_recipe_urls
base_url = 'https://cooking.nytimes.com' # NOTE(review): assigned but never used
easy_recipe_url = 'https://cooking.nytimes.com/search?q=easy'
# Kernel#open accepts a URL only because open-uri patches it;
# NOTE(review): that extension was removed in Ruby 3.0 — URI.open is the modern spelling.
easy_searchpage = Nokogiri::HTML(open(easy_recipe_url))
# XPath: grab the data-url attribute of every recipe card.
recipes = easy_searchpage.search('//article[@class="card recipe-card"]/@data-url')
recipes_url_array = recipes.map do |recipe|
uri = URI.parse(recipe.text)
uri.scheme = "http" # NOTE(review): forces http although the site serves https — confirm intended
uri.host = "cooking.nytimes.com"
uri.query = nil
uri.to_s
end
end
# Scrapes a single recipe page into a Hash and returns it wrapped in a
# one-element Array. The local `recipes` array is rebuilt on every call, so
# nothing accumulates across URLs — the issue this question is about.
def scraper(url)
html_file = open(url).read
html_doc = Nokogiri::HTML(html_file)
recipes = Array.new # NOTE(review): always a fresh array; only ever holds one recipe
recipe = {
title: html_doc.css('h1.recipe-title').text.strip,
# NOTE(review): splitting the yield text on the word "servings" looks fragile — verify
time: html_doc.css('span.recipe-yield-value').text.split("servings")[1],
steps: html_doc.css('ol.recipe-steps').text.split.join(" "),
ingredients: html_doc.css('ul.recipe-ingredients').text.split.join(" ")
}
recipes << recipe
end
因为你在调用 fetch_recipe_urls 之后得到了一个数组,所以你可以迭代它,并为每个 URL 调用 scraper:
# Scrapes one recipe page and returns a Hash of its details.
#
# url - String URL of a single recipe page.
#
# Returns a Hash with :title, :time, :steps and :ingredients keys.
def scraper(url)
  # URI.open instead of Kernel#open: open-uri's Kernel#open extension was
  # deprecated in Ruby 2.7 and removed in Ruby 3.0.
  html_file = URI.open(url).read
  html_doc = Nokogiri::HTML(html_file)
  {
    title: html_doc.css('h1.recipe-title').text.strip,
    # NOTE: assumes the yield text contains the word "servings" — verify
    time: html_doc.css('span.recipe-yield-value').text.split("servings")[1],
    steps: html_doc.css('ol.recipe-steps').text.split.join(" "),
    ingredients: html_doc.css('ul.recipe-ingredients').text.split.join(" ")
  }
end
fetch_recipe_urls.map { |url| scraper(url) }
但实际上我会将代码构造为:
# Base URL for NYT Cooking; used to absolutize the relative data-url paths.
BASE_URL = 'https://cooking.nytimes.com/'

# Fetches the "easy" search page and returns an Array of absolute recipe
# URL strings.
def fetch_recipe_urls
  # URI.open: Kernel#open on URLs was removed in Ruby 3.0.
  page = Nokogiri::HTML(URI.open(BASE_URL + 'search?q=easy'))
  recipes = page.search('//article[@class="card recipe-card"]/@data-url')
  # URI.join avoids the double slash that plain string concatenation produces
  # when the data-url value already starts with "/".
  recipes.map { |recipe_node| URI.join(BASE_URL, recipe_node.text).to_s }
end
# Scrapes a single recipe page into a Hash of its details.
#
# url - String URL of the recipe page.
#
# Returns a Hash with :title, :time, :steps and :ingredients keys.
def scrape(url)
  # URI.open: Kernel#open on URLs was removed in Ruby 3.0.
  html_doc = Nokogiri::HTML(URI.open(url).read)
  {
    title: html_doc.css('h1.recipe-title').text.strip,
    # NOTE: assumes the yield text contains the word "servings" — verify
    time: html_doc.css('span.recipe-yield-value').text.split("servings")[1],
    steps: html_doc.css('ol.recipe-steps').text.split.join(" "),
    ingredients: html_doc.css('ul.recipe-ingredients').text.split.join(" ")
  }
end
fetch_recipe_urls.map { |url| scrape(url) }
您也可以在 fetch_recipe_urls 内部直接调用 scrape/scraper,但我建议保持单一职责。更好的做法是改成面向对象:构建一个 Scraper 类和一个 CookingRecipe 类,使代码更加惯用。
或者如果你想将数组传递给 scraper...
# Sketch (not runnable as-is): after collecting recipes_url_array — the code
# elided by "..." — hand the whole Array to scraper in one call.
def fetch_recipe_urls
...
recipes = scraper(recipes_url_array) # scraper now receives an Array of URLs
end
# Scrapes every URL in the given list and returns the collected recipes.
#
# urls - Enumerable of String recipe-page URLs.
#
# Returns an Array of Hashes, each with :title, :time, :steps and
# :ingredients keys.
def scraper(urls)
  # map replaces the manual each-and-accumulate pattern.
  urls.map do |url|
    # URI.open: Kernel#open on URLs was removed in Ruby 3.0.
    html_doc = Nokogiri::HTML(URI.open(url).read)
    {
      title: html_doc.css('h1.recipe-title').text.strip,
      # NOTE: assumes the yield text contains the word "servings" — verify
      time: html_doc.css('span.recipe-yield-value').text.split("servings")[1],
      steps: html_doc.css('ol.recipe-steps').text.split.join(" "),
      ingredients: html_doc.css('ul.recipe-ingredients').text.split.join(" ")
    }
  end
end