如何使用 parallel gem 加速 sitemap_generator

How to speed up sitemap_generator with parallel gem

我正在尝试通过 parallel gem 引入并行化来加快 sitemap_generator 的运行速度。我写了下面的代码,但各个 group 并没有被写入 public/sitemaps 目录。我猜测这是因为这些 lambda 是在不同的进程空间中并行执行的。任何反馈都会有所帮助。谢谢!

#!/usr/bin/env ruby

require 'rubygems'
require 'sitemap_generator'
require 'benchmark'
require 'parallel'
require 'random-word'


# Host assigned as the sitemap default; presumably the base for the relative
# paths passed to `add` below — confirm against the sitemap_generator docs.
SitemapGenerator::Sitemap.default_host = "http://localhost"

# Job for the :biz group (sitemaps path 'sitemaps/biz/'): adds 1000 URLs,
# each built from one RandomWord.adjs entry and one RandomWord.nouns entry,
# with priority 0.8.
a = lambda do
  SitemapGenerator::Sitemap.group(filename: :biz, sitemaps_path: 'sitemaps/biz/') do
    1000.times do
      adjective = RandomWord.adjs.next
      noun = RandomWord.nouns.next
      add "/#{adjective}/#{noun}", priority: 0.8
    end
  end
end

# Job for the :wedding_ugc group (sitemaps path 'sitemaps/ugc'): adds 1000
# URLs, each built from one RandomWord.adjs entry and one RandomWord.nouns
# entry, with priority 0.8.
b = lambda do
  SitemapGenerator::Sitemap.group(filename: :wedding_ugc, sitemaps_path: 'sitemaps/ugc') do
    1000.times do
      adjective = RandomWord.adjs.next
      noun = RandomWord.nouns.next
      add "/#{adjective}/#{noun}", priority: 0.8
    end
  end
end

# Working example (single group inside Sitemap.create, without Parallel):
# SitemapGenerator::Sitemap.default_host = "http://localhost"
# SitemapGenerator::Sitemap.create(:compress => false) do
#   group(:filename => :biz, :sitemaps_path => 'sitemaps/biz/') do
#     (1..1000).each do |index|
#       url = "/#{RandomWord.adjs.next}/#{RandomWord.nouns.next}"
#       add url, :priority => 0.8 
#     end
#   end
# end


# Print a timestamp, hand both jobs to Parallel.each, then print a second
# timestamp so the two can be compared by eye.
started_at = Time.now
puts started_at
Parallel.each([a, b]) do |task|
  task.call
end
puts Time.now

我已经让它正常工作了,并把解决方案发布到了 GitHub 上。

为防止该链接失效,这里把代码也贴出来。

# Build each group inside its own job and run the jobs through Parallel with
# up to 8 processes. Index creation is switched off for this pass.
SitemapGenerator::Sitemap.create(compress: false, create_index: false) do
  group1 = lambda do
    link_set = sitemap.group(filename: :group1, sitemaps_path: 'sitemaps/group1') do
      Record.find_each do |_record|
        add '/record/path'
      end
    end

    # Per the original author's note: a sitemap that did not fill up has not
    # been written yet, so write it explicitly.
    unless link_set.sitemap.written?
      link_set.sitemap.write
    end
  end

  # group2 like above...

  Parallel.each([group1, group2], in_processes: 8) do |job|
    job.call
  end
end

# Regenerate the index sitemap XML file in a second pass, because the author
# could not find a way to track it across multiple processes.
SitemapGenerator::Sitemap.create(compress: false) do
  # NOTE: Dir.chdir without a block changes the working directory for the
  # remainder of the process; the glob below is relative to that directory.
  Dir.chdir(sitemap.public_path.to_s)

  pattern = File.join("**", "sitemaps", "**", "*.xml")
  Dir.glob(pattern).each { |xml_path| add xml_path }
end