@ledsun blog

無味の味は佳境に入らざればすなわち知れず

ActiveRecordのバルクインサート比較 その1

手始めにRails6で追加されたinsert_allとimport(とその他)のパフォーマンス検証 - Qiita追証します。 次のように検証用のRailsアプリケーションを用意します。

bundle exec rails new . bulk_insert -MCAJT
bin/rails g user name:string
bin/rails db:prepare

DBはとりあえずSQLite3で行きます。 app/models/user.rbに元記事通りのスクリプトをコピペします。

class User < ApplicationRecord
  class << self
    def benchmark_bulk_insert
      # create data
      import_data = []
      1_000.times { import_data << new(name: 'name', created_at: Time.current, updated_at: Time.current) }

      insert_data = []
      1_000.times { insert_data << { name: 'name', created_at: Time.current, updated_at: Time.current } }

      values = []
      1_000.times { values << "('name', '#{Time.current.to_s(:db)}', '#{Time.current.to_s(:db)}')" }
      sql = "INSERT INTO users (name, created_at, updated_at) VALUES #{values.join(',')}"

      require 'benchmark'
      Benchmark.bm 15 do |r|
        transaction do
          r.report 'sql' do
            100.times { bulk_insert_using_sql(sql) }
          end
          raise ActiveRecord::Rollback
        end

        transaction do
          r.report 'insert_all' do
            100.times { bulk_insert_using_insert_all(insert_data) }
          end
          raise ActiveRecord::Rollback
        end

        transaction do
          r.report 'import' do
            100.times { bulk_insert_using_import(import_data) }
          end
          raise ActiveRecord::Rollback
        end
      end
    end

    def profiler_bulk_insert
      # create data
      import_data = []
      1_000.times { import_data << new(name: 'name', created_at: Time.current, updated_at: Time.current) }

      insert_data = []
      1_000.times { insert_data << { name: 'name', created_at: Time.current, updated_at: Time.current } }

      values = []
      1_000.times { values << "('name', '#{Time.current.to_s(:db)}', '#{Time.current.to_s(:db)}')" }
      sql = "INSERT INTO users (name, created_at, updated_at) VALUES #{values.join(',')}"

      p '################# sql ########################'
      transaction do
        report = MemoryProfiler.report do
          bulk_insert_using_sql(sql)
        end
        report.pretty_print(retained_strings: 0, allocated_strings: 100, normalize_paths: true)
        raise ActiveRecord::Rollback
      end

      p '################# insert_all ########################'
      transaction do
        report = MemoryProfiler.report do
          bulk_insert_using_insert_all(insert_data)
        end
        report.pretty_print(retained_strings: 0, allocated_strings: 100, normalize_paths: true)
        raise ActiveRecord::Rollback
      end

      p '################# import ########################'
      transaction do
        report = MemoryProfiler.report do
          bulk_insert_using_import(import_data)
        end
        report.pretty_print(retained_strings: 0, allocated_strings: 100, normalize_paths: true)
        raise ActiveRecord::Rollback
      end
    end

    def bulk_insert_using_import(users)
      import users
    end

    def bulk_insert_using_insert_all(users)
      insert_all users
    end

    def bulk_insert_using_sql(sql)
      connection.execute sql
    end
  end
end

rails consoleでUser.benchmark_bulk_insertを実行します。 次の結果が得られました。

[#<Benchmark::Tms:0x00007f6ab70f5360 @cstime=0.0, @cutime=0.0, @label="sql", @real=0.6942104189947713, @stime=0.082255, @total=0.410029, @utime=0.327774>,
 #<Benchmark::Tms:0x00007f6ab6f49480 @cstime=0.0, @cutime=0.0, @label="insert_all", @real=6.082169839006383, @stime=0.12776800000000005, @total=6.034355, @utime=5.906587>,
 #<Benchmark::Tms:0x00007f6ab718ea88 @cstime=0.0, @cutime=0.0, @label="import", @real=5.663791493017925, @stime=0.11862299999999998, @total=5.607844, @utime=5.489221>]

sqlが0.6s、insert_allとimportが一桁多い5~6sと、元記事と概ね同じ結果が得られました。 追証環境が出来たと考えられます。