I use this program. It's not technically open source because it's too
small to be open source but you're welcome to it.
We use it at my company to back servers up across LAN and WAN links.
-Mike
vadi raj wrote:
>
>
> Dear All,
>
> Thanks for sharing the useful information...:)
>
> I would like to know of any Linux (RHEL) based applications (open source)
> which use an rsync-based algorithm to sync data between two systems located
> across a WAN (something like network mirroring).
>
> Regards,
> Vadiraj
>
> [Non-text portions of this message have been removed]
>
>
----------
#! /usr/bin/env ruby
# == Synopsis
#
# tk-backup: rsync-based disk to disk backup
#
# == Usage
#
# tk-backup [options]
#
# --bwlimit=n
# limit bandwidth used by rsync to n kb/sec.
#
# --compress, -z
# compress network traffic.
#
# --config FILE, -c FILE
# configure based on FILE instead of the default /etc/tk-backup.conf.
#
# --dry-run, -n
# show commands executed and run rsync in dry-run mode.
# pass twice to not even run rsync in dry-run mode.
#
# --help, -h
# show this help.
#
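# --name NAME, -N NAME
# use NAME as the backup name instead of the one from the config file.
#
# --verbose, -v
# print each command before it is run.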
require 'fileutils'
require 'getoptlong'
require 'ostruct'
require 'rdoc/usage'
require 'socket'
$VERBOSE = true
# Raised when the configuration file is malformed or does not define a
# requested source or server entry.
class ParseError < RuntimeError; end
# Builds the option set for one run.
#
# Defaults are installed first, then every command-line switch is applied,
# and finally the first remaining (non-option) argument, if any, is taken as
# the name of the server entry to use.
#
# Returns an OpenStruct holding the resulting settings.
def parse_options()
  opts = OpenStruct.new(
    :bwlimit => 0,
    :compress => false,
    :config_file => '/etc/tk-backup.conf',
    :dry_run => 0,
    :keep => nil,
    :name => nil,
    :rsync_flags => ['-e', 'ssh', '-av', '--numeric-ids'],
    :ssh_batch_flags => ['-o', 'BatchMode=yes'],
    :verbose => 0,
    :server => nil
  )

  parser = GetoptLong.new(
    ['--bwlimit', GetoptLong::REQUIRED_ARGUMENT],
    ['--compress', '-z', GetoptLong::NO_ARGUMENT],
    ['--config', '-c', GetoptLong::REQUIRED_ARGUMENT],
    ['--dry-run', '-n', GetoptLong::NO_ARGUMENT],
    ['--help', '-h', GetoptLong::NO_ARGUMENT],
    ['--keep', '-K', GetoptLong::REQUIRED_ARGUMENT],
    ['--name', '-N', GetoptLong::REQUIRED_ARGUMENT],
    ['--verbose', '-v', GetoptLong::NO_ARGUMENT]
  )

  # GetoptLong reports every switch under its long name.
  parser.each do |switch, value|
    case switch
    when '--bwlimit' then opts.bwlimit = value.to_i
    when '--compress' then opts.compress = true
    when '--config' then opts.config_file = value
    when '--dry-run' then opts.dry_run += 1   # may be given more than once
    when '--help' then RDoc::usage()
    when '--keep' then opts.keep = value
    when '--name' then opts.name = value
    when '--verbose' then opts.verbose += 1   # may be given more than once
    end
  end

  # Whatever GetoptLong left in ARGV is positional; the first one names the server.
  opts.server = ARGV[0] unless ARGV.size == 0
  opts
end
# Helpers for gluing path fragments together with exactly one '/' between
# them, as needed when building rsync source and destination arguments.
module RsyncPaths
  # Returns a new string made of +left+ followed by +right+, separated by a
  # single slash. +left+ itself is not modified.
  def join(left, right)
    concat(left.dup, right)
  end

  # Appends +path+ to +result+ in place, making sure exactly one '/' sits at
  # the seam. Returns +result+.
  def concat(result, path)
    base_has_slash = result.end_with?('/')
    path_has_slash = path.start_with?('/')

    if base_has_slash && path_has_slash
      # both sides bring a slash: drop the one from +path+
      result << path[1..-1]
    elsif base_has_slash || path_has_slash
      # exactly one slash already present
      result << path
    else
      result << '/' << path
    end
    result
  end

  # Appends a '/' to +result+ in place unless it already ends with one.
  # Returns +result+ when a slash was added and nil otherwise.
  def trailing_slash(result)
    return nil if result.end_with?('/')
    result << '/'
  end
end
# Forward declarations: the classes are filled in further down, but they must
# already exist as constants at this point in the file.
class LocalUnix
end

class RemoteUnix
end
# One end of a backup transfer: a directory tree that lives either on this
# machine (no host set) or on a remote host reached over ssh.
class Repo
  include RsyncPaths

  # Strings (compared case-insensitively) that switch an on/off setting on.
  TRUE_WORDS = %w[on true yes 1].freeze

  attr_reader :name
  attr_accessor :host
  attr_accessor :user
  attr_accessor :location

  def initialize(name)
    @name = name
    @host = nil
    @user = nil
    @location = nil
    @bwlimit = 0
    @compress = false
  end

  # Bandwidth limit for this endpoint; always 0 for a local repo.
  def bwlimit()
    remote? ? @bwlimit : 0
  end

  # Accepts an integer, or a string (as read from the config file) that is
  # converted with to_i.
  def bwlimit=(value)
    @bwlimit = value.kind_of?(String) ? value.to_i : value
  end

  # Compression is only reported for remote repos.
  def compress?()
    remote? && @compress
  end

  # Accepts a boolean, or a string from the config file. "on" enables
  # compression, as do "true", "yes" and "1"; any other string disables it.
  def compress=(value)
    @compress = value.kind_of?(String) ? TRUE_WORDS.include?(value.strip.downcase) : value
  end

  # A repo is remote as soon as a host has been configured for it.
  def remote?()
    !@host.nil?
  end

  # Full rsync location ([user@][host:]location/dirs.../) with a trailing
  # slash.
  def repo(*dirs)
    result = ''.dup   # fresh mutable buffer; the pieces are appended in place
    result << @user << '@' if @user
    result << @host << ':' if @host
    result << @location
    dirs.each { |el| concat(result, el) }
    trailing_slash(result)
    result
  end

  # Like #dir, but guaranteed to end with a slash.
  def rdir(*dirs)
    result = dir(*dirs)
    trailing_slash(result)
    result
  end

  # Path below the repo location, without any user/host prefix.
  def dir(*dirs)
    result = @location.dup
    dirs.each { |el| concat(result, el) }
    result
  end

  # Class used to run shell commands against this repo.
  def unix()
    remote? ? RemoteUnix : LocalUnix
  end
end
# The backup destination. On top of the plain Repo settings it carries
# +keep+, the number of dated backups to retain.
class Server < Repo
  attr_reader :keep
  attr_writer :keep
end
# The machine being backed up. Its location starts out as the filesystem
# root rather than nil.
class Source < Repo
  def initialize(name)
    super
    @location = "/"
  end
end
# Forward declaration; the class body is supplied later in the file.
class Items; end
# Reads the tk-backup configuration file.
#
# The file consists of "name = value" assignments and "[type: label]" section
# headers. Assignments before the first header are applied to the
# BackupConfig itself; assignments after a header are applied to the object
# created for that section. '#' starts a comment.
class BackupConfig
  # the name of the backup (usually)
  attr_accessor :name
  attr_accessor :operator_email
  attr_reader :dirs

  # Section type (as written in a "[type: label]" header) => class that is
  # instantiated for it. For every key "x" the parsed objects are collected
  # in the instance variable "@xs".
  OBJECT_MAP = {
    'dir' => Items,
    'server' => Server,
    'source' => Source
  }

  # opts:: option object providing config_file, server, source, name and keep.
  #
  # Raises ParseError on a malformed file; errors from File.open propagate.
  def initialize(opts)
    @name = 'backup'
    @keep = nil
    @filename = opts.config_file
    @selected_server = opts.server
    @selected_source = opts.source
    File.open(@filename, 'r') do |fd|
      @fd = fd
      parse()
    end
    # command-line values win over what the file said
    @name = opts.name if opts.name
    @keep = opts.keep if opts.keep
  end

  # Picks one entry out of +list+: the one called +name+ when a name is
  # given, otherwise the first entry.
  #
  # Raises ParseError when the name is unknown or ambiguous, or when the
  # list is empty.
  def make_selection(kind, name, list)
    if name
      entries = list.select { |x| x.name == name }
      if entries.size == 0
        raise ParseError, "#{kind} '#{name}' is not defined in #{@filename}"
      elsif entries.size > 1
        raise ParseError, "#{kind} '#{name}' is defined more than once in #{@filename}"
      end
      return entries[0]
    end

    if list.size == 0
      raise ParseError, "no #{kind} entries defined in #{@filename}"
    end
    list[0]
  end

  # The selected source entry.
  def source()
    make_selection('source', @selected_source, @sources)
  end

  # The selected server entry. A keep value given on the command line
  # replaces the one configured for the server; previously it was stored but
  # never applied.
  def server()
    selected = make_selection('server', @selected_server, @servers)
    selected.keep = @keep if @keep
    selected
  end

  # Parses the already opened file in @fd.
  def parse()
    # for each object type create a list instance variable and initialize it
    # empty. Also keep a hash of object type names to these lists so that
    # the parser can fill them.
    lists = {}
    OBJECT_MAP.keys.each do |key|
      list = []
      lists[key] = list
      instance_variable_set("@#{key}s", list)
    end

    state = :INITIAL
    target = nil
    @fd.each_line do |line|
      line.sub!(/#.*/, '')
      line.strip!
      next if line.empty?

      case state
      when :INITIAL
        case line
        when /(\w+)\s*=\s*(.*)/
          assign(self, $1, $2)
        when /\[(\w+):\s*([^\]]+)\]/
          type, label = $1, $2
          kind = OBJECT_MAP[type]
          if kind.nil?
            raise ParseError, "#{position()}: unknown object type '#{type}'"
          end
          target = kind.new(label)
          lists[type] << target
          state = :OBJECT
        else
          raise ParseError, "#{position()}: unrecognized line #{line}"
        end
      when :OBJECT
        case line
        when /(\w+)\s*=\s*(.*)/
          assign(target, $1, $2)
        when /\[.*\]/
          # a new section begins: process this same line again in the
          # INITIAL state so that the header is interpreted there
          target = nil
          state = :INITIAL
          redo
        else
          raise ParseError, "#{position()}: unrecognized line #{line}"
        end
      end
    end
  end

  # Calls the "name=" writer on +object+ with +value+.
  #
  # Raises ParseError when the object has no such writer.
  def assign(object, name, value)
    sym = "#{name}=".to_sym
    if !object.respond_to?(sym)
      raise ParseError,
            "#{position()}: unrecognized variable '#{name}' in #{name} = #{value}"
    end
    object.__send__(sym, value)
  end

  private

  # "file:line" prefix for error messages, based on the line most recently
  # read from the config file.
  def position()
    "#{@filename}:#{@fd.lineno}"
  end
end
# One directory to back up, together with its include/exclude rules and
# optional commands to run before and after the transfer.
class Items
  # A single include or exclude rule.
  class Filter
    attr_accessor :mode, :name

    # mode:: '--include' or '--exclude'
    # name:: the pattern
    # leaf:: true for rules that came from the configuration; false for the
    #        parent-directory rules synthesized by Items#compile
    def initialize(mode, name, leaf = true)
      @mode = mode
      @name = name
      @leaf = leaf
    end

    def inspect()
      "#{@mode == '--include' ? 'I' : 'E'}:#{@name}"
    end

    # Appends this rule to +list+. A leaf rule is followed by a second
    # "name/**" rule covering everything below it, except for the catch-all
    # exclude '*'.
    def render(list)
      list << @mode << @name
      if @leaf && (@mode != '--exclude' || @name != '*')
        list << @mode << "#{@name}/**"
      end
    end
  end

  # Strings (compared case-insensitively) that switch an on/off setting on.
  TRUE_WORDS = %w[on true yes 1].freeze

  attr_reader :root
  attr_reader :hard_links
  attr_accessor :pre_command
  attr_accessor :post_command

  # dir:: directory to back up. An optional block is evaluated in the context
  # of the new object.
  def initialize(dir, &block)
    @root = dir
    @filters = []
    @hard_links = false
    @pre_command = nil
    @post_command = nil
    instance_eval(&block) if block_given?
  end

  alias :name :root

  # Returns the configured rules in order, with an include rule inserted for
  # every parent directory of an included path. Each parent is inserted only
  # once, in front of the first rule that needs it.
  def compile()
    seen = {}
    result = []
    @filters.each do |f|
      if f.mode == '--include'
        parts = f.name.split(/\//)
        1.upto(parts.length - 1) do |i|
          path = parts[0, i].join('/')
          next if seen[path]
          seen[path] = true
          result << Filter.new(f.mode, path, false)
        end
      end
      result << f
    end
    result
  end

  # Flat list of rsync filter arguments for this directory.
  def filters()
    result = []
    compile().each { |f| f.render(result) }
    result
  end

  # Accepts a boolean, or a string from the config file. "on" enables hard
  # link preservation, as do "true", "yes" and "1"; any other string disables
  # it.
  def hard_links=(value)
    @hard_links = value.kind_of?(String) ? TRUE_WORDS.include?(value.strip.downcase) : value
  end

  # Writer-style hooks used by the config parser: every assignment appends
  # one more rule.
  def include=(dir)
    @filters << Filter.new('--include', dir)
  end

  def exclude=(dir)
    @filters << Filter.new('--exclude', dir)
  end
end
# Mixin for classes that keep their options in @opts.
module Tracing
  # Prints the arguments, separated by single spaces, when at least one
  # --verbose was given; otherwise does nothing.
  def trace(*out)
    return nil unless @opts.verbose > 0
    puts(out.join(' '))
  end
end
# Shell-level file operations. The including class supplies make_cmd (always
# executes) and make_ecmd (only echoes the command in dry-run mode), plus
# trace and @opts.
module Commands
  include RsyncPaths

  # Replaces +two+ with a symbolic link pointing at +one+. Exits the program
  # if either step fails.
  def ln_s(one, two)
    failure = "failed to link #{one} to #{two}"
    steps = [make_ecmd('rm', '-f', two),
             make_ecmd('ln', '-s', one, two)]
    steps.each { |cmd| run_or_exit(cmd, failure) }
  end

  # Returns the entries of +dir+ as an array of names. This is executed even
  # in dry-run mode.
  def ls(dir)
    cmd = make_cmd('ls', "'#{dir}'")
    trace(*cmd)
    IO.popen(cmd.join(' '), 'r') do |fd|
      fd.readlines.map { |entry| entry.chomp }
    end
  end

  # Creates +dir+ including missing parents; the command's status is not
  # checked. Returns +dir+.
  def mkdir(dir)
    cmd = make_ecmd('mkdir', '-p', dir)
    trace(*cmd) unless @opts.dry_run > 0
    system(*cmd)
    dir
  end

  # Renames +one+ to +two+; exits the program on failure.
  def mv(one, two)
    run_or_exit(make_ecmd('mv', one, two), "failed to move #{one} to #{two}")
  end

  # Recursively removes all +items+; exits the program on failure.
  def rm_r(*items)
    run_or_exit(make_ecmd('rm', '-rf', *items), "failed to remove #{items.join(' ')}")
  end

  # Runs an arbitrary command; exits the program on failure.
  def run(*lcmd)
    run_or_exit(make_ecmd(*lcmd), "failed to run #{lcmd.join(' ')}")
  end

  private

  # Traces +cmd+ (not in dry-run mode, where the command echoes itself),
  # executes it and, when it fails, prints +failure+ and exits with the
  # command's exit status.
  def run_or_exit(cmd, failure)
    trace(*cmd) unless @opts.dry_run > 0
    return nil if system(*cmd)
    puts(failure)
    exit($?.exitstatus)
  end
end
# Runs the Commands operations directly on this machine.
class LocalUnix
  include Tracing
  include Commands

  def initialize(repo, opts)
    @repo = repo
    @opts = opts
  end

  # A command that is always really executed: the arguments as given.
  def make_cmd(*args)
    args
  end

  # A command that, in dry-run mode, is turned into an echo of itself
  # prefixed with a "localhost:" label.
  def make_ecmd(*args)
    prefix = @opts.dry_run > 0 ? ['echo', sprintf('%-12s', 'localhost:')] : []
    prefix + args
  end

  def verify_available()
    # nothing to do, this is localhost
  end
end
# Runs the Commands operations on a remote host by wrapping them in ssh.
#
# The first ssh invocation is made without the batch-mode flags from
# @opts.ssh_batch_flags; every later invocation includes them.
class RemoteUnix
  include Tracing
  include Commands

  def initialize(repo, opts)
    @repo = repo
    @opts = opts
    @first_ssh = true
  end

  # an ssh command that unconditionally does what the command says, even
  # in echo mode.
  def make_cmd(*args)
    ssh_prefix().push(*args)
  end

  # an ssh command that echos what it's going to do rather than
  # actually doing it when in echo mode
  def make_ecmd(*args)
    cmd = ssh_prefix()
    cmd.push('echo', sprintf('"%-12s"', "#{@repo.host}:")) if @opts.dry_run > 0
    cmd.push(*args)
    cmd
  end

  # Pings the host twice. If that fails, the ping is repeated with its
  # output visible and the program exits with status 1.
  def verify_available()
    ping = ['ping', '-c', '2', @repo.host]
    cmd = ['sh', '-c', ping.join(' ') + ' > /dev/null 2>&1']
    if !system(*cmd)
      system(*ping)
      puts()
      puts("backup host not available!")
      exit(1)
    end
  end

  private

  # "ssh [batch flags] host" as a fresh array; marks the first ssh as used.
  # NOTE(review): only the host is passed to ssh, not @repo.user, whereas the
  # rsync location built by the repo includes the user - confirm whether that
  # is intended.
  def ssh_prefix()
    cmd = ['ssh']
    cmd.push(*@opts.ssh_batch_flags) if !@first_ssh
    cmd.push(@repo.host)
    @first_ssh = false
    cmd
  end
end
# Drives one backup run: rsync every configured directory into an
# 'in-progress' tree on the server, rename that tree to a dated directory,
# point 'current' at it and prune old dated directories.
class Backup
  include Tracing

  # Names of finished backups: YYYYMMDD, optionally followed by ".N".
  DATED_DOT_DIR = /^[0-9]{8}(\.[0-9]+)?$/

  # How often rsync is tried for one directory before giving up.
  RSYNC_ATTEMPTS = 3

  # rsync exit status that is treated like success: a source file disappeared
  # during the transfer, so there is nothing to back up for it.
  RSYNC_VANISHED = 24

  def initialize(opts, conf)
    @opts = opts
    @conf = conf
    @source = conf.source()
    @target = conf.server()
    @backup_dirs = []
    @source_unix = @source.unix.new(@source, @opts)
    @unix = @target.unix.new(@target, @opts)
  end

  # Orders two dated directory names by date, then by numeric suffix.
  def date_dir_cmp(lhs, rhs)
    lhs = lhs.split(/\./).map { |x| x.to_i }
    rhs = rhs.split(/\./).map { |x| x.to_i }
    lhs <=> rhs
  end

  # First non-zero limit among command line, source and server; 0 if none.
  def bwlimit()
    [@opts.bwlimit, @source.bwlimit, @target.bwlimit].find { |x| x != 0 } || 0
  end

  def compress?()
    @opts.compress || @source.compress? || @target.compress?
  end

  # Transfers every item into the server's 'in-progress' tree, hard-linking
  # against the most recent dated backup when there is one.
  #
  # NOTE: this shadows Object#send on Backup instances; the name is kept
  # because callers use it.
  def send(*items)
    [@source_unix, @unix].each { |u| u.verify_available() }
    @backup_dirs = @unix.ls(@target.location)
    dated = @backup_dirs.select { |d| d =~ DATED_DOT_DIR }
    current = dated.max { |a, b| date_dir_cmp(a, b) }

    items.each do |item|
      @source_unix.run(item.pre_command) if item.pre_command
      @unix.mkdir(@target.dir('in-progress', item.root))
      run_rsync(rsync_command(item, current))
      @source_unix.run(item.post_command) if item.post_command
    end
  end

  # Directory name for the +count+-th backup of +date+.
  def name(date, count)
    count == 0 ? date : "#{date}.#{count}"
  end

  # Name for today's backup: the bare date if there is none yet, otherwise
  # the date with a suffix one above the highest existing one.
  #
  # Only properly formed names for today are considered. Previously any
  # entry that merely started with today's date but was not exactly "date"
  # or "date.N" kept the search loop from ever finishing.
  def find_backup_name()
    now = Time.now.strftime('%Y%m%d')
    todays = @backup_dirs.select do |d|
      d =~ DATED_DOT_DIR && d.split(/\./)[0] == now
    end
    return now if todays.empty?
    highest = todays.map { |d| d.split(/\./)[1].to_i }.max
    name(now, highest + 1)
  end

  # Renames 'in-progress' to its final dated name and repoints 'current'.
  def commit()
    rname = find_backup_name()
    @backup_result = @target.dir(rname)
    @unix.mv(@target.dir('in-progress'), @backup_result)
    @unix.ln_s(rname, @target.dir('current'))
    @backup_dirs << rname
  end

  # Commits the backup, then removes the oldest dated backups so that only
  # the server's +keep+ most recent ones remain.
  #
  # When +keep+ is missing or not a positive number nothing is removed.
  # Previously that case removed every dated backup, including the one that
  # had just been written.
  def rotate()
    commit()
    dirs = @backup_dirs.select { |x| x =~ DATED_DOT_DIR }
    dirs.sort! { |a, b| date_dir_cmp(a, b) }
    days = @target.keep.to_i
    if days > 0 && dirs.length > days
      keep = dirs[dirs.length - days, days]
      kill = dirs[0, dirs.length - days]
      puts "keeping only #{days} backups:"
      kill.each { |k| puts "killing #{k}" }
      keep.each { |k| puts "keeping #{k}" }
      @unix.rm_r(*kill.map { |x| @target.dir(x) })
      puts()
    end
    puts("backup is in #{@backup_result}")
  end

  private

  # Builds the rsync argument list for one item. With --dry-run given twice
  # the whole command is only echoed; given once, rsync itself runs with -n.
  def rsync_command(item, current)
    cmd = []
    cmd.push('echo', sprintf('%-12s', "localhost:")) if @opts.dry_run > 1
    cmd.push('rsync')
    cmd.push('-n') if @opts.dry_run == 1
    cmd.push(*@opts.rsync_flags)
    limit = bwlimit()
    cmd.push("--bwlimit=#{limit}") if limit != 0
    cmd.push("-z") if compress?
    cmd.push("--hard-links") if item.hard_links
    cmd.push(*item.filters)
    # NOTE(review): the receiver of this call was unreadable in the copy this
    # was restored from; rdir (path on the server, trailing slash) is assumed.
    cmd.push("--link-dest=#{@target.rdir(current, item.root)}") if current
    cmd.push(@source.repo(item.root), @target.repo('in-progress', item.root))
    cmd
  end

  # Runs +cmd+ up to RSYNC_ATTEMPTS times. Returns on success or on exit
  # status RSYNC_VANISHED; exits the program after the last failed attempt.
  def run_rsync(cmd)
    1.upto(RSYNC_ATTEMPTS) do |attempt|
      trace(*cmd) unless @opts.dry_run > 1
      return if system(*cmd)
      return if $?.exitstatus == RSYNC_VANISHED
      if attempt < RSYNC_ATTEMPTS
        puts "rsync failed for some reason, retrying"
      else
        puts "rsync failed"
        # exitstatus is nil when rsync was killed by a signal
        exit($?.exitstatus || 1)
      end
    end
  end
end
# Leaves marker files under STATE_DIR while a backup runs: a 'monitor' file
# holding the process id plus start and end timestamps.
class BackupMonitor
  STATE_DIR = '/var/run/tk-backup'

  # Runs the given block. When STATE_DIR exists, the run is bracketed by a
  # BackupMonitor; otherwise the block is simply executed.
  def self.monitor(conf)
    return yield unless File.directory?(STATE_DIR)

    # NOTE: closing is deliberately not done in an ensure clause because the
    # endtime is supposed to be missing when the script fails.
    bmon = BackupMonitor.new(conf)
    yield
    bmon.close()
  end

  # Creates the per-backup state directory if needed and records the
  # process id and the start time in it.
  def initialize(conf)
    server = conf.server().name
    @dir = File.join(STATE_DIR, "#{conf.name}-#{server}")
    @monitor = File.join(@dir, 'monitor')
    FileUtils.mkdir_p(@dir) unless File.directory?(@dir)
    write($$, @monitor)
    write(Time.now, File.join(@dir, 'starttime.txt'))
  end

  # Records the end time and removes the 'monitor' file.
  def close()
    write(Time.now, File.join(@dir, 'endtime.txt'))
    FileUtils.rm_f(@monitor)
  end

  # Writes the string form of +contents+ as one line to +filename+,
  # replacing whatever the file held before.
  def write(contents, filename)
    File.open(filename, 'w') { |fd| fd.puts(contents.to_s) }
  end
end
# Entry point: read options and configuration, then transfer every
# configured directory and rotate the backups, all inside a BackupMonitor
# bracket.
#
# A configuration problem is reported as a single line on stderr and ends
# the program with status 1 instead of producing a backtrace.
def main()
  opts = parse_options()
  conf = BackupConfig.new(opts)
  backup = Backup.new(opts, conf)
  BackupMonitor.monitor(conf) do
    backup.send(*conf.dirs)
    backup.rotate()
  end
rescue ParseError => e
  $stderr.puts("tk-backup: #{e.message}")
  exit(1)
end

# Only run when executed as a script, so the file can be loaded without
# starting a backup.
main() if __FILE__ == $0
# :nodoc:
# vim: ai et sts=2 sw=2
----------
# tk-backup configuration file
#
[server: storage]
# The server on which to run the backup
host = storage.tacitknowledge.com
operator_email = USERNAME@...
user = USERNAME
# Where on the server to back things up. The actual backup will be in
# $location/$name/current
location = /backup/USERNAME
# How many backups to keep on the server.
keep = 2
# The backup name usually your userid or perhaps machine-name.
[source: localhost]
name = USERNAME
[dir: /Users/USERNAME]
# Do you want to preserve hard links?
#hard_links = true
# Do you have some commands you would like to run first or last?
#pre_command = echo Starting /Users/USERNAME backup...
#post_command = echo Finishing /Users/USERNAME backup...
# tacit stuff
exclude = Projects/Tacit/**/.svn
include = Projects/Tacit
exclude = Projects/*
include = Documents/Tacit
exclude = Documents/*
# big stuff in Library that doesn't belong
exclude = Library/Application Support/ElectricSheep
exclude = Library/Caches # safari, quicktime and firefox caches
exclude = Library/iTunes # includes iphone software updates
exclude = Library/Logs
exclude = Library/Mail # can get this back from the server
exclude = Library/Mail Downloads
exclude = Library/Printers
# general personal stuff
exclude = Downloads
exclude = Movies
exclude = Music
exclude = Pictures
exclude = Public
exclude = Sites
# my own personal stuff
exclude = Archive
exclude = Backups
exclude = Interest
# maven gunk
exclude = .maven/cache
exclude = .maven/repository
exclude = .m2/repository
# sensitive things backed up elsewhere or not at all
exclude = .gnupg
exclude = .ssh
exclude = .subversion/auth
# temporary stuff
exclude = tmp
exclude = .Trash
[Non-text portions of this message have been removed]