Commit 8ea59d02 authored by Jonathan Hartman, committed by Ben Abrams
Browse files

When first starting a collector, wait for it to register

This should help with two issues regarding collector registration:

* The collector init script starts the service asynchronously, so Chef
  considers the service started, even if the registration fails and it bails
  out seconds later.
* When the collector registers itself with Sumo Logic, it overwrites the
  user.properties file. This causes the second Chef run to regenerate the
  template once more and signal another collector restart.
parent fa154ca3
# frozen_string_literal: true
require 'json'
module SumologicCollector
  # A set of helper methods for monitoring the registration status of a
  # sumologic_collector resource.
  class Helpers
    # Text that prefixes the registration result line in collector.log.
    REGISTRATION_MARKER =
      'Notifying installer of registration result: RegistrationResult('.freeze

    class << self
      #
      # Poll a Sumo directory for the registration outcome. Each pending
      # attempt sleeps for one second, so `retries` is roughly the number of
      # seconds to wait before giving up.
      #
      # @param dir [String] the main Sumo directory
      # @param retries [Integer] the number of times to poll before failing
      # @return [NilClass] once registration has succeeded
      # @raise [Chef::Exceptions::Service] if registration fails or times out
      #
      def wait_for_registration(dir, retries = 30)
        (1..retries).each do
          case registration_status(dir)
          when true
            registration_succeeded!
            # Return unconditionally; do not depend on the (truthy) return
            # value of `sleep` to decide whether polling is finished.
            return
          when false then registration_failed!
          else registration_pending!
          end
        end
        registration_timed_out!
      end

      #
      # Return true if registration has succeeded, false if it failed, or nil
      # if we can't tell. Default to checking for a .installerResult file and
      # fall back to checking the collector.log, as the collector doesn't
      # create .installerResult if it was started by the Sumo installer
      # script.
      #
      # @param dir [String] the main Sumo directory
      # @return [TrueClass,FalseClass,NilClass] success, failure, or unknown
      #
      def registration_status(dir)
        result = result_status(dir)
        return result unless result.nil?

        log_status(dir)
      end

      #
      # Check for a .installerResult file and return its 'success' value:
      # true if registration has succeeded, false if it failed, or nil if we
      # can't tell. A file that is missing, vanishes mid-read, or does not
      # (yet) contain valid JSON is treated as "can't tell" so that the
      # caller simply polls again.
      #
      # @param dir [String] the main Sumo directory
      # @return [TrueClass,FalseClass,NilClass] success, failure, or unknown
      #
      def result_status(dir)
        file = File.join(dir, 'config/.installerResult')
        return nil unless File.exist?(file)

        JSON.parse(File.read(file))['success']
      rescue JSON::ParserError, Errno::ENOENT
        # The file may be partially written or removed while we poll.
        nil
      end

      #
      # Check the collector.log file for registration status and return true
      # if registration has succeeded, false if it failed, or nil if we can't
      # tell. When the log holds several result lines, the most recent one
      # (the last in the file) wins.
      #
      # @param dir [String] the main Sumo directory
      # @return [TrueClass,FalseClass,NilClass] success, failure, or unknown
      #
      def log_status(dir)
        file = File.join(dir, 'logs/collector.log')
        return nil unless File.exist?(file)

        # Stream the log line by line, remembering the latest result line.
        status = nil
        File.foreach(file) do |line|
          status = line if line.include?(REGISTRATION_MARKER)
        end
        return nil unless status
        return true if status.include?('RegistrationResult(true,')
        return false if status.include?('RegistrationResult(false,')

        nil
      end

      #
      # Log an informational message and wait for the next retry attempt.
      #
      def registration_pending!
        Chef::Log.info('Waiting for Sumo Collector to register...')
        sleep(1)
      end

      #
      # Log the registration success and give the collector a couple seconds
      # to release its hold on the user.properties file before continuing.
      #
      def registration_succeeded!
        Chef::Log.info('Sumo Collector registered successfully.')
        sleep(2)
      end

      #
      # Raise an exception reporting that registration failed.
      #
      # @raise [Chef::Exceptions::Service] registration failed
      #
      def registration_failed!
        raise(Chef::Exceptions::Service,
              'Sumo Collector registration failed!')
      end

      #
      # Raise an exception reporting that registration timed out.
      #
      # @raise [Chef::Exceptions::Service] registration timed out
      #
      def registration_timed_out!
        raise(Chef::Exceptions::Service,
              'Timed out waiting for Sumo Collector registration!')
      end
    end
  end
end
......@@ -45,7 +45,10 @@ action :configure do
cookbook 'sumologic-collector'
variables resource: new_resource
sensitive true
notifies :restart, new_resource unless new_resource.skip_restart
unless new_resource.skip_restart
notifies :configure, new_resource unless ::File.exist?(::File.join(new_resource.dir, 'data'))
notifies :restart, new_resource
end
end
end
end
......@@ -67,6 +70,7 @@ action :start do
Chef::Log.info "Collector Directory is not found at #{new_resource.dir}. Will not do anything."
else
sumo_service :start
wait_if_initial_startup
end
end
......@@ -83,6 +87,7 @@ action :restart do
Chef::Log.info "Collector Directory is not found at #{new_resource.dir}. Will not do anything."
else
sumo_service :restart
wait_if_initial_startup
end
end
......@@ -145,6 +150,16 @@ def run_installer(installer_cmd)
end
end
# Declare a ruby_block resource that blocks until the collector has
# registered, unless a 'data' directory already exists under the collector
# directory (presumably the marker of a previously started collector, per the
# method name -- confirm against the collector's on-disk layout).
def wait_if_initial_startup
  data_path = ::File.join(new_resource.dir, 'data')
  unless ::File.exist?(data_path)
    ruby_block 'Wait for Sumo Collector to register' do
      block { SumologicCollector::Helpers.wait_for_registration(new_resource.dir) }
    end
  end
end
def sumo_service(action = :nothing)
service 'sumo-collector' do
service_name 'collector' unless node['platform_family'] == 'windows'
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment