2015-01-09 23:55:35 +00:00
|
|
|
package com.github.lookout.verspaetung
|
|
|
|
|
2015-01-19 19:45:22 +00:00
|
|
|
import com.github.lookout.verspaetung.zk.BrokerTreeWatcher
|
2015-01-28 10:45:49 +00:00
|
|
|
import com.github.lookout.verspaetung.zk.KafkaSpoutTreeWatcher
|
2015-01-19 21:08:01 +00:00
|
|
|
import com.github.lookout.verspaetung.zk.StandardTreeWatcher
|
2015-02-06 16:18:12 +00:00
|
|
|
import com.github.lookout.verspaetung.metrics.ConsumerGauge
|
2015-09-01 22:50:31 +00:00
|
|
|
import com.github.lookout.verspaetung.metrics.HeartbeatGauge
|
2015-01-19 19:45:22 +00:00
|
|
|
|
2015-01-18 18:25:04 +00:00
|
|
|
import java.util.concurrent.ConcurrentHashMap
|
2015-02-06 16:18:12 +00:00
|
|
|
import java.util.concurrent.ConcurrentSkipListSet
|
|
|
|
import java.util.concurrent.TimeUnit
|
2015-01-18 18:25:04 +00:00
|
|
|
|
2015-01-26 14:15:35 +00:00
|
|
|
import org.apache.commons.cli.*
|
2015-01-10 02:17:45 +00:00
|
|
|
import org.apache.curator.retry.ExponentialBackoffRetry
|
|
|
|
import org.apache.curator.framework.CuratorFrameworkFactory
|
2015-01-17 22:42:24 +00:00
|
|
|
import org.apache.curator.framework.CuratorFramework
|
2015-03-20 17:58:45 +00:00
|
|
|
import org.coursera.metrics.datadog.DatadogReporter
|
|
|
|
import org.coursera.metrics.datadog.transport.UdpTransport
|
2015-01-26 12:09:47 +00:00
|
|
|
import org.slf4j.Logger
|
|
|
|
import org.slf4j.LoggerFactory
|
2015-01-19 19:45:22 +00:00
|
|
|
|
2015-02-06 16:18:12 +00:00
|
|
|
import com.codahale.metrics.*
|
|
|
|
|
2015-09-01 22:50:31 +00:00
|
|
|
/**
 * Main entry point for running the verspaetung application
 */
@SuppressWarnings(['MethodSize'])
class Main {
    /* Base prefix for every metric we report */
    private static final String METRICS_PREFIX = 'verspaetung'

    private static final Logger logger = LoggerFactory.getLogger(Main)
    private static final MetricRegistry registry = new MetricRegistry()
    /* Either a ConsoleReporter (--dry-run) or a DatadogReporter; started in main() */
    private static ScheduledReporter reporter

    /**
     * Parse the command line, wire up the Zookeeper tree watchers and the
     * Kafka poller, start the metrics reporter and then block forever.
     *
     * @param args raw command line arguments, see createCLI() for the options
     */
    static void main(String[] args) {
        String statsdPrefix = METRICS_PREFIX
        String zookeeperHosts = 'localhost:2181'
        String statsdHost = 'localhost'
        Integer statsdPort = 8125
        Integer delayInSeconds = 5

        CommandLine cli = parseCommandLine(args)

        if (cli.hasOption('z')) {
            zookeeperHosts = cli.getOptionValue('z')
        }

        if (cli.hasOption('H')) {
            statsdHost = cli.getOptionValue('H')
        }

        if (cli.hasOption('p')) {
            /* getOptionValue() always returns a String; convert before
             * assigning to the Integer-typed variable (a bare assignment
             * would throw a GroovyCastException at runtime)
             */
            statsdPort = cli.getOptionValue('p').toInteger()
        }

        if (cli.hasOption('d')) {
            delayInSeconds = cli.getOptionValue('d').toInteger()
        }

        logger.info("Running with: ${args}")
        logger.warn("Using: zookeepers={} statsd={}:{}", zookeeperHosts, statsdHost, statsdPort)
        logger.info("Reporting every {} seconds", delayInSeconds)

        if (cli.hasOption('prefix')) {
            statsdPrefix = "${cli.getOptionValue('prefix')}.${METRICS_PREFIX}"
        }

        registry.register(MetricRegistry.name(Main.class, 'heartbeat'),
                            new HeartbeatGauge())

        ExponentialBackoffRetry retry = new ExponentialBackoffRetry(1000, 3)
        CuratorFramework client = CuratorFrameworkFactory.newClient(zookeeperHosts, retry)
        client.start()

        /* We need a good shared set of all the topics we should keep an eye on
         * for the Kafka poller. This will be written to by the tree watchers
         * and read from by the poller, e.g.
         *      Watcher --/write/--> watchedTopics --/read/--> KafkaPoller
         */
        ConcurrentSkipListSet<String> watchedTopics = new ConcurrentSkipListSet<>()

        /* consumerOffsets is where we will keep all the offsets from Zookeeper
         * from the Kafka consumers
         */
        ConcurrentHashMap<KafkaConsumer, Integer> consumerOffsets = new ConcurrentHashMap<>()

        /* topicOffsets is where the KafkaPoller should be writing all of its
         * latest offsets from querying the Kafka brokers
         */
        ConcurrentHashMap<TopicPartition, Long> topicOffsets = new ConcurrentHashMap<>()

        /* Hash map for keeping track of KafkaConsumer to ConsumerGauge
         * instances. We're only really doing this because the MetricRegistry
         * doesn't do a terrific job of exposing this for us
         */
        ConcurrentHashMap<KafkaConsumer, ConsumerGauge> consumerGauges = new ConcurrentHashMap<>()

        KafkaPoller poller = new KafkaPoller(topicOffsets, watchedTopics)
        BrokerTreeWatcher brokerWatcher = new BrokerTreeWatcher(client).start()
        /* Whenever the broker list changes, hand the fresh list to the poller */
        brokerWatcher.onBrokerUpdates << { brokers -> poller.refresh(brokers) }

        poller.start()

        /* Need to reuse this closure for the KafkaSpoutTreeWatcher if we have
         * one
         */
        Closure gaugeRegistrar = { KafkaConsumer consumer ->
            registerMetricFor(consumer, consumerGauges, consumerOffsets, topicOffsets)
        }

        StandardTreeWatcher consumerWatcher = new StandardTreeWatcher(client,
                                                                      watchedTopics,
                                                                      consumerOffsets)
        consumerWatcher.onConsumerData << gaugeRegistrar
        consumerWatcher.start()

        /* Assuming that most people aren't needing to run Storm-based watchers
         * as well
         */
        if (cli.hasOption('s')) {
            KafkaSpoutTreeWatcher stormWatcher = new KafkaSpoutTreeWatcher(client,
                                                                           watchedTopics,
                                                                           consumerOffsets)
            stormWatcher.onConsumerData << gaugeRegistrar
            stormWatcher.start()
        }

        if (cli.hasOption('n')) {
            /* --dry-run: report to the console instead of a statsd host */
            reporter = ConsoleReporter.forRegistry(registry)
                                      .convertRatesTo(TimeUnit.SECONDS)
                                      .convertDurationsTo(TimeUnit.MILLISECONDS)
                                      .build()
        }
        else {
            /* Apply the parsed statsd host/port; previously these options were
             * accepted (and logged above) but never used, so the transport
             * always fell back to its built-in defaults
             */
            UdpTransport transport = new UdpTransport.Builder()
                                        .withPrefix(statsdPrefix)
                                        .withStatsdHost(statsdHost)
                                        .withPort(statsdPort)
                                        .build()

            reporter = DatadogReporter.forRegistry(registry)
                                      .withEC2Host()
                                      .withTransport(transport)
                                      .build()
        }

        /* Start the reporter if we've got it */
        reporter?.start(delayInSeconds, TimeUnit.SECONDS)

        logger.info("Starting wait loop...")
        /* Park the main thread forever; all the real work happens on the
         * watcher/poller/reporter threads started above
         */
        synchronized(this) {
            wait()
        }
    }

    /**
     * Create and register a ConsumerGauge for the given consumer, unless one
     * has already been registered for it.
     *
     * @param consumer       the consumer to expose a lag gauge for
     * @param consumerGauges map tracking which consumers already have a gauge
     * @param consumerOffsets shared consumer offset map the gauge reads from
     * @param topicOffsets   shared broker offset map the gauge reads from
     */
    static void registerMetricFor(KafkaConsumer consumer,
                                  ConcurrentHashMap<KafkaConsumer, ConsumerGauge> consumerGauges,
                                  ConcurrentHashMap<KafkaConsumer, Integer> consumerOffsets,
                                  ConcurrentHashMap<TopicPartition, Long> topicOffsets) {
        if (consumerGauges.containsKey(consumer)) {
            return
        }

        ConsumerGauge gauge = new ConsumerGauge(consumer,
                                                consumerOffsets,
                                                topicOffsets)
        consumerGauges.put(consumer, gauge)
        this.registry.register(gauge.nameForRegistry, gauge)
    }

    /**
     * Create the Options option necessary for verspaetung to have CLI options
     *
     * @return a commons-cli Options with every supported option registered
     */
    static Options createCLI() {
        Options options = new Options()

        Option zookeeper = OptionBuilder.withArgName('HOSTS')
                                        .hasArg()
                                        .withDescription('Comma separated list of Zookeeper hosts (e.g. localhost:2181)')
                                        .withLongOpt('zookeeper')
                                        .withValueSeparator(',' as char)
                                        .create('z')

        Option statsdHost = OptionBuilder.withArgName('STATSD')
                                         .hasArg()
                                         .withType(String)
                                         .withDescription('Hostname for a statsd instance (defaults to localhost)')
                                         .withLongOpt('statsd-host')
                                         .create('H')

        Option statsdPort = OptionBuilder.withArgName('PORT')
                                         .hasArg()
                                         .withType(Integer)
                                         .withDescription('Port for the statsd instance (defaults to 8125)')
                                         .withLongOpt('statsd-port')
                                         .create('p')

        Option dryRun = OptionBuilder.withDescription('Disable reporting to a statsd host')
                                     .withLongOpt('dry-run')
                                     .create('n')

        Option stormSpouts = OptionBuilder.withDescription('Watch Storm KafkaSpout offsets (under /kafka_spout)')
                                          .withLongOpt('storm')
                                          .create('s')

        /* Long-opt only: there is no short flag for --prefix */
        Option statsdPrefix = OptionBuilder.withArgName('PREFIX')
                                           .hasArg()
                                           .withType(String)
                                           .withDescription("Prefix all metrics with PREFIX before they're reported (e.g. PREFIX.verspaetung.mytopic)")
                                           .withLongOpt('prefix')
                                           .create()

        Option delaySeconds = OptionBuilder.withArgName('DELAY')
                                           .hasArg()
                                           .withType(Integer)
                                           .withDescription("Seconds to delay between reporting metrics to the metrics receiver (defaults: 5s)")
                                           .withLongOpt('delay')
                                           .create('d')

        options.addOption(zookeeper)
        options.addOption(statsdHost)
        options.addOption(statsdPort)
        options.addOption(statsdPrefix)
        options.addOption(dryRun)
        options.addOption(stormSpouts)
        options.addOption(delaySeconds)

        return options
    }

    /**
     * Parse out all the command line options from the array of string
     * arguments
     *
     * @param args raw command line arguments
     * @return the parsed CommandLine; on a missing/unknown option this prints
     *         usage help and exits the process with status 1
     */
    static CommandLine parseCommandLine(String[] args) {
        Options options = createCLI()
        PosixParser parser = new PosixParser()

        try {
            return parser.parse(options, args)
        }
        catch (MissingOptionException|UnrecognizedOptionException ex) {
            HelpFormatter formatter = new HelpFormatter()
            println ex.message
            formatter.printHelp('verspaetung', options)
            System.exit(1)
        }
    }
}
|