Implementation of BatchSpout proxy class
This commit is contained in:
parent
3b4e4d6f27
commit
e3e5fb957a
|
@ -0,0 +1,71 @@
|
||||||
|
require 'java'
|
||||||
|
|
||||||
|
java_import 'storm.trident.operation.TridentCollector'
|
||||||
|
java_import 'backtype.storm.task.TopologyContext'
|
||||||
|
java_import 'storm.trident.spout.IBatchSpout'
|
||||||
|
java_import 'backtype.storm.topology.OutputFieldsDeclarer'
|
||||||
|
java_import 'backtype.storm.tuple.Tuple'
|
||||||
|
java_import 'backtype.storm.tuple.Fields'
|
||||||
|
java_import 'backtype.storm.tuple.Values'
|
||||||
|
java_import 'java.util.Map'
|
||||||
|
module Backtype
|
||||||
|
java_import 'backtype.storm.Config'
|
||||||
|
end
|
||||||
|
|
||||||
|
java_package 'redstorm.proxy'
|
||||||
|
|
||||||
|
# the BatchSpout class is a proxy to the real batch spout to avoid having to deal with all the
|
||||||
|
# Java artifacts when creating a spout.
|
||||||
|
#
|
||||||
|
# The real batch spout class implementation must define these methods:
|
||||||
|
# - open(conf, context, collector)
|
||||||
|
# - emitBatch
|
||||||
|
# - getOutputFields
|
||||||
|
# - ack(batch_id)
|
||||||
|
#
|
||||||
|
# and optionnaly:
|
||||||
|
# - close
|
||||||
|
#
|
||||||
|
|
||||||
|
class BatchSpout
|
||||||
|
java_implements IBatchSpout
|
||||||
|
|
||||||
|
java_signature 'IBatchSpout (String base_class_path, String real_spout_class_name)'
|
||||||
|
def initialize(base_class_path, real_spout_class_name)
|
||||||
|
@real_spout = Object.module_eval(real_spout_class_name).new
|
||||||
|
rescue NameError
|
||||||
|
require base_class_path
|
||||||
|
@real_spout = Object.module_eval(real_spout_class_name).new
|
||||||
|
end
|
||||||
|
|
||||||
|
java_signature 'void open(Map, TopologyContext)'
|
||||||
|
def open(conf, context)
|
||||||
|
@real_spout.open(conf, context)
|
||||||
|
end
|
||||||
|
|
||||||
|
java_signature 'void close()'
|
||||||
|
def close
|
||||||
|
@real_spout.close if @real_spout.respond_to?(:close)
|
||||||
|
end
|
||||||
|
|
||||||
|
java_signature 'void emitBatch(long, TridentCollector)'
|
||||||
|
def emitBatch(batch_id, collector)
|
||||||
|
@real_spout.emit_batch(batch_id, collector)
|
||||||
|
end
|
||||||
|
|
||||||
|
java_signature 'void ack(long)'
|
||||||
|
def ack(batch_id)
|
||||||
|
@real_spout.ack(batch_id)
|
||||||
|
end
|
||||||
|
|
||||||
|
java_signature 'Fields getOutputFields()'
|
||||||
|
def getOutputFields
|
||||||
|
@real_spout.get_output_fields()
|
||||||
|
end
|
||||||
|
|
||||||
|
java_signature 'Map<String, Object> getComponentConfiguration()'
|
||||||
|
def getComponentConfiguration
|
||||||
|
@real_spout.get_component_configuration
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,89 @@
|
||||||
|
package redstorm.storm.jruby;
|
||||||
|
|
||||||
|
import storm.trident.operation.TridentCollector;
|
||||||
|
import backtype.storm.task.TopologyContext;
|
||||||
|
import storm.trident.spout.IBatchSpout;
|
||||||
|
import backtype.storm.topology.OutputFieldsDeclarer;
|
||||||
|
import backtype.storm.tuple.Tuple;
|
||||||
|
import backtype.storm.tuple.Fields;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* the JRubyBatchSpout class is a simple proxy class to the actual spout implementation in JRuby.
|
||||||
|
* this proxy is required to bypass the serialization/deserialization process when dispatching
|
||||||
|
* the spout to the workers. JRuby does not yet support serialization from Java
|
||||||
|
* (Java serialization call on a JRuby class).
|
||||||
|
*
|
||||||
|
* Note that the JRuby spout proxy class is instanciated in the open method which is called after
|
||||||
|
* deserialization at the worker and in both the declareOutputFields and isDistributed methods which
|
||||||
|
* are called once before serialization at topology creation.
|
||||||
|
*/
|
||||||
|
public class JRubyBatchSpout implements IBatchSpout {
|
||||||
|
IBatchSpout _proxySpout;
|
||||||
|
String _realSpoutClassName;
|
||||||
|
String _baseClassPath;
|
||||||
|
String[] _fields;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create a new JRubyBatchSpout
|
||||||
|
*
|
||||||
|
* @param baseClassPath the topology/project base JRuby class file path
|
||||||
|
* @param realSpoutClassName the fully qualified JRuby spout implementation class name
|
||||||
|
*/
|
||||||
|
public JRubyBatchSpout(String baseClassPath, String realSpoutClassName, String[] fields) {
|
||||||
|
_baseClassPath = baseClassPath;
|
||||||
|
_realSpoutClassName = realSpoutClassName;
|
||||||
|
_fields = fields;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void open(final Map conf, final TopologyContext context) {
|
||||||
|
// create instance of the jruby proxy class here, after deserialization in the workers.
|
||||||
|
_proxySpout = newProxySpout(_baseClassPath, _realSpoutClassName);
|
||||||
|
_proxySpout.open(conf, context);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void emitBatch(final long batchId, final TridentCollector collector) {
|
||||||
|
_proxySpout.emitBatch(batchId, collector);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() {
|
||||||
|
_proxySpout.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void ack(final long batchId) {
|
||||||
|
_proxySpout.ack(batchId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Fields getOutputFields() {
|
||||||
|
// getOutputFields is executed in the topology creation time before serialisation.
|
||||||
|
// do not set the _proxySpout instance variable here to avoid JRuby serialization
|
||||||
|
// issues. Just create tmp spout instance to call declareOutputFields.
|
||||||
|
IBatchSpout spout = newProxySpout(_baseClassPath, _realSpoutClassName);
|
||||||
|
return spout.getOutputFields();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Map<String, Object> getComponentConfiguration() {
|
||||||
|
// getComponentConfiguration is executed in the topology creation time before serialisation.
|
||||||
|
// do not set the _proxySpout instance variable here to avoid JRuby serialization
|
||||||
|
// issues. Just create tmp spout instance to call declareOutputFields.
|
||||||
|
IBatchSpout spout = newProxySpout(_baseClassPath, _realSpoutClassName);
|
||||||
|
return spout.getComponentConfiguration();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static IBatchSpout newProxySpout(String baseClassPath, String realSpoutClassName) {
|
||||||
|
try {
|
||||||
|
redstorm.proxy.BatchSpout proxy = new redstorm.proxy.BatchSpout(baseClassPath, realSpoutClassName);
|
||||||
|
return proxy;
|
||||||
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue