redstorm/examples/ruby_word_count_topology.rb

79 lines
1.8 KiB
Ruby

# Spout that emits one sentence, picked at random from a fixed list, each time
# +next_tuple+ is called.
#
# +Values+ and +Fields+ are not defined in this file; they are expected to be
# in scope when the topology runs.
class RubyRandomSentenceSpout
  # Always +true+; set once in +initialize+.
  attr_reader :is_distributed

  def initialize
    @sentences = [
      "the cow jumped over the moon",
      "an apple a day keeps the doctor away",
      "four score and seven years ago",
      "snow white and the seven dwarfs",
      "i am at two with nature"
    ]
    @is_distributed = true
  end

  # Remembers the collector used later by +next_tuple+.
  # +conf+ and +context+ are accepted but not used.
  def open(conf, context, collector)
    @collector = collector
  end

  # Emits a single-value tuple holding one randomly selected sentence.
  def next_tuple
    chosen = @sentences[rand(@sentences.size)]
    @collector.emit(Values.new(chosen))
  end

  # Declares one output field. It is named "word" even though each emitted
  # tuple carries a whole sentence.
  def declare_output_fields(declarer)
    output_fields = Fields.new("word")
    declarer.declare(output_fields)
  end
end
# Bolt that breaks the string in position 0 of each incoming tuple into
# whitespace-separated words and emits every word as its own tuple.
class RubySplitSentence
  # Keeps the collector for use in +execute+.
  # +conf+ and +context+ are accepted but not used.
  def prepare(conf, context, collector)
    @collector = collector
  end

  # Reads the first value of +tuple+ as a string and emits one single-value
  # tuple per word, in order. A string with no words emits nothing.
  def execute(tuple)
    sentence = tuple.getString(0)
    sentence.split(" ").each do |word|
      @collector.emit(Values.new(word))
    end
  end

  # Declares the single output field "word".
  def declare_output_fields(declarer)
    output_fields = Fields.new("word")
    declarer.declare(output_fields)
  end
end
# Bolt that keeps a per-instance running count of every word it has seen and
# emits the word together with its updated count.
class RubyWordCount
  def initialize
    # Words not seen yet read as 0, so the first occurrence is counted as 1.
    @counts = Hash.new(0)
  end

  # Keeps the collector for use in +execute+.
  # +conf+ and +context+ are accepted but not used.
  def prepare(conf, context, collector)
    @collector = collector
  end

  # Reads the first value of +tuple+ as the word, bumps its count by one and
  # emits a two-value tuple: the word and its new count.
  def execute(tuple)
    word = tuple.getString(0)
    updated = @counts[word] + 1
    @counts[word] = updated
    @collector.emit(Values.new(word, updated))
  end

  # Declares the two output fields, "word" and "count", in emit order.
  def declare_output_fields(declarer)
    output_fields = Fields.new("word", "count")
    declarer.declare(output_fields)
  end
end
# Wires the sentence spout, the splitter bolt and the counter bolt into a
# word-count topology and runs it briefly on a LocalCluster.
#
# TopologyBuilder, JRubySpout, JRubyBolt, Fields, Config and LocalCluster are
# not defined in this file; they are expected to be in scope at run time.
class RubyWordCountTopology
  # Builds the topology, submits it under the name "word-count", lets it run
  # for five seconds and then shuts the cluster down.
  #
  # @param base_class_path [Object] handed unchanged to every JRubySpout /
  #   JRubyBolt wrapper, together with the name of the Ruby class to run
  # @return [nil]
  def start(base_class_path)
    builder = TopologyBuilder.new

    # Component ids: 1 = sentence spout, 2 = splitter, 3 = counter.
    # The trailing integer is the third argument of setSpout/setBolt
    # (Storm's parallelism hint).
    builder.setSpout(1, JRubySpout.new(base_class_path, "RubyRandomSentenceSpout"), 5)
    builder.setBolt(2, JRubyBolt.new(base_class_path, "RubySplitSentence"), 8).shuffleGrouping(1)
    # Grouping on "word" routes equal words to the same counter, which is what
    # lets each RubyWordCount keep a meaningful per-word total.
    builder.setBolt(3, JRubyBolt.new(base_class_path, "RubyWordCount"), 12).fieldsGrouping(2, Fields.new("word"))

    conf = Config.new
    conf.setDebug(true)
    conf.setMaxTaskParallelism(3)

    cluster = LocalCluster.new
    begin
      cluster.submitTopology("word-count", conf, builder.createTopology)
      sleep(5)
    ensure
      # Shut down even when submission raises or the wait is interrupted, so
      # a cluster that was started is never left running.
      cluster.shutdown
    end

    # Called for its side effects only; do not leak sleep's result.
    nil
  end
end