Add experimentation on the different ways that (de)serialization fails with JRuby
This commit is contained in:
parent
be4c8d4815
commit
13e1c99d7c
|
@ -28,7 +28,7 @@ dependencies {
|
|||
jrubyJar {
|
||||
group 'Redspark'
|
||||
description 'Package up a jar for Spark execution'
|
||||
dependsOn compileJava
|
||||
dependsOn compileJava, jar
|
||||
initScript "${projectDir}/simple.rb"
|
||||
}
|
||||
|
||||
|
|
40
simple.rb
40
simple.rb
|
@ -1,17 +1,49 @@
|
|||
#!/usr/bin/env ruby
|
||||
#
|
||||
# To run, first execute `./gradlew jrubyJar` to package the jar, then call
|
||||
# `./run.sh` to send the jar to a local spark cluster installation
|
||||
#
|
||||
|
||||
java_import 'org.apache.spark.sql.SparkSession'
|
||||
java_import 'org.apache.spark.api.java.function.FilterFunction'
|
||||
java_import 'org.apache.spark.api.java.function.ForeachFunction'
|
||||
|
||||
logfile = 'build.gradle'
|
||||
spark = SparkSession.builder.appName('Simple Application').getOrCreate
|
||||
data = spark.read.textFile(logfile).cache()
|
||||
|
||||
# Named Ruby class that mixes in Spark's Java `ForeachFunction` interface so an
# instance can be handed to `data.foreach` in place of a Ruby block.
class BeeForeach
|
||||
include org.apache.spark.api.java.function.ForeachFunction
|
||||
# Interface callback: prints the element it is given.
# NOTE(review): presumably invoked once per dataset element by Spark — confirm
# against the ForeachFunction contract.
def call(item)
|
||||
puts "foreaching item: #{item}"
|
||||
end
|
||||
end
|
||||
# Named Ruby class that mixes in Spark's Java `FilterFunction` interface so an
# instance can be handed to `data.filter` in place of a Ruby block.
class BeeFilter
  include org.apache.spark.api.java.function.FilterFunction

  # Interface callback: logs the element, then decides whether to keep it.
  #
  # item - the element under test (the script feeds it lines of text).
  #
  # Returns true when the element contains the letter 'b', mirroring the
  # block-based filters used elsewhere in this script.
  def call(item)
    puts "filtering item: #{item}"
    # `puts` returns nil, so it must not be the last expression: the interface
    # expects a boolean and a nil result would reject every element.
    item.contains('b')
  end
end
|
||||
|
||||
alphas = data.distinct
|
||||
puts "about to filter"
|
||||
betas = data.filter do |line|
|
||||
puts 'filtering..'
|
||||
line.contains 'b'
|
||||
end.count
|
||||
#
|
||||
# Failure caused while deserializing on the spark worker
|
||||
#
|
||||
# java.lang.ClassCastException: cannot assign instance of
|
||||
# scala.collection.immutable.List$SerializationProxy to field
|
||||
# org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of type scala.collection.Seq
|
||||
#betas = data.filter(BeeFilter.new).count
|
||||
|
||||
# Failure caused while deserializing on the spark worker
|
||||
#
|
||||
# java.lang.ClassNotFoundException: org.jruby.gen.BeeForeach_799252494
|
||||
betas = data.foreach(BeeForeach.new).count
|
||||
|
||||
# Failure caused while serializing on the spark master
|
||||
#
|
||||
# java.io.IOException: can not serialize singleton object
|
||||
#betas = data.filter { |line| line.contains('b') }.count
|
||||
puts "filtered"
|
||||
|
||||
puts
|
||||
|
|
|
@ -10,7 +10,6 @@ describe 'Serializing Ruby for Spark' do
|
|||
let(:spark) do
|
||||
SparkSession
|
||||
.builder
|
||||
.config('spark.serializer', 'com.github.jrubygradle.redspark.RubySerializer')
|
||||
.master('local[*]')
|
||||
.appName('rspec')
|
||||
.getOrCreate
|
||||
|
|
Loading…
Reference in New Issue