Add experimentation on the different ways that (de)serialization fails with JRuby

R Tyler Croy 2019-05-20 20:44:29 -07:00
parent be4c8d4815
commit 13e1c99d7c
3 changed files with 37 additions and 6 deletions


@@ -28,7 +28,7 @@ dependencies {
jrubyJar {
    group 'Redspark'
    description 'Package up a jar for Spark execution'
    dependsOn compileJava
    dependsOn compileJava, jar
    initScript "${projectDir}/simple.rb"
}


@@ -1,17 +1,49 @@
#!/usr/bin/env ruby
#
# To run, first execute `./gradlew jrubyJar` to package the jar, then call
# `./run.sh` to send the jar to a local spark cluster installation
#
java_import 'org.apache.spark.sql.SparkSession'
java_import 'org.apache.spark.api.java.function.FilterFunction'
java_import 'org.apache.spark.api.java.function.ForeachFunction'
logfile = 'build.gradle'
spark = SparkSession.builder.appName('Simple Application').getOrCreate
data = spark.read.textFile(logfile).cache()
class BeeForeach
  include org.apache.spark.api.java.function.ForeachFunction

  def call(item)
    puts "foreaching item: #{item}"
  end
end

class BeeFilter
  include org.apache.spark.api.java.function.FilterFunction

  def call(item)
    puts "filtering item: #{item}"
  end
end
alphas = data.distinct
puts "about to filter"
betas = data.filter do |line|
  puts 'filtering..'
  line.contains 'b'
end.count
#
# Failure caused while deserializing on the spark worker
#
# java.lang.ClassCastException: cannot assign instance of
# scala.collection.immutable.List$SerializationProxy to field
# org.apache.spark.rdd.RDD.org$apache$spark$rdd$RDD$$dependencies_ of typ
#betas = data.filter(BeeFilter.new).count
# Failure caused while deserializing on the spark worker
#
# java.lang.ClassNotFoundException: org.jruby.gen.BeeForeach_799252494
betas = data.foreach(BeeForeach.new).count
# Failure caused while serializing on the spark master
#
# java.io.IOException: can not serialize singleton object
#betas = data.filter { |line| line.contains('b') }.count
puts "filtered"
puts

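As a side-by-side summary of the two call shapes the script above experiments with, here is a minimal sketch (not part of the commit; the ContainsB class name and the file being read are illustrative) of passing a Ruby block versus an explicit JRuby class to Dataset#filter. Per the comments above, the explicit-class form was observed to fail while deserializing on the Spark workers, so the sketch only illustrates the shapes, not a guaranteed-working run:

java_import 'org.apache.spark.sql.SparkSession'
java_import 'org.apache.spark.api.java.function.FilterFunction'

# Explicit JRuby implementation of Spark's Java functional interface. The
# generated org.jruby.gen.* proxy class has to be loadable on the workers,
# which is where the ClassNotFoundException/ClassCastException above appear.
class ContainsB
  include org.apache.spark.api.java.function.FilterFunction

  def call(line)
    line.contains 'b' # FilterFunction#call should return a boolean
  end
end

spark = SparkSession.builder.appName('filter-sketch').getOrCreate
data = spark.read.textFile('build.gradle').cache

# Block form: JRuby coerces the block into the FilterFunction argument.
data.filter { |line| line.contains 'b' }.count

# Explicit-class form: fails on the workers per the notes in this commit.
data.filter(ContainsB.new).count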

@@ -10,7 +10,6 @@ describe 'Serializing Ruby for Spark' do
  let(:spark) do
    SparkSession
      .builder
      .config('spark.serializer', 'com.github.jrubygradle.redspark.RubySerializer')
      .master('local[*]')
      .appName('rspec')
      .getOrCreate
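For context, a hypothetical spec example (not part of this commit) showing how the local spark session defined above might be exercised; the example description and the file being read are illustrative only:

it 'reads a local text file into a Dataset' do
  data = spark.read.textFile('build.gradle')
  expect(data.count).to be > 0
end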