There appears to be something problematic about this Proc as it's coming through
the serialization. Other DataSet functions work fine, but there's something
about filter() which isn't quite working.
Since most of what we want to do is with maps and filters, I suppose figuring
this out will be important
I haven't been able to make this work, and I cannot quite figure out where the
pieces are coming together for serializing Ruby objects.
When trying my example code, there's an exception thrown, but my expected log
lines do not get printed to indicate that the RubySerializerInstance is about to
process a Ruby object
19/05/18 18:20:00 INFO ContextCleaner: Cleaned accumulator 1
Unhandled Java exception: org.apache.spark.SparkException: Task not serializable
org.apache.spark.SparkException: Task not serializable
ensureSerializable at org/apache/spark/util/ClosureCleaner.scala:403
org$apache$spark$util$ClosureCleaner$$clean at org/apache/spark/util/ClosureCleaner.scala:393
clean at org/apache/spark/util/ClosureCleaner.scala:162
clean at org/apache/spark/SparkContext.scala:2326
apply at org/apache/spark/rdd/RDD.scala:850
apply at org/apache/spark/rdd/RDD.scala:849
withScope at org/apache/spark/rdd/RDDOperationScope.scala:151
withScope at org/apache/spark/rdd/RDDOperationScope.scala:112
withScope at org/apache/spark/rdd/RDD.scala:363
mapPartitionsWithIndex at org/apache/spark/rdd/RDD.scala:849
doExecute at org/apache/spark/sql/execution/WholeStageCodegenExec.scala:630
apply at org/apache/spark/sql/execution/SparkPlan.scala:131
apply at org/apache/spark/sql/execution/SparkPlan.scala:127
apply at org/apache/spark/sql/execution/SparkPlan.scala:155
withScope at org/apache/spark/rdd/RDDOperationScope.scala:151
executeQuery at org/apache/spark/sql/execution/SparkPlan.scala:152
execute at org/apache/spark/sql/execution/SparkPlan.scala:127
prepareShuffleDependency at org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala:92
apply at org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala:128
apply at org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala:119
attachTree at org/apache/spark/sql/catalyst/errors/package.scala:52
doExecute at org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala:119
apply at org/apache/spark/sql/execution/SparkPlan.scala:131
apply at org/apache/spark/sql/execution/SparkPlan.scala:127
apply at org/apache/spark/sql/execution/SparkPlan.scala:155
withScope at org/apache/spark/rdd/RDDOperationScope.scala:151
executeQuery at org/apache/spark/sql/execution/SparkPlan.scala:152
execute at org/apache/spark/sql/execution/SparkPlan.scala:127
inputRDDs at org/apache/spark/sql/execution/WholeStageCodegenExec.scala:391
inputRDDs at org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala:151
doExecute at org/apache/spark/sql/execution/WholeStageCodegenExec.scala:627
apply at org/apache/spark/sql/execution/SparkPlan.scala:131
apply at org/apache/spark/sql/execution/SparkPlan.scala:127
apply at org/apache/spark/sql/execution/SparkPlan.scala:155
withScope at org/apache/spark/rdd/RDDOperationScope.scala:151
executeQuery at org/apache/spark/sql/execution/SparkPlan.scala:152
execute at org/apache/spark/sql/execution/SparkPlan.scala:127
getByteArrayRdd at org/apache/spark/sql/execution/SparkPlan.scala:247
executeCollect at org/apache/spark/sql/execution/SparkPlan.scala:296
apply at org/apache/spark/sql/Dataset.scala:2830
apply at org/apache/spark/sql/Dataset.scala:2829
apply at org/apache/spark/sql/Dataset.scala:3364
apply at org/apache/spark/sql/execution/SQLExecution.scala:78
withSQLConfPropagated at org/apache/spark/sql/execution/SQLExecution.scala:125
withNewExecutionId at org/apache/spark/sql/execution/SQLExecution.scala:73
withAction at org/apache/spark/sql/Dataset.scala:3363
count at org/apache/spark/sql/Dataset.scala:2829
invoke at java/lang/reflect/Method.java:498
invokeDirectWithExceptionHandling at org/jruby/javasupport/JavaMethod.java:440
invokeDirect at org/jruby/javasupport/JavaMethod.java:304
invokeOther18:count at simple.rb:11
<main> at simple.rb:11
invokeWithArguments at java/lang/invoke/MethodHandle.java:627
runScript at org/jruby/Ruby.java:856
runNormally at org/jruby/Ruby.java:779
runNormally at org/jruby/Ruby.java:797
runFromMain at org/jruby/Ruby.java:609
doRunFromMain at org/jruby/Main.java:415
internalRun at org/jruby/Main.java:307
run at org/jruby/Main.java:234
main at org/jruby/Main.java:206
invoke at java/lang/reflect/Method.java:498
start at org/apache/spark/deploy/SparkApplication.scala:52
org$apache$spark$deploy$SparkSubmit$$runMain at org/apache/spark/deploy/SparkSubmit.scala:849
doRunMain$1 at org/apache/spark/deploy/SparkSubmit.scala:167
submit at org/apache/spark/deploy/SparkSubmit.scala:195
doSubmit at org/apache/spark/deploy/SparkSubmit.scala:86
doSubmit at org/apache/spark/deploy/SparkSubmit.scala:924
main at org/apache/spark/deploy/SparkSubmit.scala:933
main at org/apache/spark/deploy/SparkSubmit.scala:-1
Caused by:
java.io.IOException: can not serialize singleton object
writeObject at org/jruby/RubyBasicObject.java:3045
invoke0 at sun/reflect/NativeMethodAccessorImpl.java:-2
invoke at sun/reflect/NativeMethodAccessorImpl.java:62
invoke at sun/reflect/DelegatingMethodAccessorImpl.java:43
invoke at java/lang/reflect/Method.java:498
invokeWriteObject at java/io/ObjectStreamClass.java:1140
writeSerialData at java/io/ObjectOutputStream.java:1496
writeOrdinaryObject at java/io/ObjectOutputStream.java:1432
writeObject0 at java/io/ObjectOutputStream.java:1178
defaultWriteFields at java/io/ObjectOutputStream.java:1548
writeSerialData at java/io/ObjectOutputStream.java:1509
writeOrdinaryObject at java/io/ObjectOutputStream.java:1432
writeObject0 at java/io/ObjectOutputStream.java:1178
writeArray at java/io/ObjectOutputStream.java:1378
writeObject0 at java/io/ObjectOutputStream.java:1174
defaultWriteFields at java/io/ObjectOutputStream.java:1548
writeSerialData at java/io/ObjectOutputStream.java:1509
writeOrdinaryObject at java/io/ObjectOutputStream.java:1432
writeObject0 at java/io/ObjectOutputStream.java:1178
writeObject at java/io/ObjectOutputStream.java:348
writeObject at org/apache/spark/serializer/JavaSerializer.scala:43
serialize at org/apache/spark/serializer/JavaSerializer.scala:100
ensureSerializable at org/apache/spark/util/ClosureCleaner.scala:400
org$apache$spark$util$ClosureCleaner$$clean at org/apache/spark/util/ClosureCleaner.scala:393
clean at org/apache/spark/util/ClosureCleaner.scala:162
clean at org/apache/spark/SparkContext.scala:2326
apply at org/apache/spark/rdd/RDD.scala:850
apply at org/apache/spark/rdd/RDD.scala:849
withScope at org/apache/spark/rdd/RDDOperationScope.scala:151
withScope at org/apache/spark/rdd/RDDOperationScope.scala:112
withScope at org/apache/spark/rdd/RDD.scala:363
mapPartitionsWithIndex at org/apache/spark/rdd/RDD.scala:849
doExecute at org/apache/spark/sql/execution/WholeStageCodegenExec.scala:630
apply at org/apache/spark/sql/execution/SparkPlan.scala:131
apply at org/apache/spark/sql/execution/SparkPlan.scala:127
apply at org/apache/spark/sql/execution/SparkPlan.scala:155
withScope at org/apache/spark/rdd/RDDOperationScope.scala:151
executeQuery at org/apache/spark/sql/execution/SparkPlan.scala:152
execute at org/apache/spark/sql/execution/SparkPlan.scala:127
prepareShuffleDependency at org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala:92
apply at org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala:128
apply at org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala:119
attachTree at org/apache/spark/sql/catalyst/errors/package.scala:52
doExecute at org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala:119
apply at org/apache/spark/sql/execution/SparkPlan.scala:131
apply at org/apache/spark/sql/execution/SparkPlan.scala:127
apply at org/apache/spark/sql/execution/SparkPlan.scala:155
withScope at org/apache/spark/rdd/RDDOperationScope.scala:151
executeQuery at org/apache/spark/sql/execution/SparkPlan.scala:152
execute at org/apache/spark/sql/execution/SparkPlan.scala:127
inputRDDs at org/apache/spark/sql/execution/WholeStageCodegenExec.scala:391
inputRDDs at org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala:151
doExecute at org/apache/spark/sql/execution/WholeStageCodegenExec.scala:627
apply at org/apache/spark/sql/execution/SparkPlan.scala:131
apply at org/apache/spark/sql/execution/SparkPlan.scala:127
apply at org/apache/spark/sql/execution/SparkPlan.scala:155
withScope at org/apache/spark/rdd/RDDOperationScope.scala:151
executeQuery at org/apache/spark/sql/execution/SparkPlan.scala:152
execute at org/apache/spark/sql/execution/SparkPlan.scala:127
getByteArrayRdd at org/apache/spark/sql/execution/SparkPlan.scala:247
executeCollect at org/apache/spark/sql/execution/SparkPlan.scala:296
apply at org/apache/spark/sql/Dataset.scala:2830
apply at org/apache/spark/sql/Dataset.scala:2829
apply at org/apache/spark/sql/Dataset.scala:3364
apply at org/apache/spark/sql/execution/SQLExecution.scala:78
withSQLConfPropagated at org/apache/spark/sql/execution/SQLExecution.scala:125
withNewExecutionId at org/apache/spark/sql/execution/SQLExecution.scala:73
withAction at org/apache/spark/sql/Dataset.scala:3363
count at org/apache/spark/sql/Dataset.scala:2829
invoke0 at sun/reflect/NativeMethodAccessorImpl.java:-2
invoke at sun/reflect/NativeMethodAccessorImpl.java:62
invoke at sun/reflect/DelegatingMethodAccessorImpl.java:43
invoke at java/lang/reflect/Method.java:498
invokeDirectWithExceptionHandling at org/jruby/javasupport/JavaMethod.java:440
invokeDirect at org/jruby/javasupport/JavaMethod.java:304
call at org/jruby/java/invokers/InstanceMethodInvoker.java:36
cacheAndCall at org/jruby/runtime/callsite/CachingCallSite.java:318
call at org/jruby/runtime/callsite/CachingCallSite.java:139
invokeOther18:count at simple.rb:11
<main> at simple.rb:11
invokeWithArguments at java/lang/invoke/MethodHandle.java:627
load at org/jruby/ir/Compiler.java:94
runScript at org/jruby/Ruby.java:856
runNormally at org/jruby/Ruby.java:779
runNormally at org/jruby/Ruby.java:797
runFromMain at org/jruby/Ruby.java:609
doRunFromMain at org/jruby/Main.java:415
internalRun at org/jruby/Main.java:307
run at org/jruby/Main.java:234
main at org/jruby/Main.java:206
invoke0 at sun/reflect/NativeMethodAccessorImpl.java:-2
invoke at sun/reflect/NativeMethodAccessorImpl.java:62
invoke at sun/reflect/DelegatingMethodAccessorImpl.java:43
invoke at java/lang/reflect/Method.java:498
start at org/apache/spark/deploy/SparkApplication.scala:52
org$apache$spark$deploy$SparkSubmit$$runMain at org/apache/spark/deploy/SparkSubmit.scala:849
doRunMain$1 at org/apache/spark/deploy/SparkSubmit.scala:167
submit at org/apache/spark/deploy/SparkSubmit.scala:195
doSubmit at org/apache/spark/deploy/SparkSubmit.scala:86
doSubmit at org/apache/spark/deploy/SparkSubmit.scala:924
main at org/apache/spark/deploy/SparkSubmit.scala:933
main at org/apache/spark/deploy/SparkSubmit.scala:-1
19/05/18 18:20:00 INFO SparkContext: Invoking stop() from shutdown hook