NareshSandeepKongathiVendor-8506 asked:

Reading file from Azure Data Lake Storage V2 with Spark 2.4

I am trying to read a simple CSV file from Azure Data Lake Storage Gen2 with Spark 2.4, running from IntelliJ IDEA on my Mac.

Code below:

 package com.example

 import org.apache.spark.SparkConf
 import org.apache.spark.sql._

 object Test extends App {

   val appName: String = "DataExtract"
   val master: String = "local[*]"

   // Spark configuration; the fs.* settings point the ABFS driver
   // at the ADLS Gen2 account and supply the account key
   val sparkConf: SparkConf = new SparkConf()
     .setAppName(appName)
     .setMaster(master)
     .set("spark.scheduler.mode", "FAIR")
     .set("spark.sql.session.timeZone", "UTC")
     .set("spark.sql.shuffle.partitions", "32")
     .set("fs.defaultFS", "abfs://development@xyz.dfs.core.windows.net/")
     .set("fs.azure.account.key.xyz.dfs.core.windows.net", "~~key~~")

   val spark: SparkSession = SparkSession
     .builder()
     .config(sparkConf)
     .getOrCreate()

   spark.time(run(spark))

   // Read the CSV from ADLS Gen2 and show the first 10 rows
   def run(spark: SparkSession): Unit = {
     val df = spark.read.csv("abfs://development@xyz.dfs.core.windows.net/development/sales.csv")
     df.show(10)
   }
 }

It is not able to read the file and throws the following exception:

 Exception in thread "main" java.lang.NullPointerException
     at org.wildfly.openssl.CipherSuiteConverter.toJava(CipherSuiteConverter.java:284)
     at org.wildfly.openssl.OpenSSLEngine.toJavaCipherSuite(OpenSSLEngine.java:1094)
     at org.wildfly.openssl.OpenSSLEngine.getEnabledCipherSuites(OpenSSLEngine.java:729)
     at org.wildfly.openssl.OpenSSLContextSPI.getCiphers(OpenSSLContextSPI.java:333)
     at org.wildfly.openssl.OpenSSLContextSPI$1.getSupportedCipherSuites(OpenSSLContextSPI.java:365)
     at org.apache.hadoop.fs.azurebfs.utils.SSLSocketFactoryEx.<init>(SSLSocketFactoryEx.java:105)
     at org.apache.hadoop.fs.azurebfs.utils.SSLSocketFactoryEx.initializeDefaultFactory(SSLSocketFactoryEx.java:72)
     at org.apache.hadoop.fs.azurebfs.services.AbfsClient.<init>(AbfsClient.java:79)
     at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.initializeClient(AzureBlobFileSystemStore.java:817)
     at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.<init>(AzureBlobFileSystemStore.java:149)
     at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.initialize(AzureBlobFileSystem.java:108)


Can anyone help me figure out what the mistake is?




azure-data-lake-storage

Hello @NareshSandeepKongathiVendor-8506,

Welcome to the Microsoft Q&A platform.

In order to investigate further, could you please share the complete stack trace of the error message you are experiencing?

NareshSandeepKongathiVendor-8506 answered:
 20/08/06 09:58:28 INFO SharedState: Setting hive.metastore.warehouse.dir ('null') to the value of spark.sql.warehouse.dir ('file:/Users/vn500p0/IdeaProjects/plumCompleteExtract/spark-warehouse').
 20/08/06 09:58:28 INFO SharedState: Warehouse path is 'file:/Users/vn500p0/IdeaProjects/plumCompleteExtract/spark-warehouse'.
 20/08/06 09:58:29 INFO StateStoreCoordinatorRef: Registered StateStoreCoordinator endpoint
 20/08/06 09:58:29 INFO SSL: WFOPENSSL0002 OpenSSL Version OpenSSL 1.1.1g  21 Apr 2020
 20/08/06 09:58:29 WARN FileStreamSink: Error while looking for metadata directory.
 Exception in thread "main" java.lang.NullPointerException
     at org.wildfly.openssl.CipherSuiteConverter.toJava(CipherSuiteConverter.java:284)
     at org.wildfly.openssl.OpenSSLEngine.toJavaCipherSuite(OpenSSLEngine.java:1094)
     at org.wildfly.openssl.OpenSSLEngine.getEnabledCipherSuites(OpenSSLEngine.java:729)
     at org.wildfly.openssl.OpenSSLContextSPI.getCiphers(OpenSSLContextSPI.java:333)
     at org.wildfly.openssl.OpenSSLContextSPI$1.getSupportedCipherSuites(OpenSSLContextSPI.java:365)
     at org.apache.hadoop.fs.azurebfs.utils.SSLSocketFactoryEx.<init>(SSLSocketFactoryEx.java:105)
     at org.apache.hadoop.fs.azurebfs.utils.SSLSocketFactoryEx.initializeDefaultFactory(SSLSocketFactoryEx.java:72)
     at org.apache.hadoop.fs.azurebfs.services.AbfsClient.<init>(AbfsClient.java:79)
     at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.initializeClient(AzureBlobFileSystemStore.java:817)
     at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.<init>(AzureBlobFileSystemStore.java:149)
     at org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem.initialize(AzureBlobFileSystem.java:108)
     at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3303)
     at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:124)
     at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3352)
     at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:3320)
     at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:479)
     at org.apache.hadoop.fs.Path.getFileSystem(Path.java:361)
     at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:547)
     at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:545)
     at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
     at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
     at scala.collection.immutable.List.foreach(List.scala:392)
     at scala.collection.TraversableLike$class.flatMap(TraversableLike.scala:241)
     at scala.collection.immutable.List.flatMap(List.scala:355)
     at org.apache.spark.sql.execution.datasources.DataSource.org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary(DataSource.scala:545)
     at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:359)
     at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:223)
     at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:211)
     at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:619)
     at org.apache.spark.sql.DataFrameReader.csv(DataFrameReader.scala:468)
     at com.pbpd.plum.Test$.run(Test.scala:53)
     at com.pbpd.plum.Test$$anonfun$1.apply$mcV$sp(Test.scala:42)
     at com.pbpd.plum.Test$$anonfun$1.apply(Test.scala:42)
     at com.pbpd.plum.Test$$anonfun$1.apply(Test.scala:42)
     at org.apache.spark.sql.SparkSession.time(SparkSession.scala:677)
     at com.pbpd.plum.Test$.delayedEndpoint$com$pbpd$plum$Test$1(Test.scala:42)
     at com.pbpd.plum.Test$delayedInit$body.apply(Test.scala:9)
     at scala.Function0$class.apply$mcV$sp(Function0.scala:34)
     at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
     at scala.App$$anonfun$main$1.apply(App.scala:76)
     at scala.App$$anonfun$main$1.apply(App.scala:76)
     at scala.collection.immutable.List.foreach(List.scala:392)
     at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
     at scala.App$class.main(App.scala:76)
     at com.pbpd.plum.Test$.main(Test.scala:9)
     at com.pbpd.plum.Test.main(Test.scala)
 20/08/06 09:58:29 INFO SparkContext: Invoking stop() from shutdown hook
 20/08/06 09:58:29 INFO SparkUI: Stopped Spark web UI at http://192.168.0.127:4040
 20/08/06 09:58:29 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
 20/08/06 09:58:29 INFO MemoryStore: MemoryStore cleared
 20/08/06 09:58:29 INFO BlockManager: BlockManager stopped
 20/08/06 09:58:29 INFO BlockManagerMaster: BlockManagerMaster stopped
 20/08/06 09:58:29 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
 20/08/06 09:58:29 INFO SparkContext: Successfully stopped SparkContext
 20/08/06 09:58:29 INFO ShutdownHookManager: Shutdown hook called
 20/08/06 09:58:29 INFO ShutdownHookManager: Deleting directory /private/var/folders/jp/kd4w_kwd7_v503mg5tjl1jt00000gn/T/spark-1022fe97-e1a0-44b2-984d-486d9cdd29f6



PRADEEPCHEEKATLA-MSFT answered:

Hello @NareshSandeepKongathiVendor-8506,

You will receive this error message when you have a jar that is incompatible with your Hadoop version.

Please go through the issues below:

http://mail-archives.apache.org/mod_mbox/spark-issues/201907.mbox/%3CJIRA.13243325.1562321895000.591499.1562323440292@Atlassian.JIRA%3E

https://issues.apache.org/jira/browse/HADOOP-16410
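For reference, a minimal build.sbt sketch of how the dependencies might be aligned (the version numbers are illustrative assumptions; match them to the Hadoop version your Spark build was compiled against):

 // build.sbt -- illustrative sketch, not a verified configuration.
 // hadoop-azure supplies the abfs/abfss filesystem implementations (Hadoop 3.2+);
 // newer wildfly-openssl releases reportedly fix the CipherSuiteConverter NPE
 // discussed in the linked issues.
 libraryDependencies ++= Seq(
   "org.apache.spark"    %% "spark-sql"       % "2.4.4" % "provided",
   "org.apache.hadoop"   %  "hadoop-azure"    % "3.2.0",
   "org.wildfly.openssl" %  "wildfly-openssl" % "1.0.7.Final"
 )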

Hope this helps. Do let us know if you have any further queries.


Do click "Accept Answer" and upvote the post that helps you; this can be beneficial to other community members.


Hello @NareshSandeepKongathiVendor-8506,
Just checking in to see if the above answer helped. If this answers your query, do click "Accept Answer" and upvote it. And if you have any further queries, do let us know.



Hello @NareshSandeepKongathiVendor-8506,
Following up to see if the above suggestion was helpful. If you have any further queries, do let us know.

jhansi-6011 answered:

@PRADEEPCHEEKATLA-MSFT

I am trying to write data to Azure Data Lake Storage Gen2 with Spark, but I am getting the error below, even though I can read and write from a local spark-shell. Could someone help me here?

 20/11/10 22:58:18 INFO SharedState: Warehouse path is 'file:/C:/sparkpoc/spark-warehouse'.
 20/11/10 22:58:18 INFO StateStoreCoordinatorRef: Registered StateStoreCoordinator endpoint
 20/11/10 22:58:19 WARN FileStreamSink: Error while looking for metadata directory.
 Exception in thread "main" java.io.IOException: No FileSystem for scheme: abfss
     at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2586)
     at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2593)
     at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:91)
     at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2632)
     at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2614)
     at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:370)
     at org.apache.hadoop.fs.Path.getFileSystem(Path.java:296)
     at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:547)
     at org.apache.spark.sql.execution.datasources.DataSource$$anonfun$org$apache$spark$sql$execution$datasources$DataSource$$checkAndGlobPathIfNecessary$1.apply(DataSource.scala:545)
     at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
     at scala.collection.TraversableLike$$anonfun$flatMap$1.apply(TraversableLike.scala:241)
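
"No FileSystem for scheme: abfss" usually means the hadoop-azure jar (which ships the ABFS driver, Hadoop 3.2+) is not on the classpath, so Hadoop cannot resolve the scheme. A minimal sketch of registering it explicitly, assuming hadoop-azure is available; <account> and <key> below are placeholders:

 import org.apache.spark.sql.SparkSession

 // Illustrative sketch: map the abfss scheme to the implementation class
 // shipped in hadoop-azure and supply the storage account key.
 val spark = SparkSession.builder()
   .appName("AbfssWrite")
   .master("local[*]")
   .config("spark.hadoop.fs.abfss.impl",
     "org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem")
   .config("spark.hadoop.fs.azure.account.key.<account>.dfs.core.windows.net", "<key>")
   .getOrCreate()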


SrinivasuluKopparapu-5936 answered:

Solved.

We faced this issue when setting up a new machine. Initially we suspected a JDK version mismatch, but it turned out the installed OpenSSL version was not compatible with the wildfly-openssl-*.jar.

Upgrading to the latest version of wildfly-openssl-*.jar solved it; in our case, it was wildfly-openssl-1.0.7.Final.jar.
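
If an older wildfly-openssl is being pulled in transitively, one way to force the newer version in an sbt build (a sketch, assuming sbt; verify what your build actually resolves):

 // build.sbt -- illustrative: override any transitively resolved wildfly-openssl
 dependencyOverrides += "org.wildfly.openssl" % "wildfly-openssl" % "1.0.7.Final"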

Good luck.
Srinivasulu Kopparapo
