I am trying to run Spark History Server with minIO but keep getting
AmazonHttpClient: Unable to execute HTTP request: Connection refused
Complete exception:
20/10/20 09:06:21 INFO AmazonHttpClient: Unable to execute HTTP request: Connection refused (Connection refused)
java.net.ConnectException: Connection refused (Connection refused)
at java.net.PlainSocketImpl.socketConnect(Native Method)
at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
at java.net.Socket.connect(Socket.java:607)
at org.apache.http.conn.scheme.PlainSocketFactory.connectSocket(PlainSocketFactory.java:121)
at org.apache.http.impl.conn.DefaultClientConnectionOperator.openConnection(DefaultClientConnectionOperator.java:180)
at org.apache.http.impl.conn.ManagedClientConnectionImpl.open(ManagedClientConnectionImpl.java:326)
at org.apache.http.impl.client.DefaultRequestDirector.tryConnect(DefaultRequestDirector.java:610)
at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:445)
at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:835)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:83)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:56)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:384)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:232)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3528)
My Docker compose:
version: '3.0'
services:
minio:
image: minio/minio
container_name: minio
volumes:
- data1-1:/data1
ports:
- 9000:9000
environment:
MINIO_ACCESS_KEY: AKIAIOSFODNN7EXAMPLE
MINIO_SECRET_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
command: server /data1
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:9000/minio/health/live" ]
interval: 30s
timeout: 20s
retries: 3
spark-history:
image: test
build:
context: spark-history
container_name: spark-history
links:
- minio
depends_on:
- minio
ports:
- 18080:18080
volumes:
- data2:/data2
environment:
MINIO_ACCESS_KEY: AKIAIOSFODNN7EXAMPLE
MINIO_SECRET_KEY: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
volumes:
data1-1:
data2:
My Dockerfile for Spark History:
ARG SPARK_IMAGE=gcr.io/spark-operator/spark:v3.0.0
FROM ${SPARK_IMAGE}
# Switch to user root so we can add additional jars, packages and configuration files.
USER root
RUN apt-get -y update && apt-get install -y coreutils
#USER 1234
#RUN apk --update add coreutils
#RUN mkdir /tmp/spark-events
RUN mkdir -p /etc/hadoop/conf
# Add dependency for hadoop-aws
ADD https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk/1.7.4/aws-java-sdk-1.7.4.jar $SPARK_HOME/jars
# Add hadoop-aws to access Amazon S3
ADD https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/2.7.7/hadoop-aws-2.7.7.jar $SPARK_HOME/jars
ADD spark-defaults.conf /opt/spark/conf/
ENV SPARK_NO_DAEMONIZE TRUE
ENTRYPOINT ["/opt/spark/sbin/start-history-server.sh"]
My spark-defaults config:
spark.history.fs.logDirectory s3a://spark-streaming/checkpoint/
spark.hadoop.fs.s3a.endpoint http://minio:9000
spark.hadoop.fs.s3a.access.key AKIAIOSFODNN7EXAMPLE
spark.hadoop.fs.s3a.secret.key wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
spark.hadoop.fs.s3a.path.style.access true
spark.hadoop.fs.s3a.impl org.apache.hadoop.fs.s3a.S3AFileSystem
Not sure what I am missing
I think you have the same post in the GitHub. Here is the Reference to the Post if you are still looking for the solution. Basically you have to add the
minio
dependency in the Docker.https://github.com/minio/minio/issues/10715