在使用 Apache Spark 的 spark-submit 通过 Kubernetes API 提交作业时,如果遇到 HTTPS 报错,通常是由于证书验证失败导致的。以下示例代码给出两种可选的解决方法(二选一):第一种禁用 SSL 验证,第二种保持 SSL 验证开启并配置自定义 CA 证书、私钥和证书:
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
# Create the SparkSession with the Kubernetes authentication settings.
# The builder chain is wrapped in parentheses instead of being continued with
# backslashes: a backslash followed by a trailing comment is a SyntaxError,
# whereas comments are allowed between the lines of a parenthesized expression.
#
# NOTE(review): the `oauthTokenSecret*` and `ssl.enabled` keys below do not
# appear among the `spark.kubernetes.authenticate.*` properties in Spark's
# "Running on Kubernetes" documentation -- confirm they are honoured by the
# Spark version in use.
# NOTE(review): an OAuth token is supplied both as a value and as a file;
# Spark presumably expects only one of the two -- verify and keep one.
spark = (
    SparkSession.builder
    .appName("spark-submit k8s API with https")
    .config("spark.kubernetes.authenticate.caCertFile", "/path/to/ca-certificate")
    .config("spark.kubernetes.authenticate.clientKeyFile", "/path/to/client-key")
    .config("spark.kubernetes.authenticate.clientCertFile", "/path/to/client-certificate")
    .config("spark.kubernetes.authenticate.driver.serviceAccountName", "spark")
    .config("spark.kubernetes.authenticate.submission.caCertFile", "/path/to/ca-certificate")
    .config("spark.kubernetes.authenticate.submission.clientKeyFile", "/path/to/client-key")
    .config("spark.kubernetes.authenticate.submission.clientCertFile", "/path/to/client-certificate")
    .config("spark.kubernetes.authenticate.submission.oauthToken", "your-oauth-token")
    .config("spark.kubernetes.authenticate.submission.oauthTokenFile", "/path/to/oauth-token-file")
    .config("spark.kubernetes.authenticate.submission.oauthTokenSecret", "your-oauth-token-secret")
    .config("spark.kubernetes.authenticate.submission.oauthTokenSecretFile", "/path/to/oauth-token-secret-file")
    # Disable SSL verification.
    .config("spark.kubernetes.authenticate.submission.ssl.enabled", "false")
    .getOrCreate()
)
# Run a Spark job with the session: load the CSV file and display it.
df = spark.read.csv("/path/to/input/file.csv")
df.show()
from pyspark.sql import SparkSession
from pyspark.conf import SparkConf
# Create the SparkSession with SSL left enabled and a custom CA certificate,
# private key and certificate supplied.
# The builder chain is wrapped in parentheses instead of being continued with
# backslashes: a backslash followed by a trailing comment is a SyntaxError,
# whereas comments are allowed between the lines of a parenthesized expression.
#
# NOTE(review): the `oauthTokenSecret*` and `ssl.*` keys below do not appear
# among the `spark.kubernetes.authenticate.*` properties in Spark's
# "Running on Kubernetes" documentation -- confirm they are honoured by the
# Spark version in use.
# NOTE(review): an OAuth token is supplied both as a value and as a file;
# Spark presumably expects only one of the two -- verify and keep one.
# NOTE(review): getOrCreate() returns an already-active session when one
# exists in the process, so run this in a fresh process for these settings
# to be the ones that apply.
spark = (
    SparkSession.builder
    .appName("spark-submit k8s API with https")
    .config("spark.kubernetes.authenticate.caCertFile", "/path/to/ca-certificate")
    .config("spark.kubernetes.authenticate.clientKeyFile", "/path/to/client-key")
    .config("spark.kubernetes.authenticate.clientCertFile", "/path/to/client-certificate")
    .config("spark.kubernetes.authenticate.driver.serviceAccountName", "spark")
    .config("spark.kubernetes.authenticate.submission.caCertFile", "/path/to/ca-certificate")
    .config("spark.kubernetes.authenticate.submission.clientKeyFile", "/path/to/client-key")
    .config("spark.kubernetes.authenticate.submission.clientCertFile", "/path/to/client-certificate")
    .config("spark.kubernetes.authenticate.submission.oauthToken", "your-oauth-token")
    .config("spark.kubernetes.authenticate.submission.oauthTokenFile", "/path/to/oauth-token-file")
    .config("spark.kubernetes.authenticate.submission.oauthTokenSecret", "your-oauth-token-secret")
    .config("spark.kubernetes.authenticate.submission.oauthTokenSecretFile", "/path/to/oauth-token-secret-file")
    .config("spark.kubernetes.authenticate.submission.ssl.enabled", "true")
    # Add the custom CA certificate.
    .config("spark.kubernetes.authenticate.submission.ssl.caCertFile", "/path/to/custom-ca-certificate")
    # Add the custom private key.
    .config("spark.kubernetes.authenticate.submission.ssl.keyFile", "/path/to/custom-key")
    # Add the custom certificate.
    .config("spark.kubernetes.authenticate.submission.ssl.certFile", "/path/to/custom-certificate")
    .getOrCreate()
)
# Run a Spark job with the session: load the CSV file and display it.
df = spark.read.csv("/path/to/input/file.csv")
df.show()
请根据实际情况修改上述代码中的证书文件路径和其他配置参数。