Archive for December, 2021

Impala JDBC Tester – Cloudera driver

Posted: December 21, 2021 in Hadoop
Tags:

Prerequisites

impala-jdbc-tester

ClouderaImpalaJdbcTester.java

import java.io.IOException;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;

public class ClouderaImpalaJdbcTester {

    private static final String CONNECTION_URL_PROPERTY = "connection.url";
    private static final String JDBC_DRIVER_NAME_PROPERTY = "jdbc.driver.class.name";

    private static String connectionUrl;
    private static String jdbcDriverName;

    private static void loadConfiguration() throws IOException {
        InputStream input = null;
        try {
            String filename = ClouderaImpalaJdbcTester.class.getSimpleName() + ".conf";
            filename = "ClouderaImpalaJdbcTester.conf";
            input = ClouderaImpalaJdbcTester.class.getClassLoader().getResourceAsStream(filename);
            Properties prop = new Properties();
            prop.load(input);

            connectionUrl = prop.getProperty(CONNECTION_URL_PROPERTY);
            jdbcDriverName = prop.getProperty(JDBC_DRIVER_NAME_PROPERTY);
        } finally {
            try {
                if (input != null)
                    input.close();
            } catch (IOException e) {
                // ToDo
            }
        }
    }

    public static void main(String[] args) throws IOException {
        //String sqlStatement = args[0];
        String sqlStatement = "show databases";
        //sqlStatement = "select * from <table>;";

        loadConfiguration();

        System.out.println("\n=============================================");
        System.out.println("Cloudera Impala JDBC Tester");
        System.out.println("Using Connection URL: " + connectionUrl);
        System.out.println("Running Query: " + sqlStatement);

        Connection con = null;
        try {

            Class.forName(jdbcDriverName);
            con = DriverManager.getConnection(connectionUrl);
            Statement stmt = con.createStatement();
            ResultSet rs = stmt.executeQuery(sqlStatement);
            System.out.println("\n== Begin Query Results ======================");
            // print the results to the console
            while (rs.next()) {
                // the tester query returns one String column
                System.out.println(rs.getString(1));
            }
            System.out.println("== End Query Results =======================\n\n");
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                con.close();
            } catch (Exception e) {
                // ToDo
            }
        }
    }
}

ClouderaImpalaJdbcTester.conf

#For secure cluster with Kerberos authentication and TLS enabled; Here we allow the SSL certificate used by the server to be self-signed, by sending the AllowSelfSignedCerts property=1
connection.url = jdbc:impala://<impala-load-balancer>:21050;AuthMech=3;UID=<username>;PWD=<password>;SSL=1;AllowSelfSignedCerts=1
jdbc.driver.class.name = com.cloudera.impala.jdbc.Driver
#com.cloudera.hive.jdbc.HS2Driver

Source Code

https://github.com/mplacko/impala-jdbc-tester

Additional Info

Client {
com.sun.security.auth.module.Krb5LoginModule required
useKeyTab=true
doNotPrompt=true
useTicketCache=true
principal=”impala@<…>”
keyTab=”<…>/impala.keytab”;
};

  • SSLTrustStore vs JAVA_HOME

jssecacerts and cacerts are part of the JAVA_HOME, that’s what hive/impala driver is loking for.
If jssecacerts does not exist, then cacerts is used. (The default location of cacerts is jre/lib/security.)
If JAVA_HOME is not set, it cannot access the truststore files; Hence in the connection string explicitly has to be defined following: SSLTrustStore=/etc/pki/java/cacerts;SSLTrustStorePwd=changeit;
note: If the trust store requires a password, provide it using the property SSLTrustStorePwd.
where:
{
“name” : “hiveserver2_truststore_file”,
“value” : “/etc/pki/java/cacerts”
}, {
“name” : “hiveserver2_truststore_password”,
“value” : “changeit”
}, {..
where:
JAVA_HOME: /usr/java/default

  • Impala
    • INVALIDATE METADATA
      • The INVALIDATE METADATA <tablename> must be used if:
        • if the table is newly created/removed  (outside impala: impala is not aware of this operation, e.g: the table is created/dropped in Hive or Spark)
        • if the table schema is altered outside impala (e.g column name added/removed/renamed from Hive), if partitions are added/removed outside impala (e.g from Hive)
        • if the blocks location is changed (in case the blocks were moved from 1 machine to another, e.g execution of the hdfs rebalance)
    • REFRESH
    • COMPUTE STATS

Prerequisites

References

Hive/Impala SQLLine Tester

prepare directory

mkdir sqllinetester && cd sqllinetester

download SQLLine with-dependencies

wget https://repo1.maven.org/maven2/sqlline/sqlline/1.11.0/sqlline-1.11.0-jar-with-dependencies.jar
chmod +x sqlline-1.11.0-jar-with-dependencies.jar

download Hive JDBC driver

wget https://repository.cloudera.com/artifactory/cdh-build/Hive/HiveJDBC42/2.6.15.1018/HiveJDBC42-2.6.15.1018.jar
chmod +x HiveJDBC42-2.6.15.1018.jar

download Impala JDBC driver

wget https://repository.cloudera.com/artifactory/cdh-build/Impala/ImpalaJDBC42/2.6.15.1017/ImpalaJDBC42-2.6.15.1017.jar
chmod +x ImpalaJDBC42-2.6.15.1017.jar

obtain TGT in ticket cache

# kinit -kt <path-to-keytab> <client-principal>
kinit -kt /etc/security/keytabs/hiveimpalatester.keytab hiveimpalatester

execute SQLLine with the jars in the classpath

java -cp "./*" sqlline.SqlLine

check if JDBC drivers are loaded

sqlline> !scan
scan complete in 84ms
3 driver classes found
Compliant Version Driver Class
no        2.6     com.cloudera.hive.jdbc.HS1Driver
no        2.6     com.cloudera.hive.jdbc.HS2Driver
no        2.6     com.cloudera.impala.jdbc.Driver

# class names:
# com.cloudera.impala.jdbc.Driver
# com.cloudera.hive.jdbc.HS2Driver

connect to Hive via Hive JDBC

sqlline> !connect jdbc:hive2://<hs2_node>:10000/default;AuthMech=1;ssl=1;sslTrustStore=/<path>/certs.jks;KrbRealm=<krb_realm>;KrbHostFQDN=_HOST;KrbServiceName=hive;KrbAuthType=2;

# When you authenticate via Kerberos, sqlline will ask for username/password. When prompted, just press Enter and Enter.

0: jdbc:hive2://<hs2_node> show tables;

connect to Impala via Impala JDBC

sqlline> !connect jdbc:impala://<impala-load-balancer>:21050/default;AuthMech=1;ssl=1;sslTrustStore=/<path>/certs.jks;KrbRealm=<krb_realm>;KrbHostFQDN=_HOST;KrbServiceName=impala;KrbAuthType=2;

# When you authenticate via Kerberos, sqlline will ask for username/password. When prompted, just press Enter and Enter.

0: jdbc:impala://<impala-load-balancer> show tables;

exit SQLLine

sqlline> !quit

Additional Info

AuthMech

  • 0: no authentication
  • 1: Kerberos
  • 2: user name
  • 3: user name and password
  • 4: user name and password with SSL

KrbAuthType

  • 0: To configure the driver to automatically detect which method to use for obtaining the Subject, set the KrbAuthType property to 0. Alternatively, do not set the KrbAuthType property.
  • 1: Or, to create a LoginContext from a JAAS configuration and then use the Subject associated with it, set the KrbAuthType property to 1.
  • 2: Or, to create a LoginContext from a Kerberos ticket cache and then use the Subject associated with it, set the KrbAuthType property to 2.