HBase – Operational Database on Hadoop – Part: Java API

Posted: January 25, 2023 in Hadoop
Tags:

An example that shows how to work with the HBase Java API – CRUD (create, read, update, delete).

What is HBase? See: HBase – Operational Database on Hadoop – Part: Basics and Shell

How many column families? HBase currently does not do well with anything above two or three column families, so keep the number of column families in your schema low. See: https://hbase.apache.org/book.html#number.of.cfs

Structure of HBase Key-Value object
Key: row_key | col_family | col_qualifier | timestamp
Value: cell_value

Prerequisites

  • OS: Linux (RHEL 7.9)
  • Hadoop: Cloudera (CDP 7.1.7 SP1)
  • Authentication via Kerberos
  • OpenJDK 64-Bit 1.8.0_292

HBase Java API – CRUD

HBaseClientConnect.java (path: /hbase-crud/src/main/java/eu/placko/examples/hbase/)

package eu.placko.examples.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.*;

/**
 * Entry point of the HBase CRUD example.
 *
 * <p>Builds an HBase client configuration, checks that HBase is reachable and,
 * if it is, hands the configuration over to {@link HBaseClientOperations}.
 */
public class HBaseClientConnect {
    /**
     * Runs the example.
     *
     * @param args command-line arguments; not used
     * @throws IOException if the availability check or any HBase operation fails
     *         for a reason other than the master not running
     */
    public static void main(String[] args) throws IOException {
        new HBaseClientConnect().connect();
    }

    /**
     * Creates the client configuration, verifies that HBase is available and
     * runs the CRUD operations.
     *
     * @throws IOException if the availability check or the CRUD run fails
     */
    private void connect() throws IOException {
        Configuration config = HBaseConfiguration.create();

        try {
            HBaseAdmin.available(config);
            System.out.println("\n*** HBase is running. ***");
        } catch (MasterNotRunningException ex) {
            // Keep the banner and the exception text apart so the output stays readable.
            System.out.println("\n*** HBase is not running. *** " + ex.getMessage());
            return;
        }

        // Named in lowerCamelCase so the variable no longer shadows the class name.
        HBaseClientOperations operations = new HBaseClientOperations();
        operations.run(config);
    }
}

HBaseClientOperations.java (path: /hbase-crud/src/main/java/eu/placko/examples/hbase/)

package eu.placko.examples.hbase;

import java.io.IOException;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseClientOperations {
	private static final TableName tb = TableName.valueOf("shop");
    private static final byte[] cf = Bytes.toBytes("shop");
    private static final byte[] rk1 = Bytes.toBytes("1");
    private static final byte[] rk2 = Bytes.toBytes("2");
    private static final byte[] cq1 = Bytes.toBytes("category");
    private static final byte[] cq2 = Bytes.toBytes("product");
    private static final byte[] cq3 = Bytes.toBytes("size_eu");
    private static final byte[] cq4 = Bytes.toBytes("color");
    private static final byte[] cq5 = Bytes.toBytes("sex");
    private static final byte[] cq6 = Bytes.toBytes("price_eu");
	
	public void run(final Configuration config) throws IOException {
        try (final Connection connection = ConnectionFactory.createConnection(config)) {
            final Admin admin = connection.getAdmin();
            deleteTable(admin);
            createTable(admin);
            
            final Table table = connection.getTable(tb);
            put(table);
            get(table);
            update(table);
            delete(admin);
            
            connection.close();
        }
    }
	
	public static void deleteTable(final Admin admin) throws IOException {
        if (admin.tableExists(tb)) {
            admin.disableTable(tb);
            admin.deleteTable(tb);
        }
    }
	
	public static void createTable(final Admin admin) throws IOException {
        if(!admin.tableExists(tb)) {
            TableDescriptor desc = TableDescriptorBuilder.newBuilder(tb)
                    .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf))
                    .build();
            admin.createTable(desc);
        }
    }
	
	public static void put(final Table table) throws IOException {
		System.out.println("\n*** Create/Insert - BEGIN ***");
		
		table.put(new Put(rk1).addColumn(cf, cq1, Bytes.toBytes("shoes")));
		table.put(new Put(rk1).addColumn(cf, cq2, Bytes.toBytes("productA")));
		table.put(new Put(rk1).addColumn(cf, cq3, Bytes.toBytes("42")));
		table.put(new Put(rk1).addColumn(cf, cq4, Bytes.toBytes("black")));
		table.put(new Put(rk1).addColumn(cf, cq5, Bytes.toBytes("m")));
		table.put(new Put(rk1).addColumn(cf, cq6, Bytes.toBytes("44.50")));
		
		table.put(new Put(rk2).addColumn(cf, cq1, Bytes.toBytes("shoes")));
		table.put(new Put(rk2).addColumn(cf, cq2, Bytes.toBytes("productA")));
		table.put(new Put(rk2).addColumn(cf, cq3, Bytes.toBytes("42")));
		table.put(new Put(rk2).addColumn(cf, cq4, Bytes.toBytes("white")));
		table.put(new Put(rk2).addColumn(cf, cq5, Bytes.toBytes("m")));
		table.put(new Put(rk2).addColumn(cf, cq6, Bytes.toBytes("40.50")));
		
		System.out.println("OK");
		
		System.out.println("*** Create/Insert - END ***");
    }
	
	public static void get(final Table table) throws IOException {
        System.out.println("\n*** Read/Select - BEGIN ***");

        //System.out.println(table.get(new Get(Bytes.toBytes("1"))));
        //System.out.println(table.get(new Get(Bytes.toBytes("2"))));
        
        for (int i = 1; i < 3; i++) {
        	Get get = new Get(Bytes.toBytes(Integer.toString(i)));
        	Result result = table.get(get);
        	String row = Bytes.toString(result.getRow());
        	//String specificValue = Bytes.toString(result.getValue(Bytes.toBytes(Bytes.toString(cf)), 
Bytes.toBytes(Bytes.toString(cq1))));
        	//System.out.println("latest cell value in shoes:category for row 1 is: " + specificValue);
        
        	// Traverse entire returned rows: 1 and 2
        	System.out.println(row);
        	NavigableMap<byte[], NavigableMap<byte[],NavigableMap<Long,byte[]>>> map = result.getMap();
        	for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> navigableMapEntry : map.entrySet()) {
        		String family = Bytes.toString(navigableMapEntry.getKey());
        		System.out.println("\t" + family);
        		NavigableMap<byte[], NavigableMap<Long, byte[]>> familyContents = navigableMapEntry.getValue();
        		for (Map.Entry<byte[], NavigableMap<Long, byte[]>> mapEntry : familyContents.entrySet()) {
        			String qualifier = Bytes.toString(mapEntry.getKey());
        			System.out.println("\t\t" + qualifier);
        			NavigableMap<Long, byte[]> qualifierContents = mapEntry.getValue();
        			for (Map.Entry<Long, byte[]> entry : qualifierContents.entrySet()) {
        				Long timestamp = entry.getKey();
        				String value = Bytes.toString(entry.getValue());
        				System.out.printf("\t\t\t%s, %d\n", value, timestamp);
        			}
        		}
        	}
        }
        
        System.out.println("*** Read/Select - End ***");
    }
	
	public static void update(final Table table) throws IOException {
        System.out.println("\n*** Update - BEGIN ***");

        table.put(new Put(rk1).addColumn(cf, cq1, Bytes.toBytes("shoes")));
		table.put(new Put(rk1).addColumn(cf, cq2, Bytes.toBytes("productA")));
		table.put(new Put(rk1).addColumn(cf, cq3, Bytes.toBytes("42")));
		table.put(new Put(rk1).addColumn(cf, cq4, Bytes.toBytes("black")));
		table.put(new Put(rk1).addColumn(cf, cq5, Bytes.toBytes("m")));
		table.put(new Put(rk1).addColumn(cf, cq6, Bytes.toBytes("42.50")));
		
		System.out.println("OK");
		get(table);
		
        System.out.println("*** Update - End ***");
    }
	
	public static void delete(final Admin admin) throws IOException {
        System.out.println("\n*** Delete - BEGIN ***");

        deleteTable(admin);
        System.out.println("OK");
        
        System.out.println("*** Delete - End ***");
    }
}

pom.xml (path: /hbase-crud/)

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>eu.placko.examples.hbase</groupId>
  <artifactId>hbase-crud</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>hbase-crud</name>
  <description>An example for explaining how to work with HBase Java API – CRUD</description>
  <packaging>jar</packaging>
  <properties>
		<revision>Local-SNAPSHOT</revision>
		<!-- hbase-client 2.x needs Java 8 or later, so compile for 1.8 rather than 1.7 -->
		<maven.compiler.source>1.8</maven.compiler.source>
		<maven.compiler.target>1.8</maven.compiler.target>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<!-- Main class written into the manifest of the assembled jar -->
		<jar.main.class>eu.placko.examples.hbase.HBaseClientConnect</jar.main.class>
	</properties>
	
	<!-- HBase -->
	<dependencies>
		<dependency>
    		<groupId>org.apache.hbase</groupId>
    		<artifactId>hbase-client</artifactId>
    		<version>2.5.2</version>
		</dependency>
	</dependencies>
	
  <build>
	<plugins>
		<!-- Builds the runnable *-jar-with-dependencies.jar via "mvn ... assembly:single" -->
		<plugin>
    		<artifactId>maven-assembly-plugin</artifactId>
    			<configuration>
        			<archive>
            			<manifest>
                			<mainClass>${jar.main.class}</mainClass>
            			</manifest>
        			</archive>
        			<descriptorRefs>
            			<descriptorRef>jar-with-dependencies</descriptorRef>
        			</descriptorRefs>
    			</configuration>
		</plugin>
	</plugins>
	<pluginManagement />
  </build>
</project>

README.md (path: /hbase-crud/)

HOW TO CONFIGURE THE PROJECT
 
path: /hbase-crud/src/main/resources/
add core-site.xml from /etc/hbase/conf.cloudera.hbase/
add hbase-site.xml from /etc/hbase/conf.cloudera.hbase/
 
Building and Running
  
Build
To build the application, the following must be installed:
Java 9
Maven 3.x
Then just run this:
mvn clean install assembly:single
  
Run
$ su <user>
$ cd /home/<user>
$ chmod 770 ./hbase/hbase-crud-0.0.1-SNAPSHOT-jar-with-dependencies.jar
$ chown <user>:<user> ./hbase/hbase-crud-0.0.1-SNAPSHOT-jar-with-dependencies.jar
$ kinit -kt /etc/security/keytabs/<user>.keytab <user>
$ java -jar ./hbase/hbase-crud-0.0.1-SNAPSHOT-jar-with-dependencies.jar

Result

*** HBase is running. ***

*** Create/Insert - BEGIN ***
OK
*** Create/Insert - END ***

*** Read/Select - BEGIN ***
1
        shop
                category
                        shoes, 1674459300795
                color
                        black, 1674459300856
                price_eu
                        44.50, 1674459300872
                product
                        productA, 1674459300836
                sex
                        m, 1674459300864
                size_eu
                        42, 1674459300845
2
        shop
                category
                        shoes, 1674459300879
                color
                        white, 1674459300916
                price_eu
                        40.50, 1674459300931
                product
                        productA, 1674459300892
                sex
                        m, 1674459300924
                size_eu
                        42, 1674459300904
*** Read/Select - End ***

*** Update - BEGIN ***
OK

*** Read/Select - BEGIN ***
1
        shop
                category
                        shoes, 1674459300968
                color
                        black, 1674459301013
                price_eu
                        42.50, 1674459301029
                product
                        productA, 1674459300997
                sex
                        m, 1674459301022
                size_eu
                        42, 1674459301006
2
        shop
                category
                        shoes, 1674459300879
                color
                        white, 1674459300916
                price_eu
                        40.50, 1674459300931
                product
                        productA, 1674459300892
                sex
                        m, 1674459300924
                size_eu
                        42, 1674459300904
*** Read/Select - End ***
*** Update - End ***

*** Delete - BEGIN ***
OK
*** Delete - End ***
1st step: Create/Insert
 ________________________________
/               /t1 (version 1) /
|_______________|_______________|
|row_key        |cf:cq6         |
|_______________|_______________|
|1              |44.50          |
|_______________|_______________|
|2              |40.50          |
|_______________|_______________|

2nd step: Update
   ________________________________
  /               /t1 (version 1) /
 /_______________/_______________/
/               /t2 (version 2) /
|_______________|_______________|
|row_key        |cf:cq6         |
|_______________|_______________|
|1              |42.50          |
|_______________|_______________|
|2              |40.50          |
|_______________|_______________|

Source Code

https://github.com/mplacko/hbase-crud

Additional Info

Advertisement

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s