
An example explaining how to work with the HBase Java API – CRUD.

What is HBase? See: HBase – Operational Database on Hadoop – Part: Basics and Shell

How many column families? HBase currently does not do well with anything above two or three column families, so keep the number of column families in your schema low. See: https://hbase.apache.org/book.html#number.of.cfs

Structure of an HBase KeyValue object
Key: row_key | col_family | col_qualifier | timestamp
Value: cell_value
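
In the Java client API these parts are exposed on the Cell interface. Below is a short illustrative helper, not part of the example project, that prints every cell of a fetched Result in exactly this layout (imports: org.apache.hadoop.hbase.Cell, org.apache.hadoop.hbase.CellUtil, org.apache.hadoop.hbase.client.Result, org.apache.hadoop.hbase.util.Bytes):

// Illustrative helper: prints each cell of a Result as
// row_key | col_family | col_qualifier | timestamp -> cell_value
static void dumpCells(Result result) {
    for (Cell cell : result.rawCells()) {
        System.out.printf("%s | %s | %s | %d -> %s%n",
                Bytes.toString(CellUtil.cloneRow(cell)),
                Bytes.toString(CellUtil.cloneFamily(cell)),
                Bytes.toString(CellUtil.cloneQualifier(cell)),
                cell.getTimestamp(),
                Bytes.toString(CellUtil.cloneValue(cell)));
    }
}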

Prerequisites

  • OS: Linux (RHEL 7.9)
  • Hadoop: Cloudera (CDP 7.1.7 SP1)
  • Authentication via Kerberos
  • OpenJDK 64-Bit 1.8.0_292

HBase Java API – CRUD

HBaseClientConnect.java (path: /hbase-crud/src/main/java/eu/placko/examples/hbase/)

package eu.placko.examples.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.*;

public class HBaseClientConnect {
    public static void main(String[] args) throws IOException {
        new HBaseClientConnect().connect();
    }

    private void connect() throws IOException {
        // Reads hbase-site.xml and core-site.xml from the classpath (see README).
        Configuration config = HBaseConfiguration.create();

        try {
            // Lightweight availability check against the HBase master.
            HBaseAdmin.available(config);
            System.out.println("\n*** HBase is running. ***");
        } catch (MasterNotRunningException ex) {
            System.out.println("\n*** HBase is not running. *** " + ex.getMessage());
            return;
        }

        HBaseClientOperations operations = new HBaseClientOperations();
        operations.run(config);
    }
}
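
Note: the example expects a valid Kerberos ticket in the ticket cache, obtained with kinit before launching the JAR (see the README below). Alternatively, the login can be done programmatically from a keytab via Hadoop's UserGroupInformation; a minimal sketch with placeholder principal and keytab path:

package eu.placko.examples.hbase;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.security.UserGroupInformation;

// Sketch: programmatic Kerberos login from a keytab instead of a kinit ticket cache.
public class KerberosLogin {
    public static Configuration login() throws IOException {
        Configuration config = HBaseConfiguration.create();  // reads core-site.xml/hbase-site.xml
        UserGroupInformation.setConfiguration(config);
        UserGroupInformation.loginUserFromKeytab(
                "<user>@<REALM>",                             // placeholder principal
                "/etc/security/keytabs/<user>.keytab");       // keytab path as in the README
        return config;
    }
}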

HBaseClientOperations.java (path: /hbase-crud/src/main/java/eu/placko/examples/hbase/)

package eu.placko.examples.hbase;

import java.io.IOException;
import java.util.Map;
import java.util.NavigableMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

public class HBaseClientOperations {
    // Table name, column family, row keys and column qualifiers used throughout.
    private static final TableName tb = TableName.valueOf("shop");
    private static final byte[] cf = Bytes.toBytes("shop");
    private static final byte[] rk1 = Bytes.toBytes("1");
    private static final byte[] rk2 = Bytes.toBytes("2");
    private static final byte[] cq1 = Bytes.toBytes("category");
    private static final byte[] cq2 = Bytes.toBytes("product");
    private static final byte[] cq3 = Bytes.toBytes("size_eu");
    private static final byte[] cq4 = Bytes.toBytes("color");
    private static final byte[] cq5 = Bytes.toBytes("sex");
    private static final byte[] cq6 = Bytes.toBytes("price_eu");

    public void run(final Configuration config) throws IOException {
        // Connection, Admin and Table are AutoCloseable; try-with-resources
        // closes all of them, so no explicit connection.close() is needed.
        try (final Connection connection = ConnectionFactory.createConnection(config);
             final Admin admin = connection.getAdmin();
             final Table table = connection.getTable(tb)) {
            deleteTable(admin);
            createTable(admin);

            put(table);
            get(table);
            update(table);
            delete(admin);
        }
    }
	
    public static void deleteTable(final Admin admin) throws IOException {
        if (admin.tableExists(tb)) {
            admin.disableTable(tb);   // a table must be disabled before it can be deleted
            admin.deleteTable(tb);
        }
    }

    public static void createTable(final Admin admin) throws IOException {
        if (!admin.tableExists(tb)) {
            // Single column family "shop" (see the note on column families above).
            TableDescriptor desc = TableDescriptorBuilder.newBuilder(tb)
                    .setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf))
                    .build();
            admin.createTable(desc);
        }
    }
	
	public static void put(final Table table) throws IOException {
		System.out.println("\n*** Create/Insert - BEGIN ***");
		
		table.put(new Put(rk1).addColumn(cf, cq1, Bytes.toBytes("shoes")));
		table.put(new Put(rk1).addColumn(cf, cq2, Bytes.toBytes("productA")));
		table.put(new Put(rk1).addColumn(cf, cq3, Bytes.toBytes("42")));
		table.put(new Put(rk1).addColumn(cf, cq4, Bytes.toBytes("black")));
		table.put(new Put(rk1).addColumn(cf, cq5, Bytes.toBytes("m")));
		table.put(new Put(rk1).addColumn(cf, cq6, Bytes.toBytes("44.50")));
		
		table.put(new Put(rk2).addColumn(cf, cq1, Bytes.toBytes("shoes")));
		table.put(new Put(rk2).addColumn(cf, cq2, Bytes.toBytes("productA")));
		table.put(new Put(rk2).addColumn(cf, cq3, Bytes.toBytes("42")));
		table.put(new Put(rk2).addColumn(cf, cq4, Bytes.toBytes("white")));
		table.put(new Put(rk2).addColumn(cf, cq5, Bytes.toBytes("m")));
		table.put(new Put(rk2).addColumn(cf, cq6, Bytes.toBytes("40.50")));
		
		System.out.println("OK");
		
		System.out.println("*** Create/Insert - END ***");
    }
	
	public static void get(final Table table) throws IOException {
        System.out.println("\n*** Read/Select - BEGIN ***");

        //System.out.println(table.get(new Get(Bytes.toBytes("1"))));
        //System.out.println(table.get(new Get(Bytes.toBytes("2"))));
        
        for (int i = 1; i < 3; i++) {
        	Get get = new Get(Bytes.toBytes(Integer.toString(i)));
        	Result result = table.get(get);
        	String row = Bytes.toString(result.getRow());
        	// Read a single cell value directly:
        	//String specificValue = Bytes.toString(result.getValue(cf, cq1));
        	//System.out.println("latest cell value in shop:category for row " + row + " is: " + specificValue);
        
        	// Traverse entire returned rows: 1 and 2
        	System.out.println(row);
        	NavigableMap<byte[], NavigableMap<byte[],NavigableMap<Long,byte[]>>> map = result.getMap();
        	for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> navigableMapEntry : map.entrySet()) {
        		String family = Bytes.toString(navigableMapEntry.getKey());
        		System.out.println("\t" + family);
        		NavigableMap<byte[], NavigableMap<Long, byte[]>> familyContents = navigableMapEntry.getValue();
        		for (Map.Entry<byte[], NavigableMap<Long, byte[]>> mapEntry : familyContents.entrySet()) {
        			String qualifier = Bytes.toString(mapEntry.getKey());
        			System.out.println("\t\t" + qualifier);
        			NavigableMap<Long, byte[]> qualifierContents = mapEntry.getValue();
        			for (Map.Entry<Long, byte[]> entry : qualifierContents.entrySet()) {
        				Long timestamp = entry.getKey();
        				String value = Bytes.toString(entry.getValue());
        				System.out.printf("\t\t\t%s, %d\n", value, timestamp);
        			}
        		}
        	}
        }
        
        System.out.println("*** Read/Select - End ***");
    }
	
	public static void update(final Table table) throws IOException {
        System.out.println("\n*** Update - BEGIN ***");

        table.put(new Put(rk1).addColumn(cf, cq1, Bytes.toBytes("shoes")));
		table.put(new Put(rk1).addColumn(cf, cq2, Bytes.toBytes("productA")));
		table.put(new Put(rk1).addColumn(cf, cq3, Bytes.toBytes("42")));
		table.put(new Put(rk1).addColumn(cf, cq4, Bytes.toBytes("black")));
		table.put(new Put(rk1).addColumn(cf, cq5, Bytes.toBytes("m")));
		table.put(new Put(rk1).addColumn(cf, cq6, Bytes.toBytes("42.50")));
		
		System.out.println("OK");
		get(table);
		
        System.out.println("*** Update - End ***");
    }
	
	public static void delete(final Admin admin) throws IOException {
        System.out.println("\n*** Delete - BEGIN ***");

        deleteTable(admin);
        System.out.println("OK");
        
        System.out.println("*** Delete - End ***");
    }
}
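
A side note on put() above: every table.put(...) call is a separate RPC, twelve in total. The client API also accepts one Put carrying several columns, or a whole list of Puts, which cuts the round trips. A minimal sketch, reusing the fields of HBaseClientOperations (not part of the original example):

// Sketch: one Put per row with all qualifiers attached, sent as a single batch.
Put p1 = new Put(rk1)
        .addColumn(cf, cq1, Bytes.toBytes("shoes"))
        .addColumn(cf, cq2, Bytes.toBytes("productA"))
        .addColumn(cf, cq3, Bytes.toBytes("42"))
        .addColumn(cf, cq4, Bytes.toBytes("black"))
        .addColumn(cf, cq5, Bytes.toBytes("m"))
        .addColumn(cf, cq6, Bytes.toBytes("44.50"));
Put p2 = new Put(rk2)
        .addColumn(cf, cq1, Bytes.toBytes("shoes"))
        .addColumn(cf, cq2, Bytes.toBytes("productA"))
        .addColumn(cf, cq3, Bytes.toBytes("42"))
        .addColumn(cf, cq4, Bytes.toBytes("white"))
        .addColumn(cf, cq5, Bytes.toBytes("m"))
        .addColumn(cf, cq6, Bytes.toBytes("40.50"));
table.put(java.util.Arrays.asList(p1, p2));     // one batched call instead of twelve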

pom.xml (path: /hbase-crud/)

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>eu.placko.examples.hbase</groupId>
  <artifactId>hbase-crud</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <name>hbase-crud</name>
  <description>An example for explaining how to work with HBase Java API – CRUD</description>
  <packaging>jar</packaging>
  <properties>
		<revision>Local-SNAPSHOT</revision>
		<maven.compiler.source>1.8</maven.compiler.source>
		<maven.compiler.target>1.8</maven.compiler.target>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
		<jar.main.class>eu.placko.examples.hbase.HBaseClientConnect</jar.main.class>
	</properties>
	
	<!-- HBase -->
	<dependencies>
		<dependency>
    		<groupId>org.apache.hbase</groupId>
    		<artifactId>hbase-client</artifactId>
    		<version>2.5.2</version>
		</dependency>
	</dependencies>
	
  <build>
	<plugins>
		<plugin>
    		<artifactId>maven-assembly-plugin</artifactId>
    			<configuration>
        			<archive>
            			<manifest>
                			<mainClass>eu.placko.examples.hbase.HBaseClientConnect</mainClass>
            			</manifest>
        			</archive>
        			<descriptorRefs>
            			<descriptorRef>jar-with-dependencies</descriptorRef>
        			</descriptorRefs>
    			</configuration>
		</plugin>
	</plugins>
  </build>
</project>

README.md (path: /hbase-crud/)

HOW TO CONFIGURE THE PROJECT
 
path: /hbase-crud/src/main/resources/
add core-site.xml from /etc/hbase/conf.cloudera.hbase/
add hbase-site.xml from /etc/hbase/conf.cloudera.hbase/
 
Building and Running
  
Build
To build the application, the following must be installed:
Java 8
Maven 3.x
Then run:
mvn clean install assembly:single
  
Run
$ su <user>
$ cd /home/<user>
$ chmod 770 ./hbase/hbase-crud-0.0.1-SNAPSHOT-jar-with-dependencies.jar
$ chown <user>:<user> ./hbase/hbase-crud-0.0.1-SNAPSHOT-jar-with-dependencies.jar
$ kinit -kt /etc/security/keytabs/<user>.keytab <user>
$ java -jar ./hbase/hbase-crud-0.0.1-SNAPSHOT-jar-with-dependencies.jar

Result

*** HBase is running. ***

*** Create/Insert - BEGIN ***
OK
*** Create/Insert - END ***

*** Read/Select - BEGIN ***
1
        shop
                category
                        shoes, 1674459300795
                color
                        black, 1674459300856
                price_eu
                        44.50, 1674459300872
                product
                        productA, 1674459300836
                sex
                        m, 1674459300864
                size_eu
                        42, 1674459300845
2
        shop
                category
                        shoes, 1674459300879
                color
                        white, 1674459300916
                price_eu
                        40.50, 1674459300931
                product
                        productA, 1674459300892
                sex
                        m, 1674459300924
                size_eu
                        42, 1674459300904
*** Read/Select - END ***

*** Update - BEGIN ***
OK

*** Read/Select - BEGIN ***
1
        shop
                category
                        shoes, 1674459300968
                color
                        black, 1674459301013
                price_eu
                        42.50, 1674459301029
                product
                        productA, 1674459300997
                sex
                        m, 1674459301022
                size_eu
                        42, 1674459301006
2
        shop
                category
                        shoes, 1674459300879
                color
                        white, 1674459300916
                price_eu
                        40.50, 1674459300931
                product
                        productA, 1674459300892
                sex
                        m, 1674459300924
                size_eu
                        42, 1674459300904
*** Read/Select - END ***
*** Update - END ***

*** Delete - BEGIN ***
OK
*** Delete - END ***

1st step: Create/Insert
 ________________________________
/               /t1 (version 1) /
|_______________|_______________|
|row_key        |cf:cq6         |
|_______________|_______________|
|1              |44.50          |
|_______________|_______________|
|2              |40.50          |
|_______________|_______________|

2nd step: Update
   ________________________________
  /               /t1 (version 1) /
 /_______________/_______________/
/               /t2 (version 2) /
|_______________|_______________|
|row_key        |cf:cq6         |
|_______________|_______________|
|1              |42.50          |
|_______________|_______________|
|2              |40.50          |
|_______________|_______________|
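
A caveat on the version diagrams: a column family created with ColumnFamilyDescriptorBuilder.of(cf), as in createTable() above, keeps only one version per cell by default in HBase 2.x, so reads return only the t2 value and the t1 cell is dropped at the next major compaction. To actually retain and read both versions as drawn, the family must be declared with a higher version count and the Get must request them. A minimal sketch (not part of the example project):

// Sketch: retain up to 3 versions per cell and read them all back.
TableDescriptor desc = TableDescriptorBuilder.newBuilder(tb)
        .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(cf)
                .setMaxVersions(3)              // default is 1
                .build())
        .build();
admin.createTable(desc);

Get get = new Get(rk1);
get.readVersions(3);                            // ask for up to 3 versions per cell
Result result = table.get(get);                 // result.getMap() now holds t1 and t2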

Source Code

https://github.com/mplacko/hbase-crud

HBase – Operational Database on Hadoop – Part: Basics and Shell

An example explaining how to work with the HBase Shell – CRUD.

HBase is an open-source, sorted, distributed map built on Hadoop, suitable for sparse datasets (i.e. datasets where many columns hold null values). It is column-oriented, NoSQL, and horizontally scalable, and it is modeled on Google’s Bigtable. Data lives in a set of tables that keep values in key-value format, which fits workloads needing random, real-time read/write access. HBase thereby offers an alternative to Hive, which sits on HDFS with a write-once, read-many approach.

HBase vs. RDBMS

  • HBase is schema-less; it has no fixed column schema and defines only column families. An RDBMS is governed by its schema, which describes the whole structure of its tables.
  • HBase is built for wide tables and scales horizontally. An RDBMS is thin, built for small tables, and hard to scale.
  • No transactions exist in HBase. An RDBMS is transactional.
  • HBase holds de-normalized tables. An RDBMS holds normalized data.
  • HBase suits semi-structured as well as structured data. An RDBMS suits structured data.

source: https://jcsites.juniata.edu/faculty/rhodes/smui/hbase.htm

Concept: the map is indexed by a row key, column key and a timestamp (followed by an example based on geonames-all-cities-with-a-population-1000.csv):

ROW COLUMN+CELL
1000006 column=admin1_code:, timestamp=1672844046876, value=DC24
1000006 column=admin2_code:, timestamp=1672844046876, value=KZN245
1000006 column=admin3_code:, timestamp=1672844046876, value=
1000006 column=admin4_code:, timestamp=1672844046876, value=13912
1000006 column=alternate_names:, timestamp=1672844046876, value=P
1000006 column=ascii_name:, timestamp=1672844046876, value=Greytown
1000006 column=cou_name_en:, timestamp=1672844046876, value=
1000006 column=country_code:, timestamp=1672844046876, value=South Africa
1000006 column=country_code_2:, timestamp=1672844046876, value=02
1000006 column=dem:, timestamp=1672844046876, value=Africa/Johannesburg
1000006 column=elevation:, timestamp=1672844046876, value=1050
1000006 column=feature_class:, timestamp=1672844046876, value=PPLA3
1000006 column=feature_code:, timestamp=1672844046876, value=ZA
1000006 column=geoname_id:, timestamp=1672844046876, value=Greytown
1000006 column=label_en:, timestamp=1672844046876, value=-29.06415,30.59279
1000006 column=modification_date:, timestamp=1672844046876, value=South Africa
1000006 column=name:, timestamp=1672844046876, value=Greytown
1000006 column=population:, timestamp=1672844046876, value=
1000006 column=timezone:, timestamp=1672844046876, value=2012-07-12

Prerequisites

  • OS: Linux (RHEL 7.9)
  • Hadoop: Cloudera (CDP 7.1.7 SP1)
  • Authentication via Kerberos
  • OpenJDK 64-Bit 1.8.0_292

HBase Shell – CRUD

--- Ranger Policy
HBase > read / write / create / execute > <user>
su <user>

kinit -kt /etc/security/keytabs/<user>.keytab <user>

hdfs dfs -mkdir /user/<user>/hbase
hdfs dfs -put /home/<user>/hbase/geonames-all-cities-with-a-population-1000.csv /user/<user>/hbase
hbase shell
create 'cities1000', 'geoname_id', 'name', 'ascii_name', 'alternate_names', 'feature_class', 'feature_code', 'country_code', 'cou_name_en', 'country_code_2', 'admin1_code', 'admin2_code', 'admin3_code', 'admin4_code', 'population', 'elevation', 'dem', 'timezone', 'modification_date', 'label_en', 'coordinates'
# note: each name after the table name becomes its own column family - fine for a demo import, but production schemas should keep the number of column families low (see the Java API post above)
exit

hbase org.apache.hadoop.hbase.mapreduce.ImportTsv -Dimporttsv.separator=';' -Dimporttsv.columns=HBASE_ROW_KEY,geoname_id,name,ascii_name,alternate_names,feature_class,feature_code,country_code,cou_name_en,country_code_2,admin1_code,admin2_code,admin3_code,admin4_code,population,elevation,dem,timezone,modification_date,label_en,coordinates cities1000 /user/<user>/hbase/geonames-all-cities-with-a-population-1000.csv

hbase shell
list
describe 'cities1000'
scan 'cities1000', {LIMIT => 10}
exit
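
For comparison, the same limited scan can be issued through the Java client API from the first post. A minimal sketch, assuming config is the HBaseConfiguration created there (imports as in HBaseClientOperations.java plus Scan and ResultScanner):

// Sketch: Java-API equivalent of: scan 'cities1000', {LIMIT => 10}
try (Connection connection = ConnectionFactory.createConnection(config);
     Table cities = connection.getTable(TableName.valueOf("cities1000"));
     ResultScanner scanner = cities.getScanner(new Scan().setLimit(10))) {
    for (Result result : scanner) {
        System.out.println(result);
    }
}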

--- OPTIONAL
hbase shell
disable 'cities1000'
drop 'cities1000'
exit
hbase shell

--- operation: CREATE (Insert)
hbase:001:0> put 'cities1000', '9999999', 'ascii_name', 'test'

--- operation: READ (Select)
hbase:002:0> get 'cities1000', '9999999', {COLUMN => 'ascii_name'}
COLUMN                         CELL
 ascii_name:                   timestamp=1672913385003, value=test

--- operation: UPDATE
hbase:003:0> put 'cities1000', '9999999', 'ascii_name', 'test_new'
hbase:004:0> get 'cities1000', '9999999', {COLUMN => 'ascii_name'}
COLUMN                         CELL
 ascii_name:                   timestamp=1672913636442, value=test_new

--- operation: DELETE
hbase:005:0> deleteall 'cities1000', '9999999'
hbase:006:0> get 'cities1000', '9999999'
COLUMN                         CELL
0 row(s)

exit
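
The four shell operations above map one-to-one onto the Java client API. A minimal sketch, reusing the hypothetical cities Table handle from the scan sketch; note that the bare 'ascii_name' in the shell is a column family with an empty qualifier:

// Sketch: Java-API equivalents of the shell CRUD above.
byte[] row = Bytes.toBytes("9999999");
byte[] family = Bytes.toBytes("ascii_name");
byte[] qualifier = Bytes.toBytes("");   // shell 'ascii_name' = family with empty qualifier

cities.put(new Put(row).addColumn(family, qualifier, Bytes.toBytes("test")));       // CREATE
Result before = cities.get(new Get(row).addColumn(family, qualifier));              // READ
cities.put(new Put(row).addColumn(family, qualifier, Bytes.toBytes("test_new")));   // UPDATE
cities.delete(new Delete(row));                                                     // DELETE (deleteall)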

--- SOURCE: https://www.guru99.com/hbase-shell-general-commands.html
