here is my hive table
create table if not exists dumdum (val map<string,map<string,struct<student_id:string,age:int>>>);
insert into dumdum select map('A',map('1',named_struct('student_id','123a', 'age',11)));
insert into dumdum select map('B',map('2',named_struct('student_id','987z', 'age',11)));
select * from dumdum;
and i see
{"A":{"1":{"student_id":"123a","age":11}}}
{"B":{"2":{"student_id":"987z","age":11}}}
I want to extract all the student_id from the inner map i.e. 123a and 987z. So here is what i want to do
select some_udf(val) from dumdum;
and the result should be
["123a","987z"]
Here is the Java UDF i wrote
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class CustomUDF extends GenericUDF {
private MapObjectInspector inputMapOI = null;
private Converter inputMapKeyConverter = null;
private MapObjectInspector inputMapValueMapOI = null;
private Converter inputMapValueConverter;
@Override
public String getDisplayString(String[] arguments) {
return "my udf";
}
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
if ((null == arguments) || (arguments.length != 1)) {
throw new UDFArgumentLengthException("1 arguments are expected.");
}
if (!(arguments[0] instanceof MapObjectInspector)) {
throw new UDFArgumentException("The first parameter should be a map object ");
}
inputMapOI = (MapObjectInspector) arguments[0];
ObjectInspector mapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
inputMapKeyConverter = ObjectInspectorConverters.getConverter(this.inputMapOI.getMapKeyObjectInspector(), mapKeyOI);
if (!(inputMapOI.getMapValueObjectInspector() instanceof MapObjectInspector)) {
throw new UDFArgumentException("The map value type must be a map ");
}
inputMapValueMapOI = (MapObjectInspector) this.inputMapOI.getMapValueObjectInspector();
List<String> structFieldNames = new ArrayList<String>();
structFieldNames.add("student_id");
structFieldNames.add("age");
List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
ObjectInspector inputMapElementOI = inputMapValueMapOI.getMapValueObjectInspector();
ObjectInspector outputMapElementOI = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);
inputMapValueConverter = ObjectInspectorConverters.getConverter(inputMapElementOI, outputMapElementOI);
return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
}
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
if ((null == arguments) || (arguments.length != 1)) {
throw new UDFArgumentLengthException("1 argument is expected.");
}
Map<?, ?> map = inputMapOI.getMap(arguments[0].get());
List<String> dataList = new ArrayList<String>();
for (Object key : map.keySet()) {
Map<?, ?> valueMap = this.inputMapValueMapOI.getMap(map.get(key));
if ((valueMap == null) || (valueMap.size() == 0)) {
continue;
}
for (Object value : valueMap.keySet()) {
try{
String innerkey = (String) this.inputMapKeyConverter.convert(value);
System.out.println("Got "+innerKey);
Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(key));
if ((innerMap == null) || (innerMap.size() == 0)) {
System.out.println("Got null");
continue;
}
for (Object struct : innerMap.keySet()) {
String strValue = (String) this.inputMapValueConverter.convert(struct);
StructField str = (StructField) inputMapValueConverter.convert(innerMap.get(strValue));
/*
Not sure what to do here. Maybe
str.getFieldID();
dataList.add(str.toString());
*/
}
}
catch (ClassCastException c){
System.out.println("Got ClassCastException");
}
}
}
return dataList;
}
}
when i invoke it as
add jar /path/to/my/jar;
CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF';
select modudf(val) from dumdum;
i never get past
Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(inner));
if ((innerMap == null) || (innerMap.size() == 0)) {
System.out.println("Got null");
continue;
}
I can see the output of
System.out.println("Got "+innerKey);
on the console.
Why cant my converter access the inner map ?
Also, how will i dereference the StructField once i am able to access the inner map ?
Update
Thanks serge_k for the suggestion. I'm afraid i still need one converter else i wont be able to get the key. here is what i tried
First i defined the second map inspector and the struct inspector as
inputMapValueMapOI = (MapObjectInspector) this.inputMapOI.getMapValueObjectInspector();
List<String> structFieldNames = new ArrayList<String>();
structFieldNames.add("student_id");
structFieldNames.add("age");
List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
structOI = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);
then here is what i tried
String innerkey = (String) inputMapKeyConverter.convert(value);
System.out.println(innerKey);
Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(innerkey));
if ((innerMap == null) || (innerMap.size() == 0)) {
System.out.println("null inner map");
continue;
}
for (Object struct : innerMap.keySet()) {
String ikey = (String) inputMapKeyConverter.convert(struct);
Object obj = structOI.getStructFieldData(innerMap.get(ikey), structOI.getStructFieldRef("student_id"));
dataList.add(obj.toString());
}
but i still see
null inner map
have i not defined the inner map inspector properly ?
I would recommend you not to use converters, just define second
MapObjectInspectorfor the inner map, get outer map value and callgetMaplike for the first map. To get the struct values you need to define a variable ofStructObjectInspectortype ininitialize, e.g.then
Update: Try to convert map keys to standard keys as follows
See https://github.com/klout/brickhouse/blob/master/src/main/java/brickhouse/udf/collect/MapRemoveKeysUDF.java for more details