大数据Spark “蘑菇云”行动第93课:Hive中的内置函数、UDF、UDAF实战 select sum_all(age) from ... hive> use default; show tables; select * from employeeforhaving; 一:udf 编码 import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.io.Text; public final class LowerCase extends UDF { public Text evaluate(final Text s) { if (s == null) { return null; } return new Text(s.toString().toLowerCase()); } } 二:导出jar包 hadoopapps.jar 三,加载jar包 hive> add jar /root/..../hadoopapps.jar 四,hive> CREATE TEMPORARY FUNCTION tolower AS com.dt.spark.hive.HIVEUDF 五,使用 hive> SELECT tolower(name) from employeeforhaving; 一:UDAF 聚合 编码 import org.apache.hadoop.hive.ql.exec.UDAF; import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; public class concat extends UDAF { public static class ConcatUDAFEvaluator implements UDAFEvaluator{ public static class PartialResult{ String result; String delimiter; } private PartialResult partial; public void init() { partial = null; } public boolean iterate(String value,String deli){ if (value == null){ return true; } if (partial == null){ partial = new PartialResult(); partial.result = new String(""); if( deli == null || deli.equals("") ) { partial.delimiter = new String(","); } else { partial.delimiter = new String(deli); } } if ( partial.result.length() > 0 ) { partial.result = partial.result.concat(partial.delimiter); } partial.result = partial.result.concat(value); return true; } public PartialResult terminatePartial(){ return partial; } public boolean merge(PartialResult other){ if (other == null){ return true; } if (partial == null){ partial = new PartialResult(); partial.result = new String(other.result); partial.delimiter = new String(other.delimiter); } else { if ( partial.result.length() > 0 ) { partial.result = partial.result.concat(partial.delimiter); } partial.result = partial.result.concat(other.result); } return true; } public String terminate(){ return new String(partial.result); } } } 二:
UDAF 聚合