org.apache.pig.piggybank.storage.avro
Class AvroStorageUtils

java.lang.Object
  extended by org.apache.pig.piggybank.storage.avro.AvroStorageUtils

public class AvroStorageUtils
extends Object

This is a utility class for this package.


Field Summary
static org.apache.avro.Schema BooleanSchema
           
static org.apache.avro.Schema BytesSchema
           
static org.apache.avro.Schema DoubleSchema
           
static org.apache.avro.Schema FloatSchema
           
static org.apache.avro.Schema IntSchema
           
static org.apache.avro.Schema LongSchema
           
static org.apache.avro.Schema NullSchema
           
static org.apache.hadoop.fs.PathFilter PATH_FILTER
          ignore HDFS files with prefix "_" or "."
static org.apache.avro.Schema StringSchema
           
 
Constructor Summary
AvroStorageUtils()
           
 
Method Summary
static boolean addInputPaths(String pathString, org.apache.hadoop.mapreduce.Job job)
          add input paths to the job config
static boolean containsGenericUnion(org.apache.avro.Schema s)
          determine whether the input schema contains generic unions
static boolean containsRecursiveRecord(org.apache.avro.Schema s)
          determine whether the input schema contains recursive records
protected static boolean containsRecursiveRecord(org.apache.avro.Schema s, Set<String> definedRecordNames)
          Called by containsRecursiveRecord(Schema) and it recursively checks whether the input schema contains recursive records.
static org.apache.avro.Schema.Field createUDField(int index, org.apache.avro.Schema s)
          create an avro field using the given schema
static org.apache.avro.Schema createUDPartialRecordSchema()
          create an avro field with null schema (it is a placeholder)
static org.apache.avro.Schema getAcceptedType(org.apache.avro.Schema in)
          extract schema from a nullable union
static org.apache.hadoop.fs.Path getLast(org.apache.hadoop.fs.Path path, org.apache.hadoop.fs.FileSystem fs)
          get the last file of an HDFS path if it is a directory; or return the file itself if the path is a file
static org.apache.hadoop.fs.Path getLast(String path, org.apache.hadoop.fs.FileSystem fs)
          get the last file of an HDFS path if it is a directory; or return the file itself if the path is a file
static org.apache.avro.Schema.Field getUDField(org.apache.avro.Schema s, int index)
          get field schema given index number
static boolean isAcceptableUnion(org.apache.avro.Schema in)
          determine whether a union is a nullable union; note that this function doesn't check the contained types of the input union recursively.
static boolean isTupleWrapper(ResourceSchema.ResourceFieldSchema pigSchema)
          check whether it is just a wrapped tuple
static boolean isUDPartialRecordSchema(org.apache.avro.Schema s)
          check whether a schema is a placeholder (using field name)
static boolean noDir(org.apache.hadoop.fs.FileStatus[] ss)
          check whether there is NO directory in the input file (status) list
static ResourceSchema.ResourceFieldSchema wrapAsTuple(ResourceSchema.ResourceFieldSchema subFieldSchema)
          wrap a pig schema as tuple
static org.apache.avro.Schema wrapAsUnion(org.apache.avro.Schema schema, boolean nullable)
          Wrap an avro schema as a nullable union if needed.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

BooleanSchema

public static org.apache.avro.Schema BooleanSchema

LongSchema

public static org.apache.avro.Schema LongSchema

FloatSchema

public static org.apache.avro.Schema FloatSchema

DoubleSchema

public static org.apache.avro.Schema DoubleSchema

IntSchema

public static org.apache.avro.Schema IntSchema

StringSchema

public static org.apache.avro.Schema StringSchema

BytesSchema

public static org.apache.avro.Schema BytesSchema

NullSchema

public static org.apache.avro.Schema NullSchema

PATH_FILTER

public static org.apache.hadoop.fs.PathFilter PATH_FILTER
ignore HDFS files with prefix "_" or "."

Constructor Detail

AvroStorageUtils

public AvroStorageUtils()
Method Detail

createUDField

public static org.apache.avro.Schema.Field createUDField(int index,
                                                         org.apache.avro.Schema s)
create an avro field using the given schema


createUDPartialRecordSchema

public static org.apache.avro.Schema createUDPartialRecordSchema()
create an avro field with null schema (it is a placeholder)


isUDPartialRecordSchema

public static boolean isUDPartialRecordSchema(org.apache.avro.Schema s)
check whether a schema is a placeholder (using field name)


getUDField

public static org.apache.avro.Schema.Field getUDField(org.apache.avro.Schema s,
                                                      int index)
get field schema given index number


addInputPaths

public static boolean addInputPaths(String pathString,
                                    org.apache.hadoop.mapreduce.Job job)
                             throws IOException
add input paths to the job config

Throws:
IOException

noDir

public static boolean noDir(org.apache.hadoop.fs.FileStatus[] ss)
check whether there is NO directory in the input file (status) list


getLast

public static org.apache.hadoop.fs.Path getLast(String path,
                                                org.apache.hadoop.fs.FileSystem fs)
                                         throws IOException
get the last file of an HDFS path if it is a directory; or return the file itself if the path is a file

Throws:
IOException

getLast

public static org.apache.hadoop.fs.Path getLast(org.apache.hadoop.fs.Path path,
                                                org.apache.hadoop.fs.FileSystem fs)
                                         throws IOException
get the last file of an HDFS path if it is a directory; or return the file itself if the path is a file

Throws:
IOException

wrapAsUnion

public static org.apache.avro.Schema wrapAsUnion(org.apache.avro.Schema schema,
                                                 boolean nullable)
Wrap an avro schema as a nullable union if needed. For instance, wrap schema "int" as ["null", "int"]


containsRecursiveRecord

public static boolean containsRecursiveRecord(org.apache.avro.Schema s)
determine whether the input schema contains recursive records


containsRecursiveRecord

protected static boolean containsRecursiveRecord(org.apache.avro.Schema s,
                                                 Set<String> definedRecordNames)
Called by containsRecursiveRecord(Schema) and it recursively checks whether the input schema contains recursive records.


containsGenericUnion

public static boolean containsGenericUnion(org.apache.avro.Schema s)
determine whether the input schema contains generic unions


isAcceptableUnion

public static boolean isAcceptableUnion(org.apache.avro.Schema in)
determine whether a union is a nullable union; note that this function doesn't check the contained types of the input union recursively.


wrapAsTuple

public static ResourceSchema.ResourceFieldSchema wrapAsTuple(ResourceSchema.ResourceFieldSchema subFieldSchema)
                                                      throws IOException
wrap a pig schema as tuple

Throws:
IOException

isTupleWrapper

public static boolean isTupleWrapper(ResourceSchema.ResourceFieldSchema pigSchema)
check whether it is just a wrapped tuple


getAcceptedType

public static org.apache.avro.Schema getAcceptedType(org.apache.avro.Schema in)
extract schema from a nullable union



Copyright © The Apache Software Foundation