public class AvroStorageUtils extends Object
Modifier and Type | Field and Description |
---|---|
static org.apache.avro.Schema |
BooleanSchema |
static org.apache.avro.Schema |
BytesSchema |
static org.apache.avro.Schema |
DoubleSchema |
static org.apache.avro.Schema |
FloatSchema |
static org.apache.avro.Schema |
IntSchema |
static org.apache.avro.Schema |
LongSchema |
static org.apache.avro.Schema |
NullSchema |
static org.apache.hadoop.fs.PathFilter |
PATH_FILTER
ignore hdfs files with prefix "_" and "."
|
static org.apache.avro.Schema |
StringSchema |
Constructor and Description |
---|
AvroStorageUtils() |
Modifier and Type | Method and Description |
---|---|
static boolean |
containsGenericUnion(org.apache.avro.Schema s)
determine whether the input schema contains generic unions
|
protected static boolean |
containsGenericUnion(org.apache.avro.Schema s,
Set<org.apache.avro.Schema> visitedRecords)
Called by
containsGenericUnion(Schema) and it recursively checks
whether the input schema contains generic unions. |
static boolean |
containsRecursiveRecord(org.apache.avro.Schema s)
determine whether the input schema contains recursive records
|
protected static boolean |
containsRecursiveRecord(org.apache.avro.Schema s,
Set<String> definedRecordNames)
Called by
containsRecursiveRecord(Schema) and it recursively checks
whether the input schema contains recursive records. |
static org.apache.avro.Schema.Field |
createUDField(int index,
org.apache.avro.Schema s)
create an avro field using the given schema
|
static org.apache.avro.Schema |
createUDPartialRecordSchema()
create an avro field with null schema (it is a placeholder)
|
static org.apache.avro.Schema |
getAcceptedType(org.apache.avro.Schema in)
extract schema from a nullable union
|
static Set<org.apache.hadoop.fs.Path> |
getAllFilesRecursively(Set<org.apache.hadoop.fs.Path> basePaths,
org.apache.hadoop.conf.Configuration conf)
Returns all non-hidden files recursively inside the base paths given
|
static org.apache.hadoop.fs.Path |
getLast(org.apache.hadoop.fs.Path path,
org.apache.hadoop.fs.FileSystem fs)
get last file of a hdfs path if it is a directory;
or return the file itself if path is a file
|
static Set<org.apache.hadoop.fs.Path> |
getPaths(String pathString,
org.apache.hadoop.conf.Configuration conf,
boolean failIfNotFound)
Gets the list of paths from the pathString specified which may contain
comma-separated paths and glob style path
|
static org.apache.avro.Schema |
getSchema(org.apache.hadoop.fs.Path path,
org.apache.hadoop.fs.FileSystem fs)
This method is called by
getAvroSchema. |
static Map<org.apache.hadoop.fs.Path,Map<Integer,Integer>> |
getSchemaToMergedSchemaMap(org.apache.avro.Schema mergedSchema,
Map<org.apache.hadoop.fs.Path,org.apache.avro.Schema> mergedFiles)
When merging multiple avro record schemas, we build a map (schemaToMergedSchemaMap)
to associate each input record with a remapping of its fields relative to the merged
schema.
|
static org.apache.avro.Schema.Field |
getUDField(org.apache.avro.Schema s,
int index)
get field schema given index number
|
static boolean |
isAcceptableUnion(org.apache.avro.Schema in)
determine whether a union is a nullable union;
note that this function doesn't check containing
types of the input union recursively.
|
static boolean |
isTupleWrapper(ResourceSchema.ResourceFieldSchema pigSchema)
check whether it is just a wrapped tuple
|
static boolean |
isUDPartialRecordSchema(org.apache.avro.Schema s)
check whether a schema is a placeholder (using field name)
|
static org.apache.avro.Schema |
mergeSchema(org.apache.avro.Schema x,
org.apache.avro.Schema y)
This method merges two avro schemas into one.
|
static boolean |
noDir(org.apache.hadoop.fs.FileStatus[] ss)
check whether there is NO directory in the input file (status) list
|
static ResourceSchema.ResourceFieldSchema |
wrapAsTuple(ResourceSchema.ResourceFieldSchema subFieldSchema)
wrap a pig schema as tuple
|
static org.apache.avro.Schema |
wrapAsUnion(org.apache.avro.Schema schema,
boolean nullable)
Wrap an avro schema as a nullable union if needed.
|
public static org.apache.avro.Schema BooleanSchema
public static org.apache.avro.Schema LongSchema
public static org.apache.avro.Schema FloatSchema
public static org.apache.avro.Schema DoubleSchema
public static org.apache.avro.Schema IntSchema
public static org.apache.avro.Schema StringSchema
public static org.apache.avro.Schema BytesSchema
public static org.apache.avro.Schema NullSchema
public static org.apache.hadoop.fs.PathFilter PATH_FILTER
public static org.apache.avro.Schema.Field createUDField(int index, org.apache.avro.Schema s)
public static org.apache.avro.Schema createUDPartialRecordSchema()
public static boolean isUDPartialRecordSchema(org.apache.avro.Schema s)
public static org.apache.avro.Schema.Field getUDField(org.apache.avro.Schema s, int index)
public static Set<org.apache.hadoop.fs.Path> getPaths(String pathString, org.apache.hadoop.conf.Configuration conf, boolean failIfNotFound) throws IOException
Throws:
IOException
public static Set<org.apache.hadoop.fs.Path> getAllFilesRecursively(Set<org.apache.hadoop.fs.Path> basePaths, org.apache.hadoop.conf.Configuration conf) throws IOException
Throws:
IOException
public static boolean noDir(org.apache.hadoop.fs.FileStatus[] ss)
public static org.apache.hadoop.fs.Path getLast(org.apache.hadoop.fs.Path path, org.apache.hadoop.fs.FileSystem fs) throws IOException
Throws:
IOException
public static org.apache.avro.Schema mergeSchema(org.apache.avro.Schema x, org.apache.avro.Schema y) throws IOException
Parameters:
x - first avro schema to merge
y - second avro schema to merge
Throws:
IOException
public static Map<org.apache.hadoop.fs.Path,Map<Integer,Integer>> getSchemaToMergedSchemaMap(org.apache.avro.Schema mergedSchema, Map<org.apache.hadoop.fs.Path,org.apache.avro.Schema> mergedFiles) throws IOException
Parameters:
mergedSchema - new schema generated from multiple input schemas
mergedFiles - input avro files that are merged
Throws:
IOException
public static org.apache.avro.Schema wrapAsUnion(org.apache.avro.Schema schema, boolean nullable)
public static boolean containsRecursiveRecord(org.apache.avro.Schema s)
protected static boolean containsRecursiveRecord(org.apache.avro.Schema s, Set<String> definedRecordNames)
Called by containsRecursiveRecord(Schema)
and it recursively checks
whether the input schema contains recursive records.
public static boolean containsGenericUnion(org.apache.avro.Schema s)
protected static boolean containsGenericUnion(org.apache.avro.Schema s, Set<org.apache.avro.Schema> visitedRecords)
Called by containsGenericUnion(Schema)
and it recursively checks
whether the input schema contains generic unions.
public static boolean isAcceptableUnion(org.apache.avro.Schema in)
public static ResourceSchema.ResourceFieldSchema wrapAsTuple(ResourceSchema.ResourceFieldSchema subFieldSchema) throws IOException
Throws:
IOException
public static boolean isTupleWrapper(ResourceSchema.ResourceFieldSchema pigSchema)
public static org.apache.avro.Schema getAcceptedType(org.apache.avro.Schema in)
public static org.apache.avro.Schema getSchema(org.apache.hadoop.fs.Path path, org.apache.hadoop.fs.FileSystem fs) throws IOException
This method is called by getAvroSchema. The default implementation
returns the schema of an avro file; or the schema of the last file in a first-level
directory (it does not contain sub-directories).
Parameters:
path - path of a file or first level directory
fs - file system
Throws:
IOException
Copyright © 2007-2012 The Apache Software Foundation