org.apache.pig.impl.io
Class FileLocalizer

java.lang.Object
  extended by org.apache.pig.impl.io.FileLocalizer

public class FileLocalizer
extends Object


Nested Class Summary
static class FileLocalizer.DataStorageInputStreamIterator
           
static class FileLocalizer.FetchFileRet
           
 
Field Summary
static String LOCAL_PREFIX
           
static org.apache.hadoop.fs.permission.FsPermission OWNER_ONLY_PERMS
           
static int STYLE_UNIX
           
static int STYLE_WINDOWS
           
 
Constructor Summary
FileLocalizer()
           
 
Method Summary
static OutputStream create(String fileSpec, boolean append, PigContext pigContext)
           
static OutputStream create(String fileSpec, PigContext pigContext)
           
static boolean delete(String fileSpec, PigContext pigContext)
           
static void deleteTempFiles()
           
static FileLocalizer.FetchFileRet fetchFile(Properties properties, String filePath)
          Ensures that the passed path is on the local file system, fetching it to the java.io.tmpdir if necessary.
static FileLocalizer.FetchFileRet[] fetchFiles(Properties properties, String filePath)
          Ensures that the passed files pointed to by path are on the local file system, fetching them to the java.io.tmpdir if necessary.
static FileLocalizer.FetchFileRet fetchResource(String name)
          Ensures that the passed resource is available from the local file system, fetching it to a temporary directory.
static boolean fileExists(String filename, DataStorage store)
          Deprecated. Use fileExists(String, PigContext) instead
static boolean fileExists(String filename, PigContext context)
           
static String fullPath(String fileName, DataStorage storage)
          Deprecated. Use fullPath(String, PigContext) instead
static String fullPath(String filename, PigContext pigContext)
           
static long getSize(String fileName)
           
static long getSize(String fileName, Properties properties)
           
static org.apache.hadoop.fs.Path getTemporaryPath(PigContext pigContext)
           
static org.apache.hadoop.fs.Path getTemporaryPath(PigContext pigContext, String suffix)
           
static String hadoopify(String filename, PigContext pigContext)
           
static boolean isDirectory(String filename, DataStorage store)
          Deprecated. Use isDirectory(String, PigContext) instead.
static boolean isDirectory(String filename, PigContext context)
           
static boolean isFile(String filename, DataStorage store)
          Deprecated. Use isFile(String, PigContext) instead
static boolean isFile(String filename, PigContext context)
           
static InputStream open(String fileName, ExecType execType, DataStorage storage)
          Deprecated. Use open(String, PigContext) instead
static SeekableInputStream open(String fileSpec, long offset, PigContext pigContext)
           
static InputStream open(String fileSpec, PigContext pigContext)
           
static InputStream openDFSFile(String fileName)
          This function is meant to be used if the mappers/reducers want to access any HDFS file.
static InputStream openDFSFile(String fileName, Properties properties)
           
static String parseCygPath(String path, int style)
          Convert path from Windows convention to Unix convention.
static void setInitialized(boolean initialized)
          This method is only used by test code to reset state.
static void setR(Random r)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

LOCAL_PREFIX

public static final String LOCAL_PREFIX
See Also:
Constant Field Values

STYLE_UNIX

public static final int STYLE_UNIX
See Also:
Constant Field Values

STYLE_WINDOWS

public static final int STYLE_WINDOWS
See Also:
Constant Field Values

OWNER_ONLY_PERMS

public static org.apache.hadoop.fs.permission.FsPermission OWNER_ONLY_PERMS
Constructor Detail

FileLocalizer

public FileLocalizer()
Method Detail

openDFSFile

public static InputStream openDFSFile(String fileName)
                               throws IOException
This function is meant to be used if the mappers/reducers want to access any HDFS file.

Parameters:
fileName - the name of the HDFS file to open
Returns:
InputStream of the open file.
Throws:
IOException

openDFSFile

public static InputStream openDFSFile(String fileName,
                                      Properties properties)
                               throws IOException
Throws:
IOException

getSize

public static long getSize(String fileName)
                    throws IOException
Throws:
IOException

getSize

public static long getSize(String fileName,
                           Properties properties)
                    throws IOException
Throws:
IOException

open

@Deprecated
public static InputStream open(String fileName,
                                          ExecType execType,
                                          DataStorage storage)
                        throws IOException
Deprecated. Use open(String, PigContext) instead

This function returns an input stream to a local file system file or a file residing on Hadoop's DFS.

Parameters:
fileName - The filename to open
execType - execType indicating whether executing in local mode or MapReduce mode (Hadoop)
storage - The DataStorage object used to open the file named by fileName
Returns:
InputStream to the file named by fileName
Throws:
IOException

fullPath

@Deprecated
public static String fullPath(String fileName,
                                         DataStorage storage)
Deprecated. Use fullPath(String, PigContext) instead


open

public static InputStream open(String fileSpec,
                               PigContext pigContext)
                        throws IOException
Throws:
IOException

open

public static SeekableInputStream open(String fileSpec,
                                       long offset,
                                       PigContext pigContext)
                                throws IOException
Parameters:
fileSpec -
offset -
pigContext -
Returns:
SeekableInputStream
Throws:
IOException - This is an overloaded version of open for cases where there is a need to seek in the stream. Currently, seeking is supported only for a file, not for a directory or a glob.

create

public static OutputStream create(String fileSpec,
                                  PigContext pigContext)
                           throws IOException
Throws:
IOException

create

public static OutputStream create(String fileSpec,
                                  boolean append,
                                  PigContext pigContext)
                           throws IOException
Throws:
IOException

delete

public static boolean delete(String fileSpec,
                             PigContext pigContext)
                      throws IOException
Throws:
IOException

setInitialized

public static void setInitialized(boolean initialized)
This method is only used by test code to reset state.

Parameters:
initialized -

deleteTempFiles

public static void deleteTempFiles()

getTemporaryPath

public static org.apache.hadoop.fs.Path getTemporaryPath(PigContext pigContext)
                                                  throws IOException
Throws:
IOException

getTemporaryPath

public static org.apache.hadoop.fs.Path getTemporaryPath(PigContext pigContext,
                                                         String suffix)
                                                  throws IOException
Throws:
IOException

hadoopify

public static String hadoopify(String filename,
                               PigContext pigContext)
                        throws IOException
Throws:
IOException

fullPath

public static String fullPath(String filename,
                              PigContext pigContext)
                       throws IOException
Throws:
IOException

fileExists

public static boolean fileExists(String filename,
                                 PigContext context)
                          throws IOException
Throws:
IOException

fileExists

@Deprecated
public static boolean fileExists(String filename,
                                            DataStorage store)
                          throws IOException
Deprecated. Use fileExists(String, PigContext) instead

Throws:
IOException

isFile

public static boolean isFile(String filename,
                             PigContext context)
                      throws IOException
Throws:
IOException

isFile

@Deprecated
public static boolean isFile(String filename,
                                        DataStorage store)
                      throws IOException
Deprecated. Use isFile(String, PigContext) instead

Throws:
IOException

isDirectory

public static boolean isDirectory(String filename,
                                  PigContext context)
                           throws IOException
Throws:
IOException

isDirectory

@Deprecated
public static boolean isDirectory(String filename,
                                             DataStorage store)
                           throws IOException
Deprecated. Use isDirectory(String, PigContext) instead.

Throws:
IOException

setR

public static void setR(Random r)

parseCygPath

public static String parseCygPath(String path,
                                  int style)
Convert path from Windows convention to Unix convention. Invoked under cygwin.

Parameters:
path - path in Windows convention
Returns:
path in Unix convention, or null if the conversion fails

fetchFile

public static FileLocalizer.FetchFileRet fetchFile(Properties properties,
                                                   String filePath)
                                            throws IOException
Ensures that the passed path is on the local file system, fetching it to the java.io.tmpdir if necessary. If pig.jars.relative.to.dfs is true and dfs is not null, then a relative path is assumed to be relative to the passed dfs active directory. Otherwise, it is assumed to be relative to the local working directory.

Throws:
IOException

fetchFiles

public static FileLocalizer.FetchFileRet[] fetchFiles(Properties properties,
                                                      String filePath)
                                               throws IOException
Ensures that the passed files pointed to by path are on the local file system, fetching them to the java.io.tmpdir if necessary. If pig.jars.relative.to.dfs is true and dfs is not null, then relative paths are assumed to be relative to the passed dfs active directory. Otherwise, they are assumed to be relative to the local working directory.

Throws:
IOException

fetchResource

public static FileLocalizer.FetchFileRet fetchResource(String name)
                                                throws IOException,
                                                       ResourceNotFoundException
Ensures that the passed resource is available from the local file system, fetching it to a temporary directory.

Throws:
ResourceNotFoundException
IOException


Copyright © 2007-2012 The Apache Software Foundation