org.apache.pig
Class PigServer

java.lang.Object
  extended by org.apache.pig.PigServer

public class PigServer
extends Object

This class is the program's connection to Pig. Typically a program will create a PigServer instance. The programmer then registers queries using registerQuery() and retrieves results using openIterator() or store().


Nested Class Summary
static class PigServer.SortInfoSetter
           
 
Constructor Summary
PigServer(ExecType execType)
           
PigServer(ExecType execType, Properties properties)
           
PigServer(PigContext context)
           
PigServer(PigContext context, boolean connect)
           
PigServer(String execTypeString)
           
 
Method Summary
 void addPathToSkip(String path)
          Add a path to be skipped while automatically shipping binaries for streaming.
 long capacity()
          Returns the unused byte capacity of an HDFS filesystem.
 LogicalPlan clonePlan(String alias)
           
 void debugOff()
           
 void debugOn()
           
 boolean deleteFile(String filename)
           
 void discardBatch()
          Discards a batch of Pig commands.
 Schema dumpSchema(String alias)
           
 List<ExecJob> executeBatch()
          Submits a batch of Pig commands for execution.
 boolean existsFile(String filename)
           
 void explain(String alias, PrintStream stream)
          Provide information on how a pig query will be executed.
 void explain(String alias, String format, boolean verbose, boolean markAsExecute, PrintStream lps, PrintStream pps, PrintStream eps)
          Provide information on how a pig query will be executed.
 long fileSize(String filename)
          Returns the length of a file in bytes which exists in the HDFS (accounts for replication).
 Map<String,LogicalPlan> getAliases()
           
 Set<String> getAliasKeySet()
           
 Map<LogicalOperator,DataBag> getExamples(String alias)
           
 PigContext getPigContext()
           
 boolean isBatchEmpty()
          Returns whether there is anything to process in the current batch.
 boolean isBatchOn()
          Retrieve the current execution mode.
 String[] listPaths(String dir)
           
 boolean mkdirs(String dirs)
           
 Iterator<Tuple> openIterator(String id)
          Forces execution of query (and all queries from which it reads), in order to materialize result
static ExecType parseExecType(String str)
           
 void printAliases()
           
 void registerFunction(String function, FuncSpec funcSpec)
          Defines an alias for the given function spec.
 void registerFunction(String function, String functionSpec)
          Deprecated. 
 void registerJar(String name)
          Registers a jar file.
 void registerQuery(String query)
           
 void registerQuery(String query, int startLine)
          Register a query with the Pig runtime.
 void registerScript(String fileName)
           
 void registerStreamingCommand(String commandAlias, StreamingCommand command)
          Defines an alias for the given streaming command.
 boolean renameFile(String source, String target)
           
 void setBatchOn()
          Starts batch execution mode.
 void setDefaultParallel(int p)
           
 void setJobName(String name)
           
 void setJobPriority(String priority)
           
 void shutdown()
           
 ExecJob store(String id, String filename)
          Store an alias into a file
 ExecJob store(String id, String filename, String func)
          Forces execution of query (and all queries from which it reads), in order to store result in file.
 long totalHadoopTimeSpent()
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

PigServer

public PigServer(String execTypeString)
          throws ExecException,
                 IOException
Throws:
ExecException
IOException

PigServer

public PigServer(ExecType execType)
          throws ExecException
Throws:
ExecException

PigServer

public PigServer(ExecType execType,
                 Properties properties)
          throws ExecException
Throws:
ExecException

PigServer

public PigServer(PigContext context)
          throws ExecException
Throws:
ExecException

PigServer

public PigServer(PigContext context,
                 boolean connect)
          throws ExecException
Throws:
ExecException
Method Detail

parseExecType

public static ExecType parseExecType(String str)
                              throws IOException
Throws:
IOException

getPigContext

public PigContext getPigContext()

debugOn

public void debugOn()

debugOff

public void debugOff()

setDefaultParallel

public void setDefaultParallel(int p)

setBatchOn

public void setBatchOn()
Starts batch execution mode.


isBatchOn

public boolean isBatchOn()
Retrieve the current execution mode.

Returns:
true if the execution mode is batch; false otherwise.

isBatchEmpty

public boolean isBatchEmpty()
                     throws FrontendException
Returns whether there is anything to process in the current batch.

Returns:
true if there are no stores to process in the current batch, false otherwise.
Throws:
FrontendException

executeBatch

public List<ExecJob> executeBatch()
                           throws FrontendException,
                                  ExecException
Submits a batch of Pig commands for execution.

Throws:
FrontendException
ExecException

discardBatch

public void discardBatch()
                  throws FrontendException
Discards a batch of Pig commands.

Throws:
FrontendException
ExecException

addPathToSkip

public void addPathToSkip(String path)
Add a path to be skipped while automatically shipping binaries for streaming.

Parameters:
path - path to be skipped

registerFunction

@Deprecated
public void registerFunction(String function,
                                        String functionSpec)
Deprecated. 

Defines an alias for the given function spec. This is useful for functions that require arguments to the constructor.

Parameters:
function - - the new function alias to define.
functionSpec - - the name of the function and any arguments. It should have the form: classname('arg1', 'arg2', ...)

registerFunction

public void registerFunction(String function,
                             FuncSpec funcSpec)
Defines an alias for the given function spec. This is useful for functions that require arguments to the constructor.

Parameters:
function - - the new function alias to define.
funcSpec - - the FuncSpec object representing the name of the function class and any arguments to constructor.

registerStreamingCommand

public void registerStreamingCommand(String commandAlias,
                                     StreamingCommand command)
Defines an alias for the given streaming command.

Parameters:
commandAlias - - the new command alias to define
command - - streaming command to be executed

registerJar

public void registerJar(String name)
                 throws IOException
Registers a jar file. Name of the jar file can be an absolute or relative path. If multiple resources are found with the specified name, the first one is registered as returned by getSystemResources. A warning is issued to inform the user.

Parameters:
name - of the jar file to register
Throws:
IOException

registerQuery

public void registerQuery(String query,
                          int startLine)
                   throws IOException
Register a query with the Pig runtime. The query is parsed and registered, but it is not executed until it is needed.

Parameters:
query - a Pig Latin expression to be evaluated.
startLine - line number of the query within the whole script
Throws:
IOException

clonePlan

public LogicalPlan clonePlan(String alias)
                      throws IOException
Throws:
IOException

registerQuery

public void registerQuery(String query)
                   throws IOException
Throws:
IOException

registerScript

public void registerScript(String fileName)
                    throws IOException
Throws:
IOException

printAliases

public void printAliases()
                  throws FrontendException
Throws:
FrontendException

dumpSchema

public Schema dumpSchema(String alias)
                  throws IOException
Throws:
IOException

setJobName

public void setJobName(String name)

setJobPriority

public void setJobPriority(String priority)

openIterator

public Iterator<Tuple> openIterator(String id)
                             throws IOException
Forces execution of query (and all queries from which it reads), in order to materialize result

Throws:
IOException

store

public ExecJob store(String id,
                     String filename)
              throws IOException
Store an alias into a file

Parameters:
id - The alias to store
filename - The file in which to store the alias
Throws:
IOException

store

public ExecJob store(String id,
                     String filename,
                     String func)
              throws IOException
Forces execution of query (and all queries from which it reads), in order to store result in file.

Throws:
IOException

explain

public void explain(String alias,
                    PrintStream stream)
             throws IOException
Provide information on how a pig query will be executed. For now this information is very developer focussed, and probably not very useful to the average user.

Parameters:
alias - Name of alias to explain.
stream - PrintStream to write explanation to.
Throws:
IOException - if the requested alias cannot be found.

explain

public void explain(String alias,
                    String format,
                    boolean verbose,
                    boolean markAsExecute,
                    PrintStream lps,
                    PrintStream pps,
                    PrintStream eps)
             throws IOException
Provide information on how a pig query will be executed.

Parameters:
alias - Name of alias to explain.
format - Format in which the explain should be printed
verbose - Controls the amount of information printed
markAsExecute - When set, treats the explain like a call to execute, in the respect that all the pending stores are marked as complete.
lps - Stream to print the logical tree
pps - Stream to print the physical tree
eps - Stream to print the execution tree
Throws:
IOException - if the requested alias cannot be found.

capacity

public long capacity()
              throws IOException
Returns the unused byte capacity of an HDFS filesystem. This value does not take into account a replication factor, as that can vary from file to file. Thus if you are using this to determine if your data set will fit in the HDFS, you need to divide the result of this call by your specific replication setting.

Returns:
unused byte capacity of the file system.
Throws:
IOException

fileSize

public long fileSize(String filename)
              throws IOException
Returns the length of a file in bytes which exists in the HDFS (accounts for replication).

Parameters:
filename -
Returns:
length of the file in bytes
Throws:
IOException

existsFile

public boolean existsFile(String filename)
                   throws IOException
Throws:
IOException

deleteFile

public boolean deleteFile(String filename)
                   throws IOException
Throws:
IOException

renameFile

public boolean renameFile(String source,
                          String target)
                   throws IOException
Throws:
IOException

mkdirs

public boolean mkdirs(String dirs)
               throws IOException
Throws:
IOException

listPaths

public String[] listPaths(String dir)
                   throws IOException
Throws:
IOException

totalHadoopTimeSpent

public long totalHadoopTimeSpent()

getAliases

public Map<String,LogicalPlan> getAliases()

shutdown

public void shutdown()

getAliasKeySet

public Set<String> getAliasKeySet()

getExamples

public Map<LogicalOperator,DataBag> getExamples(String alias)


Copyright © ${year} The Apache Software Foundation