@InterfaceAudience.Public @InterfaceStability.Stable public class PigServer extends Object
Modifier and Type | Class and Description |
---|---|
protected class |
PigServer.Graph |
Modifier and Type | Field and Description |
---|---|
protected Deque<PigServer.Graph> |
graphs |
protected org.apache.commons.logging.Log |
log |
protected PigContext |
pigContext |
static String |
PRETTY_PRINT_SCHEMA_PROPERTY |
protected String |
scope |
Constructor and Description |
---|
PigServer(ExecType execType) |
PigServer(ExecType execType,
org.apache.hadoop.conf.Configuration conf) |
PigServer(ExecType execType,
Properties properties) |
PigServer(PigContext context) |
PigServer(PigContext context,
boolean connect) |
PigServer(Properties properties) |
PigServer(String execTypeString) |
PigServer(String execTypeString,
Properties properties) |
Modifier and Type | Method and Description |
---|---|
void |
addPathToSkip(String path)
Add a path to be skipped while automatically shipping binaries for
streaming.
|
long |
capacity()
Returns the unused byte capacity of an HDFS filesystem.
|
void |
debugOff()
Set the logging level to the default.
|
void |
debugOn()
Set the logging level to DEBUG.
|
boolean |
deleteFile(String filename)
Delete a file.
|
void |
discardBatch()
Discards a batch of Pig commands.
|
Schema |
dumpSchema(String alias)
Write the schema for an alias to System.out.
|
Schema |
dumpSchemaNested(String alias,
String nestedAlias)
Write the schema for a nestedAlias to System.out.
|
List<ExecJob> |
executeBatch()
Submits a batch of Pig commands for execution.
|
List<ExecJob> |
executeBatch(boolean parseAndBuild)
Submits a batch of Pig commands for execution.
|
boolean |
existsFile(String filename)
Test whether a file exists.
|
void |
explain(String alias,
PrintStream stream)
Provide information on how a pig query will be executed.
|
void |
explain(String alias,
String format,
boolean verbose,
boolean markAsExecute,
PrintStream lps,
PrintStream eps,
File dir,
String suffix)
Provide information on how a pig query will be executed.
|
long |
fileSize(String filename)
Returns the length of a file in bytes which exists in the HDFS (accounts for replication).
|
Map<String,LogicalPlan> |
getAliases()
Return a map containing the logical plan associated with each alias.
|
Set<String> |
getAliasKeySet()
Get the set of all current aliases.
|
protected PigServer.Graph |
getClonedGraph()
Creates a clone of the current DAG
|
PigServer.Graph |
getCurrentDAG()
Current DAG
|
Map<Operator,DataBag> |
getExamples(String alias) |
String |
getJobName() |
String |
getJobPriority() |
protected List<ExecJob> |
getJobs(PigStats stats)
Retrieves a list of Job objects from the PigStats object
|
String |
getLastRel() |
LogicalPlanData |
getLogicalPlanData()
Returns data associated with LogicalPlan.
|
PigContext |
getPigContext() |
boolean |
isBatchEmpty()
Returns whether there is anything to process in the current batch.
|
boolean |
isBatchOn()
Retrieve the current execution mode.
|
boolean |
isDebugOn() |
protected PigStats |
launchPlan(LogicalPlan lp,
String jobName)
A common method for launching the jobs according to the logical plan
|
String[] |
listPaths(String dir)
List the contents of a directory.
|
boolean |
mkdirs(String dirs)
Make a directory.
|
Iterator<Tuple> |
openIterator(String id)
Executes a Pig Latin script up to and including indicated alias.
|
protected List<String> |
paramMapToList(Map<String,String> params) |
void |
parseAndBuild()
This method parses the scripts and builds the LogicalPlan.
|
void |
printAliases()
Intended to be used by unit tests only.
|
void |
printHistory(boolean withNumbers) |
void |
registerCode(String path,
String scriptingLang,
String namespace)
Universal Scripting Language Support, see PIG-928
|
void |
registerFunction(String function,
FuncSpec funcSpec)
Defines an alias for the given function spec.
|
void |
registerJar(String name)
Registers a jar file.
|
void |
registerQuery(String query)
Register a query with the Pig runtime.
|
void |
registerQuery(String query,
int startLine)
Register a query with the Pig runtime.
|
void |
registerScript(InputStream in)
Register a Pig script from an InputStream source, which is more general and extensible.
The Pig script can come from a local file, in which case you can use FileInputStream.
|
void |
registerScript(InputStream in,
List<String> paramsFiles)
Register a Pig script from an InputStream source, which is more general and extensible.
The Pig script can come from a local file, in which case you can use FileInputStream.
|
void |
registerScript(InputStream in,
Map<String,String> params)
Register a Pig script from an InputStream source, which is more general and extensible.
The Pig script can come from a local file, in which case you can use FileInputStream.
|
void |
registerScript(InputStream in,
Map<String,String> params,
List<String> paramsFiles)
Register a pig script from InputStream.
The pig script can be from local file, then you can use FileInputStream. |
void |
registerScript(String fileName)
Register a query with the Pig runtime.
|
void |
registerScript(String fileName,
List<String> paramsFiles)
Register a pig script file.
|
void |
registerScript(String fileName,
Map<String,String> params)
Register a pig script file.
|
void |
registerScript(String fileName,
Map<String,String> params,
List<String> paramsFiles)
Register a pig script file.
|
void |
registerStreamingCommand(String commandAlias,
StreamingCommand command)
Defines an alias for the given streaming command.
|
boolean |
renameFile(String source,
String target)
Rename a file.
|
static void |
resetScope() |
void |
setBatchOn()
Starts batch execution mode.
|
void |
setDefaultParallel(int p)
Set the default parallelism for this job
|
void |
setJobName(String name)
Set the name of the job.
|
void |
setJobPriority(String priority)
Set Hadoop job priority.
|
void |
setSkipParseInRegisterForBatch(boolean skipParseInRegisterForBatch)
Set whether to skip parsing while registering the query in batch mode
|
void |
setValidateEachStatement(boolean validateEachStatement)
This can be called to indicate if the query is being parsed/compiled
in a mode that expects each statement to be validated as it is
entered, instead of just doing it once for whole script.
|
void |
shutdown()
Reclaims resources used by this instance of PigServer.
|
ExecJob |
store(String id,
String filename)
Executes a Pig Latin script up to and including indicated alias and stores the resulting
records into a file.
|
ExecJob |
store(String id,
String filename,
String func)
Executes a Pig Latin script up to and including indicated alias and stores the resulting
records into a file.
|
protected final org.apache.commons.logging.Log log
public static final String PRETTY_PRINT_SCHEMA_PROPERTY
protected final Deque<PigServer.Graph> graphs
protected final PigContext pigContext
protected final String scope
public PigServer(String execTypeString) throws ExecException, IOException
execTypeString
- can be 'mapreduce' or 'local'. Local mode will
use Hadoop's local job runner to execute the job on the local machine.
Mapreduce mode will connect to a cluster to execute the job. If
execTypeString is not one of these two, Pig will deduce the ExecutionEngine
if it is on the classpath and use it for the backend execution.
ExecException
IOException
public PigServer(String execTypeString, Properties properties) throws ExecException, IOException
ExecException
IOException
public PigServer(Properties properties) throws ExecException, IOException
ExecException
IOException
public PigServer(ExecType execType) throws ExecException
execType
- execution type to start the engine. Local mode will
use Hadoop's local job runner to execute the job on the local machine.
Mapreduce mode will connect to a cluster to execute the job.
ExecException
public PigServer(ExecType execType, Properties properties) throws ExecException
ExecException
public PigServer(ExecType execType, org.apache.hadoop.conf.Configuration conf) throws ExecException
ExecException
public PigServer(PigContext context) throws ExecException
ExecException
public PigServer(PigContext context, boolean connect) throws ExecException
ExecException
public static void resetScope()
public PigContext getPigContext()
public PigServer.Graph getCurrentDAG()
public void debugOn()
public void debugOff()
public void setDefaultParallel(int p)
p
- default number of reducers to use for this job.
public void setBatchOn()
public boolean isBatchOn()
public boolean isBatchEmpty() throws FrontendException
FrontendException
public void parseAndBuild() throws IOException
Follow this method with a call to executeBatch(boolean) with the argument set to false.
Do not use executeBatch() after calling this method, as that will re-parse and rebuild the script.
IOException
public List<ExecJob> executeBatch() throws IOException
IOException
public List<ExecJob> executeBatch(boolean parseAndBuild) throws IOException
parseAndBuild()
before. In that case, pass false as the argument.
parseAndBuild
- IOException
protected List<ExecJob> getJobs(PigStats stats)
stats
-
public void discardBatch() throws FrontendException
FrontendException
public void addPathToSkip(String path)
path
- path to be skipped
public void registerFunction(String function, FuncSpec funcSpec)
function
- the new function alias to define.
funcSpec
- the FuncSpec object representing the name of
the function class and any arguments to constructor.
public void registerStreamingCommand(String commandAlias, StreamingCommand command)
commandAlias
- the new command alias to define
command
- streaming command to be executed
public void registerJar(String name) throws IOException
name
- of the jar file to registerIOException
public void registerCode(String path, String scriptingLang, String namespace) throws IOException
path
- path of the script filescriptingLang
- language keyword or scriptingEngine used to interpret the scriptnamespace
- namespace defined for functions of this scriptIOException
public void registerQuery(String query, int startLine) throws IOException
query
- a Pig Latin expression to be evaluated.startLine
- line number of the query within the whole scriptIOException
public void registerQuery(String query) throws IOException
registerQuery(String, int)
with startLine set to 1.query
- a Pig Latin expression to be evaluated.IOException
public void registerScript(InputStream in) throws IOException
in
- IOException
public void registerScript(InputStream in, Map<String,String> params) throws IOException
in
- params
- the key is the parameter name, and the value is the parameter valueIOException
public void registerScript(InputStream in, List<String> paramsFiles) throws IOException
in
- paramsFiles
- files which have the parameter settingIOException
public void registerScript(InputStream in, Map<String,String> params, List<String> paramsFiles) throws IOException
in
- params
- the key is the parameter name, and the value is the parameter valueparamsFiles
- files which have the parameter settingIOException
protected PigServer.Graph getClonedGraph() throws IOException
IOException
public void registerScript(String fileName) throws IOException
fileName
- file to read query from.IOException
public void registerScript(String fileName, Map<String,String> params) throws IOException
fileName
- pig script fileparams
- the key is the parameter name, and the value is the parameter valueIOException
public void registerScript(String fileName, List<String> paramsFiles) throws IOException
fileName
- pig script fileparamsFiles
- files which have the parameter settingIOException
public void registerScript(String fileName, Map<String,String> params, List<String> paramsFiles) throws IOException
fileName
- pig scriptparams
- the key is the parameter name, and the value is the parameter valueparamsFiles
- files which have the parameter settingIOException
public void printAliases() throws FrontendException
FrontendException
public Schema dumpSchema(String alias) throws IOException
alias
- Alias whose schema will be written outIOException
public Schema dumpSchemaNested(String alias, String nestedAlias) throws IOException
alias
- Alias whose schema has nestedAliasnestedAlias
- Alias whose schema will be written outIOException
public void setJobName(String name)
name
- of job
public void setJobPriority(String priority)
priority
- valid values are found in JobPriority
public Iterator<Tuple> openIterator(String id) throws IOException
PigServer server = new PigServer(); server.registerQuery("A = load 'foo';"); server.registerQuery("B = filter A by $0 > 0;"); server.registerQuery("C = order B by $1;");
Then
server.openIterator("B");
filtered but unsorted data will be returned. If instead a user does
server.openIterator("C");
filtered and sorted data will be returned.
id
- Alias to open iterator forIOException
public ExecJob store(String id, String filename) throws IOException
PigServer server = new PigServer(); server.registerQuery("A = load 'foo';"); server.registerQuery("B = filter A by $0 > 0;"); server.registerQuery("C = order B by $1;");
Then
server.store("B", "bar");
filtered but unsorted data will be stored to the file bar. If instead a user does
server.store("C", "bar");
filtered and sorted data will be stored to the file bar. Equivalent to calling
store(String, String, String)
with
org.apache.pig.PigStorage as the store function.id
- The alias to storefilename
- The file to store to
ExecJob
containing information about this jobIOException
public ExecJob store(String id, String filename, String func) throws IOException
PigServer server = new PigServer(); server.registerQuery("A = load 'foo';"); server.registerQuery("B = filter A by $0 > 0;"); server.registerQuery("C = order B by $1;");
Then
server.store("B", "bar", "mystorefunc");
filtered but unsorted data will be stored to the file bar using mystorefunc. If instead a user does
server.store("C", "bar", "mystorefunc");
filtered and sorted data will be stored to the file bar using mystorefunc.
id
- The alias to storefilename
- The file to store to
func
- store function to useExecJob
containing information about this jobIOException
public void explain(String alias, PrintStream stream) throws IOException
alias
- Name of alias to explain.stream
- PrintStream to write explanation to.IOException
- if the requested alias cannot be found.
public void explain(String alias, String format, boolean verbose, boolean markAsExecute, PrintStream lps, PrintStream eps, File dir, String suffix) throws IOException
alias
- Name of alias to explain.format
- Format in which the explain should be printed. If text, then the plan will
be printed in plain text. Otherwise, the execution plan will be printed in
DOT format.verbose
- Controls the amount of information printedmarkAsExecute
- When set will treat the explain like a
call to execute in the respect that all the pending stores are
marked as complete.lps
- Stream to print the logical treeeps
- Stream to print the ExecutionEngine trees. If null, then will print to filesdir
- Directory to print ExecutionEngine trees. If null, will use epssuffix
- Suffix of file namesIOException
- if the requested alias cannot be found.
public long capacity() throws IOException
IOException
public long fileSize(String filename) throws IOException
filename
- IOException
public boolean existsFile(String filename) throws IOException
filename
- to testIOException
public boolean deleteFile(String filename) throws IOException
filename
- to deleteIOException
public boolean renameFile(String source, String target) throws IOException
source
- file to renametarget
- new file nameIOException
public boolean mkdirs(String dirs) throws IOException
dirs
- directory to makeIOException
public String[] listPaths(String dir) throws IOException
dir
- name of directory to listIOException
public Map<String,LogicalPlan> getAliases()
public void shutdown()
public Map<Operator,DataBag> getExamples(String alias) throws IOException
IOException
public void printHistory(boolean withNumbers)
protected PigStats launchPlan(LogicalPlan lp, String jobName) throws ExecException, FrontendException
lp
- The logical planjobName
- A String containing the job name to be usedExecException
FrontendException
public LogicalPlanData getLogicalPlanData()
registerQuery(String)
or registerScript(InputStream)
methods.
public void setValidateEachStatement(boolean validateEachStatement)
validateEachStatement
-
public void setSkipParseInRegisterForBatch(boolean skipParseInRegisterForBatch)
skipParseInRegisterForBatch
-
public String getLastRel()
public boolean isDebugOn()
public String getJobName()
public String getJobPriority()
Copyright © 2007-2017 The Apache Software Foundation