为了让编译能通过,需要在maven中加入仓库地址以及一些必需的依赖包:
1.pentaho仓库:
在properties里面配置版本号:
<kettle.version>6.0.0.0-353</kettle.version>
<!-- Pentaho artifact repository; snapshot resolution is enabled. -->
<!-- NOTE(review): the URL is plain HTTP. Maven 3.8.1+ blocks external HTTP
     repositories by default; confirm whether an HTTPS endpoint should be used. -->
<repository>
    <id>pentaho1</id>
    <name>Pentaho Repository1</name>
    <url>http://repository.pentaho.org/artifactory/repo/</url>
    <snapshots>
        <enabled>true</enabled>
    </snapshots>
</repository>
2.kettle的一些核心依赖。由于有些jar包下载不下来,所以先用exclusion排除掉,再去mvn仓库找合适的包单独引入:
<!-- kettle begin -->
<!-- Kettle core. The exclusions below drop transitive artifacts so that
     they are not resolved through this dependency. -->
<dependency>
    <groupId>pentaho-kettle</groupId>
    <artifactId>kettle-core</artifactId>
    <version>${kettle.version}</version>
    <exclusions>
        <exclusion>
            <groupId>jug-lgpl</groupId>
            <artifactId>jug-lgpl</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-vfs2</artifactId>
        </exclusion>
        <exclusion>
            <groupId>secondstring</groupId>
            <artifactId>secondstring</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
        <exclusion>
            <groupId>xerces</groupId>
            <artifactId>xercesImpl</artifactId>
        </exclusion>
        <exclusion>
            <groupId>org.apache.xmlgraphics</groupId>
            <artifactId>batik-js</artifactId>
        </exclusion>
    </exclusions>
</dependency>

<!-- Kettle engine; commons-vfs2 is excluded here as well and declared explicitly further down. -->
<dependency>
    <groupId>pentaho-kettle</groupId>
    <artifactId>kettle-engine</artifactId>
    <version>${kettle.version}</version>
    <exclusions>
        <exclusion>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-vfs2</artifactId>
        </exclusion>
    </exclusions>
</dependency>

<!-- Pentaho libraries, all on ${kettle.version}. -->
<dependency>
    <groupId>pentaho-library</groupId>
    <artifactId>libbase</artifactId>
    <version>${kettle.version}</version>
</dependency>
<dependency>
    <groupId>pentaho-library</groupId>
    <artifactId>libformula</artifactId>
    <version>${kettle.version}</version>
</dependency>
<dependency>
    <groupId>pentaho</groupId>
    <artifactId>metastore</artifactId>
    <version>${kettle.version}</version>
</dependency>
<dependency>
    <groupId>pentaho</groupId>
    <artifactId>pentaho-mongodb-plugin</artifactId>
    <version>${kettle.version}</version>
</dependency>
<!-- NOTE(review): pinned to 6.0.0.0-351 instead of ${kettle.version}; confirm this is intentional. -->
<dependency>
    <groupId>pentaho</groupId>
    <artifactId>pentaho-mongo-utils</artifactId>
    <version>6.0.0.0-351</version>
</dependency>

<!-- Explicitly declared third-party libraries. commons-vfs2 replaces the copy
     excluded above; NOTE(review): jxl and rhino js are presumably substitutes
     for other artifacts that could not be downloaded; verify. -->
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-vfs2</artifactId>
    <version>2.1</version>
</dependency>
<dependency>
    <groupId>net.sourceforge.jexcelapi</groupId>
    <artifactId>jxl</artifactId>
    <version>2.6.12</version>
</dependency>
<dependency>
    <groupId>rhino</groupId>
    <artifactId>js</artifactId>
    <version>1.7R2</version>
</dependency>
<!-- kettle end -->
3.下载jar包时,如果遇到500M左右的大jar包,可以先暂停mvn,手动下载后再放到本地仓库的对应目录
插件的路径:D:\dev\apache-maven-3.3.9\repository\pentaho\pentaho-big-data-plugin\6.0.0.0-353
4.java调本地ktr代码:
import java.util.List; import org.junit.Test;
import org.junit.runner.RunWith;
import org.pentaho.di.core.KettleEnvironment;
import org.pentaho.di.core.Result;
import org.pentaho.di.core.RowMetaAndData;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.job.Job;
import org.pentaho.di.job.JobMeta;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import cn.shunlu.repository.BaseDataInitTest; @RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations="/applicationContext.xml")
public class KettleTest extends BaseDataInitTest{ private static Logger logger = LoggerFactory.getLogger(KettleTest.class); private static final String KETTLE_PLUGIN_BASE_FOLDER = "E:\\dev\\pentaho\\data-integration\\plugins\\";
// private static final String KETTLE_PLUGIN_BASE_FOLDER = "C:\\dev\\pentaho6\\data-integration\\system\\karaf\\system\\pentaho";
// private static final String KETTLE_PLUGIN_BASE_FOLDER = "C:\\dev\\pentaho6\\data-integration\\plugins"; /**
* 调转换
*/
@Test
public void test() {
logger.info("hi"+ logger.getClass().getName());
try
{
//这里是设置我们pentaho的运行环境,实际上我们加入的maven依赖只是为了让代码编译通过,实际真正干活的还是pentaho自己
//也可以自己建个maven的私服把pentaho里面的运行环境的jar丢上去,就能通过java调了
System.setProperty("KETTLE_PLUGIN_BASE_FOLDERS", KETTLE_PLUGIN_BASE_FOLDER);
String transPath = "e:\\tmp\\2.ktr"; KettleEnvironment.init(); TransMeta transMeta = new TransMeta(transPath); Trans trans = new Trans(transMeta);
System.out.println(trans);
trans.setVariable("importOrExport", "出口");
trans.setVariable("batchId", "57bebcfd816c0dffb72d6fa4");
trans.setVariable("codeMap_type", "country");
trans.execute(null); trans.waitUntilFinished(); Result r = trans.getResult();
List<RowMetaAndData> rowsResult = r.getRows(); if (trans.getErrors() > 0)
{
throw new Exception(trans.getResult().getLogText());
}
}
catch (Exception e)
{
e.printStackTrace();
}
} /**
*
* @description 调job
* @date 2016年9月6日
* @author
*/
@Test
public void testJob() throws Exception{
logger.info("hi"+ logger.getClass().getName());
try {
System.setProperty("KETTLE_PLUGIN_BASE_FOLDERS", KETTLE_PLUGIN_BASE_FOLDER); String jobPath = "e:\\tmp\\job1.kjb";
KettleEnvironment.init(); JobMeta jobMeta = new JobMeta(jobPath, null); Job job = new Job(null, jobMeta); job.setVariable("importOrExport", "出口");
job.setVariable("batchId", "57bebcfd816c0dffb72d6fa4");
job.setVariable("codeMap_type", "country"); job.start();
job.waitUntilFinished(); if (job.getErrors() > 0) {
throw new Exception(
"There are errors during job exception!(执行job发生异常)");
}
} catch (KettleException e) {
e.printStackTrace();
}
} }
注:加依赖的目的就是为了让编译能通过,最后运行的环境其实还是我们手动指定的plugins目录如图所示:
这里只是写了个testCase来调用我们本地的.ktr和.kjb。pentaho这边接收参数的话,我也只是做了个简单的job.
调转换就直接这么接收参数:
如果是调job并接收命名参数,需要在它的下一个步骤(转换)里面设置命名参数就行了:
然后这个转换还是跟上一个转换一样,参数也是用${paramName}这样的方式接收。其实代码网上很多,但是整合maven那些依赖之间的关系特别麻烦。
最近整合发现个问题,如果不将mongo-plugins和mongo-utils包放入plugins目录里面也无法成功调用,因此需要去到,
增加完之后的plugins目录: