Forum Home » Fuse Distributions » Fuse ESB

Thread: Performance issues to load csv file

 

Permalink Replies: 0
vs.souza

Posts: 2
Registered: 07/16/10
Performance issues to load csv file
Posted: Feb 15, 2012 1:33 PM
  Click to reply to this thread Reply
Hello fellows,

I created a bundle that loads a CSV file, validates some information and sends it to an ActiveMQ queue. The problem is that my CSV file has 30,000,000 records (about 1.2 GB), and my bundle is taking about 12 hours to load 750,000 items onto the queue. I am running ServiceMix 4.4.3 and loading the file with Camel and Bindy. Below I post my camel-context.xml and my Bindy class:

camel-context.xml:

<?xml version="1.0" encoding="UTF-8"?>

<!--
  Spring context for the CSV loading route: one Bindy CSV data format bean
  plus a Camel context holding a single file-to-queue route.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:camel="http://camel.apache.org/schema/spring"
       xsi:schemaLocation="
http://www.springframework.org/schema/beans http://www.springframework.org/schema/beans/spring-beans.xsd
http://camel.apache.org/schema/spring http://camel.apache.org/schema/spring/camel-spring.xsd">

    <!-- Bindy CSV data format; the constructor argument names the package that holds the record model. -->
    <bean id="bindyDataFormat" class="org.apache.camel.dataformat.bindy.csv.BindyCsvDataFormat">
        <constructor-arg value="com.test.integration.camel.spring.poc.file"/>
    </bean>

    <camel:camelContext xmlns="http://camel.apache.org/schema/spring">
        <camel:package>com.test.integration.camel.spring.poc</camel:package>

        <!-- Reads files from the From directory, splits them by line and sends every record to ActiveMQ. -->
        <camel:route>
            <camel:from uri="file:/home/jedimaster/Java-Env/Sandbox/From?delete=true"/>

            <!--
              The date pattern follows java.text.SimpleDateFormat: HH is the hour on a
              24-hour clock and mm is the minute. The previous pattern hh:MM printed a
              12-hour clock hour followed by the month number, which made the start and
              finish timestamps useless for measuring a run that lasts many hours.
            -->
            <camel:log message="Started unmarshalling file ${file:name} at ${date:now:HH:mm:ss.SSS}..."/>

            <!-- streaming="true" splits the file line by line instead of reading all lines up front. -->
            <camel:split streaming="true">
                <camel:tokenize token="\n"/>
                <camel:unmarshal ref="bindyDataFormat"/>
                <camel:to uri="activemq:filemove-events"/>
            </camel:split>

            <camel:log message="Finished unmarshalling file ${file:name} at ${date:now:HH:mm:ss.SSS}..."/>
        </camel:route>

    </camel:camelContext>

</beans>

bindy bean:

package com.test.integration.camel.spring.poc.file;

import java.io.Serializable;

import org.apache.camel.dataformat.bindy.annotation.CsvRecord;
import org.apache.camel.dataformat.bindy.annotation.DataField;

/**
 * Serializable CSV record with three positional text columns: event date,
 * user id and system id.
 *
 * <p>Columns are comma separated and may be wrapped in double quotes, as
 * declared by the {@code @CsvRecord} annotation. All values are kept as raw
 * strings; no parsing or validation happens in this class.
 */
@CsvRecord(separator = ",", quote = "\"")
public class CSVEventRecordBean implements Serializable {

    private static final long serialVersionUID = -8806841912643394977L;

    /** Text placed in front of the stored system id by {@link #getSystemId()}. */
    private static final String SYSTEM_ID_LABEL = "Bean Generated: ";

    /** First CSV column. */
    @DataField(pos = 1)
    private String eventDate;

    /** Second CSV column. */
    @DataField(pos = 2)
    private String userId;

    /** Third CSV column, stored without the label added by the getter. */
    @DataField(pos = 3)
    private String systemId;

    /**
     * @return the event date exactly as it was set, possibly {@code null}
     */
    public String getEventDate() {
        return this.eventDate;
    }

    /**
     * @param eventDate the event date text to store
     */
    public void setEventDate(final String eventDate) {
        this.eventDate = eventDate;
    }

    /**
     * @return the user id exactly as it was set, possibly {@code null}
     */
    public String getUserId() {
        return this.userId;
    }

    /**
     * @param userId the user id text to store
     */
    public void setUserId(final String userId) {
        this.userId = userId;
    }

    /**
     * Returns the stored system id with a fixed label in front of it.
     *
     * <p>The result is never {@code null}: an unset system id is rendered by
     * string concatenation as the text {@code null} after the label.
     *
     * @return the label followed by the stored system id
     */
    public String getSystemId() {
        return SYSTEM_ID_LABEL + this.systemId;
    }

    /**
     * @param systemId the system id text to store, without any label
     */
    public void setSystemId(final String systemId) {
        this.systemId = systemId;
    }

}

How can I improve the performance considerably to make it faster? Do you have any suggestions?

Cheers.

Edited by: vs.souza on Feb 15, 2012 1:34 PM