Unable to read Large excel file getting Reading timeout Error

Hello Team ,

I am not able to read large Excel file from Recorder. we are getting Reading time out errror.

Please find the below error
2019-04-15 20:38:01,086 +0530 ERROR [Playback logic] com.workfusion.studio.rpa.recorder.player.PlaybackLogic - com.workfusion.studio.rpa.recorder.player.result.PlaybackFailure@6ede2e16
com.workfusion.studio.rpa.recorder.playback.PlaybackMultiException: Error executing TemplateAction[templateName=ExcelGetRangeAction.ftl,id=2,name=Optional[ExcelGetRangeAction],parent=1,arguments=ActionArguments[targetVariable=[excel_data],delay=[0],xsi:type=[recorder:ExcelGetRangeAction, recorder:ExcelGetRangeAction],pollingInterval=[300],active=[true],OpenSpreadsheetAction_filePath=[D:\closed\ClosedReport.xlsx],actionDetails=[(START:END to ‘excel_data’)],awaitTimeout=[5000]]]
at com.workfusion.studio.rpa.recorder.playback.action.OpenSpreadsheetAction.execute(OpenSpreadsheetAction.java:52)
at com.workfusion.studio.rpa.recorder.playback.action.OpenSpreadsheetAction.execute(OpenSpreadsheetAction.java:16)
at com.workfusion.studio.rpa.recorder.playback.player.ActionPlayer.next(ActionPlayer.java:64)
at com.workfusion.studio.rpa.recorder.player.PlaybackLogic.playNextAction(PlaybackLogic.java:152)
at com.workfusion.studio.rpa.recorder.player.PlaybackLogic.run(PlaybackLogic.java:112)
at java.lang.Thread.run(Thread.java:745)
Caused by: org.openqa.selenium.remote.UnreachableBrowserException: Error communicating with the remote browser. It may have died.
Build info: version: ‘9.2.0.4’, revision: ‘1a10eeeced’, time: ‘2018-11-29T10:44:59.891Z’
System info: host: ‘ECXLC1710’, ip: ‘172.29.8.105’, os.name: ‘Windows 10’, os.arch: ‘amd64’, os.version: ‘10.0’, java.version: ‘1.8.0_121’
Driver info: driver.version: RemoteWebDriver
Capabilities [{imageSimilarityThreshold=0.8, extra.executor.id={Name=RPA Recorder}, CLOSE_ALL_WINDOWS=false, browserName=universal, javascriptEnabled=true, extra.capabilities.context={“browserType”:“universal”,“startInPrivate”:false,“blockImages”:false,“maximizeOnStartup”:false,“customCapabilities”:{“platform”:“WINDOWS”,“javascriptEnabled”:true,“SEARCH_ALL_WINDOWS”:true,“CLOSE_ALL_WINDOWS”:false,“imageSimilarityThreshold”:“0.8”},“executorId”:{“Name”:“RPA Recorder”}}, platformName=WINDOWS, SEARCH_ALL_WINDOWS=true, platform=WINDOWS}]
Session ID: 79db6a8a-c430-4cac-987c-9b63f8232036
at org.openqa.selenium.remote.RemoteWebDriver.execute(RemoteWebDriver.java:762)
at org.openqa.selenium.remote.RemoteWebDriver.executeScript(RemoteWebDriver.java:677)
at com.workfusion.rpa.helpers.Excel.getRange(Excel.java:727)
at com.workfusion.rpa.helpers.Excel$getRange$0.callStatic(Unknown Source)
at org.codehaus.groovy.runtime.callsite.CallSiteArray.defaultCallStatic(CallSiteArray.java:55)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.callStatic(AbstractCallSite.java:197)
at org.codehaus.groovy.runtime.callsite.AbstractCallSite.callStatic(AbstractCallSite.java:217)
at Script3.run(Script3.groovy:8)
at com.workfusion.studio.rpa.recorder.playback.shell.GroovyShellWrapper.executeScript(GroovyShellWrapper.java:48)
at com.workfusion.studio.rpa.recorder.playback.player.PlaybackContext.executeScript(PlaybackContext.java:65)
at com.workfusion.studio.rpa.recorder.playback.action.template.TemplateAction.executeBehavior(TemplateAction.java:33)
at com.workfusion.studio.rpa.recorder.playback.flow.StandardControlFlow.execute(StandardControlFlow.java:46)
at com.workfusion.studio.rpa.recorder.playback.action.template.TemplateAction.execute(TemplateAction.java:28)
at com.workfusion.studio.rpa.recorder.playback.action.template.TemplateAction.execute(TemplateAction.java:15)
… 4 common frames omitted
Caused by: java.net.SocketTimeoutException: Read timed out
at java.net.SocketInputStream.socketRead0(Native Method)
at java.net.SocketInputStream.socketRead(SocketInputStream.java:116)
at java.net.SocketInputStream.read(SocketInputStream.java:171)
at java.net.SocketInputStream.read(SocketInputStream.java:141)
at org.apache.http.impl.io.SessionInputBufferImpl.streamRead(SessionInputBufferImpl.java:137)
at org.apache.http.impl.io.SessionInputBufferImpl.fillBuffer(SessionInputBufferImpl.java:153)
at org.apache.http.impl.io.SessionInputBufferImpl.readLine(SessionInputBufferImpl.java:282)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:140)
at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57)
at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:259)
at org.apache.http.impl.DefaultBHttpClientConnection.receiveResponseHeader(DefaultBHttpClientConnection.java:163)
at org.apache.http.impl.conn.CPoolProxy.receiveResponseHeader(CPoolProxy.java:167)
at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:273)
at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:125)
at org.apache.http.impl.execchain.MainClientExec.execute(MainClientExec.java:271)
at org.apache.http.impl.execchain.ProtocolExec.execute(ProtocolExec.java:184)
at org.apache.http.impl.execchain.RetryExec.execute(RetryExec.java:88)
at org.apache.http.impl.execchain.RedirectExec.execute(RedirectExec.java:110)
at org.apache.http.impl.client.InternalHttpClient.doExecute(InternalHttpClient.java:184)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:71)
at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55)
at org.openqa.selenium.remote.internal.ApacheHttpClient.fallBackExecute(ApacheHttpClient.java:139)
at org.openqa.selenium.remote.internal.ApacheHttpClient.execute(ApacheHttpClient.java:87)
at org.openqa.selenium.remote.HttpCommandExecutor.execute(HttpCommandExecutor.java:161)
at org.openqa.selenium.remote.RemoteWebDriver.execute(RemoteWebDriver.java:741)
… 17 common frames omitted

Request you to please suggest how to resolve this issue.

below code we are using to read the excel file.

<?xml version="1.0" encoding="UTF-8"?>
<config
	xmlns="http://web-harvest.sourceforge.net/schema/1.0/config"
	scriptlang="groovy">

	<robotics-flow>
		<robot driver="universal" close-on-completion="true"
			start-in-private="false">
			<capability name="SEARCH_ALL_WINDOWS" value="true" />
			<capability name="CLOSE_ALL_WINDOWS" value="false" />
			<script><![CDATA[
				import com.workfusion.studio.rpa.recorder.api.*
				import com.workfusion.studio.rpa.recorder.api.types.*
				import com.workfusion.studio.rpa.recorder.api.custom.*
				import com.workfusion.studio.rpa.recorder.api.internal.representation.*

				import com.workfusion.bot.exception.*


				def excel_data = RTable.builder().build()


				com.workfusion.rpa.helpers.RPA.metaClass.static.$ = { Closure c -> c.call() } // Support for Expression action. Should be implemented in RPA class in next release.

				enableTypeOnScreen()


				openExcel("D:\\closed\\ClosedReport.xlsx")

				try {

					excel_data = RTable.fromRows(getRange("D:\\closed\\ClosedReport.xlsx", "A1"))


				} finally {
					closeExcel("D:\\closed\\ClosedReport.xlsx")
				}


				Resource.createFileSkip("D:\\closed\\output.txt")



				excel_data.getRows().each( {element ->


				})




				putAt("excel_data", excel_data)
			]]></script>
		</robot>
	</robotics-flow>
	<export include-original-data="true">
		<single-column name="excel_data"
			value="${excel_data.toString()}" />
	</export>

</config>

thanks in advance
Diptiranjan Panda

Hello Diptiranjan.
What’s the size of your Excel file? Please be informed that can be the issue with big files. For example, please see this thread:

the excel size is about 20mb and its contains 51 columns and 74000 records .

i am able to read 50000 records .but when i am trying to read 74000 records i am getting reading timeout errors

Thanks @diptiranjanpanda. In this case I can suggest same as in the post that I posted above. You can try to divide your files to several parts or try to increase memory as described in the solution.

Thanks for that solution i will work on it and let you know the results .sorry for another question
can we read 1000 records at a time for the big excel file. is there any solution for reading chunks of data from big excel file.

can we use sax parser for loading small amount of data in the Java heap memory which is under apache poi ooxml sax parser and SharedStrings;
but those jars are not available in the workfusion rpagrid
for the reason we are getting below start up error.
startup failed: Script1.groovy: 24: unable to resolve class org.apache.poi.xssf.model.SharedStrings

i have tried in normal eclipse and able to read large excel file.

Advantage of sax parser:

Data sheets are read using a SAX parser to keep the memory footprint relatively small, so this should be able to read enormous workbooks. The styles table and the shared-string table must be kept in memory. The standard POI styles table class is used, but a custom (read-only) class is used for the shared string table because the standard POI SharedStringsTable grows very quickly with the number of unique strings.

block of code :

public List<String> process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
		ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
		XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
		StylesTable styles = xssfReader.getStylesTable();
		XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
		int index = 0;
		while (iter.hasNext()) {
			if (blankRowNum == 10)
				break;
			InputStream stream = iter.next();
			String sheetName = iter.getSheetName();
			results.add(ExcelValidator.SHEET_NAME_PREFIX + sheetName);
			processSheet(styles, strings, new SheetToCSV(), stream);
			stream.close();
			++index;
		}

		return results;
	}

public void processSheet(StylesTable styles, ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler,
			InputStream sheetInputStream) throws IOException, ParserConfigurationException, SAXException {
		DataFormatter formatter = new DataFormatter();
		InputSource sheetSource = new InputSource(sheetInputStream);
		try {
			XMLReader sheetParser = SAXHelper.newXMLReader();
			ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
			sheetParser.setContentHandler(handler);
			sheetParser.parse(sheetSource);
		} catch (ParserConfigurationException e) {
			throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
		}
	}

Could you please look into this and suggest us.

Thanks a lot
Diptiranjan Panda

Could you please also share your “import” section for this block of code?

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;

import javax.xml.parsers.ParserConfigurationException;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.Styles;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

these are the import statement

in the above import below are not available in the existing jar used by our Workfusion grid dependency for that reason we are gettong start up failed exception

import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.Styles;
import org.apache.poi.xssf.model.StylesTable;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

Not sure about all these classes, but we have org.apache.poi.util.SAXHelper instead of org.apache.poi.ooxml.util.SAXHelper. Does it work for you?

1 Like

YES it is resolved issue.
but import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.Styles;
import org.apache.poi.xssf.model.StylesTable; these are very important for chunk processing. Parses and shows the content of one sheet
* using the specified styles and shared-strings tables.

so other import statement will required for opc package.

Could you please suggest on this.

1 Like

Hi @diptiranjanpanda.
I checked from our side and can advise that we don’t have this package org.apache.poi.xssf.model in WorkFusion Studio. We use poi-3.17, so perhaps these classes had been not included to it yet. You can also refer to this documentation: https://poi.apache.org/apidocs/3.17/

1 Like

Hello Lera,

Thanks for the update .but i have checked the dependency in the rpa grid of the workfusion .it is showing its as 4.00 version.

PFB screenshot for the same

Could you please tell me why all dependency of apache poi Binary Distribution is not present in the Workfusion.
All the below jars are available in binary distribution of apache poi.

For the reason we are not implementing lots of functionality of achape poi and Workfusion create Limitation for us …

Is there any specific reason that the maven build of apache poi dont not add these dependency in the workfusion.

Thanks
Diptiranjan Panda

Cannot advise you why it’s implemented such way. You can try to implement all your imports and procedure via executeGroovyScript() method like in this example:

   <config xmlns="http://web-harvest.sourceforge.net/schema/1.0/config" scriptlang="groovy">
   <robotics-flow>
   	<robot driver="desktop" close-on-completion="true"
   		start-in-private="false">
   		<capability name="SEARCH_ALL_WINDOWS" value="true" />
   		<capability name="CLOSE_ALL_WINDOWS" value="false" />
   		<script><![CDATA[    		
     executeGroovyScript('''
    import java.io.File;
   import java.io.IOException;
   import java.io.InputStream;
   import java.io.PrintStream;
   import javax.xml.parsers.ParserConfigurationException;
   import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
   import org.apache.poi.openxml4j.opc.OPCPackage;
   import org.apache.poi.openxml4j.opc.PackageAccess;
   import org.apache.poi.ss.usermodel.DataFormatter;
   import org.apache.poi.ss.util.CellAddress;
   import org.apache.poi.ss.util.CellReference;
   import org.apache.poi.ooxml.util.SAXHelper;
   import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
   import org.apache.poi.xssf.eventusermodel.XSSFReader;
   import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
   import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
   import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
   import org.apache.poi.xssf.model.SharedStrings;
   import org.apache.poi.xssf.model.Styles;
   import org.apache.poi.xssf.model.StylesTable;
   import org.apache.poi.xssf.usermodel.XSSFComment;
   import org.xml.sax.ContentHandler;
   import org.xml.sax.InputSource;
   import org.xml.sax.SAXException;
   import org.xml.sax.XMLReader;
    
    public List<String> process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
   		ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
   		XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
   		StylesTable styles = xssfReader.getStylesTable();
   		XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
   		int index = 0;
   		while (iter.hasNext()) {
   			if (blankRowNum == 10)
   				break;
   			InputStream stream = iter.next();
   			String sheetName = iter.getSheetName();
   			results.add(ExcelValidator.SHEET_NAME_PREFIX + sheetName);
   			processSheet(styles, strings, new SheetToCSV(), stream);
   			stream.close();
   			++index;
   		}

   		return results;
   	}
   ''')	
   	]]></script>
   	</robot>
   </robotics-flow>
   <export include-original-data="true"></export>
</config>
1 Like