Fetch an image from a mail body and use OCR to extract all the details from the image



I am trying to fetch image links from a mail body and perform OCR on them. Below is a sample mail body which contains an image:

I have used the org.apache.poi.hsmf.MAPIMessage API to get the mail body as text, e.g.:


Can you please suggest how to extract the image links and perform OCR on them?


Hi @sd00465077, do you use the drag-and-drop Recorder or do you code in WorkFusion Studio?


I am using code for this. Please find my code snippet below:

<?xml version="1.0" encoding="UTF-8"?>
<config xmlns="http://web-harvest.sourceforge.net/schema/1.0/config"

		import org.apache.commons.vfs2.FileType;
		import java.io.File;
		import java.util.ArrayList;
		import org.apache.poi.hsmf.MAPIMessage;
		import org.apache.poi.hsmf.datatypes.AttachmentChunks;
		import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
		import java.io.FilenameFilter;
		import java.nio.file.Files;
		import org.apache.poi.ss.usermodel.Cell;
		import org.apache.poi.ss.usermodel.Row;
		import org.apache.poi.ss.usermodel.Sheet;
		import org.apache.poi.ss.usermodel.Workbook;
		import org.apache.poi.ss.util.CellReference;
		import org.apache.poi.xssf.streaming.SXSSFWorkbook;
		import com.google.gson.Gson;

		// Folder containing the e-mail files to process (hard-coded absolute Windows path).
		def filepath= "C:\\Users\\sd00465077\\Desktop\\Werner_emails\\";

			File file = new File(filepath);

			// Collect the folder's entries through an anonymous FilenameFilter.
			// NOTE(review): as posted, the filter body has `return true` / `else` / `return false`
			// with no visible `if` condition and no closing braces — the test that decides which
			// file names are accepted appears to be missing from this snippet.
			File[] files = file.listFiles(new FilenameFilter() {

						public boolean accept(File dir, String name) {
								return true;
							} else {
								return false;

			// First pass over the listed files: reads each file's name.
			// NOTE(review): the rest of this loop body (and its closing brace) is not visible here.
			for(File f:files){
				def filename =f.getName();


			// Accumulator for the per-message records; also published as the
			// web-harvest variable "data" via sys.defineVariable.
			data = new ArrayList()
			sys.defineVariable("data", data);

			// Main pass: build one record (map) per file found in the folder.
			for(File f:files) {
				def filename = f.getName();

				Map rec = new HashMap();
				// Open the file as an Outlook message; the path is folder + file name.
				MAPIMessage msg = new MAPIMessage(filepath + filename);
				String attDirName = filename + "-att";

				// Header fields and the plain-text body, stored under fixed keys.
				rec.put("From", msg.getDisplayFrom().toString());
				rec.put("To",	msg.getDisplayTo().toString());
				rec.put("CC", msg.getDisplayCC().toString());
				rec.put("Subject", msg.getSubject().toString());
				rec.put("Email body", msg.getTextBody().toString());

				// Boolean flag: true when the text body mentions ".png", ".jpeg" or ".jpg".
				// Uses the non-short-circuit `|`, so all three contains() calls are evaluated.
				// NOTE(review): this only detects the substring; it does not extract any image
				// link or image data from the body.
				def Inline_attachment = msg.getTextBody().contains(".png")|msg.getTextBody().contains(".jpeg")|msg.getTextBody().contains(".jpg");

				rec.put("Email inline attachments", Inline_attachment);

				// Attachments carried by the message.
				AttachmentChunks[] attachments = msg.getAttachmentFiles();
				if(attachments.length == 0){
					rec.put("attachments", "no attachments");
				// NOTE(review): the closing brace of the `if` above is not visible in this snippet.
				if(attachments.length > 0) {

					// Per-message output folder under a hard-coded Temp directory; also
					// published as the web-harvest variable "temp_dir".
					def temp_dir = "C:\\Users\\sd00465077\\Desktop\\Werner_emails\\Temp\\" + filename
					sys.defineVariable("temp_dir", temp_dir);

					File d = new File(temp_dir);

					// NOTE(review): File.mkdir() also returns false when the directory already
					// exists or when the parent is missing, so a re-run would take the else
					// branch below and skip the attachments — confirm this is intended.
					if(d.mkdir()) {
						for(AttachmentChunks attachment : attachments) {
							// Start from the short attachment name; prefer the long name when present.
							String fileName1 = attachment.getAttachFileName().toString();

							if(attachment.getAttachLongFileName() != null) {
								fileName1 = attachment.getAttachLongFileName().toString();

							// NOTE(review): the same "attachments" key is written on every iteration,
							// so only the last attachment's name remains in the record.
							rec.put("attachments", fileName1);
							File fi = new File(d, fileName1);

							// Open an output stream on the target file.
							// NOTE(review): no write of the attachment's bytes and no close() call
							// are visible in this snippet; the try/finally bodies look truncated.
							OutputStream fileOut = null;
							try {
								fileOut = new FileOutputStream(fi);
							} finally {
								if(fileOut != null) {
					} else {

						// NOTE(review): the message prints attDirName (file name + "-att"), but the
						// directory that failed to be created is temp_dir.
						System.err.println("Can't create directory "+ attDirName);

		// NOTE(review): the matching `try` for this catch is not visible in this snippet.
		// Only the exception message is printed; the stack trace is not.
		catch (Exception e){
			System.out.println("Exception raised "+e.getMessage());
			mail_extract = new groovy.json.JsonBuilder(data)

		// Wrap the collected list in a JsonBuilder.
		// NOTE(review): no `data.add(rec)` is visible above, so whether the records ever
		// reach `data` cannot be confirmed from this snippet.
		mail_extract = new groovy.json.JsonBuilder(data)

	<var-def name="CSV_report">
		<file path="CSV_report.csv" action="write" type="binary">

	<export include-original-data="false">
		<!-- <single-column name="Attachment_links" value="${fileOut}" /> -->