SegmentationTransformer.java
/**
*
*/
package edu.odu.cs.extract.control;
import org.jdom.Document;
import edu.odu.cs.extract.dataflow.Dataflow;
import edu.odu.cs.extract.dataflow.QuickTransformer;
import edu.odu.cs.extract.dataflow.TransformationResult;
import edu.odu.cs.extract.inputprocessing.segmentation.Segmentation;
import edu.odu.cs.extract.utils.Properties;
/**
 * Transformer that runs segmentation over an IDM document.
 * <p>
 * The document carried by the incoming dataflow is handed to
 * {@link Segmentation}, and the re-segmented document is emitted as a new
 * {@code IDMDataflow}. When the root element of the incoming document carries
 * the attribute {@code OCRmerge="failed"}, the result is reported with a
 * warning status rather than success.
 *
 * @author zeil
 */
public class SegmentationTransformer extends QuickTransformer {

    /**
     * Creates a segmentation transformer; no state beyond the superclass is
     * set up here.
     */
    public SegmentationTransformer() {
        super();
    }

    /**
     * Segments the document held by the first input dataflow.
     *
     * @param in input dataflows; only {@code in[0]} is used, and it is cast to
     *           {@code IDMDataflow}
     * @return a result wrapping a new {@code IDMDataflow} that holds the
     *         re-segmented document together with the trace of {@code in[0]}.
     *         The status/message pair is {@code "warning"} /
     *         {@code "unable to merge pages from OCR"} when the input root
     *         element has {@code OCRmerge="failed"}, and {@code "success"} /
     *         {@code "OK"} otherwise.
     * @throws Exception if any step of the transformation fails
     */
    @Override
    public TransformationResult doTransform(Dataflow[] in) throws Exception {
        IDMDataflow source = (IDMDataflow) in[0];
        Document rawIDM = source.getDocument();

        // The flag is read from the root element before segmentation runs.
        // A missing attribute yields null, which equals() treats as "not failed".
        String ocrMergeFlag = rawIDM.getRootElement().getAttributeValue("OCRmerge");
        boolean ocrMergeFailed = "failed".equals(ocrMergeFlag);

        // Segment document
        Segmentation segmenter = new Segmentation(rawIDM);
        Document resegmented = segmenter.reSegment();
        IDMDataflow result = new IDMDataflow(in[0].getTrace(), resegmented);

        if (ocrMergeFailed) {
            return new TransformationResult(result, "warning", "unable to merge pages from OCR", null);
        }
        return new TransformationResult(result, "success", "OK", null);
    }

    /**
     * Looks up the output file extension in the application properties.
     *
     * @return the value of the property named by
     *         {@code Properties.Names.SEGMENTATION_OUT_EXT}
     */
    @Override
    public String getOutputExtension() {
        return Properties.getProperties().getProperty(Properties.Names.SEGMENTATION_OUT_EXT);
    }
}
unformatted source
© 2015-2016, Old Dominion Univ.