Friday, May 13, 2011

Java Text File Splitter Class

Here is a Java class that splits a text file into multiple files using a supplied string as a separator.

It accepts two arguments in the constructor: a path to the file, and a token to be used for separation.

Usage example:

TextFileSplitter tfs = new TextFileSplitter("myfile.txt", "Section");
tfs.run();

It will create a sub-directory in the same directory as the file where it will place the resulting files.

This is a very basic version: I/O errors are caught and printed to the console rather than thrown to the caller.

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;

public class TextFileSplitter {

 private static String fullPath ;
 private static String token;

 public TextFileSplitter(String path, String token) {
  this.fullPath = path;
  this.token = token;
 }


 public void run(){

  ArrayList al = splitIntoTokens(fullPath, token);

  if(al.size()==0){
   return;
  }
  String dir = getDirectoryFromFileName(fullPath);

  int pageIndex =1;
  Iterator iter = al.listIterator();
  String name = getFileName(fullPath);

  while(iter.hasNext()){
   String str = (String)iter.next();
   String fileName = dir+"\\"+ name + "_"+ pageIndex + ".txt";
   try {
    System.out.println(fileName);
    writeFile(fileName, str);
   }
   catch (IOException e) {
    e.printStackTrace();
   }
   pageIndex++;
  }
 }


 private String getFileName(String path){

  String fileName = null;
  String separator = File.separator;

  int pos = path.lastIndexOf(separator);
  int pos2 = path.lastIndexOf(".");

  if(pos2>-1)
   fileName =path.substring(pos+1, pos2);
  else
   fileName =path.substring(pos+1);

  return fileName;
 }

 private String getDirectoryFromFileName(String fname) {

  String parent =  (new File(fname).getParent());

  String name = getFileName(fname);

  String dirPath = parent+"\\" + name;

  File dir = new File(dirPath);

  if(!dir.exists()){
   boolean success = dir.mkdir();
   if (!success) {
    System.out.println("Directory creation failed");
    return null;
   }
  }
  return dirPath;
 }




 private  void writeFile(String fname, String str) throws IOException{
  BufferedWriter out=new BufferedWriter ( new FileWriter(fname));
  out.write(str);
  out.close();
 }
 
 //splits a file into an ArrayList of strings using the separator passed
 
 private ArrayList splitIntoTokens(String fname, String token){
  String line;
  StringBuffer sb = new StringBuffer();
  ArrayList al = new ArrayList();
  String lineSeparator =  System.getProperty("line.separator");
  try {
   FileInputStream fis = new FileInputStream(fname);
            BufferedReader reader=
              new BufferedReader
                (new InputStreamReader(fis));

            while((line = reader.readLine()) != null) {
    if((line.indexOf(token))>-1){
     al.add(sb.toString());
     sb = new StringBuffer();
    }
    sb.append(line + lineSeparator);
   }
            //add last section
            al.add(sb.toString());
   reader.close();
  }
  catch (IOException e) {
            System.err.println("*** IOexception ***" + e.getMessage());
  }
  return al;

 }
}

No comments:

Post a Comment