import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;

// This depends on JSONObject.java, which you can get from
//   https://github.com/douglascrockford/JSON-java

/*
 * StringEscapePythonUtils provides two static methods that implement 
 * the string-escape encoding that Python v2.7.2 implements in 
 * stringobject.c as the __repr__ and DecodeEscape.  These convert between
 * a byte array and a python string literal.  
 * 
 * Such a string literal is useful for putting byte arrays into JSON strings,
 * which are really unicode character strings and thus assume an encoding for
 * the bytes.  When the input byte array is mostly printable ASCII, i.e.
 * bytes from the space character (0x20) through '~' (0x7e), then this
 * escaping does mostly nothing.
 * 
 * The method names are roughly modeled on Java's existing StringEscapeUtils, 
 * which provides tools for escaping/unescaping the Java and JavaScript 
 * flavors of string literal. 
 */
public class StringEscapePythonUtils {
	/** Lowercase hex digits indexed by nibble value, used to build \xNN escapes. */
	private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

	/** Number of characters shown on each side of a mismatch in diagnostics. */
	private static final int CONTEXT_CHARS = 20;

	/** Input file used by main when no path is given on the command line. */
	private static final String DEFAULT_IN_PATH = "part-01359";

	/**
	 * Converts an escaped string into a byte array, i.e. converts the body of
	 * a python string literal into the bytes it denotes.
	 *
	 * Recognized escapes are \xNN (exactly two hex digits, either case), \n,
	 * \t, \r, \\ and \'. Every other byte is copied through unchanged. The
	 * input is encoded as UTF-8 first, so the result does not depend on the
	 * platform default charset.
	 *
	 * @param escapedString the escaped text; must not be null
	 * @return the decoded bytes (callers can convert them to a string)
	 * @throws Exception if the string ends inside an escape sequence, a \x
	 *             escape contains a non-hex character, or a backslash is
	 *             followed by an unsupported character
	 */
	public static byte[] unescapePython(String escapedString) throws Exception {
		byte[] escaped = escapedString.getBytes(StandardCharsets.UTF_8);
		// Each escape sequence (2 or 4 bytes) decodes to a single byte, so
		// the output can never be longer than the input.
		byte[] unescaped = new byte[escaped.length];
		int j = 0;
		for (int i = 0; i < escaped.length; i++) {
			// not special, copy and move on
			if (escaped[i] != '\\') {
				unescaped[j++] = escaped[i];
				continue;
			}
			if (i + 1 >= escaped.length) {
				throw new Exception(
						"String incorrectly escaped, ends with escape character.");
			}
			if (escaped[i + 1] == 'x') {
				// \xNN needs two more bytes after the 'x'
				if (i + 3 >= escaped.length) {
					throw new Exception(
							"String incorrectly escaped, ends early with incorrect hex encoding.");
				}
				int high = hexValue(escaped[i + 2]);
				int low = hexValue(escaped[i + 3]);
				// reject non-hex characters instead of silently decoding
				// them to a garbage byte
				if (high < 0 || low < 0) {
					throw new Exception(
							"String incorrectly escaped, invalid hex digit in \\x escape.");
				}
				unescaped[j++] = (byte) ((high << 4) | low);
				i += 3;
			} else {
				unescaped[j++] = decodeSimpleEscape(escaped[i + 1]);
				i++;
			}
		}
		return Arrays.copyOf(unescaped, j);
	}

	/** Returns the value (0-15) of an ASCII hex digit, or -1 if b is not one. */
	private static int hexValue(byte b) {
		if (b >= '0' && b <= '9') {
			return b - '0';
		}
		if (b >= 'a' && b <= 'f') {
			return b - 'a' + 10;
		}
		if (b >= 'A' && b <= 'F') {
			return b - 'A' + 10;
		}
		return -1;
	}

	/** Maps the byte following a backslash (other than 'x') to the byte it encodes. */
	private static byte decodeSimpleEscape(byte code) throws Exception {
		switch (code) {
		case 'n':
			return '\n';
		case 't':
			return '\t';
		case 'r':
			return '\r';
		case '\\':
		case '\'':
			return code;
		default:
			throw new Exception(
					"String incorrectly escaped, invalid escaped character");
		}
	}

	/**
	 * Converts a byte array into an escaped character string that could be
	 * used as the body of a python string literal.
	 *
	 * The single quote, backslash, tab, newline and carriage return get
	 * two-character backslash escapes. Any other byte below the space
	 * character or at/above 0x7f is written as \xNN with two lowercase hex
	 * digits. Everything else is emitted unchanged.
	 *
	 * @param raw the bytes to escape; must not be null
	 * @return the escaped, pure-ASCII string
	 * @throws Exception never thrown by this implementation; kept in the
	 *             signature for source compatibility with existing callers
	 */
	public static String escapePython(byte[] raw) throws Exception {
		StringBuilder escaped = new StringBuilder(raw.length + 16);
		for (byte c : raw) {
			switch (c) {
			case '\'':
				escaped.append("\\'");
				break;
			case '\\':
				escaped.append("\\\\");
				break;
			case '\t':
				escaped.append("\\t");
				break;
			case '\n':
				escaped.append("\\n");
				break;
			case '\r':
				escaped.append("\\r");
				break;
			default:
				// bytes >= 0x80 are negative as a Java byte, so the
				// "c < ' '" test covers them as well as control characters
				if (c < ' ' || c >= 0x7f) {
					escaped.append("\\x")
							.append(HEX_DIGITS[(c >> 4) & 0x0f])
							.append(HEX_DIGITS[c & 0x0f]);
				} else {
					// Just a normal character, so emit it unchanged.
					escaped.append((char) c);
				}
				break;
			}
		}
		return escaped.toString();
	}

	/**
	 * Tests whether unescapePython can generate an output that can be written
	 * to a file on disk. Iterates over all JSON strings in a file of one
	 * JSON-string per line, unpacks body.raw from each, and writes the
	 * decoded bytes to a single file, each followed by the platform line
	 * separator. This file can then be compared to a similar file generated
	 * by python.
	 *
	 * The decoded bytes are written through a byte stream, not a character
	 * writer, so arbitrary binary content reaches the file unmodified. Each
	 * escaped body is also echoed to standard output.
	 *
	 * @param inPath file with one JSON document per line, read as UTF-8
	 * @param outPath file that receives the decoded bodies
	 * @throws Exception on I/O failure, malformed JSON, or a body that is
	 *             not correctly escaped
	 */
	public static void unescapePythonFileTest(String inPath, String outPath)
			throws Exception {
		byte[] newline = System.lineSeparator().getBytes(StandardCharsets.UTF_8);
		try (BufferedReader in = new BufferedReader(new InputStreamReader(
				new FileInputStream(inPath), StandardCharsets.UTF_8));
				OutputStream out = new BufferedOutputStream(
						new FileOutputStream(outPath))) {
			String rawLine; // entire JSON string (one per line in file)
			// readLine() returning null is the only reliable end-of-file
			// signal; ready() merely reports whether a read would block
			while ((rawLine = in.readLine()) != null) {
				JSONObject jo = new JSONObject(rawLine);
				JSONObject body = (JSONObject) jo.get("body");
				// string-escaped body content from inside JSON object
				String rawBody = (String) body.get("raw");
				System.out.println(rawBody);
				// write unescaped byte array to file, followed by a newline
				out.write(unescapePython(rawBody));
				out.write(newline);
			}
		}
	}

	/**
	 * Reads the first JSON doc from inPath and verifies that
	 * escape(unescape(str)) is the identity on its body.raw string. The
	 * document, with body.raw replaced by the re-escaped string, is written
	 * to outPath as UTF-8.
	 *
	 * On a mismatch the index of the first difference and a short window of
	 * both strings around it are printed to standard output, and an
	 * exception is thrown.
	 *
	 * @param inPath file with one JSON document per line, read as UTF-8
	 * @param outPath file that receives the re-escaped first document
	 * @throws Exception on I/O failure, an empty input file, malformed JSON,
	 *             an incorrectly escaped body, or a failed identity check
	 */
	public static void identityEscapePythonTest(String inPath, String outPath)
			throws Exception {
		String rawBody; // string-escaped body content from inside JSON object
		String reescaped;
		try (BufferedReader in = new BufferedReader(new InputStreamReader(
				new FileInputStream(inPath), StandardCharsets.UTF_8));
				PrintWriter out = new PrintWriter(new OutputStreamWriter(
						new FileOutputStream(outPath), StandardCharsets.UTF_8))) {
			String rawLine = in.readLine();
			if (rawLine == null) {
				throw new Exception("Input file is empty: " + inPath);
			}
			JSONObject jo = new JSONObject(rawLine);
			JSONObject body = (JSONObject) jo.get("body");
			rawBody = (String) body.get("raw");
			reescaped = escapePython(unescapePython(rawBody));
			body.put("raw", reescaped);
			out.println(jo.toString());
		}

		int firstDiff = firstDifference(rawBody, reescaped);
		if (firstDiff >= 0) {
			// Only the two escaped strings are shown: an index into them
			// does not correspond to an index into the decoded bytes.
			System.out.println(firstDiff);
			System.out.println(contextAround(rawBody, firstDiff));
			System.out.println(contextAround(reescaped, firstDiff));
			System.out.println("\n");
			throw new Exception(
					"escapePython(unescapePython(raw)) is not the identity; first difference at index "
							+ firstDiff);
		}
	}

	/**
	 * Returns the index of the first position where the two strings differ,
	 * or -1 if they are equal. When one string is a proper prefix of the
	 * other, the length of the shorter one is returned.
	 */
	private static int firstDifference(String expected, String actual) {
		int common = Math.min(expected.length(), actual.length());
		for (int i = 0; i < common; i++) {
			if (expected.charAt(i) != actual.charAt(i)) {
				return i;
			}
		}
		return expected.length() == actual.length() ? -1 : common;
	}

	/** Returns up to CONTEXT_CHARS characters of s on each side of index, clamped to s. */
	private static String contextAround(String s, int index) {
		int from = Math.max(0, index - CONTEXT_CHARS);
		int to = Math.min(s.length(), index + CONTEXT_CHARS);
		return from < to ? s.substring(from, to) : "";
	}

	/**
	 * Runs both the unescape test and the identity test.
	 *
	 * @param args optional; args[0] is the input file (defaults to
	 *            "part-01359"). Outputs go to the input path with
	 *            "-transformed" and "-identity" appended.
	 * @throws Exception if either test fails
	 */
	public static void main(String[] args) throws Exception {
		String inPath = (args != null && args.length > 0) ? args[0]
				: DEFAULT_IN_PATH;
		String outPath = inPath + "-transformed";
		System.err.println("Generating " + outPath);
		unescapePythonFileTest(inPath, outPath);

		String outPath2 = inPath + "-identity";
		System.err.println("Generating " + outPath2);
		identityEscapePythonTest(inPath, outPath2);

		System.out.println("success");
	}
}