// File created: 2007-10-23 12:06:14

package ope.adventure.parsers;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.InputMismatchException;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import ope.adventure.book.Book;
import ope.adventure.book.BookSection;

import ope.adventure.util.Utils;

/**
 * Static parser that turns the textual form of a book into a {@link Book}.
 *
 * <p>As read by {@link #parse(String)}, the input consists of, in order:
 * <ol>
 *   <li>the name section and the description section, each preceded by a
 *       line holding its paragraph count;</li>
 *   <li>a line holding the number of content sections;</li>
 *   <li>that many content sections, each again preceded by its paragraph
 *       count; text enclosed in {@code *}, {@code /} or {@code _} markers is
 *       registered as a keyword, and two adjacent markers collapse into a
 *       single literal one;</li>
 *   <li>metadata lines: {@code NO_INDEX}, {@code NOT_REFERENCED},
 *       {@code SANREQ <byte>} or {@code SANCOST <byte>}.</li>
 * </ol>
 *
 * <p>All working state lives in static fields, so concurrent calls to
 * {@link #parse(String)} would interfere with each other.
 */
public final class BookParser {
	private BookParser() {}

	// Working state of the book currently being parsed. Re-initialized at the
	// start of every parse() call.
	private static String  shortTitle;
	private static byte    sanityNeed, sanityCost;
	private static boolean referenced = true, browseable = true;

	// keyword -> sections in which that keyword occurs
	private static Map<String, List<BookSection>> contents;

	private static final Pattern
		// zero-width match on an empty line: the paragraph separator
		PARAGRAPH_REGEX  = Pattern.compile("(?m)^$"),
		// a single keyword marker character
		KEYWORD_REGEX    = Pattern.compile("[*/_]"),
		// a run of whitespace: the word separator used for snippets
		WHITESPACE_REGEX = Pattern.compile("\\s+");

	/**
	 * Splits a text into paragraphs at its empty lines.
	 *
	 * @param s the text to split
	 * @return the pieces of {@code s} between empty lines
	 */
	public static String[] getParagraphs(final String s) {
		return BookParser.PARAGRAPH_REGEX.split(s);
	}

	/**
	 * Parses a complete book description.
	 *
	 * @param s the full text of the book file
	 * @return the book built from the parsed name, description, keyword
	 *         index and metadata
	 * @throws BadBookException if a paragraph or section count is missing,
	 *         or fewer paragraphs follow than were announced
	 * @throws NumberFormatException if a {@code SANREQ} or {@code SANCOST}
	 *         value is not a valid byte
	 */
	public static Book parse(final String s) {
		// Start every book from the defaults: without this, metadata such as
		// NO_INDEX or SANCOST from a previously parsed book would leak into
		// this one.
		contents   = new HashMap<String, List<BookSection>>();
		sanityNeed = 0;
		sanityCost = 0;
		referenced = true;
		browseable = true;

		final Scanner scan = new Scanner(s);

		final String name = readSection(scan);
		final String desc = readSection(scan);

		// presumably the first line of the name; Utils.lineSplit is not
		// visible here
		shortTitle = Utils.lineSplit(name)[0];

		// readSection leaves the paragraph delimiter active; the section
		// count has to be read line-wise.
		scan.useDelimiter(Utils.EOL_REGEX);

		if (!scan.hasNextInt())
			throw new BadBookException(
				"expected section count following title and description");

		for (int sectionNum = scan.nextInt(); sectionNum-- > 0;)
			parseKeywords(readSection(scan));

		readMetadata(scan);

		return new Book(
			name, desc, shortTitle,
			sanityNeed, sanityCost,
			referenced, browseable,
			contents);
	}

	/**
	 * Reads one section: an integer paragraph count followed by that many
	 * paragraphs, which are concatenated.
	 *
	 * <p>On return the scanner's delimiter is {@code PARAGRAPH_REGEX}.
	 *
	 * @param scan the scanner positioned at the paragraph count
	 * @return the concatenated paragraphs with surrounding whitespace trimmed
	 * @throws BadBookException if the count is missing or the input ends
	 *         before all announced paragraphs were read
	 */
	private static String readSection(final Scanner scan) {
		final StringBuilder section = new StringBuilder();

		scan.useDelimiter(Utils.EOL_REGEX);

		if (!scan.hasNextInt())
			throw new BadBookException(
				"expected paragraph count to precede section");

		int paragraphNum = scan.nextInt();

		scan.useDelimiter(PARAGRAPH_REGEX);

		while (paragraphNum-- > 0) {
			if (!scan.hasNext())
				// paragraphNum was already decremented for the paragraph
				// that turned out to be missing, hence the +1
				throw new BadBookException(
					"expected " + (paragraphNum+1) + " more paragraphs");

			section.append(scan.next());
		}

		return section.toString().trim();
	}

	/**
	 * Extracts the keywords of one content section and registers them, with
	 * a snippet each, in {@code contents}.
	 *
	 * <p>A keyword is the text between two consecutive marker characters; an
	 * unterminated marker makes the keyword run to the end of the text. Two
	 * adjacent markers are collapsed into the second one, which then stands
	 * in the section text as a literal character. Keywords are lower-cased
	 * and their whitespace runs normalized to single spaces; every word of a
	 * multi-word keyword is registered as a keyword of its own as well.
	 *
	 * @param s the raw text of the section
	 */
	private static void parseKeywords(final String s) {

		// We need the keyword position for snippet making
		final class Kw {
			public final String kw;
			public final int pos;
			public Kw(final String k, final int p) {
				kw = k;
				pos = p;
			}
		}

		final List<Kw> keywords = new ArrayList<Kw>();
		final Set<String> added = new HashSet<String>();
		final StringBuilder sb = new StringBuilder(s);
		final Matcher matcher = KEYWORD_REGEX.matcher(sb);

		while (matcher.find()) {
			final int kwStart = matcher.end();

			// Look for the closing marker; without one the keyword extends
			// to the end of the text.
			final boolean closed = matcher.find();
			final int kwEnd = closed ? matcher.start() : sb.length();

			if (kwStart == kwEnd) {
				// A lone marker as the very last character: there is nothing
				// to collapse and no keyword to record.
				if (!closed)
					break;

				// change ** to * and the like
				sb.replace(kwStart - 1, kwEnd + 1,
					String.valueOf(sb.charAt(kwStart)));

				// matcher doesn't like a length-changing string: manual fix.
				// The text shrank by one character, so the first character
				// not yet examined now sits at kwStart; region() resets the
				// matcher so the next find() starts there.
				matcher.region(kwStart, sb.length());
				continue;
			}

			final String keyword = Utils.toLower(sb
				.substring(kwStart, kwEnd)
				.replaceAll("\\p{javaWhitespace}+", " "));

			// only the first occurrence of a keyword in a section counts
			if (added.add(keyword))
				keywords.add(new Kw(keyword, kwStart));

			// Don't rely on find() continuing to fail after a failed find().
			if (!closed)
				break;
		}

		// Can't do this within the loop, since there might be more ** -> *
		// type changes. Have to do all the changes first.

		final BookSection finalSection = new BookSection(sb.toString());
		for (final Kw kw : keywords) {
			final String snippet
				= makeSnippet(finalSection.getContents(), kw.pos).trim();

			final String keyword = Utils.toLower(kw.kw);

			addKeyword(keyword, finalSection, snippet);

			final String[] subKws = keyword.split("\\s+");
			if (subKws.length > 1)
				for (final String subKw : subKws)
					addKeyword(subKw, finalSection, snippet);
		}
	}

	/**
	 * Registers a keyword for a section, both on the section itself and in
	 * the book-wide {@code contents} index.
	 *
	 * @param kw      the keyword
	 * @param section the section the keyword occurs in
	 * @param snippet the snippet to store for the keyword
	 */
	private static void addKeyword(
		final String kw,
		final BookSection section, final String snippet
	) {
		// add each keyword only once
		// (presumably addSnippet returns false when the section already
		// knows the keyword; BookSection is not visible here)
		if (!section.addSnippet(snippet, kw))
			return;

		List<BookSection> sectionsWithKw = contents.get(kw);

		// first section with this keyword: add list to contents
		if (sectionsWithKw == null) {
			sectionsWithKw = new ArrayList<BookSection>();
			contents.put(kw, sectionsWithKw);
		}

		sectionsWithKw.add(section);
	}

	/**
	 * Consumes the rest of the input, treating every remaining token
	 * delimited by {@code Utils.EOL_REGEX} as one metadata entry.
	 *
	 * @param scan the scanner positioned after the last content section
	 */
	private static void readMetadata(final Scanner scan) {
		scan.useDelimiter(Utils.EOL_REGEX);

		while (scan.hasNext())
			useMetadata(scan.next());
	}

	/**
	 * Applies a single metadata line to the parser state.
	 *
	 * <p>{@code NO_INDEX} clears {@code browseable}, {@code NOT_REFERENCED}
	 * clears {@code referenced}, {@code SANREQ n} and {@code SANCOST n} set
	 * {@code sanityNeed} and {@code sanityCost}. Anything else, including a
	 * value-taking key without a value, is reported on stderr and ignored.
	 *
	 * @param line the metadata line
	 * @throws NumberFormatException if the value of {@code SANREQ} or
	 *         {@code SANCOST} is not a valid byte
	 */
	private static void useMetadata(final String line) {
		final String[] words = line.split(" ");

		if (words.length == 0)
			return;

		// flags without a value
		if (words[0].equals("NO_INDEX")) {
			browseable = false;
			checkTrail(words, 1);
			return;

		} else if (words[0].equals("NOT_REFERENCED")) {
			referenced = false;
			checkTrail(words, 1);
			return;
		}

		// everything below needs a value
		if (words.length == 1) {
			badMetadata(line);
			return;
		}

		if (words[0].equals("SANREQ")) {
			sanityNeed = Byte.parseByte(words[1]);
			checkTrail(words, 2);

		} else if (words[0].equals("SANCOST")) {
			sanityCost = Byte.parseByte(words[1]);
			checkTrail(words, 2);

		} else
			badMetadata(line);
	}

	/**
	 * Warns on stderr about words following a complete metadata entry.
	 *
	 * @param words  the words of the metadata line
	 * @param maxLen the number of leading words the entry actually uses
	 */
	private static void checkTrail(final String[] words, final int maxLen) {
		if (words.length > maxLen)
			System.err.printf(
				"Book :: trailing metadata in '%s': %s\n",
				shortTitle,
				// report only the surplus words, not the consumed value
				Utils.join(Arrays.copyOfRange(words, maxLen, words.length)));
	}

	/**
	 * Reports an unrecognized metadata line on stderr.
	 *
	 * @param s the offending line
	 */
	private static void badMetadata(final String s) {
		System.err.printf(
			"Book :: unrecognized metadata in '%s': %s\n",
			shortTitle, s);
	}

	/* A snippet is just the 4 words before and after a kw, and only up to any
	 * paragraph break.
	 * It's what's output for "browse <book> <keyword>".
	 * And is surprisingly hard to figure out.
	 *
	 * s is the section text and kwPos the index of the keyword's first
	 * character in it. "... " is prepended when the snippet doesn't start at
	 * the beginning of the keyword's paragraph, " ..." is appended when it
	 * doesn't reach the paragraph end, and whitespace runs inside the
	 * snippet are replaced by single spaces.
	 *
	 * NOTE(review): the failure branches below are guarded by assert only.
	 * With assertions disabled, a paragraph without any whitespace before
	 * the keyword makes the following m.end() throw IllegalStateException.
	 */
	private static String makeSnippet(
		final String s,
		final int kwPos
	) {
		// The start positions of the four words before kw
		final int[] before = new int[4];

		final Matcher m = WHITESPACE_REGEX.matcher(s);
		final Matcher parMatcher = PARAGRAPH_REGEX.matcher(s);

		// There's evidently no way of searching backward for a regex.
		// Thus we have to do it in a bit tricky way...

		// Move m to start of correct paragraph
		int begPar = 0;
		while (parMatcher.find()) {
			if (parMatcher.end() > kwPos)
				break;
			begPar = parMatcher.end();
		}
		if (!m.find(begPar))
			assert false;

		// Keep rotating the last 4 word starts into before
		for (int b = 0, prev = m.end();;) {
			if (!m.find())
				assert false;
			assert m.end() != kwPos;

			if (m.end() >= kwPos)
				break;

			before[b] = prev;
			b = (b+1) % before.length;
			prev = m.end();
		}

		// The ring buffer is in rotation order; sorting puts the earliest
		// position first. Slots never written are still 0 and get clamped to
		// the paragraph start.
		Arrays.sort(before);
		for (int i = 0; i < before.length; ++i) {
			if (before[i] < begPar)
				before[i] = begPar;
			else
				break;
		}

		// Now before[0] is the start pos of our snippet.
		// But make sure there are no paragraph breaks there...
		int beg;
		int parEnd = s.length();

		if (parMatcher.find(before[0]+1)) {
			// A paragraph break exists after before[0].
			// (Evul follows: using the same var in 3 different contexts)

			beg = parMatcher.start();

			// Find where between the befores the paragraph break is.
			// (binarySearch returns -(insertion point) - 1 for a miss.)
			beg = -Arrays.binarySearch(before, beg) - 1;

			// It shouldn't be one of the befores.
			assert beg >= 0 && beg <= before.length;

			if (beg == before.length)
				// It's after all the befores, so ignore it.
				beg = before[0];
			else
				// Pick the before which is after the paragraph break.
				beg = before[beg];

			if (parMatcher.find(beg+1))
				parEnd = parMatcher.end();
		} else
			beg = before[0];

		// So now beg is the start pos of our snippet.
		// parEnd is the end of the paragraph.
		// Finding the end pos is a bit easier:
		// m still stands at the whitespace match that ended the loop above,
		// so each find() advances past one more word.
		int end = 0;
		for (int words = before.length; words-- > 0;) {
			if (m.find())
				end = m.end();
			else
				end = s.length();

			if (end >= parEnd) {
				end = parEnd;
				break;
			}
		}

		final StringBuilder sb = new StringBuilder(
			"... ".length() +
			(end - beg + 1) +
			" ...".length());

		if (beg != begPar) sb.append("... ");

		sb.append(WHITESPACE_REGEX
			.matcher(s.substring(beg, end).trim())
			.replaceAll(" "));

		if (end != parEnd) sb.append(" ...");

		return sb.toString();
	}
}

/**
 * Signals that the text handed to the book parser does not have the
 * expected structure.
 */
final class BadBookException extends InputMismatchException {
	/**
	 * Creates the exception with a description of what was malformed.
	 *
	 * @param message the detail message, later available through
	 *                {@link #getMessage()}
	 */
	public BadBookException(final String message) {
		super(message);
	}
}
