#include <cmath>
#include <fstream>
#include <iostream>
#include <iomanip>
#include "AI_Player_Impl.h"
#include "LogicPlayer.h"

using namespace std;

// Constructs the implementation: the AI_Player base is initialised with
// p_standard and the supplied player pointer is stored in `player`.
// The pointer is only stored here; nothing in this constructor takes
// ownership of it or dereferences it.
AI_Player_Impl::AI_Player_Impl(LogicPlayer* _player)
	: AI_Player(p_standard)
	, player(_player)
{
}

// Loads the AI tuning parameters for the given player.
//
// The per-player file conf\p<player_id>.ai_params is tried first; if that
// stream is not usable, conf\default.ai_params is opened instead. Each line
// is parsed with key_value, and the keys H, DR, START_LR, END_LR, DL and
// LAMBDA are converted with string_to into the matching members. Lines with
// any other key are ignored.
//
// After reading, the six members are copied into `params` under the same key
// names. If neither file could be opened the read loop does nothing, so the
// members keep whatever values they already had and those are what get
// copied into `params`.
void AI_Player_Impl::load(PLAYER_ID player_id)
{
	// Build the per-player path.
	ostringstream path_builder;
	path_builder << "conf\\p" << player_id << ".ai_params";

	ifstream source(path_builder.str().c_str());
	if (!source)
	{
		// Reset the failed stream before re-using it for the shared defaults.
		source.close();
		source.clear();
		source.open("conf\\default.ai_params");
	}

	for (string line; getline(source, line); )
	{
		key_value entry(line);

		if (entry.key == "H")
		{
			string_to(entry.value, H);
			continue;
		}
		if (entry.key == "DR")
		{
			string_to(entry.value, dr);
			continue;
		}
		if (entry.key == "START_LR")
		{
			string_to(entry.value, start_lr);
			continue;
		}
		if (entry.key == "END_LR")
		{
			string_to(entry.value, end_lr);
			continue;
		}
		if (entry.key == "DL")
		{
			string_to(entry.value, dL);
			continue;
		}
		if (entry.key == "LAMBDA")
		{
			string_to(entry.value, lambda);
			continue;
		}
		// Any other key is deliberately left untouched.
	}

	// Publish the (possibly unchanged) values under their configuration names.
	params["H"] = H;
	params["DR"] = dr;
	params["START_LR"] = start_lr;
	params["END_LR"] = end_lr;
	params["DL"] = dL;
	params["LAMBDA"] = lambda;

	source.close();
}

// Returns the move stored in the entry at the front of the player's history
// (the entry most recently pushed by calculate_next_move), or -1 when the
// history holds no entries.
MOVE_ID AI_Player_Impl::current_move()
{
	AI_History* past = player->history;
	if (!past->empty())
	{
		return past->front()->move;
	}
	return -1;
}

// Chooses the move to play for the current state and updates the learning
// history.
//
// Move selection
//   * If the state inspector reports a USER_MOVE_CHOICE other than NULL_MOVE,
//     that move is used directly and best_q stays 0.
//   * Otherwise every entry of player->moves is paired with the q-value
//     stored for it in the state's move map (0 when the map has no entry).
//     The candidates are shuffled and visited in order; a candidate is
//     accepted unless a uniform draw in [0,1] exceeds
//     exp((q - best_q) / (lambda * lr * best_q)). The last candidate is
//     always accepted so the scan cannot leave the candidate list.
//   * Map entries whose key orders before the move being examined and that
//     match no move are erased from the map.
//     NOTE(review): this merge assumes player->moves is sorted in the same
//     ascending order as the map's keys — confirm against the code that
//     fills player->moves.
//
// History update (only when the history is non-empty)
//   * OBJ_DELTA + USER_REINFORCEMENT is added to the newest entry's
//     obj_delta.
//   * Starting from best_q, the entries are folded from newest to oldest as
//     nq = obj_delta + dr * nq.
//   * When the history holds exactly H entries, the oldest one is removed,
//     its (state, move) q-value is updated with nq, and it is deleted.
//
// Finally a new history_info for the chosen move is pushed to the front of
// the history.
//
// Returns the chosen move. When there is no user choice and player->moves is
// empty, NULL_MOVE is returned and the history is left untouched.
MOVE_ID AI_Player_Impl::calculate_next_move()
{
	AI_StateInspector* si = player->state_inspector;
	AI_State state;	si->get_state(state);
	AI_QValues* qvalues = player->qvalues;
	AI_History* history = player->history;

	vector<MOVE_ID>& moves = player->moves;
	q_info qinfo = qvalues->get_qinfo(state);
	MQV_Map& mqv_map = qinfo.mqv_map;

	float lr = qinfo.lr;

	MOVE_ID next_move = si->get_state_value("USER_MOVE_CHOICE");
	STATE_VALUE best_q=0;
	if (next_move == NULL_MOVE)
	{
		// Nothing to choose from: bail out instead of indexing an empty
		// candidate list below. next_move is NULL_MOVE at this point.
		if (moves.empty()) return next_move;

		typedef vector<MOVE_ID>::size_type move_index;
		typedef pair<move_index, double> candidate;	// (index into moves, q-value)
		std::vector<candidate> val(moves.size());

		MQV_iter it = mqv_map.begin();
		MQV_iter end = mqv_map.end();
		best_q = -INT_MAX;

		for (move_index i = 0; i < moves.size(); ++i)
		{
			// Skip (and erase) every stale map entry that orders before the
			// current move, so a real entry sitting behind several stale
			// ones is still matched instead of being scored 0 and erased on
			// the next iteration.
			while (it != end && (*it).first < moves[i])
			{
				MQV_iter stale = it++;
				mqv_map.erase(stale);
			}

			if ((it == end) || (*it).first != moves[i])
			{
				// No stored q-value for this move: treat it as 0.
				val[i] = candidate(i, 0.0);
				best_q = (best_q >= 0 ? best_q : 0);
			}
			else
			{
				const qr_info& qr = (*it).second;
				best_q = (best_q >= qr.qvalue ? best_q : qr.qvalue);
				val[i] = candidate(i, qr.qvalue);
				++it;
			}
		}

		random_shuffle(val.begin(), val.end());

		move_index index = 0;
		for (;;)
		{
			// The comparison is written as !(draw > accept) so that a NaN
			// acceptance value (e.g. 0/0 when q and best_q are both 0)
			// counts as "accept", exactly as the original condition did.
			const double accept = exp((val[index].second-best_q)/(lambda*lr*best_q));
			if (!((rand()/(double)RAND_MAX) > accept)) break;

			// Every earlier candidate was rejected: settle on the last one
			// rather than reading past the end of val.
			if (index + 1 == val.size()) break;
			++index;
		}
		next_move = moves[val[index].first];
	}

	if (!history->empty())
	{
		history_info* hist_info = history->front();
		hist_info->obj_delta += si->get_state_value("OBJ_DELTA") + si->get_state_value("USER_REINFORCEMENT");
	
		hist_iter it = history->begin();
		hist_iter end = history->end();

		// Only referenced by the disabled lines inside the loop below; the
		// lookup is kept because get_state_value's side effects (if any) are
		// not visible from this file.
		STATE_VALUE reinforcement = si->get_state_value("USER_REINFORCEMENT");
		STATE_VALUE nq = best_q;

		// Fold the history from newest (front) to oldest (back).
		while (it != end)
		{
			hist_info = *it;
			//qvalues->update_reward(hist_info->state, hist_info->move, reinforcement);
			//hist_info->obj_delta += reinforcement;
			nq = hist_info->obj_delta + dr*nq;
			++it;
		}
		
		if (history->size() == H)
		{
			// The history is full: retire the oldest entry and credit it
			// with the folded value.
			hist_info = history->back();
			history->pop_back();
			qvalues->update_qvalue(hist_info->state, hist_info->move, nq);
			delete hist_info;
		}
	}
	
	// operator[] creates a default entry for next_move if the map has none.
	history->push_front(new history_info(next_move, state, mqv_map[next_move].reward));

	return next_move;
}