
#pragma once

#ifdef __cplusplus

} // pause: extern "C"{

#include <math.h>
//float  __cdecl sqrtf( float );

#pragma warning(push)
#pragma warning(disable : 4127)

// 4x4 identity matrix (ones on the main diagonal, zeros elsewhere).
// Used as the default value of CMatrix4x4f.
const TMatrix4x4f TMATRIX4X4F_IDENTITY =
{
	1.0f, 0.0f, 0.0f, 0.0f,
	0.0f, 1.0f, 0.0f, 0.0f,
	0.0f, 0.0f, 1.0f, 0.0f,
	0.0f, 0.0f, 0.0f, 1.0f
};

// Computes the matrix product a_tLeft * a_tRight and stores it in *a_pOut.
//
// a_tLeft, a_tRight - operands (element _RC is row R, column C, 1-based).
// a_pOut            - receives the product; must not be null.
// Returns a_pOut so that calls can be chained.
//
// The product is accumulated in a local temporary and copied out at the end,
// so a_pOut may safely point at a_tLeft and/or a_tRight (in-place multiply).
inline TMatrix4x4f* Matrix4x4fMultiply(TMatrix4x4f const& a_tLeft, TMatrix4x4f const& a_tRight, TMatrix4x4f* a_pOut)
{
	TMatrix4x4f tResult;
	tResult._11 = a_tLeft._11*a_tRight._11 + a_tLeft._12*a_tRight._21 + a_tLeft._13*a_tRight._31 + a_tLeft._14*a_tRight._41;
	tResult._12 = a_tLeft._11*a_tRight._12 + a_tLeft._12*a_tRight._22 + a_tLeft._13*a_tRight._32 + a_tLeft._14*a_tRight._42;
	tResult._13 = a_tLeft._11*a_tRight._13 + a_tLeft._12*a_tRight._23 + a_tLeft._13*a_tRight._33 + a_tLeft._14*a_tRight._43;
	tResult._14 = a_tLeft._11*a_tRight._14 + a_tLeft._12*a_tRight._24 + a_tLeft._13*a_tRight._34 + a_tLeft._14*a_tRight._44;
	tResult._21 = a_tLeft._21*a_tRight._11 + a_tLeft._22*a_tRight._21 + a_tLeft._23*a_tRight._31 + a_tLeft._24*a_tRight._41;
	tResult._22 = a_tLeft._21*a_tRight._12 + a_tLeft._22*a_tRight._22 + a_tLeft._23*a_tRight._32 + a_tLeft._24*a_tRight._42;
	tResult._23 = a_tLeft._21*a_tRight._13 + a_tLeft._22*a_tRight._23 + a_tLeft._23*a_tRight._33 + a_tLeft._24*a_tRight._43;
	tResult._24 = a_tLeft._21*a_tRight._14 + a_tLeft._22*a_tRight._24 + a_tLeft._23*a_tRight._34 + a_tLeft._24*a_tRight._44;
	tResult._31 = a_tLeft._31*a_tRight._11 + a_tLeft._32*a_tRight._21 + a_tLeft._33*a_tRight._31 + a_tLeft._34*a_tRight._41;
	tResult._32 = a_tLeft._31*a_tRight._12 + a_tLeft._32*a_tRight._22 + a_tLeft._33*a_tRight._32 + a_tLeft._34*a_tRight._42;
	tResult._33 = a_tLeft._31*a_tRight._13 + a_tLeft._32*a_tRight._23 + a_tLeft._33*a_tRight._33 + a_tLeft._34*a_tRight._43;
	tResult._34 = a_tLeft._31*a_tRight._14 + a_tLeft._32*a_tRight._24 + a_tLeft._33*a_tRight._34 + a_tLeft._34*a_tRight._44;
	tResult._41 = a_tLeft._41*a_tRight._11 + a_tLeft._42*a_tRight._21 + a_tLeft._43*a_tRight._31 + a_tLeft._44*a_tRight._41;
	tResult._42 = a_tLeft._41*a_tRight._12 + a_tLeft._42*a_tRight._22 + a_tLeft._43*a_tRight._32 + a_tLeft._44*a_tRight._42;
	tResult._43 = a_tLeft._41*a_tRight._13 + a_tLeft._42*a_tRight._23 + a_tLeft._43*a_tRight._33 + a_tLeft._44*a_tRight._43;
	tResult._44 = a_tLeft._41*a_tRight._14 + a_tLeft._42*a_tRight._24 + a_tLeft._43*a_tRight._34 + a_tLeft._44*a_tRight._44;
	// Publish only after every input element has been read.
	*a_pOut = tResult;
	return a_pOut;
}

// Returns the determinant of the 3x3 sub-matrix obtained by removing row
// t_nRow and column t_nCol (both 0-based, 0..3) from a_tMatrix, i.e. the
// (t_nRow, t_nCol) minor. The matrix is read as 16 consecutive floats with
// four floats per row.
template<int t_nRow, int t_nCol>
inline float SubDeterminant(TMatrix4x4f const& a_tMatrix)
{
	// For each removed index 0..3: the three indices that remain.
	static int const aRemaining[4][3] =
	{
		{1, 2, 3},
		{0, 2, 3},
		{0, 1, 3},
		{0, 1, 2}
	};

	// Row offsets into the flat float array (4 floats per row).
	int const nRow0 = aRemaining[t_nRow][0]*4;
	int const nRow1 = aRemaining[t_nRow][1]*4;
	int const nRow2 = aRemaining[t_nRow][2]*4;
	int const nCol0 = aRemaining[t_nCol][0];
	int const nCol1 = aRemaining[t_nCol][1];
	int const nCol2 = aRemaining[t_nCol][2];

	float const* const pItems = reinterpret_cast<float const*>(&a_tMatrix);

	// Elements of the 3x3 sub-matrix, named m<row><col>.
	float const m00 = pItems[nRow0+nCol0];
	float const m01 = pItems[nRow0+nCol1];
	float const m02 = pItems[nRow0+nCol2];
	float const m10 = pItems[nRow1+nCol0];
	float const m11 = pItems[nRow1+nCol1];
	float const m12 = pItems[nRow1+nCol2];
	float const m20 = pItems[nRow2+nCol0];
	float const m21 = pItems[nRow2+nCol1];
	float const m22 = pItems[nRow2+nCol2];

	// Rule of Sarrus: three "forward" diagonals minus three "backward" ones.
	return m00 * m11 * m22 +
		   m10 * m21 * m02 +
		   m20 * m01 * m12 -
		   m02 * m11 * m20 -
		   m12 * m21 * m00 -
		   m22 * m01 * m10;
}

// Computes the inverse of a_tOrig via the adjugate (cofactor) method and
// stores it in *a_pOut.
//
// a_tOrig - matrix to invert.
// a_pOut  - receives the inverse; must not be null.
// Returns a_pOut.
//
// The determinant is expanded along the fourth row. No singularity check is
// performed: for a zero determinant the reciprocal is a division by zero and
// the output contains inf/NaN values.
//
// The result is built in a local temporary, so a_pOut may point at a_tOrig
// (in-place inversion).
inline TMatrix4x4f* Matrix4x4fInverse(TMatrix4x4f const& a_tOrig, TMatrix4x4f* a_pOut)
{
	// f = 1 / det(a_tOrig)
	float const f = 1.0f /
		(a_tOrig._44*SubDeterminant<3,3>(a_tOrig) - a_tOrig._43*SubDeterminant<3,2>(a_tOrig) +
		 a_tOrig._42*SubDeterminant<3,1>(a_tOrig) - a_tOrig._41*SubDeterminant<3,0>(a_tOrig));

	// Element (i, j) of the inverse is the signed minor (j, i) scaled by f.
	TMatrix4x4f tResult;
	tResult._11 = f * SubDeterminant<0,0>(a_tOrig);
	tResult._12 = -f * SubDeterminant<1,0>(a_tOrig);
	tResult._13 = f * SubDeterminant<2,0>(a_tOrig);
	tResult._14 = -f * SubDeterminant<3,0>(a_tOrig);
	tResult._21 = -f * SubDeterminant<0,1>(a_tOrig);
	tResult._22 = f * SubDeterminant<1,1>(a_tOrig);
	tResult._23 = -f * SubDeterminant<2,1>(a_tOrig);
	tResult._24 = f * SubDeterminant<3,1>(a_tOrig);
	tResult._31 = f * SubDeterminant<0,2>(a_tOrig);
	tResult._32 = -f * SubDeterminant<1,2>(a_tOrig);
	tResult._33 = f * SubDeterminant<2,2>(a_tOrig);
	tResult._34 = -f * SubDeterminant<3,2>(a_tOrig);
	tResult._41 = -f * SubDeterminant<0,3>(a_tOrig);
	tResult._42 = f * SubDeterminant<1,3>(a_tOrig);
	tResult._43 = -f * SubDeterminant<2,3>(a_tOrig);
	tResult._44 = f * SubDeterminant<3,3>(a_tOrig);

	// Publish only after every minor has been read from the source matrix.
	*a_pOut = tResult;
	return a_pOut;
}

// Transforms a point by a matrix-like object (anything MatrixItemf<r, c>()
// accepts). The point is treated as the row vector (x, y, z, 1); the first
// three components of the product are divided by the fourth (w) component.
// There is no guard against w == 0.
template<class TMatrix>
TVector3f TransformVector3(TMatrix const& a_tMatrix, TVector3f const& a_tVector)
{
	float const fInX = a_tVector.x;
	float const fInY = a_tVector.y;
	float const fInZ = a_tVector.z;

	// Homogeneous w of the transformed point and its reciprocal.
	float const fOutW = MatrixItemf<0, 3>(a_tMatrix)*fInX + MatrixItemf<1, 3>(a_tMatrix)*fInY + MatrixItemf<2, 3>(a_tMatrix)*fInZ + MatrixItemf<3, 3>(a_tMatrix);
	float const fInvW = 1.0f/fOutW;

	float const fOutX = fInvW*(MatrixItemf<0, 0>(a_tMatrix)*fInX + MatrixItemf<1, 0>(a_tMatrix)*fInY + MatrixItemf<2, 0>(a_tMatrix)*fInZ + MatrixItemf<3, 0>(a_tMatrix));
	float const fOutY = fInvW*(MatrixItemf<0, 1>(a_tMatrix)*fInX + MatrixItemf<1, 1>(a_tMatrix)*fInY + MatrixItemf<2, 1>(a_tMatrix)*fInZ + MatrixItemf<3, 1>(a_tMatrix));
	float const fOutZ = fInvW*(MatrixItemf<0, 2>(a_tMatrix)*fInX + MatrixItemf<1, 2>(a_tMatrix)*fInY + MatrixItemf<2, 2>(a_tMatrix)*fInZ + MatrixItemf<3, 2>(a_tMatrix));

	TVector3f tResult = { fOutX, fOutY, fOutZ };
	return tResult;
}

// Multiplies the upper-left 3x3 part of a_tMatrix by a_tVector taken as a
// column vector: component i of the result is the dot product of matrix row i
// with the vector. The fourth row and column are ignored.
// The "Inv" suffix reflects how TransformDirectionVector3 uses it: it passes
// the already inverted matrix here.
template<class TMatrix>
TVector3f TransformDirectionVector3Inv(TMatrix const& a_tMatrix, TVector3f const& a_tVector)
{
	float const fInX = a_tVector.x;
	float const fInY = a_tVector.y;
	float const fInZ = a_tVector.z;

	TVector3f tResult;
	tResult.x = MatrixItemf<0, 0>(a_tMatrix)*fInX + MatrixItemf<0, 1>(a_tMatrix)*fInY + MatrixItemf<0, 2>(a_tMatrix)*fInZ;
	tResult.y = MatrixItemf<1, 0>(a_tMatrix)*fInX + MatrixItemf<1, 1>(a_tMatrix)*fInY + MatrixItemf<1, 2>(a_tMatrix)*fInZ;
	tResult.z = MatrixItemf<2, 0>(a_tMatrix)*fInX + MatrixItemf<2, 1>(a_tMatrix)*fInY + MatrixItemf<2, 2>(a_tMatrix)*fInZ;
	return tResult;
}

// Transforms a direction vector by a matrix-like object: the matrix is first
// materialized into a plain TMatrix4x4f, then inverted, and the inverse is
// applied through TransformDirectionVector3Inv.
template<class TMatrix>
TVector3f TransformDirectionVector3(TMatrix const& a_tMatrix, TVector3f const& a_tVector)
{
	// Evaluate the (possibly lazy) matrix expression once.
	TMatrix4x4f tEvaluated;
	MatrixAssign(&tEvaluated, a_tMatrix);

	TMatrix4x4f tInverted;
	Matrix4x4fInverse(tEvaluated, &tInverted);

	return TransformDirectionVector3Inv(tInverted, a_tVector);
}

// Like TransformDirectionVector3Inv, but the result is additionally scaled by
// 1/d, where d is the dot product of the fourth matrix row (first three
// columns) with a_tVector.
//
// When d is zero the scale falls back to 1 (no scaling). The previous code
// compared the *reciprocal* with zero, so a zero d was never caught and the
// result was filled with inf/NaN. The old fallback for a reciprocal that
// evaluates to zero (infinite d) is kept as well.
template<class TMatrix>
TVector3f TransformDirectionVector3InvLen(TMatrix const& a_tMatrix, TVector3f const& a_tVector)
{
	float const fDivisor = MatrixItemf<3, 0>(a_tMatrix)*a_tVector.x + MatrixItemf<3, 1>(a_tMatrix)*a_tVector.y + MatrixItemf<3, 2>(a_tMatrix)*a_tVector.z;
	float fW = (fDivisor != 0.0f) ? 1.0f/fDivisor : 1.0f;
	if (fW == 0.0f) fW = 1.0f;
	TVector3f t =
	{
		fW*(MatrixItemf<0, 0>(a_tMatrix)*a_tVector.x + MatrixItemf<0, 1>(a_tMatrix)*a_tVector.y + MatrixItemf<0, 2>(a_tMatrix)*a_tVector.z),
		fW*(MatrixItemf<1, 0>(a_tMatrix)*a_tVector.x + MatrixItemf<1, 1>(a_tMatrix)*a_tVector.y + MatrixItemf<1, 2>(a_tMatrix)*a_tVector.z),
		fW*(MatrixItemf<2, 0>(a_tMatrix)*a_tVector.x + MatrixItemf<2, 1>(a_tMatrix)*a_tVector.y + MatrixItemf<2, 2>(a_tMatrix)*a_tVector.z)
	};
	return t;
}

// Same pipeline as TransformDirectionVector3 (materialize, invert, apply),
// but the final step uses TransformDirectionVector3InvLen, which also applies
// its 1/d scale factor.
template<class TMatrix>
TVector3f TransformDirectionVector3Len(TMatrix const& a_tMatrix, TVector3f const& a_tVector)
{
	// Evaluate the (possibly lazy) matrix expression once.
	TMatrix4x4f tEvaluated;
	MatrixAssign(&tEvaluated, a_tMatrix);

	TMatrix4x4f tInverted;
	Matrix4x4fInverse(tEvaluated, &tInverted);

	return TransformDirectionVector3InvLen(tInverted, a_tVector);
}

// Matrix operations using expression templates

// item accessor for TMatrix4x4f - compatibility with "Matrix" is defined by this accessor, not by TMatrix4x4f
// Element accessor for the plain TMatrix4x4f: returns a reference to the item
// at 0-based (t_nRow, t_nCol). The struct is viewed as 16 consecutive floats
// stored row by row (four floats per row).
template<int t_nRow, int t_nCol>
float const& MatrixItemf(TMatrix4x4f const& a_tMatrix)
{
	float const* const pItems = reinterpret_cast<float const*>(&a_tMatrix);
	return pItems[t_nRow*4 + t_nCol];
}

// Matrix multiplication
// Expression-template node representing the product (left * right) of two
// matrix-like operands. Nothing is computed on construction; individual
// elements are evaluated on demand by the matching MatrixItemf overload.
//
// The node keeps REFERENCES to both operands, so it must not outlive them
// (it is meant to live only inside a single full expression).
template<typename TLeftOp, typename TRightOp>
class CMatrix4x4Mulf
{
public:
	CMatrix4x4Mulf(TLeftOp const& a_tLeft, TRightOp const& a_tRight) :
		m_tLeft(a_tLeft), m_tRight(a_tRight)
	{
	}

	// The friend template needs its own parameter names: re-using TLeftOp and
	// TRightOp here would shadow the class template parameters, which standard
	// C++ rejects (only some compilers accept it as an extension).
	template<int t_nRow, int t_nCol, class TFriendLeftOp, class TFriendRightOp>
	friend float MatrixItemf(CMatrix4x4Mulf<TFriendLeftOp, TFriendRightOp> const& a_tMatrix);

private:
	TLeftOp const& m_tLeft;
	TRightOp const& m_tRight;
};

// Element accessor for a lazy product node: item (t_nRow, t_nCol) is the dot
// product of row t_nRow of the left operand with column t_nCol of the right
// operand. Operand items are fetched through their own MatrixItemf overloads.
template<int t_nRow, int t_nCol, class TLeftOp, class TRightOp>
float MatrixItemf(CMatrix4x4Mulf<TLeftOp, TRightOp> const& a_tMatrix)
{
	TLeftOp const& tLhs = a_tMatrix.m_tLeft;
	TRightOp const& tRhs = a_tMatrix.m_tRight;

	float fSum = MatrixItemf<0, t_nCol>(tRhs)*MatrixItemf<t_nRow, 0>(tLhs);
	fSum += MatrixItemf<1, t_nCol>(tRhs)*MatrixItemf<t_nRow, 1>(tLhs);
	fSum += MatrixItemf<2, t_nCol>(tRhs)*MatrixItemf<t_nRow, 2>(tLhs);
	fSum += MatrixItemf<3, t_nCol>(tRhs)*MatrixItemf<t_nRow, 3>(tLhs);
	return fSum;
}

// Product node whose left operand has already been evaluated: the left side
// is stored BY VALUE as a plain TMatrix4x4f, the right side is still held by
// reference (and therefore must outlive this object).
template<typename TRightOp>
class CMatrix4x4MulCachef
{
public:
	CMatrix4x4MulCachef(TMatrix4x4f const& a_tLeft, TRightOp const& a_tRight)
		: m_tLeft(a_tLeft)
		, m_tRight(a_tRight)
	{
	}

	// NOTE: the friend declaration that would let the data below be private is
	// disabled, which is why both members are currently public:
//	template<int t_nRow, int t_nCol, class TLeftOp, class TRightOp2>
//	friend float MatrixItemf(CMatrix4x4MulCachef<TRightOp2> const& a_tMatrix);
//
//private:
	TMatrix4x4f const m_tLeft;   // evaluated copy of the left operand
	TRightOp const& m_tRight;    // lazily evaluated right operand
};

// Element accessor for a cached product node: item (t_nRow, t_nCol) is the
// dot product of row t_nRow of the stored left matrix with column t_nCol of
// the right operand.
template<int t_nRow, int t_nCol, class TRightOp>
float MatrixItemf(CMatrix4x4MulCachef<TRightOp> const& a_tMatrix)
{
	TMatrix4x4f const& tLhs = a_tMatrix.m_tLeft;
	TRightOp const& tRhs = a_tMatrix.m_tRight;

	float fSum = MatrixItemf<0, t_nCol>(tRhs)*MatrixItemf<t_nRow, 0>(tLhs);
	fSum += MatrixItemf<1, t_nCol>(tRhs)*MatrixItemf<t_nRow, 1>(tLhs);
	fSum += MatrixItemf<2, t_nCol>(tRhs)*MatrixItemf<t_nRow, 2>(tLhs);
	fSum += MatrixItemf<3, t_nCol>(tRhs)*MatrixItemf<t_nRow, 3>(tLhs);
	return fSum;
}

// Matrix translation
// Matrix-like object describing a translation by (x, y, z). Its elements are
// produced on demand by the MatrixItemf overload that is declared a friend.
class CTranslationf
{
public:
	CTranslationf(float a_fX, float a_fY, float a_fZ)
		: m_fX(a_fX)
		, m_fY(a_fY)
		, m_fZ(a_fZ)
	{
	}

	template<int t_nRow, int t_nCol>
	friend float MatrixItemf(CTranslationf const& a_cTrans);

	// Inverse transformation: translation by the negated offsets.
	CTranslationf operator ~() const
	{
		float const fBackX = -m_fX;
		float const fBackY = -m_fY;
		float const fBackZ = -m_fZ;
		return CTranslationf(fBackX, fBackY, fBackZ);
	}

private:
	float m_fX;
	float m_fY;
	float m_fZ;
};

// Element accessor for CTranslationf: ones on the diagonal, the offsets
// (x, y, z) in row 3, columns 0..2, and zeros everywhere else.
// The conditions are compile-time constants (hence the C4127 suppression
// around this part of the file).
template<int t_nRow, int t_nCol>
inline float MatrixItemf(CTranslationf const& a_cTrans)
{
	if (t_nRow == t_nCol)
		return 1.0f;
	if (t_nRow != 3)
		return 0.0f;
	if (t_nCol == 0)
		return a_cTrans.m_fX;
	if (t_nCol == 1)
		return a_cTrans.m_fY;
	if (t_nCol == 2)
		return a_cTrans.m_fZ;
	// Only reachable for an out-of-range column index; without this return the
	// function could flow off its end (undefined behaviour, "not all control
	// paths return a value" warning).
	return 0.0f;
}

// Matrix scale
// Matrix-like object describing a scale along the three axes. Its elements
// are produced on demand by the MatrixItemf overload declared a friend.
class CScalef
{
public:
	// Uniform scale: the same factor on all three axes.
	explicit CScalef(float a_fScale)
	{
		for (int i = 0; i < 3; ++i)
			m_fScale[i] = a_fScale;
	}
	// Independent scale factor per axis.
	CScalef(float a_fScaleX, float a_fScaleY, float a_fScaleZ)
	{
		m_fScale[0] = a_fScaleX;
		m_fScale[1] = a_fScaleY;
		m_fScale[2] = a_fScaleZ;
	}

	template<int t_nRow, int t_nCol>
	friend float MatrixItemf(CScalef const& a_cScale);

	// Inverse transformation: reciprocal of every factor (a zero factor is
	// not guarded against).
	CScalef operator ~() const
	{
		float const fInvX = 1.0f/m_fScale[0];
		float const fInvY = 1.0f/m_fScale[1];
		float const fInvZ = 1.0f/m_fScale[2];
		return CScalef(fInvX, fInvY, fInvZ);
	}

private:
	float m_fScale[3];
};

// Element accessor for CScalef: the three scale factors on the diagonal
// (rows 0..2), 1 at (3, 3), and zeros off the diagonal.
template<int t_nRow, int t_nCol>
inline float MatrixItemf(CScalef const& a_cTrans)
{
	if (t_nRow == t_nCol)
		return t_nRow == 3 ? 1.0f : a_cTrans.m_fScale[t_nRow];
	return 0.0f;
}

// Matrix rotation (using quaternions)
// Quaternion used as the default value of CQuaternionf. The initializer order
// is presumably (fX, fY, fZ, fW), matching the brace initializers used inside
// CQuaternionf, i.e. a zero vector part with fW = 1 (no rotation).
const TQuaternionf TQUATERNIONF_IDENTITY = {0.0f, 0.0f, 0.0f, 1.0f};

class CQuaternionf
{
public:
	CQuaternionf() : m_t(TQUATERNIONF_IDENTITY) {}
	CQuaternionf(TQuaternionf const& a_t) : m_t(a_t) {}
	CQuaternionf(float const a_fAxisX, float const a_fAxisY, float const a_fAxisZ, float const a_fAngle)
	{
		float const f = sinf(0.5f*a_fAngle)/sqrtf(a_fAxisX*a_fAxisX + a_fAxisY*a_fAxisY + a_fAxisZ*a_fAxisZ);
		m_t.fW = cosf(0.5f*a_fAngle);
		m_t.fX = a_fAxisX*f;
		m_t.fY = a_fAxisY*f;
		m_t.fZ = a_fAxisZ*f;
	}
	CQuaternionf(float const* a_fAxis, float const a_fAngle)
	{
		float f = sinf(0.5f*a_fAngle)/sqrtf(a_fAxis[0]*a_fAxis[0] + a_fAxis[1]*a_fAxis[1] + a_fAxis[2]*a_fAxis[2]);
		m_t.fW = cosf(0.5f*a_fAngle);
		m_t.fX = a_fAxis[0]*f;
		m_t.fY = a_fAxis[1]*f;
		m_t.fZ = a_fAxis[2]*f;
	}
	CQuaternionf(CQuaternionf const& a_c) : m_t(a_c.m_t) {}
	explicit CQuaternionf(TMatrix4x4f a_tRotation)
	{
		// remove scale factor from matrix
		float fTmp = 1.0f/sqrtf(a_tRotation._11*a_tRotation._11 + a_tRotation._12*a_tRotation._12 + a_tRotation._13*a_tRotation._13);
		a_tRotation._11 *= fTmp; a_tRotation._12 *= fTmp; a_tRotation._13 *= fTmp;
		fTmp = 1.0f/sqrtf(a_tRotation._21*a_tRotation._21 + a_tRotation._22*a_tRotation._22 + a_tRotation._23*a_tRotation._23);
		a_tRotation._21 *= fTmp; a_tRotation._22 *= fTmp; a_tRotation._23 *= fTmp;
		fTmp = 1.0f/sqrtf(a_tRotation._31*a_tRotation._31 + a_tRotation._32*a_tRotation._32 + a_tRotation._33*a_tRotation._33);
		a_tRotation._31 *= fTmp; a_tRotation._32 *= fTmp; a_tRotation._33 *= fTmp;

		float fT = a_tRotation._11 + a_tRotation._22 + a_tRotation._33 + 1.0f;
		if (fT > 1e-4f)
		{
			float const fSqrT = sqrtf(fT);
			float const fS = 0.5f / fSqrT;
			m_t.fX = (a_tRotation._32 - a_tRotation._23) * fS;
			m_t.fY = (a_tRotation._13 - a_tRotation._31) * fS;
			m_t.fZ = (a_tRotation._21 - a_tRotation._12) * fS;
			m_t.fW = 0.5f * fSqrT;
		}
		else
		{
			if (a_tRotation._11 > a_tRotation._22 && a_tRotation._11 > a_tRotation._33)
			{
				float const fSqrT = sqrtf(1.0f + a_tRotation._11 - a_tRotation._22 - a_tRotation._33);
				float const fS = 0.5f / fSqrT;
				m_t.fX = 0.5f * fSqrT;
				m_t.fY = (a_tRotation._12 + a_tRotation._21) * fS;
				m_t.fZ = (a_tRotation._13 + a_tRotation._31) * fS;
				m_t.fW = (a_tRotation._23 - a_tRotation._32) * fS;
			}
			else if (a_tRotation._22 > a_tRotation._33)
			{
				float const fSqrT = sqrtf(1.0f + a_tRotation._22 - a_tRotation._11 - a_tRotation._33);
				float const fS = 0.5f / fSqrT;
				m_t.fX = (a_tRotation._12 + a_tRotation._21) * fS;
				m_t.fY = 0.5f * fSqrT;
				m_t.fZ = (a_tRotation._23 + a_tRotation._32) * fS;
				m_t.fW = (a_tRotation._13 - a_tRotation._31) * fS;
			}
			else
			{
				float const fSqrT = sqrtf(1.0f + a_tRotation._33 - a_tRotation._11 - a_tRotation._22);
				float const fS = 0.5f / fSqrT;
				m_t.fX = (a_tRotation._13 + a_tRotation._31) * fS;
				m_t.fY = (a_tRotation._23 + a_tRotation._32) * fS;
				m_t.fZ = 0.5f * fSqrT;
				m_t.fW = (a_tRotation._12 - a_tRotation._21) * fS;
			}
		}
	}

	template<int t_nRow, int t_nCol>
	friend float MatrixItemf(CQuaternionf const& a_tQuat);

	CQuaternionf operator +(CQuaternionf const& a_rhs) const
	{
		TQuaternionf const t1 = {m_t.fX*a_rhs.m_t.fW, m_t.fY*a_rhs.m_t.fW, m_t.fZ*a_rhs.m_t.fW};
		TQuaternionf const t2 = {a_rhs.m_t.fX*m_t.fW, a_rhs.m_t.fY*m_t.fW, a_rhs.m_t.fZ*m_t.fW};
		TQuaternionf const t3 = {
			a_rhs.m_t.fY*m_t.fZ-a_rhs.m_t.fZ*m_t.fY + t1.fX + t2.fX,
			a_rhs.m_t.fZ*m_t.fX-a_rhs.m_t.fX*m_t.fZ + t1.fY + t2.fY,
			a_rhs.m_t.fX*m_t.fY-a_rhs.m_t.fY*m_t.fX + t1.fZ + t2.fZ,
			a_rhs.m_t.fW * m_t.fW -
			a_rhs.m_t.fX * m_t.fX -
			a_rhs.m_t.fY * m_t.fY -
			a_rhs.m_t.fZ * m_t.fZ};

		return t3;
	}

	CQuaternionf const& operator +=(CQuaternionf const& a_rhs)
	{
		TQuaternionf const t1 = {m_t.fX*a_rhs.m_t.fW, m_t.fY*a_rhs.m_t.fW, m_t.fZ*a_rhs.m_t.fW};
		TQuaternionf const t2 = {a_rhs.m_t.fX*m_t.fW, a_rhs.m_t.fY*m_t.fW, a_rhs.m_t.fZ*m_t.fW};
		TQuaternionf const t3 = {
			a_rhs.m_t.fY*m_t.fZ-a_rhs.m_t.fZ*m_t.fY + t1.fX + t2.fX,
			a_rhs.m_t.fZ*m_t.fX-a_rhs.m_t.fX*m_t.fZ + t1.fY + t2.fY,
			a_rhs.m_t.fX*m_t.fY-a_rhs.m_t.fY*m_t.fX + t1.fZ + t2.fZ,
			a_rhs.m_t.fW * m_t.fW -
			a_rhs.m_t.fX * m_t.fX -
			a_rhs.m_t.fY * m_t.fY -
			a_rhs.m_t.fZ * m_t.fZ};
		m_t = t3;
		return *this;
	}
	CQuaternionf operator ~() const
	{
		TQuaternionf t = {m_t.fX, m_t.fY, m_t.fZ, -m_t.fW};
		return t;
	}

	operator TQuaternionf const&() const
	{
		return m_t;
	}

	TQuaternionf* operator&()
	{
		return &m_t;
	}

	void Normalize()
	{
		float const f = 1.0f/sqrtf(m_t.fX*m_t.fX + m_t.fY*m_t.fY + m_t.fZ*m_t.fZ + m_t.fW*m_t.fW);
		m_t.fX *= f;
		m_t.fY *= f;
		m_t.fZ *= f;
		m_t.fW *= f;
	}

private:
	TQuaternionf m_t;
};

// Element accessor for CQuaternionf: returns item (t_nRow, t_nCol) of the 4x4
// rotation matrix described by the quaternion. Row 3 and column 3 are those
// of the identity matrix.
template<int t_nRow, int t_nCol>
inline float MatrixItemf(CQuaternionf const& a_tQuat)
{
	// TODO: static assert (t_nRow >= 0 && t_nRow < 4 && t_nCol >= 0 && t_nCol < 4)
	float const fX = a_tQuat.m_t.fX;
	float const fY = a_tQuat.m_t.fY;
	float const fZ = a_tQuat.m_t.fZ;
	float const fW = a_tQuat.m_t.fW;

	// Both switches are on compile-time constants. For valid indices every
	// inner switch returns, so the missing breaks between rows never matter.
	switch (t_nRow)
	{
	case 0:
		switch (t_nCol)
		{
		case 0: return 1.0f - 2.0f * (fY*fY + fZ*fZ);
		case 1: return 2.0f * (fX*fY - fZ*fW);
		case 2: return 2.0f * (fZ*fX + fY*fW);
		case 3: return 0.0f;
		}
	case 1:
		switch (t_nCol)
		{
		case 0: return 2.0f * (fX*fY + fZ*fW);
		case 1: return 1.0f - 2.0f * (fZ*fZ + fX*fX);
		case 2: return 2.0f * (fY*fZ - fX*fW);
		case 3: return 0.0f;
		}
	case 2:
		switch (t_nCol)
		{
		case 0: return 2.0f * (fZ*fX - fY*fW);
		case 1: return 2.0f * (fY*fZ + fX*fW);
		case 2: return 1.0f - 2.0f * (fY*fY + fX*fX);
		case 3: return 0.0f;
		}
	case 3:
		switch (t_nCol)
		{
		case 0: return 0.0f;
		case 1: return 0.0f;
		case 2: return 0.0f;
		case 3: return 1.0f;
		}
	}
	return 0.0f; // just to prevent warning C4715: not all paths return value..
}

// Virtual-trackball rotation between two 2D points (x1, y1) -> (x2, y2).
// Each point is lifted to 3D: z = sqrt(1 - d2) while the squared distance d2
// from the origin is below 0.5, and z = 0.5 / sqrt(d2) otherwise. The
// rotation axis is the cross product of the second lifted point with the
// first; the angle is 2 * asin(half the distance between the lifted points,
// clamped to [-1, 1]).
// Identical input points yield TQUATERNIONF_IDENTITY.
inline CQuaternionf TrackballRotation(float const a_fX1, float const a_fY1, float const a_fX2, float const a_fY2)
{
	if (a_fX1 == a_fX2 && a_fY1 == a_fY2)
		return TQUATERNIONF_IDENTITY;

	// Lift the first point.
	float const fDistSq1 = a_fX1*a_fX1 + a_fY1*a_fY1;
	float fZ1;
	if (fDistSq1 < 0.5f)
		fZ1 = sqrtf(1-fDistSq1);
	else
		fZ1 = 0.5f/sqrtf(fDistSq1);

	// Lift the second point.
	float const fDistSq2 = a_fX2*a_fX2 + a_fY2*a_fY2;
	float fZ2;
	if (fDistSq2 < 0.5f)
		fZ2 = sqrtf(1-fDistSq2);
	else
		fZ2 = 0.5f/sqrtf(fDistSq2);

	// Rotation axis: (point 2) x (point 1).
	float const aAxis[3] =
	{
		a_fY2*fZ1-fZ2*a_fY1,
		fZ2*a_fX1-a_fX2*fZ1,
		a_fX2*a_fY1-a_fY2*a_fX1
	};

	// Half the chord length between the lifted points, clamped for asinf.
	float const fDX = a_fX1-a_fX2;
	float const fDY = a_fY1-a_fY2;
	float const fDZ = fZ1-fZ2;
	float fHalfChord = 0.5f*sqrtf(fDX*fDX + fDY*fDY + fDZ*fDZ);
	fHalfChord = fHalfChord > 1.0f ? 1.0f : (fHalfChord < -1.0f ? -1.0f : fHalfChord);

	return CQuaternionf(aAxis, 2.0f * asinf(fHalfChord));
}


// Matrix 'cache' - can hold intermediate results

class CMatrix4x4f
{
public:
	CMatrix4x4f() : m_tData(TMATRIX4X4F_IDENTITY)
	{
	}
	CMatrix4x4f(TMatrix4x4f const& a_tOrig) : m_tData(a_tOrig)
	{
	}
	CMatrix4x4f(CMatrix4x4f const& a_cOrig) : m_tData(a_cOrig.m_tData)
	{
	}
	template<class TMatrix>
	explicit CMatrix4x4f(TMatrix const& a_cOrig)
	{
		operator=(a_cOrig);
	}
	CMatrix4x4f& operator =(CMatrix4x4f const& a_tRightOp)
	{
		m_tData = a_tRightOp.m_tData;
		return *this;
	}
	template<class TMatrix>
	inline CMatrix4x4f& operator=(TMatrix const& a_tRightOp)
	{
		m_tData._11 = MatrixItemf<0, 0>(a_tRightOp);
		m_tData._12 = MatrixItemf<0, 1>(a_tRightOp);
		m_tData._13 = MatrixItemf<0, 2>(a_tRightOp);
		m_tData._14 = MatrixItemf<0, 3>(a_tRightOp);
		m_tData._21 = MatrixItemf<1, 0>(a_tRightOp);
		m_tData._22 = MatrixItemf<1, 1>(a_tRightOp);
		m_tData._23 = MatrixItemf<1, 2>(a_tRightOp);
		m_tData._24 = MatrixItemf<1, 3>(a_tRightOp);
		m_tData._31 = MatrixItemf<2, 0>(a_tRightOp);
		m_tData._32 = MatrixItemf<2, 1>(a_tRightOp);
		m_tData._33 = MatrixItemf<2, 2>(a_tRightOp);
		m_tData._34 = MatrixItemf<2, 3>(a_tRightOp);
		m_tData._41 = MatrixItemf<3, 0>(a_tRightOp);
		m_tData._42 = MatrixItemf<3, 1>(a_tRightOp);
		m_tData._43 = MatrixItemf<3, 2>(a_tRightOp);
		m_tData._44 = MatrixItemf<3, 3>(a_tRightOp);
		return *this;
	}

	operator TMatrix4x4f const&() const
	{
		return m_tData;
	}
	operator TMatrix4x4f const*() const
	{
		return &m_tData;
	}

	TMatrix4x4f* operator &()
	{
		return &m_tData;
	}

	template<int t_nRow, int t_nCol>
	friend float MatrixItemf(CMatrix4x4f const& a_cMatrix);

private:
	TMatrix4x4f m_tData;
};

// Element accessor for CMatrix4x4f: forwards to the TMatrix4x4f accessor on
// the wrapped raw matrix and returns the item by value.
template<int t_nRow, int t_nCol>
inline float MatrixItemf(CMatrix4x4f const& a_cMatrix)
{
	TMatrix4x4f const& tRaw = a_cMatrix.m_tData;
	return MatrixItemf<t_nRow, t_nCol>(tRaw);
}

// overloaded operators

// this function is too powerful -> it is replaced by a series of more specialized functions
// for CMatrix4x4f, TMatrix4x4f, CQuaternionf, CScalef, CTranslationf
//template<typename TLeftOp, typename TRightOp>
//inline CMatrix4x4Mulf<TLeftOp, TRightOp> operator*(TLeftOp const& a_tLeftOp, TRightOp const& a_tRightOp)
//{
//	return CMatrix4x4Mulf<TLeftOp, TRightOp>(a_tLeftOp, a_tRightOp);
//}
// CMatrix4x4f * (any matrix-like operand): builds a lazy product node.
// The node only stores references to both operands; no element is computed
// here.
template<typename TRightOp>
inline CMatrix4x4Mulf<CMatrix4x4f, TRightOp> operator*(CMatrix4x4f const& a_tLeftOp, TRightOp const& a_tRightOp)
{
	typedef CMatrix4x4Mulf<CMatrix4x4f, TRightOp> TProduct;
	return TProduct(a_tLeftOp, a_tRightOp);
}
// TMatrix4x4f * (any matrix-like operand): builds a lazy product node.
// The node only stores references to both operands; no element is computed
// here.
template<typename TRightOp>
inline CMatrix4x4Mulf<TMatrix4x4f, TRightOp> operator*(TMatrix4x4f const& a_tLeftOp, TRightOp const& a_tRightOp)
{
	typedef CMatrix4x4Mulf<TMatrix4x4f, TRightOp> TProduct;
	return TProduct(a_tLeftOp, a_tRightOp);
}
// CQuaternionf * (any matrix-like operand): builds a lazy product node.
// The node only stores references to both operands; no element is computed
// here.
template<typename TRightOp>
inline CMatrix4x4Mulf<CQuaternionf, TRightOp> operator*(CQuaternionf const& a_tLeftOp, TRightOp const& a_tRightOp)
{
	typedef CMatrix4x4Mulf<CQuaternionf, TRightOp> TProduct;
	return TProduct(a_tLeftOp, a_tRightOp);
}
// CScalef * (any matrix-like operand): builds a lazy product node.
// The node only stores references to both operands; no element is computed
// here.
template<typename TRightOp>
inline CMatrix4x4Mulf<CScalef, TRightOp> operator*(CScalef const& a_tLeftOp, TRightOp const& a_tRightOp)
{
	typedef CMatrix4x4Mulf<CScalef, TRightOp> TProduct;
	return TProduct(a_tLeftOp, a_tRightOp);
}
// CTranslationf * (any matrix-like operand): builds a lazy product node.
// The node only stores references to both operands; no element is computed
// here.
template<typename TRightOp>
inline CMatrix4x4Mulf<CTranslationf, TRightOp> operator*(CTranslationf const& a_tLeftOp, TRightOp const& a_tRightOp)
{
	typedef CMatrix4x4Mulf<CTranslationf, TRightOp> TProduct;
	return TProduct(a_tLeftOp, a_tRightOp);
}

// (cached product) * (any matrix-like operand): wraps both operands in a lazy
// product node. The node only stores references; no element is computed here.
template<typename TRightOp, typename TSecondRightOp>
inline CMatrix4x4Mulf<CMatrix4x4MulCachef<TRightOp>, TSecondRightOp> operator*(CMatrix4x4MulCachef<TRightOp> const& a_tLeftOp, TSecondRightOp const& a_tRightOp)
{
	typedef CMatrix4x4Mulf<CMatrix4x4MulCachef<TRightOp>, TSecondRightOp> TProduct;
	return TProduct(a_tLeftOp, a_tRightOp);
}

// (lazy product) * (any matrix-like operand): the left product is evaluated
// into a concrete matrix right away and stored by value in the returned cache
// node, so it is not re-evaluated for every element of the outer product.
// The right operand is still held by reference.
template<typename TLeftOp, typename TRightOp, typename TSecondRightOp>
inline CMatrix4x4MulCachef<TSecondRightOp> operator*(CMatrix4x4Mulf<TLeftOp, TRightOp> const& a_tLeftOp, TSecondRightOp const& a_tRightOp)
{
	CMatrix4x4f const cEvaluatedLeft(a_tLeftOp);
	return CMatrix4x4MulCachef<TSecondRightOp>(cEvaluatedLeft, a_tRightOp);
}

// Exact element-wise comparison of two matrices (no tolerance; matrices
// containing NaN never compare equal).
// Open question kept from the original: make it accept any form of Matrix ???
inline bool operator ==(TMatrix4x4f const& a_lhs, TMatrix4x4f const& a_rhs)
{
	// Compare row by row and bail out on the first mismatch.
	if (a_lhs._11 != a_rhs._11 || a_lhs._12 != a_rhs._12 ||
		a_lhs._13 != a_rhs._13 || a_lhs._14 != a_rhs._14)
		return false;
	if (a_lhs._21 != a_rhs._21 || a_lhs._22 != a_rhs._22 ||
		a_lhs._23 != a_rhs._23 || a_lhs._24 != a_rhs._24)
		return false;
	if (a_lhs._31 != a_rhs._31 || a_lhs._32 != a_rhs._32 ||
		a_lhs._33 != a_rhs._33 || a_lhs._34 != a_rhs._34)
		return false;
	if (a_lhs._41 != a_rhs._41 || a_lhs._42 != a_rhs._42 ||
		a_lhs._43 != a_rhs._43 || a_lhs._44 != a_rhs._44)
		return false;
	return true;
}

// Evaluates any matrix-like object (anything MatrixItemf<r, c>() accepts)
// into the plain matrix *a_pOut.
//
// The 16 elements are collected in a temporary and copied out at the end, so
// a_tRightOp may be (or may be a lazy expression that references) *a_pOut;
// writing straight into *a_pOut would overwrite inputs that are still needed
// for the remaining elements.
template<class TMatrix>
inline void MatrixAssign(TMatrix4x4f* a_pOut, TMatrix const& a_tRightOp)
{
	TMatrix4x4f tEvaluated;
	tEvaluated._11 = MatrixItemf<0, 0>(a_tRightOp);
	tEvaluated._12 = MatrixItemf<0, 1>(a_tRightOp);
	tEvaluated._13 = MatrixItemf<0, 2>(a_tRightOp);
	tEvaluated._14 = MatrixItemf<0, 3>(a_tRightOp);
	tEvaluated._21 = MatrixItemf<1, 0>(a_tRightOp);
	tEvaluated._22 = MatrixItemf<1, 1>(a_tRightOp);
	tEvaluated._23 = MatrixItemf<1, 2>(a_tRightOp);
	tEvaluated._24 = MatrixItemf<1, 3>(a_tRightOp);
	tEvaluated._31 = MatrixItemf<2, 0>(a_tRightOp);
	tEvaluated._32 = MatrixItemf<2, 1>(a_tRightOp);
	tEvaluated._33 = MatrixItemf<2, 2>(a_tRightOp);
	tEvaluated._34 = MatrixItemf<2, 3>(a_tRightOp);
	tEvaluated._41 = MatrixItemf<3, 0>(a_tRightOp);
	tEvaluated._42 = MatrixItemf<3, 1>(a_tRightOp);
	tEvaluated._43 = MatrixItemf<3, 2>(a_tRightOp);
	tEvaluated._44 = MatrixItemf<3, 3>(a_tRightOp);
	*a_pOut = tEvaluated;
}

// Cross product a_lhs x a_rhs.
inline TVector3f operator %(TVector3f const& a_lhs, TVector3f const& a_rhs)
{
	TVector3f tCross;
	tCross.x = a_lhs.y*a_rhs.z-a_lhs.z*a_rhs.y;
	tCross.y = a_lhs.z*a_rhs.x-a_lhs.x*a_rhs.z;
	tCross.z = a_lhs.x*a_rhs.y-a_lhs.y*a_rhs.x;
	return tCross;
}

// Dot (scalar) product of two vectors.
inline float operator *(TVector3f const& a_lhs, TVector3f const& a_rhs)
{
	float fDot = a_lhs.x*a_rhs.x;
	fDot += a_lhs.y*a_rhs.y;
	fDot += a_lhs.z*a_rhs.z;
	return fDot;
}

// Vector scaled by a scalar (vector on the left).
inline TVector3f operator *(TVector3f const& a_lhs, float const a_rhs)
{
	TVector3f tScaled;
	tScaled.x = a_lhs.x*a_rhs;
	tScaled.y = a_lhs.y*a_rhs;
	tScaled.z = a_lhs.z*a_rhs;
	return tScaled;
}

// Vector scaled by a scalar (scalar on the left).
inline TVector3f operator *(float const a_lhs, TVector3f const& a_rhs)
{
	TVector3f tScaled;
	tScaled.x = a_lhs*a_rhs.x;
	tScaled.y = a_lhs*a_rhs.y;
	tScaled.z = a_lhs*a_rhs.z;
	return tScaled;
}

// Component-wise sum of two vectors.
inline TVector3f operator +(TVector3f const& a_lhs, TVector3f const& a_rhs)
{
	TVector3f tSum;
	tSum.x = a_lhs.x+a_rhs.x;
	tSum.y = a_lhs.y+a_rhs.y;
	tSum.z = a_lhs.z+a_rhs.z;
	return tSum;
}

// Component-wise difference a_lhs - a_rhs.
inline TVector3f operator -(TVector3f const& a_lhs, TVector3f const& a_rhs)
{
	TVector3f tDiff;
	tDiff.x = a_lhs.x-a_rhs.x;
	tDiff.y = a_lhs.y-a_rhs.y;
	tDiff.z = a_lhs.z-a_rhs.z;
	return tDiff;
}

// Unary minus: the vector with every component negated.
inline TVector3f operator -(TVector3f const& a_rhs)
{
	TVector3f tNegated;
	tNegated.x = -a_rhs.x;
	tNegated.y = -a_rhs.y;
	tNegated.z = -a_rhs.z;
	return tNegated;
}

// Adds a_rhs to a_lhs in place and returns a_lhs (as a const reference).
inline TVector3f const& operator +=(TVector3f& a_lhs, TVector3f const& a_rhs)
{
	a_lhs.x = a_lhs.x + a_rhs.x;
	a_lhs.y = a_lhs.y + a_rhs.y;
	a_lhs.z = a_lhs.z + a_rhs.z;
	return a_lhs;
}

// Subtracts a_rhs from a_lhs in place and returns a_lhs (as a const
// reference).
inline TVector3f const& operator -=(TVector3f& a_lhs, TVector3f const& a_rhs)
{
	a_lhs.x = a_lhs.x - a_rhs.x;
	a_lhs.y = a_lhs.y - a_rhs.y;
	a_lhs.z = a_lhs.z - a_rhs.z;
	return a_lhs;
}

struct CVector3f : public TVector3f
{
	CVector3f(float a_x, float a_y, float a_z) { x = a_x; y = a_y; z = a_z; }
};

// Selects how the line through the two points passed to TriangleIntersection
// is bounded. r below is the line parameter of the plane hit (0 at the first
// point, 1 at the second point).
enum ETriangleIntersectionWith
{
	WithLine = 0, // infinite line: r is not restricted
	WithRay,      // ray starting at the first point: hits with r < 0 are rejected
	WithSegment   // segment between both points: hits with r > 1 or r < 0 are rejected
};
// Intersects the line / ray / segment (chosen by t_with) through a_tLine1 and
// a_tLine2 with the triangle (a_tTri1, a_tTri2, a_tTri3).
//
// Returns true when the hit point lies inside the triangle. Returns false for
// a degenerate triangle (zero normal), for a line (nearly) parallel to the
// triangle plane, for a hit outside the range allowed by t_with, and for a
// hit outside the triangle.
//
// a_tIntersection receives the intersection with the triangle PLANE. It is
// written as soon as the plane hit passes the range test, i.e. also when the
// function later returns false because the point is outside the triangle; it
// is left untouched on the earlier exits.
template<ETriangleIntersectionWith t_with>
inline bool TriangleIntersection(TVector3f const& a_tTri1, TVector3f const& a_tTri2, TVector3f const& a_tTri3, TVector3f const& a_tLine1, TVector3f const& a_tLine2, TVector3f& a_tIntersection)
{
	// Triangle edge vectors and plane normal.
	TVector3f const tEdgeU = a_tTri2 - a_tTri1;
	TVector3f const tEdgeV = a_tTri3 - a_tTri1;
	TVector3f const tNormal = tEdgeU % tEdgeV;
	bool const bDegenerate = tNormal.x == 0.0f && tNormal.y == 0.0f && tNormal.z == 0.0f;
	if (bDegenerate)
		return false;

	// Line direction and start point relative to the triangle.
	TVector3f const tDir = a_tLine2 - a_tLine1;
	TVector3f const tFromTri = a_tLine1 - a_tTri1;
	float const fNumer = -(tNormal * tFromTri);
	float const fDenom = tNormal * tDir;
	if (fabsf(fDenom) < 1e-6f)
		return false; // parallel to the triangle plane

	// Parameter of the plane hit along the line (0 = a_tLine1, 1 = a_tLine2).
	float const fParam = fNumer / fDenom;
	if (t_with == WithSegment && fParam > 1.0f)
		return false; // beyond the second point
	if ((t_with == WithSegment || t_with == WithRay) && fParam < 0.0f)
		return false; // behind the first point

	a_tIntersection = a_tLine1 + fParam * tDir;

	// Express the hit point in the (edge U, edge V) basis of the triangle.
	float const fUU = tEdgeU*tEdgeU;
	float const fUV = tEdgeU*tEdgeV;
	float const fVV = tEdgeV*tEdgeV;
	TVector3f const tHit = a_tIntersection - a_tTri1;
	float const fHU = tHit*tEdgeU;
	float const fHV = tHit*tEdgeV;
	float const fDet = fUV * fUV - fUU * fVV;

	// Inside the triangle <=> s >= 0, t >= 0 and s + t <= 1.
	float const fS = (fUV * fHV - fVV * fHU) / fDet;
	if (fS < 0.0 || fS > 1.0)
		return false;
	float const fT = (fUV * fHU - fUU * fHV) / fDet;
	if (fT < 0.0 || (fS + fT) > 1.0)
		return false;

	return true;
}


#pragma warning(pop)

//template <int t_nDegree, class TControlPoint>
//class CBezierTriangle
//{
//public:
//
//private:
//	TControlPoint m_aPts[((t_nDegree+2)*(t_nDegree+1))>>1];
//};
//
//template <class T, class TBaseGeometrySink>
//class CBezierSurfaceTriangulatorGS : public TBaseGeometrySink
//{
//public:
//	CTriangulateBezierSurfacesGS();
//
//	STDMETHOD(DirectBezierTriangles)(ULONG a_nDegree, ULONG nVertices, ULONG nStride, BYTE* pData)
//	{
//	}
//
//	STDMETHOD(DirectBezierQuads)(ULONG a_nDegreeU, ULONG a_nDegreeV, ULONG nVertices, ULONG nStride, BYTE* pData)
//	{
//	}
//
//	STDMETHOD(IndirectBezierTriangles)(ULONG a_nDegree, ULONG nVertices, ULONG nStride, BYTE* pData, ULONG a_nIndices, ULONG* a_pIndices)
//	{
//		return E_NOTIMPL;
//	}
//
//	STDMETHOD(IndirectBezierQuads)(ULONG a_nDegreeU, ULONG a_nDegreeV, ULONG nVertices, ULONG nStride, BYTE* pData, ULONG a_nIndices, ULONG* a_pIndices)
//	{
//		return E_NOTIMPL;
//	}
//};

extern "C"{ // continue: extern "C"{

#endif//__cplusplus