
;************************************************************
;IEEE 64 Bit Floating Point Library       (c) 2003 M.Cibulski
;
;Vector and Matrix Routines
;
;************************************************************

.DSEG

flt_mat_buffer:	.byte	9*FLT_SIZE
flt_vec_buffer:	.byte	3*FLT_SIZE

.CSEG

;****************************************************************************************
;Scalar Product
;
;Parameters:
;r16/17	Vector 1
;r18/19	Vector 2
;****************************************************************************************

flt_scalar_product:
	.EQU	flt_scp_x	=  1
	.EQU	flt_scp_y	=  3
	.EQU	flt_scp_lspace	=  5
	LOCAL	flt_scp_lspace

	std	Y+flt_scp_x	,r16
	std	Y+flt_scp_x+1	,r17
	std	Y+flt_scp_y	,r18
	std	Y+flt_scp_y+1	,r19

	f_load_array	flt_scp_x	,0
	f_push
	f_load_array	flt_scp_y	,0
	f_mul
	f_push
	f_load_array	flt_scp_x	,1
	f_push
	f_load_array	flt_scp_y	,1
	f_mul
	f_add
	f_push
	f_load_array	flt_scp_x	,2
	f_push
	f_load_array	flt_scp_y	,2
	f_mul
	f_add

	ENDLOCAL	flt_scp_lspace
	ret

;****************************************************************************************
;Make Unit Vector
;
;Parameters:
;r16/17	Vector ptr
;r18/19	Unit vector ptr
;****************************************************************************************

flt_vec_makeunit:
	.EQU	flt_vmu_vec	=  1
	.EQU	flt_vmu_uvec	=  3
	.EQU	flt_vmu_lspace	=  5
	LOCAL	flt_vmu_lspace

	std	Y+flt_vmu_vec	,r16
	std	Y+flt_vmu_vec+1	,r17
	std	Y+flt_vmu_uvec	,r18
	std	Y+flt_vmu_uvec+1	,r19

	movw	r19:r18	,r17:r16
	rcall	flt_scalar_product

	tst	AKKU_E1		;AKKU is Zero ?
	brne	flt_vmu_not_zero		;no

	tst	AKKU_E2
	brne	flt_vmu_not_zero		;no

	clt			;return false
	rjmp	flt_vmu_end

flt_vmu_not_zero:
	f_sqrt
	f_push
	f_const	flt__1
	f_xy
	f_div
	f_push
	f_push
	f_push

	f_load_array	flt_vmu_vec	,0
	f_mul
	f_store_array	flt_vmu_uvec	,0

	f_load_array	flt_vmu_vec	,1
	f_mul
	f_store_array	flt_vmu_uvec	,1

	f_load_array	flt_vmu_vec	,2
	f_mul
	f_store_array	flt_vmu_uvec	,2

	set			;return true

flt_vmu_end:
	ENDLOCAL	flt_vmu_lspace
	ret

;****************************************************************************************
;Row Col Product (called by matrix multiplication)
;
;Parameters:
;r16/17	Row Vector
;r18/19	Col Vector
;****************************************************************************************

flt_rowcol_product:
	.EQU	flt_rcp_row	=  1
	.EQU	flt_rcp_col	=  3
	.EQU	flt_rcp_lspace	=  5
	LOCAL	flt_rcp_lspace

	std	Y+flt_rcp_col	,r18
	std	Y+flt_rcp_col+1	,r19
	std	Y+flt_rcp_row	,r16
	std	Y+flt_rcp_row+1	,r17

	f_load_array	flt_rcp_col	,0
	f_push
	f_load_array	flt_rcp_row	,0
	f_mul
	f_push
	f_load_array	flt_rcp_col	,1
	f_push
	f_load_array	flt_rcp_row	,3
	f_mul
	f_add
	f_push
	f_load_array	flt_rcp_col	,2
	f_push
	f_load_array	flt_rcp_row	,6
	f_mul
	f_add

	ENDLOCAL	flt_rcp_lspace
	ret

;****************************************************************************************
;Matrix x Vector Multiplication
;Vector and result vector may be at the same place
;
;Parameters:
;r16/17	Matrix
;r18/19	Vector
;r20/r21	Result Vector
;****************************************************************************************

flt_mat_vec_mul:
	.EQU	flt_mvm_m	=  1
	.EQU	flt_mvm_v	=  3
	.EQU	flt_mvm_r	=  5
	.EQU	flt_mvm_row	=  7
	.EQU	flt_mvm_buffer	=  8		;result vector buffer
	.EQU	flt_mvm_lspace	= 32
	LOCAL	flt_mvm_lspace

	std	Y+flt_mvm_m	,r16
	std	Y+flt_mvm_m+1	,r17
	std	Y+flt_mvm_v	,r18
	std	Y+flt_mvm_v+1	,r19
	std	Y+flt_mvm_r	,r20
	std	Y+flt_mvm_r+1	,r21

	ldi	r16	,16		;offset to Z in first column vector
	std	Y+flt_mvm_row	,r16

flt_mvm_01:
	ldd	r16	,Y+flt_mvm_m		;matrix a base adress
	ldd	r17	,Y+flt_mvm_m+1
	clr	r20
	ldd	r21	,Y+flt_mvm_row
	add	r17	,r21		;add row offset
	adc	r16	,r20

	ldd	r18	,Y+flt_mvm_v
	ldd	r19	,Y+flt_mvm_v+1

	rcall	flt_rowcol_product

	movw	xh:xl	,yh:yl		;adress of local vector buffer
	adiw	xh:xl	,flt_mvm_buffer
	ldd	r17	,Y+flt_mvm_row		;row offset (0, 8, 16)
	clr	r16
	add	xl	,r17		;add offset
	adc	xh	,r16

	call	flt_store_x

	ldd	r16	,Y+flt_mmp_row		;row offset
	subi	r16	,8		;one row up
	std	Y+flt_mvm_row	,r16
	brcc	flt_mvm_01			;not negative

	ldd	xh	,Y+flt_mvm_r		;copy buffer into result vector
	ldd	xl	,Y+flt_mvm_r+1
	movw	zh:zl	,yh:yl
	adiw	zh:zl	,flt_mvm_buffer
	ldi	r16	,3*FLT_SIZE

flt_mvm_copyloop:
	ld	r0	,Z+
	st	X+	,r0
	dec	r16
	brne	flt_mvm_copyloop

	ENDLOCAL	flt_mvm_lspace
	ret

;****************************************************************************************
;Matrix x Matrix Multiplication
;
;Parameters:
;r16/17	Matrix A
;r18/19	Matrix B
;r20/r21	Matrix AxB
;****************************************************************************************

flt_mat_mat_mul:
	.EQU	flt_mmp_a	=  1
	.EQU	flt_mmp_b	=  3
	.EQU	flt_mmp_c	=  5
	.EQU	flt_mmp_row	=  7
	.EQU	flt_mmp_col	=  8
	.EQU	flt_mmp_lspace	=  9
	LOCAL	flt_mmp_lspace

	std	Y+flt_mmp_a	,r16
	std	Y+flt_mmp_a+1	,r17
	std	Y+flt_mmp_b	,r18
	std	Y+flt_mmp_b+1	,r19
	std	Y+flt_mmp_c	,r20
	std	Y+flt_mmp_c+1	,r21

	ldi	r16	,48		;offset to third column vector
	std	Y+flt_mmp_col	,r16

flt_matmatprod_01:
	ldi	r16	,16		;offset to Z in first column vector
	std	Y+flt_mmp_row	,r16

flt_matmatprod_02:
	ldd	r16	,Y+flt_mmp_a		;matrix a base adress
	ldd	r17	,Y+flt_mmp_a+1
	clr	r20
	ldd	r21	,Y+flt_mmp_row
	add	r17	,r21		;add row offset
	adc	r16	,r20

	ldd	r18	,Y+flt_mmp_b
	ldd	r19	,Y+flt_mmp_b+1
	clr	r20
	ldd	r21	,Y+flt_mmp_col
	add	r19	,r21
	adc	r18	,r20

	rcall	flt_rowcol_product

	ldd	r16	,Y+flt_mmp_row		;row offset (0, 8, 16)
	ldd	r17	,Y+flt_mmp_col		;column offset (0, 24, 48)
	add	r17	,r16		;matrix element's offset
	clr	r16
	ldd	xh	,Y+flt_mmp_c		;result matrix' base adress
	ldd	xl	,Y+flt_mmp_c+1
	add	xl	,r17		;add offset
	adc	xh	,r16

	call	flt_store_x

	ldd	r16	,Y+flt_mmp_row		;row offset
	subi	r16	,8		;one row up
	std	Y+flt_mmp_row	,r16
	brcc	flt_matmatprod_02			;not negative

	ldd	r16	,Y+flt_mmp_col		;column offset
	subi	r16	,24		;one column left
	std	Y+flt_mmp_col	,r16
	brcc	flt_matmatprod_01

	ENDLOCAL	flt_mmp_lspace
	ret

;****************************************************************************************
;Matrix Inversion
;
;Parameters:
;r16/17	Matrix
;r18/19	Inverse
;****************************************************************************************

;det = x11 * (x22 * x33 - x32 * x23) -
;      x21 * (x12 * x33 - x32 * x13) +
;      x31 * (x12 * x23 - x22 * x13);

;Cofactor Matrix
;J.x11 =  (x22 * x33 - x32 * x23);
;J.x12 = -(x21 * x33 - x31 * x23);
;J.x13 =  (x21 * x32 - x31 * x22);
;J.x21 = -(x12 * x33 - x32 * x13);
;J.x22 =  (x11 * x33 - x31 * x13);
;J.x23 = -(x11 * x32 - x31 * x12);
;J.x31 =  (x12 * x23 - x22 * x13);
;J.x32 = -(x11 * x23 - x21 * x13);
;J.x33 =  (x11 * x22 - x21 * x12);

;Inverse is transposed cofactor matrix

flt_mat_invert:
	.EQU	flt_mai_m1	=  1
	.EQU	flt_mai_m2	=  3
	.EQU	flt_mai_m3	=  5
	.EQU	flt_mai_i1	=  7
	.EQU	flt_mai_i2	=  9
	.EQU	flt_mai_i3	=  11
	.EQU	flt_mai_det	=  13
	.EQU	flt_mai_index	=  21
	.EQU	flt_mai_lspace	=  22
	LOCAL	flt_mai_lspace

	mov	xh	,r16
	mov	xl	,r17
	std	Y+flt_mai_m1	,xh
	std	Y+flt_mai_m1+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_mai_m2	,xh
	std	Y+flt_mai_m2+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_mai_m3	,xh
	std	Y+flt_mai_m3+1	,xl

	mov	xh	,r18
	mov	xl	,r19
	std	Y+flt_mai_i1	,xh
	std	Y+flt_mai_i1+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_mai_i2	,xh
	std	Y+flt_mai_i2+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_mai_i3	,xh
	std	Y+flt_mai_i3+1	,xl

	;det = x11 * (x22 * x33 - x32 * x23) -
	;      x21 * (x12 * x33 - x32 * x13) +
	;      x31 * (x12 * x23 - x22 * x13);

	;J.x11 =  (x22 * x33 - x32 * x23);
	f_load_array	flt_mai_m2	,1
	f_push
	f_load_array	flt_mai_m3	,2
	f_mul
	f_push
	f_load_array	flt_mai_m2	,2
	f_push
	f_load_array	flt_mai_m3	,1
	f_mul
	f_sub
	f_store_array	flt_mai_i1	,0

	f_push
	f_load_array	flt_mai_m1	,0
	f_mul
	f_push

	;J.x12 = -(x12 * x33 - x32 * x13) = (x32 * x13 - x12 * x33);
	f_load_array	flt_mai_m2	,2
	f_push
	f_load_array	flt_mai_m3	,0
	f_mul
	f_push
	f_load_array	flt_mai_m2	,0
	f_push
	f_load_array	flt_mai_m3	,2
	f_mul
	f_sub
	f_store_array	flt_mai_i2	,0

	f_push
	f_load_array	flt_mai_m1	,1
	f_mul
	f_add
	f_push

	;J.x13 =  (x12 * x23 - x22 * x13);
	f_load_array	flt_mai_m2	,0
	f_push
	f_load_array	flt_mai_m3	,1
	f_mul
	f_push
	f_load_array	flt_mai_m2	,1
	f_push
	f_load_array	flt_mai_m3	,0
	f_mul
	f_sub
	f_store_array	flt_mai_i3	,0

	f_push
	f_load_array	flt_mai_m1	,2
	f_mul
	f_add
	f_store_l	flt_mai_det

	;J.x21 = -(x21 * x33 - x31 * x23) = (x31 * x23 - x21 * x33);
	f_load_array	flt_mai_m1	,2
	f_push
	f_load_array	flt_mai_m3	,1
	f_mul
	f_push
	f_load_array	flt_mai_m1	,1
	f_push
	f_load_array	flt_mai_m3	,2
	f_mul
	f_sub
	f_store_array	flt_mai_i1	,1

	;J.x22 =  (x11 * x33 - x31 * x13);
	f_load_array	flt_mai_m1	,0
	f_push
	f_load_array	flt_mai_m3	,2
	f_mul
	f_push
	f_load_array	flt_mai_m1	,2
	f_push
	f_load_array	flt_mai_m3	,0
	f_mul
	f_sub
	f_store_array	flt_mai_i2	,1

	;J.x23 = -(x11 * x23 - x21 * x13) = (x21 * x13 - x11 * x23);
	f_load_array	flt_mai_m1	,1
	f_push
	f_load_array	flt_mai_m3	,0
	f_mul
	f_push
	f_load_array	flt_mai_m1	,0
	f_push
	f_load_array	flt_mai_m3	,1
	f_mul
	f_sub
	f_store_array	flt_mai_i3	,1

	;J.x31 =  (x21 * x32 - x31 * x22);
	f_load_array	flt_mai_m1	,1
	f_push
	f_load_array	flt_mai_m2	,2
	f_mul
	f_push
	f_load_array	flt_mai_m1	,2
	f_push
	f_load_array	flt_mai_m2	,1
	f_mul
	f_sub
	f_store_array	flt_mai_i1	,2

	;J.x32 = -(x11 * x32 - x31 * x12) = (x31 * x12 - x11 * x32);
	f_load_array	flt_mai_m1	,2
	f_push
	f_load_array	flt_mai_m2	,0
	f_mul
	f_push
	f_load_array	flt_mai_m1	,0
	f_push
	f_load_array	flt_mai_m2	,2
	f_mul
	f_sub
	f_store_array	flt_mai_i2	,2

	;J.x33 =  (x11 * x22 - x21 * x12);
	f_load_array	flt_mai_m1	,0
	f_push
	f_load_array	flt_mai_m2	,1
	f_mul
	f_push
	f_load_array	flt_mai_m1	,1
	f_push
	f_load_array	flt_mai_m2	,0
	f_mul
	f_sub
	f_store_array	flt_mai_i3	,2

	ldi	r16	,64		;offset to last matrix element
	std	Y+flt_mai_index	,r16

flt_matinv_01:
	ldd	xh	,Y+flt_mai_i1		;matrix i base adress
	ldd	xl	,Y+flt_mai_i1+1
	clr	r16
	ldd	r17	,Y+flt_mai_index
	add	xl	,r17		;add offset
	adc	xh	,r16
	call	flt_load_x

	f_push
	f_load_l	flt_mai_det
	f_div

	ldd	xh	,Y+flt_mai_i1		;matrix i base adress
	ldd	xl	,Y+flt_mai_i1+1
	clr	r16
	ldd	r17	,Y+flt_mai_index
	add	xl	,r17		;add offset
	adc	xh	,r16
	call	flt_store_x

	ldd	r16	,Y+flt_mai_index
	subi	r16	,8		;one column left
	std	Y+flt_mai_index	,r16
	brcc	flt_matinv_01

	ENDLOCAL	flt_mai_lspace
	ret


;****************************************************************************************
;Rotation Matrix (rotation around X axis)
;
;Parameters:
;r16:r17	Matrix ptr
;AKKU	rotation angle (radians)
;
;         (  1    0    0  )
;Matrix = (  0   cos -sin )
;         (  0   sin  cos )
;
;****************************************************************************************

flt_rotmatrix_x:
	.EQU	flt_rmx_m1	=  1
	.EQU	flt_rmx_m2	=  3
	.EQU	flt_rmx_m3	=  5
	.EQU	flt_rmx_lspace	=  7
	LOCAL	flt_rmx_lspace

	mov	xh	,r16
	mov	xl	,r17
	std	Y+flt_rmx_m1	,xh
	std	Y+flt_rmx_m1+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_rmx_m2	,xh
	std	Y+flt_rmx_m2+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_rmx_m3	,xh
	std	Y+flt_mai_m3+1	,xl

	f_push
	f_sin
	f_store_array	flt_rmx_m2	,2
	f_chs	r16
	f_store_array	flt_rmx_m3	,1

	f_pop
	f_cos
	f_store_array	flt_rmx_m2	,1
	f_store_array	flt_rmx_m3	,2

	f_const	flt__1
	f_store_array	flt_rmx_m1	,0

	f_const	flt__0
	f_store_array	flt_rmx_m1	,1
	f_store_array	flt_rmx_m1	,2
	f_store_array	flt_rmx_m2	,0
	f_store_array	flt_rmx_m3	,0

	ENDLOCAL	flt_rmx_lspace
	ret


;****************************************************************************************
;Rotation Matrix (rotation around Y axis)
;
;Parameters:
;r16:r17	Matrix ptr
;AKKU	rotation angle (radians)
;
;         ( cos   0   sin )
;Matrix = (  0    1    0  )
;         (-sin   0   cos )
;
;****************************************************************************************

flt_rotmatrix_y:
	.EQU	flt_rmy_m1	=  1
	.EQU	flt_rmy_m2	=  3
	.EQU	flt_rmy_m3	=  5
	.EQU	flt_rmy_lspace	=  7
	LOCAL	flt_rmy_lspace

	mov	xh	,r16
	mov	xl	,r17
	std	Y+flt_rmy_m1	,xh
	std	Y+flt_rmy_m1+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_rmy_m2	,xh
	std	Y+flt_rmy_m2+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_rmy_m3	,xh
	std	Y+flt_mai_m3+1	,xl

	f_push
	f_sin
	f_store_array	flt_rmy_m3	,0
	f_chs	r16
	f_store_array	flt_rmy_m1	,2

	f_pop
	f_cos
	f_store_array	flt_rmy_m1	,0
	f_store_array	flt_rmy_m3	,2

	f_const	flt__1
	f_store_array	flt_rmy_m2	,1

	f_const	flt__0
	f_store_array	flt_rmy_m1	,1
	f_store_array	flt_rmy_m2	,0
	f_store_array	flt_rmy_m2	,2
	f_store_array	flt_rmy_m3	,1

	ENDLOCAL	flt_rmy_lspace
	ret


;****************************************************************************************
;Rotation Matrix (rotation around Z axis)
;
;Parameters:
;r16:r17	Matrix ptr
;AKKU	rotation angle (radians)
;
;         ( cos -sin    0 )
;Matrix = ( sin  cos    0 )
;         (  0	0     1 )
;
;****************************************************************************************

flt_rotmatrix_z:
	.EQU	flt_rmz_m1	=  1
	.EQU	flt_rmz_m2	=  3
	.EQU	flt_rmz_m3	=  5
	.EQU	flt_rmz_lspace	=  7
	LOCAL	flt_rmz_lspace

	mov	xh	,r16
	mov	xl	,r17
	std	Y+flt_rmz_m1	,xh
	std	Y+flt_rmz_m1+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_rmz_m2	,xh
	std	Y+flt_rmz_m2+1	,xl
	adiw	xh:xl	,24
	std	Y+flt_rmz_m3	,xh
	std	Y+flt_mai_m3+1	,xl

	f_push
	f_sin
	f_store_array	flt_rmz_m1	,1
	f_chs	r16
	f_store_array	flt_rmz_m2	,0

	f_pop
	f_cos
	f_store_array	flt_rmz_m1	,0
	f_store_array	flt_rmz_m2	,1

	f_const	flt__1
	f_store_array	flt_rmz_m3	,2

	f_const	flt__0
	f_store_array	flt_rmz_m1	,2
	f_store_array	flt_rmz_m2	,2
	f_store_array	flt_rmz_m3	,0
	f_store_array	flt_rmz_m3	,1

	ENDLOCAL	flt_rmz_lspace
	ret


;****************************************************************************************
