;;; -*- Mode: LISP; Syntax: Common-lisp; Package: CMI; Base: 10; Patch-File: Yes -*-

(in-package 'cmi)

(def-sprint-delivery-ucode sprint-delivery-setup

  "Set flag indicating we are on our first petit cycle (for parity checking and trace saving)."
  (set-router-state *sr-on-first-pass*)

  ;; time-limit <- src-vp-ratio * 64k
  ;; This uses the scratch-ram-swapper to do a multiply by 64k.
  (with-mm-reg (save-sram-contents)
    (ui (alu-simp scratch-ram (save-sram-contents $exp) (y src-vp-ratio))
	(sram-w write))
    (ui (alu-simp scratch-ram-swap (time-limit $exp) (y save-sram-contents))
	(sram-w write)))
	     
  (setreg old-time-limit time-limit)

  "Compute slice count."
  (with-mm-reg (temp)
    (setreg slice-count (constant 0))
    (setreg temp (constant 0))
    (ucblock ()
      (label add-another-slice)
      (setreg temp (+ temp (constant 32)))
      (setreg slice-count (+ slice-count 1))
      (when-< (temp length)
	(jump add-another-slice))))

  ;; Compute index-bits from index-limit.
  (with-mm-reg (temp)
    (setreg index-bits zero)
    (unless-router-state (logior *sr-queue-shared-p* *sr-queue-shared-aref32-p*)
      (setreg temp (half (- index-limit 1)))

      (ucblock ()
	(label not-zero-yet)
	(ui (alu-simp temp (temp (half $exp))))
	(ui (alu-simp (+ index-bits 1) (index-bits $exp)) ; The branch condition being used here is
					; based on the value of temp before the
					; halving.  This is the reason why the
					; initial value of temp is halved.
	    (seq cjp not-addr-zero not-zero-yet)))))

  "Initialize cycle length (there is a minimum to get timing right)"
  (setreg temp-length length)

  ;; length is message-length, this includes a physical address. -KB0
  (when-less (temp-length (constant (eval (+ 5 (* 2 (delivery-chip-cube-dimensions))))))
    "indicate we need a scratch area by loading magic constant"
    (setreg temp-length (constant (eval (+ 5 (* 2 (delivery-chip-cube-dimensions)))))))
  
  "setup the rug registers for router cube action"
  (set-rug-register-always :cube-control
			   (setup-rug-register-data
			     :cube-control
			     :which-dims-send
			     (car (rassoc (list :Router-to-Cube-Pin[0-11])
					  *which-dims-send-encoding* :test #'equal))
			     :which-dims-receive
			     (car (rassoc (list :Cube-Pin[0-11]-to-Cube-In[0-15])
					  *which-dims-receive-encoding* :test #'equal))))
  (setup-rug-register-always :message-p)
  
  "Initialize mod counter for router buffer length"
  (with-mm-reg (init-rbuf-ptr)
    (setreg init-rbuf-ptr (+ temp-length
			     (constant
			       (eval
				 (+ (- *chip-number-of-dimensions*)	; c
				    -1					; fencepost correction
				    1					; parity
				    )))))
    (when-not-zero index-bits
      (setreg init-rbuf-ptr (+ init-rbuf-ptr (constant 2)))		; two dummy cycles
      (setreg init-rbuf-ptr (+ init-rbuf-ptr index-bits)))
    (setreg rbuffer-pointer init-rbuf-ptr)
    (ui (setreg permanent-rbuffer-pointer init-rbuf-ptr)
	(enc-fld-a reset-odd)))

  (setreg message-hold (constant (eval *sr-message-hold*)))

  "Clear the recv-p so that the initial garbage coming out of the router is ignored."
  (lls (nil nil) (constant (eval *sr-recv-p*)) dont-care zero-flag sink-flag :always)

  (lls (nil nil) (constant (eval *sr-zero-maddr*)) dont-care zero-flag sink-flag :always)
  (lls (t nil) (constant (eval *sr-all-ones-maddr*)) dont-care zero-flag sink-flag :always)
  
  ;; Fill up TB (the address transposer) with 0's.
  (ucblock ()
    (ui (mop-maddr (constant (eval *sr-recv-p*))))
    (ui	(seq ldct 31))
    (label zero-tb-loop)
    (ui (sop :memory-bus :transposer-b
	     :memory-bus-direction :write
	     :memory-bus-pointer-control :post-add)
	(seq rpct zero-tb-loop)))

  "check to make sure we haven't run out of scratch memory"
  (when-greater (temp-length (constant (eval 128)))
    (uc-ferror "Router message length is too long")
    )

  "cycle starts partway through on the receiving side"
  "first go to end of receive buffer"
  (setreg temp-dest (+ (constant (eval *sr-dest-buffer*)) temp-length))
  "this is where receive buffer starts: (dest + length - (2c + 5))"
  (setreg temp-dest (- temp-dest (constant (eval (+ 4
						    (* 2 (delivery-chip-cube-dimensions)))))))

  (setreg temp-dest (constant (eval *sr-dest-buffer*)))
    
  (setreg left-in-block (constant 32))
  (setreg current-dest-base (constant (eval *sr-dest-base-table*)))
  (ui (mop-maddr (constant (eval *sr-zero-maddr*)))
      (sop :memory-bus :condition
	   :memory-bus-direction :write))


  "check for rbuffer overflow"
  (with-mm-reg (last)
    (setreg last (constant (eval *sr-rbuffer-base*)))
    (setreg last (+ last rbuffer-pointer))
    (when-greater (last (constant (eval (1- (+ *location-of-router-buffers*
					       *length-of-router-buffers*)))))
      (uc-ferror "Router buffer overflow:  router message is too long.")))

  ;;Clear the entire landing zone in case we are overwriting and using a non-multiple-of-32 bit message length. -KB0
  (with-mm-reg (temp)
    (setreg temp (constant (eval (+ 12 *sr-dest-buffer*))))
    (lls-up (constant (eval *max-router-message-length*))
	    (nil nil)
	    temp dont-care zero-flag sink-flag :always
	    :reset-dest nil))

  ;; Clear all 12 chip bits in the source buffer (we only write some of them).
  (with-mm-reg (temp)
    (setreg temp (constant (eval *sr-source-buffer*)))
    (lls-up (constant 12)
	    (nil nil)
	    temp dont-care zero-flag sink-flag :always
	    :reset-dest nil))

  (when-router-state *sr-save-trace*
    ;; Set up the pointers for the two parts of the trace.
    (setreg current-number-petit-cycles trace-address)
    (setreg trace-address (+ trace-address src-vp-ratio)))

  ;; Set up the indirect addressing parameters for the sprint chip.
  (set-sprint-config-for-ia-macro)
  (with-mm-reg (scratch-address)
    (setreg scratch-address (constant (eval *sr-scratch-slice*)))
    (load-sprint-rug-with-value scratch-address :vp-scratch-address))

  ;; setup router-mode for in-router-combiner-name
  ;; Set the Rug Register from the *ROUTER-SPRINT-DEFAULT-MODE-SLICE*. The value of the
  ;; FOP-CODE must be OR'ed with the default value to get the correct :ROUTER-MODE
  (ucblock-ns
    (ui (alu-simp (and router-state (constant (eval 3))))
	(mop::loadi :data 0)
	(arg-instr t))
    (loadb (or :a :b) (constant (eval *router-sprint-default-mode-slice*)) zero-flag :inv 1)
    (store (constant (eval cm-temp-bit-0)) sink-flag)
    "Set the Rug Register."
    (ui (mop::loada 0)			; ALU doesn't matter here.
	(mop-maddr (constant (eval cm-temp-bit-0))))
    (ui (mop::rug-w-a :address #.(rug-register-address (rug-register-named :router-mode)))))

  ;; We don't latch cell addresses but deliver to fixed processors.
  (write-rug-register :cell-address-0 (constant #x3210))

  "setup the :EMPTY-P Rug Register."
  "Set the EJECTOR mask in :EMPTY-P"
  (with-mm-reg (are-there-eunuchs)
    (setreg are-there-eunuchs (constant 1))
    (lls (:a :a)
	 (constant (eval *router-eunuch-mask-slice*))
	 (constant (eval *router-eunuch-mask-slice*))
	 zero-flag
	 sink-flag
	 :always)
    (latch-saved-global)
    (ui	(seq cjp saved-global establish-empty-p))

    (ucblock-ns
      ;; There are no eunuch chips in the system.  Update the
      ;; *router-sprint-deliver-all-slice* to show all 6 ejectors and also update
      ;; cell-address-1.
      (ui (setreg are-there-eunuchs (constant 0)))
      (ui (mop::loadi :data #x1f80))
      (ui (mop-maddr (constant (eval *router-sprint-deliver-all-slice*)))
	  (mop::rug-r-c :address :sink))
      (write-rug-register :cell-address-1 (constant #x0054)))

    (label establish-empty-p)
    (ucblock ()
      (ui (alu-simp (and router-state (constant (eval *sr-single-ejector*)))))
      (ui (alu-simp are-there-eunuchs)
	  (seq cjp addr-zero test-for-eunuchs))

      (ucblock-ns
	;; Enable only one ejector on each chip.
	(ui (mop::loada 0)
	    (mop-maddr (constant (eval *router-single-ejector-slice*))))
	(ui (seq jmap load-empty-p)))

      (label test-for-eunuchs)
      (ui (seq cjp addr-zero there-are-no-eunuchs))
      (ucblock-ns
	;; There are some eunuch chips so we only use 4 ejectors accross the whole
	;; machine.
	(setreg number-ejectors (constant 4))
	(ui (mop::loada 0)
	    (mop-maddr (constant (eval *router-sprint-ejector-slice*))))
	(ui (seq jmap load-empty-p)))

      (label there-are-no-eunuchs)
      (ucblock-ns
	;; There are no eunuch chips at all so we use 6 ejectors accross the whole
	;; machine.
	(setreg number-ejectors (constant 6))
	(ui (mop::loada 0)
	    (mop-maddr (constant (eval *router-ejector-slice*)))))

      (label load-empty-p)
      (ui (mop::rug-w-a :address #.(rug-register-address (rug-register-named :empty-p))))))

  ;; When we aren't combining in memory we store each 32 bit chunk to memory as it
  ;; arrives.  We need to update IA base register for each chunk, but can't use lbl and
  ;; lbh to do that because of a Boxer bug.  (Ie we can't write the value from a
  ;; sequencer register.)  To work around this we load a table in memory ahead of time
  ;; which has the appropriate base register values.
  (with-mm-reg (save-sram base inc)
    (setreg save-sram scratch-ram)
    (setreg base dest-base)
    (if-router-state *sr-aref32-p*
      (ucblock ((inter-slice-stride 'queue-index-limit))
	(setreg inc inter-slice-stride))
      (if-router-state *sr-queue-shared-aref32-p*
	(setreg inc index-limit)
	(setreg inc (constant 1))))
    (dotimes (i slice-count)
      (ui (alu-simp base (y $exp))
	  (lbl :sprint-lbl-data 0)
	  (arg-instr t)
	  (sram-w write))
      (ui (alu-simp (+ base inc) (base $exp))
	  (dbus-src-sel scratch-ram-swap)
	  (ybus-src-sel bypass)
	  (lbh :sprint-lbh-data 0)
	  (arg-instr t))
      (ui (mop-maddr (+ (constant (eval *sr-dest-base-table*)) i))
	  (sop :memory-bus :bypass-register
	       :memory-bus-direction :read)
	  (we-cntl hi-lo)))
    (ui (alu-simp save-sram)
	(ybus-src-sel alu)
	(sram-w write)))


  ;; Save the indirect addressing base and bounds registers.
  (ucblock-ns
    (write-sprint-rug-to-memory :vp-base (constant (eval *sr-save-ia-base*)))
    (write-sprint-rug-to-memory :vp-size (constant (eval *sr-save-ia-bound*))))

  ;; Compute and store element-limit
  (with-mm-reg (bound)

    ;; NB: *sr-aref32-p* must be checked before *sr-queue-p*.
    (if-router-state (logior *sr-aref32-p* *sr-queue-shared-aref32-p*)
      (setreg bound index-limit)

      (if-router-state *sr-queue-p*
	(setreg bound (constant #x000fffff))

	(ucblock-ns
	  ;; bound <- index-limit * slice-count

	  (ui (alu-simp (+ slice-count lit) (y $exp))
	      (lit-vert alu) (na-sf table-start )
	      (ybus-dest-sel dispatch) (ybus-src-sel alu))

	  ;; Note that the table start label is one ui before the beginning of the table.
	  ;; This is needed because slice-count starts at 1, not 0.
	  (label table-start)

	  (ui (alu-simp index-limit (bound $exp))
	      (seq jmap dispatch))

	  ;; case slice-count = 1.
	  (ui (seq jmap load-bound))
	  ;; case slice-count = 2.
	  (ui (alu-simp bound (bound (2* $exp)))
	      (seq jmap load-bound))
	  ;; case slice-count = 3.
	  (ui (alu-simp bound (bound (2* $exp)))
	      (seq jmap slice-count-is-three))
	  ;; case slice-count = 4.
	  (ui (alu-simp (+ bound bound) (bound (2* $exp)))
	      (seq jmap load-bound))

	  (label slice-count-is-three)
	  (ui (alu-simp (+ bound index-limit) (bound $exp))))))

      (label load-bound)

      ;; Load the computed bound into memory.
      (with-mm-reg (saved-sram)
	(ui (alu-simp bound
		      (saved-sram scratch-ram)
		      (y $exp))
	    (sram-w write)
	    (arg-instr t)
	    (lbl))
	(ui (alu-simp scratch-ram-swap)
	    (arg-instr t)
	    (lbh))
	(ui (mop-maddr (constant (eval *sr-element-limit*)))
	    (sop :memory-bus :bypass-register
		 :memory-bus-direction :read)
	    (we-cntl hi-lo))
	(ui (alu-simp saved-sram)
	    (sram-w write))))

  (when-router-state *sr-queue-p*
    ;; Store the address of the queue index limit.
    (with-mm-reg (saved-sram)
      (ui (alu-simp queue-index-limit
		    (saved-sram scratch-ram)
		    (y $exp))
	  (sram-w write)
	  (arg-instr t)
	  (lbl))
      (ui (alu-simp scratch-ram-swap)
	  (arg-instr t)
	  (lbh))
      (ui (mop-maddr (constant (eval *sr-queue-index-limit*)))
	  (sop :memory-bus :bypass-register
	       :memory-bus-direction :read)
	  (we-cntl hi-lo))))

  ;; Store index-limit into memory as *sr-notify-limit*, and notify into memory as
  ;; *sr-notify-base*. 
  (with-mm-reg (saved-sram)
    (ui (alu-simp index-limit
		  (saved-sram scratch-ram)
		  (y $exp))
	(sram-w write)
	(arg-instr t)
	(lbl))
    (ui (alu-simp scratch-ram-swap)
	(arg-instr t)
	(lbh))
    (ui (mop-maddr (constant (eval *sr-notify-limit*)))
	(sop :memory-bus :bypass-register
	     :memory-bus-direction :read)
	(we-cntl hi-lo))
    (ui (alu-simp notify (y $exp))
	(sram-w write)
	(arg-instr t)
	(lbl))
    (ui (alu-simp scratch-ram-swap)
	(arg-instr t)
	(lbh))
    (ui (mop-maddr (constant (eval *sr-notify-base*)))
	(sop :memory-bus :bypass-register
	     :memory-bus-direction :read)
	(we-cntl hi-lo))
    (ui (alu-simp saved-sram)
	(sram-w write)))

  (when-router-state *sr-queue-p*
    (unless-router-state *sr-aref32-p*
      (with-mm-reg (queue-limit)
	(setreg queue-limit (constant (eval *sr-queue-limit*)))
	(ui (alu-simp queue-index-limit)
	    (ybus-dest-sel sr))
	(lls-up (constant 20) (:b nil) queue-limit :sr dont-care sink-flag :always))))

  (when-router-state *sr-aref32-p*

    ;; Load the sprint bounds registers now.
    (load-sprint-rug-from-memory :vp-size (constant (eval *sr-element-limit*)))

    ;; For queue-aref32 write real-index-limit to *sr-queue-limit* processorwise.
    (when-router-state *sr-queue-p*
      (with-mm-reg (queue-limit)
	(setreg queue-limit (constant (eval *sr-queue-limit*)))
	(ucblock ((real-index-limit 'notify))
	  (ui (alu-simp (+ real-index-limit 1))
	      (ybus-dest-sel sr))
	  (lls-up (constant 15) (:b nil) queue-limit :sr dont-care sink-flag :always))))))

(eval-when (load eval)
  ;(warn "You must do a (cmi::link-paris) to apply the sprint-delivery-setup patch") 
  (cmi::increment-patch-level 3))




