Prevent 16 bits of data from hanging around and confusing the FIFO empty check (v6.4.9)
author Michael McMaster <michael@codesrc.com>
Fri, 21 May 2021 10:59:55 +0000 (20:59 +1000)
committer Michael McMaster <michael@codesrc.com>
Fri, 21 May 2021 11:09:14 +0000 (21:09 +1000)
STM32CubeMX/2021/2021.ioc
STM32CubeMX/2021/Src/fmc.c
src/firmware/config.c
src/firmware/main.c
src/firmware/scsiPhy.c

index 968c1e5467963d691b4adf0fa921a0cc6b89d9fd..294e7b09330f7fc6c52575f673227a9877761a6a 100644 (file)
@@ -54,8 +54,9 @@ FMC.AddressHoldTime1=2
 FMC.AddressSetupTime1=4
 FMC.BusTurnAroundDuration1=2
 FMC.DataSetupTime1=8
-FMC.IPParameters=AddressSetupTime1,AddressHoldTime1,DataSetupTime1,BusTurnAroundDuration1,NSMemoryDataWidth1-NorPsramChipSelect1_1,WriteOperation1-NorPsramChipSelect1_1
+FMC.IPParameters=AddressSetupTime1,AddressHoldTime1,DataSetupTime1,BusTurnAroundDuration1,NSMemoryDataWidth1-NorPsramChipSelect1_1,WriteOperation1-NorPsramChipSelect1_1,WriteFifo1
 FMC.NSMemoryDataWidth1-NorPsramChipSelect1_1=FMC_NORSRAM_MEM_BUS_WIDTH_16
+FMC.WriteFifo1=FMC_WRITE_FIFO_DISABLE
 FMC.WriteOperation1-NorPsramChipSelect1_1=FMC_WRITE_OPERATION_ENABLE
 File.Version=6
 KeepUserPlacement=false
index 06cab2625c991b92e1c071c81fc2bcb881280c28..a5f765a44617da40c25d9552edc048c8e3590392 100644 (file)
@@ -50,13 +50,12 @@ void MX_FMC_Init(void)
   hsram1.Init.WriteBurst = FMC_WRITE_BURST_DISABLE;
   hsram1.Init.ContinuousClock = FMC_CONTINUOUS_CLOCK_SYNC_ONLY;
   hsram1.Init.WriteFifo = FMC_WRITE_FIFO_DISABLE;
-  // WE MAY start writing another 512 bytes before this FIFO is empty!
 
   hsram1.Init.PageSize = FMC_PAGE_SIZE_NONE;
   /* Timing */
 
   // 1 clock to read the address, + 1 for synchroniser skew
-  Timing.AddressSetupTime = 5;
+  Timing.AddressSetupTime = 4;
   Timing.AddressHoldTime = 2;
 
   // Writes to device:
@@ -67,12 +66,12 @@ void MX_FMC_Init(void)
   // Reads from device:
   //   3 for synchroniser
   //   1 to write back to fsmc bus.
-  Timing.DataSetupTime = 9;
+  Timing.DataSetupTime = 8;
 
   // Allow a clock for us to release signals
   // Need to avoid both devices acting as outputs
   // on the multiplexed lines at the same time.
-  Timing.BusTurnAroundDuration = 3;
+  Timing.BusTurnAroundDuration = 2;
 
   Timing.CLKDivision = 16; // Ignored for async
   Timing.DataLatency = 17; // Ignored for async
index 091eaf7f465390b801d9f7f36ce2a459e6494d82..9052d148b2e6e4f5cfc66be961691bee016358b6 100755 (executable)
@@ -36,7 +36,7 @@
 \r
 #include <string.h>\r
 \r
-static const uint16_t FIRMWARE_VERSION = 0x0648;\r
+static const uint16_t FIRMWARE_VERSION = 0x0649;\r
 \r
 // Optional static config\r
 extern uint8_t* __fixed_config;\r
index c7dd55425474ae561fa20e0ff96f83fccd07cc7d..46c9208d6d2f7cbe68a831286162d63d0d781041 100755 (executable)
@@ -115,7 +115,17 @@ void mainInit()
         }\r
         else\r
         {\r
-            BSP_SD_WriteBlocks_DMA(scsiDev.data, h * 2000, 1);\r
+            uint8_t random[1024];\r
+            for (int p = 0; p < 512; ++p) random[p] = h + p ^ 0xAA;\r
+            BSP_SD_WriteBlocks_DMA(random, h * 2000, 1);\r
+            BSP_SD_ReadBlocks_DMA(scsiDev.data, h * 2000, 1);\r
+            BSP_SD_WriteBlocks_DMA(random, h * 2000 + 1, 2);\r
+            BSP_SD_ReadBlocks_DMA(&(scsiDev.data[512]), h * 2000 + 1, 2);\r
+            if (memcmp(random, scsiDev.data, 512) ||\r
+                memcmp(random, &(scsiDev.data[512]), 1024))\r
+            {\r
+                while (1) {}\r
+            }\r
         }\r
     }\r
     s2s_ledOff();\r
index df1ebc308cb3d34fe9aeea8471371899f1d68d5c..0c0c46c0e8ff0cac258752f3b99bdfc7bde4da05 100755 (executable)
@@ -1,19 +1,19 @@
-//     Copyright (C) 2013 Michael McMaster <michael@codesrc.com>\r
+//    Copyright (C) 2013 Michael McMaster <michael@codesrc.com>\r
 //\r
-//     This file is part of SCSI2SD.\r
+//    This file is part of SCSI2SD.\r
 //\r
-//     SCSI2SD is free software: you can redistribute it and/or modify\r
-//     it under the terms of the GNU General Public License as published by\r
-//     the Free Software Foundation, either version 3 of the License, or\r
-//     (at your option) any later version.\r
+//    SCSI2SD is free software: you can redistribute it and/or modify\r
+//    it under the terms of the GNU General Public License as published by\r
+//    the Free Software Foundation, either version 3 of the License, or\r
+//    (at your option) any later version.\r
 //\r
-//     SCSI2SD is distributed in the hope that it will be useful,\r
-//     but WITHOUT ANY WARRANTY; without even the implied warranty of\r
-//     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
-//     GNU General Public License for more details.\r
+//    SCSI2SD is distributed in the hope that it will be useful,\r
+//    but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+//    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
+//    GNU General Public License for more details.\r
 //\r
-//     You should have received a copy of the GNU General Public License\r
-//     along with SCSI2SD.  If not, see <http://www.gnu.org/licenses/>.\r
+//    You should have received a copy of the GNU General Public License\r
+//    along with SCSI2SD.  If not, see <http://www.gnu.org/licenses/>.\r
 \r
 #ifdef STM32F2xx\r
 #include "stm32f2xx.h"\r
@@ -67,803 +67,813 @@ volatile uint8_t scsiTxDMAComplete;
 // vector table.\r
 void EXTI4_IRQHandler()\r
 {\r
-       // Make sure that interrupt flag is set\r
-       if (__HAL_GPIO_EXTI_GET_IT(GPIO_PIN_4) != RESET) {\r
+    // Make sure that interrupt flag is set\r
+    if (__HAL_GPIO_EXTI_GET_IT(GPIO_PIN_4) != RESET) {\r
 \r
-               // Clear interrupt flag\r
-               __HAL_GPIO_EXTI_CLEAR_IT(GPIO_PIN_4);\r
+        // Clear interrupt flag\r
+        __HAL_GPIO_EXTI_CLEAR_IT(GPIO_PIN_4);\r
 \r
-               uint8_t statusFlags = *SCSI_STS_SCSI;\r
+        uint8_t statusFlags = *SCSI_STS_SCSI;\r
 \r
-               scsiDev.resetFlag = scsiDev.resetFlag || (statusFlags & 0x04);\r
+        scsiDev.resetFlag = scsiDev.resetFlag || (statusFlags & 0x04);\r
 \r
-               // selFlag is required for Philips P2000C which releases it after 600ns\r
-               // without waiting for BSY.\r
-               // Also required for some early Mac Plus roms\r
-               if (statusFlags & 0x08) // Check SEL flag\r
-               {\r
-                       scsiDev.selFlag = *SCSI_STS_SELECTED;\r
-               }\r
-       }\r
+        // selFlag is required for Philips P2000C which releases it after 600ns\r
+        // without waiting for BSY.\r
+        // Also required for some early Mac Plus roms\r
+        if (statusFlags & 0x08) // Check SEL flag\r
+        {\r
+            scsiDev.selFlag = *SCSI_STS_SELECTED;\r
+        }\r
+    }\r
 }\r
 \r
 void\r
 scsiSetDataCount(uint32_t count)\r
 {\r
-       *SCSI_DATA_CNT_HI = (count >> 16) & 0xff;\r
-       *SCSI_DATA_CNT_MID = (count >> 8) & 0xff;\r
-       *SCSI_DATA_CNT_LO = count & 0xff;\r
-       *SCSI_DATA_CNT_SET = 1;\r
+    *SCSI_DATA_CNT_HI = (count >> 16) & 0xff;\r
+    *SCSI_DATA_CNT_MID = (count >> 8) & 0xff;\r
+    *SCSI_DATA_CNT_LO = count & 0xff;\r
+    *SCSI_DATA_CNT_SET = 1;\r
 \r
 #ifdef STM32F4xx\r
-       __NOP();\r
-       __NOP();\r
+    __NOP();\r
+    __NOP();\r
 #endif\r
 }\r
 \r
 int scsiFifoReady(void)\r
 {\r
-       __NOP();\r
-       uint8_t test1 = HAL_GPIO_ReadPin(GPIOE, FPGA_GPIO3_Pin);\r
-       __NOP();\r
+    __NOP();\r
+    uint8_t test1 = HAL_GPIO_ReadPin(GPIOE, FPGA_GPIO3_Pin);\r
+    __NOP();\r
 #ifdef STM32F4xx\r
-       __NOP();\r
-       __NOP();\r
-       __NOP();\r
+    __NOP();\r
+    __NOP();\r
+    __NOP();\r
 #endif\r
-       uint8_t test2 = HAL_GPIO_ReadPin(GPIOE, FPGA_GPIO3_Pin);\r
-       return test1 != 0 && test2 != 0;\r
+    uint8_t test2 = HAL_GPIO_ReadPin(GPIOE, FPGA_GPIO3_Pin);\r
+    return test1 != 0 && test2 != 0;\r
 }\r
 \r
 uint8_t\r
 scsiReadByte(void)\r
 {\r
-       scsiSetDataCount(1);\r
+    scsiSetDataCount(1);\r
 \r
-       // Ready immediately. setDataCount resets fifos\r
+    // Ready immediately. setDataCount resets fifos\r
 \r
-       //__disable_irq();\r
-       while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
-       {\r
-               //__WFI(); // Wait for interrupt\r
-       }\r
-       //__enable_irq();\r
+    //__disable_irq();\r
+    while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
+    {\r
+        //__WFI(); // Wait for interrupt\r
+    }\r
+    //__enable_irq();\r
 \r
-       uint8_t val = scsiPhyRx();\r
-       // TODO scsiDev.parityError = scsiDev.parityError || SCSI_Parity_Error_Read();\r
+    uint8_t val = scsiPhyRx();\r
+    // TODO scsiDev.parityError = scsiDev.parityError || SCSI_Parity_Error_Read();\r
 \r
-       return val;\r
+    return val;\r
 }\r
 \r
 \r
 void\r
 scsiReadPIO(uint8_t* data, uint32_t count, int* parityError)\r
 {\r
-       uint16_t* fifoData = (uint16_t*)data;\r
-       uint32_t count16 = (count + 1) / 2;\r
-\r
-       int i = 0;\r
-       while ((i  < count16) && likely(!scsiDev.resetFlag))\r
-       {\r
-               // Wait until FIFO is full (or complete)\r
-               while (!scsiFifoReady() && likely(!scsiDev.resetFlag))\r
-               {\r
-                       // spin\r
-               }\r
-\r
-               if (count16 - i >= SCSI_FIFO_DEPTH16)\r
-               {\r
-                       uint32_t chunk16 = SCSI_FIFO_DEPTH16;\r
-\r
-                       // Let gcc unroll the loop as much as possible.\r
-                       for (uint32_t k = 0; k + 128 <= chunk16; k += 128)\r
-                       {\r
-                               fifoData[i + k] = scsiPhyRx();\r
-                               fifoData[i + k + 1] = scsiPhyRx();\r
-                               fifoData[i + k + 2] = scsiPhyRx();\r
-                               fifoData[i + k + 3] = scsiPhyRx();\r
-                               fifoData[i + k + 4] = scsiPhyRx();\r
-                               fifoData[i + k + 5] = scsiPhyRx();\r
-                               fifoData[i + k + 6] = scsiPhyRx();\r
-                               fifoData[i + k + 7] = scsiPhyRx();\r
-                               fifoData[i + k + 8] = scsiPhyRx();\r
-                               fifoData[i + k + 9] = scsiPhyRx();\r
-                               fifoData[i + k + 10] = scsiPhyRx();\r
-                               fifoData[i + k + 11] = scsiPhyRx();\r
-                               fifoData[i + k + 12] = scsiPhyRx();\r
-                               fifoData[i + k + 13] = scsiPhyRx();\r
-                               fifoData[i + k + 14] = scsiPhyRx();\r
-                               fifoData[i + k + 15] = scsiPhyRx();\r
-                               fifoData[i + k + 16] = scsiPhyRx();\r
-                               fifoData[i + k + 17] = scsiPhyRx();\r
-                               fifoData[i + k + 18] = scsiPhyRx();\r
-                               fifoData[i + k + 19] = scsiPhyRx();\r
-                               fifoData[i + k + 20] = scsiPhyRx();\r
-                               fifoData[i + k + 21] = scsiPhyRx();\r
-                               fifoData[i + k + 22] = scsiPhyRx();\r
-                               fifoData[i + k + 23] = scsiPhyRx();\r
-                               fifoData[i + k + 24] = scsiPhyRx();\r
-                               fifoData[i + k + 25] = scsiPhyRx();\r
-                               fifoData[i + k + 26] = scsiPhyRx();\r
-                               fifoData[i + k + 27] = scsiPhyRx();\r
-                               fifoData[i + k + 28] = scsiPhyRx();\r
-                               fifoData[i + k + 29] = scsiPhyRx();\r
-                               fifoData[i + k + 30] = scsiPhyRx();\r
-                               fifoData[i + k + 31] = scsiPhyRx();\r
-                               fifoData[i + k + 32] = scsiPhyRx();\r
-                               fifoData[i + k + 33] = scsiPhyRx();\r
-                               fifoData[i + k + 34] = scsiPhyRx();\r
-                               fifoData[i + k + 35] = scsiPhyRx();\r
-                               fifoData[i + k + 36] = scsiPhyRx();\r
-                               fifoData[i + k + 37] = scsiPhyRx();\r
-                               fifoData[i + k + 38] = scsiPhyRx();\r
-                               fifoData[i + k + 39] = scsiPhyRx();\r
-                               fifoData[i + k + 40] = scsiPhyRx();\r
-                               fifoData[i + k + 41] = scsiPhyRx();\r
-                               fifoData[i + k + 42] = scsiPhyRx();\r
-                               fifoData[i + k + 43] = scsiPhyRx();\r
-                               fifoData[i + k + 44] = scsiPhyRx();\r
-                               fifoData[i + k + 45] = scsiPhyRx();\r
-                               fifoData[i + k + 46] = scsiPhyRx();\r
-                               fifoData[i + k + 47] = scsiPhyRx();\r
-                               fifoData[i + k + 48] = scsiPhyRx();\r
-                               fifoData[i + k + 49] = scsiPhyRx();\r
-                               fifoData[i + k + 50] = scsiPhyRx();\r
-                               fifoData[i + k + 51] = scsiPhyRx();\r
-                               fifoData[i + k + 52] = scsiPhyRx();\r
-                               fifoData[i + k + 53] = scsiPhyRx();\r
-                               fifoData[i + k + 54] = scsiPhyRx();\r
-                               fifoData[i + k + 55] = scsiPhyRx();\r
-                               fifoData[i + k + 56] = scsiPhyRx();\r
-                               fifoData[i + k + 57] = scsiPhyRx();\r
-                               fifoData[i + k + 58] = scsiPhyRx();\r
-                               fifoData[i + k + 59] = scsiPhyRx();\r
-                               fifoData[i + k + 60] = scsiPhyRx();\r
-                               fifoData[i + k + 61] = scsiPhyRx();\r
-                               fifoData[i + k + 62] = scsiPhyRx();\r
-                               fifoData[i + k + 63] = scsiPhyRx();\r
-                               fifoData[i + k + 64] = scsiPhyRx();\r
-                               fifoData[i + k + 65] = scsiPhyRx();\r
-                               fifoData[i + k + 66] = scsiPhyRx();\r
-                               fifoData[i + k + 67] = scsiPhyRx();\r
-                               fifoData[i + k + 68] = scsiPhyRx();\r
-                               fifoData[i + k + 69] = scsiPhyRx();\r
-                               fifoData[i + k + 70] = scsiPhyRx();\r
-                               fifoData[i + k + 71] = scsiPhyRx();\r
-                               fifoData[i + k + 72] = scsiPhyRx();\r
-                               fifoData[i + k + 73] = scsiPhyRx();\r
-                               fifoData[i + k + 74] = scsiPhyRx();\r
-                               fifoData[i + k + 75] = scsiPhyRx();\r
-                               fifoData[i + k + 76] = scsiPhyRx();\r
-                               fifoData[i + k + 77] = scsiPhyRx();\r
-                               fifoData[i + k + 78] = scsiPhyRx();\r
-                               fifoData[i + k + 79] = scsiPhyRx();\r
-                               fifoData[i + k + 80] = scsiPhyRx();\r
-                               fifoData[i + k + 81] = scsiPhyRx();\r
-                               fifoData[i + k + 82] = scsiPhyRx();\r
-                               fifoData[i + k + 83] = scsiPhyRx();\r
-                               fifoData[i + k + 84] = scsiPhyRx();\r
-                               fifoData[i + k + 85] = scsiPhyRx();\r
-                               fifoData[i + k + 86] = scsiPhyRx();\r
-                               fifoData[i + k + 87] = scsiPhyRx();\r
-                               fifoData[i + k + 88] = scsiPhyRx();\r
-                               fifoData[i + k + 89] = scsiPhyRx();\r
-                               fifoData[i + k + 90] = scsiPhyRx();\r
-                               fifoData[i + k + 91] = scsiPhyRx();\r
-                               fifoData[i + k + 92] = scsiPhyRx();\r
-                               fifoData[i + k + 93] = scsiPhyRx();\r
-                               fifoData[i + k + 94] = scsiPhyRx();\r
-                               fifoData[i + k + 95] = scsiPhyRx();\r
-                               fifoData[i + k + 96] = scsiPhyRx();\r
-                               fifoData[i + k + 97] = scsiPhyRx();\r
-                               fifoData[i + k + 98] = scsiPhyRx();\r
-                               fifoData[i + k + 99] = scsiPhyRx();\r
-                               fifoData[i + k + 100] = scsiPhyRx();\r
-                               fifoData[i + k + 101] = scsiPhyRx();\r
-                               fifoData[i + k + 102] = scsiPhyRx();\r
-                               fifoData[i + k + 103] = scsiPhyRx();\r
-                               fifoData[i + k + 104] = scsiPhyRx();\r
-                               fifoData[i + k + 105] = scsiPhyRx();\r
-                               fifoData[i + k + 106] = scsiPhyRx();\r
-                               fifoData[i + k + 107] = scsiPhyRx();\r
-                               fifoData[i + k + 108] = scsiPhyRx();\r
-                               fifoData[i + k + 109] = scsiPhyRx();\r
-                               fifoData[i + k + 110] = scsiPhyRx();\r
-                               fifoData[i + k + 111] = scsiPhyRx();\r
-                               fifoData[i + k + 112] = scsiPhyRx();\r
-                               fifoData[i + k + 113] = scsiPhyRx();\r
-                               fifoData[i + k + 114] = scsiPhyRx();\r
-                               fifoData[i + k + 115] = scsiPhyRx();\r
-                               fifoData[i + k + 116] = scsiPhyRx();\r
-                               fifoData[i + k + 117] = scsiPhyRx();\r
-                               fifoData[i + k + 118] = scsiPhyRx();\r
-                               fifoData[i + k + 119] = scsiPhyRx();\r
-                               fifoData[i + k + 120] = scsiPhyRx();\r
-                               fifoData[i + k + 121] = scsiPhyRx();\r
-                               fifoData[i + k + 122] = scsiPhyRx();\r
-                               fifoData[i + k + 123] = scsiPhyRx();\r
-                               fifoData[i + k + 124] = scsiPhyRx();\r
-                               fifoData[i + k + 125] = scsiPhyRx();\r
-                               fifoData[i + k + 126] = scsiPhyRx();\r
-                               fifoData[i + k + 127] = scsiPhyRx();\r
-                       }\r
-\r
-                       i += chunk16;\r
-               }\r
-               else\r
-               {\r
-                       uint32_t chunk16 = count16 - i;\r
-\r
-                       uint32_t k = 0;\r
-                       for (; k + 4 <= chunk16; k += 4)\r
-                       {\r
-                               fifoData[i + k] = scsiPhyRx();\r
-                               fifoData[i + 1 + k] = scsiPhyRx();\r
-                               fifoData[i + 2 + k] = scsiPhyRx();\r
-                               fifoData[i + 3 + k] = scsiPhyRx();\r
-                       }\r
-                       for (; k < chunk16; ++k)\r
-                       {\r
-                               fifoData[i + k] = scsiPhyRx();\r
-                       }\r
-                       i += chunk16;\r
-               }\r
-       }\r
-\r
-       *parityError |= scsiParityError();\r
+    uint16_t* fifoData = (uint16_t*)data;\r
+    uint32_t count16 = (count + 1) / 2;\r
+\r
+    int i = 0;\r
+    while ((i  < count16) && likely(!scsiDev.resetFlag))\r
+    {\r
+        // Wait until FIFO is full (or complete)\r
+        while (!scsiFifoReady() && likely(!scsiDev.resetFlag))\r
+        {\r
+            // spin\r
+        }\r
+\r
+        if (count16 - i >= SCSI_FIFO_DEPTH16)\r
+        {\r
+            uint32_t chunk16 = SCSI_FIFO_DEPTH16;\r
+\r
+            // Let gcc unroll the loop as much as possible.\r
+            for (uint32_t k = 0; k + 128 <= chunk16; k += 128)\r
+            {\r
+                fifoData[i + k] = scsiPhyRx();\r
+                fifoData[i + k + 1] = scsiPhyRx();\r
+                fifoData[i + k + 2] = scsiPhyRx();\r
+                fifoData[i + k + 3] = scsiPhyRx();\r
+                fifoData[i + k + 4] = scsiPhyRx();\r
+                fifoData[i + k + 5] = scsiPhyRx();\r
+                fifoData[i + k + 6] = scsiPhyRx();\r
+                fifoData[i + k + 7] = scsiPhyRx();\r
+                fifoData[i + k + 8] = scsiPhyRx();\r
+                fifoData[i + k + 9] = scsiPhyRx();\r
+                fifoData[i + k + 10] = scsiPhyRx();\r
+                fifoData[i + k + 11] = scsiPhyRx();\r
+                fifoData[i + k + 12] = scsiPhyRx();\r
+                fifoData[i + k + 13] = scsiPhyRx();\r
+                fifoData[i + k + 14] = scsiPhyRx();\r
+                fifoData[i + k + 15] = scsiPhyRx();\r
+                fifoData[i + k + 16] = scsiPhyRx();\r
+                fifoData[i + k + 17] = scsiPhyRx();\r
+                fifoData[i + k + 18] = scsiPhyRx();\r
+                fifoData[i + k + 19] = scsiPhyRx();\r
+                fifoData[i + k + 20] = scsiPhyRx();\r
+                fifoData[i + k + 21] = scsiPhyRx();\r
+                fifoData[i + k + 22] = scsiPhyRx();\r
+                fifoData[i + k + 23] = scsiPhyRx();\r
+                fifoData[i + k + 24] = scsiPhyRx();\r
+                fifoData[i + k + 25] = scsiPhyRx();\r
+                fifoData[i + k + 26] = scsiPhyRx();\r
+                fifoData[i + k + 27] = scsiPhyRx();\r
+                fifoData[i + k + 28] = scsiPhyRx();\r
+                fifoData[i + k + 29] = scsiPhyRx();\r
+                fifoData[i + k + 30] = scsiPhyRx();\r
+                fifoData[i + k + 31] = scsiPhyRx();\r
+                fifoData[i + k + 32] = scsiPhyRx();\r
+                fifoData[i + k + 33] = scsiPhyRx();\r
+                fifoData[i + k + 34] = scsiPhyRx();\r
+                fifoData[i + k + 35] = scsiPhyRx();\r
+                fifoData[i + k + 36] = scsiPhyRx();\r
+                fifoData[i + k + 37] = scsiPhyRx();\r
+                fifoData[i + k + 38] = scsiPhyRx();\r
+                fifoData[i + k + 39] = scsiPhyRx();\r
+                fifoData[i + k + 40] = scsiPhyRx();\r
+                fifoData[i + k + 41] = scsiPhyRx();\r
+                fifoData[i + k + 42] = scsiPhyRx();\r
+                fifoData[i + k + 43] = scsiPhyRx();\r
+                fifoData[i + k + 44] = scsiPhyRx();\r
+                fifoData[i + k + 45] = scsiPhyRx();\r
+                fifoData[i + k + 46] = scsiPhyRx();\r
+                fifoData[i + k + 47] = scsiPhyRx();\r
+                fifoData[i + k + 48] = scsiPhyRx();\r
+                fifoData[i + k + 49] = scsiPhyRx();\r
+                fifoData[i + k + 50] = scsiPhyRx();\r
+                fifoData[i + k + 51] = scsiPhyRx();\r
+                fifoData[i + k + 52] = scsiPhyRx();\r
+                fifoData[i + k + 53] = scsiPhyRx();\r
+                fifoData[i + k + 54] = scsiPhyRx();\r
+                fifoData[i + k + 55] = scsiPhyRx();\r
+                fifoData[i + k + 56] = scsiPhyRx();\r
+                fifoData[i + k + 57] = scsiPhyRx();\r
+                fifoData[i + k + 58] = scsiPhyRx();\r
+                fifoData[i + k + 59] = scsiPhyRx();\r
+                fifoData[i + k + 60] = scsiPhyRx();\r
+                fifoData[i + k + 61] = scsiPhyRx();\r
+                fifoData[i + k + 62] = scsiPhyRx();\r
+                fifoData[i + k + 63] = scsiPhyRx();\r
+                fifoData[i + k + 64] = scsiPhyRx();\r
+                fifoData[i + k + 65] = scsiPhyRx();\r
+                fifoData[i + k + 66] = scsiPhyRx();\r
+                fifoData[i + k + 67] = scsiPhyRx();\r
+                fifoData[i + k + 68] = scsiPhyRx();\r
+                fifoData[i + k + 69] = scsiPhyRx();\r
+                fifoData[i + k + 70] = scsiPhyRx();\r
+                fifoData[i + k + 71] = scsiPhyRx();\r
+                fifoData[i + k + 72] = scsiPhyRx();\r
+                fifoData[i + k + 73] = scsiPhyRx();\r
+                fifoData[i + k + 74] = scsiPhyRx();\r
+                fifoData[i + k + 75] = scsiPhyRx();\r
+                fifoData[i + k + 76] = scsiPhyRx();\r
+                fifoData[i + k + 77] = scsiPhyRx();\r
+                fifoData[i + k + 78] = scsiPhyRx();\r
+                fifoData[i + k + 79] = scsiPhyRx();\r
+                fifoData[i + k + 80] = scsiPhyRx();\r
+                fifoData[i + k + 81] = scsiPhyRx();\r
+                fifoData[i + k + 82] = scsiPhyRx();\r
+                fifoData[i + k + 83] = scsiPhyRx();\r
+                fifoData[i + k + 84] = scsiPhyRx();\r
+                fifoData[i + k + 85] = scsiPhyRx();\r
+                fifoData[i + k + 86] = scsiPhyRx();\r
+                fifoData[i + k + 87] = scsiPhyRx();\r
+                fifoData[i + k + 88] = scsiPhyRx();\r
+                fifoData[i + k + 89] = scsiPhyRx();\r
+                fifoData[i + k + 90] = scsiPhyRx();\r
+                fifoData[i + k + 91] = scsiPhyRx();\r
+                fifoData[i + k + 92] = scsiPhyRx();\r
+                fifoData[i + k + 93] = scsiPhyRx();\r
+                fifoData[i + k + 94] = scsiPhyRx();\r
+                fifoData[i + k + 95] = scsiPhyRx();\r
+                fifoData[i + k + 96] = scsiPhyRx();\r
+                fifoData[i + k + 97] = scsiPhyRx();\r
+                fifoData[i + k + 98] = scsiPhyRx();\r
+                fifoData[i + k + 99] = scsiPhyRx();\r
+                fifoData[i + k + 100] = scsiPhyRx();\r
+                fifoData[i + k + 101] = scsiPhyRx();\r
+                fifoData[i + k + 102] = scsiPhyRx();\r
+                fifoData[i + k + 103] = scsiPhyRx();\r
+                fifoData[i + k + 104] = scsiPhyRx();\r
+                fifoData[i + k + 105] = scsiPhyRx();\r
+                fifoData[i + k + 106] = scsiPhyRx();\r
+                fifoData[i + k + 107] = scsiPhyRx();\r
+                fifoData[i + k + 108] = scsiPhyRx();\r
+                fifoData[i + k + 109] = scsiPhyRx();\r
+                fifoData[i + k + 110] = scsiPhyRx();\r
+                fifoData[i + k + 111] = scsiPhyRx();\r
+                fifoData[i + k + 112] = scsiPhyRx();\r
+                fifoData[i + k + 113] = scsiPhyRx();\r
+                fifoData[i + k + 114] = scsiPhyRx();\r
+                fifoData[i + k + 115] = scsiPhyRx();\r
+                fifoData[i + k + 116] = scsiPhyRx();\r
+                fifoData[i + k + 117] = scsiPhyRx();\r
+                fifoData[i + k + 118] = scsiPhyRx();\r
+                fifoData[i + k + 119] = scsiPhyRx();\r
+                fifoData[i + k + 120] = scsiPhyRx();\r
+                fifoData[i + k + 121] = scsiPhyRx();\r
+                fifoData[i + k + 122] = scsiPhyRx();\r
+                fifoData[i + k + 123] = scsiPhyRx();\r
+                fifoData[i + k + 124] = scsiPhyRx();\r
+                fifoData[i + k + 125] = scsiPhyRx();\r
+                fifoData[i + k + 126] = scsiPhyRx();\r
+                fifoData[i + k + 127] = scsiPhyRx();\r
+            }\r
+\r
+            i += chunk16;\r
+        }\r
+        else\r
+        {\r
+            uint32_t chunk16 = count16 - i;\r
+\r
+            uint32_t k = 0;\r
+            for (; k + 4 <= chunk16; k += 4)\r
+            {\r
+                fifoData[i + k] = scsiPhyRx();\r
+                fifoData[i + 1 + k] = scsiPhyRx();\r
+                fifoData[i + 2 + k] = scsiPhyRx();\r
+                fifoData[i + 3 + k] = scsiPhyRx();\r
+            }\r
+            for (; k < chunk16; ++k)\r
+            {\r
+                fifoData[i + k] = scsiPhyRx();\r
+            }\r
+            i += chunk16;\r
+        }\r
+    }\r
+\r
+    *parityError |= scsiParityError();\r
 }\r
 \r
 void\r
 scsiRead(uint8_t* data, uint32_t count, int* parityError)\r
 {\r
-       int i = 0;\r
-       *parityError = 0;\r
+    int i = 0;\r
+    *parityError = 0;\r
 \r
-       while (i < count && likely(!scsiDev.resetFlag))\r
-       {\r
-               uint32_t chunk = ((count - i) > SCSI_XFER_MAX)\r
-                       ? SCSI_XFER_MAX : (count - i);\r
-               scsiSetDataCount(chunk);\r
+    while (i < count && likely(!scsiDev.resetFlag))\r
+    {\r
+        uint32_t chunk = ((count - i) > SCSI_XFER_MAX)\r
+            ? SCSI_XFER_MAX : (count - i);\r
+        scsiSetDataCount(chunk);\r
 \r
-               scsiReadPIO(data + i, chunk, parityError);\r
+        scsiReadPIO(data + i, chunk, parityError);\r
 \r
-               while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
-               {\r
-                   __disable_irq();\r
+        while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
+        {\r
+            __disable_irq();\r
             if (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
             {\r
-                       __WFI();\r
+                __WFI();\r
             }\r
-                   __enable_irq();\r
-               }\r
+            __enable_irq();\r
+        }\r
 \r
-               i += chunk;\r
-       }\r
+        i += chunk;\r
+    }\r
 }\r
 \r
 void\r
 scsiWriteByte(uint8_t value)\r
 {\r
-       scsiSetDataCount(1);\r
-       scsiPhyTx(value);\r
-\r
-       //__disable_irq();\r
-       while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
-       {\r
-               //__WFI();\r
-       }\r
-       //__enable_irq();\r
+    scsiSetDataCount(1);\r
+    scsiPhyTx(value);\r
+\r
+    //__disable_irq();\r
+    while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
+    {\r
+        //__WFI();\r
+    }\r
+    //__enable_irq();\r
 }\r
 \r
 void\r
 scsiWritePIO(const uint8_t* data, uint32_t count)\r
 {\r
-       uint16_t* fifoData = (uint16_t*)data;\r
-       uint32_t count16 = (count + 1) / 2;\r
-\r
-       int i = 0;\r
-       while ((i  < count16) && likely(!scsiDev.resetFlag))\r
-       {\r
-               while (!scsiFifoReady() && likely(!scsiDev.resetFlag))\r
-               {\r
-                       // Spin\r
-               }\r
-\r
-               if (count16 - i >= SCSI_FIFO_DEPTH16)\r
-               {\r
-                       uint32_t chunk16 = SCSI_FIFO_DEPTH16;\r
-\r
-                       // Let gcc unroll the loop as much as possible.\r
-                       for (uint32_t k = 0; k + 128 <= chunk16; k += 128)\r
-                       {\r
-                               scsiPhyTx32(fifoData[i + k], fifoData[i + k + 1]);\r
-                               scsiPhyTx32(fifoData[i + 2 + k], fifoData[i + k + 3]);\r
-                               scsiPhyTx32(fifoData[i + 4 + k], fifoData[i + k + 5]);\r
-                               scsiPhyTx32(fifoData[i + 6 + k], fifoData[i + k + 7]);\r
-                               scsiPhyTx32(fifoData[i + 8 + k], fifoData[i + k + 9]);\r
-                               scsiPhyTx32(fifoData[i + 10 + k], fifoData[i + k + 11]);\r
-                               scsiPhyTx32(fifoData[i + 12 + k], fifoData[i + k + 13]);\r
-                               scsiPhyTx32(fifoData[i + 14 + k], fifoData[i + k + 15]);\r
-                               scsiPhyTx32(fifoData[i + 16 + k], fifoData[i + k + 17]);\r
-                               scsiPhyTx32(fifoData[i + 18 + k], fifoData[i + k + 19]);\r
-                               scsiPhyTx32(fifoData[i + 20 + k], fifoData[i + k + 21]);\r
-                               scsiPhyTx32(fifoData[i + 22 + k], fifoData[i + k + 23]);\r
-                               scsiPhyTx32(fifoData[i + 24 + k], fifoData[i + k + 25]);\r
-                               scsiPhyTx32(fifoData[i + 26 + k], fifoData[i + k + 27]);\r
-                               scsiPhyTx32(fifoData[i + 28 + k], fifoData[i + k + 29]);\r
-                               scsiPhyTx32(fifoData[i + 30 + k], fifoData[i + k + 31]);\r
-\r
-                               scsiPhyTx32(fifoData[i + 32 + k], fifoData[i + k + 33]);\r
-                               scsiPhyTx32(fifoData[i + 34 + k], fifoData[i + k + 35]);\r
-                               scsiPhyTx32(fifoData[i + 36 + k], fifoData[i + k + 37]);\r
-                               scsiPhyTx32(fifoData[i + 38 + k], fifoData[i + k + 39]);\r
-                               scsiPhyTx32(fifoData[i + 40 + k], fifoData[i + k + 41]);\r
-                               scsiPhyTx32(fifoData[i + 42 + k], fifoData[i + k + 43]);\r
-                               scsiPhyTx32(fifoData[i + 44 + k], fifoData[i + k + 45]);\r
-                               scsiPhyTx32(fifoData[i + 46 + k], fifoData[i + k + 47]);\r
-                               scsiPhyTx32(fifoData[i + 48 + k], fifoData[i + k + 49]);\r
-                               scsiPhyTx32(fifoData[i + 50 + k], fifoData[i + k + 51]);\r
-                               scsiPhyTx32(fifoData[i + 52 + k], fifoData[i + k + 53]);\r
-                               scsiPhyTx32(fifoData[i + 54 + k], fifoData[i + k + 55]);\r
-                               scsiPhyTx32(fifoData[i + 56 + k], fifoData[i + k + 57]);\r
-                               scsiPhyTx32(fifoData[i + 58 + k], fifoData[i + k + 59]);\r
-                               scsiPhyTx32(fifoData[i + 60 + k], fifoData[i + k + 61]);\r
-                               scsiPhyTx32(fifoData[i + 62 + k], fifoData[i + k + 63]);\r
-\r
-                               scsiPhyTx32(fifoData[i + 64 + k], fifoData[i + k + 65]);\r
-                               scsiPhyTx32(fifoData[i + 66 + k], fifoData[i + k + 67]);\r
-                               scsiPhyTx32(fifoData[i + 68 + k], fifoData[i + k + 69]);\r
-                               scsiPhyTx32(fifoData[i + 70 + k], fifoData[i + k + 71]);\r
-                               scsiPhyTx32(fifoData[i + 72 + k], fifoData[i + k + 73]);\r
-                               scsiPhyTx32(fifoData[i + 74 + k], fifoData[i + k + 75]);\r
-                               scsiPhyTx32(fifoData[i + 76 + k], fifoData[i + k + 77]);\r
-                               scsiPhyTx32(fifoData[i + 78 + k], fifoData[i + k + 79]);\r
-                               scsiPhyTx32(fifoData[i + 80 + k], fifoData[i + k + 81]);\r
-                               scsiPhyTx32(fifoData[i + 82 + k], fifoData[i + k + 83]);\r
-                               scsiPhyTx32(fifoData[i + 84 + k], fifoData[i + k + 85]);\r
-                               scsiPhyTx32(fifoData[i + 86 + k], fifoData[i + k + 87]);\r
-                               scsiPhyTx32(fifoData[i + 88 + k], fifoData[i + k + 89]);\r
-                               scsiPhyTx32(fifoData[i + 90 + k], fifoData[i + k + 91]);\r
-                               scsiPhyTx32(fifoData[i + 92 + k], fifoData[i + k + 93]);\r
-                               scsiPhyTx32(fifoData[i + 94 + k], fifoData[i + k + 95]);\r
-\r
-                               scsiPhyTx32(fifoData[i + 96 + k], fifoData[i + k + 97]);\r
-                               scsiPhyTx32(fifoData[i + 98 + k], fifoData[i + k + 99]);\r
-                               scsiPhyTx32(fifoData[i + 100 + k], fifoData[i + k + 101]);\r
-                               scsiPhyTx32(fifoData[i + 102 + k], fifoData[i + k + 103]);\r
-                               scsiPhyTx32(fifoData[i + 104 + k], fifoData[i + k + 105]);\r
-                               scsiPhyTx32(fifoData[i + 106 + k], fifoData[i + k + 107]);\r
-                               scsiPhyTx32(fifoData[i + 108 + k], fifoData[i + k + 109]);\r
-                               scsiPhyTx32(fifoData[i + 110 + k], fifoData[i + k + 111]);\r
-                               scsiPhyTx32(fifoData[i + 112 + k], fifoData[i + k + 113]);\r
-                               scsiPhyTx32(fifoData[i + 114 + k], fifoData[i + k + 115]);\r
-                               scsiPhyTx32(fifoData[i + 116 + k], fifoData[i + k + 117]);\r
-                               scsiPhyTx32(fifoData[i + 118 + k], fifoData[i + k + 119]);\r
-                               scsiPhyTx32(fifoData[i + 120 + k], fifoData[i + k + 121]);\r
-                               scsiPhyTx32(fifoData[i + 122 + k], fifoData[i + k + 123]);\r
-                               scsiPhyTx32(fifoData[i + 124 + k], fifoData[i + k + 125]);\r
-                               scsiPhyTx32(fifoData[i + 126 + k], fifoData[i + k + 127]);\r
-\r
-                       }\r
-\r
-                       i += chunk16;\r
-               }\r
-               else\r
-               {\r
-                       uint32_t chunk16 = count16 - i;\r
-\r
-                       uint32_t k = 0;\r
-                       for (; k + 4 <= chunk16; k += 4)\r
-                       {\r
-                               scsiPhyTx32(fifoData[i + k], fifoData[i + k + 1]);\r
-                               scsiPhyTx32(fifoData[i + k + 2], fifoData[i + k + 3]);\r
-                       }\r
-                       for (; k < chunk16; ++k)\r
-                       {\r
-                               scsiPhyTx(fifoData[i + k]);\r
-                       }\r
-                       i += chunk16;\r
-               }\r
-       }\r
+    uint16_t* fifoData = (uint16_t*)data;\r
+    uint32_t count16 = (count + 1) / 2;\r
+\r
+    int i = 0;\r
+    while ((i  < count16) && likely(!scsiDev.resetFlag))\r
+    {\r
+        while (!scsiFifoReady() && likely(!scsiDev.resetFlag))\r
+        {\r
+            // Spin\r
+        }\r
+\r
+        if (count16 - i >= SCSI_FIFO_DEPTH16)\r
+        {\r
+            uint32_t chunk16 = SCSI_FIFO_DEPTH16;\r
+\r
+            // Let gcc unroll the loop as much as possible.\r
+            for (uint32_t k = 0; k + 128 <= chunk16; k += 128)\r
+            {\r
+                scsiPhyTx32(fifoData[i + k], fifoData[i + k + 1]);\r
+                scsiPhyTx32(fifoData[i + 2 + k], fifoData[i + k + 3]);\r
+                scsiPhyTx32(fifoData[i + 4 + k], fifoData[i + k + 5]);\r
+                scsiPhyTx32(fifoData[i + 6 + k], fifoData[i + k + 7]);\r
+                scsiPhyTx32(fifoData[i + 8 + k], fifoData[i + k + 9]);\r
+                scsiPhyTx32(fifoData[i + 10 + k], fifoData[i + k + 11]);\r
+                scsiPhyTx32(fifoData[i + 12 + k], fifoData[i + k + 13]);\r
+                scsiPhyTx32(fifoData[i + 14 + k], fifoData[i + k + 15]);\r
+                scsiPhyTx32(fifoData[i + 16 + k], fifoData[i + k + 17]);\r
+                scsiPhyTx32(fifoData[i + 18 + k], fifoData[i + k + 19]);\r
+                scsiPhyTx32(fifoData[i + 20 + k], fifoData[i + k + 21]);\r
+                scsiPhyTx32(fifoData[i + 22 + k], fifoData[i + k + 23]);\r
+                scsiPhyTx32(fifoData[i + 24 + k], fifoData[i + k + 25]);\r
+                scsiPhyTx32(fifoData[i + 26 + k], fifoData[i + k + 27]);\r
+                scsiPhyTx32(fifoData[i + 28 + k], fifoData[i + k + 29]);\r
+                scsiPhyTx32(fifoData[i + 30 + k], fifoData[i + k + 31]);\r
+\r
+                scsiPhyTx32(fifoData[i + 32 + k], fifoData[i + k + 33]);\r
+                scsiPhyTx32(fifoData[i + 34 + k], fifoData[i + k + 35]);\r
+                scsiPhyTx32(fifoData[i + 36 + k], fifoData[i + k + 37]);\r
+                scsiPhyTx32(fifoData[i + 38 + k], fifoData[i + k + 39]);\r
+                scsiPhyTx32(fifoData[i + 40 + k], fifoData[i + k + 41]);\r
+                scsiPhyTx32(fifoData[i + 42 + k], fifoData[i + k + 43]);\r
+                scsiPhyTx32(fifoData[i + 44 + k], fifoData[i + k + 45]);\r
+                scsiPhyTx32(fifoData[i + 46 + k], fifoData[i + k + 47]);\r
+                scsiPhyTx32(fifoData[i + 48 + k], fifoData[i + k + 49]);\r
+                scsiPhyTx32(fifoData[i + 50 + k], fifoData[i + k + 51]);\r
+                scsiPhyTx32(fifoData[i + 52 + k], fifoData[i + k + 53]);\r
+                scsiPhyTx32(fifoData[i + 54 + k], fifoData[i + k + 55]);\r
+                scsiPhyTx32(fifoData[i + 56 + k], fifoData[i + k + 57]);\r
+                scsiPhyTx32(fifoData[i + 58 + k], fifoData[i + k + 59]);\r
+                scsiPhyTx32(fifoData[i + 60 + k], fifoData[i + k + 61]);\r
+                scsiPhyTx32(fifoData[i + 62 + k], fifoData[i + k + 63]);\r
+\r
+                scsiPhyTx32(fifoData[i + 64 + k], fifoData[i + k + 65]);\r
+                scsiPhyTx32(fifoData[i + 66 + k], fifoData[i + k + 67]);\r
+                scsiPhyTx32(fifoData[i + 68 + k], fifoData[i + k + 69]);\r
+                scsiPhyTx32(fifoData[i + 70 + k], fifoData[i + k + 71]);\r
+                scsiPhyTx32(fifoData[i + 72 + k], fifoData[i + k + 73]);\r
+                scsiPhyTx32(fifoData[i + 74 + k], fifoData[i + k + 75]);\r
+                scsiPhyTx32(fifoData[i + 76 + k], fifoData[i + k + 77]);\r
+                scsiPhyTx32(fifoData[i + 78 + k], fifoData[i + k + 79]);\r
+                scsiPhyTx32(fifoData[i + 80 + k], fifoData[i + k + 81]);\r
+                scsiPhyTx32(fifoData[i + 82 + k], fifoData[i + k + 83]);\r
+                scsiPhyTx32(fifoData[i + 84 + k], fifoData[i + k + 85]);\r
+                scsiPhyTx32(fifoData[i + 86 + k], fifoData[i + k + 87]);\r
+                scsiPhyTx32(fifoData[i + 88 + k], fifoData[i + k + 89]);\r
+                scsiPhyTx32(fifoData[i + 90 + k], fifoData[i + k + 91]);\r
+                scsiPhyTx32(fifoData[i + 92 + k], fifoData[i + k + 93]);\r
+                scsiPhyTx32(fifoData[i + 94 + k], fifoData[i + k + 95]);\r
+\r
+                scsiPhyTx32(fifoData[i + 96 + k], fifoData[i + k + 97]);\r
+                scsiPhyTx32(fifoData[i + 98 + k], fifoData[i + k + 99]);\r
+                scsiPhyTx32(fifoData[i + 100 + k], fifoData[i + k + 101]);\r
+                scsiPhyTx32(fifoData[i + 102 + k], fifoData[i + k + 103]);\r
+                scsiPhyTx32(fifoData[i + 104 + k], fifoData[i + k + 105]);\r
+                scsiPhyTx32(fifoData[i + 106 + k], fifoData[i + k + 107]);\r
+                scsiPhyTx32(fifoData[i + 108 + k], fifoData[i + k + 109]);\r
+                scsiPhyTx32(fifoData[i + 110 + k], fifoData[i + k + 111]);\r
+                scsiPhyTx32(fifoData[i + 112 + k], fifoData[i + k + 113]);\r
+                scsiPhyTx32(fifoData[i + 114 + k], fifoData[i + k + 115]);\r
+                scsiPhyTx32(fifoData[i + 116 + k], fifoData[i + k + 117]);\r
+                scsiPhyTx32(fifoData[i + 118 + k], fifoData[i + k + 119]);\r
+                scsiPhyTx32(fifoData[i + 120 + k], fifoData[i + k + 121]);\r
+                scsiPhyTx32(fifoData[i + 122 + k], fifoData[i + k + 123]);\r
+                scsiPhyTx32(fifoData[i + 124 + k], fifoData[i + k + 125]);\r
+\r
+                // The last write must be 16bit so that no data is left sitting in the\r
+                // AHB bus, still waiting to be written, while we're off checking\r
+                // for empty fifos.\r
+                // Note also that the fmc fifo is disabled on stm32f446 because it's too big\r
+                // (64 bytes) and we may think the fpga fifo is empty even though\r
+                // there are pending writes.\r
+                scsiPhyTx(fifoData[i + 126 + k]);\r
+                scsiPhyTx(fifoData[i + k + 127]);\r
+\r
+            }\r
+\r
+            i += chunk16;\r
+        }\r
+        else\r
+        {\r
+            uint32_t chunk16 = count16 - i;\r
+\r
+            uint32_t k = 0;\r
+            // Note that the last 1 to 4 16bit words fall through to the next loop,\r
+            // which avoids ending on a 32bit write.\r
+            for (; k + 4 < chunk16; k += 4)\r
+            {\r
+                scsiPhyTx32(fifoData[i + k], fifoData[i + k + 1]);\r
+                scsiPhyTx32(fifoData[i + k + 2], fifoData[i + k + 3]);\r
+            }\r
+            for (; k < chunk16; ++k)\r
+            {\r
+                scsiPhyTx(fifoData[i + k]);\r
+            }\r
+            i += chunk16;\r
+        }\r
+    }\r
 }\r
 \r
 \r
 void\r
 scsiWrite(const uint8_t* data, uint32_t count)\r
 {\r
-       int i = 0;\r
-       while (i < count && likely(!scsiDev.resetFlag))\r
-       {\r
-               uint32_t chunk = ((count - i) > SCSI_XFER_MAX)\r
-                       ? SCSI_XFER_MAX : (count - i);\r
-               scsiSetDataCount(chunk);\r
-\r
-               scsiWritePIO(data + i, chunk);\r
-\r
-               while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
-               {\r
-                   __disable_irq();\r
-                   if (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
+    int i = 0;\r
+    while (i < count && likely(!scsiDev.resetFlag))\r
+    {\r
+        uint32_t chunk = ((count - i) > SCSI_XFER_MAX)\r
+            ? SCSI_XFER_MAX : (count - i);\r
+        scsiSetDataCount(chunk);\r
+\r
+        scsiWritePIO(data + i, chunk);\r
+\r
+        while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
+        {\r
+            __disable_irq();\r
+            if (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
             {\r
-                       __WFI();\r
+                __WFI();\r
             }\r
-                   __enable_irq();\r
-               }\r
+            __enable_irq();\r
+        }\r
 \r
-               i += chunk;\r
-       }\r
+        i += chunk;\r
+    }\r
 }\r
 \r
 static inline void busSettleDelay(void)\r
 {\r
-       // Data Release time (switching IO) = 400ns\r
-       // + Bus Settle time (switching phase) = 400ns.\r
-       s2s_delay_us(1); // Close enough.\r
+    // Data Release time (switching IO) = 400ns\r
+    // + Bus Settle time (switching phase) = 400ns.\r
+    s2s_delay_us(1); // Close enough.\r
 }\r
 \r
 void scsiEnterBusFree()\r
 {\r
-       *SCSI_CTRL_BSY = 0x00;\r
-       // We now have a Bus Clear Delay of 800ns to release remaining signals.\r
-       *SCSI_CTRL_PHASE = 0;\r
+    *SCSI_CTRL_BSY = 0x00;\r
+    // We now have a Bus Clear Delay of 800ns to release remaining signals.\r
+    *SCSI_CTRL_PHASE = 0;\r
 }\r
 \r
 static void\r
 scsiSetTiming(\r
-       uint8_t assertClocks,\r
-       uint8_t deskew,\r
-       uint8_t hold,\r
-       uint8_t glitch)\r
+    uint8_t assertClocks,\r
+    uint8_t deskew,\r
+    uint8_t hold,\r
+    uint8_t glitch)\r
 {\r
-       *SCSI_CTRL_DESKEW = ((hold & 7) << 5) | (deskew & 0x1F);\r
-       *SCSI_CTRL_TIMING = (assertClocks & 0x3F);\r
-       *SCSI_CTRL_TIMING3 = (glitch & 0xF);\r
+    *SCSI_CTRL_DESKEW = ((hold & 7) << 5) | (deskew & 0x1F);\r
+    *SCSI_CTRL_TIMING = (assertClocks & 0x3F);\r
+    *SCSI_CTRL_TIMING3 = (glitch & 0xF);\r
 }\r
 \r
 static void\r
 scsiSetDefaultTiming()\r
 {\r
-       const uint8_t* asyncTiming = asyncTimings[0];\r
-       scsiSetTiming(\r
-               asyncTiming[0],\r
-               asyncTiming[1],\r
-               asyncTiming[2],\r
-               asyncTiming[3]);\r
+    const uint8_t* asyncTiming = asyncTimings[0];\r
+    scsiSetTiming(\r
+        asyncTiming[0],\r
+        asyncTiming[1],\r
+        asyncTiming[2],\r
+        asyncTiming[3]);\r
 }\r
 \r
 void scsiEnterPhase(int newPhase)\r
 {\r
-       uint32_t delay = scsiEnterPhaseImmediate(newPhase);\r
-       if (delay > 0)\r
-       {\r
-               s2s_delay_us(delay);\r
-       }\r
+    uint32_t delay = scsiEnterPhaseImmediate(newPhase);\r
+    if (delay > 0)\r
+    {\r
+        s2s_delay_us(delay);\r
+    }\r
 }\r
 \r
 // Returns microsecond delay\r
 uint32_t scsiEnterPhaseImmediate(int newPhase)\r
 {\r
-       // ANSI INCITS 362-2002 SPI-3 10.7.1:\r
-       // Phase changes are not allowed while REQ or ACK is asserted.\r
-       while (likely(!scsiDev.resetFlag) && scsiStatusACK()) {}\r
-\r
-       int oldPhase = *SCSI_CTRL_PHASE;\r
-\r
-       if (newPhase != oldPhase)\r
-       {\r
-               if ((newPhase == DATA_IN || newPhase == DATA_OUT) &&\r
-                       scsiDev.target->syncOffset)\r
-               {\r
-                       if (scsiDev.target->syncPeriod < 23)\r
-                       {\r
-                               scsiSetTiming(SCSI_FAST20_ASSERT, SCSI_FAST20_DESKEW, SCSI_FAST20_HOLD, 1);\r
-                       }\r
-                       else if (scsiDev.target->syncPeriod <= 25)\r
-                       {\r
-                               if (newPhase == DATA_IN)\r
-                               {\r
-                                       scsiSetTiming(SCSI_FAST10_WRITE_ASSERT, SCSI_FAST10_DESKEW, SCSI_FAST10_HOLD, 1);\r
-                               }\r
-                               else\r
-                               {\r
-                                       scsiSetTiming(SCSI_FAST10_READ_ASSERT, SCSI_FAST10_DESKEW, SCSI_FAST10_HOLD, 1);\r
-                               }\r
-                       }\r
-                       else\r
-                       {\r
-                               // Amiga A3000 OS3.9 sets period to 35 and fails with\r
-                               // glitch == 1.\r
-                               int glitch =\r
-                                       scsiDev.target->syncPeriod < 35 ? 1 :\r
-                                               (scsiDev.target->syncPeriod < 45 ? 2 : 5);\r
-                               int deskew = syncDeskew(scsiDev.target->syncPeriod);\r
-                               int assertion;\r
-                               if (newPhase == DATA_IN)\r
-                               {\r
-                                       assertion = syncAssertionWrite(scsiDev.target->syncPeriod, deskew);\r
-                               }\r
-                               else\r
-                               {\r
-                                       assertion = syncAssertionRead(scsiDev.target->syncPeriod);\r
-                               }\r
-                               scsiSetTiming(\r
-                                       assertion,\r
-                                       deskew,\r
-                                       syncHold(scsiDev.target->syncPeriod),\r
-                                       glitch);\r
-                       }\r
-\r
-                       *SCSI_CTRL_SYNC_OFFSET = scsiDev.target->syncOffset;\r
-               }\r
-               else if (newPhase >= 0)\r
-               {\r
-\r
-                       *SCSI_CTRL_SYNC_OFFSET = 0;\r
-                       const uint8_t* asyncTiming;\r
-\r
-                       if (scsiDev.boardCfg.scsiSpeed == S2S_CFG_SPEED_NoLimit)\r
-                       {\r
-                               asyncTiming = asyncTimings[SCSI_ASYNC_SAFE];\r
-                       }\r
-                       else if (scsiDev.boardCfg.scsiSpeed >= S2S_CFG_SPEED_TURBO)\r
-                       {\r
-                               asyncTiming = asyncTimings[SCSI_ASYNC_TURBO];\r
-                       }\r
-                       else if (scsiDev.boardCfg.scsiSpeed >= S2S_CFG_SPEED_ASYNC_50)\r
-                       {\r
-                               asyncTiming = asyncTimings[SCSI_ASYNC_50];\r
-                       } else if (scsiDev.boardCfg.scsiSpeed >= S2S_CFG_SPEED_ASYNC_33) {\r
-\r
-                               asyncTiming = asyncTimings[SCSI_ASYNC_33];\r
-\r
-                       } else {\r
-                               asyncTiming = asyncTimings[SCSI_ASYNC_15];\r
-                       }\r
-                       scsiSetTiming(\r
-                               asyncTiming[0],\r
-                               asyncTiming[1],\r
-                               asyncTiming[2],\r
-                               asyncTiming[3]);\r
-               }\r
-\r
-               uint32_t delayUs = 0;\r
-               if (newPhase >= 0)\r
-               {\r
-                       *SCSI_CTRL_PHASE = newPhase;\r
-                       delayUs += 1; // busSettleDelay\r
-\r
-                       if (scsiDev.compatMode < COMPAT_SCSI2)\r
-                       {\r
-                               // EMU EMAX needs 100uS ! 10uS is not enough.\r
-                               delayUs += 100;\r
-                       }\r
-               }\r
-               else\r
-               {\r
-                       *SCSI_CTRL_PHASE = 0;\r
-               }\r
-\r
-               return delayUs;\r
-       }\r
-\r
-       return 0; // No change\r
+    // ANSI INCITS 362-2002 SPI-3 10.7.1:\r
+    // Phase changes are not allowed while REQ or ACK is asserted.\r
+    while (likely(!scsiDev.resetFlag) && scsiStatusACK()) {}\r
+\r
+    int oldPhase = *SCSI_CTRL_PHASE;\r
+\r
+    if (newPhase != oldPhase)\r
+    {\r
+        if ((newPhase == DATA_IN || newPhase == DATA_OUT) &&\r
+            scsiDev.target->syncOffset)\r
+        {\r
+            if (scsiDev.target->syncPeriod < 23)\r
+            {\r
+                scsiSetTiming(SCSI_FAST20_ASSERT, SCSI_FAST20_DESKEW, SCSI_FAST20_HOLD, 1);\r
+            }\r
+            else if (scsiDev.target->syncPeriod <= 25)\r
+            {\r
+                if (newPhase == DATA_IN)\r
+                {\r
+                    scsiSetTiming(SCSI_FAST10_WRITE_ASSERT, SCSI_FAST10_DESKEW, SCSI_FAST10_HOLD, 1);\r
+                }\r
+                else\r
+                {\r
+                    scsiSetTiming(SCSI_FAST10_READ_ASSERT, SCSI_FAST10_DESKEW, SCSI_FAST10_HOLD, 1);\r
+                }\r
+            }\r
+            else\r
+            {\r
+                // Amiga A3000 OS3.9 sets period to 35 and fails with\r
+                // glitch == 1.\r
+                int glitch =\r
+                    scsiDev.target->syncPeriod < 35 ? 1 :\r
+                        (scsiDev.target->syncPeriod < 45 ? 2 : 5);\r
+                int deskew = syncDeskew(scsiDev.target->syncPeriod);\r
+                int assertion;\r
+                if (newPhase == DATA_IN)\r
+                {\r
+                    assertion = syncAssertionWrite(scsiDev.target->syncPeriod, deskew);\r
+                }\r
+                else\r
+                {\r
+                    assertion = syncAssertionRead(scsiDev.target->syncPeriod);\r
+                }\r
+                scsiSetTiming(\r
+                    assertion,\r
+                    deskew,\r
+                    syncHold(scsiDev.target->syncPeriod),\r
+                    glitch);\r
+            }\r
+\r
+            *SCSI_CTRL_SYNC_OFFSET = scsiDev.target->syncOffset;\r
+        }\r
+        else if (newPhase >= 0)\r
+        {\r
+\r
+            *SCSI_CTRL_SYNC_OFFSET = 0;\r
+            const uint8_t* asyncTiming;\r
+\r
+            if (scsiDev.boardCfg.scsiSpeed == S2S_CFG_SPEED_NoLimit)\r
+            {\r
+                asyncTiming = asyncTimings[SCSI_ASYNC_SAFE];\r
+            }\r
+            else if (scsiDev.boardCfg.scsiSpeed >= S2S_CFG_SPEED_TURBO)\r
+            {\r
+                asyncTiming = asyncTimings[SCSI_ASYNC_TURBO];\r
+            }\r
+            else if (scsiDev.boardCfg.scsiSpeed >= S2S_CFG_SPEED_ASYNC_50)\r
+            {\r
+                asyncTiming = asyncTimings[SCSI_ASYNC_50];\r
+            } else if (scsiDev.boardCfg.scsiSpeed >= S2S_CFG_SPEED_ASYNC_33) {\r
+\r
+                asyncTiming = asyncTimings[SCSI_ASYNC_33];\r
+\r
+            } else {\r
+                asyncTiming = asyncTimings[SCSI_ASYNC_15];\r
+            }\r
+            scsiSetTiming(\r
+                asyncTiming[0],\r
+                asyncTiming[1],\r
+                asyncTiming[2],\r
+                asyncTiming[3]);\r
+        }\r
+\r
+        uint32_t delayUs = 0;\r
+        if (newPhase >= 0)\r
+        {\r
+            *SCSI_CTRL_PHASE = newPhase;\r
+            delayUs += 1; // busSettleDelay\r
+\r
+            if (scsiDev.compatMode < COMPAT_SCSI2)\r
+            {\r
+                // EMU EMAX needs 100uS ! 10uS is not enough.\r
+                delayUs += 100;\r
+            }\r
+        }\r
+        else\r
+        {\r
+            *SCSI_CTRL_PHASE = 0;\r
+        }\r
+\r
+        return delayUs;\r
+    }\r
+\r
+    return 0; // No change\r
 }\r
 \r
 // Returns a "safe" estimate of the host SCSI speed of\r
 // theoretical speed / 2\r
 uint32_t s2s_getScsiRateKBs()\r
 {\r
-       if (scsiDev.target->syncOffset)\r
-       {\r
-               if (scsiDev.target->syncPeriod < 23)\r
-               {\r
-                       return 20 / 2;\r
-               }\r
-               else if (scsiDev.target->syncPeriod <= 25)\r
-               {\r
-                       return 10 / 2;\r
-               }\r
-               else\r
-               {\r
-                       // 1000000000 / (scsiDev.target->syncPeriod * 4) bytes per second\r
-                       // (1000000000 / (scsiDev.target->syncPeriod * 4)) / 1000  kB/s\r
-                       return (1000000 / (scsiDev.target->syncPeriod * 4)) / 2;\r
-               }\r
-       }\r
-       else\r
-       {\r
-               return 0;\r
-       }\r
+    if (scsiDev.target->syncOffset)\r
+    {\r
+        if (scsiDev.target->syncPeriod < 23)\r
+        {\r
+            return 20 / 2;\r
+        }\r
+        else if (scsiDev.target->syncPeriod <= 25)\r
+        {\r
+            return 10 / 2;\r
+        }\r
+        else\r
+        {\r
+            // 1000000000 / (scsiDev.target->syncPeriod * 4) bytes per second\r
+            // (1000000000 / (scsiDev.target->syncPeriod * 4)) / 1000  kB/s\r
+            return (1000000 / (scsiDev.target->syncPeriod * 4)) / 2;\r
+        }\r
+    }\r
+    else\r
+    {\r
+        return 0;\r
+    }\r
 }\r
 \r
+\r
 void scsiPhyReset()\r
 {\r
-       if (dmaInProgress)\r
-       {\r
-               HAL_DMA_Abort(&memToFSMC);\r
-               HAL_DMA_Abort(&fsmcToMem);\r
-\r
-               dmaInProgress = 0;\r
-       }\r
-\r
-       s2s_fpgaReset(); // Clears fifos etc.\r
-\r
-       *SCSI_CTRL_PHASE = 0x00;\r
-       *SCSI_CTRL_BSY = 0x00;\r
-       *SCSI_CTRL_DBX = 0;\r
-\r
-       *SCSI_CTRL_SYNC_OFFSET = 0;\r
-       scsiSetDefaultTiming();\r
-\r
-       // DMA Benchmark code\r
-       // Currently 14.9MB/s.\r
-       #ifdef DMA_BENCHMARK\r
-       while(1)\r
-       {\r
-               s2s_ledOn();\r
-               // 100MB\r
-               for (int i = 0; i < (100LL * 1024 * 1024 / SCSI_FIFO_DEPTH); ++i)\r
-               {\r
-                       HAL_DMA_Start(\r
-                               &memToFSMC,\r
-                               (uint32_t) &scsiDev.data[0],\r
-                               (uint32_t) SCSI_FIFO_DATA,\r
-                               SCSI_FIFO_DEPTH / 4);\r
-\r
-                       HAL_DMA_PollForTransfer(\r
-                               &memToFSMC,\r
-                               HAL_DMA_FULL_TRANSFER,\r
-                               0xffffffff);\r
-\r
-                       s2s_fpgaReset();\r
-               }\r
-               s2s_ledOff();\r
-\r
-               for(int i = 0; i < 10; ++i) s2s_delay_ms(1000);\r
-       }\r
-       #endif\r
-\r
-       // PIO Benchmark code\r
-       // Currently 16.7MB/s.\r
-       //#define PIO_BENCHMARK 1\r
-       #ifdef PIO_BENCHMARK\r
-       while(1)\r
-       {\r
-               s2s_ledOn();\r
-\r
-               scsiEnterPhase(DATA_IN); // Need IO flag set for fifo ready flag\r
-\r
-               // 100MB\r
-               for (int i = 0; i < (100LL * 1024 * 1024 / SCSI_FIFO_DEPTH); ++i)\r
-               {\r
-                       scsiSetDataCount(1); // Resets fifos.\r
-\r
-                       // Shouldn't block\r
-                       scsiDev.resetFlag = 0;\r
-                       scsiWritePIO(&scsiDev.data[0], SCSI_FIFO_DEPTH);\r
-               }\r
-               s2s_ledOff();\r
-\r
-               for(int i = 0; i < 10; ++i) s2s_delay_ms(1000);\r
-       }\r
-       #endif\r
-\r
-       #ifdef SCSI_FREQ_TEST\r
-       while(1)\r
-       {\r
-               *SCSI_CTRL_DBX = 0xAA;\r
-               *SCSI_CTRL_DBX = 0x55;\r
-       }\r
-       #endif\r
-\r
+    if (dmaInProgress)\r
+    {\r
+        HAL_DMA_Abort(&memToFSMC);\r
+        HAL_DMA_Abort(&fsmcToMem);\r
+\r
+        dmaInProgress = 0;\r
+    }\r
+\r
+    s2s_fpgaReset(); // Clears fifos etc.\r
+\r
+    *SCSI_CTRL_PHASE = 0x00;\r
+    *SCSI_CTRL_BSY = 0x00;\r
+    *SCSI_CTRL_DBX = 0;\r
+\r
+    *SCSI_CTRL_SYNC_OFFSET = 0;\r
+    scsiSetDefaultTiming();\r
+\r
+    // DMA Benchmark code\r
+    // Currently 14.9MB/s.\r
+    #ifdef DMA_BENCHMARK\r
+    while(1)\r
+    {\r
+        s2s_ledOn();\r
+        // 100MB\r
+        for (int i = 0; i < (100LL * 1024 * 1024 / SCSI_FIFO_DEPTH); ++i)\r
+        {\r
+            HAL_DMA_Start(\r
+                &memToFSMC,\r
+                (uint32_t) &scsiDev.data[0],\r
+                (uint32_t) SCSI_FIFO_DATA,\r
+                SCSI_FIFO_DEPTH / 4);\r
+\r
+            HAL_DMA_PollForTransfer(\r
+                &memToFSMC,\r
+                HAL_DMA_FULL_TRANSFER,\r
+                0xffffffff);\r
+\r
+            s2s_fpgaReset();\r
+        }\r
+        s2s_ledOff();\r
+\r
+        for(int i = 0; i < 10; ++i) s2s_delay_ms(1000);\r
+    }\r
+    #endif\r
+\r
+    // PIO Benchmark code\r
+    // Currently 16.7MB/s.\r
+    //#define PIO_BENCHMARK 1\r
+    #ifdef PIO_BENCHMARK\r
+    while(1)\r
+    {\r
+        s2s_ledOn();\r
+\r
+        scsiEnterPhase(DATA_IN); // Need IO flag set for fifo ready flag\r
+\r
+        // 100MB\r
+        for (int i = 0; i < (100LL * 1024 * 1024 / SCSI_FIFO_DEPTH); ++i)\r
+        {\r
+            scsiSetDataCount(1); // Resets fifos.\r
+\r
+            // Shouldn't block\r
+            scsiDev.resetFlag = 0;\r
+            scsiWritePIO(&scsiDev.data[0], SCSI_FIFO_DEPTH);\r
+        }\r
+        s2s_ledOff();\r
+\r
+        for(int i = 0; i < 10; ++i) s2s_delay_ms(1000);\r
+    }\r
+    #endif\r
+\r
+    #ifdef SCSI_FREQ_TEST\r
+    while(1)\r
+    {\r
+        *SCSI_CTRL_DBX = 0xAA;\r
+        *SCSI_CTRL_DBX = 0x55;\r
+    }\r
+    #endif\r
 }\r
 \r
 static void scsiPhyInitDMA()\r
 {\r
-       // One-time init only.\r
-       static uint8_t init = 0;\r
-       if (init == 0)\r
-       {\r
-               init = 1;\r
-\r
-               // Memory to memory transfers can only be done using DMA2\r
-               __DMA2_CLK_ENABLE();\r
-\r
-               // Transmit SCSI data. The source data is treated as the\r
-               // peripheral (even though this is memory-to-memory)\r
-               memToFSMC.Instance = DMA2_Stream0;\r
-               memToFSMC.Init.Channel = DMA_CHANNEL_0;\r
-               memToFSMC.Init.Direction = DMA_MEMORY_TO_MEMORY;\r
-               memToFSMC.Init.PeriphInc = DMA_PINC_ENABLE;\r
-               memToFSMC.Init.MemInc = DMA_MINC_DISABLE;\r
-               memToFSMC.Init.PeriphDataAlignment = DMA_PDATAALIGN_WORD;\r
-               memToFSMC.Init.MemDataAlignment = DMA_MDATAALIGN_HALFWORD;\r
-               memToFSMC.Init.Mode = DMA_NORMAL;\r
-               memToFSMC.Init.Priority = DMA_PRIORITY_LOW;\r
-               // FIFO mode is needed to allow conversion from 32bit words to the\r
-               // 16bit FSMC interface.\r
-               memToFSMC.Init.FIFOMode = DMA_FIFOMODE_ENABLE;\r
-\r
-               // We only use 1 word (4 bytes) in the fifo at a time. Normally it's\r
-               // better to let the DMA fifo fill up then do burst transfers, but\r
-               // bursting out the FSMC interface will be very slow and may starve\r
-               // other (faster) transfers. We don't want to risk the SDIO transfers\r
-               // from overrun/underrun conditions.\r
-               memToFSMC.Init.FIFOThreshold = DMA_FIFO_THRESHOLD_1QUARTERFULL;\r
-               memToFSMC.Init.MemBurst = DMA_MBURST_SINGLE;\r
-               memToFSMC.Init.PeriphBurst = DMA_PBURST_SINGLE;\r
-               HAL_DMA_Init(&memToFSMC);\r
-\r
-               // Receive SCSI data. The source data (fsmc) is treated as the\r
-               // peripheral (even though this is memory-to-memory)\r
-               fsmcToMem.Instance = DMA2_Stream1;\r
-               fsmcToMem.Init.Channel = DMA_CHANNEL_0;\r
-               fsmcToMem.Init.Direction = DMA_MEMORY_TO_MEMORY;\r
-               fsmcToMem.Init.PeriphInc = DMA_PINC_DISABLE;\r
-               fsmcToMem.Init.MemInc = DMA_MINC_ENABLE;\r
-               fsmcToMem.Init.PeriphDataAlignment = DMA_PDATAALIGN_HALFWORD;\r
-               fsmcToMem.Init.MemDataAlignment = DMA_MDATAALIGN_WORD;\r
-               fsmcToMem.Init.Mode = DMA_NORMAL;\r
-               fsmcToMem.Init.Priority = DMA_PRIORITY_LOW;\r
-               fsmcToMem.Init.FIFOMode = DMA_FIFOMODE_ENABLE;\r
-               fsmcToMem.Init.FIFOThreshold = DMA_FIFO_THRESHOLD_1QUARTERFULL;\r
-               fsmcToMem.Init.MemBurst = DMA_MBURST_SINGLE;\r
-               fsmcToMem.Init.PeriphBurst = DMA_PBURST_SINGLE;\r
-               HAL_DMA_Init(&fsmcToMem);\r
-\r
-               // TODO configure IRQs\r
-       }\r
+    // One-time init only.\r
+    static uint8_t init = 0;\r
+    if (init == 0)\r
+    {\r
+        init = 1;\r
+\r
+        // Memory to memory transfers can only be done using DMA2\r
+        __DMA2_CLK_ENABLE();\r
+\r
+        // Transmit SCSI data. The source data is treated as the\r
+        // peripheral (even though this is memory-to-memory)\r
+        memToFSMC.Instance = DMA2_Stream0;\r
+        memToFSMC.Init.Channel = DMA_CHANNEL_0;\r
+        memToFSMC.Init.Direction = DMA_MEMORY_TO_MEMORY;\r
+        memToFSMC.Init.PeriphInc = DMA_PINC_ENABLE;\r
+        memToFSMC.Init.MemInc = DMA_MINC_DISABLE;\r
+        memToFSMC.Init.PeriphDataAlignment = DMA_PDATAALIGN_WORD;\r
+        memToFSMC.Init.MemDataAlignment = DMA_MDATAALIGN_HALFWORD;\r
+        memToFSMC.Init.Mode = DMA_NORMAL;\r
+        memToFSMC.Init.Priority = DMA_PRIORITY_LOW;\r
+        // FIFO mode is needed to allow conversion from 32bit words to the\r
+        // 16bit FSMC interface.\r
+        memToFSMC.Init.FIFOMode = DMA_FIFOMODE_ENABLE;\r
+\r
+        // We only use 1 word (4 bytes) in the fifo at a time. Normally it's\r
+        // better to let the DMA fifo fill up then do burst transfers, but\r
+        // bursting out the FSMC interface will be very slow and may starve\r
+        // other (faster) transfers. We don't want to risk the SDIO transfers\r
+        // from overrun/underrun conditions.\r
+        memToFSMC.Init.FIFOThreshold = DMA_FIFO_THRESHOLD_1QUARTERFULL;\r
+        memToFSMC.Init.MemBurst = DMA_MBURST_SINGLE;\r
+        memToFSMC.Init.PeriphBurst = DMA_PBURST_SINGLE;\r
+        HAL_DMA_Init(&memToFSMC);\r
+\r
+        // Receive SCSI data. The source data (fsmc) is treated as the\r
+        // peripheral (even though this is memory-to-memory)\r
+        fsmcToMem.Instance = DMA2_Stream1;\r
+        fsmcToMem.Init.Channel = DMA_CHANNEL_0;\r
+        fsmcToMem.Init.Direction = DMA_MEMORY_TO_MEMORY;\r
+        fsmcToMem.Init.PeriphInc = DMA_PINC_DISABLE;\r
+        fsmcToMem.Init.MemInc = DMA_MINC_ENABLE;\r
+        fsmcToMem.Init.PeriphDataAlignment = DMA_PDATAALIGN_HALFWORD;\r
+        fsmcToMem.Init.MemDataAlignment = DMA_MDATAALIGN_WORD;\r
+        fsmcToMem.Init.Mode = DMA_NORMAL;\r
+        fsmcToMem.Init.Priority = DMA_PRIORITY_LOW;\r
+        fsmcToMem.Init.FIFOMode = DMA_FIFOMODE_ENABLE;\r
+        fsmcToMem.Init.FIFOThreshold = DMA_FIFO_THRESHOLD_1QUARTERFULL;\r
+        fsmcToMem.Init.MemBurst = DMA_MBURST_SINGLE;\r
+        fsmcToMem.Init.PeriphBurst = DMA_PBURST_SINGLE;\r
+        HAL_DMA_Init(&fsmcToMem);\r
+\r
+        // TODO configure IRQs\r
+    }\r
 }\r
 \r
 \r
 void scsiPhyInit()\r
 {\r
-       scsiPhyInitDMA();\r
+    scsiPhyInitDMA();\r
 \r
-       *SCSI_CTRL_IDMASK = 0x00; // Reset in scsiPhyConfig\r
-       *SCSI_CTRL_PHASE = 0x00;\r
-       *SCSI_CTRL_BSY = 0x00;\r
-       *SCSI_CTRL_DBX = 0;\r
+    *SCSI_CTRL_IDMASK = 0x00; // Reset in scsiPhyConfig\r
+    *SCSI_CTRL_PHASE = 0x00;\r
+    *SCSI_CTRL_BSY = 0x00;\r
+    *SCSI_CTRL_DBX = 0;\r
 \r
-       *SCSI_CTRL_SYNC_OFFSET = 0;\r
-       scsiSetDefaultTiming();\r
+    *SCSI_CTRL_SYNC_OFFSET = 0;\r
+    scsiSetDefaultTiming();\r
 \r
-       *SCSI_CTRL_SEL_TIMING = SCSI_DEFAULT_SELECTION;\r
+    *SCSI_CTRL_SEL_TIMING = SCSI_DEFAULT_SELECTION;\r
 \r
 }\r
 \r
 void scsiPhyConfig()\r
 {\r
-       if (scsiDev.boardCfg.flags6 & S2S_CFG_ENABLE_TERMINATOR)\r
-       {\r
-               HAL_GPIO_WritePin(nTERM_EN_GPIO_Port, nTERM_EN_Pin, GPIO_PIN_RESET);\r
-       }\r
-       else\r
-       {\r
-               HAL_GPIO_WritePin(nTERM_EN_GPIO_Port, nTERM_EN_Pin, GPIO_PIN_SET);\r
-       }\r
-\r
-\r
-       uint8_t idMask = 0;\r
-       for (int i = 0; i < 8; ++i)\r
-       {\r
-               const S2S_TargetCfg* cfg = s2s_getConfigById(i);\r
-               if (cfg && (cfg->scsiId & S2S_CFG_TARGET_ENABLED))\r
-               {\r
-                       idMask |= (1 << i);\r
-               }\r
-       }\r
-       *SCSI_CTRL_IDMASK = idMask;\r
-\r
-       *SCSI_CTRL_FLAGS =\r
-               ((scsiDev.boardCfg.flags & S2S_CFG_DISABLE_GLITCH) ?\r
-                       SCSI_CTRL_FLAGS_DISABLE_GLITCH : 0) |\r
-               ((scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY) ?\r
-                       SCSI_CTRL_FLAGS_ENABLE_PARITY : 0);\r
-\r
-       *SCSI_CTRL_SEL_TIMING =\r
-               (scsiDev.boardCfg.flags & S2S_CFG_ENABLE_SEL_LATCH) ?\r
-                       SCSI_FAST_SELECTION : SCSI_DEFAULT_SELECTION;\r
+    if (scsiDev.boardCfg.flags6 & S2S_CFG_ENABLE_TERMINATOR)\r
+    {\r
+        HAL_GPIO_WritePin(nTERM_EN_GPIO_Port, nTERM_EN_Pin, GPIO_PIN_RESET);\r
+    }\r
+    else\r
+    {\r
+        HAL_GPIO_WritePin(nTERM_EN_GPIO_Port, nTERM_EN_Pin, GPIO_PIN_SET);\r
+    }\r
+\r
+\r
+    uint8_t idMask = 0;\r
+    for (int i = 0; i < 8; ++i)\r
+    {\r
+        const S2S_TargetCfg* cfg = s2s_getConfigById(i);\r
+        if (cfg && (cfg->scsiId & S2S_CFG_TARGET_ENABLED))\r
+        {\r
+            idMask |= (1 << i);\r
+        }\r
+    }\r
+    *SCSI_CTRL_IDMASK = idMask;\r
+\r
+    *SCSI_CTRL_FLAGS =\r
+        ((scsiDev.boardCfg.flags & S2S_CFG_DISABLE_GLITCH) ?\r
+            SCSI_CTRL_FLAGS_DISABLE_GLITCH : 0) |\r
+        ((scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY) ?\r
+            SCSI_CTRL_FLAGS_ENABLE_PARITY : 0);\r
+\r
+    *SCSI_CTRL_SEL_TIMING =\r
+        (scsiDev.boardCfg.flags & S2S_CFG_ENABLE_SEL_LATCH) ?\r
+            SCSI_FAST_SELECTION : SCSI_DEFAULT_SELECTION;\r
 }\r
 \r
 \r
@@ -876,47 +886,47 @@ void scsiPhyConfig()
 // 64 = fpga comms error\r
 int scsiSelfTest()\r
 {\r
-       if (scsiDev.phase != BUS_FREE)\r
-       {\r
-               return 32;\r
-       }\r
-\r
-       // Acquire the SCSI bus.\r
-       for (int i = 0; i < 100; ++i)\r
-       {\r
-               if (scsiStatusBSY())\r
-               {\r
-                       s2s_delay_ms(1);\r
-               }\r
-       }\r
-       if (scsiStatusBSY())\r
-       {\r
-               // Error, couldn't acquire scsi bus\r
-               return 32;\r
-       }\r
-       *SCSI_CTRL_BSY = 1;\r
-       s2s_delay_ms(1);\r
-       if (! scsiStatusBSY())\r
-       {\r
-               *SCSI_CTRL_BSY = 0;\r
-\r
-               // Error, BSY doesn't work.\r
-               return 32;\r
-       }\r
-\r
-       // Should be safe to use the bus now.\r
-\r
-       int result = 0;\r
-\r
-       *SCSI_CTRL_DBX = 0;\r
-       busSettleDelay();\r
-       if ((*SCSI_STS_DBX & 0xff) != 0)\r
-       {\r
-               result = 1;\r
-       }\r
-\r
-       *SCSI_CTRL_BSY = 0;\r
-\r
-       return result;\r
+    if (scsiDev.phase != BUS_FREE)\r
+    {\r
+        return 32;\r
+    }\r
+\r
+    // Acquire the SCSI bus.\r
+    for (int i = 0; i < 100; ++i)\r
+    {\r
+        if (scsiStatusBSY())\r
+        {\r
+            s2s_delay_ms(1);\r
+        }\r
+    }\r
+    if (scsiStatusBSY())\r
+    {\r
+        // Error, couldn't acquire scsi bus\r
+        return 32;\r
+    }\r
+    *SCSI_CTRL_BSY = 1;\r
+    s2s_delay_ms(1);\r
+    if (! scsiStatusBSY())\r
+    {\r
+        *SCSI_CTRL_BSY = 0;\r
+\r
+        // Error, BSY doesn't work.\r
+        return 32;\r
+    }\r
+\r
+    // Should be safe to use the bus now.\r
+\r
+    int result = 0;\r
+\r
+    *SCSI_CTRL_DBX = 0;\r
+    busSettleDelay();\r
+    if ((*SCSI_STS_DBX & 0xff) != 0)\r
+    {\r
+        result = 1;\r
+    }\r
+\r
+    *SCSI_CTRL_BSY = 0;\r
+\r
+    return result;\r
 }\r
 \r