SWV tracing for diagnosing hangs.
[SCSI2SD.git] / software / SCSI2SD / src / sd.c
index fb290f1..fdc2b7d 100755 (executable)
@@ -14,6 +14,8 @@
 //\r
 //     You should have received a copy of the GNU General Public License\r
 //     along with SCSI2SD.  If not, see <http://www.gnu.org/licenses/>.\r
+#pragma GCC push_options\r
+#pragma GCC optimize("-flto")\r
 \r
 #include "device.h"\r
 #include "scsi.h"\r
@@ -21,6 +23,8 @@
 #include "disk.h"\r
 #include "sd.h"\r
 #include "led.h"\r
+#include "time.h"\r
+#include "trace.h"\r
 \r
 #include "scsiPhy.h"\r
 \r
 // Global\r
 SdDevice sdDev;\r
 \r
+enum SD_IO_STATE { SD_DMA, SD_ACCEPTED, SD_BUSY, SD_IDLE };\r
+static int sdIOState = SD_IDLE;\r
+\r
+// Private DMA variables.\r
+static uint8 sdDMARxChan = CY_DMA_INVALID_CHANNEL;\r
+static uint8 sdDMATxChan = CY_DMA_INVALID_CHANNEL;\r
+\r
+// Dummy location for DMA to send unchecked CRC bytes to\r
+static uint8 discardBuffer;\r
+\r
+// 2 bytes CRC, response, 8bits to close the clock..\r
+// "NCR" time is up to 8 bytes.\r
+static uint8_t writeResponseBuffer[8];\r
+\r
+static uint8_t writeStartToken = 0xFC;\r
+\r
+// Source of dummy SPI bytes for DMA\r
+static uint8 dummyBuffer = 0xFF;\r
+\r
+volatile uint8_t sdRxDMAComplete;\r
+volatile uint8_t sdTxDMAComplete;\r
+\r
+CY_ISR_PROTO(sdRxISR);\r
+CY_ISR(sdRxISR)\r
+{\r
+       sdRxDMAComplete = 1;\r
+}\r
+CY_ISR_PROTO(sdTxISR);\r
+CY_ISR(sdTxISR)\r
+{\r
+       sdTxDMAComplete = 1;\r
+}\r
+\r
 static uint8 sdCrc7(uint8* chr, uint8 cnt, uint8 crc)\r
 {\r
        uint8 a;\r
@@ -47,104 +84,161 @@ static uint8 sdCrc7(uint8* chr, uint8 cnt, uint8 crc)
 }\r
 \r
 // Read and write 1 byte.\r
-static uint8 sdSpiByte(uint8 value)\r
+static uint8_t sdSpiByte(uint8_t value)\r
 {\r
        SDCard_WriteTxData(value);\r
+       trace(trace_spinSpiByte);\r
        while (!(SDCard_ReadRxStatus() & SDCard_STS_RX_FIFO_NOT_EMPTY)) {}\r
+       trace(trace_sdSpiByte);\r
        return SDCard_ReadRxData();\r
 }\r
 \r
-static void sdSendCRCCommand(uint8 cmd, uint32 param)\r
+static uint16_t sdDoCommand(\r
+       uint8_t cmd,\r
+       uint32_t param,\r
+       int useCRC,\r
+       int use2byteResponse)\r
 {\r
-       uint8 send[6];\r
+       int waitWhileBusy = (cmd != SD_GO_IDLE_STATE) && (cmd != SD_STOP_TRANSMISSION);\r
+\r
+       // "busy" probe. We'll examine the results later.\r
+       if (waitWhileBusy)\r
+       {\r
+               SDCard_WriteTxData(0xFF);\r
+       }\r
 \r
+       // send is static as the address must remain consistent for the static\r
+       // DMA descriptors to work.\r
+       static uint8_t send[7];\r
        send[0] = cmd | 0x40;\r
        send[1] = param >> 24;\r
        send[2] = param >> 16;\r
        send[3] = param >> 8;\r
        send[4] = param;\r
-       send[5] = (sdCrc7(send, 5, 0) << 1) | 1;\r
+       if (unlikely(useCRC))\r
+       {\r
+               send[5] = (sdCrc7(send, 5, 0) << 1) | 1;\r
+       }\r
+       else\r
+       {\r
+               send[5] = 1; // stop bit\r
+       }\r
+       send[6] = 0xFF; // Result code or stuff byte.\r
 \r
-       for(cmd = 0; cmd < sizeof(send); cmd++)\r
+       static uint8_t dmaRxTd = CY_DMA_INVALID_TD;\r
+       static uint8_t dmaTxTd = CY_DMA_INVALID_TD;\r
+       if (unlikely(dmaRxTd == CY_DMA_INVALID_TD))\r
        {\r
-               sdSpiByte(send[cmd]);\r
+               dmaRxTd = CyDmaTdAllocate();\r
+               dmaTxTd = CyDmaTdAllocate();\r
+               CyDmaTdSetConfiguration(dmaTxTd, sizeof(send), CY_DMA_DISABLE_TD, TD_INC_SRC_ADR|SD_TX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaTxTd, LO16((uint32)&send), LO16((uint32)SDCard_TXDATA_PTR));\r
+               CyDmaTdSetConfiguration(dmaRxTd, sizeof(send), CY_DMA_DISABLE_TD, SD_RX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaRxTd, LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)&discardBuffer));\r
        }\r
-       // Allow command to process before reading result code.\r
-       sdSpiByte(0xFF);\r
-}\r
 \r
-static void sdSendCommand(uint8 cmd, uint32 param)\r
-{\r
-       uint8 send[6];\r
+       sdTxDMAComplete = 0;\r
+       sdRxDMAComplete = 0;\r
 \r
-       send[0] = cmd | 0x40;\r
-       send[1] = param >> 24;\r
-       send[2] = param >> 16;\r
-       send[3] = param >> 8;\r
-       send[4] = param;\r
-       send[5] = 0;\r
+       CyDmaChSetInitialTd(sdDMARxChan, dmaRxTd);\r
+       CyDmaChSetInitialTd(sdDMATxChan, dmaTxTd);\r
+\r
+       // Some Samsung cards enter a busy-state after single-sector reads.\r
+       // But we also need to wait for R1B to complete from the multi-sector\r
+       // reads.\r
+       if (waitWhileBusy)\r
+       {\r
+               trace(trace_spinSDRxFIFO);\r
+               while (!(SDCard_ReadRxStatus() & SDCard_STS_RX_FIFO_NOT_EMPTY)) {}\r
+               int busy = SDCard_ReadRxData() != 0xFF;\r
+               if (unlikely(busy))\r
+               {\r
+                       trace(trace_spinSDBusy);\r
+                       while (sdSpiByte(0xFF) != 0xFF) {}\r
+               }\r
+       }\r
+\r
+       // The DMA controller is a bit trigger-happy. It will retain\r
+       // a drq request that was triggered while the channel was\r
+       // disabled.\r
+       CyDmaClearPendingDrq(sdDMATxChan);\r
+       CyDmaClearPendingDrq(sdDMARxChan);\r
 \r
-       for(cmd = 0; cmd < sizeof(send); cmd++)\r
+       // There is no flow control, so we must ensure we can read the bytes\r
+       // before we start transmitting\r
+       CyDmaChEnable(sdDMARxChan, 1);\r
+       CyDmaChEnable(sdDMATxChan, 1);\r
+\r
+       trace(trace_spinSDDMA);\r
+       while (!(sdTxDMAComplete && sdRxDMAComplete)) { __WFI(); }\r
+\r
+       uint16_t response = discardBuffer;\r
+       if (unlikely(cmd == SD_STOP_TRANSMISSION))\r
        {\r
-               sdSpiByte(send[cmd]);\r
+               // Stuff byte is required for this command only.\r
+               // Part 1 Simplified standard 3.01\r
+               // "The stop command has an execution delay due to the serial command\r
+               // transmission."\r
+               response = sdSpiByte(0xFF);\r
        }\r
-       // Allow command to process before reading result code.\r
-       sdSpiByte(0xFF);\r
-}\r
 \r
-static uint8 sdReadResp()\r
-{\r
-       uint8 v;\r
-       uint8 i = 128;\r
-       do\r
+       uint32_t start = getTime_ms();\r
+\r
+       trace(trace_spinSDBusy);\r
+       while ((response & 0x80) && likely(elapsedTime_ms(start) <= 200))\r
        {\r
-               v = sdSpiByte(0xFF);\r
-       } while(i-- && (v & 0x80));\r
-       return v;\r
+               response = sdSpiByte(0xFF);\r
+       }\r
+       if (unlikely(use2byteResponse))\r
+       {\r
+               response = (response << 8) | sdSpiByte(0xFF);\r
+       }\r
+       return response;\r
 }\r
 \r
-static uint8 sdCommandAndResponse(uint8 cmd, uint32 param)\r
+\r
+static inline uint16_t sdCommandAndResponse(uint8_t cmd, uint32_t param)\r
 {\r
-       sdSpiByte(0xFF);\r
-       sdSendCommand(cmd, param);\r
-       return sdReadResp();\r
+       return sdDoCommand(cmd, param, 0, 0);\r
 }\r
 \r
-static uint8 sdCRCCommandAndResponse(uint8 cmd, uint32 param)\r
+static inline uint16_t sdCRCCommandAndResponse(uint8_t cmd, uint32_t param)\r
 {\r
-       sdSpiByte(0xFF);\r
-       sdSendCRCCommand(cmd, param);\r
-       return sdReadResp();\r
+       return sdDoCommand(cmd, param, 1, 0);\r
 }\r
 \r
 // Clear the sticky status bits on error.\r
 static void sdClearStatus()\r
 {\r
-       uint8 r2hi = sdCRCCommandAndResponse(SD_SEND_STATUS, 0);\r
-       uint8 r2lo = sdSpiByte(0xFF);\r
-       (void) r2hi; (void) r2lo;\r
+       sdSpiByte(0xFF);\r
+       uint16_t r2 = sdDoCommand(SD_SEND_STATUS, 0, 1, 1);\r
+       (void) r2;\r
 }\r
 \r
-\r
-void sdPrepareRead()\r
+void\r
+sdReadMultiSectorPrep()\r
 {\r
        uint8 v;\r
        uint32 scsiLBA = (transfer.lba + transfer.currentBlock);\r
-       uint32 sdLBA = SCSISector2SD(scsiLBA);\r
-       \r
+       uint32 sdLBA =\r
+               SCSISector2SD(\r
+                       scsiDev.target->cfg->sdSectorStart,\r
+                       scsiDev.target->liveCfg.bytesPerSector,\r
+                       scsiLBA);\r
+\r
        if (!sdDev.ccs)\r
        {\r
                sdLBA = sdLBA * SD_SECTOR_SIZE;\r
        }\r
        v = sdCommandAndResponse(SD_READ_MULTIPLE_BLOCK, sdLBA);\r
-       if (v)\r
+       if (unlikely(v))\r
        {\r
                scsiDiskReset();\r
                sdClearStatus();\r
 \r
                scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+               scsiDev.target->sense.code = HARDWARE_ERROR;\r
+               scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
                scsiDev.phase = STATUS;\r
        }\r
        else\r
@@ -153,21 +247,24 @@ void sdPrepareRead()
        }\r
 }\r
 \r
-static void doReadSector(uint32_t numBytes)\r
+static void\r
+dmaReadSector(uint8_t* outputBuffer)\r
 {\r
-       int prep, i, guard;\r
-\r
        // Wait for a start-block token.\r
-       // Don't wait more than 100ms, which is the timeout recommended\r
-       // in the standard.\r
-       //100ms @ 64Hz = 6400000\r
-       int maxWait = 6400000;\r
-       uint8 token = sdSpiByte(0xFF);\r
-       while (token != 0xFE && (maxWait-- > 0))\r
+       // Don't wait more than 200ms.  The standard recommends 100ms.\r
+       uint32_t start = getTime_ms();\r
+       uint8_t token = sdSpiByte(0xFF);\r
+       trace(trace_spinSDBusy);\r
+       while (token != 0xFE && likely(elapsedTime_ms(start) <= 200))\r
        {\r
+               if (unlikely(token && ((token & 0xE0) == 0)))\r
+               {\r
+                       // Error token!\r
+                       break;\r
+               }\r
                token = sdSpiByte(0xFF);\r
        }\r
-       if (token != 0xFE)\r
+       if (unlikely(token != 0xFE))\r
        {\r
                if (transfer.multiBlock)\r
                {\r
@@ -176,196 +273,130 @@ static void doReadSector(uint32_t numBytes)
                if (scsiDev.status != CHECK_CONDITION)\r
                {\r
                        scsiDev.status = CHECK_CONDITION;\r
-                       scsiDev.sense.code = HARDWARE_ERROR;\r
-                       scsiDev.sense.asc = UNRECOVERED_READ_ERROR;\r
+                       scsiDev.target->sense.code = HARDWARE_ERROR;\r
+                       scsiDev.target->sense.asc = UNRECOVERED_READ_ERROR;\r
                        scsiDev.phase = STATUS;\r
                }\r
+               sdClearStatus();\r
                return;\r
        }\r
 \r
-       // Don't do a bus settle delay if we're already in the correct phase.\r
-       if (transfer.currentBlock == 0)\r
+       static uint8_t dmaRxTd[2] = { CY_DMA_INVALID_TD, CY_DMA_INVALID_TD};\r
+       static uint8_t dmaTxTd = CY_DMA_INVALID_TD;\r
+       if (unlikely(dmaRxTd[0] == CY_DMA_INVALID_TD))\r
        {\r
-               scsiEnterPhase(DATA_IN);\r
-       }\r
+               dmaRxTd[0] = CyDmaTdAllocate();\r
+               dmaRxTd[1] = CyDmaTdAllocate();\r
+               dmaTxTd = CyDmaTdAllocate();\r
+               \r
+               // Receive 512 bytes of data and then 2 bytes CRC.\r
+               CyDmaTdSetConfiguration(dmaRxTd[0], SD_SECTOR_SIZE, dmaRxTd[1], TD_INC_DST_ADR);\r
+               CyDmaTdSetConfiguration(dmaRxTd[1], 2, CY_DMA_DISABLE_TD, SD_RX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaRxTd[1], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)&discardBuffer));\r
+       \r
+               CyDmaTdSetConfiguration(dmaTxTd, SD_SECTOR_SIZE + 2, CY_DMA_DISABLE_TD, SD_TX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaTxTd, LO16((uint32)&dummyBuffer), LO16((uint32)SDCard_TXDATA_PTR));\r
 \r
-       // Quickly seed the FIFO\r
-       prep = 4;\r
-       CY_SET_REG8(SDCard_TXDATA_PTR, 0xFF); // Put a byte in the FIFO\r
-       CY_SET_REG8(SDCard_TXDATA_PTR, 0xFF); // Put a byte in the FIFO\r
-       CY_SET_REG8(SDCard_TXDATA_PTR, 0xFF); // Put a byte in the FIFO\r
-       CY_SET_REG8(SDCard_TXDATA_PTR, 0xFF); // Put a byte in the FIFO\r
+       }\r
+       CyDmaTdSetAddress(dmaRxTd[0], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)outputBuffer));\r
 \r
-       i = 0;\r
-       guard = 0;\r
+       sdIOState = SD_DMA;\r
+       sdTxDMAComplete = 0;\r
+       sdRxDMAComplete = 0;\r
 \r
-       // This loop is critically important for performance.\r
-       // We stream data straight from the SDCard fifos into the SCSI component\r
-       // FIFO's. If the loop isn't fast enough, the transmit FIFO's will empty,\r
-       // and performance will suffer. Every clock cycle counts.\r
-       while (i < numBytes && !scsiDev.resetFlag)\r
-       {\r
-               uint8_t sdRxStatus = CY_GET_REG8(SDCard_RX_STATUS_PTR);\r
-               uint8_t scsiStatus = CY_GET_REG8(scsiTarget_StatusReg__STATUS_REG);\r
+       // Re-loading the initial TD's here is very important, or else\r
+       // we'll be re-using the last-used TD, which would be the last\r
+       // in the chain (ie. CRC TD)\r
+       CyDmaChSetInitialTd(sdDMARxChan, dmaRxTd[0]);\r
+       CyDmaChSetInitialTd(sdDMATxChan, dmaTxTd);\r
 \r
-               // Read from the SPIM fifo if there is room to stream the byte to the\r
-               // SCSI fifos\r
-               if((sdRxStatus & SDCard_STS_RX_FIFO_NOT_EMPTY) &&\r
-                       (scsiDev.resetFlag || (scsiStatus & 1)) // SCSI TX FIFO NOT FULL\r
-                       )\r
-               {\r
-                       uint8_t val = CY_GET_REG8(SDCard_RXDATA_PTR);\r
-                       CY_SET_REG8(scsiTarget_datapath__F0_REG, val);\r
-                       guard++;\r
-               }\r
+       // The DMA controller is a bit trigger-happy. It will retain\r
+       // a drq request that was triggered while the channel was\r
+       // disabled.\r
+       CyDmaClearPendingDrq(sdDMATxChan);\r
+       CyDmaClearPendingDrq(sdDMARxChan);\r
 \r
-               // Byte has been sent out the SCSI interface.\r
-               if (scsiDev.resetFlag || (scsiStatus & 2)) // SCSI RX FIFO NOT EMPTY\r
-               {\r
-                       CY_GET_REG8(scsiTarget_datapath__F1_REG);\r
-                       ++i;\r
-               }\r
+       // There is no flow control, so we must ensure we can read the bytes\r
+       // before we start transmitting\r
+       CyDmaChEnable(sdDMARxChan, 1);\r
+       CyDmaChEnable(sdDMATxChan, 1);\r
+}\r
 \r
-               // How many bytes are in a 4-byte FIFO ? 5.  4 FIFO bytes PLUS one byte\r
-               // being processed bit-by-bit. Artifically limit the number of bytes in the \r
-               // "combined" SPIM TX and RX FIFOS to the individual FIFO size.\r
-               // Unlike the SCSI component, SPIM doesn't check if there's room in\r
-               // the output FIFO before starting to transmit.\r
-               if ((prep - guard < 4) && (prep < numBytes))\r
-               {\r
-                       CY_SET_REG8(SDCard_TXDATA_PTR, 0xFF); // Put a byte in the FIFO\r
-                       prep++;\r
-               }\r
+int\r
+sdReadSectorDMAPoll()\r
+{\r
+       if (sdRxDMAComplete && sdTxDMAComplete)\r
+       {\r
+               // DMA transfer is complete\r
+               sdIOState = SD_IDLE;\r
+               return 1;\r
        }\r
-\r
-       // Read and discard remaining bytes.\r
-       while (i < SD_SECTOR_SIZE)\r
+       else\r
        {\r
-               uint8_t sdRxStatus = CY_GET_REG8(SDCard_RX_STATUS_PTR);\r
-               if(sdRxStatus & SDCard_STS_RX_FIFO_NOT_EMPTY)\r
-               {\r
-                       CY_GET_REG8(SDCard_RXDATA_PTR);\r
-                       guard++;\r
-                       i++;\r
-               }\r
-\r
-               if ((prep - guard < 4) && (prep < SD_SECTOR_SIZE))\r
-               {\r
-                       CY_SET_REG8(SDCard_TXDATA_PTR, 0xFF); // Put a byte in the FIFO\r
-                       prep++;\r
-               }\r
+               return 0;\r
        }\r
-\r
-       sdSpiByte(0xFF); // CRC\r
-       sdSpiByte(0xFF); // CRC\r
-       scsiDev.dataLen = numBytes;\r
-       scsiDev.dataPtr = numBytes;\r
-       \r
-       while (SCSI_ReadPin(SCSI_In_ACK) && !scsiDev.resetFlag) {}\r
 }\r
 \r
-static void doReadSectorSingle(uint32 sdBlock, int sdBytes)\r
+void sdReadSingleSectorDMA(uint32_t lba, uint8_t* outputBuffer)\r
 {\r
        uint8 v;\r
        if (!sdDev.ccs)\r
        {\r
-               sdBlock = sdBlock * SD_SECTOR_SIZE;\r
-       }       \r
-       v = sdCommandAndResponse(SD_READ_SINGLE_BLOCK, sdBlock);\r
-       if (v)\r
+               lba = lba * SD_SECTOR_SIZE;\r
+       }\r
+       v = sdCommandAndResponse(SD_READ_SINGLE_BLOCK, lba);\r
+       if (unlikely(v))\r
        {\r
                scsiDiskReset();\r
                sdClearStatus();\r
 \r
                scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+               scsiDev.target->sense.code = HARDWARE_ERROR;\r
+               scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
                scsiDev.phase = STATUS;\r
        }\r
        else\r
        {\r
-               doReadSector(sdBytes);\r
+               dmaReadSector(outputBuffer);\r
        }\r
 }\r
 \r
-\r
-void sdReadSectorSingle()\r
+void\r
+sdReadMultiSectorDMA(uint8_t* outputBuffer)\r
 {\r
-       uint32 scsiLBA = (transfer.lba + transfer.currentBlock);\r
-       uint32 sdLBA = SCSISector2SD(scsiLBA);\r
-       \r
-       int sdSectors = SDSectorsPerSCSISector();\r
-       int i;\r
-       for (i = 0; (i < sdSectors - 1) && (scsiDev.status != CHECK_CONDITION); ++i)\r
-       {\r
-               doReadSectorSingle(sdLBA + i, SD_SECTOR_SIZE);\r
-       }\r
-\r
-       if (scsiDev.status != CHECK_CONDITION)\r
-       {\r
-               int remaining = config->bytesPerSector % SD_SECTOR_SIZE;\r
-               if (remaining == 0) remaining = SD_SECTOR_SIZE; // Full sector needed.\r
-               doReadSectorSingle(sdLBA + i, remaining);\r
-       }\r
+       // Pre: sdReadMultiSectorPrep called.\r
+       dmaReadSector(outputBuffer);\r
 }\r
 \r
-void sdReadSectorMulti()\r
+\r
+void sdCompleteRead()\r
 {\r
-       // Pre: sdPrepareRead called.\r
-       int sdSectors = SDSectorsPerSCSISector();\r
-       int i;\r
-       for (i = 0; (i < sdSectors - 1) && (scsiDev.status != CHECK_CONDITION); ++i)\r
+       if (unlikely(sdIOState != SD_IDLE))\r
        {\r
-               doReadSector(SD_SECTOR_SIZE);\r
+               // Not much choice but to wait until we've completed the transfer.\r
+               // Cancelling the transfer can't be done as we have no way to reset\r
+               // the SD card.\r
+               trace(trace_spinSDCompleteRead);\r
+               while (!sdReadSectorDMAPoll()) { /* spin */ }\r
        }\r
-\r
-       if (scsiDev.status != CHECK_CONDITION)\r
+       \r
+       if (transfer.inProgress)\r
        {\r
-               int remaining = config->bytesPerSector % SD_SECTOR_SIZE;\r
-               if (remaining == 0) remaining = SD_SECTOR_SIZE; // Full sector needed.\r
-               doReadSector(remaining);\r
-       }\r
-}\r
-\r
-\r
-void sdCompleteRead()\r
-{\r
-       transfer.inProgress = 0;\r
+               transfer.inProgress = 0;\r
+               uint8 r1b = sdCommandAndResponse(SD_STOP_TRANSMISSION, 0);\r
 \r
-       // We cannot send even a single "padding" byte, as we normally would when\r
-       // sending a command.  If we've just finished reading the very last block\r
-       // on the card, then reading an additional dummy byte will just trigger\r
-       // an error condition as we're trying to read past-the-end of the storage\r
-       // device.\r
-       // ie. do not use sdCommandAndResponse here.\r
-       uint8 r1b;\r
-       sdSendCommand(SD_STOP_TRANSMISSION, 0);\r
-       r1b = sdReadResp();\r
-\r
-       if (r1b)\r
-       {\r
-               // Try very hard to make sure the transmission stops\r
-               int retries = 255;\r
-               while (r1b && retries)\r
+               if (unlikely(r1b))\r
                {\r
-                       r1b = sdCommandAndResponse(SD_STOP_TRANSMISSION, 0);\r
-                       retries--;\r
+                       scsiDev.status = CHECK_CONDITION;\r
+                       scsiDev.target->sense.code = HARDWARE_ERROR;\r
+                       scsiDev.target->sense.asc = UNRECOVERED_READ_ERROR;\r
+                       scsiDev.phase = STATUS;\r
                }\r
-\r
-               scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = UNRECOVERED_READ_ERROR;\r
-               scsiDev.phase = STATUS;\r
        }\r
 \r
-       // R1b has an optional trailing "busy" signal.\r
-       {\r
-               uint8 busy;\r
-               do\r
-               {\r
-                       busy = sdSpiByte(0xFF);\r
-               } while (busy == 0);\r
-       }\r
+       // R1b has an optional trailing "busy" signal, but we defer waiting on this.\r
+       // The next call to sdCommandAndResponse will wait for the busy state to\r
+       // clear.\r
 }\r
 \r
 static void sdWaitWriteBusy()\r
@@ -377,174 +408,161 @@ static void sdWaitWriteBusy()
        } while (val != 0xFF);\r
 }\r
 \r
-static int doWriteSector(uint32_t numBytes)\r
+void\r
+sdWriteMultiSectorDMA(uint8_t* outputBuffer)\r
 {\r
-       int prep, i, guard;\r
-       int result, maxWait;\r
-       uint8 dataToken;\r
+       static uint8_t dmaRxTd[2] = { CY_DMA_INVALID_TD, CY_DMA_INVALID_TD};\r
+       static uint8_t dmaTxTd[3] = { CY_DMA_INVALID_TD, CY_DMA_INVALID_TD, CY_DMA_INVALID_TD};\r
+       if (unlikely(dmaRxTd[0] == CY_DMA_INVALID_TD))\r
+       {\r
+               dmaRxTd[0] = CyDmaTdAllocate();\r
+               dmaRxTd[1] = CyDmaTdAllocate();\r
+               dmaTxTd[0] = CyDmaTdAllocate();\r
+               dmaTxTd[1] = CyDmaTdAllocate();\r
+               dmaTxTd[2] = CyDmaTdAllocate();\r
+               \r
+               // Transmit 512 bytes of data and then 2 bytes CRC, and then get the response byte\r
+               // We need to do this without stopping the clock\r
+               CyDmaTdSetConfiguration(dmaTxTd[0], 1, dmaTxTd[1], TD_INC_SRC_ADR);\r
+               CyDmaTdSetAddress(dmaTxTd[0], LO16((uint32)&writeStartToken), LO16((uint32)SDCard_TXDATA_PTR));\r
 \r
-       // Don't do a bus settle delay if we're already in the correct phase.\r
-       if (transfer.currentBlock == 0)\r
-       {\r
-               scsiEnterPhase(DATA_OUT);\r
-       }\r
-       \r
-       sdSpiByte(0xFC); // MULTIPLE byte start token\r
-       \r
-       prep = 0;\r
-       i = 0;\r
-       guard = 0;\r
-\r
-       // This loop is critically important for performance.\r
-       // We stream data straight from the SCSI fifos into the SPIM component\r
-       // FIFO's. If the loop isn't fast enough, the transmit FIFO's will empty,\r
-       // and performance will suffer. Every clock cycle counts.       \r
-       while (i < numBytes && !scsiDev.resetFlag)\r
-       {\r
-               uint8_t sdRxStatus = CY_GET_REG8(SDCard_RX_STATUS_PTR);\r
-               uint8_t scsiStatus = CY_GET_REG8(scsiTarget_StatusReg__STATUS_REG);\r
-\r
-               // Read from the SCSI fifo if there is room to stream the byte to the\r
-               // SPIM fifos\r
-               // See sdReadSector for comment on guard (FIFO size is really 5)\r
-               if((guard - i < 4) &&\r
-                       (scsiDev.resetFlag || (scsiStatus & 2))\r
-                       ) // SCSI RX FIFO NOT EMPTY\r
-               {\r
-                       uint8_t val = CY_GET_REG8(scsiTarget_datapath__F1_REG);\r
-                       CY_SET_REG8(SDCard_TXDATA_PTR, val);\r
-                       guard++;\r
-               }\r
+               CyDmaTdSetConfiguration(dmaTxTd[1], SD_SECTOR_SIZE, dmaTxTd[2], TD_INC_SRC_ADR);\r
 \r
-               // Byte has been sent out the SPIM interface.\r
-               if (sdRxStatus & SDCard_STS_RX_FIFO_NOT_EMPTY)\r
-               {\r
-                        CY_GET_REG8(SDCard_RXDATA_PTR);\r
-                       ++i;\r
-               }\r
+               CyDmaTdSetConfiguration(dmaTxTd[2], 2 + sizeof(writeResponseBuffer), CY_DMA_DISABLE_TD, SD_TX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaTxTd[2], LO16((uint32)&dummyBuffer), LO16((uint32)SDCard_TXDATA_PTR));\r
 \r
-               if (prep < numBytes &&\r
-                       (scsiDev.resetFlag || (scsiStatus & 1)) // SCSI TX FIFO NOT FULL\r
-                       )\r
-               {\r
-                       // Trigger the SCSI component to read a byte\r
-                       CY_SET_REG8(scsiTarget_datapath__F0_REG, 0xFF);\r
-                       prep++;\r
-               }\r
+               CyDmaTdSetConfiguration(dmaRxTd[0], SD_SECTOR_SIZE + 3, dmaRxTd[1], 0);\r
+               CyDmaTdSetAddress(dmaRxTd[0], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)&discardBuffer));\r
+               CyDmaTdSetConfiguration(dmaRxTd[1], sizeof(writeResponseBuffer), CY_DMA_DISABLE_TD, SD_RX_DMA__TD_TERMOUT_EN|TD_INC_DST_ADR);\r
+               CyDmaTdSetAddress(dmaRxTd[1], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)&writeResponseBuffer));\r
        }\r
-       \r
-       // Write remaining bytes as 0x00\r
-       while (i < SD_SECTOR_SIZE)\r
-       {\r
-               uint8_t sdRxStatus = CY_GET_REG8(SDCard_RX_STATUS_PTR);\r
+       CyDmaTdSetAddress(dmaTxTd[1], LO16((uint32)outputBuffer), LO16((uint32)SDCard_TXDATA_PTR));\r
 \r
-               if(guard - i < 4)\r
-               {\r
-                       CY_SET_REG8(SDCard_TXDATA_PTR, 0x00);\r
-                       guard++;\r
-               }\r
 \r
-               // Byte has been sent out the SPIM interface.\r
-               if (sdRxStatus & SDCard_STS_RX_FIFO_NOT_EMPTY)\r
-               {\r
-                        CY_GET_REG8(SDCard_RXDATA_PTR);\r
-                       ++i;\r
-               }\r
-       }\r
-       \r
-       sdSpiByte(0x00); // CRC\r
-       sdSpiByte(0x00); // CRC\r
+       sdIOState = SD_DMA;\r
+       // The DMA controller is a bit trigger-happy. It will retain\r
+       // a drq request that was triggered while the channel was\r
+       // disabled.\r
+       CyDmaClearPendingDrq(sdDMATxChan);\r
+       CyDmaClearPendingDrq(sdDMARxChan);\r
 \r
-       // Don't wait more than 1s.\r
-       // My 2g Kingston micro-sd card doesn't respond immediately.\r
-       // My 16Gb card does.\r
-       maxWait = 1000000;\r
-       dataToken = sdSpiByte(0xFF); // Response\r
-       while (dataToken == 0xFF && maxWait-- > 0)\r
-       {\r
-               CyDelayUs(1);\r
-               dataToken = sdSpiByte(0xFF);\r
-       }\r
-       if (((dataToken & 0x1F) >> 1) != 0x2) // Accepted.\r
-       {\r
-               uint8 r1b, busy;\r
-               \r
-               sdWaitWriteBusy();\r
+       sdTxDMAComplete = 0;\r
+       sdRxDMAComplete = 0;\r
 \r
-               r1b = sdCommandAndResponse(SD_STOP_TRANSMISSION, 0);\r
-               (void) r1b;\r
-               sdSpiByte(0xFF);\r
+       // Re-loading the initial TD's here is very important, or else\r
+       // we'll be re-using the last-used TD, which would be the last\r
+       // in the chain (ie. CRC TD)\r
+       CyDmaChSetInitialTd(sdDMARxChan, dmaRxTd[0]);\r
+       CyDmaChSetInitialTd(sdDMATxChan, dmaTxTd[0]);\r
+\r
+       // There is no flow control, so we must ensure we can read the bytes\r
+       // before we start transmitting\r
+       CyDmaChEnable(sdDMARxChan, 1);\r
+       CyDmaChEnable(sdDMATxChan, 1);\r
+}\r
 \r
-               // R1b has an optional trailing "busy" signal.\r
-               do\r
+int\r
+sdWriteSectorDMAPoll(int sendStopToken)\r
+{\r
+       if (sdRxDMAComplete && sdTxDMAComplete)\r
+       {\r
+               if (sdIOState == SD_DMA)\r
                {\r
-                       busy = sdSpiByte(0xFF);\r
-               } while (busy == 0);\r
+                       // Retry a few times. The data token format is:\r
+                       // XXX0AAA1\r
+                       int i = 0;\r
+                       uint8_t dataToken;\r
+                       do\r
+                       {\r
+                               dataToken = writeResponseBuffer[i]; // Response\r
+                               ++i;\r
+                       } while (((dataToken & 0x0101) != 1) && (i < sizeof(writeResponseBuffer)));\r
+\r
+                       // At this point we should either have an accepted token, or we'll\r
+                       // timeout and proceed into the error case below.\r
+                       if (unlikely(((dataToken & 0x1F) >> 1) != 0x2)) // Accepted.\r
+                       {\r
+                               sdIOState = SD_IDLE;\r
+\r
+                               sdWaitWriteBusy();\r
+                               sdSpiByte(0xFD); // STOP TOKEN\r
+                               sdWaitWriteBusy();\r
+\r
+                               transfer.inProgress = 0;\r
+                               scsiDiskReset();\r
+                               sdClearStatus();\r
+\r
+                               scsiDev.status = CHECK_CONDITION;\r
+                               scsiDev.target->sense.code = HARDWARE_ERROR;\r
+                               scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+                               scsiDev.phase = STATUS;\r
+                       }\r
+                       else\r
+                       {\r
+                               sdIOState = SD_ACCEPTED;\r
+                       }\r
+               }\r
 \r
-               // Wait for the card to come out of busy.\r
-               sdWaitWriteBusy();\r
+               if (sdIOState == SD_ACCEPTED)\r
+               {\r
+                       // Wait while the SD card is busy\r
+                       if (sdSpiByte(0xFF) == 0xFF)\r
+                       {\r
+                               if (sendStopToken)\r
+                               {\r
+                                       sdIOState = SD_BUSY;\r
+                                       transfer.inProgress = 0;\r
+\r
+                                       sdSpiByte(0xFD); // STOP TOKEN\r
+                               }\r
+                               else\r
+                               {\r
+                                       sdIOState = SD_IDLE;\r
+                               }\r
+                       }\r
+               }\r
 \r
-               transfer.inProgress = 0;\r
-               scsiDiskReset();\r
-               sdClearStatus();\r
+               if (sdIOState == SD_BUSY)\r
+               {\r
+                       // Wait while the SD card is busy\r
+                       if (sdSpiByte(0xFF) == 0xFF)\r
+                       {\r
+                               sdIOState = SD_IDLE;\r
+                       }\r
+               }\r
 \r
-               scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
-               scsiDev.phase = STATUS;\r
-               result = 0;\r
+               return sdIOState == SD_IDLE;\r
        }\r
        else\r
        {\r
-               sdWaitWriteBusy();\r
-               result = 1;\r
+               return 0;\r
        }\r
-\r
-       while (SCSI_ReadPin(SCSI_In_ACK) && !scsiDev.resetFlag) {}\r
-\r
-       return result;\r
 }\r
 \r
-int sdWriteSector()\r
+void sdCompleteWrite()\r
 {\r
-       int result = 1;\r
-       // Pre: sdPrepareWrite called.\r
-       int sdSectors = SDSectorsPerSCSISector();\r
-       int i;\r
-       for (i = 0; result && (i < sdSectors - 1) && (scsiDev.status != CHECK_CONDITION); ++i)\r
+       if (unlikely(sdIOState != SD_IDLE))\r
        {\r
-               result = doWriteSector(SD_SECTOR_SIZE);\r
+               // Not much choice but to wait until we've completed the transfer.\r
+               // Cancelling the transfer can't be done as we have no way to reset\r
+               // the SD card.\r
+               trace(trace_spinSDCompleteWrite);\r
+               while (!sdWriteSectorDMAPoll(1)) { /* spin */ }\r
        }\r
 \r
-       if (result && scsiDev.status != CHECK_CONDITION)\r
+       if (transfer.inProgress && likely(scsiDev.phase == DATA_OUT))\r
        {\r
-               int remaining = config->bytesPerSector % SD_SECTOR_SIZE;\r
-               if (remaining == 0) remaining = SD_SECTOR_SIZE; // Full sector needed.\r
-               result = doWriteSector(remaining);\r
+               uint16_t r2 = sdDoCommand(SD_SEND_STATUS, 0, 0, 1);\r
+               if (unlikely(r2))\r
+               {\r
+                       sdClearStatus();\r
+                       scsiDev.status = CHECK_CONDITION;\r
+                       scsiDev.target->sense.code = HARDWARE_ERROR;\r
+                       scsiDev.target->sense.asc = WRITE_ERROR_AUTO_REALLOCATION_FAILED;\r
+                       scsiDev.phase = STATUS;\r
+               }\r
        }\r
-       return result;\r
-}\r
-\r
-void sdCompleteWrite()\r
-{\r
        transfer.inProgress = 0;\r
-\r
-       uint8 r1, r2;\r
-\r
-       sdSpiByte(0xFD); // STOP TOKEN\r
-       // Wait for the card to come out of busy.\r
-       sdWaitWriteBusy();\r
-\r
-       r1 = sdCommandAndResponse(13, 0); // send status\r
-       r2 = sdSpiByte(0xFF);\r
-       if (r1 || r2)\r
-       {\r
-               sdClearStatus();\r
-               scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = WRITE_ERROR_AUTO_REALLOCATION_FAILED;\r
-               scsiDev.phase = STATUS;\r
-       }\r
 }\r
 \r
 \r
@@ -555,6 +573,8 @@ static int sendIfCond()
 \r
        do\r
        {\r
+               // 11:8 Host voltage. 1 = 2.7-3.6V\r
+               // 7:0 Echo bits. Ignore.\r
                uint8 status = sdCRCCommandAndResponse(SD_SEND_IF_COND, 0x000001AA);\r
 \r
                if (status == SD_R1_IDLE)\r
@@ -585,49 +605,80 @@ static int sendIfCond()
 \r
 static int sdOpCond()\r
 {\r
+       // Repeatedly send SD_APP_CMD followed by SD_APP_SEND_OP_COND until the\r
+       // card answers with a zero status or 1000ms have elapsed since `start`.\r
+       // Returns non-zero only when the last status received was zero.\r
-       int retries = 50;\r
+       uint32_t start = getTime_ms();\r
 \r
        uint8 status;\r
        do\r
        {\r
-               CyDelay(33); // Spec says to retry for 1 second.\r
-\r
                sdCRCCommandAndResponse(SD_APP_CMD, 0);\r
                // Host Capacity Support = 1 (SDHC/SDXC supported)\r
                status = sdCRCCommandAndResponse(SD_APP_SEND_OP_COND, 0x40000000);\r
 \r
                sdClearStatus();\r
-       } while ((status != 0) && (--retries > 0));\r
 \r
-       return retries > 0;\r
+       // Spec says to poll for 1 second; the loop is bounded by elapsed time\r
+       // rather than by a retry count, and there is no delay between attempts.\r
+       } while ((status != 0) && (elapsedTime_ms(start) < 1000));\r
+\r
+       return status == 0;\r
 }\r
 \r
 static int sdReadOCR()\r
 {\r
+       // Send SD_READ_OCR and read the 4 bytes that follow the status byte.\r
+       // Bit 0x40 of the first byte is stored in sdDev.ccs; bit 0x80 of the\r
+       // same byte is used as the "complete" flag. The command is repeated\r
+       // until that flag is set, a non-zero status is returned, or 1000ms\r
+       // have elapsed. Returns non-zero only when the status is zero and the\r
+       // flag is set.\r
-       uint8 buf[4];\r
-       int i;\r
-       \r
-       uint8 status = sdCRCCommandAndResponse(SD_READ_OCR, 0);\r
-       if(status){goto bad;}\r
+       uint32_t start = getTime_ms();\r
+       int complete;\r
+       uint8 status;\r
 \r
-       for (i = 0; i < 4; ++i)\r
+       do\r
        {\r
-               buf[i] = sdSpiByte(0xFF);\r
-       }\r
+               uint8 buf[4];\r
+               int i;\r
 \r
-       sdDev.ccs = (buf[0] & 0x40) ? 1 : 0;\r
+               status = sdCRCCommandAndResponse(SD_READ_OCR, 0);\r
+               // `complete` is not assigned on this early exit; that is safe\r
+               // because the return expression tests `status` first.\r
+               if(status) { break; }\r
 \r
-       return 1;\r
-bad:\r
-       return 0;\r
+               for (i = 0; i < 4; ++i)\r
+               {\r
+                       buf[i] = sdSpiByte(0xFF);\r
+               }\r
+\r
+               sdDev.ccs = (buf[0] & 0x40) ? 1 : 0;\r
+               complete = (buf[0] & 0x80);\r
+\r
+       } while (!status &&\r
+               !complete &&\r
+               (elapsedTime_ms(start) < 1000));\r
+\r
+       return (status == 0) && complete;\r
+}\r
+\r
+static void sdReadCID()\r
+{\r
+       // Send SD_SEND_CID, wait for the 0xFE start token, then store the\r
+       // next 16 bytes in sdDev.cid. Returns without touching sdDev.cid if\r
+       // the command status is non-zero or no start token is seen. There is\r
+       // no return value, so callers cannot tell whether the read succeeded.\r
+       uint8 startToken;\r
+       int maxWait, i;\r
+\r
+       uint8 status = sdCRCCommandAndResponse(SD_SEND_CID, 0);\r
+       if(status){return;}\r
+\r
+       // Clock out at most 1024 bytes while looking for the start token.\r
+       maxWait = 1023;\r
+       do\r
+       {\r
+               startToken = sdSpiByte(0xFF);\r
+       } while(maxWait-- && (startToken != 0xFE));\r
+       if (startToken != 0xFE) { return; }\r
+\r
+       for (i = 0; i < 16; ++i)\r
+       {\r
+               sdDev.cid[i] = sdSpiByte(0xFF);\r
+       }\r
+       sdSpiByte(0xFF); // CRC (read and discarded)\r
+       sdSpiByte(0xFF); // CRC (read and discarded)\r
 }\r
 \r
 static int sdReadCSD()\r
 {\r
        uint8 startToken;\r
        int maxWait, i;\r
-       uint8 buf[16];\r
-       \r
+\r
        uint8 status = sdCRCCommandAndResponse(SD_SEND_CSD, 0);\r
        if(status){goto bad;}\r
 \r
@@ -640,29 +691,29 @@ static int sdReadCSD()
 \r
        for (i = 0; i < 16; ++i)\r
        {\r
-               buf[i] = sdSpiByte(0xFF);\r
+               sdDev.csd[i] = sdSpiByte(0xFF);\r
        }\r
        sdSpiByte(0xFF); // CRC\r
        sdSpiByte(0xFF); // CRC\r
 \r
-       if ((buf[0] >> 6) == 0x00)\r
+       if ((sdDev.csd[0] >> 6) == 0x00)\r
        {\r
                // CSD version 1\r
                // C_SIZE in bits [73:62]\r
-               uint32 c_size = (((((uint32)buf[6]) & 0x3) << 16) | (((uint32)buf[7]) << 8) | buf[8]) >> 6;\r
-               uint32 c_mult = (((((uint32)buf[9]) & 0x3) << 8) | ((uint32)buf[0xa])) >> 7;\r
-               uint32 sectorSize = buf[5] & 0x0F;\r
+               uint32 c_size = (((((uint32)sdDev.csd[6]) & 0x3) << 16) | (((uint32)sdDev.csd[7]) << 8) | sdDev.csd[8]) >> 6;\r
+               uint32 c_mult = (((((uint32)sdDev.csd[9]) & 0x3) << 8) | ((uint32)sdDev.csd[0xa])) >> 7;\r
+               uint32 sectorSize = sdDev.csd[5] & 0x0F;\r
                sdDev.capacity = ((c_size+1) * ((uint64)1 << (c_mult+2)) * ((uint64)1 << sectorSize)) / SD_SECTOR_SIZE;\r
        }\r
-       else if ((buf[0] >> 6) == 0x01)\r
+       else if ((sdDev.csd[0] >> 6) == 0x01)\r
        {\r
                // CSD version 2\r
                // C_SIZE in bits [69:48]\r
 \r
                uint32 c_size =\r
-                       ((((uint32)buf[7]) & 0x3F) << 16) |\r
-                       (((uint32)buf[8]) << 8) |\r
-                       ((uint32)buf[7]);\r
+                       ((((uint32)sdDev.csd[7]) & 0x3F) << 16) |\r
+                       (((uint32)sdDev.csd[8]) << 8) |\r
+                       ((uint32)sdDev.csd[7]);\r
                sdDev.capacity = (c_size + 1) * 1024;\r
        }\r
        else\r
@@ -675,19 +726,60 @@ bad:
        return 0;\r
 }\r
 \r
+static void sdInitDMA()\r
+{\r
+       // Set up the SD Tx and Rx DMA channels and their completion ISRs.\r
+       // The work is done only while sdDMATxChan still holds\r
+       // CY_DMA_INVALID_CHANNEL, so repeated calls (e.g. from each sdInit)\r
+       // are harmless.\r
+       // One-time init only.\r
+       if (sdDMATxChan == CY_DMA_INVALID_CHANNEL)\r
+       {\r
+               // Presumably the last two arguments are the upper 16 bits of\r
+               // the source and destination addresses (Tx: SRAM -> peripheral,\r
+               // Rx: peripheral -> SRAM) - confirm against the DMA API docs.\r
+               sdDMATxChan =\r
+                       SD_TX_DMA_DmaInitialize(\r
+                               1, // Bytes per burst\r
+                               1, // request per burst\r
+                               HI16(CYDEV_SRAM_BASE),\r
+                               HI16(CYDEV_PERIPH_BASE)\r
+                               );\r
+\r
+               sdDMARxChan =\r
+                       SD_RX_DMA_DmaInitialize(\r
+                               1, // Bytes per burst\r
+                               1, // request per burst\r
+                               HI16(CYDEV_PERIPH_BASE),\r
+                               HI16(CYDEV_SRAM_BASE)\r
+                               );\r
+\r
+               // Both channels start out disabled.\r
+               CyDmaChDisable(sdDMATxChan);\r
+               CyDmaChDisable(sdDMARxChan);\r
+\r
+               // sdRxISR / sdTxISR set sdRxDMAComplete / sdTxDMAComplete.\r
+               SD_RX_DMA_COMPLETE_StartEx(sdRxISR);\r
+               SD_TX_DMA_COMPLETE_StartEx(sdTxISR);\r
+       }\r
+}\r
+\r
 int sdInit()\r
 {\r
+       // Bring up the SD card over SPI. Cached card details in sdDev are\r
+       // cleared first, the SPI clock is slowed for the start-up commands,\r
+       // and the original divider is restored before the CSD and CID are\r
+       // read. `result` is 1 on success and 0 otherwise (presumably returned\r
+       // after `out:`); sdDev.capacity is zeroed on the failure path.\r
        int result = 0;\r
        int i;\r
        uint8 v;\r
-       \r
+\r
        sdDev.version = 0;\r
        sdDev.ccs = 0;\r
        sdDev.capacity = 0;\r
+       memset(sdDev.csd, 0, sizeof(sdDev.csd));\r
+       memset(sdDev.cid, 0, sizeof(sdDev.cid));\r
 \r
+       sdInitDMA();\r
+\r
+       SD_CS_SetDriveMode(SD_CS_DM_STRONG);\r
        SD_CS_Write(1); // Set CS inactive (active low)\r
-       SD_Init_Clk_Start(); // Turn on the slow 400KHz clock\r
-       SD_Clk_Ctl_Write(0); // Select the 400KHz clock source.\r
+\r
+       // Set the SPI clock for 400kHz transfers\r
+       // 25MHz / 400kHz approx factor of 63.\r
+       // The register contains (divider - 1)\r
+       // The current divider is remembered so it can be restored later.\r
+       uint16_t clkDiv25MHz =  SD_Data_Clk_GetDividerRegister();\r
+       SD_Data_Clk_SetDivider(((clkDiv25MHz + 1) * 63) - 1);\r
+       // Wait for the clock to settle.\r
+       CyDelayUs(1);\r
+\r
        SDCard_Start(); // Enable SPI hardware\r
 \r
        // Power on sequence. 74 clock cycles of a "1" while CS unasserted.\r
@@ -699,13 +791,14 @@ int sdInit()
        SD_CS_Write(0); // Set CS active (active low)\r
        CyDelayUs(1);\r
 \r
-       v = sdCRCCommandAndResponse(SD_GO_IDLE_STATE, 0);\r
+       sdSpiByte(0xFF);\r
+       v = sdDoCommand(SD_GO_IDLE_STATE, 0, 1, 0);\r
        if(v != 1){goto bad;}\r
 \r
        ledOn();\r
-       if (!sendIfCond()) goto bad; // Sets V1 or V2 flag\r
-       if (!sdOpCond()) goto bad;\r
-       if (!sdReadOCR()) goto bad;\r
+       if (!sendIfCond()) goto bad; // Sets V1 or V2 flag  CMD8\r
+       if (!sdOpCond()) goto bad; // ACMD41. Wait for init to complete.\r
+       if (!sdReadOCR()) goto bad; // CMD58. Get CCS flag. Only valid after init.\r
 \r
        // This command will be ignored if sdDev.ccs is set.\r
        // SDHC and SDXC are always 512bytes.\r
@@ -714,24 +807,16 @@ int sdInit()
        v = sdCRCCommandAndResponse(SD_CRC_ON_OFF, 0); //crc off\r
        if(v){goto bad;}\r
 \r
-       // now set the sd card up for full speed\r
+       // now set the sd card back to full speed.\r
        // The SD Card spec says we can run SPI @ 25MHz\r
-       // But the PSoC 5LP SPIM datasheet says the most we can do is 18MHz.\r
-       // I've confirmed that no data is ever put into the RX FIFO when run at\r
-       // 20MHz or 25MHz.\r
-       // ... and then we get timing analysis failures if the BUS_CLK is over 62MHz.\r
-       // So we run the MASTER_CLK and BUS_CLK at 60MHz, and run the SPI clock at 30MHz\r
-       // (15MHz SPI transfer clock).\r
        SDCard_Stop();\r
-       \r
+\r
        // We can't run at full-speed with the pullup resistors enabled.\r
        SD_MISO_SetDriveMode(SD_MISO_DM_DIG_HIZ);\r
        SD_MOSI_SetDriveMode(SD_MOSI_DM_STRONG);\r
        SD_SCK_SetDriveMode(SD_SCK_DM_STRONG);\r
-       \r
-       SD_Data_Clk_Start(); // Turn on the fast clock\r
-       SD_Clk_Ctl_Write(1); // Select the fast clock source.\r
-       SD_Init_Clk_Stop(); // Stop the slow clock.\r
+\r
+       // Put back the divider that was saved before slowing the clock.\r
+       SD_Data_Clk_SetDivider(clkDiv25MHz);\r
        CyDelayUs(1);\r
        SDCard_Start();\r
 \r
@@ -742,11 +827,13 @@ int sdInit()
        SDCard_ClearFIFO();\r
 \r
        if (!sdReadCSD()) goto bad;\r
+       // The CID read is best-effort: it reports no status and a failure\r
+       // does not fail the init.\r
+       sdReadCID();\r
 \r
        result = 1;\r
        goto out;\r
 \r
 bad:\r
+       SD_Data_Clk_SetDivider(clkDiv25MHz); // Restore the clock for our next retry\r
        sdDev.capacity = 0;\r
 \r
 out:\r
@@ -756,33 +843,39 @@ out:
 \r
 }\r
 \r
-void sdPrepareWrite()\r
+void sdWriteMultiSectorPrep()\r
 {\r
+       // Start a multi-block write for the current transfer: send the\r
+       // pre-erase block count, then SD_WRITE_MULTIPLE_BLOCK at the SD\r
+       // address derived from transfer.lba + transfer.currentBlock. If the\r
+       // write command is rejected, the disk is reset and the SCSI state is\r
+       // set to CHECK_CONDITION / HARDWARE_ERROR with the phase moved to\r
+       // STATUS.\r
        uint8 v;\r
-       \r
+\r
        // Set the number of blocks to pre-erase by the multiple block write command\r
        // We don't care about the response - if the command is not accepted, writes\r
        // will just be a bit slower.\r
        // Max 22bit parameter.\r
-       uint32_t sdBlocks = transfer.blocks * SDSectorsPerSCSISector();\r
+       // NOTE(review): the clamp below, 0x7FFFFF, is a 23-bit value although\r
+       // the comment above says 22bit - confirm against the SD spec.\r
+       uint32_t sdBlocks =\r
+               transfer.blocks *\r
+                       SDSectorsPerSCSISector(scsiDev.target->liveCfg.bytesPerSector);\r
        uint32 blocks = sdBlocks > 0x7FFFFF ? 0x7FFFFF : sdBlocks;\r
        sdCommandAndResponse(SD_APP_CMD, 0);\r
        sdCommandAndResponse(SD_APP_SET_WR_BLK_ERASE_COUNT, blocks);\r
 \r
        uint32 scsiLBA = (transfer.lba + transfer.currentBlock);\r
-       uint32 sdLBA = SCSISector2SD(scsiLBA);\r
+       uint32 sdLBA =\r
+               SCSISector2SD(\r
+                       scsiDev.target->cfg->sdSectorStart,\r
+                       scsiDev.target->liveCfg.bytesPerSector,\r
+                       scsiLBA);\r
        if (!sdDev.ccs)\r
        {\r
                sdLBA = sdLBA * SD_SECTOR_SIZE;\r
        }\r
-       v = sdCommandAndResponse(25, sdLBA);\r
-       if (v)\r
+       v = sdCommandAndResponse(SD_WRITE_MULTIPLE_BLOCK, sdLBA);\r
+       if (unlikely(v))\r
        {\r
                scsiDiskReset();\r
                sdClearStatus();\r
                scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+               scsiDev.target->sense.code = HARDWARE_ERROR;\r
+               scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
                scsiDev.phase = STATUS;\r
        }\r
        else\r
@@ -791,3 +884,58 @@ void sdPrepareWrite()
        }\r
 }\r
 \r
+void sdPoll()\r
+{\r
+       // Detect SD card insertion and removal by sampling the CS pin. This\r
+       // only runs while the SCSI bus is in BUS_FREE and no SD I/O is in\r
+       // progress. On insertion the card is initialised; on removal the\r
+       // cached capacity and the DISK_PRESENT / DISK_INITIALISED flags are\r
+       // cleared. Every target gets a PARAMETERS_CHANGED unit attention on\r
+       // removal and on each successful init except the first.\r
+       // Check if there's an SD card present.\r
+       if ((scsiDev.phase == BUS_FREE) &&\r
+               (sdIOState == SD_IDLE))\r
+       {\r
+               // The CS line is pulled high by the SD card.\r
+               // De-assert the line, and check if it's high.\r
+               // This isn't foolproof as it'll be left floating without\r
+               // an SD card. We can't use the built-in pull-down resistor as it will\r
+               // overpower the SD pullup resistor.\r
+               // NOTE(review): "de-assert" presumably means "stop driving";\r
+               // the pin is written to 0 and then switched to high-impedance.\r
+               SD_CS_Write(0);\r
+               SD_CS_SetDriveMode(SD_CS_DM_DIG_HIZ);\r
+\r
+               CyDelayCycles(64);\r
+               uint8_t cs = SD_CS_Read();\r
+               SD_CS_SetDriveMode(SD_CS_DM_STRONG)     ;\r
+\r
+               if (cs && !(blockDev.state & DISK_PRESENT))\r
+               {\r
+                       // Stays 1 until the first successful sdInit().\r
+                       static int firstInit = 1;\r
+\r
+                       // Debounce\r
+                       CyDelay(250);\r
+\r
+                       if (sdInit())\r
+                       {\r
+                               blockDev.state |= DISK_PRESENT | DISK_INITIALISED;\r
+\r
+                               if (!firstInit)\r
+                               {\r
+                                       int i;\r
+                                       for (i = 0; i < MAX_SCSI_TARGETS; ++i)\r
+                                       {\r
+                                               scsiDev.targets[i].unitAttention = PARAMETERS_CHANGED;\r
+                                       }\r
+                               }\r
+                               firstInit = 0;\r
+                       }\r
+               }\r
+               else if (!cs && (blockDev.state & DISK_PRESENT))\r
+               {\r
+                       sdDev.capacity = 0;\r
+                       blockDev.state &= ~DISK_PRESENT;\r
+                       blockDev.state &= ~DISK_INITIALISED;\r
+                       int i;\r
+                       for (i = 0; i < MAX_SCSI_TARGETS; ++i)\r
+                       {\r
+                               scsiDev.targets[i].unitAttention = PARAMETERS_CHANGED;\r
+                       }\r
+               }\r
+       }\r
+}\r
+\r
+#pragma GCC pop_options\r