SWV tracing for diagnosing hangs.
[SCSI2SD.git] / software / SCSI2SD / src / sd.c
index 19e02fd..fdc2b7d 100755 (executable)
@@ -14,6 +14,8 @@
 //\r
 //     You should have received a copy of the GNU General Public License\r
 //     along with SCSI2SD.  If not, see <http://www.gnu.org/licenses/>.\r
+#pragma GCC push_options\r
+#pragma GCC optimize("-flto")\r
 \r
 #include "device.h"\r
 #include "scsi.h"\r
@@ -21,6 +23,8 @@
 #include "disk.h"\r
 #include "sd.h"\r
 #include "led.h"\r
+#include "time.h"\r
+#include "trace.h"\r
 \r
 #include "scsiPhy.h"\r
 \r
 // Global\r
 SdDevice sdDev;\r
 \r
+enum SD_IO_STATE { SD_DMA, SD_ACCEPTED, SD_BUSY, SD_IDLE };\r
+static int sdIOState = SD_IDLE;\r
+\r
 // Private DMA variables.\r
-static int dmaInProgress = 0;\r
 static uint8 sdDMARxChan = CY_DMA_INVALID_CHANNEL;\r
 static uint8 sdDMATxChan = CY_DMA_INVALID_CHANNEL;\r
 \r
-// DMA descriptors\r
-static uint8 sdDMARxTd[1] = { CY_DMA_INVALID_TD };\r
-static uint8 sdDMATxTd[1] = { CY_DMA_INVALID_TD };\r
-\r
 // Dummy location for DMA to send unchecked CRC bytes to\r
 static uint8 discardBuffer;\r
 \r
+// 2 bytes CRC, response, 8 bits to close the clock.\r
+// "NCR" time is up to 8 bytes.\r
+static uint8_t writeResponseBuffer[8];\r
+\r
+static uint8_t writeStartToken = 0xFC;\r
+\r
 // Source of dummy SPI bytes for DMA\r
 static uint8 dummyBuffer = 0xFF;\r
 \r
-volatile static uint8 rxDMAComplete;\r
-volatile static uint8 txDMAComplete;\r
+volatile uint8_t sdRxDMAComplete;\r
+volatile uint8_t sdTxDMAComplete;\r
 \r
 CY_ISR_PROTO(sdRxISR);\r
 CY_ISR(sdRxISR)\r
 {\r
-       rxDMAComplete = 1;\r
+       sdRxDMAComplete = 1;\r
 }\r
 CY_ISR_PROTO(sdTxISR);\r
 CY_ISR(sdTxISR)\r
 {\r
-       txDMAComplete = 1;\r
+       sdTxDMAComplete = 1;\r
 }\r
 \r
 static uint8 sdCrc7(uint8* chr, uint8 cnt, uint8 crc)\r
@@ -76,105 +84,161 @@ static uint8 sdCrc7(uint8* chr, uint8 cnt, uint8 crc)
 }\r
 \r
 // Read and write 1 byte.\r
-static uint8 sdSpiByte(uint8 value)\r
+static uint8_t sdSpiByte(uint8_t value)\r
 {\r
        SDCard_WriteTxData(value);\r
+       trace(trace_spinSpiByte);\r
        while (!(SDCard_ReadRxStatus() & SDCard_STS_RX_FIFO_NOT_EMPTY)) {}\r
+       trace(trace_sdSpiByte);\r
        return SDCard_ReadRxData();\r
 }\r
 \r
-static void sdSendCRCCommand(uint8 cmd, uint32 param)\r
+static uint16_t sdDoCommand(\r
+       uint8_t cmd,\r
+       uint32_t param,\r
+       int useCRC,\r
+       int use2byteResponse)\r
 {\r
-       uint8 send[6];\r
+       int waitWhileBusy = (cmd != SD_GO_IDLE_STATE) && (cmd != SD_STOP_TRANSMISSION);\r
+\r
+       // "busy" probe. We'll examine the results later.\r
+       if (waitWhileBusy)\r
+       {\r
+               SDCard_WriteTxData(0xFF);\r
+       }\r
 \r
+       // send is static as the address must remain consistent for the static\r
+       // DMA descriptors to work.\r
+       static uint8_t send[7];\r
        send[0] = cmd | 0x40;\r
        send[1] = param >> 24;\r
        send[2] = param >> 16;\r
        send[3] = param >> 8;\r
        send[4] = param;\r
-       send[5] = (sdCrc7(send, 5, 0) << 1) | 1;\r
+       if (unlikely(useCRC))\r
+       {\r
+               send[5] = (sdCrc7(send, 5, 0) << 1) | 1;\r
+       }\r
+       else\r
+       {\r
+               send[5] = 1; // stop bit\r
+       }\r
+       send[6] = 0xFF; // Result code or stuff byte.\r
+\r
+       static uint8_t dmaRxTd = CY_DMA_INVALID_TD;\r
+       static uint8_t dmaTxTd = CY_DMA_INVALID_TD;\r
+       if (unlikely(dmaRxTd == CY_DMA_INVALID_TD))\r
+       {\r
+               dmaRxTd = CyDmaTdAllocate();\r
+               dmaTxTd = CyDmaTdAllocate();\r
+               CyDmaTdSetConfiguration(dmaTxTd, sizeof(send), CY_DMA_DISABLE_TD, TD_INC_SRC_ADR|SD_TX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaTxTd, LO16((uint32)&send), LO16((uint32)SDCard_TXDATA_PTR));\r
+               CyDmaTdSetConfiguration(dmaRxTd, sizeof(send), CY_DMA_DISABLE_TD, SD_RX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaRxTd, LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)&discardBuffer));\r
+       }\r
+\r
+       sdTxDMAComplete = 0;\r
+       sdRxDMAComplete = 0;\r
+\r
+       CyDmaChSetInitialTd(sdDMARxChan, dmaRxTd);\r
+       CyDmaChSetInitialTd(sdDMATxChan, dmaTxTd);\r
 \r
-       for(cmd = 0; cmd < sizeof(send); cmd++)\r
+       // Some Samsung cards enter a busy-state after single-sector reads.\r
+       // But we also need to wait for R1B to complete from the multi-sector\r
+       // reads.\r
+       if (waitWhileBusy)\r
        {\r
-               sdSpiByte(send[cmd]);\r
+               trace(trace_spinSDRxFIFO);\r
+               while (!(SDCard_ReadRxStatus() & SDCard_STS_RX_FIFO_NOT_EMPTY)) {}\r
+               int busy = SDCard_ReadRxData() != 0xFF;\r
+               if (unlikely(busy))\r
+               {\r
+                       trace(trace_spinSDBusy);\r
+                       while (sdSpiByte(0xFF) != 0xFF) {}\r
+               }\r
        }\r
-       // Allow command to process before reading result code.\r
-       sdSpiByte(0xFF);\r
-}\r
 \r
-static void sdSendCommand(uint8 cmd, uint32 param)\r
-{\r
-       uint8 send[6];\r
+       // The DMA controller is a bit trigger-happy. It will retain\r
+       // a drq request that was triggered while the channel was\r
+       // disabled.\r
+       CyDmaClearPendingDrq(sdDMATxChan);\r
+       CyDmaClearPendingDrq(sdDMARxChan);\r
 \r
-       send[0] = cmd | 0x40;\r
-       send[1] = param >> 24;\r
-       send[2] = param >> 16;\r
-       send[3] = param >> 8;\r
-       send[4] = param;\r
-       send[5] = 0;\r
+       // There is no flow control, so we must ensure we can read the bytes\r
+       // before we start transmitting\r
+       CyDmaChEnable(sdDMARxChan, 1);\r
+       CyDmaChEnable(sdDMATxChan, 1);\r
 \r
-       for(cmd = 0; cmd < sizeof(send); cmd++)\r
+       trace(trace_spinSDDMA);\r
+       while (!(sdTxDMAComplete && sdRxDMAComplete)) { __WFI(); }\r
+\r
+       uint16_t response = discardBuffer;\r
+       if (unlikely(cmd == SD_STOP_TRANSMISSION))\r
        {\r
-               sdSpiByte(send[cmd]);\r
+               // Stuff byte is required for this command only.\r
+               // Part 1 Simplified standard 3.01\r
+               // "The stop command has an execution delay due to the serial command\r
+               // transmission."\r
+               response = sdSpiByte(0xFF);\r
        }\r
-       // Allow command to process before reading result code.\r
-       sdSpiByte(0xFF);\r
-}\r
 \r
-static uint8 sdReadResp()\r
-{\r
-       uint8 v;\r
-       uint8 i = 128;\r
-       do\r
+       uint32_t start = getTime_ms();\r
+\r
+       trace(trace_spinSDBusy);\r
+       while ((response & 0x80) && likely(elapsedTime_ms(start) <= 200))\r
+       {\r
+               response = sdSpiByte(0xFF);\r
+       }\r
+       if (unlikely(use2byteResponse))\r
        {\r
-               v = sdSpiByte(0xFF);\r
-       } while(i-- && (v & 0x80));\r
-       return v;\r
+               response = (response << 8) | sdSpiByte(0xFF);\r
+       }\r
+       return response;\r
 }\r
 \r
-static uint8 sdCommandAndResponse(uint8 cmd, uint32 param)\r
+\r
+static inline uint16_t sdCommandAndResponse(uint8_t cmd, uint32_t param)\r
 {\r
-       sdSpiByte(0xFF);\r
-       sdSendCommand(cmd, param);\r
-       return sdReadResp();\r
+       return sdDoCommand(cmd, param, 0, 0);\r
 }\r
 \r
-static uint8 sdCRCCommandAndResponse(uint8 cmd, uint32 param)\r
+static inline uint16_t sdCRCCommandAndResponse(uint8_t cmd, uint32_t param)\r
 {\r
-       sdSpiByte(0xFF);\r
-       sdSendCRCCommand(cmd, param);\r
-       return sdReadResp();\r
+       return sdDoCommand(cmd, param, 1, 0);\r
 }\r
 \r
 // Clear the sticky status bits on error.\r
 static void sdClearStatus()\r
 {\r
-       uint8 r2hi = sdCRCCommandAndResponse(SD_SEND_STATUS, 0);\r
-       uint8 r2lo = sdSpiByte(0xFF);\r
-       (void) r2hi; (void) r2lo;\r
+       sdSpiByte(0xFF);\r
+       uint16_t r2 = sdDoCommand(SD_SEND_STATUS, 0, 1, 1);\r
+       (void) r2;\r
 }\r
 \r
-\r
 void\r
 sdReadMultiSectorPrep()\r
 {\r
        uint8 v;\r
        uint32 scsiLBA = (transfer.lba + transfer.currentBlock);\r
-       uint32 sdLBA = SCSISector2SD(scsiLBA);\r
+       uint32 sdLBA =\r
+               SCSISector2SD(\r
+                       scsiDev.target->cfg->sdSectorStart,\r
+                       scsiDev.target->liveCfg.bytesPerSector,\r
+                       scsiLBA);\r
 \r
        if (!sdDev.ccs)\r
        {\r
                sdLBA = sdLBA * SD_SECTOR_SIZE;\r
        }\r
        v = sdCommandAndResponse(SD_READ_MULTIPLE_BLOCK, sdLBA);\r
-       if (v)\r
+       if (unlikely(v))\r
        {\r
                scsiDiskReset();\r
                sdClearStatus();\r
 \r
                scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+               scsiDev.target->sense.code = HARDWARE_ERROR;\r
+               scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
                scsiDev.phase = STATUS;\r
        }\r
        else\r
@@ -187,16 +251,20 @@ static void
 dmaReadSector(uint8_t* outputBuffer)\r
 {\r
        // Wait for a start-block token.\r
-       // Don't wait more than 100ms, which is the timeout recommended\r
-       // in the standard.\r
-       //100ms @ 64Hz = 6400000\r
-       int maxWait = 6400000;\r
-       uint8 token = sdSpiByte(0xFF);\r
-       while (token != 0xFE && (maxWait-- > 0))\r
+       // Don't wait more than 200ms.  The standard recommends 100ms.\r
+       uint32_t start = getTime_ms();\r
+       uint8_t token = sdSpiByte(0xFF);\r
+       trace(trace_spinSDBusy);\r
+       while (token != 0xFE && likely(elapsedTime_ms(start) <= 200))\r
        {\r
+               if (unlikely(token && ((token & 0xE0) == 0)))\r
+               {\r
+                       // Error token!\r
+                       break;\r
+               }\r
                token = sdSpiByte(0xFF);\r
        }\r
-       if (token != 0xFE)\r
+       if (unlikely(token != 0xFE))\r
        {\r
                if (transfer.multiBlock)\r
                {\r
@@ -205,33 +273,48 @@ dmaReadSector(uint8_t* outputBuffer)
                if (scsiDev.status != CHECK_CONDITION)\r
                {\r
                        scsiDev.status = CHECK_CONDITION;\r
-                       scsiDev.sense.code = HARDWARE_ERROR;\r
-                       scsiDev.sense.asc = UNRECOVERED_READ_ERROR;\r
+                       scsiDev.target->sense.code = HARDWARE_ERROR;\r
+                       scsiDev.target->sense.asc = UNRECOVERED_READ_ERROR;\r
                        scsiDev.phase = STATUS;\r
                }\r
+               sdClearStatus();\r
                return;\r
        }\r
 \r
-       CyDmaTdSetConfiguration(sdDMARxTd[0], SD_SECTOR_SIZE, CY_DMA_DISABLE_TD, TD_INC_DST_ADR | SD_RX_DMA__TD_TERMOUT_EN);\r
-       CyDmaTdSetAddress(sdDMARxTd[0], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)outputBuffer));\r
-       CyDmaTdSetConfiguration(sdDMATxTd[0], SD_SECTOR_SIZE, CY_DMA_DISABLE_TD, SD_TX_DMA__TD_TERMOUT_EN);\r
-       CyDmaTdSetAddress(sdDMATxTd[0], LO16((uint32)&dummyBuffer), LO16((uint32)SDCard_TXDATA_PTR));\r
+       static uint8_t dmaRxTd[2] = { CY_DMA_INVALID_TD, CY_DMA_INVALID_TD};\r
+       static uint8_t dmaTxTd = CY_DMA_INVALID_TD;\r
+       if (unlikely(dmaRxTd[0] == CY_DMA_INVALID_TD))\r
+       {\r
+               dmaRxTd[0] = CyDmaTdAllocate();\r
+               dmaRxTd[1] = CyDmaTdAllocate();\r
+               dmaTxTd = CyDmaTdAllocate();\r
+               \r
+               // Receive 512 bytes of data and then 2 bytes CRC.\r
+               CyDmaTdSetConfiguration(dmaRxTd[0], SD_SECTOR_SIZE, dmaRxTd[1], TD_INC_DST_ADR);\r
+               CyDmaTdSetConfiguration(dmaRxTd[1], 2, CY_DMA_DISABLE_TD, SD_RX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaRxTd[1], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)&discardBuffer));\r
+       \r
+               CyDmaTdSetConfiguration(dmaTxTd, SD_SECTOR_SIZE + 2, CY_DMA_DISABLE_TD, SD_TX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaTxTd, LO16((uint32)&dummyBuffer), LO16((uint32)SDCard_TXDATA_PTR));\r
 \r
-       dmaInProgress = 1;\r
-       // The DMA controller is a bit trigger-happy. It will retain\r
-       // a drq request that was triggered while the channel was\r
-       // disabled.\r
-       CyDmaClearPendingDrq(sdDMATxChan);\r
-       CyDmaClearPendingDrq(sdDMARxChan);\r
+       }\r
+       CyDmaTdSetAddress(dmaRxTd[0], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)outputBuffer));\r
 \r
-       txDMAComplete = 0;\r
-       rxDMAComplete = 0;\r
+       sdIOState = SD_DMA;\r
+       sdTxDMAComplete = 0;\r
+       sdRxDMAComplete = 0;\r
 \r
        // Re-loading the initial TD's here is very important, or else\r
        // we'll be re-using the last-used TD, which would be the last\r
        // in the chain (ie. CRC TD)\r
-       CyDmaChSetInitialTd(sdDMARxChan, sdDMARxTd[0]);\r
-       CyDmaChSetInitialTd(sdDMATxChan, sdDMATxTd[0]);\r
+       CyDmaChSetInitialTd(sdDMARxChan, dmaRxTd[0]);\r
+       CyDmaChSetInitialTd(sdDMATxChan, dmaTxTd);\r
+\r
+       // The DMA controller is a bit trigger-happy. It will retain\r
+       // a drq request that was triggered while the channel was\r
+       // disabled.\r
+       CyDmaClearPendingDrq(sdDMATxChan);\r
+       CyDmaClearPendingDrq(sdDMARxChan);\r
 \r
        // There is no flow control, so we must ensure we can read the bytes\r
        // before we start transmitting\r
@@ -242,14 +325,10 @@ dmaReadSector(uint8_t* outputBuffer)
 int\r
 sdReadSectorDMAPoll()\r
 {\r
-       if (rxDMAComplete && txDMAComplete)\r
+       if (sdRxDMAComplete && sdTxDMAComplete)\r
        {\r
                // DMA transfer is complete\r
-               dmaInProgress = 0;\r
-\r
-               sdSpiByte(0xFF); // CRC\r
-               sdSpiByte(0xFF); // CRC\r
-\r
+               sdIOState = SD_IDLE;\r
                return 1;\r
        }\r
        else\r
@@ -266,14 +345,14 @@ void sdReadSingleSectorDMA(uint32_t lba, uint8_t* outputBuffer)
                lba = lba * SD_SECTOR_SIZE;\r
        }\r
        v = sdCommandAndResponse(SD_READ_SINGLE_BLOCK, lba);\r
-       if (v)\r
+       if (unlikely(v))\r
        {\r
                scsiDiskReset();\r
                sdClearStatus();\r
 \r
                scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+               scsiDev.target->sense.code = HARDWARE_ERROR;\r
+               scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
                scsiDev.phase = STATUS;\r
        }\r
        else\r
@@ -292,50 +371,32 @@ sdReadMultiSectorDMA(uint8_t* outputBuffer)
 \r
 void sdCompleteRead()\r
 {\r
-       if (dmaInProgress)\r
+       if (unlikely(sdIOState != SD_IDLE))\r
        {\r
                // Not much choice but to wait until we've completed the transfer.\r
                // Cancelling the transfer can't be done as we have no way to reset\r
                // the SD card.\r
+               trace(trace_spinSDCompleteRead);\r
                while (!sdReadSectorDMAPoll()) { /* spin */ }\r
        }\r
+       \r
+       if (transfer.inProgress)\r
+       {\r
+               transfer.inProgress = 0;\r
+               uint8 r1b = sdCommandAndResponse(SD_STOP_TRANSMISSION, 0);\r
 \r
-       transfer.inProgress = 0;\r
-\r
-       // We cannot send even a single "padding" byte, as we normally would when\r
-       // sending a command.  If we've just finished reading the very last block\r
-       // on the card, then reading an additional dummy byte will just trigger\r
-       // an error condition as we're trying to read past-the-end of the storage\r
-       // device.\r
-       // ie. do not use sdCommandAndResponse here.\r
-       uint8 r1b;\r
-       sdSendCommand(SD_STOP_TRANSMISSION, 0);\r
-       r1b = sdReadResp();\r
-\r
-       if (r1b)\r
-       {\r
-               // Try very hard to make sure the transmission stops\r
-               int retries = 255;\r
-               while (r1b && retries)\r
+               if (unlikely(r1b))\r
                {\r
-                       r1b = sdCommandAndResponse(SD_STOP_TRANSMISSION, 0);\r
-                       retries--;\r
+                       scsiDev.status = CHECK_CONDITION;\r
+                       scsiDev.target->sense.code = HARDWARE_ERROR;\r
+                       scsiDev.target->sense.asc = UNRECOVERED_READ_ERROR;\r
+                       scsiDev.phase = STATUS;\r
                }\r
-\r
-               scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = UNRECOVERED_READ_ERROR;\r
-               scsiDev.phase = STATUS;\r
        }\r
 \r
-       // R1b has an optional trailing "busy" signal.\r
-       {\r
-               uint8 busy;\r
-               do\r
-               {\r
-                       busy = sdSpiByte(0xFF);\r
-               } while (busy == 0);\r
-       }\r
+       // R1b has an optional trailing "busy" signal, but we defer waiting on this.\r
+       // The next call to sdCommandAndResponse will wait for the busy state to\r
+       // clear.\r
 }\r
 \r
 static void sdWaitWriteBusy()\r
@@ -350,28 +411,49 @@ static void sdWaitWriteBusy()
 void\r
 sdWriteMultiSectorDMA(uint8_t* outputBuffer)\r
 {\r
-       sdSpiByte(0xFC); // MULTIPLE byte start token\r
+       static uint8_t dmaRxTd[2] = { CY_DMA_INVALID_TD, CY_DMA_INVALID_TD};\r
+       static uint8_t dmaTxTd[3] = { CY_DMA_INVALID_TD, CY_DMA_INVALID_TD, CY_DMA_INVALID_TD};\r
+       if (unlikely(dmaRxTd[0] == CY_DMA_INVALID_TD))\r
+       {\r
+               dmaRxTd[0] = CyDmaTdAllocate();\r
+               dmaRxTd[1] = CyDmaTdAllocate();\r
+               dmaTxTd[0] = CyDmaTdAllocate();\r
+               dmaTxTd[1] = CyDmaTdAllocate();\r
+               dmaTxTd[2] = CyDmaTdAllocate();\r
+               \r
+               // Transmit 512 bytes of data and then 2 bytes CRC, and then get the response byte\r
+               // We need to do this without stopping the clock\r
+               CyDmaTdSetConfiguration(dmaTxTd[0], 1, dmaTxTd[1], TD_INC_SRC_ADR);\r
+               CyDmaTdSetAddress(dmaTxTd[0], LO16((uint32)&writeStartToken), LO16((uint32)SDCard_TXDATA_PTR));\r
 \r
-       CyDmaTdSetConfiguration(sdDMATxTd[0], SD_SECTOR_SIZE, CY_DMA_DISABLE_TD, TD_INC_SRC_ADR | SD_TX_DMA__TD_TERMOUT_EN);\r
-       CyDmaTdSetAddress(sdDMATxTd[0], LO16((uint32)outputBuffer), LO16((uint32)SDCard_TXDATA_PTR));\r
-       CyDmaTdSetConfiguration(sdDMARxTd[0], SD_SECTOR_SIZE, CY_DMA_DISABLE_TD, SD_RX_DMA__TD_TERMOUT_EN);\r
-       CyDmaTdSetAddress(sdDMARxTd[0], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)&discardBuffer));\r
-       \r
-       dmaInProgress = 1;\r
+               CyDmaTdSetConfiguration(dmaTxTd[1], SD_SECTOR_SIZE, dmaTxTd[2], TD_INC_SRC_ADR);\r
+\r
+               CyDmaTdSetConfiguration(dmaTxTd[2], 2 + sizeof(writeResponseBuffer), CY_DMA_DISABLE_TD, SD_TX_DMA__TD_TERMOUT_EN);\r
+               CyDmaTdSetAddress(dmaTxTd[2], LO16((uint32)&dummyBuffer), LO16((uint32)SDCard_TXDATA_PTR));\r
+\r
+               CyDmaTdSetConfiguration(dmaRxTd[0], SD_SECTOR_SIZE + 3, dmaRxTd[1], 0);\r
+               CyDmaTdSetAddress(dmaRxTd[0], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)&discardBuffer));\r
+               CyDmaTdSetConfiguration(dmaRxTd[1], sizeof(writeResponseBuffer), CY_DMA_DISABLE_TD, SD_RX_DMA__TD_TERMOUT_EN|TD_INC_DST_ADR);\r
+               CyDmaTdSetAddress(dmaRxTd[1], LO16((uint32)SDCard_RXDATA_PTR), LO16((uint32)&writeResponseBuffer));\r
+       }\r
+       CyDmaTdSetAddress(dmaTxTd[1], LO16((uint32)outputBuffer), LO16((uint32)SDCard_TXDATA_PTR));\r
+\r
+\r
+       sdIOState = SD_DMA;\r
        // The DMA controller is a bit trigger-happy. It will retain\r
        // a drq request that was triggered while the channel was\r
        // disabled.\r
        CyDmaClearPendingDrq(sdDMATxChan);\r
        CyDmaClearPendingDrq(sdDMARxChan);\r
 \r
-       txDMAComplete = 0;\r
-       rxDMAComplete = 0;\r
+       sdTxDMAComplete = 0;\r
+       sdRxDMAComplete = 0;\r
 \r
        // Re-loading the initial TD's here is very important, or else\r
        // we'll be re-using the last-used TD, which would be the last\r
        // in the chain (ie. CRC TD)\r
-       CyDmaChSetInitialTd(sdDMARxChan, sdDMARxTd[0]);\r
-       CyDmaChSetInitialTd(sdDMATxChan, sdDMATxTd[0]);\r
+       CyDmaChSetInitialTd(sdDMARxChan, dmaRxTd[0]);\r
+       CyDmaChSetInitialTd(sdDMATxChan, dmaTxTd[0]);\r
 \r
        // There is no flow control, so we must ensure we can read the bytes\r
        // before we start transmitting\r
@@ -380,60 +462,76 @@ sdWriteMultiSectorDMA(uint8_t* outputBuffer)
 }\r
 \r
 int\r
-sdWriteSectorDMAPoll()\r
+sdWriteSectorDMAPoll(int sendStopToken)\r
 {\r
-       if (rxDMAComplete && txDMAComplete)\r
+       if (sdRxDMAComplete && sdTxDMAComplete)\r
        {\r
-               // DMA transfer is complete\r
-               dmaInProgress = 0;\r
-\r
-               sdSpiByte(0x00); // CRC\r
-               sdSpiByte(0x00); // CRC\r
-\r
-               // Don't wait more than 1s.\r
-               // My 2g Kingston micro-sd card doesn't respond immediately.\r
-               // My 16Gb card does.\r
-               int maxWait = 1000000;\r
-               uint8_t dataToken = sdSpiByte(0xFF); // Response\r
-               while (dataToken == 0xFF && maxWait-- > 0)\r
-               {\r
-                       CyDelayUs(1);\r
-                       dataToken = sdSpiByte(0xFF);\r
-               }\r
-               if (((dataToken & 0x1F) >> 1) != 0x2) // Accepted.\r
+               if (sdIOState == SD_DMA)\r
                {\r
-                       uint8 r1b, busy;\r
-               \r
-                       sdWaitWriteBusy();\r
-\r
-                       r1b = sdCommandAndResponse(SD_STOP_TRANSMISSION, 0);\r
-                       (void) r1b;\r
-                       sdSpiByte(0xFF);\r
-\r
-                       // R1b has an optional trailing "busy" signal.\r
+                       // Retry a few times. The data token format is:\r
+                       // XXX0AAA1\r
+                       int i = 0;\r
+                       uint8_t dataToken;\r
                        do\r
                        {\r
-                               busy = sdSpiByte(0xFF);\r
-                       } while (busy == 0);\r
-\r
-                       // Wait for the card to come out of busy.\r
-                       sdWaitWriteBusy();\r
+                               dataToken = writeResponseBuffer[i]; // Response\r
+                               ++i;\r
+                       } while (((dataToken & 0x0101) != 1) && (i < sizeof(writeResponseBuffer)));\r
 \r
-                       transfer.inProgress = 0;\r
-                       scsiDiskReset();\r
-                       sdClearStatus();\r
+                       // At this point we should either have an accepted token, or we'll\r
+                       // time out and proceed into the error case below.\r
+                       if (unlikely(((dataToken & 0x1F) >> 1) != 0x2)) // Accepted.\r
+                       {\r
+                               sdIOState = SD_IDLE;\r
+\r
+                               sdWaitWriteBusy();\r
+                               sdSpiByte(0xFD); // STOP TOKEN\r
+                               sdWaitWriteBusy();\r
+\r
+                               transfer.inProgress = 0;\r
+                               scsiDiskReset();\r
+                               sdClearStatus();\r
+\r
+                               scsiDev.status = CHECK_CONDITION;\r
+                               scsiDev.target->sense.code = HARDWARE_ERROR;\r
+                               scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+                               scsiDev.phase = STATUS;\r
+                       }\r
+                       else\r
+                       {\r
+                               sdIOState = SD_ACCEPTED;\r
+                       }\r
+               }\r
 \r
-                       scsiDev.status = CHECK_CONDITION;\r
-                       scsiDev.sense.code = HARDWARE_ERROR;\r
-                       scsiDev.sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
-                       scsiDev.phase = STATUS;\r
+               if (sdIOState == SD_ACCEPTED)\r
+               {\r
+                       // Wait while the SD card is busy\r
+                       if (sdSpiByte(0xFF) == 0xFF)\r
+                       {\r
+                               if (sendStopToken)\r
+                               {\r
+                                       sdIOState = SD_BUSY;\r
+                                       transfer.inProgress = 0;\r
+\r
+                                       sdSpiByte(0xFD); // STOP TOKEN\r
+                               }\r
+                               else\r
+                               {\r
+                                       sdIOState = SD_IDLE;\r
+                               }\r
+                       }\r
                }\r
-               else\r
+\r
+               if (sdIOState == SD_BUSY)\r
                {\r
-                       sdWaitWriteBusy();\r
-               }               \r
+                       // Wait while the SD card is busy\r
+                       if (sdSpiByte(0xFF) == 0xFF)\r
+                       {\r
+                               sdIOState = SD_IDLE;\r
+                       }\r
+               }\r
 \r
-               return 1;\r
+               return sdIOState == SD_IDLE;\r
        }\r
        else\r
        {\r
@@ -443,32 +541,28 @@ sdWriteSectorDMAPoll()
 \r
 void sdCompleteWrite()\r
 {\r
-       if (dmaInProgress)\r
+       if (unlikely(sdIOState != SD_IDLE))\r
        {\r
                // Not much choice but to wait until we've completed the transfer.\r
                // Cancelling the transfer can't be done as we have no way to reset\r
                // the SD card.\r
-               while (!sdWriteSectorDMAPoll()) { /* spin */ }\r
+               trace(trace_spinSDCompleteWrite);\r
+               while (!sdWriteSectorDMAPoll(1)) { /* spin */ }\r
        }\r
-       \r
-       transfer.inProgress = 0;\r
 \r
-       uint8 r1, r2;\r
-\r
-       sdSpiByte(0xFD); // STOP TOKEN\r
-       // Wait for the card to come out of busy.\r
-       sdWaitWriteBusy();\r
-\r
-       r1 = sdCommandAndResponse(13, 0); // send status\r
-       r2 = sdSpiByte(0xFF);\r
-       if (r1 || r2)\r
+       if (transfer.inProgress && likely(scsiDev.phase == DATA_OUT))\r
        {\r
-               sdClearStatus();\r
-               scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = WRITE_ERROR_AUTO_REALLOCATION_FAILED;\r
-               scsiDev.phase = STATUS;\r
+               uint16_t r2 = sdDoCommand(SD_SEND_STATUS, 0, 0, 1);\r
+               if (unlikely(r2))\r
+               {\r
+                       sdClearStatus();\r
+                       scsiDev.status = CHECK_CONDITION;\r
+                       scsiDev.target->sense.code = HARDWARE_ERROR;\r
+                       scsiDev.target->sense.asc = WRITE_ERROR_AUTO_REALLOCATION_FAILED;\r
+                       scsiDev.phase = STATUS;\r
+               }\r
        }\r
+       transfer.inProgress = 0;\r
 }\r
 \r
 \r
@@ -479,6 +573,8 @@ static int sendIfCond()
 \r
        do\r
        {\r
+               // 11:8 Host voltage. 1 = 2.7-3.6V\r
+               // 7:0 Echo bits. Ignore.\r
                uint8 status = sdCRCCommandAndResponse(SD_SEND_IF_COND, 0x000001AA);\r
 \r
                if (status == SD_R1_IDLE)\r
@@ -509,49 +605,80 @@ static int sendIfCond()
 \r
 static int sdOpCond()\r
 {\r
-       int retries = 50;\r
+       uint32_t start = getTime_ms();\r
 \r
        uint8 status;\r
        do\r
        {\r
-               CyDelay(33); // Spec says to retry for 1 second.\r
-\r
                sdCRCCommandAndResponse(SD_APP_CMD, 0);\r
                // Host Capacity Support = 1 (SDHC/SDXC supported)\r
                status = sdCRCCommandAndResponse(SD_APP_SEND_OP_COND, 0x40000000);\r
 \r
                sdClearStatus();\r
-       } while ((status != 0) && (--retries > 0));\r
 \r
-       return retries > 0;\r
+       // Spec says to poll for 1 second.\r
+       } while ((status != 0) && (elapsedTime_ms(start) < 1000));\r
+\r
+       return status == 0;\r
 }\r
 \r
 static int sdReadOCR()\r
 {\r
-       uint8 buf[4];\r
-       int i;\r
-       \r
-       uint8 status = sdCRCCommandAndResponse(SD_READ_OCR, 0);\r
-       if(status){goto bad;}\r
+       uint32_t start = getTime_ms();\r
+       int complete;\r
+       uint8 status;\r
 \r
-       for (i = 0; i < 4; ++i)\r
+       do\r
        {\r
-               buf[i] = sdSpiByte(0xFF);\r
-       }\r
+               uint8 buf[4];\r
+               int i;\r
 \r
-       sdDev.ccs = (buf[0] & 0x40) ? 1 : 0;\r
+               status = sdCRCCommandAndResponse(SD_READ_OCR, 0);\r
+               if(status) { break; }\r
 \r
-       return 1;\r
-bad:\r
-       return 0;\r
+               for (i = 0; i < 4; ++i)\r
+               {\r
+                       buf[i] = sdSpiByte(0xFF);\r
+               }\r
+\r
+               sdDev.ccs = (buf[0] & 0x40) ? 1 : 0;\r
+               complete = (buf[0] & 0x80);\r
+\r
+       } while (!status &&\r
+               !complete &&\r
+               (elapsedTime_ms(start) < 1000));\r
+\r
+       return (status == 0) && complete;\r
+}\r
+\r
+static void sdReadCID()\r
+{\r
+       uint8 startToken;\r
+       int maxWait, i;\r
+\r
+       uint8 status = sdCRCCommandAndResponse(SD_SEND_CID, 0);\r
+       if(status){return;}\r
+\r
+       maxWait = 1023;\r
+       do\r
+       {\r
+               startToken = sdSpiByte(0xFF);\r
+       } while(maxWait-- && (startToken != 0xFE));\r
+       if (startToken != 0xFE) { return; }\r
+\r
+       for (i = 0; i < 16; ++i)\r
+       {\r
+               sdDev.cid[i] = sdSpiByte(0xFF);\r
+       }\r
+       sdSpiByte(0xFF); // CRC\r
+       sdSpiByte(0xFF); // CRC\r
 }\r
 \r
 static int sdReadCSD()\r
 {\r
        uint8 startToken;\r
        int maxWait, i;\r
-       uint8 buf[16];\r
-       \r
+\r
        uint8 status = sdCRCCommandAndResponse(SD_SEND_CSD, 0);\r
        if(status){goto bad;}\r
 \r
@@ -564,29 +691,29 @@ static int sdReadCSD()
 \r
        for (i = 0; i < 16; ++i)\r
        {\r
-               buf[i] = sdSpiByte(0xFF);\r
+               sdDev.csd[i] = sdSpiByte(0xFF);\r
        }\r
        sdSpiByte(0xFF); // CRC\r
        sdSpiByte(0xFF); // CRC\r
 \r
-       if ((buf[0] >> 6) == 0x00)\r
+       if ((sdDev.csd[0] >> 6) == 0x00)\r
        {\r
                // CSD version 1\r
                // C_SIZE in bits [73:62]\r
-               uint32 c_size = (((((uint32)buf[6]) & 0x3) << 16) | (((uint32)buf[7]) << 8) | buf[8]) >> 6;\r
-               uint32 c_mult = (((((uint32)buf[9]) & 0x3) << 8) | ((uint32)buf[0xa])) >> 7;\r
-               uint32 sectorSize = buf[5] & 0x0F;\r
+               uint32 c_size = (((((uint32)sdDev.csd[6]) & 0x3) << 16) | (((uint32)sdDev.csd[7]) << 8) | sdDev.csd[8]) >> 6;\r
+               uint32 c_mult = (((((uint32)sdDev.csd[9]) & 0x3) << 8) | ((uint32)sdDev.csd[0xa])) >> 7;\r
+               uint32 sectorSize = sdDev.csd[5] & 0x0F;\r
                sdDev.capacity = ((c_size+1) * ((uint64)1 << (c_mult+2)) * ((uint64)1 << sectorSize)) / SD_SECTOR_SIZE;\r
        }\r
-       else if ((buf[0] >> 6) == 0x01)\r
+       else if ((sdDev.csd[0] >> 6) == 0x01)\r
        {\r
                // CSD version 2\r
                // C_SIZE in bits [69:48]\r
 \r
                uint32 c_size =\r
-                       ((((uint32)buf[7]) & 0x3F) << 16) |\r
-                       (((uint32)buf[8]) << 8) |\r
-                       ((uint32)buf[7]);\r
+                       ((((uint32)sdDev.csd[7]) & 0x3F) << 16) |\r
+                       (((uint32)sdDev.csd[8]) << 8) |\r
+                       ((uint32)sdDev.csd[7]);\r
                sdDev.capacity = (c_size + 1) * 1024;\r
        }\r
        else\r
@@ -623,9 +750,6 @@ static void sdInitDMA()
                CyDmaChDisable(sdDMATxChan);\r
                CyDmaChDisable(sdDMARxChan);\r
 \r
-               sdDMARxTd[0] = CyDmaTdAllocate();\r
-               sdDMATxTd[0] = CyDmaTdAllocate();\r
-\r
                SD_RX_DMA_COMPLETE_StartEx(sdRxISR);\r
                SD_TX_DMA_COMPLETE_StartEx(sdTxISR);\r
        }\r
@@ -636,19 +760,23 @@ int sdInit()
        int result = 0;\r
        int i;\r
        uint8 v;\r
-       \r
+\r
        sdDev.version = 0;\r
        sdDev.ccs = 0;\r
        sdDev.capacity = 0;\r
+       memset(sdDev.csd, 0, sizeof(sdDev.csd));\r
+       memset(sdDev.cid, 0, sizeof(sdDev.cid));\r
 \r
        sdInitDMA();\r
 \r
+       SD_CS_SetDriveMode(SD_CS_DM_STRONG);\r
        SD_CS_Write(1); // Set CS inactive (active low)\r
 \r
        // Set the SPI clock for 400kHz transfers\r
        // 25MHz / 400kHz approx factor of 63.\r
+       // The register contains (divider - 1)\r
        uint16_t clkDiv25MHz =  SD_Data_Clk_GetDividerRegister();\r
-       SD_Data_Clk_SetDivider(clkDiv25MHz * 63);\r
+       SD_Data_Clk_SetDivider(((clkDiv25MHz + 1) * 63) - 1);\r
        // Wait for the clock to settle.\r
        CyDelayUs(1);\r
 \r
@@ -663,13 +791,14 @@ int sdInit()
        SD_CS_Write(0); // Set CS active (active low)\r
        CyDelayUs(1);\r
 \r
-       v = sdCRCCommandAndResponse(SD_GO_IDLE_STATE, 0);\r
+       sdSpiByte(0xFF);\r
+       v = sdDoCommand(SD_GO_IDLE_STATE, 0, 1, 0);\r
        if(v != 1){goto bad;}\r
 \r
        ledOn();\r
-       if (!sendIfCond()) goto bad; // Sets V1 or V2 flag\r
-       if (!sdOpCond()) goto bad;\r
-       if (!sdReadOCR()) goto bad;\r
+       if (!sendIfCond()) goto bad; // Sets V1 or V2 flag  CMD8\r
+       if (!sdOpCond()) goto bad; // ACMD41. Wait for init completes.\r
+       if (!sdReadOCR()) goto bad; // CMD58. Get CCS flag. Only valid after init.\r
 \r
        // This command will be ignored if sdDev.ccs is set.\r
        // SDHC and SDXC are always 512bytes.\r
@@ -698,11 +827,13 @@ int sdInit()
        SDCard_ClearFIFO();\r
 \r
        if (!sdReadCSD()) goto bad;\r
+       sdReadCID();\r
 \r
        result = 1;\r
        goto out;\r
 \r
 bad:\r
+       SD_Data_Clk_SetDivider(clkDiv25MHz); // Restore the clock for our next retry\r
        sdDev.capacity = 0;\r
 \r
 out:\r
@@ -715,30 +846,36 @@ out:
 void sdWriteMultiSectorPrep()\r
 {\r
        uint8 v;\r
-       \r
+\r
        // Set the number of blocks to pre-erase by the multiple block write command\r
        // We don't care about the response - if the command is not accepted, writes\r
        // will just be a bit slower.\r
        // Max 22bit parameter.\r
-       uint32_t sdBlocks = transfer.blocks * SDSectorsPerSCSISector();\r
+       uint32_t sdBlocks =\r
+               transfer.blocks *\r
+                       SDSectorsPerSCSISector(scsiDev.target->liveCfg.bytesPerSector);\r
        uint32 blocks = sdBlocks > 0x7FFFFF ? 0x7FFFFF : sdBlocks;\r
        sdCommandAndResponse(SD_APP_CMD, 0);\r
        sdCommandAndResponse(SD_APP_SET_WR_BLK_ERASE_COUNT, blocks);\r
 \r
        uint32 scsiLBA = (transfer.lba + transfer.currentBlock);\r
-       uint32 sdLBA = SCSISector2SD(scsiLBA);\r
+       uint32 sdLBA =\r
+               SCSISector2SD(\r
+                       scsiDev.target->cfg->sdSectorStart,\r
+                       scsiDev.target->liveCfg.bytesPerSector,\r
+                       scsiLBA);\r
        if (!sdDev.ccs)\r
        {\r
                sdLBA = sdLBA * SD_SECTOR_SIZE;\r
        }\r
-       v = sdCommandAndResponse(25, sdLBA);\r
-       if (v)\r
+       v = sdCommandAndResponse(SD_WRITE_MULTIPLE_BLOCK, sdLBA);\r
+       if (unlikely(v))\r
        {\r
                scsiDiskReset();\r
                sdClearStatus();\r
                scsiDev.status = CHECK_CONDITION;\r
-               scsiDev.sense.code = HARDWARE_ERROR;\r
-               scsiDev.sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
+               scsiDev.target->sense.code = HARDWARE_ERROR;\r
+               scsiDev.target->sense.asc = LOGICAL_UNIT_COMMUNICATION_FAILURE;\r
                scsiDev.phase = STATUS;\r
        }\r
        else\r
@@ -747,3 +884,58 @@ void sdWriteMultiSectorPrep()
        }\r
 }\r
 \r
+void sdPoll()\r
+{\r
+       // Detect SD card insertion/removal. Only probes while the SCSI bus is free and no SD I/O is in flight.\r
+       if ((scsiDev.phase == BUS_FREE) &&\r
+               (sdIOState == SD_IDLE))\r
+       {\r
+               // Card-detect probe: the CS line is pulled high by the SD card.\r
+               // Stop driving CS (switch the pin to high-impedance) and sample it.\r
+               // This isn't foolproof, as the line is left floating when there is\r
+               // no SD card. We can't use the built-in pull-down resistor as it\r
+               // would overpower the SD card's pull-up resistor.\r
+               SD_CS_Write(0);\r
+               SD_CS_SetDriveMode(SD_CS_DM_DIG_HIZ);\r
+\r
+               CyDelayCycles(64);\r
+               uint8_t cs = SD_CS_Read();\r
+               SD_CS_SetDriveMode(SD_CS_DM_STRONG)     ;\r
+\r
+               if (cs && !(blockDev.state & DISK_PRESENT))\r
+               {\r
+                       static int firstInit = 1;\r
+\r
+                       // Debounce: pause before attempting to initialise the newly detected card.\r
+                       CyDelay(250);\r
+\r
+                       if (sdInit())\r
+                       {\r
+                               blockDev.state |= DISK_PRESENT | DISK_INITIALISED;\r
+\r
+                               if (!firstInit)\r
+                               {\r
+                                       int i;\r
+                                       for (i = 0; i < MAX_SCSI_TARGETS; ++i)\r
+                                       {\r
+                                               scsiDev.targets[i].unitAttention = PARAMETERS_CHANGED;\r
+                                       }\r
+                               }\r
+                               firstInit = 0;\r
+                       }\r
+               }\r
+               else if (!cs && (blockDev.state & DISK_PRESENT))\r
+               {\r
+                       sdDev.capacity = 0;\r
+                       blockDev.state &= ~DISK_PRESENT;\r
+                       blockDev.state &= ~DISK_INITIALISED;\r
+                       int i;\r
+                       for (i = 0; i < MAX_SCSI_TARGETS; ++i)\r
+                       {\r
+                               scsiDev.targets[i].unitAttention = PARAMETERS_CHANGED;\r
+                       }\r
+               }\r
+       }\r
+}\r
+\r
+#pragma GCC pop_options\r