Improve sync transfer stability by measuring host speed before blind writes to SD...
author Michael McMaster <michael@codesrc.com>
Tue, 24 Dec 2019 06:13:47 +0000 (16:13 +1000)
committer Michael McMaster <michael@codesrc.com>
Tue, 24 Dec 2019 06:13:47 +0000 (16:13 +1000)
Makefile
src/firmware/bsp.c
src/firmware/bsp.h
src/firmware/config.c
src/firmware/disk.c
src/firmware/link.ld
src/firmware/scsi.c
src/firmware/scsi.h
src/firmware/scsiPhy.c
src/firmware/scsiPhy.h
src/scsi2sd-util6/Makefile

index 9be0d9a..b81734a 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -160,6 +160,9 @@ build/firmware.elf: $(SRC) rtl/fpga_bitmap.o $(STM32OBJS)
 build/firmware.bin: build/firmware.elf
        $(OBJCOPY) -O binary $< $@
 
+# Example to hard-code config within firmware
+#sudo arm-none-eabi-objcopy --update-section .fixed_config=config.dat firmware.elf -O binary firmware.bin
+
 build/stm32cubemx/%.o:
        mkdir -p build/stm32cubemx
        $(ARMCC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $(STM32CubeMX_INCUDE) $(INCLUDE) $^
index 0c0a477..a679439 100644 (file)
@@ -23,15 +23,15 @@ static int usingFastClock = 0;
 
 // TODO keep clock routines consistent with those in STM32Cubemx main.c
 
-uint32_t s2s_getSdRateMBs()
+uint32_t s2s_getSdRateKBs()
 {
        if (usingFastClock)
        {
-               return 18; // ((72MHz / 2) / 8bits) * 4bitparallel
+               return 18000; // ((72MHz / 2) / 8bits) * 4bitparallel
        }
        else
        {
-               return 12; // ((48MHz / 2) / 8bits) * 4bitparallel
+               return 12000; // ((48MHz / 2) / 8bits) * 4bitparallel
        }
 }
 
index a11c850..1048129 100644 (file)
@@ -27,7 +27,7 @@
 void s2s_setNormalClock();
 void s2s_setFastClock();
 
-uint32_t s2s_getSdRateMBs();
+uint32_t s2s_getSdRateKBs();
 
 #endif
 
index 46bebb8..ef09bc0 100755 (executable)
 \r
 #include <string.h>\r
 \r
-static const uint16_t FIRMWARE_VERSION = 0x0629;\r
+static const uint16_t FIRMWARE_VERSION = 0x062A;\r
+\r
+// Optional static config: a linker symbol, so its address (not value) is the data\r
+extern uint8_t __fixed_config[];\r
 \r
 // 1 flash row\r
 static const uint8_t DEFAULT_CONFIG[128] =\r
@@ -87,7 +90,14 @@ void s2s_configInit(S2S_BoardCfg* config)
 {\r
        usbInEpState = USB_IDLE;\r
 \r
-       if ((blockDev.state & DISK_PRESENT) && sdDev.capacity)\r
+       if (memcmp(__fixed_config, "BCFG", 4) == 0)\r
+       {\r
+               // Use hardcoded config\r
+               memcpy(s2s_cfg, __fixed_config, S2S_CFG_SIZE);\r
+               memcpy(config, s2s_cfg, sizeof(S2S_BoardCfg));\r
+       }\r
+\r
+       else if ((blockDev.state & DISK_PRESENT) && sdDev.capacity)\r
        {\r
                int cfgSectors = (S2S_CFG_SIZE + 511) / 512;\r
                BSP_SD_ReadBlocks_DMA(\r
index 773ffd3..3c562e3 100755 (executable)
@@ -548,6 +548,29 @@ int scsiDiskCommand()
        return commandHandled;\r
 }\r
 \r
+// Bytes to read from the SCSI host before starting the blind SD write.\r
+static uint32_t\r
+calcReadahead(uint32_t totalBytes, uint32_t sdSpeedKBs, uint32_t scsiSpeedKBs)\r
+{\r
+       // No usable speed estimate (or zero divisor): read everything up front.\r
+       if (scsiSpeedKBs == 0 || sdSpeedKBs == 0 || scsiDev.hostSpeedMeasured == 0)\r
+       {\r
+               return totalBytes;\r
+       }\r
+\r
+       // readAhead = totalBytes * (1 - scsiSpeedKBs / sdSpeedKBs), clamped to 0 when\r
+       // the host is at least as fast as the SD card (uint32_t would wrap instead).\r
+       // Won't overflow with 65536 max bytes, 20000 max scsi speed.\r
+       uint32_t hostBytes = totalBytes * scsiSpeedKBs / sdSpeedKBs;\r
+       uint32_t readAheadBytes = (hostBytes < totalBytes) ? totalBytes - hostBytes : 0;\r
+\r
+       // Round down to a whole FIFO, then add 4 FIFOs of safety margin.\r
+       readAheadBytes = ((readAheadBytes / SCSI_FIFO_DEPTH) + 4) * SCSI_FIFO_DEPTH;\r
+\r
+       // Never request more than the transfer itself.\r
+       return (readAheadBytes > totalBytes) ? totalBytes : readAheadBytes;\r
+}\r
+\r
 void scsiDiskPoll()\r
 {\r
        uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;\r
@@ -704,18 +727,16 @@ void scsiDiskPoll()
                                transfer.lba);\r
                int i = 0;\r
                int clearBSY = 0;\r
-               int extraSectors = 0;\r
 \r
                int parityError = 0;\r
                int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY;\r
 \r
-               uint32_t scsiSpeed = s2s_getScsiRateMBs();\r
-\r
                uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
 \r
                static_assert(SCSI_XFER_MAX >= sizeof(scsiDev.data), "Assumes SCSI_XFER_MAX >= sizeof(scsiDev.data)");\r
 \r
                // Start reading and filling fifos as soon as possible.\r
+               DWT->CYCCNT = 0; // Start counting cycles\r
                scsiSetDataCount(transfer.blocks * bytesPerSector);\r
 \r
                while ((i < totalSDSectors) &&\r
@@ -733,31 +754,15 @@ void scsiDiskPoll()
                                // no flow control. This can be handled if a) the scsi interface\r
                                // doesn't block and b) we read enough SCSI sectors first so that\r
                                // the SD interface cannot catch up.\r
-                               int prevExtraSectors = extraSectors;\r
                                uint32_t totalBytes = sectors * SD_SECTOR_SIZE;\r
-                               extraSectors = 0;\r
 \r
-                               int32_t readAheadBytes = totalBytes;\r
-                               uint32_t sdSpeed = s2s_getSdRateMBs() + (scsiDev.sdUnderrunCount / 2);\r
-                               // if (have blind writes)\r
-                               if (scsiSpeed > 0 && scsiDev.sdUnderrunCount < 16)\r
-                               {\r
-                                       // readAhead = sectors * (sd / scsi - 1 + 0.1);\r
-                                       readAheadBytes = totalBytes * sdSpeed / scsiSpeed - totalBytes;\r
-\r
-                                       // Round up to nearest FIFO size.\r
-                                       readAheadBytes = ((readAheadBytes / SCSI_FIFO_DEPTH) + 1) * SCSI_FIFO_DEPTH;\r
-\r
-                                       if (readAheadBytes > totalBytes)\r
-                                       {\r
-                                               readAheadBytes = totalBytes;\r
-                                       }\r
-                               }\r
-\r
-                               uint32_t prevExtraBytes = prevExtraSectors * SD_SECTOR_SIZE;\r
-                               uint32_t scsiBytesRead = prevExtraBytes;\r
-                               readAheadBytes -= prevExtraBytes; // Must be signed!\r
+                               uint32_t sdSpeedKBs = s2s_getSdRateKBs() + (scsiDev.sdUnderrunCount * 256);\r
+                               uint32_t readAheadBytes = calcReadahead(\r
+                                       totalBytes,\r
+                                       sdSpeedKBs,\r
+                                       scsiDev.hostSpeedKBs);\r
 \r
+                               uint32_t scsiBytesRead = 0;\r
                                if (readAheadBytes > 0)\r
                                {\r
                                        scsiReadPIO(\r
@@ -765,6 +770,42 @@ void scsiDiskPoll()
                                                readAheadBytes,\r
                                                &parityError);\r
                                        scsiBytesRead += readAheadBytes;\r
+\r
+                                       if (i == 0)\r
+                                       {\r
+                                               uint32_t elapsedCycles = DWT->CYCCNT;\r
+\r
+                                               // uint32_t rateKBs = (readAheadBytes / 1000) / (elapsedCycles / HAL_RCC_GetHCLKFreq());\r
+                                               // Scaled by 4 to avoid overflow w/ max 65536 at 108MHz.\r
+                                               uint32_t rateKBs = ((readAheadBytes / 4) * (HAL_RCC_GetHCLKFreq() / 1000) / elapsedCycles) * 4;\r
+\r
+                                               scsiDev.hostSpeedKBs = (scsiDev.hostSpeedKBs + rateKBs) / 2;\r
+                                               scsiDev.hostSpeedMeasured = 1;\r
+\r
+                                               if (rateKBs < scsiDev.hostSpeedKBs)\r
+                                               {\r
+                                                       // Our readahead was too slow; assume remaining bytes\r
+                                                       // will be as well.\r
+                                                       if (readAheadBytes < totalBytes)\r
+                                                       {\r
+                                                               uint32_t properReadahead = calcReadahead(\r
+                                                                       totalBytes,\r
+                                                                       sdSpeedKBs,\r
+                                                                       rateKBs);\r
+\r
+                                                               if (properReadahead > readAheadBytes)\r
+                                                               {\r
+                                                                       uint32_t diff = properReadahead - readAheadBytes;\r
+                                                                       readAheadBytes = properReadahead;\r
+                                                                       scsiReadPIO(\r
+                                                                               &scsiDev.data[scsiBytesRead],\r
+                                                                               diff,\r
+                                                                               &parityError);\r
+                                                                       scsiBytesRead += diff;\r
+                                                               }\r
+                                                       }\r
+                                               }\r
+                                       }\r
                                }\r
 \r
                                HAL_SD_WriteBlocks_DMA(&hsd, (uint32_t*) (&scsiDev.data[0]), (i + sdLBA) * 512ll, SD_SECTOR_SIZE, sectors);\r
@@ -783,22 +824,6 @@ void scsiDiskPoll()
                                        scsiBytesRead += (totalBytes - readAheadBytes);\r
                                }\r
 \r
-                               if (!underrun && rem > sectors)\r
-                               {\r
-                                       // We probably have some time to waste reading more here.\r
-                                       // While noting this is going to drop us down into\r
-                                       // half-duplex operation (hence why we read max / 4 only)\r
-\r
-                                       extraSectors = rem - sectors > (maxSectors / 4)\r
-                                               ? (maxSectors / 4)\r
-                                               : rem - sectors;\r
-\r
-                                       scsiReadPIO(\r
-                                               &scsiDev.data[0],\r
-                                               extraSectors * SD_SECTOR_SIZE,\r
-                                               &parityError);\r
-                               }\r
-\r
                                uint32_t dmaFinishTime = s2s_getTime_ms();\r
                                while ((!hsd.SdTransferCplt ||\r
                                                __HAL_SD_SDIO_GET_FLAG(&hsd, SDIO_FLAG_TXACT)) &&\r
index bcd2ed3..ec18c4e 100755 (executable)
@@ -34,6 +34,16 @@ SECTIONS
     . = ALIGN(4);
   } >FLASH_ISR
 
+  /* Store config settings into FLASH */
+  .fixed_config :
+  {
+    . = ALIGN(4);
+    __fixed_config = .;        /* create a global symbol at config start */
+    . += 1024;
+    KEEP(*(.fixed_config))
+    . = ALIGN(4);
+  } >CONFIG
+
   /* The program code and other data goes into FLASH */
   .text :
   {
index 25f0283..016ecbb 100755 (executable)
@@ -965,6 +965,11 @@ static void process_MessageOut()
                                scsiWrite(SDTR, sizeof(SDTR));\r
                                scsiDev.needSyncNegotiationAck = 1; // Check if this message is rejected.\r
                                scsiDev.sdUnderrunCount = 0;  // reset counter, may work now.\r
+\r
+                               // Set to the theoretical speed, then adjust if we measure lower\r
+                               // actual speeds.\r
+                               scsiDev.hostSpeedKBs = s2s_getScsiRateKBs();\r
+                               scsiDev.hostSpeedMeasured = 0;\r
                        }\r
                }\r
                else\r
@@ -1125,6 +1130,8 @@ void scsiInit()
        scsiDev.phase = BUS_FREE;\r
        scsiDev.target = NULL;\r
        scsiDev.compatMode = COMPAT_UNKNOWN;\r
+       scsiDev.hostSpeedKBs = 0;\r
+       scsiDev.hostSpeedMeasured = 0;\r
 \r
        int i;\r
        for (i = 0; i < S2S_MAX_TARGETS; ++i)\r
index cbfa980..64353ad 100755 (executable)
@@ -165,6 +165,10 @@ typedef struct
 
        int needSyncNegotiationAck;
        int sdUnderrunCount;
+
+       // Estimate of the SCSI host actual speed
+       uint32_t hostSpeedKBs;
+       int hostSpeedMeasured;
 } ScsiDevice;
 
 extern ScsiDevice scsiDev;
index a380127..2f27b1f 100755 (executable)
@@ -677,21 +677,25 @@ uint32_t scsiEnterPhaseImmediate(int newPhase)
        return 0; // No change\r
 }\r
 \r
-uint32_t s2s_getScsiRateMBs()\r
+// Returns a "safe" estimate of the host SCSI speed of\r
+// theoretical speed / 2\r
+uint32_t s2s_getScsiRateKBs()\r
 {\r
        if (scsiDev.target->syncOffset)\r
        {\r
                if (scsiDev.target->syncPeriod < 23)\r
                {\r
-                       return 20;\r
+                       return 20 / 2;\r
                }\r
                else if (scsiDev.target->syncPeriod <= 25)\r
                {\r
-                       return 10;\r
+                       return 10 / 2;\r
                }\r
                else\r
                {\r
-                       return 1000 / (scsiDev.target->syncPeriod * 4);\r
+                       // 1000000000 / (scsiDev.target->syncPeriod * 4) bytes per second\r
+                       // (1000000000 / (scsiDev.target->syncPeriod * 4)) / 1000  kB/s\r
+                       return (1000000 / (scsiDev.target->syncPeriod * 4)) / 2;\r
                }\r
        }\r
        else\r
index c2288db..360d594 100755 (executable)
@@ -117,6 +117,6 @@ int scsiWriteDMAPoll();
 
 int scsiSelfTest(void);
 
-uint32_t s2s_getScsiRateMBs();
+uint32_t s2s_getScsiRateKBs();
 
 #endif
index 4579620..088f019 100755 (executable)
@@ -96,7 +96,7 @@ ifeq ($(TARGET),Linux)
        BUILD := $(PWD)/build/linux
        LIBUSB_CONFIG+=--disable-shared
        LDFLAGS_LIBUSB+= -ludev -lpthread
-all: $(BUILD)/scsi2sd-test
+#all: $(BUILD)/scsi2sd-test
 
 endif
 ifeq ($(TARGET),Darwin)