Improve sync transfer stability by measuring host speed before blind writes to SD...
author Michael McMaster <michael@codesrc.com>
Tue, 24 Dec 2019 06:13:47 +0000 (16:13 +1000)
committer Michael McMaster <michael@codesrc.com>
Tue, 24 Dec 2019 06:13:47 +0000 (16:13 +1000)
Makefile
src/firmware/bsp.c
src/firmware/bsp.h
src/firmware/config.c
src/firmware/disk.c
src/firmware/link.ld
src/firmware/scsi.c
src/firmware/scsi.h
src/firmware/scsiPhy.c
src/firmware/scsiPhy.h
src/scsi2sd-util6/Makefile

index 9be0d9a64b08408f3c6fc163a6cadf642b759ce9..b81734a6bbf94e2b2d9b496bfa2f154f17e78555 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -160,6 +160,9 @@ build/firmware.elf: $(SRC) rtl/fpga_bitmap.o $(STM32OBJS)
 build/firmware.bin: build/firmware.elf
        $(OBJCOPY) -O binary $< $@
 
+# Example to hard-code config within firmware
+#sudo arm-none-eabi-objcopy --update-section .fixed_config=config.dat firmware.elf -O binary firmware.bin
+
 build/stm32cubemx/%.o:
        mkdir -p build/stm32cubemx
        $(ARMCC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $(STM32CubeMX_INCUDE) $(INCLUDE) $^
index 0c0a4774a488b3e0bd6851f98c00d3d261ab393f..a6794398dc50b2ae91ef27e70223e70e74d76c86 100644 (file)
@@ -23,15 +23,15 @@ static int usingFastClock = 0;
 
 // TODO keep clock routines consistent with those in STM32Cubemx main.c
 
-uint32_t s2s_getSdRateMBs()
+uint32_t s2s_getSdRateKBs()
 {
        if (usingFastClock)
        {
-               return 18; // ((72MHz / 2) / 8bits) * 4bitparallel
+               return 18000; // kB/s: ((72MHz / 2) / 8bits) * 4bitparallel = 18MB/s
        }
        else
        {
-               return 12; // ((48MHz / 2) / 8bits) * 4bitparallel
+               return 12000; // kB/s: ((48MHz / 2) / 8bits) * 4bitparallel = 12MB/s
        }
 }
 
index a11c850a91eac9975be9199f776776ca3e137550..10481294830abe7d8e79979638f8068f5a19a3a2 100644 (file)
@@ -27,7 +27,7 @@
 void s2s_setNormalClock();
 void s2s_setFastClock();
 
-uint32_t s2s_getSdRateMBs();
+uint32_t s2s_getSdRateKBs();
 
 #endif
 
index 46bebb834c738ed1fb810f824938be1740ed0c54..ef09bc068249c29dd77b428dbdee9c0bc14047ee 100755 (executable)
 \r
 #include <string.h>\r
 \r
-static const uint16_t FIRMWARE_VERSION = 0x0629;\r
+static const uint16_t FIRMWARE_VERSION = 0x062A;\r
+\r
+// Optional static config: linker symbol marking the start of .fixed_config\r
+extern uint8_t __fixed_config[];\r
 \r
 // 1 flash row\r
 static const uint8_t DEFAULT_CONFIG[128] =\r
@@ -87,7 +90,14 @@ void s2s_configInit(S2S_BoardCfg* config)
 {\r
        usbInEpState = USB_IDLE;\r
 \r
-       if ((blockDev.state & DISK_PRESENT) && sdDev.capacity)\r
+       if (memcmp(__fixed_config, "BCFG", 4) == 0)\r
+       {\r
+               // Use hardcoded config\r
+               memcpy(s2s_cfg, __fixed_config, S2S_CFG_SIZE);\r
+               memcpy(config, s2s_cfg, sizeof(S2S_BoardCfg));\r
+       }\r
+\r
+       else if ((blockDev.state & DISK_PRESENT) && sdDev.capacity)\r
        {\r
                int cfgSectors = (S2S_CFG_SIZE + 511) / 512;\r
                BSP_SD_ReadBlocks_DMA(\r
index 773ffd3415a783f54501d7ee3c89cbeed6c4c8a8..3c562e30ee5c7f58a31a2d241918a5cf31395b37 100755 (executable)
@@ -548,6 +548,29 @@ int scsiDiskCommand()
        return commandHandled;\r
 }\r
 \r
+// Number of bytes to read from the host before the SD write is started, so\r
+// that the SD card cannot catch up with the SCSI transfer. Returns totalBytes\r
+// (no overlap) while no host speed has been measured or a speed is zero.\r
+static uint32_t\r
+calcReadahead(uint32_t totalBytes, uint32_t sdSpeedKBs, uint32_t scsiSpeedKBs)\r
+{\r
+       if (scsiSpeedKBs == 0 || sdSpeedKBs == 0 || scsiDev.hostSpeedMeasured == 0)\r
+       {\r
+               return totalBytes;\r
+       }\r
+\r
+       // totalBytes * (1 - scsiSpeedKBs / sdSpeedKBs), floored at 0 when the host\r
+       // is at least as fast as the SD card (the unsigned subtraction would wrap).\r
+       // The 64-bit product cannot overflow whatever speed was measured.\r
+       uint32_t readAheadBytes = (scsiSpeedKBs >= sdSpeedKBs) ? 0 :\r
+               totalBytes - (uint32_t) (((uint64_t) totalBytes * scsiSpeedKBs) / sdSpeedKBs);\r
+\r
+       // Round down to a whole number of FIFOs, then add 4 FIFOs for safety.\r
+       readAheadBytes = ((readAheadBytes / SCSI_FIFO_DEPTH) + 4) * SCSI_FIFO_DEPTH;\r
+\r
+       return (readAheadBytes > totalBytes) ? totalBytes : readAheadBytes;\r
+}\r
+\r
 void scsiDiskPoll()\r
 {\r
        uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;\r
@@ -704,18 +727,16 @@ void scsiDiskPoll()
                                transfer.lba);\r
                int i = 0;\r
                int clearBSY = 0;\r
-               int extraSectors = 0;\r
 \r
                int parityError = 0;\r
                int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY;\r
 \r
-               uint32_t scsiSpeed = s2s_getScsiRateMBs();\r
-\r
                uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
 \r
                static_assert(SCSI_XFER_MAX >= sizeof(scsiDev.data), "Assumes SCSI_XFER_MAX >= sizeof(scsiDev.data)");\r
 \r
                // Start reading and filling fifos as soon as possible.\r
+               DWT->CYCCNT = 0; // Start counting cycles\r
                scsiSetDataCount(transfer.blocks * bytesPerSector);\r
 \r
                while ((i < totalSDSectors) &&\r
@@ -733,31 +754,15 @@ void scsiDiskPoll()
                                // no flow control. This can be handled if a) the scsi interface\r
                                // doesn't block and b) we read enough SCSI sectors first so that\r
                                // the SD interface cannot catch up.\r
-                               int prevExtraSectors = extraSectors;\r
                                uint32_t totalBytes = sectors * SD_SECTOR_SIZE;\r
-                               extraSectors = 0;\r
 \r
-                               int32_t readAheadBytes = totalBytes;\r
-                               uint32_t sdSpeed = s2s_getSdRateMBs() + (scsiDev.sdUnderrunCount / 2);\r
-                               // if (have blind writes)\r
-                               if (scsiSpeed > 0 && scsiDev.sdUnderrunCount < 16)\r
-                               {\r
-                                       // readAhead = sectors * (sd / scsi - 1 + 0.1);\r
-                                       readAheadBytes = totalBytes * sdSpeed / scsiSpeed - totalBytes;\r
-\r
-                                       // Round up to nearest FIFO size.\r
-                                       readAheadBytes = ((readAheadBytes / SCSI_FIFO_DEPTH) + 1) * SCSI_FIFO_DEPTH;\r
-\r
-                                       if (readAheadBytes > totalBytes)\r
-                                       {\r
-                                               readAheadBytes = totalBytes;\r
-                                       }\r
-                               }\r
-\r
-                               uint32_t prevExtraBytes = prevExtraSectors * SD_SECTOR_SIZE;\r
-                               uint32_t scsiBytesRead = prevExtraBytes;\r
-                               readAheadBytes -= prevExtraBytes; // Must be signed!\r
+                               uint32_t sdSpeedKBs = s2s_getSdRateKBs() + (scsiDev.sdUnderrunCount * 256);\r
+                               uint32_t readAheadBytes = calcReadahead(\r
+                                       totalBytes,\r
+                                       sdSpeedKBs,\r
+                                       scsiDev.hostSpeedKBs);\r
 \r
+                               uint32_t scsiBytesRead = 0;\r
                                if (readAheadBytes > 0)\r
                                {\r
                                        scsiReadPIO(\r
@@ -765,6 +770,42 @@ void scsiDiskPoll()
                                                readAheadBytes,\r
                                                &parityError);\r
                                        scsiBytesRead += readAheadBytes;\r
+\r
+                                       if (i == 0)\r
+                                       {\r
+                                               uint32_t elapsedCycles = DWT->CYCCNT;\r
+\r
+                                               // uint32_t rateKBs = (readAheadBytes / 1000) / (elapsedCycles / HAL_RCC_GetHCLKFreq());\r
+                                               // Scaled by 4 to avoid overflow w/ max 65536 at 108MHz.\r
+                                               uint32_t rateKBs = ((readAheadBytes / 4) * (HAL_RCC_GetHCLKFreq() / 1000) / elapsedCycles) * 4;\r
+\r
+                                               scsiDev.hostSpeedKBs = (scsiDev.hostSpeedKBs + rateKBs) / 2;\r
+                                               scsiDev.hostSpeedMeasured = 1;\r
+\r
+                                               if (rateKBs < scsiDev.hostSpeedKBs)\r
+                                               {\r
+                                                       // Our readahead was too slow; assume remaining bytes\r
+                                                       // will be as well.\r
+                                                       if (readAheadBytes < totalBytes)\r
+                                                       {\r
+                                                               uint32_t properReadahead = calcReadahead(\r
+                                                                       totalBytes,\r
+                                                                       sdSpeedKBs,\r
+                                                                       rateKBs);\r
+\r
+                                                               if (properReadahead > readAheadBytes)\r
+                                                               {\r
+                                                                       uint32_t diff = properReadahead - readAheadBytes;\r
+                                                                       readAheadBytes = properReadahead;\r
+                                                                       scsiReadPIO(\r
+                                                                               &scsiDev.data[scsiBytesRead],\r
+                                                                               diff,\r
+                                                                               &parityError);\r
+                                                                       scsiBytesRead += diff;\r
+                                                               }\r
+                                                       }\r
+                                               }\r
+                                       }\r
                                }\r
 \r
                                HAL_SD_WriteBlocks_DMA(&hsd, (uint32_t*) (&scsiDev.data[0]), (i + sdLBA) * 512ll, SD_SECTOR_SIZE, sectors);\r
@@ -783,22 +824,6 @@ void scsiDiskPoll()
                                        scsiBytesRead += (totalBytes - readAheadBytes);\r
                                }\r
 \r
-                               if (!underrun && rem > sectors)\r
-                               {\r
-                                       // We probably have some time to waste reading more here.\r
-                                       // While noting this is going to drop us down into\r
-                                       // half-duplex operation (hence why we read max / 4 only)\r
-\r
-                                       extraSectors = rem - sectors > (maxSectors / 4)\r
-                                               ? (maxSectors / 4)\r
-                                               : rem - sectors;\r
-\r
-                                       scsiReadPIO(\r
-                                               &scsiDev.data[0],\r
-                                               extraSectors * SD_SECTOR_SIZE,\r
-                                               &parityError);\r
-                               }\r
-\r
                                uint32_t dmaFinishTime = s2s_getTime_ms();\r
                                while ((!hsd.SdTransferCplt ||\r
                                                __HAL_SD_SDIO_GET_FLAG(&hsd, SDIO_FLAG_TXACT)) &&\r
index bcd2ed376ea545ef50e3e88c09465fe431aa6e0b..ec18c4e8864736712cac953ba1f8ccf42fa1e2c5 100755 (executable)
@@ -34,6 +34,16 @@ SECTIONS
     . = ALIGN(4);
   } >FLASH_ISR
 
+  /* Store config settings into FLASH */
+  .fixed_config :
+  {
+    . = ALIGN(4);
+    __fixed_config = .;        /* create a global symbol at config start */
+    . += 1024;
+    KEEP(*(.fixed_config))
+    . = ALIGN(4);
+  } >CONFIG
+
   /* The program code and other data goes into FLASH */
   .text :
   {
index 25f02839b89987d3afabb6a062d104ebc6652410..016ecbb523f05d58bbc768c1442c425cf9290354 100755 (executable)
@@ -965,6 +965,11 @@ static void process_MessageOut()
                                scsiWrite(SDTR, sizeof(SDTR));\r
                                scsiDev.needSyncNegotiationAck = 1; // Check if this message is rejected.\r
                                scsiDev.sdUnderrunCount = 0;  // reset counter, may work now.\r
+\r
+                               // Set to the theoretical speed, then adjust if we measure lower\r
+                               // actual speeds.\r
+                               scsiDev.hostSpeedKBs = s2s_getScsiRateKBs();\r
+                               scsiDev.hostSpeedMeasured = 0;\r
                        }\r
                }\r
                else\r
@@ -1125,6 +1130,8 @@ void scsiInit()
        scsiDev.phase = BUS_FREE;\r
        scsiDev.target = NULL;\r
        scsiDev.compatMode = COMPAT_UNKNOWN;\r
+       scsiDev.hostSpeedKBs = 0;\r
+       scsiDev.hostSpeedMeasured = 0;\r
 \r
        int i;\r
        for (i = 0; i < S2S_MAX_TARGETS; ++i)\r
index cbfa980795e1c81238c8c91294708b0e0b127af4..64353ada8d845688ec080251ecc81294750e9d7c 100755 (executable)
@@ -165,6 +165,10 @@ typedef struct
 
        int needSyncNegotiationAck;
        int sdUnderrunCount;
+
+       // Estimate of the SCSI host's actual speed, in kB/s
+       uint32_t hostSpeedKBs;
+       int hostSpeedMeasured;
 } ScsiDevice;
 
 extern ScsiDevice scsiDev;
index a38012734457ad91c62c5a1e30663bf3d98e8caf..2f27b1fda7fa25a40c00d2171c79043f2d136d90 100755 (executable)
@@ -677,21 +677,25 @@ uint32_t scsiEnterPhaseImmediate(int newPhase)
        return 0; // No change\r
 }\r
 \r
-uint32_t s2s_getScsiRateMBs()\r
+// Returns a "safe" estimate of the host SCSI speed in kB/s (theoretical / 2).\r
+// NOTE(review): the 20 / 2 and 10 / 2 branches look like MB/s, not kB/s - confirm.\r
+uint32_t s2s_getScsiRateKBs()\r
 {\r
        if (scsiDev.target->syncOffset)\r
        {\r
                if (scsiDev.target->syncPeriod < 23)\r
                {\r
-                       return 20;\r
+                       return 20 / 2;\r
                }\r
                else if (scsiDev.target->syncPeriod <= 25)\r
                {\r
-                       return 10;\r
+                       return 10 / 2;\r
                }\r
                else\r
                {\r
-                       return 1000 / (scsiDev.target->syncPeriod * 4);\r
+                       // 1000000000 / (scsiDev.target->syncPeriod * 4) bytes per second\r
+                       // (1000000000 / (scsiDev.target->syncPeriod * 4)) / 1000  kB/s\r
+                       return (1000000 / (scsiDev.target->syncPeriod * 4)) / 2;\r
                }\r
        }\r
        else\r
index c2288db782e713e052256ab4a224da9ed5814351..360d594cb347ae43b71a9811dba7c3709283ba35 100755 (executable)
@@ -117,6 +117,6 @@ int scsiWriteDMAPoll();
 
 int scsiSelfTest(void);
 
-uint32_t s2s_getScsiRateMBs();
+uint32_t s2s_getScsiRateKBs();
 
 #endif
index 45796205ce443d18910813145ed5c64e8aebf6ec..088f019563eebac65f5e90ab32b190329539e92c 100755 (executable)
@@ -96,7 +96,7 @@ ifeq ($(TARGET),Linux)
        BUILD := $(PWD)/build/linux
        LIBUSB_CONFIG+=--disable-shared
        LDFLAGS_LIBUSB+= -ludev -lpthread
-all: $(BUILD)/scsi2sd-test
+#all: $(BUILD)/scsi2sd-test
 
 endif
 ifeq ($(TARGET),Darwin)