Slight write speed improvement
[SCSI2SD-V6.git] / src / firmware / disk.c
1 //      Copyright (C) 2013 Michael McMaster <michael@codesrc.com>\r
2 //      Copyright (C) 2014 Doug Brown <doug@downtowndougbrown.com>\r
3 //\r
4 //      This file is part of SCSI2SD.\r
5 //\r
6 //      SCSI2SD is free software: you can redistribute it and/or modify\r
7 //      it under the terms of the GNU General Public License as published by\r
8 //      the Free Software Foundation, either version 3 of the License, or\r
9 //      (at your option) any later version.\r
10 //\r
11 //      SCSI2SD is distributed in the hope that it will be useful,\r
12 //      but WITHOUT ANY WARRANTY; without even the implied warranty of\r
13 //      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\r
14 //      GNU General Public License for more details.\r
15 //\r
16 //      You should have received a copy of the GNU General Public License\r
17 //      along with SCSI2SD.  If not, see <http://www.gnu.org/licenses/>.\r
18 \r
19 #ifdef STM32F2xx\r
20 #include "stm32f2xx.h"\r
21 #endif\r
22 #ifdef STM32F4xx\r
23 #include "stm32f4xx.h"\r
24 #endif\r
25 \r
26 #include <assert.h>\r
27 \r
28 // For SD write direct routines\r
29 #include "sdio.h"\r
30 #include "bsp_driver_sd.h"\r
31 \r
32 \r
33 #include "scsi.h"\r
34 #include "scsiPhy.h"\r
35 #include "config.h"\r
36 #include "disk.h"\r
37 #include "sd.h"\r
38 #include "time.h"\r
39 #include "bsp.h"\r
40 \r
41 #include <string.h>\r
42 \r
// Global
// State of the backing block device. The `state` member is a bit set of
// DISK_* flags (DISK_PRESENT, DISK_STARTED, DISK_INITIALISED, DISK_WP) that
// the command handlers in this file test and update.
BlockDevice blockDev;
// Bookkeeping for the read/write transfer currently in progress: starting
// LBA, block count, current block, and the multi-block flag. Filled in by
// doRead()/doWrite().
Transfer transfer;
46 \r
47 static int doSdInit()\r
48 {\r
49         int result = 0;\r
50         if (blockDev.state & DISK_PRESENT)\r
51         {\r
52                 blockDev.state = blockDev.state | DISK_INITIALISED;\r
53         }\r
54         return result;\r
55 }\r
56 \r
57 // Callback once all data has been read in the data out phase.\r
58 static void doFormatUnitComplete(void)\r
59 {\r
60         // TODO start writing the initialisation pattern to the SD\r
61         // card\r
62         scsiDev.phase = STATUS;\r
63 }\r
64 \r
65 static void doFormatUnitSkipData(int bytes)\r
66 {\r
67         // We may not have enough memory to store the initialisation pattern and\r
68         // defect list data.  Since we're not making use of it yet anyway, just\r
69         // discard the bytes.\r
70         scsiEnterPhase(DATA_OUT);\r
71         int i;\r
72         for (i = 0; i < bytes; ++i)\r
73         {\r
74                 scsiReadByte();\r
75         }\r
76 }\r
77 \r
78 // Callback from the data out phase.\r
79 static void doFormatUnitPatternHeader(void)\r
80 {\r
81         int defectLength =\r
82                 ((((uint16_t)scsiDev.data[2])) << 8) +\r
83                         scsiDev.data[3];\r
84 \r
85         int patternLength =\r
86                 ((((uint16_t)scsiDev.data[4 + 2])) << 8) +\r
87                 scsiDev.data[4 + 3];\r
88 \r
89                 doFormatUnitSkipData(defectLength + patternLength);\r
90                 doFormatUnitComplete();\r
91 }\r
92 \r
93 // Callback from the data out phase.\r
94 static void doFormatUnitHeader(void)\r
95 {\r
96         int IP = (scsiDev.data[1] & 0x08) ? 1 : 0;\r
97         int DSP = (scsiDev.data[1] & 0x04) ? 1 : 0;\r
98 \r
99         if (! DSP) // disable save parameters\r
100         {\r
101                 // Save the "MODE SELECT savable parameters"\r
102                 s2s_configSave(\r
103                         scsiDev.target->targetId,\r
104                         scsiDev.target->liveCfg.bytesPerSector);\r
105         }\r
106 \r
107         if (IP)\r
108         {\r
109                 // We need to read the initialisation pattern header first.\r
110                 scsiDev.dataLen += 4;\r
111                 scsiDev.phase = DATA_OUT;\r
112                 scsiDev.postDataOutHook = doFormatUnitPatternHeader;\r
113         }\r
114         else\r
115         {\r
116                 // Read the defect list data\r
117                 int defectLength =\r
118                         ((((uint16_t)scsiDev.data[2])) << 8) +\r
119                         scsiDev.data[3];\r
120                 doFormatUnitSkipData(defectLength);\r
121                 doFormatUnitComplete();\r
122         }\r
123 }\r
124 \r
125 static void doReadCapacity()\r
126 {\r
127         uint32_t lba = (((uint32_t) scsiDev.cdb[2]) << 24) +\r
128                 (((uint32_t) scsiDev.cdb[3]) << 16) +\r
129                 (((uint32_t) scsiDev.cdb[4]) << 8) +\r
130                 scsiDev.cdb[5];\r
131         int pmi = scsiDev.cdb[8] & 1;\r
132 \r
133         uint32_t capacity = getScsiCapacity(\r
134                 scsiDev.target->cfg->sdSectorStart,\r
135                 scsiDev.target->liveCfg.bytesPerSector,\r
136                 scsiDev.target->cfg->scsiSectors);\r
137 \r
138         if (!pmi && lba)\r
139         {\r
140                 // error.\r
141                 // We don't do anything with the "partial medium indicator", and\r
142                 // assume that delays are constant across each block. But the spec\r
143                 // says we must return this error if pmi is specified incorrectly.\r
144                 scsiDev.status = CHECK_CONDITION;\r
145                 scsiDev.target->sense.code = ILLEGAL_REQUEST;\r
146                 scsiDev.target->sense.asc = INVALID_FIELD_IN_CDB;\r
147                 scsiDev.phase = STATUS;\r
148         }\r
149         else if (capacity > 0)\r
150         {\r
151                 uint32_t highestBlock = capacity - 1;\r
152 \r
153                 scsiDev.data[0] = highestBlock >> 24;\r
154                 scsiDev.data[1] = highestBlock >> 16;\r
155                 scsiDev.data[2] = highestBlock >> 8;\r
156                 scsiDev.data[3] = highestBlock;\r
157 \r
158                 uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;\r
159                 scsiDev.data[4] = bytesPerSector >> 24;\r
160                 scsiDev.data[5] = bytesPerSector >> 16;\r
161                 scsiDev.data[6] = bytesPerSector >> 8;\r
162                 scsiDev.data[7] = bytesPerSector;\r
163                 scsiDev.dataLen = 8;\r
164                 scsiDev.phase = DATA_IN;\r
165         }\r
166         else\r
167         {\r
168                 scsiDev.status = CHECK_CONDITION;\r
169                 scsiDev.target->sense.code = NOT_READY;\r
170                 scsiDev.target->sense.asc = MEDIUM_NOT_PRESENT;\r
171                 scsiDev.phase = STATUS;\r
172         }\r
173 }\r
174 \r
175 static void doWrite(uint32_t lba, uint32_t blocks)\r
176 {\r
177         if (unlikely(scsiDev.target->cfg->deviceType == S2S_CFG_FLOPPY_14MB)) {\r
178                 // Floppies are supposed to be slow. Some systems can't handle a floppy\r
179                 // without an access time\r
180                 s2s_delay_ms(10);\r
181         }\r
182 \r
183         uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;\r
184 \r
185         if (unlikely(blockDev.state & DISK_WP) ||\r
186                 unlikely(scsiDev.target->cfg->deviceType == S2S_CFG_OPTICAL))\r
187 \r
188         {\r
189                 scsiDev.status = CHECK_CONDITION;\r
190                 scsiDev.target->sense.code = ILLEGAL_REQUEST;\r
191                 scsiDev.target->sense.asc = WRITE_PROTECTED;\r
192                 scsiDev.phase = STATUS;\r
193         }\r
194         else if (unlikely(((uint64_t) lba) + blocks >\r
195                 getScsiCapacity(\r
196                         scsiDev.target->cfg->sdSectorStart,\r
197                         bytesPerSector,\r
198                         scsiDev.target->cfg->scsiSectors\r
199                         )\r
200                 ))\r
201         {\r
202                 scsiDev.status = CHECK_CONDITION;\r
203                 scsiDev.target->sense.code = ILLEGAL_REQUEST;\r
204                 scsiDev.target->sense.asc = LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;\r
205                 scsiDev.phase = STATUS;\r
206         }\r
207         else\r
208         {\r
209                 transfer.lba = lba;\r
210                 transfer.blocks = blocks;\r
211                 transfer.currentBlock = 0;\r
212                 scsiDev.phase = DATA_OUT;\r
213                 scsiDev.dataLen = bytesPerSector;\r
214                 scsiDev.dataPtr = bytesPerSector;\r
215 \r
216                 // No need for single-block writes atm.  Overhead of the\r
217                 // multi-block write is minimal.\r
218                 transfer.multiBlock = 1;\r
219 \r
220 \r
221                 // TODO uint32_t sdLBA =\r
222 // TODO                         SCSISector2SD(\r
223         // TODO                         scsiDev.target->cfg->sdSectorStart,\r
224                 // TODO                 bytesPerSector,\r
225                         // TODO         lba);\r
226                 // TODO uint32_t sdBlocks = blocks * SDSectorsPerSCSISector(bytesPerSector);\r
227                 // TODO sdWriteMultiSectorPrep(sdLBA, sdBlocks);\r
228         }\r
229 }\r
230 \r
231 \r
232 static void doRead(uint32_t lba, uint32_t blocks)\r
233 {\r
234         if (unlikely(scsiDev.target->cfg->deviceType == S2S_CFG_FLOPPY_14MB)) {\r
235                 // Floppies are supposed to be slow. Some systems can't handle a floppy\r
236                 // without an access time\r
237                 s2s_delay_ms(10);\r
238         }\r
239 \r
240         uint32_t capacity = getScsiCapacity(\r
241                 scsiDev.target->cfg->sdSectorStart,\r
242                 scsiDev.target->liveCfg.bytesPerSector,\r
243                 scsiDev.target->cfg->scsiSectors);\r
244         if (unlikely(((uint64_t) lba) + blocks > capacity))\r
245         {\r
246                 scsiDev.status = CHECK_CONDITION;\r
247                 scsiDev.target->sense.code = ILLEGAL_REQUEST;\r
248                 scsiDev.target->sense.asc = LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;\r
249                 scsiDev.phase = STATUS;\r
250         }\r
251         else\r
252         {\r
253                 transfer.lba = lba;\r
254                 transfer.blocks = blocks;\r
255                 transfer.currentBlock = 0;\r
256                 scsiDev.phase = DATA_IN;\r
257                 scsiDev.dataLen = 0; // No data yet\r
258 \r
259                 uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;\r
260                 uint32_t sdSectorPerSCSISector = SDSectorsPerSCSISector(bytesPerSector);\r
261                 uint32_t sdSectors =\r
262                         blocks * sdSectorPerSCSISector;\r
263 \r
264                 if ((\r
265                                 (sdSectors == 1) &&\r
266                                 !(scsiDev.boardCfg.flags & S2S_CFG_ENABLE_CACHE)\r
267                         ) ||\r
268                         unlikely(((uint64_t) lba) + blocks == capacity)\r
269                         )\r
270                 {\r
271                         // We get errors on reading the last sector using a multi-sector\r
272                         // read :-(\r
273                         transfer.multiBlock = 0;\r
274                 }\r
275                 else\r
276                 {\r
277                         transfer.multiBlock = 1;\r
278 \r
279                         // uint32_t sdLBA =\r
280                                 // SCSISector2SD(\r
281                                         // scsiDev.target->cfg->sdSectorStart,\r
282                                         // bytesPerSector,\r
283                                         // lba);\r
284 \r
285                         // TODO sdReadMultiSectorPrep(sdLBA, sdSectors);\r
286                 }\r
287         }\r
288 }\r
289 \r
290 static void doSeek(uint32_t lba)\r
291 {\r
292         if (lba >=\r
293                 getScsiCapacity(\r
294                         scsiDev.target->cfg->sdSectorStart,\r
295                         scsiDev.target->liveCfg.bytesPerSector,\r
296                         scsiDev.target->cfg->scsiSectors)\r
297                 )\r
298         {\r
299                 scsiDev.status = CHECK_CONDITION;\r
300                 scsiDev.target->sense.code = ILLEGAL_REQUEST;\r
301                 scsiDev.target->sense.asc = LOGICAL_BLOCK_ADDRESS_OUT_OF_RANGE;\r
302                 scsiDev.phase = STATUS;\r
303         }\r
304         else\r
305         {\r
306                 s2s_delay_ms(10);\r
307         }\r
308 }\r
309 \r
310 static int doTestUnitReady()\r
311 {\r
312         int ready = 1;\r
313         if (likely(blockDev.state == (DISK_STARTED | DISK_PRESENT | DISK_INITIALISED)))\r
314         {\r
315                 // nothing to do.\r
316         }\r
317         else if (unlikely(!(blockDev.state & DISK_STARTED)))\r
318         {\r
319                 ready = 0;\r
320                 scsiDev.status = CHECK_CONDITION;\r
321                 scsiDev.target->sense.code = NOT_READY;\r
322                 scsiDev.target->sense.asc = LOGICAL_UNIT_NOT_READY_INITIALIZING_COMMAND_REQUIRED;\r
323                 scsiDev.phase = STATUS;\r
324         }\r
325         else if (unlikely(!(blockDev.state & DISK_PRESENT)))\r
326         {\r
327                 ready = 0;\r
328                 scsiDev.status = CHECK_CONDITION;\r
329                 scsiDev.target->sense.code = NOT_READY;\r
330                 scsiDev.target->sense.asc = MEDIUM_NOT_PRESENT;\r
331                 scsiDev.phase = STATUS;\r
332         }\r
333         else if (unlikely(!(blockDev.state & DISK_INITIALISED)))\r
334         {\r
335                 ready = 0;\r
336                 scsiDev.status = CHECK_CONDITION;\r
337                 scsiDev.target->sense.code = NOT_READY;\r
338                 scsiDev.target->sense.asc = LOGICAL_UNIT_NOT_READY_CAUSE_NOT_REPORTABLE;\r
339                 scsiDev.phase = STATUS;\r
340         }\r
341         return ready;\r
342 }\r
343 \r
344 // Handle direct-access scsi device commands\r
345 int scsiDiskCommand()\r
346 {\r
347         int commandHandled = 1;\r
348 \r
349         uint8_t command = scsiDev.cdb[0];\r
350         if (unlikely(command == 0x1B))\r
351         {\r
352                 // START STOP UNIT\r
353                 // Enable or disable media access operations.\r
354                 // Ignore load/eject requests. We can't do that.\r
355                 //int immed = scsiDev.cdb[1] & 1;\r
356                 int start = scsiDev.cdb[4] & 1;\r
357 \r
358                 if (start)\r
359                 {\r
360                         blockDev.state = blockDev.state | DISK_STARTED;\r
361                         if (!(blockDev.state & DISK_INITIALISED))\r
362                         {\r
363                                 doSdInit();\r
364                         }\r
365                 }\r
366                 else\r
367                 {\r
368                         blockDev.state &= ~DISK_STARTED;\r
369                 }\r
370         }\r
371         else if (unlikely(command == 0x00))\r
372         {\r
373                 // TEST UNIT READY\r
374                 doTestUnitReady();\r
375         }\r
376         else if (unlikely(!doTestUnitReady()))\r
377         {\r
378                 // Status and sense codes already set by doTestUnitReady\r
379         }\r
380         else if (likely(command == 0x08))\r
381         {\r
382                 // READ(6)\r
383                 uint32_t lba =\r
384                         (((uint32_t) scsiDev.cdb[1] & 0x1F) << 16) +\r
385                         (((uint32_t) scsiDev.cdb[2]) << 8) +\r
386                         scsiDev.cdb[3];\r
387                 uint32_t blocks = scsiDev.cdb[4];\r
388                 if (unlikely(blocks == 0)) blocks = 256;\r
389                 doRead(lba, blocks);\r
390         }\r
391         else if (likely(command == 0x28))\r
392         {\r
393                 // READ(10)\r
394                 // Ignore all cache control bits - we don't support a memory cache.\r
395 \r
396                 uint32_t lba =\r
397                         (((uint32_t) scsiDev.cdb[2]) << 24) +\r
398                         (((uint32_t) scsiDev.cdb[3]) << 16) +\r
399                         (((uint32_t) scsiDev.cdb[4]) << 8) +\r
400                         scsiDev.cdb[5];\r
401                 uint32_t blocks =\r
402                         (((uint32_t) scsiDev.cdb[7]) << 8) +\r
403                         scsiDev.cdb[8];\r
404 \r
405                 doRead(lba, blocks);\r
406         }\r
407         else if (likely(command == 0x0A))\r
408         {\r
409                 // WRITE(6)\r
410                 uint32_t lba =\r
411                         (((uint32_t) scsiDev.cdb[1] & 0x1F) << 16) +\r
412                         (((uint32_t) scsiDev.cdb[2]) << 8) +\r
413                         scsiDev.cdb[3];\r
414                 uint32_t blocks = scsiDev.cdb[4];\r
415                 if (unlikely(blocks == 0)) blocks = 256;\r
416                 doWrite(lba, blocks);\r
417         }\r
418         else if (likely(command == 0x2A) || // WRITE(10)\r
419                 unlikely(command == 0x2E)) // WRITE AND VERIFY\r
420         {\r
421                 // Ignore all cache control bits - we don't support a memory cache.\r
422                 // Don't bother verifying either. The SD card likely stores ECC\r
423                 // along with each flash row.\r
424 \r
425                 uint32_t lba =\r
426                         (((uint32_t) scsiDev.cdb[2]) << 24) +\r
427                         (((uint32_t) scsiDev.cdb[3]) << 16) +\r
428                         (((uint32_t) scsiDev.cdb[4]) << 8) +\r
429                         scsiDev.cdb[5];\r
430                 uint32_t blocks =\r
431                         (((uint32_t) scsiDev.cdb[7]) << 8) +\r
432                         scsiDev.cdb[8];\r
433 \r
434                 doWrite(lba, blocks);\r
435         }\r
436         else if (unlikely(command == 0x04))\r
437         {\r
438                 // FORMAT UNIT\r
439                 // We don't really do any formatting, but we need to read the correct\r
440                 // number of bytes in the DATA_OUT phase to make the SCSI host happy.\r
441 \r
442                 int fmtData = (scsiDev.cdb[1] & 0x10) ? 1 : 0;\r
443                 if (fmtData)\r
444                 {\r
445                         // We need to read the parameter list, but we don't know how\r
446                         // big it is yet. Start with the header.\r
447                         scsiDev.dataLen = 4;\r
448                         scsiDev.phase = DATA_OUT;\r
449                         scsiDev.postDataOutHook = doFormatUnitHeader;\r
450                 }\r
451                 else\r
452                 {\r
453                         // No data to read, we're already finished!\r
454                 }\r
455         }\r
456         else if (unlikely(command == 0x25))\r
457         {\r
458                 // READ CAPACITY\r
459                 doReadCapacity();\r
460         }\r
461         else if (unlikely(command == 0x0B))\r
462         {\r
463                 // SEEK(6)\r
464                 uint32_t lba =\r
465                         (((uint32_t) scsiDev.cdb[1] & 0x1F) << 16) +\r
466                         (((uint32_t) scsiDev.cdb[2]) << 8) +\r
467                         scsiDev.cdb[3];\r
468 \r
469                 doSeek(lba);\r
470         }\r
471 \r
472         else if (unlikely(command == 0x2B))\r
473         {\r
474                 // SEEK(10)\r
475                 uint32_t lba =\r
476                         (((uint32_t) scsiDev.cdb[2]) << 24) +\r
477                         (((uint32_t) scsiDev.cdb[3]) << 16) +\r
478                         (((uint32_t) scsiDev.cdb[4]) << 8) +\r
479                         scsiDev.cdb[5];\r
480 \r
481                 doSeek(lba);\r
482         }\r
483         else if (unlikely(command == 0x36))\r
484         {\r
485                 // LOCK UNLOCK CACHE\r
486                 // We don't have a cache to lock data into. do nothing.\r
487         }\r
488         else if (unlikely(command == 0x34))\r
489         {\r
490                 // PRE-FETCH.\r
491                 // We don't have a cache to pre-fetch into. do nothing.\r
492         }\r
493         else if (unlikely(command == 0x1E))\r
494         {\r
495                 // PREVENT ALLOW MEDIUM REMOVAL\r
496                 // Not much we can do to prevent the user removing the SD card.\r
497                 // do nothing.\r
498         }\r
499         else if (unlikely(command == 0x01))\r
500         {\r
501                 // REZERO UNIT\r
502                 // Set the lun to a vendor-specific state. Ignore.\r
503         }\r
504         else if (unlikely(command == 0x35))\r
505         {\r
506                 // SYNCHRONIZE CACHE\r
507                 // We don't have a cache. do nothing.\r
508         }\r
509         else if (unlikely(command == 0x2F))\r
510         {\r
511                 // VERIFY\r
512                 // TODO: When they supply data to verify, we should read the data and\r
513                 // verify it. If they don't supply any data, just say success.\r
514                 if ((scsiDev.cdb[1] & 0x02) == 0)\r
515                 {\r
516                         // They are asking us to do a medium verification with no data\r
517                         // comparison. Assume success, do nothing.\r
518                 }\r
519                 else\r
520                 {\r
521                         // TODO. This means they are supplying data to verify against.\r
522                         // Technically we should probably grab the data and compare it.\r
523                         scsiDev.status = CHECK_CONDITION;\r
524                         scsiDev.target->sense.code = ILLEGAL_REQUEST;\r
525                         scsiDev.target->sense.asc = INVALID_FIELD_IN_CDB;\r
526                         scsiDev.phase = STATUS;\r
527                 }\r
528         }\r
529         else if (unlikely(command == 0x37))\r
530         {\r
531                 // READ DEFECT DATA\r
532                 uint32_t allocLength = (((uint16_t)scsiDev.cdb[7]) << 8) |\r
533                         scsiDev.cdb[8];\r
534 \r
535                 scsiDev.data[0] = 0;\r
536                 scsiDev.data[1] = scsiDev.cdb[1];\r
537                 scsiDev.data[2] = 0;\r
538                 scsiDev.data[3] = 0;\r
539                 scsiDev.dataLen = 4;\r
540 \r
541                 if (scsiDev.dataLen > allocLength)\r
542                 {\r
543                         scsiDev.dataLen = allocLength;\r
544                 }\r
545 \r
546                 scsiDev.phase = DATA_IN;\r
547         }\r
548         else\r
549         {\r
550                 commandHandled = 0;\r
551         }\r
552 \r
553         return commandHandled;\r
554 }\r
555 \r
556 void scsiDiskPoll()\r
557 {\r
558         uint32_t bytesPerSector = scsiDev.target->liveCfg.bytesPerSector;\r
559 \r
560         if (scsiDev.phase == DATA_IN &&\r
561                 transfer.currentBlock != transfer.blocks)\r
562         {\r
563                 // Take responsibility for waiting for the phase delays\r
564                 uint32_t phaseChangeDelayUs = scsiEnterPhaseImmediate(DATA_IN);\r
565 \r
566                 int totalSDSectors =\r
567                         transfer.blocks * SDSectorsPerSCSISector(bytesPerSector);\r
568                 uint32_t sdLBA =\r
569                         SCSISector2SD(\r
570                                 scsiDev.target->cfg->sdSectorStart,\r
571                                 bytesPerSector,\r
572                                 transfer.lba);\r
573 \r
574                 const int sdPerScsi = SDSectorsPerSCSISector(bytesPerSector);\r
575                 const int buffers = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
576                 int prep = 0;\r
577                 int i = 0;\r
578                 int scsiActive __attribute__((unused)) = 0; // unused if DMA disabled\r
579                 int sdActive = 0;\r
580 \r
581                 // It's highly unlikely that someone is going to use huge transfers\r
582                 // per scsi command, but if they do it'll be slower than usual.\r
583                 uint32_t totalScsiBytes = transfer.blocks * bytesPerSector;\r
584                 int useSlowDataCount = totalScsiBytes >= SCSI_XFER_MAX;\r
585                 if (!useSlowDataCount)\r
586                 {\r
587                         scsiSetDataCount(totalScsiBytes);\r
588                 }\r
589 \r
590                 while ((i < totalSDSectors) &&\r
591                         likely(scsiDev.phase == DATA_IN) &&\r
592                         likely(!scsiDev.resetFlag))\r
593                 {\r
594                         int completedDmaSectors;\r
595                         if (sdActive && (completedDmaSectors = sdReadDMAPoll(sdActive)))\r
596                         {\r
597                                 prep += completedDmaSectors;\r
598                                 sdActive -= completedDmaSectors;\r
599                         } else if (sdActive > 1)\r
600                         {\r
601                                 if ((scsiDev.data[SD_SECTOR_SIZE * (prep % buffers) + 510] != 0xAA) ||\r
602                                         (scsiDev.data[SD_SECTOR_SIZE * (prep % buffers) + 511] != 0x33))\r
603                                 {\r
604                                         prep += 1;\r
605                                         sdActive -= 1;\r
606                                 }\r
607                         }\r
608 \r
609                         if (!sdActive &&\r
610                                 (prep - i < buffers) &&\r
611                                 (prep < totalSDSectors) &&\r
612                                 ((totalSDSectors - prep) >= sdPerScsi) &&\r
613                                 (likely(!useSlowDataCount) || scsiPhyComplete()) &&\r
614                                 (HAL_SD_GetState(&hsd) != HAL_SD_STATE_BUSY)) // rx complete but IRQ not fired yet.\r
615                         {\r
616                                 // Start an SD transfer if we have space.\r
617                                 uint32_t startBuffer = prep % buffers;\r
618                                 uint32_t sectors = totalSDSectors - prep;\r
619                                 uint32_t freeBuffers = buffers - (prep - i);\r
620 \r
621                                 uint32_t contiguousBuffers = buffers - startBuffer;\r
622                                 freeBuffers = freeBuffers < contiguousBuffers\r
623                                         ? freeBuffers : contiguousBuffers;\r
624                                 sectors = sectors < freeBuffers ? sectors : freeBuffers;\r
625 \r
626                                 if (sectors > 128) sectors = 128; // 65536 DMA limit !!\r
627 \r
628                                 // Round-down when we have odd sector sizes.\r
629                                 if (sdPerScsi != 1)\r
630                                 {\r
631                                         sectors = (sectors / sdPerScsi) * sdPerScsi;\r
632                                 }\r
633 \r
634                                 for (int dodgy = 0; dodgy < sectors; dodgy++)\r
635                                 {\r
636                                         scsiDev.data[SD_SECTOR_SIZE * (startBuffer + dodgy) + 510] = 0xAA;\r
637                                         scsiDev.data[SD_SECTOR_SIZE * (startBuffer + dodgy) + 511] = 0x33;\r
638                                 }\r
639 \r
640                                 sdReadDMA(sdLBA + prep, sectors, &scsiDev.data[SD_SECTOR_SIZE * startBuffer]);\r
641 \r
642                                 sdActive = sectors;\r
643 \r
644                                 if (useSlowDataCount)\r
645                                 {\r
646                                         scsiSetDataCount((sectors / sdPerScsi) * bytesPerSector);\r
647                                 }\r
648 \r
649                                 // Wait now that the SD card is busy\r
650                                 // Chances are we've probably already waited sufficient time,\r
651                                 // but it's hard to measure microseconds cheaply. So just wait\r
652                                 // extra just-in-case. Hopefully it's in parallel with dma.\r
653                                 if (phaseChangeDelayUs > 0)\r
654                                 {\r
655                                         s2s_delay_us(phaseChangeDelayUs);\r
656                                         phaseChangeDelayUs = 0;\r
657                                 }\r
658                         }\r
659 \r
660                         if ((prep - i) > 0)\r
661                         {\r
662                                 int dmaBytes = SD_SECTOR_SIZE;\r
663                                 if ((i % sdPerScsi) == (sdPerScsi - 1))\r
664                                 {\r
665                                         dmaBytes = bytesPerSector % SD_SECTOR_SIZE;\r
666                                         if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;\r
667                                 }\r
668 \r
669                                 uint8_t* scsiDmaData = &(scsiDev.data[SD_SECTOR_SIZE * (i % buffers)]);\r
670                                 scsiWritePIO(scsiDmaData, dmaBytes);\r
671 \r
672                                 ++i;\r
673                         }\r
674                 }\r
675 \r
676                 if (phaseChangeDelayUs > 0 && !scsiDev.resetFlag) // zero bytes ?\r
677                 {\r
678                         s2s_delay_us(phaseChangeDelayUs);\r
679                         phaseChangeDelayUs = 0;\r
680                 }\r
681 \r
682                 // We've finished transferring the data to the FPGA, now wait until it's\r
683                 // written to the SCSI bus.\r
684                 __disable_irq();\r
685                 while (!scsiPhyComplete() &&\r
686                         likely(scsiDev.phase == DATA_IN) &&\r
687                         likely(!scsiDev.resetFlag))\r
688                 {\r
689                         __WFI();\r
690                 }\r
691                 __enable_irq();\r
692 \r
693                 while (HAL_SD_GetState(&hsd) == HAL_SD_STATE_BUSY)\r
694                 {\r
695                         // Wait while keeping BSY.\r
696                 }\r
697 \r
698                 if (scsiDev.phase == DATA_IN)\r
699                 {\r
700                         scsiDev.phase = STATUS;\r
701                 }\r
702                 scsiDiskReset();\r
703         }\r
704         else if (scsiDev.phase == DATA_OUT &&\r
705                 transfer.currentBlock != transfer.blocks)\r
706         {\r
707                 scsiEnterPhase(DATA_OUT);\r
708 \r
709                 const int sdPerScsi = SDSectorsPerSCSISector(bytesPerSector);\r
710                 int totalSDSectors = transfer.blocks * sdPerScsi;\r
711                 uint32_t sdLBA =\r
712                         SCSISector2SD(\r
713                                 scsiDev.target->cfg->sdSectorStart,\r
714                                 bytesPerSector,\r
715                                 transfer.lba);\r
716                 int i = 0;\r
717                 int clearBSY = 0;\r
718         int disconnected = 0;\r
719 \r
720                 int parityError = 0;\r
721                 int enableParity = scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY;\r
722 \r
723                 uint32_t maxSectors = sizeof(scsiDev.data) / SD_SECTOR_SIZE;\r
724 \r
725                 static_assert(SCSI_XFER_MAX >= sizeof(scsiDev.data), "Assumes SCSI_XFER_MAX >= sizeof(scsiDev.data)");\r
726 \r
727                 // Start reading and filling fifos as soon as possible.\r
728                 // It's highly unlikely that someone is going to use huge transfers\r
729                 // per scsi command, but if they do it'll be slower than usual.\r
730                 // Note: Happens in Macintosh FWB HDD Toolkit benchmarks which default\r
731                 // to 768kb\r
732                 uint32_t totalTransferBytes = transfer.blocks * bytesPerSector;\r
733                 int useSlowDataCount = totalTransferBytes >= SCSI_XFER_MAX;\r
734                 if (!useSlowDataCount)\r
735                 {\r
736                         DWT->CYCCNT = 0; // Start counting cycles\r
737                         scsiSetDataCount(totalTransferBytes);\r
738                 }\r
739 \r
740                 while ((i < totalSDSectors) &&\r
741                         likely(scsiDev.phase == DATA_OUT) &&\r
742                         likely(!scsiDev.resetFlag))\r
743                         // KEEP GOING to ensure FIFOs are in a good state.\r
744                         // likely(!parityError || !enableParity))\r
745                 {\r
746                         if (bytesPerSector == SD_SECTOR_SIZE)\r
747                         {\r
748                 uint32_t maxXferSectors = SCSI_XFER_MAX / SD_SECTOR_SIZE;\r
749                             uint32_t rem = totalSDSectors - i;\r
750                         uint32_t sectors = rem < maxXferSectors ? rem : maxXferSectors;\r
751 \r
752                                 uint32_t totalBytes = sectors * SD_SECTOR_SIZE;\r
753 \r
754                                 if (useSlowDataCount)\r
755                                 {\r
756                                         scsiSetDataCount(totalBytes);\r
757                                 }\r
758 \r
759                                 HAL_SD_WriteBlocks_DMA(&hsd, i + sdLBA, sectors);\r
760                 int j = 0;\r
761                 int prep = 0;\r
762                 int sdActive = 0;\r
763                                 uint32_t dmaFinishTime = 0;\r
764                 while (j < sectors && !scsiDev.resetFlag)\r
765                 {\r
766                     if (sdActive &&\r
767                         HAL_SD_GetState(&hsd) != HAL_SD_STATE_BUSY)\r
768                     {\r
769                         HAL_SD_CardStateTypeDef tmpCardState = HAL_SD_GetCardState(&hsd);\r
770                         if (tmpCardState != HAL_SD_CARD_PROGRAMMING)\r
771                         {\r
772                             j += sdActive;\r
773                             sdActive = 0;\r
774                         }\r
775                     }\r
776                                 if (!sdActive && ((prep - j) > 0))\r
777                                 {\r
778                                         // Start an SD transfer if we have space.\r
779                                         HAL_SD_WriteBlocks_Data(&hsd, &scsiDev.data[SD_SECTOR_SIZE * (j % maxSectors)]);\r
780 \r
781                                         sdActive = 1;\r
782                                 }\r
783 \r
784                     if (((prep - j) < maxSectors) &&\r
785                         (prep < sectors) &&\r
786                         scsiFifoReady())\r
787                     {\r
788                                     scsiReadPIO(\r
789                                                 &scsiDev.data[(prep % maxSectors) * SD_SECTOR_SIZE],\r
790                                                 SD_SECTOR_SIZE,\r
791                                                 &parityError);\r
792                         prep++;\r
793                         if (prep == sectors)\r
794                         {\r
795                                             dmaFinishTime = s2s_getTime_ms();\r
796                         }\r
797                     }\r
798                                 \r
799                     if (i + prep >= totalSDSectors &&\r
800                         !disconnected &&\r
801                                             (!parityError || !enableParity) &&\r
802                         s2s_elapsedTime_ms(dmaFinishTime) >= 180)\r
803                                 {\r
804                                         // We're transferring over the SCSI bus faster than the SD card\r
805                                         // can write.  All data is buffered, and we're just waiting for\r
806                                         // the SD card to complete. The host won't let us disconnect.\r
807                                         // Some drivers set a 250ms timeout on transfers to complete.\r
808                                             // SD card writes are supposed to complete\r
809                                         // within 200ms, but sometimes they don't.\r
810                                         // Just pretend we're finished.\r
811                                         process_Status();\r
812                                         clearBSY = process_MessageIn(0); // Will go to BUS_FREE state but keep BSY asserted.\r
813                         disconnected = 1;\r
814                                 }\r
815                 }\r
816 \r
817                 if (scsiDev.resetFlag)\r
818                 {\r
819                     HAL_SD_Abort(&hsd);\r
820                 }\r
821                 else\r
822                 {\r
823                     while (HAL_SD_GetState(&hsd) == HAL_SD_STATE_BUSY) {} // Waits for DMA to complete\r
824                     SDMMC_CmdStopTransfer(hsd.Instance);\r
825                 }\r
826 \r
827                 HAL_SD_CardStateTypeDef cardState = HAL_SD_GetCardState(&hsd);\r
828                 while ((cardState == HAL_SD_CARD_PROGRAMMING || cardState == HAL_SD_CARD_RECEIVING) &&\r
829                                         s2s_elapsedTime_ms(dmaFinishTime) < 180)\r
830                 {\r
831                     // Wait while the SD card is writing buffer to flash\r
832                     // The card may remain in the RECEIVING state (even though it's programming) if\r
833                     // it has buffer space to receive more data available.\r
834                     cardState = HAL_SD_GetCardState(&hsd);\r
835                 }\r
836 \r
837                                 if (!disconnected && \r
838                     i + sectors >= totalSDSectors &&\r
839                                         (!parityError || !enableParity))\r
840                                 {\r
841                                         // We're transferring over the SCSI bus faster than the SD card\r
842                                         // can write.  All data is buffered, and we're just waiting for\r
843                                         // the SD card to complete. The host won't let us disconnect.\r
844                                         // Some drivers set a 250ms timeout on transfers to complete.\r
845                                         // SD card writes are supposed to complete\r
846                                         // within 200ms, but sometimes they don't.\r
847                                         // Just pretend we're finished.\r
848                                         process_Status();\r
849                                         clearBSY = process_MessageIn(0); // Will go to BUS_FREE state but keep BSY asserted.\r
850                                 }\r
851 \r
852                 cardState = HAL_SD_GetCardState(&hsd);\r
853                 while (cardState == HAL_SD_CARD_PROGRAMMING || cardState == HAL_SD_CARD_RECEIVING) \r
854                 {\r
855                     // Wait while the SD card is writing buffer to flash\r
856                     // The card may remain in the RECEIVING state (even though it's programming) if\r
857                     // it has buffer space to receive more data available.\r
858                     cardState = HAL_SD_GetCardState(&hsd);\r
859                 }\r
860 \r
861                                 i += sectors;\r
862                         }\r
863                         else\r
864                         {\r
865                                 // Well, until we have some proper non-blocking SD code, we must\r
866                                 // do this in a half-duplex fashion. We need to write as much as\r
867                                 // possible in each SD card transaction.\r
868                                 // use sg_dd from sg_utils3 tools to test.\r
869 \r
870                             uint32_t rem = totalSDSectors - i;\r
871                         uint32_t sectors = rem < maxSectors ? rem : maxSectors;\r
872 \r
873                                 if (useSlowDataCount)\r
874                                 {\r
875                                         scsiSetDataCount(sectors * bytesPerSector);\r
876                                 }\r
877 \r
878                                 for (int scsiSector = i; scsiSector < i + sectors; ++scsiSector)\r
879                                 {\r
880                                         int dmaBytes = SD_SECTOR_SIZE;\r
881                                         if ((scsiSector % sdPerScsi) == (sdPerScsi - 1))\r
882                                         {\r
883                                                 dmaBytes = bytesPerSector % SD_SECTOR_SIZE;\r
884                                                 if (dmaBytes == 0) dmaBytes = SD_SECTOR_SIZE;\r
885                                         }\r
886 \r
887                                         scsiReadPIO(&scsiDev.data[SD_SECTOR_SIZE * (scsiSector - i)], dmaBytes, &parityError);\r
888                                 }\r
889                                 if (!parityError || !enableParity)\r
890                                 {\r
891                                         BSP_SD_WriteBlocks_DMA(&scsiDev.data[0], i + sdLBA, sectors);\r
892                                 }\r
893                                 i += sectors;\r
894                         }\r
895                 }\r
896 \r
897                 // Should already be complete here as we've ready the FIFOs\r
898                 // by now. Check anyway.\r
899                 __disable_irq();\r
900                 while (!scsiPhyComplete() && likely(!scsiDev.resetFlag))\r
901                 {\r
902                         __WFI();\r
903                 }\r
904                 __enable_irq();\r
905 \r
906                 if (clearBSY)\r
907                 {\r
908                         enter_BusFree();\r
909                 }\r
910 \r
911                 if (scsiDev.phase == DATA_OUT)\r
912                 {\r
913                         if (parityError &&\r
914                                 (scsiDev.boardCfg.flags & S2S_CFG_ENABLE_PARITY))\r
915                         {\r
916                                 scsiDev.target->sense.code = ABORTED_COMMAND;\r
917                                 scsiDev.target->sense.asc = SCSI_PARITY_ERROR;\r
918                                 scsiDev.status = CHECK_CONDITION;;\r
919                         }\r
920                         scsiDev.phase = STATUS;\r
921                 }\r
922                 scsiDiskReset();\r
923         }\r
924 }\r
925 \r
926 void scsiDiskReset()\r
927 {\r
928         scsiDev.dataPtr = 0;\r
929         scsiDev.savedDataPtr = 0;\r
930         scsiDev.dataLen = 0;\r
931         // transfer.lba = 0; // Needed in Request Sense to determine failure\r
932         transfer.blocks = 0;\r
933         transfer.currentBlock = 0;\r
934 \r
935         // Cancel long running commands!\r
936 #if 0\r
937         if (\r
938                 ((scsiDev.boardCfg.flags & S2S_CFG_ENABLE_CACHE) == 0) ||\r
939                         (transfer.multiBlock == 0)\r
940                 )\r
941 #endif\r
942         {\r
943                 sdCompleteTransfer();\r
944         }\r
945 \r
946         transfer.multiBlock = 0;\r
947 }\r
948 \r
949 void scsiDiskInit()\r
950 {\r
951         scsiDiskReset();\r
952 \r
953         // Don't require the host to send us a START STOP UNIT command\r
954         blockDev.state = DISK_STARTED;\r
955 }\r
956 \r