好不容易结束了sd_spinup_disk(),马上我们就遇到了三座大山.它们是sd_read_capacity(),sd_read_write_protect_flag(),sd_read_cache_type(),要继续往下看,我们不得不先推翻这三座大山.旧的三座大山已经在*的英明领导下成功推翻了,但是今天我们的人民却身处新三座大山的压迫之下,眼前这三个函数堪比臭名昭著的房改医改教改.要知道整个sd.c这个文件也不过是1900行,可是光这三个函数就占了360行,你不服不行啊!
第一座大山,sd_read_capacity.
1130 /*
1131  * read disk capacity
      *
      * Annotation (unnumbered lines are not part of the original listing):
      *
      * @sdkp:   the disk whose ->capacity is filled in; the underlying SCSI
      *          device is taken from sdkp->device.
      * @buffer: caller-provided scratch buffer for the command's response;
      *          at most its first 12 bytes are used here.
      *
      * Sends the 10-byte READ CAPACITY command (up to 3 attempts).  If the
      * returned block address is 0xffffffff and sector_t is wider than
      * 4 bytes, the command is re-sent as SERVICE_ACTION_IN /
      * SAI_READ_CAPACITY_16 (the "longrc" path).
      *
      * Outcomes:
      *  - media_not_present() is true: return without changing anything.
      *  - short command fails: log, set sdkp->capacity = 0 and return.
      *  - long command fails: log, fall back to 0xffffffff + 1 blocks.
      *  - success: sdkp->capacity = last block address + 1, adjusted for
      *    the fix_capacity / guess_capacity device quirks.
      * Afterwards the sector size is validated, the queue's hard sector
      * size is set, the size is logged, sdkp->capacity is rescaled to
      * 512-byte units and the sector size is stored in the scsi_device.
1132  */
1133 static void
1134 sd_read_capacity(struct scsi_disk *sdkp, unsigned char *buffer)
1135 {
1136 	unsigned char cmd[16];
1137 	int the_result, retries;
1138 	int sector_size = 0;
1139 	int longrc = 0;		/* 0: 10-byte command, 1: READ CAPACITY(16) */
1140 	struct scsi_sense_hdr sshdr;
1141 	int sense_valid = 0;
1142 	struct scsi_device *sdp = sdkp->device;
1143
1144 repeat:
1145 	retries = 3;
1146 	do {
1147 		if (longrc) {
1148 			memset((void *) cmd, 0, 16);
1149 			cmd[0] = SERVICE_ACTION_IN;
1150 			cmd[1] = SAI_READ_CAPACITY_16;
1151 			cmd[13] = 12;	/* same 12 bytes as the transfer length below */
1152 			memset((void *) buffer, 0, 12);
1153 		} else {
1154 			cmd[0] = READ_CAPACITY;
1155 			memset((void *) &cmd[1], 0, 9);
1156 			memset((void *) buffer, 0, 8);
1157 		}
1158
1159 		the_result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
1160 					      buffer, longrc ? 12 : 8, &sshdr,
1161 					      SD_TIMEOUT, SD_MAX_RETRIES);
1162
1163 		if (media_not_present(sdkp, &sshdr))
1164 			return;
1165
1166 		if (the_result)
1167 			sense_valid = scsi_sense_valid(&sshdr);
1168 		retries--;
1169
1170 	} while (the_result && retries);
1171
1172 	if (the_result && !longrc) {
1173 		sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY failed\n");
1174 		sd_print_result(sdkp, the_result);
1175 		if (driver_byte(the_result) & DRIVER_SENSE)
1176 			sd_print_sense_hdr(sdkp, &sshdr);
1177 		else
1178 			sd_printk(KERN_NOTICE, sdkp, "Sense not available.\n");
1179
1180 		/* Set dirty bit for removable devices if not ready -
1181 		 * sometimes drives will not report this properly. */
1182 		if (sdp->removable &&
1183 		    sense_valid && sshdr.sense_key == NOT_READY)
1184 			sdp->changed = 1;
1185
1186 		/* Either no media are present but the drive didn't tell us,
1187 		   or they are present but the read capacity command fails */
1188 		/* sdkp->media_present = 0; -- not always correct */
1189 		sdkp->capacity = 0; /* unknown mapped to zero - as usual */
1190
1191 		return;
1192 	} else if (the_result && longrc) {
1193 		/* READ CAPACITY(16) has been failed */
1194 		sd_printk(KERN_NOTICE, sdkp, "READ CAPACITY(16) failed\n");
1195 		sd_print_result(sdkp, the_result);
1196 		sd_printk(KERN_NOTICE, sdkp, "Use 0xffffffff as device size\n");
1197
1198 		sdkp->capacity = 1 + (sector_t) 0xffffffff;
1199 		goto got_data;	/* sector_size still holds the 10-byte reply's value */
1200 	}
1201
1202 	if (!longrc) {
1203 		sector_size = (buffer[4] << 24) |
1204 			(buffer[5] << 16) | (buffer[6] << 8) | buffer[7];
1205 		if (buffer[0] == 0xff && buffer[1] == 0xff &&
1206 		    buffer[2] == 0xff && buffer[3] == 0xff) {
1207 			if(sizeof(sdkp->capacity) > 4) {
1208 				sd_printk(KERN_NOTICE, sdkp, "Very big device. "
1209 					  "Trying to use READ CAPACITY(16).\n");
1210 				longrc = 1;
1211 				goto repeat;
1212 			}
1213 			sd_printk(KERN_ERR, sdkp, "Too big for this kernel. Use "
1214 				  "a kernel compiled with support for large "
1215 				  "block devices.\n");
1216 			sdkp->capacity = 0;
1217 			goto got_data;
1218 		}
1219 		sdkp->capacity = 1 + (((sector_t)buffer[0] << 24) |
1220 			(buffer[1] << 16) |
1221 			(buffer[2] << 8) |
1222 			buffer[3]);
1223 	} else {
1224 		sdkp->capacity = 1 + (((u64)buffer[0] << 56) |
1225 			((u64)buffer[1] << 48) |
1226 			((u64)buffer[2] << 40) |
1227 			((u64)buffer[3] << 32) |
1228 			((sector_t)buffer[4] << 24) |
1229 			((sector_t)buffer[5] << 16) |
1230 			((sector_t)buffer[6] << 8) |
1231 			(sector_t)buffer[7]);
1232
1233 		sector_size = (buffer[8] << 24) |
1234 			(buffer[9] << 16) | (buffer[10] << 8) | buffer[11];
1235 	}
1236
1237 	/* Some devices return the total number of sectors, not the
1238 	 * highest sector number. Make the necessary adjustment. */
1239 	if (sdp->fix_capacity) {
1240 		--sdkp->capacity;
1241
1242 	/* Some devices have version which report the correct sizes
1243 	 * and others which do not. We guess size according to a heuristic
1244 	 * and err on the side of lowering the capacity. */
1245 	} else {
1246 		if (sdp->guess_capacity)
1247 			if (sdkp->capacity & 0x01) /* odd sizes are odd */
1248 				--sdkp->capacity;
1249 	}
1250
1251 got_data:
1252 	if (sector_size == 0) {
1253 		sector_size = 512;
1254 		sd_printk(KERN_NOTICE, sdkp, "Sector size 0 reported, "
1255 			  "assuming 512.\n");
1256 	}
1257
1258 	if (sector_size != 512 &&
1259 	    sector_size != 1024 &&
1260 	    sector_size != 2048 &&
1261 	    sector_size != 4096 &&
1262 	    sector_size != 256) {
1263 		sd_printk(KERN_NOTICE, sdkp, "Unsupported sector size %d.\n",
1264 			  sector_size);
1265 		/*
1266 		 * The user might want to re-format the drive with
1267 		 * a supported sectorsize. Once this happens, it
1268 		 * would be relatively trivial to set the thing up.
1269 		 * For this reason, we leave the thing in the table.
1270 		 */
1271 		sdkp->capacity = 0;
1272 		/*
1273 		 * set a bogus sector size so the normal read/write
1274 		 * logic in the block layer will eventually refuse any
1275 		 * request on this device without tripping over power
1276 		 * of two sector size assumptions
1277 		 */
1278 		sector_size = 512;
1279 	}
1280 	{
1281 		/*
1282 		 * The msdos fs needs to know the hardware sector size
1283 		 * So I have created this table. See ll_rw_blk.c
1284 		 * Jacques Gelinas (Jacques@solucorp.qc.ca)
1285 		 */
1286 		int hard_sector = sector_size;
1287 		sector_t sz = (sdkp->capacity/2) * (hard_sector/256);	/* size in 512-byte units */
1288 		request_queue_t *queue = sdp->request_queue;
1289 		sector_t mb = sz;
1290
1291 		blk_queue_hardsect_size(queue, hard_sector);
1292 		/* avoid 64-bit division on 32-bit platforms */
1293 		sector_div(sz, 625);
1294 		mb -= sz - 974;		/* mb = sz * 624/625, plus 974 for rounding */
1295 		sector_div(mb, 1950);	/* 624 / (625 * 1950) == 512 / 10^6 */
1296
1297 		sd_printk(KERN_NOTICE, sdkp,
1298 			  "%llu %d-byte hardware sectors (%llu MB)\n",
1299 			  (unsigned long long)sdkp->capacity,
1300 			  hard_sector, (unsigned long long)mb);
1301 	}
1302
1303 	/* Rescale capacity to 512-byte units */
1304 	if (sector_size == 4096)
1305 		sdkp->capacity <<= 3;
1306 	else if (sector_size == 2048)
1307 		sdkp->capacity <<= 2;
1308 	else if (sector_size == 1024)
1309 		sdkp->capacity <<= 1;
1310 	else if (sector_size == 256)
1311 		sdkp->capacity >>= 1;
1312
1313 	sdkp->device->sector_size = sector_size;
1314 }
洋洋洒洒近200行.简而言之,这个函数用一句话来表达就是获取这个磁盘的容量,或者专业一点说,发送READ CAPACITY命令.而熟悉SCSI命令集的兄弟们应该知道,很多SCSI命令都有至少两种版本,不同版本的命令格式会不一样,当然返回的信息量也不尽相同,比如READ CAPACITY命令就有10个字节的和16个字节的两个版本.在SBC-2的5.10节和5.11节分别介绍的是READ CAPACITY(10) command和READ CAPACITY(16) command.后者比前者多一个保护信息.但是在我们读之前我们并不知道该用哪个命令,所以这里的基本思路就是先用短命令,如果短命令返回的块地址全为0xFF(说明容量超出了短命令所能表示的范围)就改用长命令,这就是1211行goto repeat的目的.(注意,如果短命令本身执行失败,函数并不会去试长命令,而是在1189行把capacity设为0后直接返回.)在goto repeat之前1210行设置了longrc为1.我们这里先给出来自SBC-2中对READ CAPACITY命令的格式定义:
我们可以用一个实例来描述这个命令,sg_readcap可以手工发送READ CAPACITY命令.下面是针对我的一个号称128M的U盘发送这个命令的结果.
[root@localhost ~]# sg_readcap /dev/sdc
Read Capacity results:
Last logical block address=257535 (0x3edff), Number of blocks=257536
Logical block length=512 bytes
Hence:
Device size: 131858432 bytes, 125.8 MiB, 0.13 GB
与此同时,我们结合代码来看,这个函数实际上比较麻烦的地方在于对buffer数组的判断.实际上buffer数组装载了READ CAPACITY命令的返回信息.而我们从1203行开始判断,首先我们知道这个buffer是我们在sd_revalidate_disk()中申请的.其大小为SD_BUF_SIZE,即512个字节.那么这个buffer的数据究竟是什么模样呢?SBC-2中Table-35对READ CAPACITY(10)的返回数据给出了如下格式:返回数据一共8个字节,其中byte0,byte1,byte2,byte3是RETURNED LOGICAL BLOCK ADDRESS,byte4,byte5,byte6,byte7是Block的长度(以字节为单位).
这里byte4,byte5,byte6,byte7共同描述了Block的大小.即所谓的扇区大小,或者说代码中的sector_size,大多数情况下我们看到的都是512bytes.这里我的这个U盘当然也属于这种情况.
RETURNED LOGICAL BLOCK ADDRESS就是告诉你这个设备有多少个Block,或者通俗点说,有多少个扇区.当然,更准确地说,如果你这个磁盘有N个Block,那么这里返回的是最后一个Block的编号,因为编号是从0开始,所以最后一个Block的编号就是N-1.所以这里返回的是N-1.而SBC-2规定,倘若byte0,byte1,byte2,byte3全为FF,那么说明READ CAPACITY(10)不足以读取这块磁盘的容量.这有点类似于传说中的缓冲区溢出.这种情况下再判断一下,如果sizeof(sdkp->capacity)确实大于4,那么这里溢出了我们goto repeat,改而发送READ CAPACITY(16).实际上,因为capacity是sector_t类型的,而在include/linux/types.h中,sector_t是这么定义的,
140 #ifdef CONFIG_LBD
141 typedef u64 sector_t;	/* CONFIG_LBD set: sector numbers are u64 */
142 #else
143 typedef unsigned long sector_t;	/* otherwise: as wide as unsigned long on this platform */
144 #endif
所以,sector_t的size有可能是大于4的,也有可能是等于4的.如果等于4那就没办法了.只能设置capacity为0.我们没有办法记录下究竟有多少个扇区,那么我们大不了就不记录.(同时我们下面也可以看到几处我们设置了capacity为0,其目的都是一样,只做力所能及的事情,而不是强人所难,毕竟强扭的瓜不甜.)
当然如果没有溢出,那么就执行1219行,设置sdkp的capacity,刚才说了,它和byte0,byte1,byte2,byte3的共同作用的区别就是N和N-1的关系,所以这里我们看到需要加上1.因此sdkp->capacity记录的就是磁盘有多少个扇区.
而1223行这个else这一段,就是针对长命令的buffer进行处理的,因为SBC-2规定了,长命令的返回结果是下面这幅图这样的:
可以看出,这次byte0,byte1,…,byte7这8个byte共同作用来表示了Block数.而byte8,byte9,byte10,byte11共同作用表示了block的大小,或者说扇区大小.
1239行说的也就是N和N-1的那件事,有些设备不按常理出牌,它汇报的时候已经把那个1给包括进来了,所以这里咱们只能再减一,凡是有这种特殊需求的设备会设置fix_capacity.
1245行又是针对另外一些不按常理出牌的设备的应对措施.这个咱就飘过了.毕竟连磁盘的大小都要别人去猜这厂家也太无耻了.
1252行,对于那些内向的设备,我们只能假设它们是遵守游戏规则的,我们假设它们的扇区大小是大众化的512.
另一方面,1258行这一段,众所周知,扇区大小总是512,1024,2048,4096,最次的也是256.除此之外的设备基本上就可以去参加设备残奥会了,没必要拿出来丢人现眼.
1280行至1301行的目的在注释里说得很清楚,咱们可以飘过不理.只是需要注意1291行调用了blk_queue_hardsect_size(),这个函数非常的短,就是一句话,即把一个request_queue_t类型的指针所指向的请求队列的成员hardsect_size的值设置为这里的参数hard_sector.还是那句话,基本上也就是设置成512,毕竟这是绝对主流.如果你的设备非要显示一下80后的与众不同的个性,那我也没办法.只是庄子曾经曰过:“莫装吊,装吊遭狗咬!”
1304行开始的这一段if-else if,就是针对sector_size调整一下capacity,因为capacity应该用来记录有多少个扇区,而我们希望在代码中统一使用512字节的扇区,(这也是Linux中的一贯规矩)所以这里需要按比例调整一下.即原本读出来是说有100个扇区,但是每个扇区比如是4096个字节,那么如果从软件角度以512字节为单位进行访问,我们就可以记录说这个磁盘有800个扇区.
最后,1313行,把sector_size也记录在sdkp的成员struct scsi_device指针device的sector_size内.
原文见:http://blog.csdn.net/fudan_abc/article/details/1927932