diff options
author | Ludovic Pouzenc <lpouzenc@gmail.com> | 2015-07-04 22:31:56 +0200 |
---|---|---|
committer | Ludovic Pouzenc <lpouzenc@gmail.com> | 2015-07-04 22:31:56 +0200 |
commit | 5534a6c1353b00cfebe6f8a155b6420687858843 (patch) | |
tree | 9b4a8cf9d16642ab3e77eaffa8835ce8b4279c29 | |
parent | 32d4e844404295546e5623251037c09f83130981 (diff) | |
download | raidguessfs-5534a6c1353b00cfebe6f8a155b6420687858843.tar.gz raidguessfs-5534a6c1353b00cfebe6f8a155b6420687858843.tar.bz2 raidguessfs-5534a6c1353b00cfebe6f8a155b6420687858843.zip |
myraid: lecture RAID sur disque de donnée ou par calcul de parité
-rw-r--r-- | mydisks.py | 5 | ||||
-rw-r--r-- | myraid.py | 224 | ||||
-rwxr-xr-x | raidguessfs.py | 6 |
3 files changed, 137 insertions, 98 deletions
@@ -68,6 +68,11 @@ class MyDisks(): self.disks_size[d] = 0 logging.debug("Exit. open_disks()") + + def is_readable(self,disk_no,offset,size): + import random + return random.randint(0,100) > 1 # FIXME : implement this (parse ddrescue log files) + def read(self,disk_no,offset,size): self.disks[disk_no].seek(offset) return self.disks[disk_no].read(size) @@ -19,21 +19,66 @@ # along with RaidGuessFS. If not, see <http://www.gnu.org/licenses/> import logging, numpy +import mydisks class MyRaid(): """Auxiliary class, managing RAID layer""" RAID_TYPES = [ '0', '1', '5', '5+0' ] RAID5_LAYOUTS = [ 'la', 'ra', 'ls', 'rs' ] + @staticmethod + def xor_blocks(fd_list, offset, size): + """Compute bitwise XOR against a bunch of disks slice""" + logging.info("Enter xor_blocks(fd_list(%i),0x%011x,%d)"%(len(fd_list), offset, size)) + + if size % 8 != 0: + raise ValueError('xor_blocks : size must be multiple of 8') + dt = numpy.dtype('<Q8') + + fd_list[0].seek(offset) + str_b1=fd_list[0].read(size) + numpy_b1 = numpy.fromstring(str_b1, dtype=dt) + all_zero = (numpy.count_nonzero(numpy_b1) == 0 ) + any_zero = all_zero + + for fd in fd_list[1:]: + fd.seek(offset) + str_b2=fd.read(size) + numpy_b2 = numpy.fromstring(str_b2, dtype=dt) + b2_zero = (numpy.count_nonzero(numpy_b2) == 0 ) + if all_zero == True: + all_zero = b2_zero + if any_zero == False: + any_zero = b2_zero + + numpy.bitwise_xor(numpy_b1,numpy_b2,numpy_b1) + + if all_zero == True: + result = 'z' + elif numpy.count_nonzero(numpy_b1) == 0: + if any_zero: + result = 'g' + else: + result = 'G' + else: + result = 'b' + + logging.info("Exit. xor_blocks(fd_list,%d,%d)"%(offset, size)) + #import binascii + #logging.debug(binascii.hexlify(numpy_b1)) + return (result,numpy_b1) + + def __init__(self, *args, **kwargs): - self.disks = [] + self.d = None self.raid_start = 0 self.raid_end = 0 - self.raid_sector_size = 512 + self.raid_sector_size = 512 # TODO : should be self.d.sector_size self.raid_chunk_size = 65536 self.raid_disk_order = [] self.raid_disk_count = 0 self.raid_layout = 'ls' + self.raid_disks = [] def get_raid_start(self): return self.raid_start @@ -53,8 +98,10 @@ class MyRaid(): def get_raid_layout(self): return self.raid_layout - def set_disks(self, disks): - self.disks = disks + def set_disks(self, new_mydisks): + # FIXME : self.d don't need to be updaed (pass on __init__) + self.d = new_mydisks + self.set_raid_disk_order(range(self.d.disk_count)) def set_raid_start(self, new_raid_start): """Update the start offset of raid data on underlying disks""" @@ -70,19 +117,19 @@ class MyRaid(): def set_raid_disk_order(self, new_raid_disk_order): """Update the raid logical disk order""" - card=len(self.disks) - check=[0]*card + check=[0] * self.d.disk_count for item in new_raid_disk_order: d = int(item) - if not 0 <= d < card: - raise ValueError('Value out of range : %i [0,%i]'%(d,card-1)) + if not 0 <= d < self.d.disk_count: + raise ValueError('Value out of range : %i [0,%i]'%(d,self.d.disk_count-1)) check[d]=check[d]+1 - for d in range(card): + for d in range(self.d.disk_count): if check[d] != 1 and check[d] != 0: raise ValueError('Disk %i appears %i times (must be 0 or 1)'%(d,check[d])) - self.raid_disk_order = new_raid_disk_order self.raid_disk_count = len(new_raid_disk_order) + self.raid_disk_order = new_raid_disk_order + self.raid_disks = [ self.d.disks[i] for i in self.raid_disk_order ] def set_raid_layout(self, new_raid_layout): if new_raid_layout in MyRaid.RAID5_LAYOUTS: @@ -91,88 +138,50 @@ class MyRaid(): raise ValueError('raid_layout has to be one of %s'%' '.join(RAID_LAYOUTS)) def sizeof_raid_result(self, raid_type): - size = self.raid_end - self.raid_start - if size <= 0 : - return 0 - else: - return { - '0' : size * self.raid_disk_count, - '1' : size if self.raid_disk_count == 2 else 0, - '5' : size * (self.raid_disk_count - 1) if self.raid_disk_count >= 3 else 0, - '5+0': size * (self.raid_disk_count - 2) if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0 else 0, - }[raid_type] + size = max(0, self.raid_end - self.raid_start) + return { + '0' : size * self.raid_disk_count, + '1' : size if self.raid_disk_count == 2 else 0, + '5' : size * (self.raid_disk_count - 1) if self.raid_disk_count >= 3 else 0, + '5+0': size * (self.raid_disk_count - 2) if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0 else 0, + }[raid_type] def sizeof_disk_xor(self, raid_type): - size = self.raid_end - self.raid_start - if size <= 0: - return 0 - else: - return { - '0' : 0, # TODO Could contain some plain text error message - '1' : size if self.raid_disk_count == 2 else 0, - '5' : size if self.raid_disk_count >= 3 else 0, - '5+0': size if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0 else 0, - }[raid_type] + return max(0, self.raid_end - self.raid_start) def sizeof_disk_parity(self, raid_type): - return self.sizeof_disk_xor(raid_type) / self.raid_sector_size * 16 - - def xor_blocks(self,fd_list, offset, size): - """Compute bitwise XOR against a bunch of disks slice""" - logging.info("Enter xor_blocks(fd_list,%d,%d)"%(offset, size)) - - if size % 8 != 0: - raise ValueError('xor_blocks : size must be multiple of 8') - dt = numpy.dtype('<Q8') - - fd_list[0].seek(offset) - str_b1=fd_list[0].read(size) - numpy_b1 = numpy.fromstring(str_b1, dtype=dt) - all_zero = (numpy.count_nonzero(numpy_b1) == 0 ) - any_zero = all_zero - - for fd in fd_list[1:]: - fd.seek(offset) - str_b2=fd.read(size) - numpy_b2 = numpy.fromstring(str_b2, dtype=dt) - b2_zero = (numpy.count_nonzero(numpy_b2) == 0 ) - if all_zero == True: - all_zero = b2_zero - if any_zero == False: - any_zero = b2_zero - - numpy.bitwise_xor(numpy_b1,numpy_b2,numpy_b1) - - if all_zero == True: - result = 'z' - elif numpy.count_nonzero(numpy_b1) == 0: - if any_zero: - result = 'g' - else: - result = 'G' - else: - result = 'b' - - logging.info("Exit. xor_blocks(fd_list,%d,%d)"%(offset, size)) - #import binascii - #logging.warn(binascii.hexlify(numpy_b1)) - return (result,numpy_b1) + size = max(0, self.raid_end - self.raid_start) / self.raid_sector_size * 16 + return { + '0' : 64, + '1' : size if self.raid_disk_count == 2 else 64, + '5' : size if self.raid_disk_count >= 3 else 64, + '5+0': size if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0 else 64, + }[raid_type] def read_disk_xor(self,raid_type,offset,size): - raid_disks = [ self.disks[i] for i in self.raid_disk_order ] - return self.xor_blocks(raid_disks,offset,size)[1].tostring() + """Returns raw bitwise XOR against a bunch of disks slice""" + return MyRaid.xor_blocks(self.raid_disks,offset,size)[1].tostring() def read_disk_parity(self,raid_type,offset,size): """Returns textual information about parity status of each sector""" logging.warn("Enter read_disk_parity(%s,%d,%d)"%(raid_type,offset,size)) - raid_disks = [ self.disks[i] for i in self.raid_disk_order ] + msg = { + '0' : 'There no notion of parity in RAID 0 mode\n', + '1' : None if self.raid_disk_count == 2 else 'Wrong disk count (should be 2)\n', + '5' : None if self.raid_disk_count >= 3 else 'Wrong disk count (should be >=3)\n', + '5+0': None if self.raid_disk_count >= 6 and self.raid_disk_count % 2 == 0 + else 'Wrong disk count (should be >=6 and even)\n', + }[raid_type] + if msg: + return msg[offset:offset+size] + start = self.raid_start + offset * self.raid_sector_size / 16 end = start + size * self.raid_sector_size / 16 #TODO : improove for nested levels if raid_type in ['1','5', '5+0']: result = ''.join( - [ '0x%011x %c\n'%( addr, self.xor_blocks(raid_disks, addr, self.raid_sector_size)[0]) + [ '0x%011x %c\n'%( addr, MyRaid.xor_blocks(self.raid_disks, addr, self.raid_sector_size)[0]) for addr in range(start, end, self.raid_sector_size) ]) else: @@ -184,15 +193,12 @@ class MyRaid(): def read_raid_result(self,raid_type,offset,size): """Returns actual RAID data""" - raid_disks = [ self.disks[i] for i in self.raid_disk_order ] # TODO A garder en attribut ? - disk_count = len(self.raid_disk_order) # TODO doublon ? - if raid_type == '0': segment_no = offset / self.raid_chunk_size segment_off = offset % self.raid_chunk_size - stripe_no = segment_no / disk_count + stripe_no = segment_no / self.raid_disk_count par_disk = -1 - data_disk = segment_no % disk_count + data_disk = segment_no % self.raid_disk_count off_disk = self.raid_start + stripe_no * self.raid_chunk_size + segment_off size2 = min(size, (segment_no+1) * self.raid_chunk_size - offset) @@ -208,35 +214,63 @@ class MyRaid(): elif raid_type == '5': segment_no = offset / self.raid_chunk_size segment_off = offset % self.raid_chunk_size - stripe_no = segment_no / (disk_count-1) + stripe_no = segment_no / (self.raid_disk_count-1) if self.raid_layout in ['ls','la']: - par_disk = (disk_count-1) - (stripe_no % disk_count) + par_disk = (self.raid_disk_count-1) - (stripe_no % self.raid_disk_count) else: # self.raid_layout in ['rs','ra']: - par_disk = stripe_no % disk_count + par_disk = stripe_no % self.raid_disk_count if self.raid_layout in ['ls','rs']: - data_disk = (par_disk+1 + (segment_no % (disk_count-1)) ) % disk_count + data_disk = (par_disk+1 + (segment_no % (self.raid_disk_count-1)) ) % self.raid_disk_count else: # self.raid_layout in ['la','ra']: - data_disk = segment_no % (disk_count-1) + data_disk = segment_no % (self.raid_disk_count-1) if data_disk >= par_disk: data_disk = data_disk + 1 off_disk = self.raid_start + stripe_no * self.raid_chunk_size + segment_off + # Note : self make shorter read than asked but convince the reader to be chunck aligned, which is great size2 = min(size, (segment_no+1) * self.raid_chunk_size - offset) + else: + raise Exception('Unimplemented read_raid_result() for raid_type == %s', raid_type) - logging.info("raid.read_result(%s): offset=%d,segment_no=%d,segment_off=%d,stripe_no=%d,par_disk=%d,data_disk=%d,off_disk=%d,size2=%d,segment_off+size2=%d" + logging.debug("raid.read_result(%s): offset=%d,segment_no=%d,segment_off=%d,stripe_no=%d,par_disk=%d,data_disk=%d,off_disk=%d,size2=%d,segment_off+size2=%d" % (raid_type,offset,segment_no,segment_off,stripe_no,par_disk,data_disk,off_disk,size2,segment_off+size2) ) - #TODO recorver from parity if damaged sectors in data_disk - data_fd = raid_disks[data_disk] - data_fd.seek(off_disk) - data = data_fd.read(size2) + data_fd = self.raid_disks[data_disk] - # This kills performance but don't make short reads before EOF - #if size2 > 0 and size2 < size: - # data += self.read_result(self,raid_type,offset+size2,size-size2) + if self.d.is_readable(self.raid_disk_order[data_disk],off_disk,size2): + # No damaged sectors until the end of the chunck, so just read the data disk + data_fd.seek(off_disk) + data = data_fd.read(size2) + else: + logging.warn('Try to recovering damaged chunck (raid_offset: 0x%011x, data_disk: %i, disk_offset: 0x%011x' + % (offset, self.raid_disk_order[data_disk], off_disk) ) + # Damaged sectors, check / recover every sector + other_disks = list(self.raid_disk_order) + other_disks.remove(self.raid_disk_order[data_disk]) + other_fds = list(self.raid_disks) + other_fds.remove(data_fd) + + data_arr = [] + for s in range(off_disk, off_disk+size2, self.raid_sector_size): + if self.d.is_readable(self.raid_disk_order[data_disk],s,self.raid_sector_size): + # Current sector is readable from data disk, read it + logging.debug('-> 0x%011x : readable'%s) + data_fd.seek(off_disk) + data_arr.append(data_fd.read(self.raid_sector_size)) + else: + # Current sector is dead on data disk, recover it if possible + recoverable = reduce(lambda a,b: a and b, [ + self.d.is_readable(other_disk,off_disk,self.raid_sector_size) for other_disk in other_disks + ]) + if recoverable: + logging.info('-> 0x%011x : recoverable'%s) + data_arr.append( MyRaid.xor_blocks(other_fds, s,self.raid_sector_size)[1].tostring() ) + else: + logging.warn('-> 0x%011x : unrecoverable'%s) + data_arr.append( '\0' * self.raid_sector_size) + data = ''.join(data_arr) return data - diff --git a/raidguessfs.py b/raidguessfs.py index 409ef0c..94f36e7 100755 --- a/raidguessfs.py +++ b/raidguessfs.py @@ -147,7 +147,7 @@ class RaidGuessFS(fuse.Fuse): self.d.set_disk_count(i) self.d.open_disks() self._refresh_disk_dentries() - self.raid.set_disks(self.d.disks) + self.raid.set_disks(self.d) self.raid.set_raid_end(min(self.d.disks_size)-1) self.update_raid_disk_order(range(i)) @@ -372,8 +372,8 @@ RaidGuessFS is a pseudo-filesystem that allows to guess parameters and disk orde LOG_FILENAME = "raidguessfs.log" #logging.basicConfig(filename=LOG_FILENAME,level=logging.WARN,) - #logging.basicConfig(filename=LOG_FILENAME,level=logging.INFO,) - logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG,) + logging.basicConfig(filename=LOG_FILENAME,level=logging.INFO,) + #logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG,) server = RaidGuessFS(version="%prog " + fuse.__version__,usage=usage,dash_s_do='setsingle') server.multithreaded = False |