Line data Source code
1 : #include "clusterautoconfig.h"
2 :
3 : #include <unistd.h>
4 : #include <inttypes.h>
5 : #include <stdio.h>
6 : #include <stdint.h>
7 : #include <stdlib.h>
8 : #include <string.h>
9 : #include <errno.h>
10 : #include <libintl.h>
11 : #define _(String) gettext(String)
12 :
13 : #include <logging.h>
14 : #include "libgfs2.h"
15 : #include "osi_list.h"
16 : #include "fsck.h"
17 : #include "fs_recovery.h"
18 :
/* Set to 1 by ri_compare() whenever it overwrites an on-disk rindex field. */
19 : static int rindex_modified = 0;
/* Journal blocks that look like rgrp headers: filled by find_journaled_rgs(),
   queried by is_false_rg(), released at the end of find_shortest_rgdist(). */
20 : static struct special_blocks false_rgrps;
/* Tree of calculated rgrps, built by rindex_rebuild() or rindex_calculate(). */
21 : static struct osi_root rgcalc;
22 : /* Number of resource groups; assigned by compute_rgrp_layout() */
23 : static uint64_t nrgrp = 0;
24 :
/* NOTE(review): not referenced in the visible part of this file; presumably a
   percentage threshold used further down - confirm. */
25 : #define BAD_RG_PERCENT_TOLERANCE 11
/* Starting offset (in blocks) used by hunt_and_peck() to get past the bitmaps
   of the previous rgrp before it begins searching. */
26 : #define AWAY_FROM_BITMAPS 0x1000
/* Capacity of the per-segment distance/count arrays in rindex_rebuild() and
   the limit at which find_shortest_rgdist() stops looking for new segments. */
27 : #define MAX_RGSEGMENTS 20
28 :
/*
 * ri_compare - reconcile one field of an on-disk rindex entry with the
 *              expected value.
 * @rg:       zero-based entry number (reported one-based in the message)
 * @ondisk:   pointer to the entry as found on disk; corrected in place
 * @expected: pointer to the entry holding the expected values
 * @field:    name of the member to compare
 * @fmt:      printf conversion macro matching the member (e.g. PRIx64)
 *
 * When the two values differ a warning is logged, the on-disk copy is
 * overwritten with the expected value and rindex_modified is set to 1.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe inside an unbraced if/else); invoke it with a trailing
 * semicolon. Pointer arguments are parenthesized so expressions may be passed.
 */
#define ri_compare(rg, ondisk, expected, field, fmt) \
	do { \
		if ((ondisk)->field != (expected)->field) { \
			log_warn( _("rindex #%d " #field " discrepancy: index 0x%" \
				    fmt " != expected: 0x%" fmt "\n"), \
				  (rg) + 1, (ondisk)->field, (expected)->field); \
			(ondisk)->field = (expected)->field; \
			rindex_modified = 1; \
		} \
	} while (0)
37 :
38 : /*
39 : * find_journal_entry_rgs - find all RG blocks within all journals
40 : *
41 : * Since Resource Groups (RGs) are journaled, it is not uncommon for them
42 : * to appear inside a journal. But if there is severe damage to the rindex
43 : * file or some of the RGs, we may need to hunt and peck for RGs and in that
44 : * case, we don't want to mistake these blocks that look just a real RG
45 : * for a real RG block. These are "fake" RGs that need to be ignored for
46 : * the purposes of finding where things are.
47 : *
48 : * NOTE: This function assumes that the jindex and journals have been read in,
49 : * which isn't often the case. Normally the rindex needs to be read in
50 : * first. If the rindex is damaged, that's not an option.
51 : */
52 4 : static void find_journaled_rgs(struct lgfs2_sbd *sdp)
53 : {
54 4 : int j, new = 0;
55 : unsigned int jblocks;
56 : uint64_t b, dblock;
57 : struct lgfs2_inode *ip;
58 : struct lgfs2_buffer_head *bh;
59 : int false_count;
60 :
61 4 : osi_list_init(&false_rgrps.list);
62 8 : for (j = 0; j < sdp->md.journals; j++) {
63 4 : ip = sdp->md.journal[j];
64 4 : log_debug(_("Checking for rgrps in journal%d which starts at block 0x%"PRIx64".\n"),
65 : j, ip->i_num.in_addr);
66 4 : jblocks = ip->i_size / sdp->sd_bsize;
67 4 : false_count = 0;
68 131076 : for (b = 0; b < jblocks; b++) {
69 131072 : if (lgfs2_block_map(ip, b, &new, &dblock, NULL, 0)) {
70 0 : log_crit(_("Failed to map block 0x%"PRIu64" in journal at 0x%"PRIu64": %s\n"),
71 : b, ip->i_num.in_addr, strerror(errno));
72 0 : exit(1);
73 : }
74 131072 : if (!dblock)
75 0 : break;
76 131072 : bh = lgfs2_bread(sdp, dblock);
77 131072 : if (!lgfs2_check_meta(bh->b_data, GFS2_METATYPE_RG)) {
78 : /* False rgrp found at block dblock */
79 0 : false_count++;
80 0 : special_set(&false_rgrps, dblock);
81 : }
82 131072 : lgfs2_brelse(bh);
83 : }
84 4 : log_debug("\n%d false positives identified.\n", false_count);
85 : }
86 4 : }
87 :
88 4152432 : static int is_false_rg(uint64_t block)
89 : {
90 4152432 : if (blockfind(&false_rgrps, block))
91 0 : return 1;
92 4152432 : return 0;
93 : }
94 :
95 : /*
96 : * find_shortest_rgdist - hunt and peck for the shortest distance between RGs.
97 : *
98 : * Sample several of them because an RG that's been blasted may
99 : * look like twice the distance. If we can find 6 of them, that
100 : * should be enough to figure out the correct layout.
101 : * This also figures out first_rg_dist since that's always different.
102 : *
103 : * This function was revised to return the number of segments, usually 2.
104 : * The shortest distance is now returned in the highest entry in rg_dist
 *
 * @sdp: the superblock descriptor; sdp->device.length bounds the scan
 * @dist_array: per-segment rgrp distance, filled in here. Entry 0 receives
 *              the distance from the first rgrp to the second one.
 * @dist_cnt: per-segment count of rgrps seen, filled in here
 *
 * Both arrays are indexed by segment number, which this function never lets
 * reach MAX_RGSEGMENTS before storing; the visible caller passes arrays of
 * exactly that size.
 *
 * Side effects: populates false_rgrps via find_journaled_rgs() and frees it
 * again before returning.
 *
 * Returns: the segment index reached when scanning stopped
105 : */
106 4 : static int find_shortest_rgdist(struct lgfs2_sbd *sdp, uint64_t *dist_array,
107 : int *dist_cnt)
108 : {
109 : uint64_t blk, block_last_rg, shortest_dist_btwn_rgs;
110 : struct lgfs2_buffer_head *bh;
111 4 : int rgs_sampled = 0;
112 : uint64_t initial_first_rg_dist;
113 4 : int gsegment = 0;
114 : int is_rgrp;
115 :
116 : /* Figure out if there are any RG-looking blocks in the journal we
117 : need to ignore. */
118 4 : find_journaled_rgs(sdp);
119 :
/* dist_array[0] starts at a sentinel (the first rgrp's address) so the code
   below can tell whether the first distance has been measured yet. */
120 4 : initial_first_rg_dist = dist_array[0] = block_last_rg =
121 4 : LGFS2_SB_ADDR(sdp) + 1;
122 4 : shortest_dist_btwn_rgs = sdp->device.length;
123 :
124 4152432 : for (blk = LGFS2_SB_ADDR(sdp) + 1; blk < sdp->device.length; blk++) {
125 : uint64_t dist;
126 :
/* The block right after the superblock is treated as an rgrp without being
   read; journal look-alikes are rejected without being read. */
127 4152428 : if (blk == LGFS2_SB_ADDR(sdp) + 1)
128 4 : is_rgrp = 1;
129 4152424 : else if (is_false_rg(blk))
130 0 : is_rgrp = 0;
131 : else {
132 4152432 : bh = lgfs2_bread(sdp, blk);
133 4152424 : is_rgrp = (lgfs2_check_meta(bh->b_data, GFS2_METATYPE_RG) == 0);
134 4152424 : lgfs2_brelse(bh);
135 : }
136 4152428 : if (!is_rgrp) {
/* A miss after the stride for this segment was settled (6 samples): probe one
   stride further to tell a damaged rgrp from the start of a new segment. */
137 4152104 : if (rgs_sampled >= 6) {
138 : uint64_t nblk;
139 :
140 8 : log_info(_("rgrp not found at block 0x%"PRIx64". "
141 : "Last found rgrp was 0x%"PRIx64". "
142 : "Checking the next one.\n"),
143 : blk, block_last_rg);
144 : /* check for just a damaged rgrp */
145 8 : nblk = blk + dist_array[gsegment];
146 8 : if (is_false_rg(nblk)) {
147 0 : is_rgrp = 0;
148 : } else {
149 8 : bh = lgfs2_bread(sdp, nblk);
150 8 : is_rgrp = (((lgfs2_check_meta(bh->b_data,
151 8 : GFS2_METATYPE_RG) == 0)));
152 8 : lgfs2_brelse(bh);
153 : }
154 8 : if (is_rgrp) {
155 0 : log_info(_("Next rgrp is intact, so "
156 : "this one is damaged.\n"));
/* nblk - 1 because the loop increment brings blk to nblk; the damaged rgrp is
   still counted for this segment. */
157 0 : blk = nblk - 1;
158 0 : dist_cnt[gsegment]++;
159 0 : continue;
160 : }
161 8 : log_info(_("Looking for new segment.\n"));
/* Back up 16 blocks and restart sampling for the next segment.
   NOTE(review): the reason for the value 16 is not evident from this file. */
162 8 : blk -= 16;
163 8 : rgs_sampled = 0;
164 8 : shortest_dist_btwn_rgs = sdp->device.length;
165 : /* That last one didn't pan out, so: */
166 8 : dist_cnt[gsegment]--;
167 8 : gsegment++;
168 8 : if (gsegment >= MAX_RGSEGMENTS)
169 0 : break;
170 : }
/* Stop scanning once more than 524288 * 2 blocks have passed since the last
   rgrp. NOTE(review): that equals the "4GB" in the message only for 4K
   blocks - confirm. */
171 4152104 : if ((blk - block_last_rg) > (524288 * 2)) {
172 0 : log_info(_("No rgrps were found within 4GB "
173 : "of the last rgrp. Must be the "
174 : "end of the file system.\n"));
175 :
176 0 : break;
177 : }
178 4152104 : continue;
179 : }
180 :
/* An rgrp was found at blk. Once the stride is settled, just count it and
   jump a full stride ahead (the loop increment supplies the final +1). */
181 324 : dist_cnt[gsegment]++;
182 324 : if (rgs_sampled >= 6) {
183 248 : block_last_rg = blk;
184 248 : blk += dist_array[gsegment] - 1; /* prev value in
185 : array minus 1. */
186 248 : continue;
187 : }
188 76 : log_info(_("segment %d: rgrp found at block 0x%"PRIx64"\n"),
189 : gsegment + 1, blk);
190 76 : dist = blk - block_last_rg;
191 76 : if (blk > LGFS2_SB_ADDR(sdp) + 1) { /* not the very first rgrp */
192 :
193 72 : log_info("dist 0x%"PRIx64" = 0x%"PRIx64" - 0x%"PRIx64" ",
194 : dist, blk, block_last_rg);
195 : /**
196 : * We found an RG. Check to see if we need to set the
197 : * first_rg_dist based on whether it is still at its
198 : * initial value (i.e. the fs.) The first rg distance
199 : * is different from the rest because of the
200 : * superblock and 64K dead space.
201 : **/
202 72 : if (dist_array[0] == initial_first_rg_dist) {
203 4 : dist_array[0] = dist;
204 4 : dist_cnt[0] = 1;
205 4 : rgs_sampled = 0;
206 : }
207 72 : if (dist < shortest_dist_btwn_rgs) {
208 12 : shortest_dist_btwn_rgs = dist;
209 12 : log_info( _("(shortest so far)"));
210 : }
211 72 : log_info("\n");
/* The sixth sample fixes this segment's stride at the shortest distance seen. */
212 72 : if (++rgs_sampled == 6) {
213 12 : dist_array[gsegment] = shortest_dist_btwn_rgs;
214 12 : log_info(_("Settled on distance 0x%"PRIx64" for segment %d\n"),
215 : dist_array[gsegment], gsegment + 1);
216 : }
217 : } else {
/* Very first rgrp: move on to segment index 1 so that entry 0 stays reserved
   for the first-rgrp distance. */
218 4 : gsegment++;
219 4 : if (gsegment >= MAX_RGSEGMENTS)
220 0 : break;
221 : }
222 76 : block_last_rg = blk;
223 76 : if (rgs_sampled < 6)
224 64 : blk += 250; /* skip ahead for performance */
225 : else
226 12 : blk += shortest_dist_btwn_rgs - 1;
227 : }
228 4 : if (gsegment >= MAX_RGSEGMENTS) {
229 0 : log_err(_("Maximum number of rgrp grow segments reached.\n"));
230 0 : log_err(_("This file system has more than %d resource "
231 : "group segments.\n"), MAX_RGSEGMENTS);
232 : }
233 : /* -------------------------------------------------------------- */
234 : /* Sanity-check our first_rg_dist. If RG #2 got nuked, the */
235 : /* first_rg_dist would measure from #1 to #3, which would be bad. */
236 : /* We need to take remedial measures to fix it (from the index). */
237 : /* -------------------------------------------------------------- */
/* Triggered when the first distance is at least 1.25 times the shortest
   distance of the last segment sampled. */
238 4 : if (*dist_array >= shortest_dist_btwn_rgs +
239 4 : (shortest_dist_btwn_rgs / 4)) {
240 : struct gfs2_rindex ri;
241 :
242 : /* read in the second RG index entry for this subd. */
/* NOTE(review): the result of lgfs2_readi() is not checked, so ri may be
   left unset if the read fails - confirm. */
243 0 : lgfs2_readi(sdp->md.riinode, &ri, sizeof(ri), sizeof(ri));
244 :
245 0 : if (be64_to_cpu(ri.ri_addr) > LGFS2_SB_ADDR(sdp) + 1) { /* sanity check */
246 0 : log_warn( _("rgrp 2 is damaged: getting dist from index: "));
247 0 : *dist_array = be64_to_cpu(ri.ri_addr) - (LGFS2_SB_ADDR(sdp) + 1);
248 0 : log_warn("0x%"PRIx64"\n", *dist_array);
249 : } else {
250 0 : log_warn( _("rgrp index 2 is damaged: extrapolating dist: "));
/* NOTE(review): in the visible code nrgrp is only assigned by
   compute_rgrp_layout(); if that has not run, nrgrp is 0 and this divides by
   zero - confirm the call order. */
251 0 : *dist_array = sdp->device.length - (nrgrp - 1) *
252 0 : (sdp->device.length / nrgrp);
253 0 : log_warn("0x%"PRIx64"\n", *dist_array);
254 : }
255 0 : log_debug(_("Adjusted first rgrp distance: 0x%"PRIx64"\n"), *dist_array);
256 : } /* if first RG distance is within tolerance */
257 :
258 4 : special_free(&false_rgrps);
259 4 : return gsegment;
260 : }
261 :
262 : /*
263 : * count_usedspace - count the used bits in a rgrp bitmap buffer
264 : */
265 168 : static uint64_t count_usedspace(struct lgfs2_sbd *sdp, int first,
266 : struct lgfs2_buffer_head *bh)
267 : {
268 : int off, x, y, bytes_to_check;
269 168 : uint32_t rg_used = 0;
270 : unsigned int state;
271 :
272 : /* Count up the free blocks in the bitmap */
273 168 : if (first)
274 160 : off = sizeof(struct gfs2_rgrp);
275 : else
276 8 : off = sizeof(struct gfs2_meta_header);
277 168 : bytes_to_check = sdp->sd_bsize - off;
278 667624 : for (x = 0; x < bytes_to_check; x++) {
279 : unsigned char *byte;
280 :
281 667456 : byte = (unsigned char *)&bh->b_data[off + x];
282 667456 : if (*byte == 0x55) {
283 128 : rg_used += GFS2_NBBY;
284 128 : continue;
285 : }
286 667328 : if (*byte == 0x00)
287 667322 : continue;
288 30 : for (y = 0; y < GFS2_NBBY; y++) {
289 24 : state = (*byte >> (GFS2_BIT_SIZE * y)) & GFS2_BIT_MASK;
290 24 : if (state == GFS2_BLKST_FREE ||
291 : state == GFS2_BLKST_UNLINKED)
292 2 : continue;
293 22 : rg_used++;
294 : }
295 : }
296 168 : return rg_used;
297 : }
298 :
299 : /*
300 : * find_next_rgrp_dist - find the distance to the next rgrp
301 : *
302 : * This function is only called if the rgrps are determined to be on uneven
303 : * boundaries. In a normal gfs2 file system, after mkfs.gfs2, all the
304 : * rgrps but the first and second one will be the same distance from the
305 : * previous rgrp. (The first rgrp will predictably be after the superblock
306 : * and the second one will be adjusted based on the number 64KB skipped
307 : * at the start of the file system.) The only way we can deviate from that
308 : * pattern is if the user did gfs_grow on a gfs1 file system, then converted
309 : * it to gfs2 using gfs2_convert.
310 : *
311 : * This function finds the distance to the next rgrp for these cases.
312 : */
313 160 : static uint64_t find_next_rgrp_dist(struct lgfs2_sbd *sdp, uint64_t blk,
314 : struct lgfs2_rgrp_tree *prevrgd)
315 : {
316 160 : struct osi_node *n, *next = NULL;
317 160 : uint64_t rgrp_dist = 0, used_blocks, block, next_block, twogigs;
318 160 : struct lgfs2_rgrp_tree *rgd = NULL, *next_rgd;
319 : struct lgfs2_buffer_head *bh;
320 : int first, length, b, found;
321 : uint64_t mega_in_blocks;
322 : uint32_t free_blocks;
323 :
324 160 : for (n = osi_first(&sdp->rgtree); n; n = next) {
325 0 : next = osi_next(n);
326 0 : rgd = (struct lgfs2_rgrp_tree *)n;
327 0 : if (rgd->rt_addr == blk)
328 0 : break;
329 : }
330 160 : if (rgd && n && osi_next(n) && rgd->rt_addr == blk) {
331 : /* coverity[returned_null:SUPPRESS] False positive */
332 0 : n = osi_next(n);
333 0 : next_rgd = (struct lgfs2_rgrp_tree *)n;
334 0 : rgrp_dist = next_rgd->rt_addr - rgd->rt_addr;
335 0 : return rgrp_dist;
336 : }
337 160 : mega_in_blocks = (1024 * 1024) / sdp->sd_bsize;
338 160 : twogigs = (uint64_t)mega_in_blocks * 2048;
339 : /* Unfortunately, if we fall through to here we can't trust the
340 : rindex. So we have to analyze the current rgrp to figure out
341 : the bare minimum block number where it ends. If we don't have
342 : rindex, all we know about this rgrp is what's on disk: its
343 : rg_free. If we analyze the rgrp's bitmap and the bitmaps that
344 : follow, we can figure out how many bits are used. If we add
345 : rg_free, we get the total number of blocks this rgrp
346 : represents. After that should be the next rgrp, but it may
347 : skip a few blocks (hopefully no more than 4). */
348 160 : used_blocks = 0;
349 160 : length = 0;
350 160 : block = prevrgd->rt_addr;
351 160 : first = 1;
352 160 : found = 0;
353 10 : while (1) {
354 : struct gfs2_meta_header *mh;
355 :
356 170 : if (block >= sdp->device.length)
357 0 : break;
358 170 : if (block >= prevrgd->rt_addr + twogigs)
359 0 : break;
360 170 : bh = lgfs2_bread(sdp, block);
361 170 : mh = (struct gfs2_meta_header *)bh->b_data;
362 170 : if ((be32_to_cpu(mh->mh_magic) != GFS2_MAGIC) ||
363 168 : (first && be32_to_cpu(mh->mh_type) != GFS2_METATYPE_RG) ||
364 8 : (!first && be32_to_cpu(mh->mh_type) != GFS2_METATYPE_RB)) {
365 2 : lgfs2_brelse(bh);
366 2 : break;
367 : }
368 168 : if (first) {
369 : struct gfs2_rgrp *rg;
370 :
371 160 : rg = (struct gfs2_rgrp *)bh->b_data;
372 160 : free_blocks = be32_to_cpu(rg->rg_free);
373 : }
374 168 : used_blocks += count_usedspace(sdp, first, bh);
375 168 : first = 0;
376 168 : block++;
377 168 : length++;
378 168 : lgfs2_brelse(bh);
379 : /* Check if this distance points to an rgrp:
380 : We have to look for blocks that resemble rgrps and bitmaps.
381 : If they do, we need to count blocks used and free and see
382 : if adding that number of free blocks accounts for the
383 : next rgrp we find. Otherwise, you could have a length of
384 : 6 with additional user blocks that just happen to look like
385 : bitmap blocks. Count them all as bitmaps and you'll be
386 : hopelessly lost. */
387 168 : rgrp_dist = used_blocks + free_blocks + length;
388 168 : next_block = prevrgd->rt_addr + rgrp_dist;
389 : /* Now we account for block rounding done by mkfs.gfs2 */
390 846 : for (b = 0; b <= length + GFS2_NBBY; b++) {
391 846 : if (next_block + b >= sdp->device.length)
392 10 : break;
393 836 : bh = lgfs2_bread(sdp, next_block + b);
394 836 : mh = (struct gfs2_meta_header *)bh->b_data;
395 836 : if (be32_to_cpu(mh->mh_magic) == GFS2_MAGIC) {
396 158 : if (be32_to_cpu(mh->mh_type) == GFS2_METATYPE_RG)
397 158 : found = 1;
398 : /* if the first thing we find is a bitmap,
399 : there must be a damaged rgrp on the
400 : previous block. */
401 158 : if (be32_to_cpu(mh->mh_type) == GFS2_METATYPE_RB) {
402 0 : found = 1;
403 0 : rgrp_dist--;
404 : }
405 : }
406 836 : lgfs2_brelse(bh);
407 836 : if (found)
408 158 : break;
409 678 : rgrp_dist++;
410 : }
411 168 : if (found) {
412 158 : log_info(_("rgrp found at 0x%"PRIx64", length=%d, used=%"PRIu64", free=%d\n"),
413 : prevrgd->rt_addr, length, used_blocks, free_blocks);
414 158 : break;
415 : }
416 : }
417 160 : return rgrp_dist;
418 : }
419 :
420 : /*
421 : * hunt_and_peck - find the distance to the next rgrp
422 : *
423 : * This function is only called if the rgrps are determined to be on uneven
424 : * boundaries, and also corrupt. So we have to go out searching for one.
425 : */
426 0 : static uint64_t hunt_and_peck(struct lgfs2_sbd *sdp, uint64_t blk,
427 : struct lgfs2_rgrp_tree *prevrgd, uint64_t last_bump)
428 : {
429 0 : uint64_t rgrp_dist = 0, block, twogigs, last_block, last_meg;
430 : struct lgfs2_buffer_head *bh;
431 : struct gfs2_meta_header *mh;
432 : int b, mega_in_blocks;
433 :
434 : /* Skip ahead the previous amount: we might get lucky.
435 : If we're close to the end of the device, take the rest. */
436 0 : if (lgfs2_check_range(sdp, blk + last_bump))
437 0 : return sdp->fssize - blk;
438 :
439 0 : bh = lgfs2_bread(sdp, blk + last_bump);
440 0 : mh = (struct gfs2_meta_header *)bh->b_data;
441 0 : if (be32_to_cpu(mh->mh_magic) == GFS2_MAGIC &&
442 0 : be32_to_cpu(mh->mh_type) == GFS2_METATYPE_RG) {
443 0 : log_info(_("rgrp found at 0x%"PRIx64", length=%"PRIu64"\n"),
444 : blk + last_bump, last_bump);
445 0 : lgfs2_brelse(bh);
446 0 : return last_bump;
447 : }
448 0 : lgfs2_brelse(bh);
449 :
450 0 : rgrp_dist = AWAY_FROM_BITMAPS; /* Get away from any bitmaps
451 : associated with the previous rgrp */
452 0 : block = prevrgd->rt_addr + rgrp_dist;
453 : /* Now we account for block rounding done by mkfs.gfs2. A rgrp can
454 : be at most 2GB in size, so that's where we call it. We do somewhat
455 : obscure math here to avoid integer overflows. */
456 0 : mega_in_blocks = (1024 * 1024) / sdp->sd_bsize;
457 0 : twogigs = (uint64_t)2048 * mega_in_blocks;
458 0 : if (block + twogigs <= sdp->fssize) {
459 0 : last_block = twogigs;
460 0 : last_meg = 0;
461 : } else {
462 : /* There won't be a rgrp in the last megabyte. */
463 0 : last_block = sdp->fssize - block - mega_in_blocks;
464 0 : last_meg = mega_in_blocks;
465 : }
466 0 : for (b = AWAY_FROM_BITMAPS; b < last_block; b++) {
467 : uint32_t magic, type;
468 :
469 0 : bh = lgfs2_bread(sdp, block + b);
470 0 : mh = (struct gfs2_meta_header *)bh->b_data;
471 0 : magic = be32_to_cpu(mh->mh_magic);
472 0 : type = be32_to_cpu(mh->mh_type);
473 0 : lgfs2_brelse(bh);
474 0 : if (magic == GFS2_MAGIC) {
475 0 : if (type == GFS2_METATYPE_RG)
476 0 : break;
477 : /* if the first thing we find is a bitmap, there must
478 : be a damaged rgrp on the previous block. */
479 0 : if (type == GFS2_METATYPE_RB) {
480 0 : rgrp_dist--;
481 0 : break;
482 : }
483 : }
484 0 : rgrp_dist++;
485 : }
486 0 : return rgrp_dist + last_meg;
487 : }
488 :
489 : /*
490 : * rindex_rebuild - rebuild a corrupt Resource Group (RG) index manually
491 : * where trust_lvl == DISTRUST
492 : *
493 : * If this routine is called, it means we have RGs in odd/unexpected places,
494 : * and there is a corrupt RG or RG index entry. It also means we can't trust
495 : * the RG index to be sane, and the RGs don't agree with how mkfs would have
496 : * built them by default. So we have no choice but to go through and count
497 : * them by hand. We've tried twice to recover the RGs and RG index, and
498 : * failed, so this is our last chance to remedy the situation.
499 : *
500 : * This routine tries to minimize performance impact by:
501 : * 1. Skipping through the filesystem at known increments when possible.
502 : * 2. Shuffle through every block when RGs are not found at the predicted
503 : * locations.
504 : *
505 : * Note: A GFS2 filesystem differs from a GFS1 file system in that there will
506 : * only be ONE chunk (i.e. no artificial subdevices on either size of the
507 : * journals). The journals and even the rindex are kept as part of the file
508 : * system, so we need to rebuild that information by hand. Also, with GFS1,
509 : * the different chunks ("subdevices") could have different RG sizes, which
510 : * made for quite a mess when trying to recover RGs. GFS2 always uses the
511 : * same RG size determined by the original mkfs, so recovery is easier.
512 : *
513 : * If "gfs_grow" is specified the file system was most likely converted
514 : * from gfs1 to gfs2 after a gfs_grow operation. In that case, the rgrps
515 : * will not be on predictable boundaries.
 *
 * @cx: fsck context; cx->sdp is the file system being repaired
 * @num_rgs: set to the number of rgrps recorded, on success only
 * @gfs_grow: non-zero to locate each following rgrp individually
 *            (find_next_rgrp_dist / hunt_and_peck) instead of stepping by
 *            the per-segment distances
 *
 * The rebuilt entries are inserted into the file-scope rgcalc tree, which is
 * not freed here, not even on failure.
 *
 * Returns: 0 on success; -1 if the jindex cannot be read, an rgrp entry
 *          cannot be allocated, or a fifth missing/damaged rgrp is found.
516 : */
517 4 : static int rindex_rebuild(struct fsck_cx *cx, int *num_rgs, int gfs_grow)
518 : {
519 4 : struct lgfs2_sbd *sdp = cx->sdp;
520 4 : struct osi_node *n, *next = NULL;
521 : struct lgfs2_buffer_head *bh;
522 4 : uint64_t rg_dist[MAX_RGSEGMENTS] = {0, };
523 4 : int rg_dcnt[MAX_RGSEGMENTS] = {0, };
524 : uint64_t blk;
525 : uint64_t fwd_block, block_bump;
526 : struct lgfs2_rgrp_tree *calc_rgd, *prev_rgd;
527 : int number_of_rgs, rgi, segment_rgs;
528 4 : int rg_was_fnd = 0, corrupt_rgs = 0;
529 4 : int error = -1, j, i;
530 4 : int grow_segments, segment = 0;
531 :
532 : /*
533 : * In order to continue, we need to initialize the jindex. We need
534 : * the journals in order to correctly eliminate false positives during
535 : * rgrp repair. IOW, we need to properly ignore rgrps that appear in
536 : * the journals, and we can only do that if we have the journals.
537 : * To make matters worse, journals may span several (small) rgrps,
538 : * so we can't go by the rgrps.
539 : */
540 4 : if (init_jindex(cx, 0) != 0) {
541 0 : log_crit(_("Error: Can't read jindex required for rindex "
542 : "repairs.\n"));
543 0 : return -1;
544 : }
545 :
/* Start from an empty tree, then sample the device to learn the per-segment
   rgrp distances and counts. */
546 4 : rgcalc.osi_node = NULL;
547 4 : grow_segments = find_shortest_rgdist(sdp, &rg_dist[0], &rg_dcnt[0]);
548 16 : for (i = 0; i < grow_segments; i++)
549 12 : log_info(_("Segment %d: rgrp distance: 0x%"PRIx64", count: %d\n"),
550 : i + 1, rg_dist[i], rg_dcnt[i]);
551 4 : number_of_rgs = segment_rgs = 0;
552 : /* -------------------------------------------------------------- */
553 : /* Now go through the RGs and verify their integrity, fixing as */
554 : /* needed when corruption is encountered. */
555 : /* -------------------------------------------------------------- */
556 4 : prev_rgd = NULL;
557 4 : block_bump = rg_dist[0];
558 4 : blk = LGFS2_SB_ADDR(sdp) + 1;
/* NOTE(review): "<=" lets blk equal device.length, while the bitmap loop
   below uses "<"; if device.length is a block count this reads one block
   past the end - confirm. */
559 178 : while (blk <= sdp->device.length) {
560 176 : log_debug( _("Block 0x%"PRIx64"\n"), blk);
561 176 : bh = lgfs2_bread(sdp, blk);
562 176 : rg_was_fnd = (!lgfs2_check_meta(bh->b_data, GFS2_METATYPE_RG));
563 176 : lgfs2_brelse(bh);
564 : /* Allocate a new RG and index. */
565 176 : calc_rgd = lgfs2_rgrp_insert(&rgcalc, blk);
566 176 : if (!calc_rgd) {
567 0 : log_crit( _("Can't allocate memory for rgrp repair.\n"));
568 0 : goto out;
569 : }
570 176 : calc_rgd->rt_length = 1;
571 176 : if (!rg_was_fnd) { /* if not an RG */
572 : /* ------------------------------------------------- */
573 : /* This SHOULD be an RG but isn't. */
574 : /* ------------------------------------------------- */
/* Up to four missing rgrps are tolerated; the fifth aborts this method, and
   the entry just inserted for it stays in rgcalc. */
575 10 : corrupt_rgs++;
576 10 : if (corrupt_rgs < 5)
577 8 : log_debug(_("Missing or damaged rgrp at block %"PRIu64" (0x%"PRIx64")\n"),
578 : blk, blk);
579 : else {
580 2 : log_crit( _("Error: too many missing or "
581 : "damaged rgrps using this method. "
582 : "Time to try another method.\n"));
583 2 : goto out;
584 : }
585 : }
586 : /* ------------------------------------------------ */
587 : /* Now go through and count the bitmaps for this RG */
588 : /* ------------------------------------------------ */
589 830 : for (fwd_block = blk + 1; fwd_block < sdp->device.length; fwd_block++) {
590 : int bitmap_was_fnd;
591 830 : bh = lgfs2_bread(sdp, fwd_block);
592 830 : bitmap_was_fnd = !lgfs2_check_meta(bh->b_data, GFS2_METATYPE_RB);
593 830 : lgfs2_brelse(bh);
594 830 : if (bitmap_was_fnd) /* if a bitmap */
595 656 : calc_rgd->rt_length++;
596 : else
597 174 : break; /* end of bitmap, so call it quits. */
598 : } /* for subsequent bitmaps */
599 :
600 174 : calc_rgd->rt_data0 = calc_rgd->rt_addr +
601 174 : calc_rgd->rt_length;
/* block_bump is the distance from the previous rgrp to this one, so the
   previous entry's geometry is recomputed from it here. */
602 174 : if (prev_rgd) {
603 : uint32_t rgblocks;
604 :
605 170 : prev_rgd->rt_length = lgfs2_rgblocks2bitblocks(sdp->sd_bsize, block_bump, &rgblocks);
606 170 : prev_rgd->rt_data = rgblocks;
607 170 : prev_rgd->rt_data0 = prev_rgd->rt_addr +
608 170 : prev_rgd->rt_length;
609 170 : prev_rgd->rt_data -= prev_rgd->rt_data %
610 : GFS2_NBBY;
611 170 : prev_rgd->rt_bitbytes = prev_rgd->rt_data /
612 : GFS2_NBBY;
613 170 : log_debug(_("Prev ri_data set to: 0x%"PRIx32"\n"), prev_rgd->rt_data);
614 : }
615 174 : number_of_rgs++;
616 174 : segment_rgs++;
617 174 : if (rg_was_fnd)
618 166 : log_info( _(" rgrp %d at block 0x%"PRIx64" intact\n"),
619 : number_of_rgs, blk);
620 : else
621 8 : log_warn( _("* rgrp %d at block 0x%"PRIx64" *** DAMAGED ***\n"),
622 : number_of_rgs, blk);
623 174 : prev_rgd = calc_rgd;
624 : /*
625 : * Figure out where our next rgrp should be.
626 : */
/* The first rgrp, and every rgrp when gfs_grow is off, steps by the sampled
   distance of the current segment and moves to the next segment once that
   segment's count has been reached. */
627 174 : if ((blk == LGFS2_SB_ADDR(sdp) + 1) || (!gfs_grow)) {
628 14 : block_bump = rg_dist[segment];
629 14 : if (segment_rgs >= rg_dcnt[segment]) {
630 14 : log_debug(_("End of segment %d\n"), ++segment);
631 14 : segment_rgs = 0;
632 14 : if (segment >= grow_segments) {
633 0 : log_debug(_("Last segment.\n"));
634 0 : break;
635 : }
636 : }
637 : /* if we have uniformly-spaced rgrps, there may be
638 : some wasted space at the end of the device.
639 : Since we don't want to create a short rgrp and
640 : break our uniformity, just quit here. */
641 14 : if (blk + (2 * block_bump) > sdp->device.length)
642 0 : break;
643 160 : } else if (rg_was_fnd)
644 160 : block_bump = find_next_rgrp_dist(sdp, blk, prev_rgd);
645 : else
646 0 : block_bump = hunt_and_peck(sdp, blk, prev_rgd,
647 : block_bump);
648 174 : if (block_bump != 1) {
649 174 : if (rg_was_fnd)
650 166 : log_info(_(" [length 0x%"PRIx64"]\n"), block_bump);
651 : else
652 8 : log_warn(_(" [length 0x%"PRIx64"]\n"), block_bump);
653 : } else {
654 0 : log_warn("\n");
655 : }
656 174 : blk += block_bump;
657 : } /* for each rg block */
658 : /* ----------------------------------------------------------------- */
659 : /* If we got to the end of the fs, we still need to fix the */
660 : /* allocation information for the very last RG. */
661 : /* ----------------------------------------------------------------- */
662 2 : if (prev_rgd && !prev_rgd->rt_data) {
663 : uint32_t rgblocks;
664 :
665 2 : prev_rgd->rt_length = lgfs2_rgblocks2bitblocks(sdp->sd_bsize, block_bump, &rgblocks);
666 2 : prev_rgd->rt_data0 = prev_rgd->rt_addr + prev_rgd->rt_length;
667 2 : prev_rgd->rt_data = rgblocks;
668 2 : prev_rgd->rt_data -= prev_rgd->rt_data % GFS2_NBBY;
669 2 : prev_rgd->rt_bitbytes = prev_rgd->rt_data / GFS2_NBBY;
670 2 : log_debug(_("Prev ri_data set to: 0x%"PRIx32"\n"), prev_rgd->rt_data);
671 2 : prev_rgd = NULL; /* make sure we don't use it later */
672 : }
673 : /* ---------------------------------------------- */
674 : /* Now dump out the information (if verbose mode) */
675 : /* ---------------------------------------------- */
676 2 : log_debug( _("rindex rebuilt as follows:\n"));
677 164 : for (n = osi_first(&rgcalc), rgi = 0; n; n = next, rgi++) {
678 162 : next = osi_next(n);
679 162 : calc_rgd = (struct lgfs2_rgrp_tree *)n;
680 162 : log_debug("%d: 0x%"PRIx64"/%"PRIx32"/0x%"PRIx64"/0x%"PRIx32"/0x%"PRIx32"\n",
681 : rgi + 1, calc_rgd->rt_addr, calc_rgd->rt_length,
682 : calc_rgd->rt_data0, calc_rgd->rt_data,
683 : calc_rgd->rt_bitbytes);
684 : }
685 2 : *num_rgs = number_of_rgs;
686 2 : error = 0;
/* Common exit: drop the journal inodes, the jindex inode and the journal
   array (presumably the ones set up by init_jindex() above - confirm). */
687 4 : out:
688 8 : for (j = 0; j < sdp->md.journals; j++)
689 4 : lgfs2_inode_put(&sdp->md.journal[j]);
690 4 : lgfs2_inode_put(&sdp->md.jiinode);
691 4 : free(sdp->md.journal);
692 4 : return error;
693 : }
694 :
/*
 * DIV_RU - divide x by y, rounding the quotient up (non-negative operands).
 *
 * Written as quotient plus a remainder test so that, unlike
 * ((x) + (y) - 1) / (y), the intermediate value cannot wrap when x is close
 * to the maximum of its type. Both arguments are evaluated more than once,
 * so they must be free of side effects; y must be non-zero.
 */
#define DIV_RU(x, y) ((x) / (y) + ((x) % (y) != 0))
696 :
697 : /**
698 : * how_many_rgrps - figure out how many RG to put in a subdevice
699 : * @w: the command line
700 : * @dev: the device
701 : *
702 : * Returns: the number of RGs
703 : */
704 10 : static uint64_t how_many_rgrps(struct lgfs2_sbd *sdp, struct lgfs2_device *dev)
705 : {
706 : uint64_t n;
707 : uint32_t rgblocks1, rgblocksn, bitblocks1, bitblocksn;
708 :
709 : while (1) {
710 10 : n = DIV_RU(dev->length, (sdp->rgsize << 20) / sdp->sd_bsize);
711 :
712 : /* check to see if the rg length overflows max # bitblks */
713 10 : bitblocksn = lgfs2_rgblocks2bitblocks(sdp->sd_bsize, dev->length / n, &rgblocksn);
714 : /* calculate size of the first rgrp */
715 10 : bitblocks1 = lgfs2_rgblocks2bitblocks(sdp->sd_bsize, dev->length - (n - 1) * (dev->length / n),
716 : &rgblocks1);
717 10 : if (bitblocks1 <= 2149 && bitblocksn <= 2149)
718 10 : break;
719 :
720 0 : sdp->rgsize -= LGFS2_DEFAULT_RGSIZE; /* smaller rgs */
721 :
722 0 : if (sdp->rgsize < LGFS2_DEFAULT_RGSIZE) {
723 0 : log_err(_("Cannot use the entire device with block size %u bytes.\n"),
724 : sdp->sd_bsize);
725 0 : return 0;
726 : }
727 : }
728 10 : log_debug(" rg sz = %"PRIu32"\n nrgrp = %"PRIu64"\n", sdp->rgsize, n);
729 10 : return n;
730 : }
731 :
732 : /**
733 : * compute_rgrp_layout - figure out where the RG in a FS are
734 : */
735 2 : static struct osi_root compute_rgrp_layout(struct lgfs2_sbd *sdp)
736 : {
737 : struct lgfs2_device *dev;
738 2 : struct lgfs2_rgrp_tree *rl, *rlast = NULL;
739 2 : unsigned int rgrp = 0, rglength;
740 2 : struct osi_root rgtree = {NULL};
741 : uint64_t rgaddr;
742 :
743 2 : dev = &sdp->device;
744 :
745 2 : dev->length -= LGFS2_SB_ADDR(sdp) + 1;
746 2 : nrgrp = how_many_rgrps(sdp, dev);
747 2 : if (nrgrp == 0)
748 0 : return (struct osi_root){NULL};
749 2 : rglength = dev->length / nrgrp;
750 :
751 2562 : for (; rgrp < nrgrp; rgrp++) {
752 2560 : if (rgrp) {
753 2558 : rgaddr = rlast->rt_addr + rlast->rt_skip;
754 2558 : rl = lgfs2_rgrp_insert(&rgtree, rgaddr);
755 2558 : rl->rt_skip = rglength;
756 : } else {
757 2 : rgaddr = LGFS2_SB_ADDR(sdp) + 1;
758 2 : rl = lgfs2_rgrp_insert(&rgtree, rgaddr);
759 2 : rl->rt_skip = dev->length -
760 2 : (nrgrp - 1) * (dev->length / nrgrp);
761 : }
762 2560 : rlast = rl;
763 : }
764 2 : return rgtree;
765 : }
766 :
767 2 : static int calc_rgrps(struct lgfs2_sbd *sdp)
768 : {
769 2 : struct osi_node *n, *next = NULL;
770 : struct lgfs2_rgrp_tree *rl;
771 : uint32_t rgblocks, bitblocks;
772 :
773 2562 : for (n = osi_first(&rgcalc); n; n = next) {
774 2560 : next = osi_next(n);
775 2560 : rl = (struct lgfs2_rgrp_tree *)n;
776 :
777 2560 : bitblocks = lgfs2_rgblocks2bitblocks(sdp->sd_bsize, rl->rt_skip, &rgblocks);
778 :
779 2560 : rl->rt_length = bitblocks;
780 2560 : rl->rt_data0 = rl->rt_addr + bitblocks;
781 2560 : rl->rt_data = rgblocks;
782 2560 : rl->rt_bitbytes = rgblocks / GFS2_NBBY;
783 2560 : rl->rt_free = rgblocks;
784 :
785 2560 : if (lgfs2_compute_bitstructs(sdp->sd_bsize, rl))
786 0 : return -1;
787 :
788 2560 : sdp->blks_total += rgblocks;
789 2560 : sdp->fssize = rl->rt_data0 + rl->rt_data;
790 : }
791 2 : return 0;
792 : }
793 :
794 : /*
795 : * gfs2_rindex_calculate - calculate what the rindex should look like
796 : * in a perfect world (trust_lvl == OPEN_MINDED)
797 : *
798 : * Calculate what the rindex should look like,
799 : * so we can later check if all RG index entries are sane.
800 : * This is a lot easier for gfs2 because we can just call the same libgfs2
801 : * functions used by mkfs.
802 : *
803 : * Returns: 0 on success, -1 on failure
804 : * Sets: sdp->rglist to a linked list of fsck_rgrp structs representing
805 : * what we think the rindex should really look like.
806 : */
807 2 : static int rindex_calculate(struct lgfs2_sbd *sdp, int *num_rgs)
808 : {
809 2 : uint64_t num_rgrps = 0;
810 :
811 : /* ----------------------------------------------------------------- */
812 : /* Calculate how many RGs there are supposed to be based on the */
813 : /* rindex filesize. Remember that our trust level is open-minded */
814 : /* here. If the filesize of the rindex file is not a multiple of */
815 : /* our rindex structures, then something's wrong and we can't trust */
816 : /* the index. */
817 : /* ----------------------------------------------------------------- */
818 2 : *num_rgs = sdp->md.riinode->i_size / sizeof(struct gfs2_rindex);
819 :
820 2 : rgcalc.osi_node = NULL;
821 2 : lgfs2_fix_device_geometry(sdp);
822 :
823 : /* Try all possible rgrp sizes: 2048, 1024, 512, 256, 128, 64, 32 */
824 10 : for (sdp->rgsize = LGFS2_DEFAULT_RGSIZE; sdp->rgsize >= 32;
825 8 : sdp->rgsize /= 2) {
826 8 : num_rgrps = how_many_rgrps(sdp, &sdp->device);
827 8 : if (num_rgrps == *num_rgs) {
828 0 : log_info(_("rgsize must be: %u (0x%x)\n"),
829 : sdp->rgsize, sdp->rgsize);
830 0 : break;
831 : }
832 : }
833 : /* Compute the default resource group layout as mkfs would have done */
834 2 : rgcalc = compute_rgrp_layout(sdp);
835 2 : if (calc_rgrps(sdp)) { /* Calculate but don't write to disk. */
836 0 : fprintf(stderr, _("Failed to build resource groups\n"));
837 0 : exit(-1);
838 : }
839 2 : log_debug(_("fs_total_size = 0x%"PRIx64" blocks.\n"), sdp->device.length);
840 2 : log_warn( _("L3: number of rgs in the index = %d.\n"), *num_rgs);
841 2 : return 0;
842 : }
843 :
844 : /*
845 : * rewrite_rg_block - rewrite ("fix") a buffer with rg or bitmap data
846 : * returns: 0 if the rg was repaired, otherwise 1
847 : */
848 4 : static int rewrite_rg_block(struct fsck_cx *cx, struct lgfs2_rgrp_tree *rg,
849 : uint64_t errblock)
850 : {
851 4 : struct lgfs2_sbd *sdp = cx->sdp;
852 4 : int x = errblock - rg->rt_addr;
853 4 : const char *typedesc = x ? "GFS2_METATYPE_RB" : "GFS2_METATYPE_RG";
854 : ssize_t ret;
855 : char *buf;
856 :
857 4 : log_err(_("Block #%"PRIu64" (0x%"PRIx64") (%d of %"PRIu32") is not %s.\n"),
858 : rg->rt_addr + x, rg->rt_addr + x, x+1, rg->rt_length, typedesc);
859 4 : if (!query(cx, _("Fix the resource group? (y/n)")))
860 0 : return 1;
861 :
862 4 : log_err(_("Attempting to repair the resource group.\n"));
863 :
864 4 : buf = calloc(1, sdp->sd_bsize);
865 4 : if (buf == NULL) {
866 0 : log_err(_("Failed to allocate resource group block: %s"), strerror(errno));
867 0 : return 1;
868 : }
869 4 : ret = pread(sdp->device_fd, buf, sdp->sd_bsize, errblock * sdp->sd_bsize);
870 4 : if (ret != sdp->sd_bsize) {
871 0 : log_err(_("Failed to read resource group block %"PRIu64": %s\n"),
872 : errblock, strerror(errno));
873 0 : free(buf);
874 0 : return 1;
875 : }
876 4 : if (x) {
877 0 : struct gfs2_meta_header mh = {
878 0 : .mh_magic = cpu_to_be32(GFS2_MAGIC),
879 0 : .mh_type = cpu_to_be32(GFS2_METATYPE_RB),
880 0 : .mh_format = cpu_to_be32(GFS2_FORMAT_RB)
881 : };
882 0 : memcpy(buf, &mh, sizeof(mh));
883 : } else {
884 4 : rg->rt_free = rg->rt_data;
885 4 : lgfs2_rgrp_out(rg, buf);
886 : }
887 4 : ret = pwrite(sdp->device_fd, buf, sdp->sd_bsize, errblock * sdp->sd_bsize);
888 4 : if (ret != sdp->sd_bsize) {
889 0 : log_err(_("Failed to write resource group block %"PRIu64": %s\n"),
890 : errblock, strerror(errno));
891 0 : free(buf);
892 0 : return 1;
893 : }
894 4 : free(buf);
895 4 : return 0;
896 : }
897 :
898 : /*
899 : * expect_rindex_sanity - the rindex file seems trustworthy, so use those
900 : * values as our expected values and assume the
901 : * damage is only to the rgrps themselves.
902 : */
903 4 : static int expect_rindex_sanity(struct lgfs2_sbd *sdp, int *num_rgs)
904 : {
905 4 : struct osi_node *n, *next = NULL;
906 : struct lgfs2_rgrp_tree *rgd, *exp;
907 :
908 4 : *num_rgs = sdp->md.riinode->i_size / sizeof(struct gfs2_rindex) ;
909 188 : for (n = osi_first(&sdp->rgtree); n; n = next) {
910 184 : next = osi_next(n);
911 184 : rgd = (struct lgfs2_rgrp_tree *)n;
912 184 : exp = lgfs2_rgrp_insert(&rgcalc, rgd->rt_addr);
913 184 : if (exp == NULL) {
914 0 : fprintf(stderr, "Out of memory in %s\n", __FUNCTION__);
915 0 : exit(-1);
916 : }
917 184 : exp->rt_data0 = rgd->rt_data0;
918 184 : exp->rt_data = rgd->rt_data;
919 184 : exp->rt_length = rgd->rt_length;
920 184 : exp->rt_bitbytes = rgd->rt_bitbytes;
921 184 : exp->rt_flags = rgd->rt_flags;
922 184 : exp->rt_free = rgd->rt_free;
923 184 : exp->rt_igeneration = rgd->rt_igeneration;
924 184 : exp->rt_dinodes = rgd->rt_dinodes;
925 184 : exp->rt_skip = rgd->rt_skip;
926 184 : exp->rt_bits = NULL;
927 184 : lgfs2_compute_bitstructs(sdp->sd_bsize, exp);
928 : }
929 4 : nrgrp = *num_rgs;
930 4 : return 0;
931 : }
932 :
933 : /*
934 : * rindex_repair - try to repair a damaged rg index (rindex)
935 : * cx - the fsck context; cx->sdp is the file system whose rindex is repaired.
936 : * trust_lvl - This is how much we trust the rindex file.
937 : * BLIND_FAITH means we take the rindex at face value (we return 0 at once).
938 : * YE_OF_LITTLE_FAITH means the rindex seems sane (*ok must be set), so its own values are what we expect.
939 : * OPEN_MINDED means it might be okay, but we should verify it against a calculated layout.
940 : * DISTRUST means it's not to be trusted, so we should go to greater lengths to build it from scratch.
941 : * INDIGNATION means we have corruption, but the file system was converted from GFS via gfs2_convert, and its rgrps are not on nice boundaries thanks to previous gfs_grow ops. Lovely.
942 : * Returns: 0 on success, nonzero (-1) if the rindex was not repaired. ok is also handed on to lgfs2_rindex_read().
943 : */
944 68 : int rindex_repair(struct fsck_cx *cx, int trust_lvl, int *ok)
945 : {
946 68 : struct lgfs2_sbd *sdp = cx->sdp;
947 68 : struct osi_node *n, *next = NULL, *e, *enext; /* n walks sdp->rgtree (actual), e walks rgcalc (expected) */
948 : int error, discrepancies, percent;
949 68 : int calc_rg_count = 0, rg; /* calc_rg_count: how many rgrps we expect */
950 : struct gfs2_rindex buf; /* one rindex entry, filled in by lgfs2_rindex_out() */
951 :
952 68 : if (trust_lvl == BLIND_FAITH)
953 56 : return 0;
954 12 : if (trust_lvl == YE_OF_LITTLE_FAITH) { /* if rindex seems sane */
955 : /* Don't free previous incarnations in memory, if any.
956 : * We need them to copy in the next function, so we do not call:
957 : * lgfs2_rgrp_free(sdp, &sdp->rgtree); */
958 6 : if (!(*ok)) {
959 2 : log_err(_("The rindex file does not meet our "
960 : "expectations.\n"));
961 2 : return -1;
962 : }
963 4 : error = expect_rindex_sanity(sdp, &calc_rg_count);
964 4 : if (error) {
965 0 : lgfs2_rgrp_free(sdp, &rgcalc);
966 0 : return error;
967 : }
968 6 : } else if (trust_lvl == OPEN_MINDED) { /* If we can't trust RG index */
969 : /* Free previous incarnations in memory, if any. */
970 2 : lgfs2_rgrp_free(sdp, &sdp->rgtree);
971 :
972 : /* Calculate our own RG index for comparison */
973 2 : error = rindex_calculate(sdp, &calc_rg_count);
974 2 : if (error) { /* If calculated RGs don't match the fs */
975 0 : lgfs2_rgrp_free(sdp, &rgcalc);
976 0 : return -1;
977 : }
978 4 : } else if (trust_lvl == DISTRUST) { /* If we can't trust RG index */
979 : /* Free previous incarnations in memory, if any. */
980 2 : lgfs2_rgrp_free(sdp, &sdp->rgtree);
981 :
982 2 : error = rindex_rebuild(cx, &calc_rg_count, 0);
983 2 : if (error) {
984 2 : log_crit( _("Error rebuilding rgrp list.\n"));
985 2 : lgfs2_rgrp_free(sdp, &rgcalc);
986 2 : return -1;
987 : }
988 2 : } else if (trust_lvl == INDIGNATION) { /* If we can't trust anything */
989 : /* Free previous incarnations in memory, if any. */
990 2 : lgfs2_rgrp_free(sdp, &sdp->rgtree);
991 :
992 2 : error = rindex_rebuild(cx, &calc_rg_count, 1);
993 2 : if (error) {
994 0 : log_crit( _("Error rebuilding rgrp list.\n"));
995 0 : lgfs2_rgrp_free(sdp, &rgcalc);
996 0 : return -1;
997 : }
998 : }
999 : /* Read in the rindex (the return value of the read is not checked) */
1000 8 : sdp->rgtree.osi_node = NULL; /* Just to be safe */
1001 8 : lgfs2_rindex_read(sdp, &nrgrp, ok);
1002 8 : if (sdp->md.riinode->i_size % sizeof(struct gfs2_rindex)) {
1003 0 : log_warn( _("WARNING: rindex file has an invalid size.\n"));
1004 0 : if (!query(cx, _("Truncate the rindex size? (y/n)"))) {
1005 0 : log_err(_("The rindex was not repaired.\n"));
1006 0 : lgfs2_rgrp_free(sdp, &rgcalc);
1007 0 : lgfs2_rgrp_free(sdp, &sdp->rgtree);
1008 0 : return -1;
1009 : }
1010 0 : sdp->md.riinode->i_size /= sizeof(struct gfs2_rindex); /* round the size down to a whole number of entries */
1011 0 : sdp->md.riinode->i_size *= sizeof(struct gfs2_rindex);
1012 0 : lgfs2_bmodified(sdp->md.riinode->i_bh);
1013 0 : log_err(_("Changing rindex size to %"PRIu64".\n"), sdp->md.riinode->i_size);
1014 : }
1015 8 : log_warn(_("L%d: number of rgs expected = %"PRIu64".\n"), trust_lvl + 1, nrgrp);
1016 8 : if (calc_rg_count != nrgrp) {
1017 : int most_that_fit;
1018 :
1019 2 : log_warn( _("L%d: They don't match; either (1) the fs was "
1020 : "extended, (2) an odd\n"), trust_lvl + 1);
1021 2 : log_warn( _("L%d: rgrp size was used, or (3) we have a corrupt "
1022 : "rg index.\n"), trust_lvl + 1);
1023 : /* If the trust level is OPEN_MINDED, we would have calculated
1024 : the rindex based on the device size. If it's not the same
1025 : number, don't trust it. Complain about the discrepancy,
1026 : then try again with a little more DISTRUST. */
1027 3 : if ((trust_lvl < DISTRUST) ||
1028 1 : !query(cx, _("Attempt to use what rgrps we can? (y/n)"))) {
1029 1 : lgfs2_rgrp_free(sdp, &rgcalc);
1030 1 : lgfs2_rgrp_free(sdp, &sdp->rgtree);
1031 1 : log_err(_("The rindex was not repaired.\n"));
1032 1 : return -1;
1033 : }
1034 : /* We cannot grow rindex at this point. Since pass1 has not
1035 : yet run, we can't allocate blocks. Therefore we must use
1036 : whatever will fit in the space given. */
1037 1 : most_that_fit = sdp->md.riinode->i_size / sizeof(struct gfs2_rindex);
1038 1 : log_debug(_("The most we can fit is %d rgrps\n"),
1039 : most_that_fit);
1040 1 : if (most_that_fit < calc_rg_count)
1041 0 : calc_rg_count = most_that_fit;
1042 1 : log_err(_("Attempting to fix rindex with %d rgrps.\n"),
1043 : calc_rg_count);
1044 : }
1045 : /* ------------------------------------------------------------- */
1046 : /* Now compare the rindex to what we think it should be. */
1047 : /* See how far off our expected values are. If too much, abort. */
1048 : /* The theory is: if we calculated the index to have 32 RGs and */
1049 : /* we have a large number that are completely wrong, we should */
1050 : /* abandon this method of recovery and try a better one. */
1051 : /* ------------------------------------------------------------- */
1052 7 : discrepancies = 0;
1053 7 : for (rg = 0, n = osi_first(&sdp->rgtree), e = osi_first(&rgcalc);
1054 434 : n && e && !fsck_abort && rg < calc_rg_count; rg++) { /* rg counts every step, including mismatched ones */
1055 : struct lgfs2_rgrp_tree *expected, *actual;
1056 :
1057 427 : next = osi_next(n);
1058 427 : enext = osi_next(e);
1059 :
1060 427 : expected = (struct lgfs2_rgrp_tree *)e;
1061 427 : actual = (struct lgfs2_rgrp_tree *)n;
1062 427 : if (actual->rt_addr < expected->rt_addr) { /* only the tree that is behind advances */
1063 5 : n = next;
1064 5 : discrepancies++;
1065 5 : log_info(_("%d addr: 0x%"PRIx64" < 0x%"PRIx64" * mismatch\n"),
1066 : rg + 1, actual->rt_addr, expected->rt_addr);
1067 5 : continue;
1068 422 : } else if (expected->rt_addr < actual->rt_addr) {
1069 76 : e = enext;
1070 76 : discrepancies++;
1071 76 : log_info(_("%d addr: 0x%"PRIx64" > 0x%"PRIx64" * mismatch\n"),
1072 : rg + 1, actual->rt_addr, expected->rt_addr);
1073 76 : continue;
1074 : }
1075 346 : if (actual->rt_length != expected->rt_length ||
1076 344 : actual->rt_data0 != expected->rt_data0 ||
1077 344 : actual->rt_data != expected->rt_data ||
1078 344 : actual->rt_bitbytes != expected->rt_bitbytes) {
1079 2 : discrepancies++;
1080 2 : log_info(_("%d addr: 0x%"PRIx64" 0x%"PRIx64" * has mismatch\n"),
1081 : rg + 1, actual->rt_addr, expected->rt_addr);
1082 : }
1083 346 : n = next;
1084 346 : e = enext;
1085 : }
1086 7 : if (rg) {
1087 : /* Check to see if more than BAD_RG_PERCENT_TOLERANCE percent of the rgrps are wrong. */
1088 7 : percent = (discrepancies * 100) / rg;
1089 7 : if (percent > BAD_RG_PERCENT_TOLERANCE) {
1090 1 : log_warn( _("Level %d didn't work. Too many "
1091 : "discrepancies.\n"), trust_lvl + 1);
1092 1 : log_warn( _("%d out of %d rgrps (%d percent) did not "
1093 : "match what was expected.\n"),
1094 : discrepancies, rg, percent);
1095 1 : lgfs2_rgrp_free(sdp, &rgcalc);
1096 1 : lgfs2_rgrp_free(sdp, &sdp->rgtree);
1097 1 : return -1;
1098 : }
1099 : }
1100 6 : log_debug("Calculated %d rgrps: Total: %d Match: %d Mismatch: %d\n",
1101 : calc_rg_count, rg, rg - discrepancies, discrepancies);
1102 : /* ------------------------------------------------------------- */
1103 : /* Now fix the rindex entries that differ from what we expect. */
1104 : /* Our rindex should be pretty predictable unless we've grown */
1105 : /* so look for index problems first before looking at the rgs. */
1106 : /* ------------------------------------------------------------- */
1107 6 : for (rg = 0, n = osi_first(&sdp->rgtree), e = osi_first(&rgcalc);
1108 352 : e && !fsck_abort && rg < calc_rg_count; rg++) {
1109 : struct lgfs2_rgrp_tree *expected, *actual;
1110 :
1111 346 : if (n)
1112 346 : next = osi_next(n);
1113 346 : enext = osi_next(e);
1114 346 : expected = (struct lgfs2_rgrp_tree *)e;
1115 346 : actual = (struct lgfs2_rgrp_tree *)n;
1116 :
1117 : /* If the next "actual" rgrp in memory is too far away,
1118 : fill in a new one with the expected value. -or-
1119 : If we ran out of actual rindex entries due to rindex
1120 : damage, fill in a new one with the expected values. */
1121 346 : if (!n || /* end of actual rindex */
1122 346 : expected->rt_addr < actual->rt_addr) {
1123 1 : log_err(_("Entry missing from rindex: 0x%"PRIx64"\n"),
1124 : expected->rt_addr);
1125 1 : actual = lgfs2_rgrp_insert(&sdp->rgtree, expected->rt_addr);
1126 1 : if (!actual) {
1127 0 : log_err(_("Out of memory!\n"));
1128 0 : break;
1129 : }
1130 1 : rindex_modified = 1;
1131 1 : next = n; /* Ensure that the old actual gets checked
1132 : against a new expected, since we added */
1133 : } else { /* ri_compare logs a mismatch, overwrites the actual field and sets rindex_modified */
1134 345 : ri_compare(rg, actual, expected, rt_addr, PRIx64);
1135 345 : ri_compare(rg, actual, expected, rt_length, PRIx32);
1136 345 : ri_compare(rg, actual, expected, rt_data0, PRIx64);
1137 345 : ri_compare(rg, actual, expected, rt_data, PRIx32);
1138 345 : ri_compare(rg, actual, expected, rt_bitbytes, PRIx32);
1139 : }
1140 : /* If we modified the index, write it back to disk. */
1141 346 : if (rindex_modified) {
1142 2 : if (query(cx, _("Fix the index? (y/n)"))) {
1143 2 : lgfs2_rindex_out(expected, (char *)&buf);
1144 2 : lgfs2_writei(sdp->md.riinode, (char *)&buf,
1145 : rg * sizeof(struct gfs2_rindex),
1146 : sizeof(struct gfs2_rindex)); /* entry goes to slot rg of the rindex; the result is not checked */
1147 2 : actual->rt_addr = expected->rt_addr;
1148 2 : actual->rt_length = expected->rt_length;
1149 2 : actual->rt_data0 = expected->rt_data0;
1150 2 : actual->rt_data = expected->rt_data;
1151 2 : actual->rt_bitbytes = expected->rt_bitbytes;
1152 : /* If our rindex was hosed, rt_length is bad */
1153 : /* Therefore, lgfs2_compute_bitstructs might */
1154 : /* have malloced the wrong length for bitmap */
1155 : /* buffers. So we have to redo it. */
1156 2 : if (actual->rt_bits) {
1157 0 : free(actual->rt_bits);
1158 0 : actual->rt_bits = NULL;
1159 : }
1160 : }
1161 : else
1162 0 : log_err( _("rindex not fixed.\n"));
1163 2 : lgfs2_compute_bitstructs(sdp->sd_bsize, actual); /* runs whether or not the fix was accepted */
1164 2 : rindex_modified = 0; /* reset for the next entry */
1165 : }
1166 346 : e = enext;
1167 346 : if (n)
1168 346 : n = next;
1169 : }
1170 : /* ------------------------------------------------------------- */
1171 : /* Read the real RGs and check their integrity. */
1172 : /* Now we can somewhat trust the rindex and the RG addresses, */
1173 : /* so let's read them in, check them and optionally fix them. */
1174 : /* ------------------------------------------------------------- */
1175 352 : for (rg = 0, n = osi_first(&sdp->rgtree); n && !fsck_abort &&
1176 692 : rg < calc_rg_count; n = next, rg++) {
1177 : struct lgfs2_rgrp_tree *rgd;
1178 346 : uint64_t prev_err = 0, errblock;
1179 : int i;
1180 :
1181 346 : next = osi_next(n);
1182 : /* Now we try repeatedly to read in the rg. For every block */
1183 : /* we encounter that has errors, repair it and try again. */
1184 346 : i = 0;
1185 : do {
1186 350 : rgd = (struct lgfs2_rgrp_tree *)n;
1187 350 : errblock = lgfs2_rgrp_read(sdp, rgd);
1188 350 : if (errblock) {
1189 4 : if (errblock == prev_err) /* same block failed again: the repair did not take, give up on this rgrp */
1190 0 : break;
1191 4 : prev_err = errblock;
1192 4 : rewrite_rg_block(cx, rgd, errblock); /* result ignored; a failed repair is caught by the prev_err test above */
1193 : } else {
1194 346 : lgfs2_rgrp_relse(sdp, rgd);
1195 346 : break;
1196 : }
1197 4 : i++;
1198 4 : } while (i < rgd->rt_length); /* at most one repair attempt per block of the rgrp */
1199 : }
1200 6 : lgfs2_rgrp_free(sdp, &rgcalc);
1201 6 : lgfs2_rgrp_free(sdp, &sdp->rgtree);
1202 : /* We shouldn't need to worry about getting the user's permission to
1203 : make changes here. If b_modified is true, they already gave their
1204 : permission. */
1205 6 : if (sdp->md.riinode->i_bh->b_modified) {
1206 0 : log_debug("Syncing rindex inode changes to disk.\n");
1207 0 : lgfs2_dinode_out(sdp->md.riinode, sdp->md.riinode->i_bh->b_data);
1208 0 : lgfs2_bwrite(sdp->md.riinode->i_bh);
1209 : }
1210 6 : return 0;
1211 : }
|