Line data Source code
1 : #include "clusterautoconfig.h"
2 :
3 : #include <inttypes.h>
4 : #include <stdio.h>
5 : #include <stdlib.h>
6 : #include <string.h>
7 : #include <sys/types.h>
8 : #include <sys/stat.h>
9 : #include <unistd.h>
10 : #include <libintl.h>
11 : #include <ctype.h>
12 : #include <fcntl.h>
13 : #define _(String) gettext(String)
14 :
15 : #include <logging.h>
16 : #include "libgfs2.h"
17 : #include "link.h"
18 : #include "osi_tree.h"
19 : #include "fsck.h"
20 : #include "util.h"
21 : #include "metawalk.h"
22 : #include "inode_hash.h"
23 :
24 : #define COMFORTABLE_BLKS 5242880 /* 20GB in 4K blocks */
25 :
26 : /* There are two bitmaps: (1) The "blockmap" that fsck uses to keep track of
27 : what block type has been discovered, and (2) The rgrp bitmap. Function
28 : blockmap_set is used to set the former and gfs2_set_bitmap
29 : is used to set the latter. The two must be kept in sync, otherwise
30 : you'll get bitmap mismatches. This function checks the status of the
31 : bitmap whenever the blockmap changes, and fixes it accordingly. */
32 1982897 : int check_n_fix_bitmap(struct fsck_cx *cx, struct lgfs2_rgrp_tree *rgd,
33 : uint64_t blk, int error_on_dinode, int new_state)
34 : {
35 1982897 : struct lgfs2_sbd *sdp = cx->sdp;
36 : int old_state;
37 1982897 : int treat_as_inode = 0;
38 1982897 : int rewrite_rgrp = 0;
39 1982897 : const char *allocdesc[5] = {"free", "data", "unlinked", "inode", "reserved"};
40 : static struct lgfs2_rgrp_tree *prevrgd = NULL;
41 :
42 1982897 : if (prevrgd && rgrp_contains_block(prevrgd, blk)) {
43 1935030 : rgd = prevrgd;
44 47867 : } else if (rgd == NULL || !rgrp_contains_block(rgd, blk)) {
45 519 : rgd = lgfs2_blk2rgrpd(sdp, blk);
46 519 : prevrgd = rgd;
47 : }
48 1982897 : old_state = lgfs2_get_bitmap(sdp, blk, rgd);
49 1982897 : if (old_state < 0) {
50 0 : log_err(_("Block %"PRIu64" (0x%"PRIx64") is not represented in the "
51 : "system bitmap; part of an rgrp or superblock.\n"),
52 : blk, blk);
53 0 : return -1;
54 : }
55 1982897 : if (old_state == new_state)
56 1982894 : return 0;
57 :
58 3 : if (error_on_dinode && old_state == GFS2_BLKST_DINODE &&
59 : new_state != GFS2_BLKST_FREE) {
60 0 : log_debug(_("Reference as '%s' to block %"PRIu64" (0x%"PRIx64") which "
61 : "was marked as dinode. Needs further investigation.\n"),
62 : allocdesc[new_state], blk, blk);
63 0 : return 1;
64 : }
65 : /* Keep these messages as short as possible, or the output gets to be
66 : huge and unmanageable. */
67 3 : log_err(_("Block %"PRIu64" (0x%"PRIx64") was '%s', should be %s.\n"),
68 : blk, blk, allocdesc[old_state], allocdesc[new_state]);
69 3 : if (!query(cx, _("Fix the bitmap? (y/n)"))) {
70 0 : log_err( _("The bitmap inconsistency was ignored.\n"));
71 0 : return 0;
72 : }
73 : /* If the new bitmap state is free (and therefore the old state was
74 : not) we have to add to the free space in the rgrp. If the old
75 : bitmap state was free (and therefore it no longer is) we have to
76 : subtract to the free space. If the type changed from dinode to
77 : data or data to dinode, no change in free space. */
78 3 : lgfs2_set_bitmap(rgd, blk, new_state);
79 3 : if (new_state == GFS2_BLKST_FREE) {
80 2 : rgd->rt_free++;
81 2 : rewrite_rgrp = 1;
82 1 : } else if (old_state == GFS2_BLKST_FREE) {
83 1 : rgd->rt_free--;
84 1 : rewrite_rgrp = 1;
85 : }
86 : /* If we're freeing a dinode, get rid of the data structs for it. */
87 3 : if (old_state == GFS2_BLKST_DINODE ||
88 2 : old_state == GFS2_BLKST_UNLINKED) {
89 : struct dir_info *dt;
90 : struct inode_info *ii;
91 :
92 2 : dt = dirtree_find(cx, blk);
93 2 : if (dt) {
94 0 : dirtree_delete(cx, dt);
95 0 : treat_as_inode = 1;
96 : }
97 2 : ii = inodetree_find(cx, blk);
98 2 : if (ii) {
99 0 : inodetree_delete(cx, ii);
100 0 : treat_as_inode = 1;
101 : } else {
102 2 : treat_as_inode = 1;
103 : }
104 2 : if (old_state == GFS2_BLKST_DINODE) {
105 2 : if (treat_as_inode && rgd->rt_dinodes > 0)
106 2 : rgd->rt_dinodes--;
107 2 : rewrite_rgrp = 1;
108 : }
109 2 : link1_set(&nlink1map, blk, 0);
110 1 : } else if (new_state == GFS2_BLKST_DINODE) {
111 1 : rgd->rt_dinodes++;
112 1 : rewrite_rgrp = 1;
113 : }
114 3 : if (rewrite_rgrp) {
115 3 : lgfs2_rgrp_out(rgd, rgd->rt_bits[0].bi_data);
116 3 : rgd->rt_bits[0].bi_modified = 1;
117 : }
118 3 : log_err( _("The bitmap was fixed.\n"));
119 3 : return 0;
120 : }
121 :
122 : /*
123 : * _fsck_bitmap_set - Mark a block in the bitmap, and adjust free space.
124 : */
125 1982706 : int _fsck_bitmap_set(struct fsck_cx *cx, struct lgfs2_inode *ip, uint64_t bblock,
126 : const char *btype, int mark,
127 : int error_on_dinode, const char *caller, int fline)
128 : {
129 : int error;
130 : static int prev_ino_addr = 0;
131 : static int prev_mark = 0;
132 : static int prevcount = 0;
133 : static const char *prev_caller = NULL;
134 :
135 1982706 : if (print_level >= MSG_DEBUG) {
136 0 : if ((ip->i_num.in_addr == prev_ino_addr) &&
137 0 : (mark == prev_mark) && caller == prev_caller) {
138 0 : log_info("(0x%"PRIx64") ", bblock);
139 0 : prevcount++;
140 0 : if (prevcount > 10) {
141 0 : log_info("\n");
142 0 : prevcount = 0;
143 : }
144 : /* I'm circumventing the log levels here on purpose to make the
145 : output easier to debug. */
146 0 : } else if (ip->i_num.in_addr == bblock) {
147 0 : if (prevcount) {
148 0 : log_info("\n");
149 0 : prevcount = 0;
150 : }
151 0 : printf(_("(%s:%d) %s inode found at block (0x%"PRIx64"): marking as '%s'\n"),
152 : caller, fline, btype, ip->i_num.in_addr, block_type_string(mark));
153 : } else {
154 0 : if (prevcount) {
155 0 : log_info("\n");
156 0 : prevcount = 0;
157 : }
158 0 : printf(_("(%s:%d) inode (0x%"PRIx64") references %s block"
159 : " (0x%"PRIx64"): marking as '%s'\n"),
160 : caller, fline, ip->i_num.in_addr, btype, bblock, block_type_string(mark));
161 : }
162 0 : prev_ino_addr = ip->i_num.in_addr;
163 0 : prev_mark = mark;
164 0 : prev_caller = caller;
165 : }
166 1982706 : error = check_n_fix_bitmap(cx, ip->i_rgd, bblock,
167 : error_on_dinode, mark);
168 1982706 : if (error < 0)
169 0 : log_err(_("This block is not represented in the bitmap.\n"));
170 1982706 : return error;
171 : }
172 :
173 0 : struct duptree *dupfind(struct fsck_cx *cx, uint64_t block)
174 : {
175 0 : struct osi_node *node = cx->dup_blocks.osi_node;
176 :
177 0 : while (node) {
178 0 : struct duptree *dt = (struct duptree *)node;
179 :
180 0 : if (block < dt->block)
181 0 : node = node->osi_left;
182 0 : else if (block > dt->block)
183 0 : node = node->osi_right;
184 : else
185 0 : return dt;
186 : }
187 0 : return NULL;
188 : }
189 :
190 2304 : struct lgfs2_inode *fsck_system_inode(struct lgfs2_sbd *sdp, uint64_t block)
191 : {
192 2304 : if (lf_dip && lf_dip->i_num.in_addr == block)
193 0 : return lf_dip;
194 2304 : return lgfs2_is_system_inode(sdp, block);
195 : }
196 :
/*
 * fsck_load_inode - read an inode, giving special inodes special treatment
 * @sdp: the superblock
 * @block: block address of the inode
 *
 * If fsck_system_inode() recognises @block, that inode is returned directly;
 * only otherwise is the inode read with lgfs2_inode_read().
 */
struct lgfs2_inode *fsck_load_inode(struct lgfs2_sbd *sdp, uint64_t block)
{
	struct lgfs2_inode *sysip = fsck_system_inode(sdp, block);

	return (sysip != NULL) ? sysip : lgfs2_inode_read(sdp, block);
}
208 :
209 : /* fsck_inode_get - same as inode_get() in libgfs2 but system inodes
210 : get special treatment. */
211 189 : struct lgfs2_inode *fsck_inode_get(struct lgfs2_sbd *sdp, struct lgfs2_rgrp_tree *rgd,
212 : struct lgfs2_buffer_head *bh)
213 : {
214 : struct lgfs2_inode *sysip;
215 : struct lgfs2_inode *ip;
216 :
217 189 : sysip = fsck_system_inode(sdp, bh->b_blocknr);
218 189 : if (sysip)
219 0 : return sysip;
220 :
221 189 : ip = lgfs2_inode_get(sdp, bh);
222 189 : if (ip) {
223 189 : ip->i_rgd = rgd;
224 189 : ip->i_bh = bh;
225 : }
226 189 : return ip;
227 : }
228 :
229 : /* fsck_inode_put - same as lgfs2_inode_put() in libgfs2 but system inodes
230 : get special treatment. */
231 378 : void fsck_inode_put(struct lgfs2_inode **ip_in)
232 : {
233 378 : struct lgfs2_inode *ip = *ip_in;
234 : struct lgfs2_inode *sysip;
235 :
236 378 : sysip = fsck_system_inode(ip->i_sbd, ip->i_num.in_addr);
237 378 : if (!sysip)
238 378 : lgfs2_inode_put(ip_in);
239 378 : }
240 :
241 : /**
242 : * dirent_repair - attempt to repair a corrupt directory entry.
243 : * @bh - The buffer header that contains the bad dirent
244 : * @dh - The directory entry in native format
245 : * @dent - The directory entry in on-disk format
246 : * @type - Type of directory (DIR_LINEAR or DIR_EXHASH)
247 : * @first - TRUE if this is the first dirent in the buffer
248 : *
249 : * This function tries to repair a corrupt directory entry. All we
250 : * know at this point is that the length field is wrong.
251 : */
252 0 : static int dirent_repair(struct lgfs2_inode *ip, struct lgfs2_buffer_head *bh,
253 : struct lgfs2_dirent *d, struct gfs2_dirent *dent,
254 : int type, int first)
255 : {
256 : char *bh_end, *p;
257 0 : int calc_de_name_len = 0;
258 :
259 : /* If this is a sentinel, just fix the length and move on */
260 0 : if (first && !d->dr_inum.in_formal_ino) { /* Is it a sentinel? */
261 0 : if (type == DIR_LINEAR)
262 0 : d->dr_rec_len = ip->i_sbd->sd_bsize -
263 : sizeof(struct gfs2_dinode);
264 : else
265 0 : d->dr_rec_len = ip->i_sbd->sd_bsize -
266 : sizeof(struct gfs2_leaf);
267 : } else {
268 0 : bh_end = bh->b_data + ip->i_sbd->sd_bsize;
269 : /* first, figure out a probable name length */
270 0 : p = (char *)dent + sizeof(struct gfs2_dirent);
271 0 : while (*p && /* while there's a non-zero char and */
272 0 : isprint(*p) && /* a printable character and */
273 : p < bh_end) { /* not past end of buffer */
274 0 : calc_de_name_len++;
275 0 : p++;
276 : }
277 0 : if (!calc_de_name_len)
278 0 : return 1;
279 : /* There can often be noise at the end, so only */
280 : /* Trust the shorter of the two in case we have too much */
281 : /* Or rather, only trust ours if it's shorter. */
282 0 : if (!d->dr_name_len || d->dr_name_len > GFS2_FNAMESIZE ||
283 0 : calc_de_name_len < d->dr_name_len) /* if dent is hosed */
284 0 : d->dr_name_len = calc_de_name_len; /* use ours */
285 0 : d->dr_rec_len = GFS2_DIRENT_SIZE(d->dr_name_len);
286 : }
287 0 : lgfs2_dirent_out(d, dent);
288 0 : lgfs2_bmodified(bh);
289 0 : return 0;
290 : }
291 :
292 : /**
293 : * dirblk_truncate - truncate a directory block
294 : */
295 0 : static void dirblk_truncate(struct lgfs2_inode *ip, struct gfs2_dirent *fixb,
296 : struct lgfs2_buffer_head *bh)
297 : {
298 : char *bh_end;
299 : struct lgfs2_dirent d;
300 :
301 0 : bh_end = bh->b_data + ip->i_sbd->sd_bsize;
302 : /* truncate the block to save the most dentries. To do this we
303 : have to patch the previous dent. */
304 0 : lgfs2_dirent_in(&d, fixb);
305 0 : d.dr_rec_len = bh_end - (char *)fixb;
306 0 : lgfs2_dirent_out(&d, fixb);
307 0 : lgfs2_bmodified(bh);
308 0 : }
309 :
310 : /*
311 : * check_entries - check directory entries for a given block
312 : *
313 : * @ip - dinode associated with this leaf block
314 : * bh - buffer for the leaf block
315 : * type - type of block this is (linear or exhash)
316 : * @count - set to the count entries
317 : * @lindex - the last inde
318 : * @pass - structure pointing to pass-specific functions
319 : *
320 : * returns: 0 - good block or it was repaired to be good
321 : * -1 - error occurred
322 : */
323 512 : static int check_entries(struct fsck_cx *cx, struct lgfs2_inode *ip, struct lgfs2_buffer_head *bh,
324 : int type, uint32_t *count, int lindex,
325 : struct metawalk_fxns *pass)
326 : {
327 : struct gfs2_dirent *dent, *prev;
328 : struct lgfs2_dirent d;
329 512 : int error = 0;
330 : char *bh_end;
331 : char *filename;
332 512 : int first = 1;
333 :
334 512 : bh_end = bh->b_data + ip->i_sbd->sd_bsize;
335 :
336 512 : if (type == DIR_LINEAR) {
337 493 : dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_dinode));
338 : } else {
339 19 : dent = (struct gfs2_dirent *)(bh->b_data + sizeof(struct gfs2_leaf));
340 19 : log_debug(_("Checking leaf %"PRIu64" (0x%"PRIx64")\n"),
341 : bh->b_blocknr, bh->b_blocknr);
342 : }
343 :
344 512 : prev = NULL;
345 512 : if (!pass->check_dentry)
346 0 : return 0;
347 :
348 : while (1) {
349 2259 : if (skip_this_pass || fsck_abort)
350 0 : return FSCK_OK;
351 2259 : lgfs2_dirent_in(&d, dent);
352 2259 : filename = (char *)dent + sizeof(struct gfs2_dirent);
353 :
354 2259 : if (d.dr_rec_len < sizeof(struct gfs2_dirent) +
355 2259 : d.dr_name_len ||
356 2259 : (d.dr_inum.in_formal_ino && !d.dr_name_len && !first)) {
357 0 : log_err(_("Directory block %"PRIu64" (0x%"PRIx64"), "
358 : "entry %d of directory %"PRIu64" (0x%"PRIx64") "
359 : "is corrupt.\n"),
360 : bh->b_blocknr, bh->b_blocknr, (*count) + 1,
361 : ip->i_num.in_addr, ip->i_num.in_addr);
362 0 : if (query(cx, _("Attempt to repair it? (y/n) "))) {
363 0 : if (dirent_repair(ip, bh, &d, dent, type,
364 : first)) {
365 0 : if (first) /* make a new sentinel */
366 0 : dirblk_truncate(ip, dent, bh);
367 : else
368 0 : dirblk_truncate(ip, prev, bh);
369 0 : log_err( _("Unable to repair corrupt "
370 : "directory entry; the "
371 : "entry was removed "
372 : "instead.\n"));
373 0 : return 0;
374 : } else {
375 0 : log_err( _("Corrupt directory entry "
376 : "repaired.\n"));
377 : /* keep looping through dentries */
378 : }
379 : } else {
380 0 : log_err( _("Corrupt directory entry ignored, "
381 : "stopped after checking %d entries.\n"),
382 : *count);
383 0 : return 0;
384 : }
385 : }
386 2259 : if (!d.dr_inum.in_formal_ino) {
387 0 : if (first) {
388 0 : log_debug( _("First dirent is a sentinel (place holder).\n"));
389 0 : first = 0;
390 : } else {
391 0 : log_err(_("Directory entry with inode number of "
392 : "zero in leaf %"PRIu64" (0x%"PRIx64") of "
393 : "directory %"PRIu64" (0x%"PRIx64")!\n"),
394 : bh->b_blocknr, bh->b_blocknr,
395 : ip->i_num.in_addr, ip->i_num.in_addr);
396 0 : if (query(cx, _("Attempt to remove it? (y/n) "))) {
397 0 : dirblk_truncate(ip, prev, bh);
398 0 : log_err(_("The corrupt directory "
399 : "entry was removed.\n"));
400 : } else {
401 0 : log_err( _("Corrupt directory entry "
402 : "ignored, stopped after "
403 : "checking %d entries.\n"),
404 : *count);
405 : }
406 0 : return 0;
407 : }
408 : } else {
409 2259 : if (!d.dr_inum.in_addr && first) { /* reverse sentinel */
410 0 : log_debug( _("First dirent is a Sentinel (place holder).\n"));
411 : /* Swap the two to silently make it a proper sentinel */
412 0 : d.dr_inum.in_addr = d.dr_inum.in_formal_ino;
413 0 : d.dr_inum.in_formal_ino = 0;
414 0 : lgfs2_dirent_out(&d, dent);
415 0 : lgfs2_bmodified(bh);
416 : /* Mark dirent buffer as modified */
417 0 : first = 0;
418 : } else {
419 2259 : error = pass->check_dentry(cx, ip, dent, prev, bh,
420 : filename, count,
421 : &lindex,
422 : pass->private);
423 2259 : if (error < 0) {
424 0 : stack;
425 0 : return error;
426 : }
427 : }
428 : }
429 :
430 2259 : if ((char *)dent + d.dr_rec_len >= bh_end){
431 512 : log_debug(_("Last entry processed for %"PRIu64"->%"PRIu64
432 : "(0x%"PRIx64"->0x%"PRIx64"), di_blocks=%"PRIu64".\n"),
433 : ip->i_num.in_addr, bh->b_blocknr, ip->i_num.in_addr,
434 : bh->b_blocknr, ip->i_blocks);
435 512 : break;
436 : }
437 :
438 : /* If we didn't clear the dentry, or if we did, but it
439 : * was the first dentry, set prev */
440 1747 : if (!error || first)
441 1747 : prev = dent;
442 1747 : first = 0;
443 1747 : dent = (struct gfs2_dirent *)((char *)dent + d.dr_rec_len);
444 : }
445 512 : return 0;
446 : }
447 :
448 : /**
449 : * check_leaf - check a leaf block for errors
450 : * Reads in the leaf block
451 : * Leaves the buffer around for further analysis (caller must lgfs2_brelse)
452 : */
453 37 : int check_leaf(struct fsck_cx *cx, struct lgfs2_inode *ip, int lindex, struct metawalk_fxns *pass,
454 : uint64_t *leaf_no, struct lgfs2_leaf *leaf, int *ref_count)
455 : {
456 37 : int error = 0, fix;
457 37 : struct lgfs2_buffer_head *lbh = NULL;
458 : struct gfs2_leaf *lfp;
459 37 : uint32_t count = 0;
460 37 : struct lgfs2_sbd *sdp = ip->i_sbd;
461 : const char *msg;
462 37 : int di_depth = ip->i_depth;
463 :
464 : /* Make sure the block number is in range. */
465 37 : if (!valid_block_ip(ip, *leaf_no)) {
466 0 : log_err( _("Leaf block #%"PRIu64" (0x%"PRIx64") is out of range for "
467 : "directory #%"PRIu64" (0x%"PRIx64") at index %d (0x%x).\n"),
468 : *leaf_no, *leaf_no, ip->i_num.in_addr, ip->i_num.in_addr,
469 : lindex, lindex);
470 0 : msg = _("that is out of range");
471 0 : goto bad_leaf;
472 : }
473 :
474 : /* Try to read in the leaf block. */
475 37 : lbh = lgfs2_bread(sdp, *leaf_no);
476 : /* Make sure it's really a valid leaf block. */
477 37 : if (lgfs2_check_meta(lbh->b_data, GFS2_METATYPE_LF)) {
478 0 : msg = _("that is not really a leaf");
479 0 : goto bad_leaf;
480 : }
481 37 : if (pass->check_leaf_depth)
482 9 : error = pass->check_leaf_depth(cx, ip, *leaf_no, *ref_count, lbh);
483 :
484 37 : if (error >= 0 && pass->check_leaf) {
485 18 : error = pass->check_leaf(cx, ip, *leaf_no, pass->private);
486 18 : if (error == -EEXIST) {
487 0 : log_info(_("Previous reference to leaf %"PRIu64" (0x%"PRIx64") "
488 : "has already checked it; skipping.\n"),
489 : *leaf_no, *leaf_no);
490 0 : lgfs2_brelse(lbh);
491 0 : return error;
492 : }
493 : }
494 : /* Early versions of GFS2 had an endianess bug in the kernel that set
495 : lf_dirent_format to cpu_to_be16(GFS2_FORMAT_DE). This was fixed
496 : to use cpu_to_be32(), but we should check for incorrect values and
497 : replace them with the correct value. */
498 :
499 37 : lgfs2_leaf_in(leaf, lbh->b_data);
500 37 : if (leaf->lf_dirent_format == (GFS2_FORMAT_DE << 16)) {
501 0 : log_debug( _("incorrect lf_dirent_format at leaf #%" PRIu64
502 : "\n"), *leaf_no);
503 0 : leaf->lf_dirent_format = GFS2_FORMAT_DE;
504 0 : lgfs2_leaf_out(leaf, lbh->b_data);
505 0 : lgfs2_bmodified(lbh);
506 0 : log_debug( _("Fixing lf_dirent_format.\n"));
507 : }
508 :
509 37 : lfp = (struct gfs2_leaf *)lbh->b_data;
510 : /* Make sure it's really a leaf. */
511 37 : if (be32_to_cpu(lfp->lf_header.mh_type) != GFS2_METATYPE_LF) {
512 0 : log_err(_("Inode %"PRIu64" (0x%"PRIx64") points to bad leaf %"PRIu64
513 : " (0x%"PRIx64").\n"),
514 : ip->i_num.in_addr, ip->i_num.in_addr, *leaf_no, *leaf_no);
515 0 : msg = _("that is not a leaf");
516 0 : goto bad_leaf;
517 : }
518 :
519 37 : if (pass->check_dentry && is_dir(ip)) {
520 19 : error = check_entries(cx, ip, lbh, DIR_EXHASH, &count, lindex,
521 : pass);
522 :
523 19 : if (skip_this_pass || fsck_abort)
524 0 : goto out;
525 :
526 19 : if (error < 0) {
527 0 : stack;
528 0 : goto out; /* This seems wrong: needs investigation */
529 : }
530 :
531 19 : if (count == leaf->lf_entries)
532 19 : goto out;
533 :
534 : /* release and re-read the leaf in case check_entries
535 : changed it. */
536 0 : lgfs2_brelse(lbh);
537 0 : lbh = lgfs2_bread(sdp, *leaf_no);
538 0 : lgfs2_leaf_in(leaf, lbh->b_data);
539 0 : if (count != leaf->lf_entries) {
540 0 : log_err(_("Leaf %"PRIu64" (0x%"PRIx64") entry count in "
541 : "directory %"PRIu64" (0x%"PRIx64") does not match "
542 : "number of entries found - is %u, found %u\n"),
543 : *leaf_no, *leaf_no, ip->i_num.in_addr, ip->i_num.in_addr,
544 : leaf->lf_entries, count);
545 0 : if (query(cx, _("Update leaf entry count? (y/n) "))) {
546 0 : leaf->lf_entries = count;
547 0 : lgfs2_leaf_out(leaf, lbh->b_data);
548 0 : lgfs2_bmodified(lbh);
549 0 : log_warn( _("Leaf entry count updated\n"));
550 : } else
551 0 : log_err( _("Leaf entry count left in "
552 : "inconsistent state\n"));
553 : }
554 : }
555 18 : out:
556 37 : if (di_depth < ip->i_depth) {
557 0 : log_debug(_("Depth of directory %"PRIu64" (0x%"PRIx64") changed from "
558 : "%d to %d; adjusting ref_count from %d to %d\n"),
559 : ip->i_num.in_addr, ip->i_num.in_addr, di_depth, ip->i_depth,
560 : *ref_count, (*ref_count) << (ip->i_depth - di_depth));
561 0 : (*ref_count) <<= (ip->i_depth - di_depth);
562 : }
563 37 : lgfs2_brelse(lbh);
564 37 : if (error < 0)
565 0 : return error;
566 37 : return 0;
567 :
568 0 : bad_leaf:
569 0 : if (lbh)
570 0 : lgfs2_brelse(lbh);
571 0 : if (pass->repair_leaf) {
572 : /* The leaf we read in is bad so we need to repair it. */
573 0 : fix = pass->repair_leaf(cx, ip, leaf_no, lindex, *ref_count, msg);
574 0 : if (fix < 0)
575 0 : return fix;
576 :
577 : }
578 0 : if (di_depth < ip->i_depth) {
579 0 : log_debug(_("Depth of directory %"PRIu64" (0x%"PRIx64") changed from "
580 : "%d to %d. Adjusting ref_count from %d to %d\n"),
581 : ip->i_num.in_addr, ip->i_num.in_addr, di_depth, ip->i_depth,
582 : *ref_count, (*ref_count) << (ip->i_depth - di_depth));
583 0 : (*ref_count) <<= (ip->i_depth - di_depth);
584 : }
585 0 : return 1;
586 : }
587 :
/*
 * u64cmp - qsort() comparison callback for uint64_t values
 *
 * Returns: 1 if *p1 > *p2, -1 if *p1 < *p2, 0 if they are equal.
 */
static int u64cmp(const void *p1, const void *p2)
{
	const uint64_t lhs = *(const uint64_t *)p1;
	const uint64_t rhs = *(const uint64_t *)p2;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
	return (lhs > rhs) - (lhs < rhs);
}
600 :
601 29 : static void dir_leaf_reada(struct lgfs2_inode *ip, __be64 *tbl, unsigned hsize)
602 : {
603 29 : uint64_t *t = alloca(hsize * sizeof(uint64_t));
604 : uint64_t leaf_no;
605 29 : struct lgfs2_sbd *sdp = ip->i_sbd;
606 29 : unsigned n = 0;
607 : unsigned i;
608 :
609 957 : for (i = 0; i < hsize; i++) {
610 928 : leaf_no = be64_to_cpu(tbl[i]);
611 928 : if (valid_block_ip(ip, leaf_no))
612 928 : t[n++] = leaf_no * sdp->sd_bsize;
613 : }
614 29 : qsort(t, n, sizeof(uint64_t), u64cmp);
615 957 : for (i = 0; i < n; i++)
616 928 : (void)posix_fadvise(sdp->device_fd, t[i], sdp->sd_bsize, POSIX_FADV_WILLNEED);
617 29 : }
618 :
619 : /* Checks exhash directory entries */
620 29 : int check_leaf_blks(struct fsck_cx *cx, struct lgfs2_inode *ip, struct metawalk_fxns *pass)
621 : {
622 29 : int error = 0;
623 29 : unsigned hsize = (1 << ip->i_depth);
624 : uint64_t leaf_no, leaf_next;
625 : uint64_t first_ok_leaf, orig_di_blocks;
626 : struct lgfs2_buffer_head *lbh;
627 : int lindex;
628 29 : struct lgfs2_sbd *sdp = ip->i_sbd;
629 : int ref_count, orig_ref_count, orig_di_depth, orig_di_height;
630 : __be64 *tbl;
631 : int chained_leaf, tbl_valid;
632 :
633 29 : tbl = get_dir_hash(ip);
634 29 : if (tbl == NULL) {
635 0 : perror("get_dir_hash");
636 0 : return -1;
637 : }
638 29 : tbl_valid = 1;
639 29 : orig_di_depth = ip->i_depth;
640 29 : orig_di_height = ip->i_height;
641 29 : orig_di_blocks = ip->i_blocks;
642 :
643 : /* Turn off system readahead */
644 29 : (void)posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_RANDOM);
645 :
646 : /* Readahead */
647 29 : dir_leaf_reada(ip, tbl, hsize);
648 :
649 29 : if (pass->check_hash_tbl) {
650 7 : error = pass->check_hash_tbl(cx, ip, tbl, hsize, pass->private);
651 7 : if (error < 0) {
652 0 : free(tbl);
653 0 : (void)posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
654 0 : return error;
655 : }
656 : /* If hash table changes were made, read it in again. */
657 7 : if (error) {
658 0 : free(tbl);
659 0 : tbl = get_dir_hash(ip);
660 0 : if (tbl == NULL) {
661 0 : perror("get_dir_hash");
662 0 : return -1;
663 : }
664 : }
665 : }
666 :
667 : /* Find the first valid leaf pointer in range and use it as our "old"
668 : leaf. That way, bad blocks at the beginning will be overwritten
669 : with the first valid leaf. */
670 29 : first_ok_leaf = leaf_no = -1;
671 29 : for (lindex = 0; lindex < hsize; lindex++) {
672 29 : leaf_no = be64_to_cpu(tbl[lindex]);
673 29 : if (valid_block_ip(ip, leaf_no)) {
674 29 : lbh = lgfs2_bread(sdp, leaf_no);
675 : /* Make sure it's really a valid leaf block. */
676 29 : if (lgfs2_check_meta(lbh->b_data, GFS2_METATYPE_LF) == 0) {
677 29 : lgfs2_brelse(lbh);
678 29 : first_ok_leaf = leaf_no;
679 29 : break;
680 : }
681 0 : lgfs2_brelse(lbh);
682 : }
683 : }
684 29 : if (first_ok_leaf == -1) { /* no valid leaf found */
685 0 : log_err(_("Directory #%"PRIu64" (0x%"PRIx64") has no valid leaf blocks\n"),
686 : ip->i_num.in_addr, ip->i_num.in_addr);
687 0 : free(tbl);
688 0 : (void)posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
689 0 : return 1;
690 : }
691 29 : lindex = 0;
692 29 : leaf_next = -1;
693 66 : while (lindex < hsize) {
694 : int l;
695 :
696 37 : if (fsck_abort)
697 0 : break;
698 :
699 37 : if (!tbl_valid) {
700 0 : free(tbl);
701 0 : log_debug(_("Re-reading 0x%"PRIx64" hash table.\n"), ip->i_num.in_addr);
702 0 : tbl = get_dir_hash(ip);
703 0 : if (tbl == NULL) {
704 0 : perror("get_dir_hash");
705 0 : return -1;
706 : }
707 0 : tbl_valid = 1;
708 0 : orig_di_depth = ip->i_depth;
709 0 : orig_di_height = ip->i_height;
710 0 : orig_di_blocks = ip->i_blocks;
711 : }
712 37 : leaf_no = be64_to_cpu(tbl[lindex]);
713 :
714 : /* count the number of block pointers to this leaf. We don't
715 : need to count the current lindex, because we already know
716 : it's a reference */
717 37 : ref_count = 1;
718 :
719 928 : for (l = lindex + 1; l < hsize; l++) {
720 899 : leaf_next = be64_to_cpu(tbl[l]);
721 899 : if (leaf_next != leaf_no)
722 8 : break;
723 891 : ref_count++;
724 : }
725 37 : orig_ref_count = ref_count;
726 :
727 37 : chained_leaf = 0;
728 0 : do {
729 : struct lgfs2_leaf leaf;
730 37 : if (fsck_abort) {
731 0 : free(tbl);
732 0 : (void)posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
733 0 : return 0;
734 : }
735 37 : error = check_leaf(cx, ip, lindex, pass, &leaf_no, &leaf,
736 : &ref_count);
737 37 : if (ref_count != orig_ref_count) {
738 0 : log_debug(_("Ref count of leaf 0x%"PRIx64
739 : " changed from %d to %d.\n"),
740 : leaf_no, orig_ref_count, ref_count);
741 0 : tbl_valid = 0;
742 : }
743 37 : if (error < 0) {
744 0 : free(tbl);
745 0 : return error;
746 : }
747 37 : if (!leaf.lf_next || error)
748 : break;
749 0 : leaf_no = leaf.lf_next;
750 0 : chained_leaf++;
751 0 : log_debug(_("Leaf chain #%d (0x%"PRIx64") detected.\n"),
752 : chained_leaf, leaf_no);
753 : } while (1); /* while we have chained leaf blocks */
754 37 : if (orig_di_depth != ip->i_depth) {
755 0 : log_debug(_("Depth of 0x%"PRIx64" changed from %d to %d\n"),
756 : ip->i_num.in_addr, orig_di_depth, ip->i_depth);
757 0 : tbl_valid = 0;
758 0 : lindex <<= (ip->i_depth - orig_di_depth);
759 0 : hsize = (1 << ip->i_depth);
760 : }
761 37 : if (orig_di_height != ip->i_height) {
762 0 : log_debug(_("Height of 0x%"PRIx64" changed from %d to %d\n"),
763 : ip->i_num.in_addr, orig_di_height, ip->i_height);
764 0 : tbl_valid = 0;
765 : }
766 37 : if (orig_di_blocks != ip->i_blocks) {
767 0 : log_debug(_("Block count of 0x%"PRIx64" changed from %"PRIu64" to %"PRIu64"\n"),
768 : ip->i_num.in_addr, orig_di_blocks, ip->i_blocks);
769 0 : tbl_valid = 0;
770 : }
771 37 : lindex += ref_count;
772 : } /* for every leaf block */
773 29 : free(tbl);
774 29 : (void)posix_fadvise(sdp->device_fd, 0, 0, POSIX_FADV_NORMAL);
775 29 : return 0;
776 : }
777 :
778 0 : static int check_eattr_entries(struct fsck_cx *cx, struct lgfs2_inode *ip,
779 : struct lgfs2_buffer_head *bh,
780 : struct metawalk_fxns *pass)
781 : {
782 0 : struct gfs2_ea_header *ea_hdr, *ea_hdr_prev = NULL;
783 0 : __be64 *ea_data_ptr = NULL;
784 : int i;
785 0 : int error = 0, err;
786 0 : uint32_t offset = (uint32_t)sizeof(struct gfs2_meta_header);
787 0 : uint32_t offset_limit = ip->i_sbd->sd_bsize - sizeof(struct gfs2_ea_header);
788 :
789 0 : if (!pass->check_eattr_entry)
790 0 : return 0;
791 :
792 0 : ea_hdr = (struct gfs2_ea_header *)(bh->b_data +
793 : sizeof(struct gfs2_meta_header));
794 :
795 : while (1){
796 0 : if (ea_hdr->ea_type == GFS2_EATYPE_UNUSED)
797 0 : error = 0;
798 : else
799 0 : error = pass->check_eattr_entry(cx, ip, bh, ea_hdr,
800 : ea_hdr_prev,
801 : pass->private);
802 0 : if (error < 0) {
803 0 : stack;
804 0 : return -1;
805 : }
806 0 : if (error == 0 && pass->check_eattr_extentry &&
807 0 : ea_hdr->ea_num_ptrs) {
808 0 : uint32_t tot_ealen = 0;
809 0 : struct lgfs2_sbd *sdp = ip->i_sbd;
810 :
811 0 : ea_data_ptr = ((__be64 *)((char *)ea_hdr +
812 0 : sizeof(struct gfs2_ea_header) +
813 0 : ((ea_hdr->ea_name_len + 7) & ~7)));
814 :
815 : /* It is possible when a EA is shrunk
816 : ** to have ea_num_ptrs be greater than
817 : ** the number required for ** data.
818 : ** In this case, the EA ** code leaves
819 : ** the blocks ** there for **
820 : ** reuse........... */
821 :
822 0 : for(i = 0; i < ea_hdr->ea_num_ptrs; i++){
823 0 : err = pass->check_eattr_extentry(cx, ip, i,
824 : ea_data_ptr, bh, tot_ealen,
825 : ea_hdr, ea_hdr_prev,
826 : pass->private);
827 0 : if (err)
828 0 : error = err;
829 0 : tot_ealen += sdp->sd_bsize -
830 : sizeof(struct gfs2_meta_header);
831 0 : ea_data_ptr++;
832 : }
833 : }
834 0 : offset += be32_to_cpu(ea_hdr->ea_rec_len);
835 0 : if (ea_hdr->ea_flags & GFS2_EAFLAG_LAST ||
836 0 : offset > offset_limit || ea_hdr->ea_rec_len == 0) {
837 : break;
838 : }
839 0 : ea_hdr_prev = ea_hdr;
840 0 : ea_hdr = (struct gfs2_ea_header *)
841 : ((char *)(ea_hdr) +
842 0 : be32_to_cpu(ea_hdr->ea_rec_len));
843 : }
844 :
845 0 : return error;
846 : }
847 :
848 : /**
849 : * check_leaf_eattr
850 : * @ip: the inode the eattr comes from
851 : * @block: block number of the leaf
852 : *
853 : * Returns: 0 on success, 1 if removal is needed, -1 on error
854 : */
855 0 : static int check_leaf_eattr(struct fsck_cx *cx, struct lgfs2_inode *ip, uint64_t block,
856 : uint64_t parent, struct metawalk_fxns *pass)
857 : {
858 0 : struct lgfs2_buffer_head *bh = NULL;
859 :
860 0 : if (pass->check_eattr_leaf) {
861 0 : int error = 0;
862 :
863 0 : log_debug(_("Checking EA leaf block #%"PRIu64" (0x%"PRIx64") for "
864 : "inode #%"PRIu64" (0x%"PRIx64").\n"),
865 : block, block, ip->i_num.in_addr, ip->i_num.in_addr);
866 :
867 0 : error = pass->check_eattr_leaf(cx, ip, block, parent, &bh,
868 : pass->private);
869 0 : if (error < 0) {
870 0 : stack;
871 0 : return -1;
872 : }
873 0 : if (error > 0) {
874 0 : if (bh)
875 0 : lgfs2_brelse(bh);
876 0 : return 1;
877 : }
878 0 : if (bh) {
879 0 : error = check_eattr_entries(cx, ip, bh, pass);
880 0 : lgfs2_brelse(bh);
881 : }
882 0 : return error;
883 : }
884 :
885 0 : return 0;
886 : }
887 :
888 : /**
889 : * check_indirect_eattr
890 : * @ip: the inode the eattr comes from
891 : * @indirect_block
892 : *
893 : * Returns: 0 on success -1 on error
894 : */
895 0 : static int check_indirect_eattr(struct fsck_cx *cx, struct lgfs2_inode *ip, uint64_t indirect,
896 : struct lgfs2_buffer_head *indirect_buf,
897 : struct metawalk_fxns *pass)
898 : {
899 0 : int error = 0, err;
900 : __be64 *ea_leaf_ptr, *end;
901 : uint64_t block;
902 0 : struct lgfs2_sbd *sdp = ip->i_sbd;
903 0 : int first_ea_is_bad = 0;
904 0 : uint64_t di_eattr_save = ip->i_eattr;
905 0 : uint64_t offset = sizeof(struct gfs2_meta_header);
906 0 : int leaf_pointers = 0, leaf_pointer_errors = 0;
907 :
908 0 : ea_leaf_ptr = (__be64 *)(indirect_buf->b_data + offset);
909 0 : end = ea_leaf_ptr + ((sdp->sd_bsize - offset) / 8);
910 :
911 0 : while (*ea_leaf_ptr && (ea_leaf_ptr < end)){
912 0 : block = be64_to_cpu(*ea_leaf_ptr);
913 0 : leaf_pointers++;
914 0 : err = check_leaf_eattr(cx, ip, block, indirect, pass);
915 0 : if (err) {
916 0 : error = err;
917 0 : log_err(_("Error detected in leaf block %"PRIu64" (0x%"PRIx64") "
918 : "referenced by indirect block %"PRIu64" (0x%"PRIx64").\n"),
919 : block, block, indirect, indirect);
920 0 : log_err(_("Subsequent leaf block pointers should be "
921 : "cleared.\n"));
922 : }
923 0 : if (error) { /* leaf blocks following an error must also be
924 : treated as error blocks and cleared. */
925 0 : leaf_pointer_errors++;
926 0 : log_err(_("Pointer to EA leaf block %"PRIu64" (0x%"PRIx64") in "
927 : "indirect block %"PRIu64" (0x%"PRIx64") should be cleared.\n"),
928 : block, block, indirect, indirect);
929 : }
930 : /* If the first eattr lead is bad, we can't have a hole, so we
931 : have to treat this as an unrecoverable eattr error and
932 : delete all eattr info. Calling finish_eattr_indir here
933 : causes ip->i_di.di_eattr = 0 and that ensures that
934 : subsequent calls to check_leaf_eattr result in the eattr
935 : check_leaf_block nuking them all "due to previous errors" */
936 0 : if (leaf_pointers == 1 && leaf_pointer_errors == 1) {
937 0 : first_ea_is_bad = 1;
938 0 : if (pass->finish_eattr_indir)
939 0 : pass->finish_eattr_indir(cx, ip, leaf_pointers,
940 : leaf_pointer_errors,
941 : pass->private);
942 0 : } else if (leaf_pointer_errors) {
943 : /* This is a bit tricky. We can't have eattr holes.
944 : So if we have 4 good eattrs, 1 bad eattr and 5 more
945 : good ones: GGGGBGGGGG, we need to tell
946 : check_leaf_eattr to delete all eattrs after the bad
947 : one. So we want: GGGG when we finish. To do that,
948 : we set di_eattr to 0 temporarily. */
949 0 : ip->i_eattr = 0;
950 0 : lgfs2_bmodified(ip->i_bh);
951 : }
952 0 : ea_leaf_ptr++;
953 : }
954 : /* If we temporarily nuked the ea block to prevent checking past
955 : a corrupt ea leaf, we need to restore the saved di_eattr block. */
956 0 : if (di_eattr_save != 0)
957 0 : ip->i_eattr = di_eattr_save;
958 0 : if (pass->finish_eattr_indir) {
959 0 : if (!first_ea_is_bad) {
960 0 : pass->finish_eattr_indir(cx, ip, leaf_pointers,
961 : leaf_pointer_errors,
962 : pass->private);
963 : }
964 0 : if (pass->delete_block && leaf_pointer_errors &&
965 : leaf_pointer_errors == leaf_pointers) {
966 0 : pass->delete_block(cx, ip, indirect, NULL, "leaf", NULL);
967 0 : error = 1;
968 : }
969 : }
970 :
971 0 : return error;
972 : }
973 :
974 : /**
975 : * check_inode_eattr - check the EA's for a single inode
976 : * @ip: the inode whose EA to check
977 : *
978 : * Returns: 0 on success, -1 on error
979 : */
980 924 : int check_inode_eattr(struct fsck_cx *cx, struct lgfs2_inode *ip, struct metawalk_fxns *pass)
981 : {
982 924 : int error = 0;
983 924 : struct lgfs2_buffer_head *indirect_buf = NULL;
984 :
985 924 : if (!ip->i_eattr)
986 924 : return 0;
987 :
988 0 : if (ip->i_flags & GFS2_DIF_EA_INDIRECT){
989 0 : if (!pass->check_eattr_indir)
990 0 : return 0;
991 :
992 0 : log_debug(_("Checking EA indirect block #%"PRIu64" (0x%"PRIx64") for "
993 : "inode #%"PRIu64" (0x%"PRIx64")..\n"),
994 : ip->i_eattr, ip->i_eattr, ip->i_num.in_addr, ip->i_num.in_addr);
995 0 : error = pass->check_eattr_indir(cx, ip, ip->i_eattr, ip->i_num.in_addr,
996 : &indirect_buf, pass->private);
997 0 : if (!error) {
998 0 : error = check_indirect_eattr(cx, ip, ip->i_eattr,
999 : indirect_buf, pass);
1000 0 : if (error)
1001 0 : stack;
1002 : }
1003 0 : if (indirect_buf)
1004 0 : lgfs2_brelse(indirect_buf);
1005 0 : return error;
1006 : }
1007 0 : error = check_leaf_eattr(cx, ip, ip->i_eattr, ip->i_num.in_addr, pass);
1008 0 : if (error)
1009 0 : stack;
1010 :
1011 0 : return error;
1012 : }
1013 :
1014 : /**
1015 : * free_metalist - free all metadata on a multi-level metadata list
1016 : */
1017 929 : static void free_metalist(struct lgfs2_inode *ip, osi_list_t *mlp)
1018 : {
1019 929 : unsigned int height = ip->i_height;
1020 : unsigned int i;
1021 : struct lgfs2_buffer_head *nbh;
1022 :
1023 2587 : for (i = 0; i <= height; i++) {
1024 : osi_list_t *list;
1025 :
1026 1658 : list = &mlp[i];
1027 24605 : while (!osi_list_empty(list)) {
1028 22947 : nbh = osi_list_entry(list->next,
1029 : struct lgfs2_buffer_head, b_altlist);
1030 22947 : if (nbh == ip->i_bh)
1031 929 : osi_list_del_init(&nbh->b_altlist);
1032 : else
1033 22018 : lgfs2_brelse(nbh);
1034 : }
1035 : }
1036 929 : }
1037 :
1038 136 : static void file_ra(struct lgfs2_inode *ip, struct lgfs2_buffer_head *bh,
1039 : int head_size, int maxptrs, int h)
1040 : {
1041 136 : struct lgfs2_sbd *sdp = ip->i_sbd;
1042 136 : uint64_t sblock = 0, block;
1043 136 : int extlen = 0;
1044 : __be64 *p;
1045 :
1046 136 : if (h + 2 == ip->i_height) {
1047 10 : p = (__be64 *)(bh->b_data + head_size);
1048 10 : if (*p && *(p + 1)) {
1049 10 : sblock = be64_to_cpu(*p);
1050 10 : p++;
1051 10 : block = be64_to_cpu(*p);
1052 10 : extlen = block - sblock;
1053 10 : if (extlen > 1 && extlen <= maxptrs) {
1054 0 : (void)posix_fadvise(sdp->device_fd,
1055 0 : sblock * sdp->sd_bsize,
1056 0 : (extlen + 1) * sdp->sd_bsize,
1057 : POSIX_FADV_WILLNEED);
1058 0 : return;
1059 : }
1060 : }
1061 10 : extlen = 0;
1062 : }
1063 136 : for (p = (__be64 *)(bh->b_data + head_size);
1064 32039 : p < (__be64 *)(bh->b_data + sdp->sd_bsize); p++) {
1065 31903 : if (*p) {
1066 7373 : if (!sblock) {
1067 126 : sblock = be64_to_cpu(*p);
1068 126 : extlen = 1;
1069 126 : continue;
1070 : }
1071 7247 : block = be64_to_cpu(*p);
1072 7247 : if (block == sblock + extlen) {
1073 6921 : extlen++;
1074 6921 : continue;
1075 : }
1076 : }
1077 24856 : if (extlen && sblock) {
1078 85 : if (extlen > 1)
1079 73 : extlen--;
1080 85 : (void)posix_fadvise(sdp->device_fd, sblock * sdp->sd_bsize,
1081 85 : extlen * sdp->sd_bsize,
1082 : POSIX_FADV_WILLNEED);
1083 85 : extlen = 0;
1084 85 : p--;
1085 : }
1086 : }
1087 136 : if (extlen)
1088 51 : (void)posix_fadvise(sdp->device_fd, sblock * sdp->sd_bsize,
1089 51 : extlen * sdp->sd_bsize, POSIX_FADV_WILLNEED);
1090 : }
1091 :
1092 22018 : static int do_check_metalist(struct fsck_cx *cx, struct iptr iptr, int height, struct lgfs2_buffer_head **bhp,
1093 : struct metawalk_fxns *pass)
1094 : {
1095 22018 : struct lgfs2_inode *ip = iptr.ipt_ip;
1096 22018 : uint64_t block = iptr_block(iptr);
1097 22018 : int was_duplicate = 0;
1098 22018 : int is_valid = 1;
1099 : int error;
1100 :
1101 22018 : if (pass->check_metalist == NULL)
1102 0 : return 0;
1103 :
1104 22018 : error = pass->check_metalist(cx, iptr, bhp, height, &is_valid,
1105 : &was_duplicate, pass->private);
1106 22018 : if (error == META_ERROR) {
1107 0 : stack;
1108 0 : log_info("\n");
1109 0 : log_info(_("Serious metadata error on block %"PRIu64" (0x%"PRIx64").\n"),
1110 : block, block);
1111 0 : return error;
1112 : }
1113 22018 : if (error == META_SKIP_FURTHER) {
1114 0 : log_info("\n");
1115 0 : log_info(_("Unrecoverable metadata error on block %"PRIu64" (0x%"PRIx64")\n"),
1116 : block, block);
1117 0 : log_info(_("Further metadata will be skipped.\n"));
1118 0 : return error;
1119 : }
1120 22018 : if (!is_valid) {
1121 0 : log_debug("Skipping rejected block %"PRIu64" (0x%"PRIx64")\n", block, block);
1122 0 : if (pass->invalid_meta_is_fatal)
1123 0 : return META_ERROR;
1124 0 : return META_SKIP_ONE;
1125 : }
1126 22018 : if (was_duplicate) {
1127 0 : log_debug("Skipping duplicate %"PRIu64" (0x%"PRIx64")\n", block, block);
1128 0 : return META_SKIP_ONE;
1129 : }
1130 22018 : if (!valid_block_ip(ip, block)) {
1131 0 : log_debug("Skipping invalid block %"PRIu64" (0x%"PRIx64")\n", block, block);
1132 0 : if (pass->invalid_meta_is_fatal)
1133 0 : return META_ERROR;
1134 0 : return META_SKIP_ONE;
1135 : }
1136 22018 : return error;
1137 : }
1138 :
1139 : /**
1140 : * build_and_check_metalist - check a bunch of indirect blocks
1141 : * This includes hash table blocks for directories
1142 : * which are technically "data" in the bitmap.
1143 : *
1144 : * Returns: 0 - all is well, process the blocks this metadata references
1145 : * 1 - something went wrong, but process the sub-blocks anyway
1146 : * -1 - something went wrong, so don't process the sub-blocks
1147 : * @ip:
1148 : * @mlp:
1149 : */
1150 929 : static int build_and_check_metalist(struct fsck_cx *cx, struct lgfs2_inode *ip, osi_list_t *mlp,
1151 : struct metawalk_fxns *pass)
1152 : {
1153 929 : uint32_t height = ip->i_height;
1154 929 : struct lgfs2_buffer_head *metabh = ip->i_bh;
1155 : osi_list_t *prev_list, *cur_list, *tmp;
1156 929 : struct iptr iptr = { .ipt_ip = ip, NULL, 0};
1157 : int h, head_size, iblk_type;
1158 : __be64 *undoptr;
1159 : int maxptrs;
1160 : int error;
1161 :
1162 929 : osi_list_add(&metabh->b_altlist, &mlp[0]);
1163 :
1164 : /* Directories are special. Their 'data' is the hash table, which is
1165 : basically an indirect block list. Their height is not important
1166 : because it checks everything through the hash table using
1167 : "depth" field calculations. However, we still have to check the
1168 : indirect blocks, even if the height == 1. */
1169 929 : if (is_dir(ip))
1170 455 : height++;
1171 :
1172 : /* if (<there are no indirect blocks to check>) */
1173 929 : if (height < 2)
1174 704 : return META_IS_GOOD;
1175 479 : for (h = 1; h < height; h++) {
1176 255 : if (h > 1) {
1177 30 : if (is_dir(ip) &&
1178 0 : h == ip->i_height + 1)
1179 0 : iblk_type = GFS2_METATYPE_JD;
1180 : else
1181 30 : iblk_type = GFS2_METATYPE_IN;
1182 30 : head_size = sizeof(struct gfs2_meta_header);
1183 30 : maxptrs = ip->i_sbd->sd_inptrs;
1184 : } else {
1185 225 : iblk_type = GFS2_METATYPE_DI;
1186 225 : head_size = sizeof(struct gfs2_dinode);
1187 225 : maxptrs = ip->i_sbd->sd_diptrs;
1188 : }
1189 255 : prev_list = &mlp[h - 1];
1190 255 : cur_list = &mlp[h];
1191 :
1192 662 : for (tmp = prev_list->next; tmp != prev_list; tmp = tmp->next) {
1193 408 : iptr.ipt_off = head_size;
1194 408 : iptr.ipt_bh = osi_list_entry(tmp, struct lgfs2_buffer_head, b_altlist);
1195 :
1196 408 : if (lgfs2_check_meta(iptr_buf(iptr), iblk_type)) {
1197 1 : if (pass->invalid_meta_is_fatal)
1198 1 : return META_ERROR;
1199 :
1200 0 : continue;
1201 : }
1202 407 : if (pass->readahead)
1203 136 : file_ra(ip, iptr.ipt_bh, head_size, maxptrs, h);
1204 :
1205 : /* Now check the metadata itself */
1206 95378 : for (; iptr.ipt_off < ip->i_sbd->sd_bsize; iptr.ipt_off += sizeof(uint64_t)) {
1207 94971 : struct lgfs2_buffer_head *nbh = NULL;
1208 :
1209 94971 : if (skip_this_pass || fsck_abort)
1210 0 : return META_IS_GOOD;
1211 94971 : if (!iptr_block(iptr))
1212 72953 : continue;
1213 :
1214 22018 : error = do_check_metalist(cx, iptr, h, &nbh, pass);
1215 22018 : if (error == META_ERROR || error == META_SKIP_FURTHER)
1216 0 : goto error_undo;
1217 22018 : if (error == META_SKIP_ONE)
1218 0 : continue;
1219 22018 : if (!nbh)
1220 7361 : nbh = lgfs2_bread(ip->i_sbd, iptr_block(iptr));
1221 22018 : osi_list_add_prev(&nbh->b_altlist, cur_list);
1222 : } /* for all data on the indirect block */
1223 : } /* for blocks at that height */
1224 : } /* for height */
1225 224 : return 0;
1226 :
1227 0 : error_undo: /* undo what we've done so far for this block */
1228 0 : if (pass->undo_check_meta == NULL)
1229 0 : return error;
1230 :
1231 0 : log_info(_("Undoing the work we did before the error on block %"PRIu64" (0x%"PRIx64").\n"),
1232 : iptr.ipt_bh->b_blocknr, iptr.ipt_bh->b_blocknr);
1233 0 : for (undoptr = (__be64 *)(iptr_buf(iptr) + head_size);
1234 0 : undoptr < iptr_ptr(iptr) && undoptr < iptr_endptr(iptr);
1235 0 : undoptr++) {
1236 0 : uint64_t block = be64_to_cpu(*undoptr);
1237 :
1238 0 : if (block == 0)
1239 0 : continue;
1240 :
1241 0 : pass->undo_check_meta(cx, ip, block, h, pass->private);
1242 : }
1243 0 : return error;
1244 : }
1245 :
1246 22084 : static unsigned int hdr_size(struct lgfs2_buffer_head *bh, unsigned int height)
1247 : {
1248 22084 : if (height > 1)
1249 21835 : return sizeof(struct gfs2_meta_header);
1250 249 : return sizeof(struct gfs2_dinode);
1251 : }
1252 :
/* Location of the data-pointer error recorded while walking an inode. */
struct error_block {
	/* metadata block where error was found */
	uint64_t metablk;
	/* offset in that metadata block where error found */
	int metaoff;
	/* error block */
	uint64_t errblk;
};
1258 :
1259 0 : static void report_data_error(uint64_t metablock, int offset, uint64_t block,
1260 : struct error_block *error_blk,
1261 : int rc, int error)
1262 : {
1263 0 : log_info("\n");
1264 0 : if (rc < 0) {
1265 : /* A fatal error trumps a non-fatal one. */
1266 0 : if ((error_blk->errblk == 0) ||
1267 : (rc < error)) {
1268 0 : log_debug(_("Fatal error on metadata "
1269 : "block 0x%"PRIx64", "
1270 : "offset 0x%x, referencing data "
1271 : "block 0x%"PRIx64" "
1272 : "preempts non-fatal error on "
1273 : "block 0x%"PRIx64"\n"),
1274 : metablock,
1275 : offset,
1276 : block,
1277 : error_blk->errblk);
1278 0 : error_blk->metablk = metablock;
1279 0 : error_blk->metaoff = offset;
1280 0 : error_blk->errblk = block;
1281 : }
1282 0 : log_info(_("Unrecoverable "));
1283 : } else { /* nonfatal error */
1284 0 : if (error_blk->errblk == 0) {
1285 0 : error_blk->metablk = metablock;
1286 0 : error_blk->metaoff = offset;
1287 0 : error_blk->errblk = block;
1288 : }
1289 : }
1290 0 : log_info(_("data block error %d on metadata "
1291 : "block %"PRId64" (0x%"PRIx64"), "
1292 : "offset %d (0x%x), referencing "
1293 : "data block %"PRId64" (0x%"PRIx64").\n"),
1294 : rc,
1295 : metablock, metablock,
1296 : offset, offset,
1297 : block, block);
1298 0 : }
1299 :
1300 : /**
1301 : * check_data - check all data pointers for a given buffer
1302 : * This does not include "data" blocks that are really
1303 : * hash table blocks for directories.
1304 : *
1305 : * @ip:
1306 : *
1307 : * returns: +ENOENT if there are too many bad pointers
1308 : * -1 if a more serious error occurred.
1309 : * 0 if no errors occurred
1310 : * 1 if errors were found and corrected
1311 : * 2 (ENOENT) is there were too many bad pointers
1312 : */
1313 22084 : static int metawalk_check_data(struct fsck_cx *cx, struct lgfs2_inode *ip, struct metawalk_fxns *pass,
1314 : struct lgfs2_buffer_head *bh, unsigned int height,
1315 : uint64_t *blks_checked, struct error_block *error_blk)
1316 : {
1317 22084 : int error = 0, rc = 0;
1318 : uint64_t block;
1319 22084 : __be64 *ptr_start = (__be64 *)(bh->b_data + hdr_size(bh, height));
1320 22084 : __be64 *ptr_end = (__be64 *)(bh->b_data + ip->i_sbd->sd_bsize);
1321 : __be64 *ptr;
1322 22084 : uint64_t metablock = bh->b_blocknr;
1323 :
1324 : /* If there isn't much pointer corruption check the pointers */
1325 22084 : log_debug("Processing data blocks for inode 0x%"PRIx64", metadata block 0x%"PRIx64".\n",
1326 : ip->i_num.in_addr, metablock);
1327 6044718 : for (ptr = ptr_start ; ptr < ptr_end && !fsck_abort; ptr++) {
1328 6022634 : if (!*ptr)
1329 134546 : continue;
1330 :
1331 5888088 : if (skip_this_pass || fsck_abort)
1332 0 : return error;
1333 5888088 : block = be64_to_cpu(*ptr);
1334 : /* It's important that we don't call valid_block() and
1335 : bypass calling check_data on invalid blocks because that
1336 : would defeat the rangecheck_block related functions in
1337 : pass1. Therefore the individual check_data functions
1338 : should do a range check. */
1339 5888088 : rc = pass->check_data(cx, ip, metablock, block, pass->private,
1340 : bh, ptr);
1341 5888088 : if (rc && (!error || (rc < error))) {
1342 0 : report_data_error(metablock, (char *)ptr - bh->b_data, block, error_blk, rc, error);
1343 0 : error = rc;
1344 : }
1345 5888088 : if (rc < 0)
1346 0 : return rc;
1347 5888088 : (*blks_checked)++;
1348 : }
1349 22084 : return error;
1350 : }
1351 :
1352 0 : static int report_undo_data_error(uint64_t metablock, int offset, uint64_t block,
1353 : struct error_block *error_blk,
1354 : int *found_error_blk, int error)
1355 : {
1356 0 : if (metablock == error_blk->metablk &&
1357 0 : offset == error_blk->metaoff &&
1358 0 : block == error_blk->errblk) {
1359 0 : if (error < 0) { /* A fatal error that stopped it? */
1360 0 : log_debug(_("Stopping the undo process: "
1361 : "fatal error block 0x%"PRIx64" was "
1362 : "found at metadata block 0x%"PRIx64","
1363 : "offset 0x%x.\n"),
1364 : error_blk->errblk,
1365 : error_blk->metablk,
1366 : error_blk->metaoff);
1367 0 : return 1;
1368 : }
1369 0 : *found_error_blk = 1;
1370 0 : log_debug(_("The non-fatal error block 0x%"PRIx64" was "
1371 : "found at metadata block 0x%"PRIx64", offset "
1372 : "0x%d, but undo processing will continue "
1373 : "until the end of this metadata block.\n"),
1374 : error_blk->errblk,
1375 : error_blk->metablk,
1376 : error_blk->metaoff);
1377 : }
1378 0 : return 0;
1379 : }
1380 :
1381 0 : static int undo_check_data(struct fsck_cx *cx, struct lgfs2_inode *ip, struct metawalk_fxns *pass,
1382 : struct lgfs2_buffer_head *bh, unsigned int height,
1383 : struct error_block *error_blk, int error)
1384 : {
1385 0 : __be64 *ptr_start = (__be64 *)(bh->b_data + hdr_size(bh, height));
1386 0 : __be64 *ptr_end = (__be64 *)(bh->b_data + ip->i_sbd->sd_bsize);
1387 : __be64 *ptr;
1388 0 : uint64_t metablock = bh->b_blocknr;
1389 0 : int rc = 0;
1390 : uint64_t block;
1391 0 : int found_error_blk = 0;
1392 :
1393 : /* If there isn't much pointer corruption check the pointers */
1394 0 : for (ptr = ptr_start ; ptr < ptr_end && !fsck_abort; ptr++) {
1395 0 : if (!*ptr)
1396 0 : continue;
1397 :
1398 0 : if (skip_this_pass || fsck_abort)
1399 0 : return 1;
1400 0 : block = be64_to_cpu(*ptr);
1401 0 : if (report_undo_data_error(metablock, (char *)ptr - bh->b_data,
1402 : block, error_blk, &found_error_blk, error))
1403 0 : return 1;
1404 0 : rc = pass->undo_check_data(cx, ip, block, pass->private);
1405 0 : if (rc < 0)
1406 0 : return rc;
1407 : }
1408 0 : return found_error_blk;
1409 : }
1410 :
1411 22084 : static unsigned int should_check(struct lgfs2_buffer_head *bh, unsigned int height)
1412 : {
1413 22084 : int iblk_type = height > 1 ? GFS2_METATYPE_IN : GFS2_METATYPE_DI;
1414 :
1415 22084 : return lgfs2_check_meta(bh->b_data, iblk_type) == 0;
1416 : }
1417 :
1418 : /**
1419 : * check_metatree
1420 : * @ip: inode structure in memory
1421 : * @pass: structure passed in from caller to determine the sub-functions
1422 : *
1423 : */
1424 1533 : int check_metatree(struct fsck_cx *cx, struct lgfs2_inode *ip, struct metawalk_fxns *pass)
1425 : {
1426 1533 : unsigned int height = ip->i_height;
1427 1533 : osi_list_t *metalist = alloca((height + 1) * sizeof(*metalist));
1428 : osi_list_t *list, *tmp;
1429 : struct lgfs2_buffer_head *bh;
1430 : unsigned int i;
1431 1533 : uint64_t blks_checked = 0;
1432 : int error, rc;
1433 1533 : int metadata_clean = 0;
1434 1533 : struct error_block error_blk = {0, 0, 0};
1435 1533 : int hit_error_blk = 0;
1436 :
1437 1533 : if (!height && !is_dir(ip))
1438 604 : return 0;
1439 :
1440 : /* metalist has one extra element for directories (see build_and_check_metalist). */
1441 2587 : for (i = 0; i <= height; i++)
1442 1658 : osi_list_init(&metalist[i]);
1443 :
1444 : /* create and check the metadata list for each height */
1445 929 : error = build_and_check_metalist(cx, ip, metalist, pass);
1446 929 : if (error) {
1447 1 : stack;
1448 1 : goto undo_metalist;
1449 : }
1450 :
1451 928 : metadata_clean = 1;
1452 : /* For directories, we've already checked the "data" blocks which
1453 : * comprise the directory hash table, so we perform the directory
1454 : * checks and exit. */
1455 928 : if (is_dir(ip)) {
1456 455 : if (!(ip->i_flags & GFS2_DIF_EXHASH))
1457 434 : goto out;
1458 : /* check validity of leaf blocks and leaf chains */
1459 21 : error = check_leaf_blks(cx, ip, pass);
1460 21 : if (error)
1461 0 : goto undo_metalist;
1462 21 : goto out;
1463 : }
1464 :
1465 : /* check data blocks */
1466 473 : list = &metalist[height - 1];
1467 473 : if (ip->i_blocks > COMFORTABLE_BLKS)
1468 0 : last_reported_fblock = -10000000;
1469 :
1470 22557 : for (tmp = list->next; !error && tmp != list; tmp = tmp->next) {
1471 22084 : if (fsck_abort) {
1472 0 : free_metalist(ip, metalist);
1473 0 : return 0;
1474 : }
1475 22084 : bh = osi_list_entry(tmp, struct lgfs2_buffer_head, b_altlist);
1476 22084 : if (!should_check(bh, height))
1477 0 : continue;
1478 :
1479 22084 : if (pass->check_data)
1480 22084 : error = metawalk_check_data(cx, ip, pass, bh, height,
1481 : &blks_checked, &error_blk);
1482 22084 : if (pass->big_file_msg && ip->i_blocks > COMFORTABLE_BLKS)
1483 0 : pass->big_file_msg(cx, ip, blks_checked);
1484 : }
1485 473 : if (pass->big_file_msg && ip->i_blocks > COMFORTABLE_BLKS) {
1486 0 : log_notice( _("\rLarge file at %"PRIu64" (0x%"PRIx64") - 100 percent "
1487 : "complete. "
1488 : "\n"),
1489 : ip->i_num.in_addr, ip->i_num.in_addr);
1490 0 : fflush(stdout);
1491 : }
1492 473 : undo_metalist:
1493 474 : if (!error)
1494 473 : goto out;
1495 1 : log_err(_("Error: inode %"PRIu64" (0x%"PRIx64") had unrecoverable errors at "
1496 : "metadata block %"PRIu64" (0x%"PRIx64"), offset %d (0x%x), block "
1497 : "%"PRIu64" (0x%"PRIx64").\n"),
1498 : ip->i_num.in_addr, ip->i_num.in_addr, error_blk.metablk, error_blk.metablk,
1499 : error_blk.metaoff, error_blk.metaoff, error_blk.errblk, error_blk.errblk);
1500 1 : if (!query(cx, _("Remove the invalid inode? (y/n) "))) {
1501 0 : free_metalist(ip, metalist);
1502 0 : log_err(_("Invalid inode not deleted.\n"));
1503 0 : return error;
1504 : }
1505 1 : for (i = 0; pass->undo_check_meta && i < height; i++) {
1506 0 : while (!osi_list_empty(&metalist[i])) {
1507 0 : list = &metalist[i];
1508 0 : bh = osi_list_entry(list->next,
1509 : struct lgfs2_buffer_head,
1510 : b_altlist);
1511 0 : log_err(_("Undoing metadata work for block %"PRIu64" (0x%"PRIx64")\n"),
1512 : bh->b_blocknr, bh->b_blocknr);
1513 0 : if (i)
1514 0 : rc = pass->undo_check_meta(cx, ip, bh->b_blocknr,
1515 : i, pass->private);
1516 : else
1517 0 : rc = 0;
1518 0 : if (metadata_clean && rc == 0 && i == height - 1 &&
1519 : !hit_error_blk) {
1520 0 : if (should_check(bh, height)) {
1521 0 : rc = undo_check_data(cx, ip, pass,
1522 : bh,
1523 : height,
1524 : &error_blk,
1525 : error);
1526 0 : if (rc > 0) {
1527 0 : hit_error_blk = 1;
1528 0 : log_err("Reached the error "
1529 : "block undoing work "
1530 : "for inode %"PRIu64" "
1531 : "(0x%"PRIx64").\n",
1532 : ip->i_num.in_addr, ip->i_num.in_addr);
1533 0 : rc = 0;
1534 : }
1535 : }
1536 : }
1537 0 : if (bh == ip->i_bh)
1538 0 : osi_list_del(&bh->b_altlist);
1539 : else
1540 0 : lgfs2_brelse(bh);
1541 : }
1542 : }
1543 : /* There may be leftover duplicate records, so we need to delete them.
1544 : For example, if a metadata block was found to be a duplicate, we
1545 : may not have added it to the metalist, which means it's not there
1546 : to undo. */
1547 1 : delete_all_dups(cx, ip);
1548 : /* Set the dinode as "bad" so it gets deleted */
1549 1 : fsck_bitmap_set(cx, ip, ip->i_num.in_addr, "corrupt", GFS2_BLKST_FREE);
1550 1 : log_err(_("The corrupt inode was invalidated.\n"));
1551 0 : out:
1552 929 : free_metalist(ip, metalist);
1553 929 : return error;
1554 : }
1555 :
1556 : /* Checks stuffed inode directories */
1557 493 : int check_linear_dir(struct fsck_cx *cx, struct lgfs2_inode *ip, struct lgfs2_buffer_head *bh,
1558 : struct metawalk_fxns *pass)
1559 : {
1560 493 : int error = 0;
1561 493 : uint32_t count = 0;
1562 :
1563 493 : error = check_entries(cx, ip, bh, DIR_LINEAR, &count, 0, pass);
1564 493 : if (error < 0) {
1565 0 : stack;
1566 0 : return -1;
1567 : }
1568 :
1569 493 : return error;
1570 : }
1571 :
1572 224 : int check_dir(struct fsck_cx *cx, struct lgfs2_inode *ip, struct metawalk_fxns *pass)
1573 : {
1574 224 : int error = 0;
1575 :
1576 224 : if (ip->i_flags & GFS2_DIF_EXHASH)
1577 7 : error = check_leaf_blks(cx, ip, pass);
1578 : else
1579 217 : error = check_linear_dir(cx, ip, ip->i_bh, pass);
1580 :
1581 224 : if (error < 0)
1582 0 : stack;
1583 :
1584 224 : return error;
1585 : }
|