Line data Source code
1 : #include "clusterautoconfig.h"
2 :
3 : #include <errno.h>
4 : #include <inttypes.h>
5 : #include <stdlib.h>
6 : #include <string.h>
7 : #include <unistd.h>
8 : #include <libintl.h>
9 : #define _(String) gettext(String)
10 :
11 : #include <logging.h>
12 : #include "fsck.h"
13 : #include "fs_recovery.h"
14 : #include "libgfs2.h"
15 : #include "metawalk.h"
16 : #include "util.h"
17 :
18 : #define JOURNAL_NAME_SIZE 18
19 : #define JOURNAL_SEQ_TOLERANCE 10
20 :
21 : static unsigned int sd_found_jblocks = 0;
22 : static unsigned int sd_replayed_jblocks = 0;
23 : static unsigned int sd_found_metablocks = 0;
24 : static unsigned int sd_replayed_metablocks = 0;
25 : static unsigned int sd_found_revokes = 0;
26 : static osi_list_t sd_revoke_list;
27 : static unsigned int sd_replay_tail;
28 :
29 : struct revoke_replay {
30 : osi_list_t rr_list;
31 : uint64_t rr_blkno;
32 : unsigned int rr_where;
33 : };
34 :
35 0 : static int revoke_add(struct lgfs2_sbd *sdp, uint64_t blkno, unsigned int where)
36 : {
37 0 : osi_list_t *tmp, *head = &sd_revoke_list;
38 : struct revoke_replay *rr;
39 0 : int found = 0;
40 :
41 0 : osi_list_foreach(tmp, head) {
42 0 : rr = osi_list_entry(tmp, struct revoke_replay, rr_list);
43 0 : if (rr->rr_blkno == blkno) {
44 0 : found = 1;
45 0 : break;
46 : }
47 : }
48 :
49 0 : if (found) {
50 0 : rr->rr_where = where;
51 0 : return 0;
52 : }
53 :
54 0 : rr = malloc(sizeof(struct revoke_replay));
55 0 : if (!rr)
56 0 : return -ENOMEM;
57 :
58 0 : rr->rr_blkno = blkno;
59 0 : rr->rr_where = where;
60 0 : osi_list_add(&rr->rr_list, head);
61 0 : return 1;
62 : }
63 :
64 0 : static int revoke_check(struct lgfs2_sbd *sdp, uint64_t blkno, unsigned int where)
65 : {
66 : osi_list_t *tmp;
67 : struct revoke_replay *rr;
68 : int wrap, a, b;
69 0 : int found = 0;
70 :
71 0 : osi_list_foreach(tmp, &sd_revoke_list) {
72 0 : rr = osi_list_entry(tmp, struct revoke_replay, rr_list);
73 0 : if (rr->rr_blkno == blkno) {
74 0 : found = 1;
75 0 : break;
76 : }
77 : }
78 :
79 0 : if (!found)
80 0 : return 0;
81 :
82 0 : wrap = (rr->rr_where < sd_replay_tail);
83 0 : a = (sd_replay_tail < where);
84 0 : b = (where < rr->rr_where);
85 0 : return (wrap) ? (a || b) : (a && b);
86 : }
87 :
88 0 : static void revoke_clean(struct lgfs2_sbd *sdp)
89 : {
90 0 : osi_list_t *head = &sd_revoke_list;
91 : struct revoke_replay *rr;
92 :
93 0 : while (!osi_list_empty(head)) {
94 0 : rr = osi_list_entry(head->next, struct revoke_replay, rr_list);
95 0 : osi_list_del(&rr->rr_list);
96 0 : free(rr);
97 : }
98 0 : }
99 :
100 0 : static void refresh_rgrp(struct lgfs2_sbd *sdp, struct lgfs2_rgrp_tree *rgd,
101 : struct lgfs2_buffer_head *bh, uint64_t blkno)
102 : {
103 : int i;
104 :
105 0 : log_debug(_("Block is part of rgrp 0x%"PRIx64"; refreshing the rgrp.\n"),
106 : rgd->rt_addr);
107 0 : for (i = 0; i < rgd->rt_length; i++) {
108 0 : if (rgd->rt_addr + i != blkno)
109 0 : continue;
110 :
111 0 : memcpy(rgd->rt_bits[i].bi_data, bh->b_data, sdp->sd_bsize);
112 0 : rgd->rt_bits[i].bi_modified = 1;
113 0 : if (i == 0) /* this is the rgrp itself */
114 0 : lgfs2_rgrp_in(rgd, rgd->rt_bits[0].bi_data);
115 0 : break;
116 : }
117 0 : }
118 :
119 0 : static int buf_lo_scan_elements(struct lgfs2_inode *ip, unsigned int start,
120 : struct gfs2_log_descriptor *ld, __be64 *ptr,
121 : int pass)
122 : {
123 0 : struct lgfs2_sbd *sdp = ip->i_sbd;
124 0 : unsigned int blks = be32_to_cpu(ld->ld_data1);
125 : uint64_t blkno;
126 0 : int error = 0;
127 : struct lgfs2_rgrp_tree *rgd;
128 :
129 0 : if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
130 0 : return 0;
131 :
132 0 : lgfs2_replay_incr_blk(ip, &start);
133 :
134 0 : for (; blks; lgfs2_replay_incr_blk(ip, &start), blks--) {
135 : struct lgfs2_buffer_head *bh_log, *bh_ip;
136 : struct gfs2_meta_header *mhp;
137 :
138 0 : sd_found_metablocks++;
139 :
140 0 : blkno = be64_to_cpu(*ptr);
141 0 : ptr++;
142 0 : if (revoke_check(sdp, blkno, start))
143 0 : continue;
144 :
145 0 : error = lgfs2_replay_read_block(ip, start, &bh_log);
146 0 : if (error)
147 0 : return error;
148 :
149 0 : log_info(_("Journal replay writing metadata block #%"PRIu64" (0x%"PRIx64") for journal+0x%x\n"),
150 : blkno, blkno, start);
151 0 : bh_ip = lgfs2_bget(sdp, blkno);
152 0 : if (!bh_ip) {
153 0 : log_err(_("Out of memory when replaying journals.\n"));
154 0 : lgfs2_bfree(&bh_log);
155 0 : return FSCK_ERROR;
156 : }
157 0 : memcpy(bh_ip->b_data, bh_log->b_data, sdp->sd_bsize);
158 :
159 0 : mhp = (struct gfs2_meta_header *)bh_ip->b_data;
160 0 : if (be32_to_cpu(mhp->mh_magic) != GFS2_MAGIC) {
161 0 : log_err(_("Journal corruption detected at block #%"PRIu64" (0x%"PRIx64") for journal+0x%x.\n"),
162 : blkno, blkno, start);
163 0 : error = -EIO;
164 : } else {
165 0 : lgfs2_bmodified(bh_ip);
166 0 : rgd = lgfs2_blk2rgrpd(sdp, blkno);
167 0 : if (rgd && blkno < rgd->rt_data0)
168 0 : refresh_rgrp(sdp, rgd, bh_ip, blkno);
169 : }
170 :
171 0 : lgfs2_brelse(bh_log);
172 0 : lgfs2_brelse(bh_ip);
173 0 : if (error)
174 0 : break;
175 :
176 0 : sd_replayed_metablocks++;
177 : }
178 0 : return error;
179 : }
180 :
181 0 : static int revoke_lo_scan_elements(struct lgfs2_inode *ip, unsigned int start,
182 : struct gfs2_log_descriptor *ld, __be64 *ptr,
183 : int pass)
184 : {
185 0 : struct lgfs2_sbd *sdp = ip->i_sbd;
186 0 : unsigned int blks = be32_to_cpu(ld->ld_length);
187 0 : unsigned int revokes = be32_to_cpu(ld->ld_data1);
188 : unsigned int offset;
189 : uint64_t blkno;
190 0 : int first = 1;
191 : int error;
192 :
193 0 : if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
194 0 : return 0;
195 :
196 0 : offset = sizeof(struct gfs2_log_descriptor);
197 :
198 0 : for (; blks; lgfs2_replay_incr_blk(ip, &start), blks--) {
199 : struct lgfs2_buffer_head *bh;
200 :
201 0 : error = lgfs2_replay_read_block(ip, start, &bh);
202 0 : if (error)
203 0 : return error;
204 :
205 0 : if (!first) {
206 0 : if (lgfs2_check_meta(bh->b_data, GFS2_METATYPE_LB))
207 0 : continue;
208 : }
209 0 : while (offset + sizeof(uint64_t) <= sdp->sd_bsize) {
210 0 : blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
211 0 : log_info(_("Journal replay processing revoke for block #%"PRIu64" (0x%"PRIx64") for journal+0x%x\n"),
212 : blkno, blkno, start);
213 0 : error = revoke_add(sdp, blkno, start);
214 0 : if (error < 0) {
215 0 : lgfs2_bfree(&bh);
216 0 : return error;
217 : }
218 0 : else if (error)
219 0 : sd_found_revokes++;
220 :
221 0 : if (!--revokes)
222 0 : break;
223 0 : offset += sizeof(uint64_t);
224 : }
225 :
226 0 : lgfs2_bmodified(bh);
227 0 : lgfs2_brelse(bh);
228 0 : offset = sizeof(struct gfs2_meta_header);
229 0 : first = 0;
230 : }
231 0 : return 0;
232 : }
233 :
234 0 : static int databuf_lo_scan_elements(struct lgfs2_inode *ip, unsigned int start,
235 : struct gfs2_log_descriptor *ld,
236 : __be64 *ptr, int pass)
237 : {
238 0 : struct lgfs2_sbd *sdp = ip->i_sbd;
239 0 : unsigned int blks = be32_to_cpu(ld->ld_data1);
240 : uint64_t blkno;
241 : uint64_t esc;
242 0 : int error = 0;
243 :
244 0 : if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
245 0 : return 0;
246 :
247 0 : lgfs2_replay_incr_blk(ip, &start);
248 0 : for (; blks; lgfs2_replay_incr_blk(ip, &start), blks--) {
249 : struct lgfs2_buffer_head *bh_log, *bh_ip;
250 :
251 0 : blkno = be64_to_cpu(*ptr);
252 0 : ptr++;
253 0 : esc = be64_to_cpu(*ptr);
254 0 : ptr++;
255 :
256 0 : sd_found_jblocks++;
257 :
258 0 : if (revoke_check(sdp, blkno, start))
259 0 : continue;
260 :
261 0 : error = lgfs2_replay_read_block(ip, start, &bh_log);
262 0 : if (error)
263 0 : return error;
264 :
265 0 : log_info(_("Journal replay writing data block #%"PRIu64" (0x%"PRIx64") for journal+0x%x\n"),
266 : blkno, blkno, start);
267 0 : bh_ip = lgfs2_bget(sdp, blkno);
268 0 : if (!bh_ip) {
269 0 : log_err(_("Out of memory when replaying journals.\n"));
270 0 : lgfs2_bfree(&bh_log);
271 0 : return FSCK_ERROR;
272 : }
273 0 : memcpy(bh_ip->b_data, bh_log->b_data, sdp->sd_bsize);
274 :
275 : /* Unescape */
276 0 : if (esc) {
277 0 : __be32 *eptr = (__be32 *)bh_ip->b_data;
278 0 : *eptr = cpu_to_be32(GFS2_MAGIC);
279 : }
280 :
281 0 : lgfs2_brelse(bh_log);
282 0 : lgfs2_bmodified(bh_ip);
283 0 : lgfs2_brelse(bh_ip);
284 :
285 0 : sd_replayed_jblocks++;
286 : }
287 0 : return error;
288 : }
289 :
290 : /**
291 : * foreach_descriptor - go through the active part of the log
292 : * @ip: the journal incore inode
293 : * @start: the first log header in the active region
294 : * @end: the last log header (don't process the contents of this entry))
295 : *
296 : * Call a given function once for every log descriptor in the active
297 : * portion of the log.
298 : *
299 : * Returns: errno
300 : */
301 :
302 0 : static int foreach_descriptor(struct lgfs2_inode *ip, unsigned int start,
303 : unsigned int end, int pass)
304 : {
305 : struct lgfs2_buffer_head *bh;
306 : struct gfs2_log_descriptor *ld;
307 0 : int error = 0;
308 : uint32_t length;
309 : __be64 *ptr;
310 0 : unsigned int offset = sizeof(struct gfs2_log_descriptor);
311 0 : offset += sizeof(__be64) - 1;
312 0 : offset &= ~(sizeof(__be64) - 1);
313 :
314 0 : while (start != end) {
315 : struct gfs2_meta_header *mhp;
316 :
317 0 : error = lgfs2_replay_read_block(ip, start, &bh);
318 0 : if (error)
319 0 : return error;
320 0 : mhp = (struct gfs2_meta_header *)bh->b_data;
321 0 : if (be32_to_cpu(mhp->mh_magic) != GFS2_MAGIC) {
322 0 : lgfs2_bmodified(bh);
323 0 : lgfs2_brelse(bh);
324 0 : return -EIO;
325 : }
326 0 : ld = (struct gfs2_log_descriptor *)bh->b_data;
327 0 : length = be32_to_cpu(ld->ld_length);
328 :
329 0 : if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
330 : struct lgfs2_log_header lh;
331 :
332 0 : error = lgfs2_get_log_header(ip, start, &lh);
333 0 : if (!error) {
334 0 : lgfs2_replay_incr_blk(ip, &start);
335 0 : lgfs2_bmodified(bh);
336 0 : lgfs2_brelse(bh);
337 0 : continue;
338 : }
339 0 : if (error == 1) {
340 0 : log_err(_("Journal corruption detected at "
341 : "journal+0x%x.\n"), start);
342 0 : error = -EIO;
343 : }
344 0 : lgfs2_bmodified(bh);
345 0 : lgfs2_brelse(bh);
346 0 : return error;
347 0 : } else if (lgfs2_check_meta(bh->b_data, GFS2_METATYPE_LD)) {
348 0 : lgfs2_bmodified(bh);
349 0 : lgfs2_brelse(bh);
350 0 : return -EIO;
351 : }
352 0 : ptr = (__be64 *)(bh->b_data + offset);
353 0 : error = databuf_lo_scan_elements(ip, start, ld, ptr, pass);
354 0 : if (error) {
355 0 : lgfs2_bmodified(bh);
356 0 : lgfs2_brelse(bh);
357 0 : return error;
358 : }
359 0 : error = buf_lo_scan_elements(ip, start, ld, ptr, pass);
360 0 : if (error) {
361 0 : lgfs2_bmodified(bh);
362 0 : lgfs2_brelse(bh);
363 0 : return error;
364 : }
365 0 : error = revoke_lo_scan_elements(ip, start, ld, ptr, pass);
366 0 : if (error) {
367 0 : lgfs2_bmodified(bh);
368 0 : lgfs2_brelse(bh);
369 0 : return error;
370 : }
371 :
372 0 : while (length--)
373 0 : lgfs2_replay_incr_blk(ip, &start);
374 :
375 0 : lgfs2_bmodified(bh);
376 0 : lgfs2_brelse(bh);
377 : }
378 :
379 0 : return 0;
380 : }
381 :
382 : /**
383 : * check_journal_seq_no - Check and Fix log header sequencing problems
384 : * @ip: the journal incore inode
385 : * @fix: if 1, fix the sequence numbers, otherwise just report the problem
386 : *
387 : * Returns: The number of sequencing errors (hopefully none).
388 : */
389 62 : static int check_journal_seq_no(struct lgfs2_inode *ip, int fix)
390 : {
391 62 : int error = 0, wrapped = 0;
392 62 : uint32_t jd_blocks = ip->i_size / ip->i_sbd->sd_bsize;
393 : uint32_t blk;
394 : struct lgfs2_log_header lh;
395 62 : uint64_t highest_seq = 0, lowest_seq = 0, prev_seq = 0;
396 62 : int new = 0;
397 : uint64_t dblock;
398 : struct lgfs2_buffer_head *bh;
399 62 : int seq_errors = 0;
400 :
401 62 : memset(&lh, 0, sizeof(lh));
402 1892414 : for (blk = 0; blk < jd_blocks; blk++) {
403 1892352 : error = lgfs2_get_log_header(ip, blk, &lh);
404 1892352 : if (error == 1) /* if not a log header */
405 0 : continue; /* just journal data--ignore it */
406 1892352 : if (!lowest_seq || lh.lh_sequence < lowest_seq)
407 186 : lowest_seq = lh.lh_sequence;
408 1892352 : if (!highest_seq || lh.lh_sequence > highest_seq)
409 801226 : highest_seq = lh.lh_sequence;
410 1892352 : if (lh.lh_sequence > prev_seq) {
411 1892290 : prev_seq = lh.lh_sequence;
412 1892290 : continue;
413 : }
414 : /* The sequence number is not higher than the previous one,
415 : so it's either wrap-around or a sequencing problem. */
416 62 : if (!wrapped && lh.lh_sequence == lowest_seq) {
417 62 : wrapped = 1;
418 62 : prev_seq = lh.lh_sequence;
419 62 : continue;
420 : }
421 0 : log_err(_("Journal block %"PRIu32" (0x%"PRIx32"): sequence no. 0x%"PRIx64" "
422 : "out of order.\n"), blk, blk, lh.lh_sequence);
423 0 : log_info(_("Low: 0x%"PRIx64", High: 0x%"PRIx64", Prev: 0x%"PRIx64"\n"),
424 : lowest_seq, highest_seq, prev_seq);
425 0 : seq_errors++;
426 0 : if (!fix)
427 0 : continue;
428 0 : highest_seq++;
429 0 : prev_seq = highest_seq;
430 0 : log_warn(_("Renumbering it as 0x%"PRIx64"\n"), highest_seq);
431 0 : if (lgfs2_block_map(ip, blk, &new, &dblock, NULL, 0)) {
432 0 : log_crit(_("Failed to map block 0x%"PRIx32" in journal at 0x%"PRIx64": %s\n"),
433 : blk, ip->i_num.in_addr, strerror(errno));
434 0 : exit(1);
435 : }
436 0 : bh = lgfs2_bread(ip->i_sbd, dblock);
437 0 : ((struct gfs2_log_header *)bh->b_data)->lh_sequence = cpu_to_be64(highest_seq);
438 0 : lgfs2_bmodified(bh);
439 0 : lgfs2_brelse(bh);
440 : }
441 62 : if (seq_errors && fix) {
442 0 : log_err(_("%d sequence errors fixed.\n"), seq_errors);
443 0 : seq_errors = 0;
444 : }
445 62 : return seq_errors;
446 : }
447 :
448 : /**
449 : * preen_is_safe - Can we safely preen the file system?
450 : *
451 : * If a preen option was specified (-a or -p) we're likely to have been
452 : * called from rc.sysinit. We need to determine whether this is shared
453 : * storage or not. If it's local storage (locking protocol==lock_nolock)
454 : * it's safe to preen the file system. If it's lock_dlm, it's likely
455 : * mounted by other nodes in the cluster, which is dangerous and therefore,
456 : * we should warn the user to run fsck.gfs2 manually when it's safe.
457 : */
458 14 : int preen_is_safe(struct lgfs2_sbd *sdp, const struct fsck_options * const opts)
459 : {
460 14 : if (!opts->preen)
461 14 : return 1; /* not called by rc.sysinit--we're okay to preen */
462 0 : if (opts->force)
463 0 : return 1; /* user's responsibility--we're okay to preen */
464 0 : if (!memcmp(sdp->sd_lockproto + 5, "nolock", 6))
465 0 : return 1; /* local file system--preen is okay */
466 0 : return 0; /* might be mounted on another node--not guaranteed safe */
467 : }
468 :
469 : /**
470 : * gfs2_recover_journal - recovery a given journal
471 : * @ip: the journal incore inode
472 : * j: which journal to check
473 : * preen: Was preen (-a or -p) specified?
474 : * force_check: Was -f specified to force the check?
475 : * @was_clean: if the journal was originally clean, this is set to 1.
476 : * if the journal was dirty from the start, this is set to 0.
477 : *
478 : * Acquire the journal's lock, check to see if the journal is clean, and
479 : * do recovery if necessary.
480 : *
481 : * Returns: errno
482 : */
483 :
484 62 : static int recover_journal(struct lgfs2_inode *ip, int j, struct fsck_cx *cx, int *was_clean)
485 : {
486 62 : struct lgfs2_sbd *sdp = ip->i_sbd;
487 : struct lgfs2_log_header head;
488 : unsigned int pass;
489 : int error;
490 :
491 62 : *was_clean = 0;
492 62 : log_info( _("jid=%u: Looking at journal...\n"), j);
493 :
494 62 : osi_list_init(&sd_revoke_list);
495 62 : error = lgfs2_find_jhead(ip, &head);
496 62 : if (!error) {
497 62 : error = check_journal_seq_no(ip, 0);
498 62 : if (error > JOURNAL_SEQ_TOLERANCE) {
499 0 : log_err( _("Journal #%d (\"journal%d\") has %d "
500 : "sequencing errors; tolerance is %d.\n"),
501 : j+1, j, error, JOURNAL_SEQ_TOLERANCE);
502 0 : goto out;
503 : }
504 : }
505 62 : if (error) {
506 0 : if (cx->opts->no) {
507 0 : log_err( _("Journal #%d (\"journal%d\") is corrupt\n"),j+1, j);
508 0 : log_err( _("Not fixing it due to the -n option.\n"));
509 0 : goto out;
510 : }
511 0 : if (!preen_is_safe(sdp, cx->opts)) {
512 0 : log_err(_("Journal #%d (\"journal%d\") is corrupt.\n"),
513 : j+1, j);
514 0 : log_err(_("I'm not fixing it because it may be unsafe:\n"
515 : "Locking protocol is not lock_nolock and "
516 : "the -a or -p option was specified.\n"));
517 0 : log_err(_("Please make sure no node has the file system "
518 : "mounted then rerun fsck.gfs2 manually "
519 : "without -a or -p.\n"));
520 0 : goto out;
521 : }
522 0 : if (!query(cx, _("\nJournal #%d (\"journal%d\") is "
523 : "corrupt. Okay to repair it? (y/n)"),
524 : j+1, j)) {
525 0 : log_err( _("jid=%u: The journal was not repaired.\n"),
526 : j);
527 0 : goto out;
528 : }
529 0 : log_info( _("jid=%u: Repairing journal...\n"), j);
530 0 : error = check_journal_seq_no(ip, 1);
531 0 : if (error) {
532 0 : log_err( _("jid=%u: Unable to fix the bad journal.\n"),
533 : j);
534 0 : goto out;
535 : }
536 0 : error = lgfs2_find_jhead(ip, &head);
537 0 : if (error) {
538 0 : log_err( _("jid=%u: Unable to fix the bad journal.\n"),
539 : j);
540 0 : goto out;
541 : }
542 0 : log_err( _("jid=%u: The journal was successfully fixed.\n"),
543 : j);
544 : }
545 62 : if (head.lh_flags & GFS2_LOG_HEAD_UNMOUNT) {
546 62 : log_info( _("jid=%u: Journal is clean.\n"), j);
547 62 : *was_clean = 1;
548 62 : return 0;
549 : }
550 0 : if (cx->opts->no) {
551 0 : log_err(_("Journal #%d (\"journal%d\") is dirty\n"),j+1, j);
552 0 : log_err(_("not replaying due to the -n option.\n"));
553 0 : goto out;
554 : }
555 0 : if (!preen_is_safe(sdp, cx->opts)) {
556 0 : log_err( _("Journal #%d (\"journal%d\") is dirty\n"), j+1, j);
557 0 : log_err( _("I'm not replaying it because it may be unsafe:\n"
558 : "Locking protocol is not lock_nolock and "
559 : "the -a or -p option was specified.\n"));
560 0 : log_err( _("Please make sure no node has the file system "
561 : "mounted then rerun fsck.gfs2 manually "
562 : "without -a or -p.\n"));
563 0 : error = FSCK_ERROR;
564 0 : goto out;
565 : }
566 0 : if (!query(cx, _("\nJournal #%d (\"journal%d\") is dirty. Okay to "
567 : "replay it? (y/n)"), j+1, j))
568 0 : goto reinit;
569 :
570 0 : log_info( _("jid=%u: Replaying journal...\n"), j);
571 :
572 0 : sd_found_jblocks = sd_replayed_jblocks = 0;
573 0 : sd_found_metablocks = sd_replayed_metablocks = 0;
574 0 : sd_found_revokes = 0;
575 0 : sd_replay_tail = head.lh_tail;
576 0 : for (pass = 0; pass < 2; pass++) {
577 0 : error = foreach_descriptor(ip, head.lh_tail,
578 : head.lh_blkno, pass);
579 0 : if (error) {
580 0 : log_err(_("Error found during journal replay.\n"));
581 0 : goto out;
582 : }
583 : }
584 0 : log_info( _("jid=%u: Found %u revoke tags\n"), j, sd_found_revokes);
585 0 : revoke_clean(sdp);
586 0 : error = lgfs2_clean_journal(ip, &head);
587 0 : if (error)
588 0 : goto out;
589 0 : log_err( _("jid=%u: Replayed %u of %u journaled data blocks\n"),
590 : j, sd_replayed_jblocks, sd_found_jblocks);
591 0 : log_err( _("jid=%u: Replayed %u of %u metadata blocks\n"),
592 : j, sd_replayed_metablocks, sd_found_metablocks);
593 :
594 : /* Check for errors and give them the option to reinitialize the
595 : journal. */
596 0 : out:
597 0 : if (!error) {
598 0 : log_info( _("jid=%u: Done\n"), j);
599 0 : return 0;
600 : }
601 0 : log_err( _("jid=%u: Failed\n"), j);
602 0 : reinit:
603 0 : if (query(cx, _("Do you want to clear the journal instead? (y/n)"))) {
604 0 : error = lgfs2_write_journal(sdp->md.journal[j], sdp->sd_bsize,
605 0 : sdp->md.journal[j]->i_size /
606 0 : sdp->sd_bsize);
607 0 : log_err(_("jid=%u: journal was cleared.\n"), j);
608 : } else {
609 0 : log_err( _("jid=%u: journal not cleared.\n"), j);
610 : }
611 0 : return error;
612 : }
613 :
614 : /* We can't use the rangecheck function from pass1 because we haven't gone
615 : * through initialization properly yet. */
616 1899322 : static int rangecheck_jblock(struct lgfs2_inode *ip, uint64_t block)
617 : {
618 1899322 : if((block > ip->i_sbd->fssize) || (block <= LGFS2_SB_ADDR(ip->i_sbd))) {
619 0 : log_info( _("Bad block pointer (out of range) found in "
620 : "journal inode %"PRIu64" (0x%"PRIx64").\n"),
621 : ip->i_num.in_addr, ip->i_num.in_addr);
622 0 : return META_ERROR; /* Exits check_metatree quicker */
623 : }
624 1899322 : return META_IS_GOOD;
625 : }
626 :
627 6970 : static int rangecheck_jmeta(struct fsck_cx *cx, struct iptr iptr, struct lgfs2_buffer_head **bh, int h,
628 : int *is_valid, int *was_duplicate, void *private)
629 : {
630 6970 : struct lgfs2_inode *ip = iptr.ipt_ip;
631 6970 : uint64_t block = iptr_block(iptr);
632 : int rc;
633 :
634 6970 : *bh = NULL;
635 6970 : *was_duplicate = 0;
636 6970 : *is_valid = 0;
637 6970 : rc = rangecheck_jblock(ip, block);
638 6970 : if (rc == META_IS_GOOD) {
639 6970 : *bh = lgfs2_bread(ip->i_sbd, block);
640 6970 : *is_valid = (lgfs2_check_meta((*bh)->b_data, GFS2_METATYPE_IN) == 0);
641 6970 : if (!(*is_valid)) {
642 0 : log_err( _("Journal at block %"PRIu64" (0x%"PRIx64") has a bad "
643 : "indirect block pointer %"PRIu64" (0x%"PRIx64") "
644 : "(points to something that is not an "
645 : "indirect block).\n"),
646 : ip->i_num.in_addr, ip->i_num.in_addr, block, block);
647 0 : lgfs2_brelse(*bh);
648 0 : *bh = NULL;
649 0 : return META_SKIP_FURTHER;
650 : }
651 : }
652 6970 : return rc;
653 : }
654 :
655 1892352 : static int rangecheck_jdata(struct fsck_cx *cx, struct lgfs2_inode *ip, uint64_t metablock,
656 : uint64_t block, void *private,
657 : struct lgfs2_buffer_head *bh, __be64 *ptr)
658 : {
659 1892352 : return rangecheck_jblock(ip, block);
660 : }
661 :
662 : static struct metawalk_fxns rangecheck_journal = {
663 : .private = NULL,
664 : .invalid_meta_is_fatal = 1,
665 : .check_metalist = rangecheck_jmeta,
666 : .check_data = rangecheck_jdata,
667 : };
668 :
669 : /*
670 : * replay_journals - replay the journals
671 : * sdp: the super block
672 : * preen: Was preen (-a or -p) specified?
673 : * force_check: Was -f specified to force the check?
674 : * @clean_journals - set to the number of clean journals we find
675 : *
676 : * There should be a flag to the fsck to enable/disable this
677 : * feature. The fsck falls back to clearing the journal if an
678 : * inconsistency is found, but only for the bad journal.
679 : *
680 : * Returns: 0 on success, -1 on failure
681 : */
682 56 : int replay_journals(struct fsck_cx *cx, int *clean_journals)
683 : {
684 56 : struct lgfs2_sbd *sdp = cx->sdp;
685 56 : int dirty_journals = 0;
686 56 : int gave_msg = 0;
687 56 : int error = 0;
688 56 : int clean = 0;
689 : int i;
690 :
691 56 : *clean_journals = 0;
692 :
693 56 : cx->jnl_size = LGFS2_DEFAULT_JSIZE;
694 :
695 119 : for(i = 0; i < sdp->md.journals; i++) {
696 63 : if (sdp->md.journal[i]) {
697 63 : error = check_metatree(cx, sdp->md.journal[i],
698 : &rangecheck_journal);
699 63 : if (error)
700 : /* Don't use fsck_inode_put here because it's a
701 : system file and we need to dismantle it. */
702 1 : lgfs2_inode_put(&sdp->md.journal[i]);
703 63 : error = 0; /* bad journal is non-fatal */
704 : }
705 63 : if (!sdp->md.journal[i]) {
706 1 : log_err(_("File system journal \"journal%d\" is "
707 : "missing or corrupt: pass1 will try to "
708 : "recreate it.\n"), i);
709 1 : continue;
710 : }
711 62 : if (!error) {
712 62 : uint64_t jsize = sdp->md.journal[i]->i_size / (1024 * 1024);
713 :
714 62 : if (cx->jnl_size == LGFS2_DEFAULT_JSIZE && jsize &&
715 55 : jsize != cx->jnl_size)
716 15 : cx->jnl_size = jsize;
717 62 : error = recover_journal(sdp->md.journal[i], i, cx, &clean);
718 62 : if (!clean)
719 0 : dirty_journals++;
720 62 : if (!gave_msg && dirty_journals == 1 && !cx->opts->no &&
721 0 : preen_is_safe(sdp, cx->opts)) {
722 0 : gave_msg = 1;
723 0 : log_notice( _("Recovering journals (this may "
724 : "take a while)\n"));
725 : }
726 62 : *clean_journals += clean;
727 : }
728 : }
729 : /* Sync the buffers to disk so we get a fresh start. */
730 56 : fsync(sdp->device_fd);
731 56 : return error;
732 : }
733 :
734 : /*
735 : * ji_update - fill in journal info
736 : * sdp: the incore superblock pointer
737 : *
738 : * Given the inode for the journal index, read in all
739 : * the journal inodes.
740 : *
741 : * Returns: 0 on success, -1 on failure
742 : */
743 60 : int ji_update(struct lgfs2_sbd *sdp)
744 : {
745 60 : struct lgfs2_inode *jip, *ip = sdp->md.jiinode;
746 : char journal_name[JOURNAL_NAME_SIZE];
747 : int i;
748 :
749 60 : if (!ip) {
750 0 : log_crit(_("Journal index inode not found.\n"));
751 0 : return -1;
752 : }
753 :
754 : /* The per_node directory will have 3 directory entries per node,
755 : plus two for "." and "..". So we subtract the 2 and divide by 3.
756 : If per_node is missing or damaged, we have to trust jindex has
757 : the correct number of entries. */
758 60 : if (sdp->md.pinode) /* if per_node was read in properly */
759 60 : sdp->md.journals = (sdp->md.pinode->i_entries - 2) / 3;
760 : else
761 0 : sdp->md.journals = ip->i_entries - 2;
762 :
763 60 : if (!(sdp->md.journal = calloc(sdp->md.journals,
764 : sizeof(struct lgfs2_inode *)))) {
765 0 : log_err(_("Unable to allocate journal index\n"));
766 0 : return -1;
767 : }
768 60 : memset(journal_name, 0, sizeof(*journal_name));
769 127 : for (i = 0; i < sdp->md.journals; i++) {
770 : /* FIXME check snprintf return code */
771 : int len;
772 67 : len = snprintf(journal_name, JOURNAL_NAME_SIZE, "journal%u", i);
773 67 : jip = lgfs2_lookupi(sdp->md.jiinode, journal_name, len);
774 67 : sdp->md.journal[i] = jip;
775 : }
776 60 : return 0;
777 : }
778 :
/* Log a jindex entry whose name is not acceptable. At most 63 characters
 * of the name are printed. */
static void bad_journalname(const char *filename, int len)
{
	int shown = (len >= 64) ? 63 : len;

	log_debug(_("Journal index entry '%.*s' has an invalid filename.\n"),
	          shown, filename);
}
786 :
787 : /**
788 : * check_jindex_dent - check the jindex directory entries
789 : *
790 : * This function makes sure the directory entries of the jindex are valid.
791 : * If they're not '.' or '..' they better have the form journalXXX.
792 : */
793 187 : static int check_jindex_dent(struct fsck_cx *cx, struct lgfs2_inode *ip, struct gfs2_dirent *dent,
794 : struct gfs2_dirent *prev_de,
795 : struct lgfs2_buffer_head *bh, char *filename,
796 : uint32_t *count, int *lindex, void *priv)
797 : {
798 : struct lgfs2_dirent d;
799 : int i;
800 :
801 187 : lgfs2_dirent_in(&d, dent);
802 :
803 187 : if (d.dr_name_len == 1 && filename[0] == '.')
804 60 : goto dirent_good;
805 127 : if (d.dr_name_len == 2 && filename[0] == '.' && filename[1] == '.')
806 60 : goto dirent_good;
807 :
808 67 : if ((d.dr_name_len >= 11) || /* "journal9999" */
809 67 : (d.dr_name_len <= 7) ||
810 67 : (strncmp(filename, "journal", 7))) {
811 0 : bad_journalname(filename, d.dr_name_len);
812 0 : return -1;
813 : }
814 134 : for (i = 7; i < d.dr_name_len; i++) {
815 67 : if (filename[i] < '0' || filename[i] > '9') {
816 0 : bad_journalname(filename, d.dr_name_len);
817 0 : return -2;
818 : }
819 : }
820 :
821 67 : dirent_good:
822 : /* Return the number of leaf entries so metawalk doesn't flag this
823 : leaf as having none. */
824 187 : *count = be16_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_entries);
825 187 : return 0;
826 : }
827 :
828 : static struct metawalk_fxns jindex_check_fxns = {
829 : .private = NULL,
830 : .check_dentry = check_jindex_dent,
831 : };
832 :
833 0 : int build_jindex(struct fsck_cx *cx)
834 : {
835 0 : struct lgfs2_sbd *sdp = cx->sdp;
836 : struct lgfs2_inode *jindex;
837 :
838 0 : jindex = lgfs2_createi(sdp->master_dir, "jindex", S_IFDIR | 0700,
839 : GFS2_DIF_SYSTEM);
840 0 : if (jindex == NULL) {
841 0 : return errno;
842 : }
843 0 : sdp->md.journal = calloc(sdp->md.journals, sizeof(struct lgfs2_inode *));
844 0 : if (sdp->md.journal == NULL) {
845 0 : lgfs2_inode_put(&jindex);
846 0 : return ENOSPC;
847 : }
848 0 : for (unsigned j = 0; j < sdp->md.journals; j++) {
849 0 : int ret = lgfs2_build_journal(sdp, j, jindex, cx->jnl_size);
850 0 : if (ret)
851 0 : return ret;
852 0 : lgfs2_inode_put(&sdp->md.journal[j]);
853 : }
854 0 : free(sdp->md.journal);
855 0 : lgfs2_inode_put(&jindex);
856 0 : return 0;
857 : }
858 :
859 60 : int init_jindex(struct fsck_cx *cx, int allow_ji_rebuild)
860 : {
861 60 : struct lgfs2_sbd *sdp = cx->sdp;
862 : int error;
863 :
864 60 : log_debug(_("Validating the journal index.\n"));
865 : /* rgrepair requires the journals be read in in order to distinguish
866 : "real" rgrps from rgrps that are just copies left in journals. */
867 : /* coverity[identity_transfer:SUPPRESS] */
868 60 : sdp->md.jiinode = lgfs2_lookupi(sdp->master_dir, "jindex", 6);
869 :
870 60 : if (!sdp->md.jiinode) {
871 : int err;
872 :
873 0 : if (!allow_ji_rebuild) {
874 0 : log_crit(_("Error: jindex and rindex files are both "
875 : "corrupt.\n"));
876 0 : return -1;
877 : }
878 0 : if (!query(cx, _("The gfs2 system jindex inode is missing. "
879 : "Okay to rebuild it? (y/n) "))) {
880 0 : log_crit(_("Error: cannot proceed without a valid "
881 : "jindex file.\n"));
882 0 : return -1;
883 : }
884 :
885 0 : err = build_jindex(cx);
886 0 : if (err) {
887 0 : log_crit(_("Error %d rebuilding jindex\n"), err);
888 0 : return err;
889 : }
890 : /* coverity[pass_freed_arg:SUPPRESS] */
891 0 : sdp->md.jiinode = lgfs2_lookupi(sdp->master_dir, "jindex", 6);
892 : }
893 :
894 : /* check for irrelevant entries in jindex. Can't use check_dir because
895 : that creates and destroys the inode, which we don't want. */
896 :
897 60 : log_debug(_("Checking the integrity of the journal index.\n"));
898 60 : if (sdp->md.jiinode->i_flags & GFS2_DIF_EXHASH)
899 1 : error = check_leaf_blks(cx, sdp->md.jiinode,
900 : &jindex_check_fxns);
901 : else
902 59 : error = check_linear_dir(cx, sdp->md.jiinode,
903 59 : sdp->md.jiinode->i_bh,
904 : &jindex_check_fxns);
905 60 : if (error) {
906 0 : log_err(_("The system journal index is damaged.\n"));
907 0 : if (!query(cx, _("Okay to rebuild it? (y/n) "))) {
908 0 : log_crit(_("Error: cannot proceed without a "
909 : "valid jindex file.\n"));
910 0 : return -1;
911 : }
912 0 : lgfs2_inode_put(&sdp->md.jiinode);
913 0 : lgfs2_dirent_del(sdp->master_dir, "jindex", 6);
914 0 : log_err(_("Corrupt journal index was removed.\n"));
915 0 : error = build_jindex(cx);
916 0 : if (error) {
917 0 : log_err(_("Error rebuilding journal "
918 : "index: Cannot continue.\n"));
919 0 : return error;
920 : }
921 : /* coverity[pass_freed_arg:SUPPRESS] */
922 0 : sdp->md.jiinode = lgfs2_lookupi(sdp->master_dir, "jindex", 6);
923 : }
924 :
925 : /* read in the ji data */
926 60 : if (ji_update(sdp)){
927 0 : log_err( _("Unable to read jindex inode.\n"));
928 0 : return -1;
929 : }
930 60 : return 0;
931 : }
|