Line data Source code
1 : #include "clusterautoconfig.h"
2 :
3 : #include <errno.h>
4 : #include <inttypes.h>
5 : #include <stdlib.h>
6 : #include <string.h>
7 : #include <time.h>
8 : #include <unistd.h>
9 : #include <libintl.h>
10 : #define _(String) gettext(String)
11 :
12 : #include <logging.h>
13 : #include "fsck.h"
14 : #include "fs_recovery.h"
15 : #include "libgfs2.h"
16 : #include "metawalk.h"
17 : #include "util.h"
18 :
19 : #define JOURNAL_NAME_SIZE 18
20 : #define JOURNAL_SEQ_TOLERANCE 10
21 :
22 : static unsigned int sd_found_jblocks = 0;
23 : static unsigned int sd_replayed_jblocks = 0;
24 : static unsigned int sd_found_metablocks = 0;
25 : static unsigned int sd_replayed_metablocks = 0;
26 : static unsigned int sd_found_revokes = 0;
27 : static osi_list_t sd_revoke_list;
28 : static unsigned int sd_replay_tail;
29 :
30 : struct revoke_replay {
31 : osi_list_t rr_list;
32 : uint64_t rr_blkno;
33 : unsigned int rr_where;
34 : };
35 :
36 0 : static int revoke_add(struct lgfs2_sbd *sdp, uint64_t blkno, unsigned int where)
37 : {
38 0 : osi_list_t *tmp, *head = &sd_revoke_list;
39 : struct revoke_replay *rr;
40 0 : int found = 0;
41 :
42 0 : osi_list_foreach(tmp, head) {
43 0 : rr = osi_list_entry(tmp, struct revoke_replay, rr_list);
44 0 : if (rr->rr_blkno == blkno) {
45 0 : found = 1;
46 0 : break;
47 : }
48 : }
49 :
50 0 : if (found) {
51 0 : rr->rr_where = where;
52 0 : return 0;
53 : }
54 :
55 0 : rr = malloc(sizeof(struct revoke_replay));
56 0 : if (!rr)
57 0 : return -ENOMEM;
58 :
59 0 : rr->rr_blkno = blkno;
60 0 : rr->rr_where = where;
61 0 : osi_list_add(&rr->rr_list, head);
62 0 : return 1;
63 : }
64 :
65 0 : static int revoke_check(struct lgfs2_sbd *sdp, uint64_t blkno, unsigned int where)
66 : {
67 : osi_list_t *tmp;
68 : struct revoke_replay *rr;
69 : int wrap, a, b;
70 0 : int found = 0;
71 :
72 0 : osi_list_foreach(tmp, &sd_revoke_list) {
73 0 : rr = osi_list_entry(tmp, struct revoke_replay, rr_list);
74 0 : if (rr->rr_blkno == blkno) {
75 0 : found = 1;
76 0 : break;
77 : }
78 : }
79 :
80 0 : if (!found)
81 0 : return 0;
82 :
83 0 : wrap = (rr->rr_where < sd_replay_tail);
84 0 : a = (sd_replay_tail < where);
85 0 : b = (where < rr->rr_where);
86 0 : return (wrap) ? (a || b) : (a && b);
87 : }
88 :
89 0 : static void revoke_clean(struct lgfs2_sbd *sdp)
90 : {
91 0 : osi_list_t *head = &sd_revoke_list;
92 : struct revoke_replay *rr;
93 :
94 0 : while (!osi_list_empty(head)) {
95 0 : rr = osi_list_entry(head->next, struct revoke_replay, rr_list);
96 0 : osi_list_del(&rr->rr_list);
97 0 : free(rr);
98 : }
99 0 : }
100 :
101 0 : static void refresh_rgrp(struct lgfs2_sbd *sdp, struct lgfs2_rgrp_tree *rgd,
102 : struct lgfs2_buffer_head *bh, uint64_t blkno)
103 : {
104 : int i;
105 :
106 0 : log_debug(_("Block is part of rgrp 0x%"PRIx64"; refreshing the rgrp.\n"),
107 : rgd->rt_addr);
108 0 : for (i = 0; i < rgd->rt_length; i++) {
109 0 : if (rgd->rt_addr + i != blkno)
110 0 : continue;
111 :
112 0 : memcpy(rgd->rt_bits[i].bi_data, bh->b_data, sdp->sd_bsize);
113 0 : rgd->rt_bits[i].bi_modified = 1;
114 0 : if (i == 0) /* this is the rgrp itself */
115 0 : lgfs2_rgrp_in(rgd, rgd->rt_bits[0].bi_data);
116 0 : break;
117 : }
118 0 : }
119 :
120 0 : static int buf_lo_scan_elements(struct lgfs2_inode *ip, unsigned int start,
121 : struct gfs2_log_descriptor *ld, __be64 *ptr,
122 : int pass)
123 : {
124 0 : struct lgfs2_sbd *sdp = ip->i_sbd;
125 0 : unsigned int blks = be32_to_cpu(ld->ld_data1);
126 : uint64_t blkno;
127 0 : int error = 0;
128 : struct lgfs2_rgrp_tree *rgd;
129 :
130 0 : if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
131 0 : return 0;
132 :
133 0 : lgfs2_replay_incr_blk(ip, &start);
134 :
135 0 : for (; blks; lgfs2_replay_incr_blk(ip, &start), blks--) {
136 : struct lgfs2_buffer_head *bh_log, *bh_ip;
137 : struct gfs2_meta_header *mhp;
138 :
139 0 : sd_found_metablocks++;
140 :
141 0 : blkno = be64_to_cpu(*ptr);
142 0 : ptr++;
143 0 : if (revoke_check(sdp, blkno, start))
144 0 : continue;
145 :
146 0 : error = lgfs2_replay_read_block(ip, start, &bh_log);
147 0 : if (error)
148 0 : return error;
149 :
150 0 : log_info(_("Journal replay writing metadata block #%"PRIu64" (0x%"PRIx64") for journal+0x%x\n"),
151 : blkno, blkno, start);
152 0 : bh_ip = lgfs2_bget(sdp, blkno);
153 0 : if (!bh_ip) {
154 0 : log_err(_("Out of memory when replaying journals.\n"));
155 0 : lgfs2_bfree(&bh_log);
156 0 : return FSCK_ERROR;
157 : }
158 0 : memcpy(bh_ip->b_data, bh_log->b_data, sdp->sd_bsize);
159 :
160 0 : mhp = (struct gfs2_meta_header *)bh_ip->b_data;
161 0 : if (be32_to_cpu(mhp->mh_magic) != GFS2_MAGIC) {
162 0 : log_err(_("Journal corruption detected at block #%"PRIu64" (0x%"PRIx64") for journal+0x%x.\n"),
163 : blkno, blkno, start);
164 0 : error = -EIO;
165 : } else {
166 0 : lgfs2_bmodified(bh_ip);
167 0 : rgd = lgfs2_blk2rgrpd(sdp, blkno);
168 0 : if (rgd && blkno < rgd->rt_data0)
169 0 : refresh_rgrp(sdp, rgd, bh_ip, blkno);
170 : }
171 :
172 0 : lgfs2_brelse(bh_log);
173 0 : lgfs2_brelse(bh_ip);
174 0 : if (error)
175 0 : break;
176 :
177 0 : sd_replayed_metablocks++;
178 : }
179 0 : return error;
180 : }
181 :
182 0 : static int revoke_lo_scan_elements(struct lgfs2_inode *ip, unsigned int start,
183 : struct gfs2_log_descriptor *ld, __be64 *ptr,
184 : int pass)
185 : {
186 0 : struct lgfs2_sbd *sdp = ip->i_sbd;
187 0 : unsigned int blks = be32_to_cpu(ld->ld_length);
188 0 : unsigned int revokes = be32_to_cpu(ld->ld_data1);
189 : unsigned int offset;
190 : uint64_t blkno;
191 0 : int first = 1;
192 : int error;
193 :
194 0 : if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
195 0 : return 0;
196 :
197 0 : offset = sizeof(struct gfs2_log_descriptor);
198 :
199 0 : for (; blks; lgfs2_replay_incr_blk(ip, &start), blks--) {
200 : struct lgfs2_buffer_head *bh;
201 :
202 0 : error = lgfs2_replay_read_block(ip, start, &bh);
203 0 : if (error)
204 0 : return error;
205 :
206 0 : if (!first) {
207 0 : if (lgfs2_check_meta(bh->b_data, GFS2_METATYPE_LB))
208 0 : continue;
209 : }
210 0 : while (offset + sizeof(uint64_t) <= sdp->sd_bsize) {
211 0 : blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));
212 0 : log_info(_("Journal replay processing revoke for block #%"PRIu64" (0x%"PRIx64") for journal+0x%x\n"),
213 : blkno, blkno, start);
214 0 : error = revoke_add(sdp, blkno, start);
215 0 : if (error < 0) {
216 0 : lgfs2_bfree(&bh);
217 0 : return error;
218 : }
219 0 : else if (error)
220 0 : sd_found_revokes++;
221 :
222 0 : if (!--revokes)
223 0 : break;
224 0 : offset += sizeof(uint64_t);
225 : }
226 :
227 0 : lgfs2_bmodified(bh);
228 0 : lgfs2_brelse(bh);
229 0 : offset = sizeof(struct gfs2_meta_header);
230 0 : first = 0;
231 : }
232 0 : return 0;
233 : }
234 :
235 0 : static int databuf_lo_scan_elements(struct lgfs2_inode *ip, unsigned int start,
236 : struct gfs2_log_descriptor *ld,
237 : __be64 *ptr, int pass)
238 : {
239 0 : struct lgfs2_sbd *sdp = ip->i_sbd;
240 0 : unsigned int blks = be32_to_cpu(ld->ld_data1);
241 : uint64_t blkno;
242 : uint64_t esc;
243 0 : int error = 0;
244 :
245 0 : if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
246 0 : return 0;
247 :
248 0 : lgfs2_replay_incr_blk(ip, &start);
249 0 : for (; blks; lgfs2_replay_incr_blk(ip, &start), blks--) {
250 : struct lgfs2_buffer_head *bh_log, *bh_ip;
251 :
252 0 : blkno = be64_to_cpu(*ptr);
253 0 : ptr++;
254 0 : esc = be64_to_cpu(*ptr);
255 0 : ptr++;
256 :
257 0 : sd_found_jblocks++;
258 :
259 0 : if (revoke_check(sdp, blkno, start))
260 0 : continue;
261 :
262 0 : error = lgfs2_replay_read_block(ip, start, &bh_log);
263 0 : if (error)
264 0 : return error;
265 :
266 0 : log_info(_("Journal replay writing data block #%"PRIu64" (0x%"PRIx64") for journal+0x%x\n"),
267 : blkno, blkno, start);
268 0 : bh_ip = lgfs2_bget(sdp, blkno);
269 0 : if (!bh_ip) {
270 0 : log_err(_("Out of memory when replaying journals.\n"));
271 0 : lgfs2_bfree(&bh_log);
272 0 : return FSCK_ERROR;
273 : }
274 0 : memcpy(bh_ip->b_data, bh_log->b_data, sdp->sd_bsize);
275 :
276 : /* Unescape */
277 0 : if (esc) {
278 0 : __be32 *eptr = (__be32 *)bh_ip->b_data;
279 0 : *eptr = cpu_to_be32(GFS2_MAGIC);
280 : }
281 :
282 0 : lgfs2_brelse(bh_log);
283 0 : lgfs2_bmodified(bh_ip);
284 0 : lgfs2_brelse(bh_ip);
285 :
286 0 : sd_replayed_jblocks++;
287 : }
288 0 : return error;
289 : }
290 :
291 : /**
292 : * foreach_descriptor - go through the active part of the log
293 : * @ip: the journal incore inode
294 : * @start: the first log header in the active region
295 : * @end: the last log header (don't process the contents of this entry))
296 : *
297 : * Call a given function once for every log descriptor in the active
298 : * portion of the log.
299 : *
300 : * Returns: errno
301 : */
302 :
303 0 : static int foreach_descriptor(struct lgfs2_inode *ip, unsigned int start,
304 : unsigned int end, int pass)
305 : {
306 : struct lgfs2_buffer_head *bh;
307 : struct gfs2_log_descriptor *ld;
308 0 : int error = 0;
309 : uint32_t length;
310 : __be64 *ptr;
311 0 : unsigned int offset = sizeof(struct gfs2_log_descriptor);
312 0 : offset += sizeof(__be64) - 1;
313 0 : offset &= ~(sizeof(__be64) - 1);
314 :
315 0 : while (start != end) {
316 : struct gfs2_meta_header *mhp;
317 :
318 0 : error = lgfs2_replay_read_block(ip, start, &bh);
319 0 : if (error)
320 0 : return error;
321 0 : mhp = (struct gfs2_meta_header *)bh->b_data;
322 0 : if (be32_to_cpu(mhp->mh_magic) != GFS2_MAGIC) {
323 0 : lgfs2_bmodified(bh);
324 0 : lgfs2_brelse(bh);
325 0 : return -EIO;
326 : }
327 0 : ld = (struct gfs2_log_descriptor *)bh->b_data;
328 0 : length = be32_to_cpu(ld->ld_length);
329 :
330 0 : if (be32_to_cpu(ld->ld_header.mh_type) == GFS2_METATYPE_LH) {
331 : struct lgfs2_log_header lh;
332 :
333 0 : error = lgfs2_get_log_header(ip, start, &lh);
334 0 : if (!error) {
335 0 : lgfs2_replay_incr_blk(ip, &start);
336 0 : lgfs2_bmodified(bh);
337 0 : lgfs2_brelse(bh);
338 0 : continue;
339 : }
340 0 : if (error == 1) {
341 0 : log_err(_("Journal corruption detected at "
342 : "journal+0x%x.\n"), start);
343 0 : error = -EIO;
344 : }
345 0 : lgfs2_bmodified(bh);
346 0 : lgfs2_brelse(bh);
347 0 : return error;
348 0 : } else if (lgfs2_check_meta(bh->b_data, GFS2_METATYPE_LD)) {
349 0 : lgfs2_bmodified(bh);
350 0 : lgfs2_brelse(bh);
351 0 : return -EIO;
352 : }
353 0 : ptr = (__be64 *)(bh->b_data + offset);
354 0 : error = databuf_lo_scan_elements(ip, start, ld, ptr, pass);
355 0 : if (error) {
356 0 : lgfs2_bmodified(bh);
357 0 : lgfs2_brelse(bh);
358 0 : return error;
359 : }
360 0 : error = buf_lo_scan_elements(ip, start, ld, ptr, pass);
361 0 : if (error) {
362 0 : lgfs2_bmodified(bh);
363 0 : lgfs2_brelse(bh);
364 0 : return error;
365 : }
366 0 : error = revoke_lo_scan_elements(ip, start, ld, ptr, pass);
367 0 : if (error) {
368 0 : lgfs2_bmodified(bh);
369 0 : lgfs2_brelse(bh);
370 0 : return error;
371 : }
372 :
373 0 : while (length--)
374 0 : lgfs2_replay_incr_blk(ip, &start);
375 :
376 0 : lgfs2_bmodified(bh);
377 0 : lgfs2_brelse(bh);
378 : }
379 :
380 0 : return 0;
381 : }
382 :
383 : /**
384 : * check_journal_seq_no - Check and Fix log header sequencing problems
385 : * @ip: the journal incore inode
386 : * @fix: if 1, fix the sequence numbers, otherwise just report the problem
387 : *
388 : * Returns: The number of sequencing errors (hopefully none).
389 : */
390 62 : static int check_journal_seq_no(struct lgfs2_inode *ip, int fix)
391 : {
392 62 : int error = 0, wrapped = 0;
393 62 : uint32_t jd_blocks = ip->i_size / ip->i_sbd->sd_bsize;
394 : uint32_t blk;
395 : struct lgfs2_log_header lh;
396 62 : uint64_t highest_seq = 0, lowest_seq = 0, prev_seq = 0;
397 62 : int new = 0;
398 : uint64_t dblock;
399 : struct lgfs2_buffer_head *bh;
400 62 : int seq_errors = 0;
401 :
402 62 : memset(&lh, 0, sizeof(lh));
403 1892414 : for (blk = 0; blk < jd_blocks; blk++) {
404 1892352 : error = lgfs2_get_log_header(ip, blk, &lh);
405 1892352 : if (error == 1) /* if not a log header */
406 0 : continue; /* just journal data--ignore it */
407 1892352 : if (!lowest_seq || lh.lh_sequence < lowest_seq)
408 186 : lowest_seq = lh.lh_sequence;
409 1892352 : if (!highest_seq || lh.lh_sequence > highest_seq)
410 645800 : highest_seq = lh.lh_sequence;
411 1892352 : if (lh.lh_sequence > prev_seq) {
412 1892290 : prev_seq = lh.lh_sequence;
413 1892290 : continue;
414 : }
415 : /* The sequence number is not higher than the previous one,
416 : so it's either wrap-around or a sequencing problem. */
417 62 : if (!wrapped && lh.lh_sequence == lowest_seq) {
418 62 : wrapped = 1;
419 62 : prev_seq = lh.lh_sequence;
420 62 : continue;
421 : }
422 0 : log_err(_("Journal block %"PRIu32" (0x%"PRIx32"): sequence no. 0x%"PRIx64" "
423 : "out of order.\n"), blk, blk, lh.lh_sequence);
424 0 : log_info(_("Low: 0x%"PRIx64", High: 0x%"PRIx64", Prev: 0x%"PRIx64"\n"),
425 : lowest_seq, highest_seq, prev_seq);
426 0 : seq_errors++;
427 0 : if (!fix)
428 0 : continue;
429 0 : highest_seq++;
430 0 : prev_seq = highest_seq;
431 0 : log_warn(_("Renumbering it as 0x%"PRIx64"\n"), highest_seq);
432 0 : if (lgfs2_block_map(ip, blk, &new, &dblock, NULL, 0)) {
433 0 : log_crit(_("Failed to map block 0x%"PRIx32" in journal at 0x%"PRIx64": %s\n"),
434 : blk, ip->i_num.in_addr, strerror(errno));
435 0 : exit(1);
436 : }
437 0 : bh = lgfs2_bread(ip->i_sbd, dblock);
438 0 : ((struct gfs2_log_header *)bh->b_data)->lh_sequence = cpu_to_be64(highest_seq);
439 0 : lgfs2_bmodified(bh);
440 0 : lgfs2_brelse(bh);
441 : }
442 62 : if (seq_errors && fix) {
443 0 : log_err(_("%d sequence errors fixed.\n"), seq_errors);
444 0 : seq_errors = 0;
445 : }
446 62 : return seq_errors;
447 : }
448 :
449 : /**
450 : * preen_is_safe - Can we safely preen the file system?
451 : *
452 : * If a preen option was specified (-a or -p) we're likely to have been
453 : * called from rc.sysinit. We need to determine whether this is shared
454 : * storage or not. If it's local storage (locking protocol==lock_nolock)
455 : * it's safe to preen the file system. If it's lock_dlm, it's likely
456 : * mounted by other nodes in the cluster, which is dangerous and therefore,
457 : * we should warn the user to run fsck.gfs2 manually when it's safe.
458 : */
459 14 : int preen_is_safe(struct lgfs2_sbd *sdp, const struct fsck_options * const opts)
460 : {
461 14 : if (!opts->preen)
462 14 : return 1; /* not called by rc.sysinit--we're okay to preen */
463 0 : if (opts->force)
464 0 : return 1; /* user's responsibility--we're okay to preen */
465 0 : if (!memcmp(sdp->sd_lockproto + 5, "nolock", 6))
466 0 : return 1; /* local file system--preen is okay */
467 0 : return 0; /* might be mounted on another node--not guaranteed safe */
468 : }
469 :
470 : /**
471 : * gfs2_recover_journal - recovery a given journal
472 : * @ip: the journal incore inode
473 : * j: which journal to check
474 : * preen: Was preen (-a or -p) specified?
475 : * force_check: Was -f specified to force the check?
476 : * @was_clean: if the journal was originally clean, this is set to 1.
477 : * if the journal was dirty from the start, this is set to 0.
478 : *
479 : * Acquire the journal's lock, check to see if the journal is clean, and
480 : * do recovery if necessary.
481 : *
482 : * Returns: errno
483 : */
484 :
485 62 : static int recover_journal(struct lgfs2_inode *ip, int j, struct fsck_cx *cx, int *was_clean)
486 : {
487 62 : struct lgfs2_sbd *sdp = ip->i_sbd;
488 : struct lgfs2_log_header head;
489 : unsigned int pass;
490 : int error;
491 :
492 62 : *was_clean = 0;
493 62 : log_info( _("jid=%u: Looking at journal...\n"), j);
494 :
495 62 : osi_list_init(&sd_revoke_list);
496 62 : error = lgfs2_find_jhead(ip, &head);
497 62 : if (!error) {
498 62 : error = check_journal_seq_no(ip, 0);
499 62 : if (error > JOURNAL_SEQ_TOLERANCE) {
500 0 : log_err( _("Journal #%d (\"journal%d\") has %d "
501 : "sequencing errors; tolerance is %d.\n"),
502 : j+1, j, error, JOURNAL_SEQ_TOLERANCE);
503 0 : goto out;
504 : }
505 : }
506 62 : if (error) {
507 0 : if (cx->opts->no) {
508 0 : log_err( _("Journal #%d (\"journal%d\") is corrupt\n"),j+1, j);
509 0 : log_err( _("Not fixing it due to the -n option.\n"));
510 0 : goto out;
511 : }
512 0 : if (!preen_is_safe(sdp, cx->opts)) {
513 0 : log_err(_("Journal #%d (\"journal%d\") is corrupt.\n"),
514 : j+1, j);
515 0 : log_err(_("I'm not fixing it because it may be unsafe:\n"
516 : "Locking protocol is not lock_nolock and "
517 : "the -a or -p option was specified.\n"));
518 0 : log_err(_("Please make sure no node has the file system "
519 : "mounted then rerun fsck.gfs2 manually "
520 : "without -a or -p.\n"));
521 0 : goto out;
522 : }
523 0 : if (!query(cx, _("\nJournal #%d (\"journal%d\") is "
524 : "corrupt. Okay to repair it? (y/n)"),
525 : j+1, j)) {
526 0 : log_err( _("jid=%u: The journal was not repaired.\n"),
527 : j);
528 0 : goto out;
529 : }
530 0 : log_info( _("jid=%u: Repairing journal...\n"), j);
531 0 : error = check_journal_seq_no(ip, 1);
532 0 : if (error) {
533 0 : log_err( _("jid=%u: Unable to fix the bad journal.\n"),
534 : j);
535 0 : goto out;
536 : }
537 0 : error = lgfs2_find_jhead(ip, &head);
538 0 : if (error) {
539 0 : log_err( _("jid=%u: Unable to fix the bad journal.\n"),
540 : j);
541 0 : goto out;
542 : }
543 0 : log_err( _("jid=%u: The journal was successfully fixed.\n"),
544 : j);
545 : }
546 62 : if (head.lh_flags & GFS2_LOG_HEAD_UNMOUNT) {
547 62 : log_info( _("jid=%u: Journal is clean.\n"), j);
548 62 : *was_clean = 1;
549 62 : return 0;
550 : }
551 0 : if (cx->opts->no) {
552 0 : log_err(_("Journal #%d (\"journal%d\") is dirty\n"),j+1, j);
553 0 : log_err(_("not replaying due to the -n option.\n"));
554 0 : goto out;
555 : }
556 0 : if (!preen_is_safe(sdp, cx->opts)) {
557 0 : log_err( _("Journal #%d (\"journal%d\") is dirty\n"), j+1, j);
558 0 : log_err( _("I'm not replaying it because it may be unsafe:\n"
559 : "Locking protocol is not lock_nolock and "
560 : "the -a or -p option was specified.\n"));
561 0 : log_err( _("Please make sure no node has the file system "
562 : "mounted then rerun fsck.gfs2 manually "
563 : "without -a or -p.\n"));
564 0 : error = FSCK_ERROR;
565 0 : goto out;
566 : }
567 0 : if (!query(cx, _("\nJournal #%d (\"journal%d\") is dirty. Okay to "
568 : "replay it? (y/n)"), j+1, j))
569 0 : goto reinit;
570 :
571 0 : log_info( _("jid=%u: Replaying journal...\n"), j);
572 :
573 0 : sd_found_jblocks = sd_replayed_jblocks = 0;
574 0 : sd_found_metablocks = sd_replayed_metablocks = 0;
575 0 : sd_found_revokes = 0;
576 0 : sd_replay_tail = head.lh_tail;
577 0 : for (pass = 0; pass < 2; pass++) {
578 0 : error = foreach_descriptor(ip, head.lh_tail,
579 : head.lh_blkno, pass);
580 0 : if (error) {
581 0 : log_err(_("Error found during journal replay.\n"));
582 0 : goto out;
583 : }
584 : }
585 0 : log_info( _("jid=%u: Found %u revoke tags\n"), j, sd_found_revokes);
586 0 : revoke_clean(sdp);
587 0 : error = lgfs2_clean_journal(ip, &head);
588 0 : if (error)
589 0 : goto out;
590 0 : log_err( _("jid=%u: Replayed %u of %u journaled data blocks\n"),
591 : j, sd_replayed_jblocks, sd_found_jblocks);
592 0 : log_err( _("jid=%u: Replayed %u of %u metadata blocks\n"),
593 : j, sd_replayed_metablocks, sd_found_metablocks);
594 :
595 : /* Check for errors and give them the option to reinitialize the
596 : journal. */
597 0 : out:
598 0 : if (!error) {
599 0 : log_info( _("jid=%u: Done\n"), j);
600 0 : return 0;
601 : }
602 0 : log_err( _("jid=%u: Failed\n"), j);
603 0 : reinit:
604 0 : if (query(cx, _("Do you want to clear the journal instead? (y/n)"))) {
605 0 : error = lgfs2_write_journal(sdp->md.journal[j], sdp->sd_bsize,
606 0 : sdp->md.journal[j]->i_size /
607 0 : sdp->sd_bsize);
608 0 : log_err(_("jid=%u: journal was cleared.\n"), j);
609 : } else {
610 0 : log_err( _("jid=%u: journal not cleared.\n"), j);
611 : }
612 0 : return error;
613 : }
614 :
615 : /* We can't use the rangecheck function from pass1 because we haven't gone
616 : * through initialization properly yet. */
617 1899322 : static int rangecheck_jblock(struct lgfs2_inode *ip, uint64_t block)
618 : {
619 1899322 : if((block > ip->i_sbd->fssize) || (block <= LGFS2_SB_ADDR(ip->i_sbd))) {
620 0 : log_info( _("Bad block pointer (out of range) found in "
621 : "journal inode %"PRIu64" (0x%"PRIx64").\n"),
622 : ip->i_num.in_addr, ip->i_num.in_addr);
623 0 : return META_ERROR; /* Exits check_metatree quicker */
624 : }
625 1899322 : return META_IS_GOOD;
626 : }
627 :
628 6970 : static int rangecheck_jmeta(struct fsck_cx *cx, struct iptr iptr, struct lgfs2_buffer_head **bh, int h,
629 : int *is_valid, int *was_duplicate, void *private)
630 : {
631 6970 : struct lgfs2_inode *ip = iptr.ipt_ip;
632 6970 : uint64_t block = iptr_block(iptr);
633 : int rc;
634 :
635 6970 : *bh = NULL;
636 6970 : *was_duplicate = 0;
637 6970 : *is_valid = 0;
638 6970 : rc = rangecheck_jblock(ip, block);
639 6970 : if (rc == META_IS_GOOD) {
640 6970 : *bh = lgfs2_bread(ip->i_sbd, block);
641 6970 : *is_valid = (lgfs2_check_meta((*bh)->b_data, GFS2_METATYPE_IN) == 0);
642 6970 : if (!(*is_valid)) {
643 0 : log_err( _("Journal at block %"PRIu64" (0x%"PRIx64") has a bad "
644 : "indirect block pointer %"PRIu64" (0x%"PRIx64") "
645 : "(points to something that is not an "
646 : "indirect block).\n"),
647 : ip->i_num.in_addr, ip->i_num.in_addr, block, block);
648 0 : lgfs2_brelse(*bh);
649 0 : *bh = NULL;
650 0 : return META_SKIP_FURTHER;
651 : }
652 : }
653 6970 : return rc;
654 : }
655 :
656 1892352 : static int rangecheck_jdata(struct fsck_cx *cx, struct lgfs2_inode *ip, uint64_t metablock,
657 : uint64_t block, void *private,
658 : struct lgfs2_buffer_head *bh, __be64 *ptr)
659 : {
660 1892352 : return rangecheck_jblock(ip, block);
661 : }
662 :
663 : static struct metawalk_fxns rangecheck_journal = {
664 : .private = NULL,
665 : .invalid_meta_is_fatal = 1,
666 : .check_metalist = rangecheck_jmeta,
667 : .check_data = rangecheck_jdata,
668 : };
669 :
670 : /*
671 : * replay_journals - replay the journals
672 : * sdp: the super block
673 : * preen: Was preen (-a or -p) specified?
674 : * force_check: Was -f specified to force the check?
675 : * @clean_journals - set to the number of clean journals we find
676 : *
677 : * There should be a flag to the fsck to enable/disable this
678 : * feature. The fsck falls back to clearing the journal if an
679 : * inconsistency is found, but only for the bad journal.
680 : *
681 : * Returns: 0 on success, -1 on failure
682 : */
683 56 : int replay_journals(struct fsck_cx *cx, int *clean_journals)
684 : {
685 56 : struct lgfs2_sbd *sdp = cx->sdp;
686 56 : int dirty_journals = 0;
687 56 : int gave_msg = 0;
688 56 : int error = 0;
689 56 : int clean = 0;
690 : int i;
691 :
692 56 : *clean_journals = 0;
693 :
694 56 : sdp->jsize = LGFS2_DEFAULT_JSIZE;
695 :
696 119 : for(i = 0; i < sdp->md.journals; i++) {
697 63 : if (sdp->md.journal[i]) {
698 63 : error = check_metatree(cx, sdp->md.journal[i],
699 : &rangecheck_journal);
700 63 : if (error)
701 : /* Don't use fsck_inode_put here because it's a
702 : system file and we need to dismantle it. */
703 1 : lgfs2_inode_put(&sdp->md.journal[i]);
704 63 : error = 0; /* bad journal is non-fatal */
705 : }
706 63 : if (!sdp->md.journal[i]) {
707 1 : log_err(_("File system journal \"journal%d\" is "
708 : "missing or corrupt: pass1 will try to "
709 : "recreate it.\n"), i);
710 1 : continue;
711 : }
712 62 : if (!error) {
713 62 : uint64_t jsize = sdp->md.journal[i]->i_size / (1024 * 1024);
714 :
715 62 : if (sdp->jsize == LGFS2_DEFAULT_JSIZE && jsize &&
716 55 : jsize != sdp->jsize)
717 15 : sdp->jsize = jsize;
718 62 : error = recover_journal(sdp->md.journal[i], i, cx, &clean);
719 62 : if (!clean)
720 0 : dirty_journals++;
721 62 : if (!gave_msg && dirty_journals == 1 && !cx->opts->no &&
722 0 : preen_is_safe(sdp, cx->opts)) {
723 0 : gave_msg = 1;
724 0 : log_notice( _("Recovering journals (this may "
725 : "take a while)\n"));
726 : }
727 62 : *clean_journals += clean;
728 : }
729 : }
730 : /* Sync the buffers to disk so we get a fresh start. */
731 56 : fsync(sdp->device_fd);
732 56 : return error;
733 : }
734 :
735 : /*
736 : * ji_update - fill in journal info
737 : * sdp: the incore superblock pointer
738 : *
739 : * Given the inode for the journal index, read in all
740 : * the journal inodes.
741 : *
742 : * Returns: 0 on success, -1 on failure
743 : */
744 60 : int ji_update(struct lgfs2_sbd *sdp)
745 : {
746 60 : struct lgfs2_inode *jip, *ip = sdp->md.jiinode;
747 : char journal_name[JOURNAL_NAME_SIZE];
748 : int i;
749 :
750 60 : if (!ip) {
751 0 : log_crit(_("Journal index inode not found.\n"));
752 0 : return -1;
753 : }
754 :
755 : /* The per_node directory will have 3 directory entries per node,
756 : plus two for "." and "..". So we subtract the 2 and divide by 3.
757 : If per_node is missing or damaged, we have to trust jindex has
758 : the correct number of entries. */
759 60 : if (sdp->md.pinode) /* if per_node was read in properly */
760 60 : sdp->md.journals = (sdp->md.pinode->i_entries - 2) / 3;
761 : else
762 0 : sdp->md.journals = ip->i_entries - 2;
763 :
764 60 : if (!(sdp->md.journal = calloc(sdp->md.journals,
765 : sizeof(struct lgfs2_inode *)))) {
766 0 : log_err(_("Unable to allocate journal index\n"));
767 0 : return -1;
768 : }
769 60 : memset(journal_name, 0, sizeof(*journal_name));
770 127 : for (i = 0; i < sdp->md.journals; i++) {
771 : /* FIXME check snprintf return code */
772 : int len;
773 67 : len = snprintf(journal_name, JOURNAL_NAME_SIZE, "journal%u", i);
774 67 : jip = lgfs2_lookupi(sdp->md.jiinode, journal_name, len);
775 67 : sdp->md.journal[i] = jip;
776 : }
777 60 : return 0;
778 : }
779 :
/* Log a jindex entry whose name is not valid; at most 63 characters of the
   name are printed. */
static void bad_journalname(const char *filename, int len)
{
	int shown = (len >= 64) ? 63 : len;

	log_debug(_("Journal index entry '%.*s' has an invalid filename.\n"),
		  shown, filename);
}
787 :
788 : /**
789 : * check_jindex_dent - check the jindex directory entries
790 : *
791 : * This function makes sure the directory entries of the jindex are valid.
792 : * If they're not '.' or '..' they better have the form journalXXX.
793 : */
794 187 : static int check_jindex_dent(struct fsck_cx *cx, struct lgfs2_inode *ip, struct gfs2_dirent *dent,
795 : struct gfs2_dirent *prev_de,
796 : struct lgfs2_buffer_head *bh, char *filename,
797 : uint32_t *count, int *lindex, void *priv)
798 : {
799 : struct lgfs2_dirent d;
800 : int i;
801 :
802 187 : lgfs2_dirent_in(&d, dent);
803 :
804 187 : if (d.dr_name_len == 1 && filename[0] == '.')
805 60 : goto dirent_good;
806 127 : if (d.dr_name_len == 2 && filename[0] == '.' && filename[1] == '.')
807 60 : goto dirent_good;
808 :
809 67 : if ((d.dr_name_len >= 11) || /* "journal9999" */
810 67 : (d.dr_name_len <= 7) ||
811 67 : (strncmp(filename, "journal", 7))) {
812 0 : bad_journalname(filename, d.dr_name_len);
813 0 : return -1;
814 : }
815 134 : for (i = 7; i < d.dr_name_len; i++) {
816 67 : if (filename[i] < '0' || filename[i] > '9') {
817 0 : bad_journalname(filename, d.dr_name_len);
818 0 : return -2;
819 : }
820 : }
821 :
822 67 : dirent_good:
823 : /* Return the number of leaf entries so metawalk doesn't flag this
824 : leaf as having none. */
825 187 : *count = be16_to_cpu(((struct gfs2_leaf *)bh->b_data)->lf_entries);
826 187 : return 0;
827 : }
828 :
829 : static struct metawalk_fxns jindex_check_fxns = {
830 : .private = NULL,
831 : .check_dentry = check_jindex_dent,
832 : };
833 :
834 0 : int build_jindex(struct lgfs2_sbd *sdp)
835 : {
836 : struct lgfs2_inode *jindex;
837 :
838 0 : jindex = lgfs2_createi(sdp->master_dir, "jindex", S_IFDIR | 0700,
839 : GFS2_DIF_SYSTEM);
840 0 : if (jindex == NULL) {
841 0 : return errno;
842 : }
843 0 : sdp->md.journal = calloc(sdp->md.journals, sizeof(struct lgfs2_inode *));
844 0 : if (sdp->md.journal == NULL) {
845 0 : lgfs2_inode_put(&jindex);
846 0 : return ENOSPC;
847 : }
848 0 : for (unsigned j = 0; j < sdp->md.journals; j++) {
849 0 : int ret = lgfs2_build_journal(sdp, j, jindex);
850 0 : if (ret)
851 0 : return ret;
852 0 : lgfs2_inode_put(&sdp->md.journal[j]);
853 : }
854 0 : free(sdp->md.journal);
855 0 : lgfs2_inode_put(&jindex);
856 0 : return 0;
857 : }
858 :
859 60 : int init_jindex(struct fsck_cx *cx, int allow_ji_rebuild)
860 : {
861 60 : struct lgfs2_sbd *sdp = cx->sdp;
862 : int error;
863 :
864 60 : log_debug(_("Validating the journal index.\n"));
865 : /* rgrepair requires the journals be read in in order to distinguish
866 : "real" rgrps from rgrps that are just copies left in journals. */
867 : /* coverity[identity_transfer:SUPPRESS] */
868 60 : sdp->md.jiinode = lgfs2_lookupi(sdp->master_dir, "jindex", 6);
869 :
870 60 : if (!sdp->md.jiinode) {
871 : int err;
872 :
873 0 : if (!allow_ji_rebuild) {
874 0 : log_crit(_("Error: jindex and rindex files are both "
875 : "corrupt.\n"));
876 0 : return -1;
877 : }
878 0 : if (!query(cx, _("The gfs2 system jindex inode is missing. "
879 : "Okay to rebuild it? (y/n) "))) {
880 0 : log_crit(_("Error: cannot proceed without a valid "
881 : "jindex file.\n"));
882 0 : return -1;
883 : }
884 :
885 0 : err = build_jindex(sdp);
886 0 : if (err) {
887 0 : log_crit(_("Error %d rebuilding jindex\n"), err);
888 0 : return err;
889 : }
890 : /* coverity[pass_freed_arg:SUPPRESS] */
891 0 : sdp->md.jiinode = lgfs2_lookupi(sdp->master_dir, "jindex", 6);
892 : }
893 :
894 : /* check for irrelevant entries in jindex. Can't use check_dir because
895 : that creates and destroys the inode, which we don't want. */
896 :
897 60 : log_debug(_("Checking the integrity of the journal index.\n"));
898 60 : if (sdp->md.jiinode->i_flags & GFS2_DIF_EXHASH)
899 1 : error = check_leaf_blks(cx, sdp->md.jiinode,
900 : &jindex_check_fxns);
901 : else
902 59 : error = check_linear_dir(cx, sdp->md.jiinode,
903 59 : sdp->md.jiinode->i_bh,
904 : &jindex_check_fxns);
905 60 : if (error) {
906 0 : log_err(_("The system journal index is damaged.\n"));
907 0 : if (!query(cx, _("Okay to rebuild it? (y/n) "))) {
908 0 : log_crit(_("Error: cannot proceed without a "
909 : "valid jindex file.\n"));
910 0 : return -1;
911 : }
912 0 : lgfs2_inode_put(&sdp->md.jiinode);
913 0 : lgfs2_dirent_del(sdp->master_dir, "jindex", 6);
914 0 : log_err(_("Corrupt journal index was removed.\n"));
915 0 : error = build_jindex(sdp);
916 0 : if (error) {
917 0 : log_err(_("Error rebuilding journal "
918 : "index: Cannot continue.\n"));
919 0 : return error;
920 : }
921 : /* coverity[pass_freed_arg:SUPPRESS] */
922 0 : sdp->md.jiinode = lgfs2_lookupi(sdp->master_dir, "jindex", 6);
923 : }
924 :
925 : /* read in the ji data */
926 60 : if (ji_update(sdp)){
927 0 : log_err( _("Unable to read jindex inode.\n"));
928 0 : return -1;
929 : }
930 60 : return 0;
931 : }
|