/*
 * This file is part of UBIFS.
 *
 * Copyright (C) 2006-2008 Nokia Corporation.
 * Copyright (C) 2006, 2007 University of Szeged, Hungary
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Authors: Artem Bityutskiy (Битюцкий Артём)
 *          Adrian Hunter
 *          Zoltan Sogor
 */

/*
 * This file implements the UBIFS I/O subsystem, which provides various
 * I/O-related helper functions (reading/writing/checking/validating nodes)
 * and implements write-buffering support. Write-buffers help to save space
 * which otherwise would have been wasted for padding to the nearest minimal
 * I/O unit boundary. Instead, data first goes to the write-buffer and is
 * flushed when the buffer is full or when it has not been used for some time
 * (by timer). This is similar to the mechanism used by JFFS2.
 *
 * UBIFS distinguishes between minimum write size (@c->min_io_size) and
 * maximum write size (@c->max_write_size). The latter is the maximum number
 * of bytes the underlying flash is able to program at a time, and writing in
 * @c->max_write_size units should presumably be faster. Obviously,
 * @c->min_io_size <= @c->max_write_size. Write-buffers are of
 * @c->max_write_size bytes in size for maximum performance. However, when a
 * write-buffer is flushed, only the portion of it (aligned to the
 * @c->min_io_size boundary) which contains data is written, not the whole
 * write-buffer, because this is more space-efficient.
 *
 * This optimization adds a few complications to the code. Indeed, on the one
 * hand, we want to write in optimal @c->max_write_size chunks, which also
 * means aligning writes to @c->max_write_size offsets. On the other hand, we
 * do not want to waste space when synchronizing the write-buffer, so during
 * synchronization we write in smaller chunks. This leaves the next write
 * offset unaligned to @c->max_write_size, so we have to make sure that the
 * write-buffer offset (@wbuf->offs) becomes aligned to @c->max_write_size
 * again. We do this by temporarily shrinking the write-buffer size
 * (@wbuf->size).
 *
 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
 * mutexes defined inside these objects. Since sometimes upper-level code
 * has to lock the write-buffer (e.g., the journal space reservation code),
 * many functions related to write-buffers have a "nolock" suffix, which means
 * that the caller has to lock the write-buffer before calling the function.
 *
 * UBIFS stores nodes at 64-bit-aligned addresses. If the node length is not
 * aligned, UBIFS starts the next node from the aligned address, and the padded
 * bytes may contain any rubbish. In other words, UBIFS does not put padding
 * bytes in those small gaps. Common headers of nodes store real node lengths,
 * not aligned lengths. Indexing nodes also store real lengths in branches.
 *
 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
 * uses padding nodes, or padding bytes if a padding node does not fit.
 *
 * All UBIFS nodes are protected by CRC checksums, and UBIFS checks the CRC
 * when nodes are read from the flash media.
 */

#include <linux/crc32.h>
#include <linux/slab.h>
#include "ubifs.h"
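
/*
 * Illustrative sketch (not part of the original file): how the write-buffer
 * size is shrunk so that the next flush ends back on a @c->max_write_size
 * boundary. The numbers are hypothetical (min_io_size = 512,
 * max_write_size = 2048):
 *
 *	int offs = 1536;	// unaligned offset left by a partial sync
 *
 *	// Shrink the buffer so that offs + size lands on a 2048 boundary:
 *	int size = ALIGN(offs, 2048) - offs;	// 2048 - 1536 = 512
 *
 * After this shrunken buffer is flushed, the offset becomes 2048 and
 * subsequent writes may again proceed in full @c->max_write_size chunks.
 */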
/**
 * ubifs_ro_mode - switch UBIFS to read-only mode.
 * @c: UBIFS file-system description object
 * @err: error code which is the reason of switching to R/O mode
 */
void ubifs_ro_mode(struct ubifs_info *c, int err)
{
	if (!c->ro_error) {
		c->ro_error = 1;
		c->no_chk_data_crc = 0;
		c->vfs_sb->s_flags |= MS_RDONLY;
		ubifs_warn("switched to read-only mode, error %d", err);
		dbg_dump_stack();
	}
}

/**
 * ubifs_check_node - check node.
 * @c: UBIFS file-system description object
 * @buf: node to check
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 * @quiet: print no messages
 * @must_chk_crc: indicates whether to always check the CRC
 *
 * This function checks the node magic number and CRC checksum. It also
 * validates the node length to prevent UBIFS from becoming crazy when an
 * attacker feeds it a file-system image with incorrect nodes. For example,
 * too large a node length in the common header could cause UBIFS to read
 * memory outside of the allocated buffer when checking the CRC checksum.
 *
 * This function may skip CRC checking of data nodes if @c->no_chk_data_crc
 * is true, which is controlled by the corresponding UBIFS mount option.
 * However, if @must_chk_crc is true, then @c->no_chk_data_crc is ignored and
 * the CRC is checked. Similarly, if @c->mounting or @c->remounting_rw is true
 * (we are mounting or re-mounting to R/W mode), @c->no_chk_data_crc is
 * ignored and the CRC is checked. This is because during mounting or
 * re-mounting from R/O mode to R/W mode we may read journal nodes (when
 * replaying the journal or doing recovery) and the journal nodes may
 * potentially be corrupted, so checking is required.
 *
 * This function returns zero in case of success and %-EUCLEAN in case of bad
 * CRC or magic.
 */
int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
		     int offs, int quiet, int must_chk_crc)
{
	int err = -EINVAL, type, node_len;
	uint32_t crc, node_crc, magic;
	const struct ubifs_ch *ch = buf;

	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(!(offs & 7) && offs < c->leb_size);

	magic = le32_to_cpu(ch->magic);
	if (magic != UBIFS_NODE_MAGIC) {
		if (!quiet)
			ubifs_err("bad magic %#08x, expected %#08x",
				  magic, UBIFS_NODE_MAGIC);
		err = -EUCLEAN;
		goto out;
	}

	type = ch->node_type;
	if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
		if (!quiet)
			ubifs_err("bad node type %d", type);
		goto out;
	}

	node_len = le32_to_cpu(ch->len);
	if (node_len + offs > c->leb_size)
		goto out_len;

	if (c->ranges[type].max_len == 0) {
		if (node_len != c->ranges[type].len)
			goto out_len;
	} else if (node_len < c->ranges[type].min_len ||
		   node_len > c->ranges[type].max_len)
		goto out_len;

	if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
	    !c->remounting_rw && c->no_chk_data_crc)
		return 0;

	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
	node_crc = le32_to_cpu(ch->crc);
	if (crc != node_crc) {
		if (!quiet)
			ubifs_err("bad CRC: calculated %#08x, read %#08x",
				  crc, node_crc);
		err = -EUCLEAN;
		goto out;
	}

	return 0;

out_len:
	if (!quiet)
		ubifs_err("bad node length %d", node_len);
out:
	if (!quiet) {
		ubifs_err("bad node at LEB %d:%d", lnum, offs);
		dbg_dump_node(c, buf);
		dbg_dump_stack();
	}
	return err;
}
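
/*
 * Illustrative sketch (not part of the original file): a typical caller reads
 * a node into a buffer and validates it before trusting any field beyond the
 * common header. The LEB coordinates are hypothetical:
 *
 *	err = ubi_read(c->ubi, lnum, buf, offs, len);
 *	if (err && err != -EBADMSG)
 *		return err;
 *	// Magic, node type, length range and CRC are all verified here;
 *	// -EUCLEAN means bad magic or CRC, i.e. corrupted media data.
 *	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
 *
 * This mirrors what ubifs_read_node() below does internally.
 */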
/**
 * ubifs_pad - pad flash space.
 * @c: UBIFS file-system description object
 * @buf: buffer to put padding to
 * @pad: how many bytes to pad
 *
 * The flash media obliges us to write only in chunks of @c->min_io_size, and
 * when we have to write less data we add a padding node to the write-buffer
 * and pad it to the next minimal I/O unit's boundary. Padding nodes help when
 * the media is being scanned. If the amount of wasted space is not enough to
 * fit a padding node, which takes %UBIFS_PAD_NODE_SZ bytes, we write a
 * padding byte pattern (%UBIFS_PADDING_BYTE) instead.
 *
 * Padding nodes are also used to fill gaps when the "commit-in-gaps" method
 * is used.
 */
void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
{
	uint32_t crc;

	ubifs_assert(pad >= 0 && !(pad & 7));

	if (pad >= UBIFS_PAD_NODE_SZ) {
		struct ubifs_ch *ch = buf;
		struct ubifs_pad_node *pad_node = buf;

		ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
		ch->node_type = UBIFS_PAD_NODE;
		ch->group_type = UBIFS_NO_NODE_GROUP;
		ch->padding[0] = ch->padding[1] = 0;
		ch->sqnum = 0;
		ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ);
		pad -= UBIFS_PAD_NODE_SZ;
		pad_node->pad_len = cpu_to_le32(pad);
		crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8);
		ch->crc = cpu_to_le32(crc);
		memset(buf + UBIFS_PAD_NODE_SZ, 0, pad);
	} else if (pad > 0)
		/* Too little space, padding node won't fit */
		memset(buf, UBIFS_PADDING_BYTE, pad);
}

/**
 * next_sqnum - get next sequence number.
 * @c: UBIFS file-system description object
 */
static unsigned long long next_sqnum(struct ubifs_info *c)
{
	unsigned long long sqnum;

	spin_lock(&c->cnt_lock);
	sqnum = ++c->max_sqnum;
	spin_unlock(&c->cnt_lock);

	if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
		if (sqnum >= SQNUM_WATERMARK) {
			ubifs_err("sequence number overflow %llu, end of life",
				  sqnum);
			ubifs_ro_mode(c, -EINVAL);
		}
		ubifs_warn("running out of sequence numbers, end of life soon");
	}

	return sqnum;
}
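
/*
 * Illustrative sketch (not part of the original file): how much padding
 * ubifs_pad() is asked to emit for a node that does not fill a whole min.
 * I/O unit. The numbers are hypothetical (min_io_size = 512):
 *
 *	int len = 206;				// prepared node length
 *	int alen = ALIGN(len, 8);		// 208, 8-byte node alignment
 *	int pad = ALIGN(alen, 512) - alen;	// 304 bytes to the unit end
 *
 * Since 304 bytes is enough for a padding node, one is written (its pad_len
 * covering the zeroed remainder); had fewer than UBIFS_PAD_NODE_SZ bytes
 * remained, the gap would be filled with %UBIFS_PADDING_BYTE instead.
 */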
/**
 * ubifs_prepare_node - prepare node to be written to flash.
 * @c: UBIFS file-system description object
 * @node: the node to pad
 * @len: node length
 * @pad: if the buffer has to be padded
 *
 * This function prepares the node at @node to be written to the media - it
 * calculates the node CRC, fills the common header, and adds proper padding
 * up to the next minimum I/O unit if @pad is not zero.
 */
void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
{
	uint32_t crc;
	struct ubifs_ch *ch = node;
	unsigned long long sqnum = next_sqnum(c);

	ubifs_assert(len >= UBIFS_CH_SZ);

	ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
	ch->len = cpu_to_le32(len);
	ch->group_type = UBIFS_NO_NODE_GROUP;
	ch->sqnum = cpu_to_le64(sqnum);
	ch->padding[0] = ch->padding[1] = 0;
	crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
	ch->crc = cpu_to_le32(crc);

	if (pad) {
		len = ALIGN(len, 8);
		pad = ALIGN(len, c->min_io_size) - len;
		ubifs_pad(c, node + len, pad);
	}
}

/**
 * ubifs_prep_grp_node - prepare node of a group to be written to flash.
 * @c: UBIFS file-system description object
 * @node: the node to pad
 * @len: node length
 * @last: indicates the last node of the group
 *
 * This function prepares the node at @node to be written to the media - it
 * calculates the node CRC and fills the common header.
 */
void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
{
	uint32_t crc;
	struct ubifs_ch *ch = node;
	unsigned long long sqnum = next_sqnum(c);

	ubifs_assert(len >= UBIFS_CH_SZ);

	ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
	ch->len = cpu_to_le32(len);
	if (last)
		ch->group_type = UBIFS_LAST_OF_NODE_GROUP;
	else
		ch->group_type = UBIFS_IN_NODE_GROUP;
	ch->sqnum = cpu_to_le64(sqnum);
	ch->padding[0] = ch->padding[1] = 0;
	crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
	ch->crc = cpu_to_le32(crc);
}

/**
 * wbuf_timer_callback_nolock - write-buffer timer callback function.
 * @timer: the timer embedded in the write-buffer descriptor
 *
 * This function is called when the write-buffer timer expires.
 */
static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
{
	struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);

	dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
	wbuf->need_sync = 1;
	wbuf->c->need_wbuf_sync = 1;
	ubifs_wake_up_bgt(wbuf->c);
	return HRTIMER_NORESTART;
}
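
/*
 * Illustrative note (not part of the original file): the CRC stored in the
 * common header covers everything except the first 8 bytes of the node (the
 * magic and the CRC field itself), which is why both the preparation and the
 * checking code start the checksum at offset 8:
 *
 *	crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
 *	ch->crc = cpu_to_le32(crc);
 *
 * A verifier recomputes the same range and compares the result with
 * le32_to_cpu(ch->crc), as ubifs_check_node() above does.
 */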
/**
 * new_wbuf_timer_nolock - start new write-buffer timer.
 * @wbuf: write-buffer descriptor
 */
static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
	ubifs_assert(!hrtimer_active(&wbuf->timer));

	if (wbuf->no_timer)
		return;
	dbg_io("set timer for jhead %s, %llu-%llu millisecs",
	       dbg_jhead(wbuf->jhead),
	       div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
	       div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
		       USEC_PER_SEC));
	hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
			       HRTIMER_MODE_REL);
}

/**
 * cancel_wbuf_timer_nolock - cancel write-buffer timer.
 * @wbuf: write-buffer descriptor
 */
static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
	if (wbuf->no_timer)
		return;
	wbuf->need_sync = 0;
	hrtimer_cancel(&wbuf->timer);
}

/**
 * ubifs_wbuf_sync_nolock - synchronize write-buffer.
 * @wbuf: write-buffer to synchronize
 *
 * This function synchronizes write-buffer @wbuf and returns zero in case of
 * success or a negative error code in case of failure.
 *
 * Note, although write-buffers are of @c->max_write_size, this function does
 * not necessarily write all @c->max_write_size bytes to the flash. Instead,
 * if the write-buffer is only partially filled with data, only the used part
 * of the write-buffer (aligned on a @c->min_io_size boundary) is
 * synchronized. This way we waste less space.
 */
int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
{
	struct ubifs_info *c = wbuf->c;
	int err, dirt, sync_len;

	cancel_wbuf_timer_nolock(wbuf);
	if (!wbuf->used || wbuf->lnum == -1)
		/* Write-buffer is empty or not seeked */
		return 0;

	dbg_io("LEB %d:%d, %d bytes, jhead %s",
	       wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
	ubifs_assert(!(wbuf->avail & 7));
	ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
	ubifs_assert(wbuf->size >= c->min_io_size);
	ubifs_assert(wbuf->size <= c->max_write_size);
	ubifs_assert(wbuf->size % c->min_io_size == 0);
	ubifs_assert(!c->ro_media && !c->ro_mount);
	if (c->leb_size - wbuf->offs >= c->max_write_size)
		ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));

	if (c->ro_error)
		return -EROFS;

	/*
	 * Do not write the whole write-buffer, but only the minimum necessary
	 * amount of min. I/O units.
	 */
	sync_len = ALIGN(wbuf->used, c->min_io_size);
	dirt = sync_len - wbuf->used;
	if (dirt)
		ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
	err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
			    sync_len, wbuf->dtype);
	if (err) {
		ubifs_err("cannot write %d bytes to LEB %d:%d",
			  sync_len, wbuf->lnum, wbuf->offs);
		dbg_dump_stack();
		return err;
	}

	spin_lock(&wbuf->lock);
	wbuf->offs += sync_len;
	/*
	 * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
	 * But our goal is to optimize writes and make sure we write in
	 * @c->max_write_size chunks and to @c->max_write_size-aligned
	 * offsets. Thus, if @wbuf->offs is not aligned to @c->max_write_size
	 * now, make sure that @wbuf->offs + @wbuf->size is aligned to
	 * @c->max_write_size. This way we make sure that after the next
	 * write-buffer flush we are again at the optimal offset (aligned to
	 * @c->max_write_size).
	 */
	if (c->leb_size - wbuf->offs < c->max_write_size)
		wbuf->size = c->leb_size - wbuf->offs;
	else if (wbuf->offs & (c->max_write_size - 1))
		wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
	else
		wbuf->size = c->max_write_size;
	wbuf->avail = wbuf->size;
	wbuf->used = 0;
	wbuf->next_ino = 0;
	spin_unlock(&wbuf->lock);

	if (wbuf->sync_callback)
		err = wbuf->sync_callback(c, wbuf->lnum,
					  c->leb_size - wbuf->offs, dirt);
	return err;
}
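
/*
 * Illustrative sketch (not part of the original file): the amount of data
 * ubifs_wbuf_sync_nolock() actually writes. The numbers are hypothetical
 * (min_io_size = 512):
 *
 *	int used = 700;				// bytes buffered so far
 *	int sync_len = ALIGN(used, 512);	// 1024, two min. I/O units
 *	int dirt = sync_len - used;		// 324 bytes of padding
 *
 * Only @sync_len bytes are written, not the whole @wbuf->size buffer; the
 * @dirt bytes are wasted space, reported to the sync callback so it can be
 * accounted as dirty.
 */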
/**
 * ubifs_wbuf_seek_nolock - seek write-buffer.
 * @wbuf: write-buffer
 * @lnum: logical eraseblock number to seek to
 * @offs: logical eraseblock offset to seek to
 * @dtype: data type
 *
 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
 * The write-buffer has to be empty. Returns zero in case of success and a
 * negative error code in case of failure.
 */
int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
			   int dtype)
{
	const struct ubifs_info *c = wbuf->c;

	dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
	ubifs_assert(offs >= 0 && offs <= c->leb_size);
	ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
	ubifs_assert(lnum != wbuf->lnum);
	ubifs_assert(wbuf->used == 0);

	spin_lock(&wbuf->lock);
	wbuf->lnum = lnum;
	wbuf->offs = offs;
	if (c->leb_size - wbuf->offs < c->max_write_size)
		wbuf->size = c->leb_size - wbuf->offs;
	else if (wbuf->offs & (c->max_write_size - 1))
		wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
	else
		wbuf->size = c->max_write_size;
	wbuf->avail = wbuf->size;
	wbuf->used = 0;
	spin_unlock(&wbuf->lock);
	wbuf->dtype = dtype;

	return 0;
}

/**
 * ubifs_bg_wbufs_sync - synchronize write-buffers.
 * @c: UBIFS file-system description object
 *
 * This function is called by the background thread to synchronize
 * write-buffers. Returns zero in case of success and a negative error code
 * in case of failure.
 */
int ubifs_bg_wbufs_sync(struct ubifs_info *c)
{
	int err, i;

	ubifs_assert(!c->ro_media && !c->ro_mount);
	if (!c->need_wbuf_sync)
		return 0;
	c->need_wbuf_sync = 0;

	if (c->ro_error) {
		err = -EROFS;
		goto out_timers;
	}

	dbg_io("synchronize");
	for (i = 0; i < c->jhead_cnt; i++) {
		struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;

		cond_resched();

		/*
		 * If the mutex is locked then wbuf is being changed, so
		 * synchronization is not necessary.
		 */
		if (mutex_is_locked(&wbuf->io_mutex))
			continue;

		mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
		if (!wbuf->need_sync) {
			mutex_unlock(&wbuf->io_mutex);
			continue;
		}

		err = ubifs_wbuf_sync_nolock(wbuf);
		mutex_unlock(&wbuf->io_mutex);
		if (err) {
			ubifs_err("cannot sync write-buffer, error %d", err);
			ubifs_ro_mode(c, err);
			goto out_timers;
		}
	}

	return 0;

out_timers:
	/* Cancel all timers to prevent repeated errors */
	for (i = 0; i < c->jhead_cnt; i++) {
		struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;

		mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
		cancel_wbuf_timer_nolock(wbuf);
		mutex_unlock(&wbuf->io_mutex);
	}
	return err;
}
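
/*
 * Illustrative sketch (not part of the original file): the path from a
 * write-buffer timeout to the actual flush. The hrtimer callback only sets
 * flags and wakes the background thread; the write itself happens later in
 * process context:
 *
 *	wbuf_timer_callback_nolock()
 *		wbuf->need_sync = 1;
 *		c->need_wbuf_sync = 1;
 *		ubifs_wake_up_bgt(c);
 *	...
 *	ubifs_bg_wbufs_sync()
 *		for each journal head with need_sync set:
 *			ubifs_wbuf_sync_nolock(wbuf);
 *
 * Deferring the I/O to the background thread keeps the timer callback short,
 * which is required in hrtimer (interrupt) context.
 */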
/**
 * ubifs_wbuf_write_nolock - write data to flash via write-buffer.
 * @wbuf: write-buffer
 * @buf: node to write
 * @len: node length
 *
 * This function writes data to flash via write-buffer @wbuf. This means that
 * the last piece of the node won't reach the flash media immediately if it
 * does not take up a whole max. write unit (@c->max_write_size). Instead,
 * the node will sit in RAM until the write-buffer is synchronized (e.g., by
 * timer, or because more data are appended to the write-buffer).
 *
 * This function returns zero in case of success and a negative error code in
 * case of failure. If the node cannot be written because there is no more
 * space in this logical eraseblock, %-ENOSPC is returned.
 */
int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
{
	struct ubifs_info *c = wbuf->c;
	int err, written, n, aligned_len = ALIGN(len, 8);

	dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
	       dbg_ntype(((struct ubifs_ch *)buf)->node_type),
	       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
	ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
	ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
	ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
	ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
	ubifs_assert(wbuf->size >= c->min_io_size);
	ubifs_assert(wbuf->size <= c->max_write_size);
	ubifs_assert(wbuf->size % c->min_io_size == 0);
	ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
	ubifs_assert(!c->ro_media && !c->ro_mount);
	ubifs_assert(!c->space_fixup);
	if (c->leb_size - wbuf->offs >= c->max_write_size)
		ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));

	if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
		err = -ENOSPC;
		goto out;
	}

	cancel_wbuf_timer_nolock(wbuf);

	if (c->ro_error)
		return -EROFS;

	if (aligned_len <= wbuf->avail) {
		/*
		 * The node is not very large and fits entirely within the
		 * write-buffer.
		 */
		memcpy(wbuf->buf + wbuf->used, buf, len);

		if (aligned_len == wbuf->avail) {
			dbg_io("flush jhead %s wbuf to LEB %d:%d",
			       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
			err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
					    wbuf->offs, wbuf->size,
					    wbuf->dtype);
			if (err)
				goto out;

			spin_lock(&wbuf->lock);
			wbuf->offs += wbuf->size;
			if (c->leb_size - wbuf->offs >= c->max_write_size)
				wbuf->size = c->max_write_size;
			else
				wbuf->size = c->leb_size - wbuf->offs;
			wbuf->avail = wbuf->size;
			wbuf->used = 0;
			wbuf->next_ino = 0;
			spin_unlock(&wbuf->lock);
		} else {
			spin_lock(&wbuf->lock);
			wbuf->avail -= aligned_len;
			wbuf->used += aligned_len;
			spin_unlock(&wbuf->lock);
		}

		goto exit;
	}

	written = 0;

	if (wbuf->used) {
		/*
		 * The node is large enough and does not fit entirely within
		 * the currently available space. We have to fill and flush
		 * the write-buffer and switch to the next max. write unit.
		 */
		dbg_io("flush jhead %s wbuf to LEB %d:%d",
		       dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
		memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
		err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
				    wbuf->size, wbuf->dtype);
		if (err)
			goto out;

		wbuf->offs += wbuf->size;
		len -= wbuf->avail;
		aligned_len -= wbuf->avail;
		written += wbuf->avail;
	} else if (wbuf->offs & (c->max_write_size - 1)) {
		/*
		 * The write-buffer offset is not aligned to
		 * @c->max_write_size and @wbuf->size is less than
		 * @c->max_write_size. Write @wbuf->size bytes to make sure
		 * the following writes are done in optimal
		 * @c->max_write_size chunks.
		 */
		dbg_io("write %d bytes to LEB %d:%d",
		       wbuf->size, wbuf->lnum, wbuf->offs);
		err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
				    wbuf->size, wbuf->dtype);
		if (err)
			goto out;

		wbuf->offs += wbuf->size;
		len -= wbuf->size;
		aligned_len -= wbuf->size;
		written += wbuf->size;
	}

	/*
	 * The remaining data may take up one or more whole max. write units,
	 * so write whole multiples of the max. write unit size directly to
	 * the flash media. We align the node length to an 8-byte boundary
	 * because we flush the wbuf anyway if the remaining space is less
	 * than 8 bytes.
	 */
	n = aligned_len >> c->max_write_shift;
	if (n) {
		n <<= c->max_write_shift;
		dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
		       wbuf->offs);
		err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
				    wbuf->offs, n, wbuf->dtype);
		if (err)
			goto out;
		wbuf->offs += n;
		aligned_len -= n;
		len -= n;
		written += n;
	}

	spin_lock(&wbuf->lock);
	if (aligned_len)
		/*
		 * And now we have what's left, which does not take up a whole
		 * max. write unit, so write it to the write-buffer and we are
		 * done.
		 */
		memcpy(wbuf->buf, buf + written, len);

	if (c->leb_size - wbuf->offs >= c->max_write_size)
		wbuf->size = c->max_write_size;
	else
		wbuf->size = c->leb_size - wbuf->offs;
	wbuf->avail = wbuf->size - aligned_len;
	wbuf->used = aligned_len;
	wbuf->next_ino = 0;
	spin_unlock(&wbuf->lock);

exit:
	if (wbuf->sync_callback) {
		int free = c->leb_size - wbuf->offs - wbuf->used;

		err = wbuf->sync_callback(c, wbuf->lnum, free, 0);
		if (err)
			goto out;
	}

	if (wbuf->used)
		new_wbuf_timer_nolock(wbuf);

	return 0;

out:
	ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
		  len, wbuf->lnum, wbuf->offs, err);
	dbg_dump_node(c, buf);
	dbg_dump_stack();
	dbg_dump_leb(c, wbuf->lnum);
	return err;
}
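
/*
 * Illustrative sketch (not part of the original file): how a large node is
 * split by ubifs_wbuf_write_nolock(). The numbers are hypothetical
 * (max_write_size = 2048, wbuf->avail = 512, aligned node length = 5000):
 *
 *	1) the first 512 bytes top up the current write-buffer, which is then
 *	   flushed as one whole max. write unit;
 *	2) (5000 - 512) >> max_write_shift = 2 whole 2048-byte chunks, i.e.
 *	   4096 bytes, are written directly from @buf to the flash;
 *	3) the remaining 392 bytes are copied into the now-empty write-buffer
 *	   and stay in RAM until the next flush or timeout.
 */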
/**
 * ubifs_write_node - write node to the media.
 * @c: UBIFS file-system description object
 * @buf: the node to write
 * @len: node length
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
 *
 * This function automatically fills the node magic number, assigns a
 * sequence number, and calculates the node CRC checksum. The length of the
 * @buf buffer has to be aligned to the minimal I/O unit size. This function
 * automatically appends a padding node and padding bytes if needed. Returns
 * zero in case of success and a negative error code in case of failure.
 */
int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
		     int offs, int dtype)
{
	int err, buf_len = ALIGN(len, c->min_io_size);

	dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
	       lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len,
	       buf_len);
	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
	ubifs_assert(!c->ro_media && !c->ro_mount);
	ubifs_assert(!c->space_fixup);

	if (c->ro_error)
		return -EROFS;

	ubifs_prepare_node(c, buf, len, 1);
	err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype);
	if (err) {
		ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
			  buf_len, lnum, offs, err);
		dbg_dump_node(c, buf);
		dbg_dump_stack();
	}

	return err;
}

/**
 * ubifs_read_node_wbuf - read node from the media or write-buffer.
 * @wbuf: wbuf to check for un-written data
 * @buf: buffer to read to
 * @type: node type
 * @len: node length
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 *
 * This function reads a node of known type and length, checks it and stores
 * it in @buf. If the node partially or fully sits in the write-buffer, this
 * function takes data from the buffer, otherwise it reads the flash media.
 * Returns zero in case of success, %-EUCLEAN if the CRC mismatched and a
 * negative error code in case of failure.
 */
int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
			 int lnum, int offs)
{
	const struct ubifs_info *c = wbuf->c;
	int err, rlen, overlap;
	struct ubifs_ch *ch = buf;

	dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
	       dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
	ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(!(offs & 7) && offs < c->leb_size);
	ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);

	spin_lock(&wbuf->lock);
	overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
	if (!overlap) {
		/* We may safely unlock the write-buffer and read the data */
		spin_unlock(&wbuf->lock);
		return ubifs_read_node(c, buf, type, len, lnum, offs);
	}

	/* Don't read under wbuf */
	rlen = wbuf->offs - offs;
	if (rlen < 0)
		rlen = 0;

	/* Copy the rest from the write-buffer */
	memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
	spin_unlock(&wbuf->lock);

	if (rlen > 0) {
		/* Read everything that goes before the write-buffer */
		err = ubi_read(c->ubi, lnum, buf, offs, rlen);
		if (err && err != -EBADMSG) {
			ubifs_err("failed to read node %d from LEB %d:%d, "
				  "error %d", type, lnum, offs, err);
			dbg_dump_stack();
			return err;
		}
	}

	if (type != ch->node_type) {
		ubifs_err("bad node type (%d but expected %d)",
			  ch->node_type, type);
		goto out;
	}

	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
	if (err) {
		ubifs_err("expected node type %d", type);
		return err;
	}

	rlen = le32_to_cpu(ch->len);
	if (rlen != len) {
		ubifs_err("bad node length %d, expected %d", rlen, len);
		goto out;
	}

	return 0;

out:
	ubifs_err("bad node at LEB %d:%d", lnum, offs);
	dbg_dump_node(c, buf);
	dbg_dump_stack();
	return -EINVAL;
}
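
/*
 * Illustrative sketch (not part of the original file): the overlap logic in
 * ubifs_read_node_wbuf(). The write-buffer covers the byte range
 * [wbuf->offs, wbuf->offs + wbuf->used) in wbuf->lnum, so:
 *
 *	overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
 *	rlen = wbuf->offs - offs;	// flash portion, may be <= 0
 *
 * E.g. with hypothetical values wbuf->offs = 4096, offs = 3584 and
 * len = 1024, rlen = 512: the first 512 bytes come from the flash and the
 * remaining 512 bytes are copied out of the RAM buffer.
 */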
/**
 * ubifs_read_node - read node.
 * @c: UBIFS file-system description object
 * @buf: buffer to read to
 * @type: node type
 * @len: node length (not aligned)
 * @lnum: logical eraseblock number
 * @offs: offset within the logical eraseblock
 *
 * This function reads a node of known type and length, checks it and stores
 * it in @buf. Returns zero in case of success, %-EUCLEAN if the CRC
 * mismatched and a negative error code in case of failure.
 */
int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
		    int lnum, int offs)
{
	int err, l;
	struct ubifs_ch *ch = buf;

	dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
	ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
	ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size);
	ubifs_assert(!(offs & 7) && offs < c->leb_size);
	ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);

	err = ubi_read(c->ubi, lnum, buf, offs, len);
	if (err && err != -EBADMSG) {
		ubifs_err("cannot read node %d from LEB %d:%d, error %d",
			  type, lnum, offs, err);
		return err;
	}

	if (type != ch->node_type) {
		ubifs_err("bad node type (%d but expected %d)",
			  ch->node_type, type);
		goto out;
	}

	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
	if (err) {
		ubifs_err("expected node type %d", type);
		return err;
	}

	l = le32_to_cpu(ch->len);
	if (l != len) {
		ubifs_err("bad node length %d, expected %d", l, len);
		goto out;
	}

	return 0;

out:
	ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs,
		  ubi_is_mapped(c->ubi, lnum));
	dbg_dump_node(c, buf);
	dbg_dump_stack();
	return -EINVAL;
}

/**
 * ubifs_wbuf_init - initialize write-buffer.
 * @c: UBIFS file-system description object
 * @wbuf: write-buffer to initialize
 *
 * This function initializes the write-buffer. Returns zero in case of
 * success and %-ENOMEM in case of failure.
 */
int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
{
	size_t size;

	wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
	if (!wbuf->buf)
		return -ENOMEM;

	size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
	wbuf->inodes = kmalloc(size, GFP_KERNEL);
	if (!wbuf->inodes) {
		kfree(wbuf->buf);
		wbuf->buf = NULL;
		return -ENOMEM;
	}

	wbuf->used = 0;
	wbuf->lnum = wbuf->offs = -1;
	/*
	 * If the LEB starts at a max. write size aligned address, then the
	 * write-buffer size has to be set to @c->max_write_size. Otherwise,
	 * set it to something smaller so that it ends at the closest max.
	 * write size boundary.
	 */
	size = c->max_write_size - (c->leb_start % c->max_write_size);
	wbuf->avail = wbuf->size = size;
	wbuf->dtype = UBI_UNKNOWN;
	wbuf->sync_callback = NULL;
	mutex_init(&wbuf->io_mutex);
	spin_lock_init(&wbuf->lock);
	wbuf->c = c;
	wbuf->next_ino = 0;

	hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	wbuf->timer.function = wbuf_timer_callback_nolock;
	wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0);
	wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT;
	wbuf->delta *= 1000000000ULL;
	ubifs_assert(wbuf->delta <= ULONG_MAX);
	return 0;
}
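
/*
 * Illustrative sketch (not part of the original file): the initial
 * write-buffer size chosen by ubifs_wbuf_init(). The numbers are
 * hypothetical (leb_start = 512, max_write_size = 2048):
 *
 *	size = 2048 - (512 % 2048);	// 1536
 *
 * The first flush then ends exactly on a max. write size boundary relative
 * to the physical eraseblock, so all later writes can proceed in full
 * @c->max_write_size chunks. The @wbuf->inodes array is sized for the worst
 * case: one entry per common-header-sized node that could fit in the buffer,
 * plus one.
 */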
/**
 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
 * @wbuf: the write-buffer where to add
 * @inum: the inode number
 *
 * This function adds an inode number to the inode array of the write-buffer.
 */
void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum)
{
	if (!wbuf->buf)
		/* NOR flash or something similar */
		return;

	spin_lock(&wbuf->lock);
	if (wbuf->used)
		wbuf->inodes[wbuf->next_ino++] = inum;
	spin_unlock(&wbuf->lock);
}

/**
 * wbuf_has_ino - returns if the wbuf contains data from the inode.
 * @wbuf: the write-buffer
 * @inum: the inode number
 *
 * This function returns %1 if the write-buffer contains some data from the
 * given inode, and %0 otherwise.
 */
static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum)
{
	int i, ret = 0;

	spin_lock(&wbuf->lock);
	for (i = 0; i < wbuf->next_ino; i++)
		if (inum == wbuf->inodes[i]) {
			ret = 1;
			break;
		}
	spin_unlock(&wbuf->lock);

	return ret;
}

/**
 * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode.
 * @c: UBIFS file-system description object
 * @inode: inode to synchronize
 *
 * This function synchronizes write-buffers which contain nodes belonging to
 * @inode. Returns zero in case of success and a negative error code in case
 * of failure.
 */
int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode)
{
	int i, err = 0;

	for (i = 0; i < c->jhead_cnt; i++) {
		struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;

		if (i == GCHD)
			/*
			 * The GC head is special, do not look at it. Even if
			 * the head contains something related to this inode,
			 * it is a _copy_ of the corresponding on-flash node
			 * which sits somewhere else.
			 */
			continue;

		if (!wbuf_has_ino(wbuf, inode->i_ino))
			continue;

		mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
		if (wbuf_has_ino(wbuf, inode->i_ino))
			err = ubifs_wbuf_sync_nolock(wbuf);
		mutex_unlock(&wbuf->io_mutex);

		if (err) {
			ubifs_ro_mode(c, err);
			return err;
		}
	}
	return 0;
}
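
/*
 * Illustrative note (not part of the original file): fsync-style code might
 * use the helper above as follows, with a hypothetical caller:
 *
 *	// Flush any buffered nodes belonging to @inode to the flash:
 *	err = ubifs_sync_wbufs_by_inode(c, inode);
 *
 * Note that wbuf_has_ino() is checked twice: once without the I/O mutex as a
 * cheap filter, and again under the mutex, because the write-buffer may have
 * been synchronized (and refilled with other data) in between.
 */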